@unrdf/kgc-probe 26.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,503 @@
1
+ /**
2
+ * @fileoverview Network Probe - Allowlist-Only Network Capability Detection
3
+ *
4
+ * Probes network capabilities with strict allowlist enforcement.
5
+ * CRITICAL: Only probes URLs in config.netAllow array.
6
+ *
7
+ * Guard Constraints (Poka Yoke):
8
+ * - ONLY probe URLs in config.netAllow
9
+ * - If netAllow empty → return denied observation
10
+ * - NO scanning, NO host discovery, NO unauthorized requests
11
+ * - Timeout each request (5s max)
12
+ *
13
+ * @module @unrdf/kgc-probe/network
14
+ */
15
+
16
+ import { z } from 'zod';
17
+
18
+ /**
19
+ * @typedef {Object} Observation
20
+ * @property {string} capability - Capability being probed (e.g., 'fetch-api', 'tls-validation')
21
+ * @property {boolean} available - Whether capability is available
22
+ * @property {string} guardDecision - Guard decision: 'allowed' or 'denied'
23
+ * @property {string} [url] - URL tested (if applicable)
24
+ * @property {Object} [metadata] - Additional metadata about observation
25
+ * @property {number} [metadata.connectionTimeMs] - Connection time in milliseconds
26
+ * @property {number} [metadata.tlsHandshakeTimeMs] - TLS handshake time in milliseconds
27
+ * @property {number} [metadata.responseTimeMs] - Total response time in milliseconds
28
+ * @property {number} [metadata.statusCode] - HTTP status code
29
+ * @property {number} [metadata.dnsTimeMs] - DNS resolution time (estimated from timing)
30
+ * @property {string} [metadata.tlsVersion] - TLS version (if detectable)
31
+ * @property {Object} [metadata.headers] - Response headers (sanitized)
32
+ * @property {string} [metadata.error] - Error message if probe failed
33
+ * @property {string} [reason] - Reason for denial or failure
34
+ */
35
+
/**
 * Zod schema for probe configuration.
 *
 * - `netAllow`: allowlist of URLs that may be probed; each entry must pass
 *   Zod's URL validation. Defaults to an empty array.
 * - `timeout`: per-request timeout in milliseconds, between 100 and 5000.
 *   Defaults to 5000.
 */
const ProbeConfigSchema = z.object({
  netAllow: z.array(z.string().url()).optional().default([]),
  timeout: z.number().min(100).max(5000).optional().default(5000),
});
43
+
/**
 * Zod schema for a single probe observation.
 *
 * `metadata` is a free-form string-keyed record. The key schema is passed
 * explicitly because the single-argument `z.record(valueSchema)` form is not
 * accepted by Zod 4; the two-argument form works in both Zod 3 and Zod 4.
 */
const ObservationSchema = z.object({
  capability: z.string(),
  available: z.boolean(),
  guardDecision: z.enum(['allowed', 'denied']),
  url: z.string().url().optional(),
  metadata: z.record(z.string(), z.unknown()).optional(),
  reason: z.string().optional(),
});
55
+
/**
 * Guard: Check if URL is in allowlist
 *
 * Matching is exact (no normalization, no prefix or wildcard matching): the
 * URL must be an element of the allowlist array.
 *
 * @param {string} url - URL to check
 * @param {string[]} allowlist - Allowed URLs
 * @returns {{ allowed: boolean, reason?: string }} `allowed: true` only when
 *   `url` is an element of `allowlist`; otherwise `allowed: false` with a reason
 */
function guardUrlAllowlist(url, allowlist) {
  if (!allowlist || allowlist.length === 0) {
    return {
      allowed: false,
      reason: 'No URLs in allowlist (netAllow is empty)',
    };
  }

  // A string allowlist would turn `.includes` into a substring test and let
  // partial URLs through, so anything that is not a real array is rejected.
  if (!Array.isArray(allowlist)) {
    return {
      allowed: false,
      reason: 'Allowlist must be an array of URLs',
    };
  }

  if (!allowlist.includes(url)) {
    return {
      allowed: false,
      reason: `URL ${url} not in allowlist`,
    };
  }

  return { allowed: true };
}
81
+
/**
 * Probe Fetch API availability
 *
 * Performs no network call: it only checks which Fetch-related globals
 * (`fetch`, `Headers`, `Request`, `Response`) are defined.
 *
 * @returns {Observation} Observation with capability 'fetch-api'
 */
function probeFetchAPI() {
  return {
    capability: 'fetch-api',
    available: typeof fetch === 'function',
    guardDecision: 'allowed', // No network call, just API check
    metadata: {
      environment: typeof globalThis !== 'undefined' ? 'global' : 'unknown',
      hasHeaders: typeof Headers !== 'undefined',
      hasRequest: typeof Request !== 'undefined',
      hasResponse: typeof Response !== 'undefined',
    },
  };
}
101
+
/**
 * Probe URL with HEAD request (minimal bandwidth)
 *
 * Never rejects: a failed or timed-out request is reported as an observation
 * with `available: false`.
 *
 * NOTE(review): no `redirect` option is passed, so fetch follows redirects by
 * default and the final response may come from a URL that is not in the
 * allowlist. Consider `redirect: 'manual'` if that must be prevented.
 *
 * @param {string} url - URL to probe (must be in allowlist)
 * @param {number} timeout - Timeout in milliseconds
 * @returns {Promise<Observation>} Observation with capability 'http-head-request'
 */
async function probeUrlWithHead(url, timeout = 5000) {
  const startTime = performance.now();
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);
  // Elapsed time since the probe started, rounded to two decimals
  const elapsedMs = () => Math.round((performance.now() - startTime) * 100) / 100;

  try {
    const response = await fetch(url, {
      method: 'HEAD',
      signal: controller.signal,
      // Disable caching to get real network timing
      cache: 'no-store',
    });
    const responseTimeMs = elapsedMs();

    // Extract headers (sanitized - no cookies/auth): only this fixed set is copied
    const safeHeaders = ['content-type', 'content-length', 'cache-control', 'etag', 'last-modified'];
    const headers = {};
    for (const header of safeHeaders) {
      const value = response.headers.get(header);
      if (value) {
        headers[header] = value;
      }
    }

    return {
      capability: 'http-head-request',
      available: true,
      guardDecision: 'allowed',
      url,
      metadata: {
        statusCode: response.status,
        responseTimeMs,
        headers,
        redirected: response.redirected,
        type: response.type,
      },
    };
  } catch (error) {
    // A rejection is not guaranteed to be an Error instance (it may even be null)
    const message = error instanceof Error ? error.message : String(error);
    const errorName = error?.name;

    return {
      capability: 'http-head-request',
      available: false,
      guardDecision: 'allowed',
      url,
      metadata: {
        error: message,
        errorName,
        responseTimeMs: elapsedMs(),
        // The timeout above aborts the request, which surfaces as an AbortError
        timedOut: errorName === 'AbortError',
      },
      reason: `HEAD request failed: ${message}`,
    };
  } finally {
    // Always release the timer so it cannot fire late or keep the process alive
    clearTimeout(timeoutId);
  }
}
169
+
/**
 * Probe TLS certificate validation behavior
 *
 * Issues a HEAD request to an HTTPS URL. A successful response is taken as
 * evidence that the certificate was accepted. On failure, `metadata.isTlsError`
 * reports whether the error looks certificate/TLS related.
 *
 * Never rejects for request failures: they are reported as observations with
 * `available: false`.
 *
 * @param {string} url - HTTPS URL to probe (must be in allowlist)
 * @param {number} timeout - Timeout in milliseconds
 * @returns {Promise<Observation>} Observation with capability
 *   'tls-certificate-validation'; `guardDecision` is 'denied' for non-HTTPS URLs
 */
async function probeTlsValidation(url, timeout = 5000) {
  // Only probe HTTPS URLs
  if (!url.startsWith('https://')) {
    return {
      capability: 'tls-certificate-validation',
      available: false,
      guardDecision: 'denied',
      url,
      reason: 'Not an HTTPS URL',
    };
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await fetch(url, {
      method: 'HEAD',
      signal: controller.signal,
      cache: 'no-store',
    });

    return {
      capability: 'tls-certificate-validation',
      available: true,
      guardDecision: 'allowed',
      url,
      metadata: {
        statusCode: response.status,
        validCertificate: true, // If we got here, cert is valid
        protocol: 'https',
      },
    };
  } catch (error) {
    // A rejection is not guaranteed to be an Error instance (it may even be null)
    const message = error instanceof Error ? error.message : String(error);

    // Distinguish TLS errors from other errors. Some runtimes (e.g. Node's
    // fetch) reject with a generic message and attach the underlying failure
    // as `error.cause`, so the cause's message and code are inspected too.
    const cause = error?.cause;
    const evidence = [message, error?.code, cause, cause?.message, cause?.code]
      .filter((part) => typeof part === 'string')
      .join(' ');
    const isTlsError = /certificate|TLS|SSL|CERT/.test(evidence);

    return {
      capability: 'tls-certificate-validation',
      available: false,
      guardDecision: 'allowed',
      url,
      metadata: {
        error: message,
        isTlsError,
        protocol: 'https',
      },
      reason: `TLS validation failed: ${message}`,
    };
  } finally {
    // Always release the timer so it cannot fire late or keep the process alive
    clearTimeout(timeoutId);
  }
}
234
+
/**
 * Probe response payload limits
 *
 * Reads the `content-length` header of a HEAD response; no body is downloaded.
 * Never rejects: request failures are reported as observations with
 * `available: false`.
 *
 * @param {string} url - URL to probe (must be in allowlist)
 * @param {number} timeout - Timeout in milliseconds
 * @returns {Promise<Observation>} Observation with capability
 *   'response-payload-size'. `metadata.contentLengthBytes` is an integer, or
 *   null when the header is missing or not numeric; `metadata.contentLengthMB`
 *   is a two-decimal string, or null under the same conditions.
 */
async function probePayloadLimits(url, timeout = 5000) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await fetch(url, {
      method: 'HEAD',
      signal: controller.signal,
      cache: 'no-store',
    });

    const contentLength = response.headers.get('content-length');
    const parsedLength = contentLength ? Number.parseInt(contentLength, 10) : Number.NaN;
    // A malformed header must not leak NaN into the observation
    const contentLengthBytes = Number.isNaN(parsedLength) ? null : parsedLength;

    return {
      capability: 'response-payload-size',
      available: true,
      guardDecision: 'allowed',
      url,
      metadata: {
        contentLengthBytes,
        // Explicit null check: a zero-byte payload is a valid size ("0.00")
        contentLengthMB:
          contentLengthBytes !== null ? (contentLengthBytes / (1024 * 1024)).toFixed(2) : null,
        hasContentLength: contentLength !== null,
        statusCode: response.status,
      },
    };
  } catch (error) {
    // A rejection is not guaranteed to be an Error instance (it may even be null)
    const message = error instanceof Error ? error.message : String(error);

    return {
      capability: 'response-payload-size',
      available: false,
      guardDecision: 'allowed',
      url,
      metadata: {
        error: message,
      },
      reason: `Failed to probe payload limits: ${message}`,
    };
  } finally {
    // Always release the timer so it cannot fire late or keep the process alive
    clearTimeout(timeoutId);
  }
}
285
+
/**
 * Probe cache headers behavior
 *
 * Reads `cache-control`, `etag`, `last-modified` and `expires` from a HEAD
 * response. `metadata.cacheable` is true only when a Cache-Control header is
 * present and contains neither `no-store` nor `no-cache`.
 *
 * Never rejects: request failures are reported as observations with
 * `available: false`.
 *
 * @param {string} url - URL to probe (must be in allowlist)
 * @param {number} timeout - Timeout in milliseconds
 * @returns {Promise<Observation>} Observation with capability 'cache-headers'
 */
async function probeCacheHeaders(url, timeout = 5000) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await fetch(url, {
      method: 'HEAD',
      signal: controller.signal,
      cache: 'no-store',
    });

    const cacheControl = response.headers.get('cache-control');
    const etag = response.headers.get('etag');
    const lastModified = response.headers.get('last-modified');
    const expires = response.headers.get('expires');

    // Cache-Control directive names are case-insensitive, so compare in lower
    // case; the raw header value is still reported unchanged.
    const directives = cacheControl ? cacheControl.toLowerCase() : '';
    const cacheable = cacheControl
      ? !directives.includes('no-store') && !directives.includes('no-cache')
      : false;

    return {
      capability: 'cache-headers',
      available: true,
      guardDecision: 'allowed',
      url,
      metadata: {
        cacheControl,
        hasEtag: etag !== null,
        hasLastModified: lastModified !== null,
        hasExpires: expires !== null,
        cacheable,
      },
    };
  } catch (error) {
    // A rejection is not guaranteed to be an Error instance (it may even be null)
    const message = error instanceof Error ? error.message : String(error);

    return {
      capability: 'cache-headers',
      available: false,
      guardDecision: 'allowed',
      url,
      metadata: {
        error: message,
      },
      reason: `Failed to probe cache headers: ${message}`,
    };
  } finally {
    // Always release the timer so it cannot fire late or keep the process alive
    clearTimeout(timeoutId);
  }
}
339
+
/**
 * Probe DNS resolution time (estimated via request timing)
 *
 * DNS time is not isolated: the reported figure is the wall-clock duration of
 * a whole HEAD request, which includes DNS resolution.
 *
 * Never rejects: request failures are reported as observations with
 * `available: false`.
 *
 * @param {string} url - URL to probe (must be in allowlist)
 * @param {number} timeout - Timeout in milliseconds
 * @returns {Promise<Observation>} Observation with capability 'dns-resolution'
 */
async function probeDnsResolution(url, timeout = 5000) {
  const startTime = performance.now();
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await fetch(url, {
      method: 'HEAD',
      signal: controller.signal,
      cache: 'no-store',
    });
    const totalTimeMs = performance.now() - startTime;

    // DNS time is estimated as part of total connection time
    // We can't isolate DNS precisely in browser/Node without lower-level APIs
    return {
      capability: 'dns-resolution',
      available: true,
      guardDecision: 'allowed',
      url,
      metadata: {
        totalRequestTimeMs: Math.round(totalTimeMs * 100) / 100,
        estimatedDnsIncluded: true,
        statusCode: response.status,
        note: 'DNS time included in totalRequestTimeMs (not isolated)',
      },
    };
  } catch (error) {
    // A rejection is not guaranteed to be an Error instance (it may even be null)
    const message = error instanceof Error ? error.message : String(error);

    return {
      capability: 'dns-resolution',
      available: false,
      guardDecision: 'allowed',
      url,
      metadata: {
        error: message,
      },
      reason: `DNS resolution probe failed: ${message}`,
    };
  } finally {
    // Always release the timer so it cannot fire late or keep the process alive
    clearTimeout(timeoutId);
  }
}
392
+
/**
 * Probe network capabilities with allowlist enforcement
 *
 * GUARD CONSTRAINTS:
 * - Only probes URLs in config.netAllow array
 * - If netAllow is empty: returns the fetch-api observation plus a single
 *   denied 'network-probe' observation
 * - NO scanning, NO unauthorized requests
 * - Timeout each request (5s max)
 *
 * URLs are probed one at a time, and the tests for each URL run sequentially.
 *
 * @param {Object} config - Probe configuration
 * @param {string[]} [config.netAllow=[]] - Allowlist of URLs to probe
 * @param {number} [config.timeout=5000] - Timeout per request in milliseconds (max 5000)
 * @returns {Promise<Observation[]>} Array of observations
 * @throws If `config` does not satisfy ProbeConfigSchema (error raised by the schema's `parse`)
 *
 * @example
 * // No URLs allowed
 * const obs1 = await probeNetwork({ netAllow: [] });
 * // obs1 = [{ capability: 'fetch-api', ... }, { capability: 'network-probe', guardDecision: 'denied', ... }]
 *
 * @example
 * // Probe allowed URLs
 * const obs2 = await probeNetwork({
 *   netAllow: ['https://example.com', 'https://httpbin.org/get'],
 *   timeout: 3000
 * });
 * // obs2 = [{ capability: 'fetch-api', ... }, { capability: 'http-head-request', url: 'https://example.com', ... }, ...]
 */
export async function probeNetwork(config = {}) {
  // Validate config
  const { netAllow, timeout } = ProbeConfigSchema.parse(config);

  const observations = [];

  // 1. Always probe Fetch API availability (no network call)
  observations.push(probeFetchAPI());

  // 2. Guard: If no allowlist, return denied observation
  if (!netAllow || netAllow.length === 0) {
    observations.push({
      capability: 'network-probe',
      available: false,
      guardDecision: 'denied',
      reason: 'No URLs in allowlist (config.netAllow is empty)',
      metadata: {
        allowlistSize: 0,
      },
    });
    return observations;
  }

  // 3. Probe each allowlisted URL
  for (const url of netAllow) {
    // Guard: Verify URL is in allowlist (defensive check)
    const guardResult = guardUrlAllowlist(url, netAllow);
    if (!guardResult.allowed) {
      observations.push({
        capability: 'network-probe',
        available: false,
        guardDecision: 'denied',
        url,
        reason: guardResult.reason,
      });
      continue;
    }

    // Probe URL with various tests
    try {
      // Test 1: Basic HEAD request
      observations.push(await probeUrlWithHead(url, timeout));

      // Test 2: TLS validation (HTTPS only)
      if (url.startsWith('https://')) {
        observations.push(await probeTlsValidation(url, timeout));
      }

      // Test 3: Payload limits
      observations.push(await probePayloadLimits(url, timeout));

      // Test 4: Cache headers
      observations.push(await probeCacheHeaders(url, timeout));

      // Test 5: DNS resolution timing
      observations.push(await probeDnsResolution(url, timeout));
    } catch (error) {
      // Catch-all for unexpected errors. A rejection is not guaranteed to be
      // an Error instance, so the message is extracted defensively.
      const message = error instanceof Error ? error.message : String(error);
      observations.push({
        capability: 'network-probe',
        available: false,
        guardDecision: 'allowed',
        url,
        metadata: {
          error: message,
          errorStack: error?.stack,
        },
        reason: `Unexpected error: ${message}`,
      });
    }
  }

  // TODO: observations are returned unvalidated; parsing each one with
  // ObservationSchema was disabled temporarily due to Zod v4 schema issues.
  return observations;
}