@pentoshi/clai 0.10.5 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/README.md +32 -0
  2. package/dist/agent/runner.js +41 -3
  3. package/dist/agent/runner.js.map +1 -1
  4. package/dist/commands/providers.js +28 -0
  5. package/dist/commands/providers.js.map +1 -1
  6. package/dist/commands/search-providers.d.ts +50 -0
  7. package/dist/commands/search-providers.js +134 -0
  8. package/dist/commands/search-providers.js.map +1 -0
  9. package/dist/commands/update.js +1 -1
  10. package/dist/index.js +8 -0
  11. package/dist/index.js.map +1 -1
  12. package/dist/llm/provider.js +9 -6
  13. package/dist/llm/provider.js.map +1 -1
  14. package/dist/prompts/index.d.ts +1 -1
  15. package/dist/prompts/index.js +6 -0
  16. package/dist/prompts/index.js.map +1 -1
  17. package/dist/safety/classifier.js +40 -0
  18. package/dist/safety/classifier.js.map +1 -1
  19. package/dist/store/config.d.ts +5 -0
  20. package/dist/store/config.js +7 -0
  21. package/dist/store/config.js.map +1 -1
  22. package/dist/store/keys.d.ts +65 -0
  23. package/dist/store/keys.js +164 -28
  24. package/dist/store/keys.js.map +1 -1
  25. package/dist/tools/http.d.ts +12 -1
  26. package/dist/tools/http.js +8 -43
  27. package/dist/tools/http.js.map +1 -1
  28. package/dist/tools/registry.js +52 -0
  29. package/dist/tools/registry.js.map +1 -1
  30. package/dist/tools/shell.d.ts +25 -0
  31. package/dist/tools/shell.js +155 -6
  32. package/dist/tools/shell.js.map +1 -1
  33. package/dist/tools/web/audit.d.ts +154 -0
  34. package/dist/tools/web/audit.js +147 -0
  35. package/dist/tools/web/audit.js.map +1 -0
  36. package/dist/tools/web/budget.d.ts +76 -0
  37. package/dist/tools/web/budget.js +187 -0
  38. package/dist/tools/web/budget.js.map +1 -0
  39. package/dist/tools/web/capture.d.ts +201 -0
  40. package/dist/tools/web/capture.js +380 -0
  41. package/dist/tools/web/capture.js.map +1 -0
  42. package/dist/tools/web/fetch-core.d.ts +66 -0
  43. package/dist/tools/web/fetch-core.js +1123 -0
  44. package/dist/tools/web/fetch-core.js.map +1 -0
  45. package/dist/tools/web/fetch.d.ts +42 -0
  46. package/dist/tools/web/fetch.js +115 -0
  47. package/dist/tools/web/fetch.js.map +1 -0
  48. package/dist/tools/web/providers/brave.d.ts +46 -0
  49. package/dist/tools/web/providers/brave.js +263 -0
  50. package/dist/tools/web/providers/brave.js.map +1 -0
  51. package/dist/tools/web/providers/duckduckgo.d.ts +47 -0
  52. package/dist/tools/web/providers/duckduckgo.js +248 -0
  53. package/dist/tools/web/providers/duckduckgo.js.map +1 -0
  54. package/dist/tools/web/providers/provider.d.ts +99 -0
  55. package/dist/tools/web/providers/provider.js +38 -0
  56. package/dist/tools/web/providers/provider.js.map +1 -0
  57. package/dist/tools/web/providers/tavily.d.ts +52 -0
  58. package/dist/tools/web/providers/tavily.js +285 -0
  59. package/dist/tools/web/providers/tavily.js.map +1 -0
  60. package/dist/tools/web/readable.d.ts +67 -0
  61. package/dist/tools/web/readable.js +248 -0
  62. package/dist/tools/web/readable.js.map +1 -0
  63. package/dist/tools/web/redact.d.ts +120 -0
  64. package/dist/tools/web/redact.js +155 -0
  65. package/dist/tools/web/redact.js.map +1 -0
  66. package/dist/tools/web/search.d.ts +51 -0
  67. package/dist/tools/web/search.js +389 -0
  68. package/dist/tools/web/search.js.map +1 -0
  69. package/dist/tools/web/ssrf-guard.d.ts +85 -0
  70. package/dist/tools/web/ssrf-guard.js +265 -0
  71. package/dist/tools/web/ssrf-guard.js.map +1 -0
  72. package/dist/tools/web/types.d.ts +331 -0
  73. package/dist/tools/web/types.js +71 -0
  74. package/dist/tools/web/types.js.map +1 -0
  75. package/dist/ui/spinner.js +87 -14
  76. package/dist/ui/spinner.js.map +1 -1
  77. package/package.json +3 -1
@@ -0,0 +1,1123 @@
1
+ /**
2
+ * `web.fetch` core orchestration.
3
+ *
4
+ * This module is the deterministic, dependency-injected pipeline that
5
+ * `src/tools/web/fetch.ts` (added in task 4.x) wraps in a `ToolResult`
6
+ * adapter and audit-log emitter. The core itself returns a typed
7
+ * {@link WebFetchOutcome} and never touches the registry, the safety
8
+ * classifier, or `auditLog`.
9
+ *
10
+ * The pipeline implements the design's "Pipeline steps in detail"
11
+ * sequence (`.kiro/specs/web-search-and-fetch/design.md`):
12
+ *
13
+ * 1. argument validation
14
+ * 2. URL parse + SSRF pre-check on hostname literal
15
+ * 3. DNS resolve + IP pin
16
+ * 4. SSRF check on resolved IP
17
+ * 5. pinned `https.request` (or `http.request`) with custom `lookup`
18
+ * 6. TLS handshake capture
19
+ * 7. response headers + body stream with `maxBytes` cap
20
+ * 8. redirect handling (≤ {@link MAX_REDIRECT_HOPS}, re-running
21
+ * validation + SSRF + DNS at each hop)
22
+ * 9. body classification (binary / raw / readable)
23
+ * 10. metadata assembly + 64 KiB budget enforcement
24
+ *
25
+ * Every outbound transport call (DNS, HTTP, HTTPS) is injectable via
26
+ * {@link WebFetchCoreOptions} so tests in epics 3.x, 4.x and 6.x can
27
+ * stub the network deterministically without spinning up a real server.
28
+ */
29
+ import { Buffer } from "node:buffer";
30
+ import { lookup as defaultDnsLookup } from "node:dns/promises";
31
+ import http from "node:http";
32
+ import https from "node:https";
33
+ import { Capture } from "./capture.js";
34
+ import { enforce as enforceBudget } from "./budget.js";
35
+ import { toReadableText } from "./readable.js";
36
+ import { applyToCookies, applyToHeaders } from "./redact.js";
37
+ import { classify as classifyIp, classifyHost, isAllowedScheme, } from "./ssrf-guard.js";
38
+ import { DEFAULT_INCLUDE_HEADERS, DEFAULT_INCLUDE_REDIRECT_CHAIN, DEFAULT_INCLUDE_TIMING, DEFAULT_MAX_BYTES, DEFAULT_REDACT_SENSITIVE, DEFAULT_RESPONSE_MODE, FETCH_TIMEOUT_MS, HTTP_ERROR_BODY_PREVIEW_BYTES, MAX_MAX_BYTES, MAX_REDIRECT_HOPS, METADATA_BUDGET_BYTES, MIN_MAX_BYTES, RESPONSE_MODES, TRUNCATION_MARKER, } from "./types.js";
39
+ // ---------------------------------------------------------------------------
40
+ // Constants
41
+ // ---------------------------------------------------------------------------
42
/**
 * Content-Type matchers whose payloads are always refused with the
 * `binary-content` error kind, whatever `responseMode` was requested
 * (Requirements 2.9 + 2.30). Every pattern is anchored at the start of
 * the header value and matches case-insensitively.
 */
const BINARY_CONTENT_TYPE_PATTERNS = [
  /^image\//i,
  /^application\/octet-stream/i,
  /^application\/pdf/i,
  /^video\//i,
];

/**
 * Content-Type matcher for documents that are converted from HTML to
 * readable text when `responseMode="readable"` (Requirement 2.4). Any
 * other text Content-Type is passed through untouched in that mode
 * (Requirement 2.5).
 */
const HTML_CONTENT_TYPE_PATTERN = /^(text\/html|application\/xhtml\+xml)/i;

/** User-Agent header value attached to every outbound `web.fetch` request. */
const DEFAULT_USER_AGENT = "clai-web-fetch/1.0";

/** HTTP statuses that, together with a `Location` header, start a redirect hop. */
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
64
+ // ---------------------------------------------------------------------------
65
+ // Public entry
66
+ // ---------------------------------------------------------------------------
67
/**
 * Run the full `web.fetch` pipeline for the given arguments.
 *
 * Resolves to a typed {@link WebFetchOutcome}. Failures are reported as
 * outcomes built by `errorOutcome` rather than thrown: argument
 * validation failures (including a missing argument bag or an exception
 * raised while validating), errors returned by the request loop, an
 * exception escaping the request loop, and the wall-clock timeout.
 *
 * @param args    Caller-supplied {@link WebFetchArgs}; `null`/`undefined`
 *                is treated as an empty bag and fails validation.
 * @param options Optional injection points: `now` (clock, defaults to
 *                `Date.now`), `httpsRequest`, `httpRequest` (default to
 *                Node's `https.request` / `http.request`) and `dnsLookup`
 *                (defaults to `dns/promises.lookup`).
 * @returns The outcome produced by `buildSuccessOutcome` on success or
 *          by `errorOutcome` on any failure.
 */
export async function webFetchCore(args, options = {}) {
  const now = options.now ?? (() => Date.now());
  const httpsRequestFn = options.httpsRequest ?? https.request;
  const httpRequestFn = options.httpRequest ?? http.request;
  const dnsLookupFn = options.dnsLookup ?? defaultDnsLookup;
  const t0 = now();

  // --------------------------------------------------------------------- 1
  // Argument validation. Runs synchronously before any I/O so a malformed
  // call never reaches DNS or the network. A missing argument bag, or an
  // exception raised inside the validator, is folded into a `validation`
  // outcome so this function keeps its "never throws" contract.
  const rawArgs = args ?? {};
  let validated;
  try {
    validated = validateArgs(rawArgs);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    validated = {
      ok: false,
      error: {
        kind: "validation",
        message: `web.fetch: invalid arguments: ${detail}`,
      },
    };
  }
  if (!validated.ok) {
    const echoedUrl = typeof rawArgs.url === "string" ? rawArgs.url : "";
    return errorOutcome({
      requestedUrl: echoedUrl,
      finalUrl: echoedUrl,
      mode: resolveResponseMode(rawArgs.responseMode),
      error: validated.error,
      now,
      t0,
    });
  }
  const a = validated.value;

  // Build the capture before the timer is armed: if either constructor
  // throws, no timer is left pending outside the try/finally below.
  const initialUrl = new URL(a.url);
  const capture = new Capture({
    isHttps: initialUrl.protocol === "https:",
    finalHostname: initialUrl.hostname,
  });

  // Single place that assembles the post-validation error outcome; the
  // request-loop failure, the timeout and the runaway-exception paths all
  // report the same captured context and the same include*/redact flags.
  const fail = (error, finalUrl) =>
    errorOutcome({
      requestedUrl: a.url,
      finalUrl,
      mode: a.responseMode,
      capture,
      error,
      now,
      t0,
      includeHeaders: a.includeHeaders,
      includeTls: a.includeTls,
      includeTiming: a.includeTiming,
      includeRedirectChain: a.includeRedirectChain,
      redactSensitive: a.redactSensitive,
    });

  // --------------------------------------------------------------------- 2
  // One AbortController plus one wall-clock timer (FETCH_TIMEOUT_MS) for
  // the whole pipeline. The controller is handed to the request loop;
  // `timedOut` lets the catch block below tell a timeout apart from any
  // other exception.
  const controller = new AbortController();
  let timedOut = false;
  const timeoutHandle = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, FETCH_TIMEOUT_MS);
  // `unref` so the timer never holds the event loop open if the caller
  // forgets to await us.
  timeoutHandle.unref?.();

  let lastUrl = a.url;
  try {
    // ----------------------------------------------------------------- 3-9
    // Run the redirect-aware request loop.
    const result = await runRequestLoop({
      args: a,
      capture,
      controller,
      now,
      t0,
      httpsRequestFn,
      httpRequestFn,
      dnsLookupFn,
    });
    lastUrl = result.lastUrl;
    if (!result.ok) {
      return fail(result.error, lastUrl);
    }
    // ----------------------------------------------------------------- 10
    // Hand the captured fields to the success builder. `truncatedAt` is
    // forwarded only when the loop produced one.
    return buildSuccessOutcome({
      args: a,
      capture,
      lastUrl: result.lastUrl,
      body: result.body,
      bytesReceived: result.bytesReceived,
      truncated: result.truncated,
      ...(result.truncatedAt !== undefined
        ? { truncatedAt: result.truncatedAt }
        : {}),
      contentType: result.contentType,
      now,
      t0,
    });
  } catch (err) {
    // Runaway exception: report it as a timeout when our own timer fired,
    // otherwise as "network", so the caller still gets a typed outcome
    // instead of a thrown Error.
    const error = timedOut
      ? timeoutError(lastUrl, t0, now)
      : networkError(lastUrl, err);
    return fail(error, lastUrl);
  } finally {
    clearTimeout(timeoutHandle);
  }
}
206
/**
 * Synchronously validate {@link WebFetchArgs} against Requirements 2.1,
 * 2.2, 2.12, 2.13, 2.33, 2.34, 5.4, and 7.1–7.3.
 *
 * Checks run in a fixed order and stop at the first violation:
 * `url` (non-empty string, no whitespace, no ASCII control characters,
 * allowed scheme), `maxBytes`, `includeHeaders`, `includeTls` (plus URL
 * parsing, needed for its scheme-dependent default), `includeTiming`,
 * `includeRedirectChain`, `responseMode`, `redactSensitive`.
 *
 * @param args Raw caller arguments. `null`/`undefined` is treated as an
 *             empty bag, so it fails the `url` check instead of throwing.
 * @returns `{ ok: true, value }` with every option resolved to a concrete
 *          value, or `{ ok: false, error }` where `error.kind` is
 *          `"validation"` (message names the offending argument) or
 *          `"blocked-scheme"`.
 */
function validateArgs(args) {
  const input = args ?? {};

  // url: required string, no whitespace, no ASCII control chars, allowed
  // scheme (Requirements 2.1, 2.12, 7.1, 7.3).
  if (typeof input.url !== "string" || input.url.length === 0) {
    return validationError("url is required and must be a non-empty string");
  }
  if (/\s/.test(input.url)) {
    return validationError("url must not contain whitespace characters (Requirement 7.3)");
  }
  if (/[\u0000-\u001f\u007f]/.test(input.url)) {
    return validationError("url must not contain ASCII control characters (Requirement 7.3)");
  }
  if (!isAllowedScheme(input.url)) {
    return {
      ok: false,
      error: {
        kind: "blocked-scheme",
        message: `Refusing scheme: ${schemeOf(input.url)}`,
        url: input.url,
      },
    };
  }

  // maxBytes: optional integer in [MIN_MAX_BYTES, MAX_MAX_BYTES] (2.2, 2.13).
  let maxBytes = DEFAULT_MAX_BYTES;
  if (input.maxBytes !== undefined) {
    const inRange =
      typeof input.maxBytes === "number" &&
      Number.isInteger(input.maxBytes) &&
      input.maxBytes >= MIN_MAX_BYTES &&
      input.maxBytes <= MAX_MAX_BYTES;
    if (!inRange) {
      return validationError(`maxBytes must be an integer in [${MIN_MAX_BYTES}, ${MAX_MAX_BYTES}]`);
    }
    maxBytes = input.maxBytes;
  }

  // includeHeaders: optional boolean (Requirement 2.34).
  if (input.includeHeaders !== undefined && typeof input.includeHeaders !== "boolean") {
    return validationError("includeHeaders must be a boolean");
  }
  const includeHeaders = input.includeHeaders ?? DEFAULT_INCLUDE_HEADERS;

  // includeTls: optional boolean. Default depends on scheme: true for
  // https://, false for http:// (Requirement 2.16, 2.34).
  if (input.includeTls !== undefined && typeof input.includeTls !== "boolean") {
    return validationError("includeTls must be a boolean");
  }
  // A string that passed the scheme check is not guaranteed to parse
  // (the redirect loop guards the same call), so convert a parse failure
  // into a validation error instead of letting it escape as an exception.
  let parsedUrl;
  try {
    parsedUrl = new URL(input.url);
  } catch {
    return validationError("url must be a valid absolute URL");
  }
  const includeTls = input.includeTls ?? (parsedUrl.protocol === "https:");

  // includeTiming: optional boolean (Requirement 2.34).
  if (input.includeTiming !== undefined && typeof input.includeTiming !== "boolean") {
    return validationError("includeTiming must be a boolean");
  }
  const includeTiming = input.includeTiming ?? DEFAULT_INCLUDE_TIMING;

  // includeRedirectChain: optional boolean (Requirement 2.34).
  if (
    input.includeRedirectChain !== undefined &&
    typeof input.includeRedirectChain !== "boolean"
  ) {
    return validationError("includeRedirectChain must be a boolean");
  }
  const includeRedirectChain = input.includeRedirectChain ?? DEFAULT_INCLUDE_REDIRECT_CHAIN;

  // responseMode: optional, must be one of RESPONSE_MODES (Requirement 2.33).
  if (input.responseMode !== undefined) {
    const known =
      typeof input.responseMode === "string" &&
      RESPONSE_MODES.includes(input.responseMode);
    if (!known) {
      return validationError(`responseMode must be one of: ${RESPONSE_MODES.join(", ")}`);
    }
  }
  const responseMode = input.responseMode ?? DEFAULT_RESPONSE_MODE;

  // redactSensitive: optional boolean (Requirement 2.34).
  if (input.redactSensitive !== undefined && typeof input.redactSensitive !== "boolean") {
    return validationError("redactSensitive must be a boolean");
  }
  const redactSensitive = input.redactSensitive ?? DEFAULT_REDACT_SENSITIVE;

  return {
    ok: true,
    value: {
      url: input.url,
      maxBytes,
      includeHeaders,
      includeTls,
      includeTiming,
      includeRedirectChain,
      responseMode,
      redactSensitive,
    },
  };
}
299
/**
 * Wrap a human-readable message in the failed-validation result shape.
 *
 * @param message Explanation of which argument broke which rule.
 * @returns `{ ok: false, error: { kind: "validation", message } }`.
 */
function validationError(message) {
  const error = { kind: "validation", message };
  return { ok: false, error };
}
305
/**
 * Pick the response mode to report when argument validation has not
 * produced a normalised bundle: the supplied value if it is one of
 * `RESPONSE_MODES`, otherwise `DEFAULT_RESPONSE_MODE`.
 *
 * @param mode Caller-supplied `responseMode`; may be `undefined` or any
 *             unvalidated value.
 * @returns A member of `RESPONSE_MODES` or the default mode.
 */
function resolveResponseMode(mode) {
  const recognised = mode !== undefined && RESPONSE_MODES.includes(mode);
  return recognised ? mode : DEFAULT_RESPONSE_MODE;
}
317
/**
 * Best-effort scheme extraction for a URL string that may not parse.
 * Only used to build `blocked-scheme` error messages.
 *
 * @param raw URL-like string.
 * @returns The leading scheme including its trailing colon (original
 *          casing preserved), or `raw` unchanged when no scheme prefix
 *          is found.
 */
function schemeOf(raw) {
  const found = raw.match(/^([a-z][a-z0-9+.\-]*):/i);
  if (!found || typeof found[1] !== "string") {
    return raw;
  }
  return `${found[1]}:`;
}
325
/**
 * Run the initial request plus up to {@link MAX_REDIRECT_HOPS} redirect
 * hops. Each hop:
 *
 *   - re-checks the scheme and parses the current URL
 *   - applies the SSRF pre-check (`classifyHost`) to the hostname literal
 *   - resolves the hostname through the injected DNS lookup, bounded by
 *     the shared abort signal, and records `dnsMs` + the resolved IP
 *   - applies the SSRF check (`classifyIp`) to the resolved IP
 *   - issues the pinned-IP request via `issueHop`
 *   - on a redirect, records the hop and continues with the resolved
 *     `Location`, or fails with `redirect-limit` when following it would
 *     exceed the cap
 *   - on a terminal response, records the hop and returns the body data
 *   - on a hop error, returns that error
 *
 * @param ctx Shared pipeline context: `args`, `capture`, `controller`,
 *            `now`, `t0`, `httpsRequestFn`, `httpRequestFn`, `dnsLookupFn`.
 * @returns `{ ok: true, lastUrl, contentType, body, bytesReceived,
 *          truncated, truncatedAt? }` or `{ ok: false, lastUrl, error }`.
 */
async function runRequestLoop(ctx) {
  let currentUrl = ctx.args.url;
  // We allow up to (1 initial + MAX_REDIRECT_HOPS redirects) requests:
  // hop indices 0..MAX_REDIRECT_HOPS inclusive. A redirect produced on
  // the final allowed iteration triggers the `redirect-limit` error
  // because following it would exceed the cap (Requirement 2.14,
  // Property 6).
  for (let hop = 0; hop <= MAX_REDIRECT_HOPS; hop++) {
    // Re-validate at every hop: each hop independently runs validation +
    // SSRF + DNS (Requirement 2.11).
    if (!isAllowedScheme(currentUrl)) {
      return {
        ok: false,
        lastUrl: currentUrl,
        error: {
          kind: "blocked-scheme",
          message: `Refusing scheme: ${schemeOf(currentUrl)}`,
          url: currentUrl,
        },
      };
    }
    let parsed;
    try {
      parsed = new URL(currentUrl);
    } catch {
      return {
        ok: false,
        lastUrl: currentUrl,
        error: {
          kind: "validation",
          message: `web.fetch: redirect target is not a valid URL: ${currentUrl}`,
          url: currentUrl,
        },
      };
    }
    // Strip the brackets URL puts around IPv6 literals.
    const hostname = parsed.hostname.replace(/^\[|\]$/g, "");
    ctx.capture.setHopContext(hostname);

    // SSRF pre-check on the hostname literal so e.g. https://127.0.0.1
    // fails before any DNS work is done.
    const hostClass = classifyHost(hostname);
    if (hostClass !== null) {
      return {
        ok: false,
        lastUrl: currentUrl,
        error: {
          kind: "blocked-address",
          message: `Refusing to fetch ${hostClass.class} address ${hostname} (host=${hostname})`,
          url: currentUrl,
        },
      };
    }

    // DNS resolve + pin IP for the actual TCP connect (Requirement 2.8 /
    // 2.11): the SSRF check runs against the resolved IP, and `issueHop`
    // then connects to that exact IP via a custom `lookup` callback so a
    // second resolution cannot swap the address between check and connect.
    const dnsStart = ctx.now();
    let resolvedIp;
    let resolvedFamily;
    try {
      const result = await lookupUnlessAborted(
        ctx.dnsLookupFn,
        hostname,
        ctx.controller.signal,
      );
      resolvedIp = result.address;
      resolvedFamily = result.family === 6 ? 6 : 4;
    } catch (err) {
      // Same convention as the request/body paths: once the shared
      // controller has aborted, the failure is reported as a timeout.
      if (ctx.controller.signal.aborted) {
        return {
          ok: false,
          lastUrl: currentUrl,
          error: timeoutError(currentUrl, ctx.t0, ctx.now),
        };
      }
      return {
        ok: false,
        lastUrl: currentUrl,
        error: networkError(currentUrl, err, "DNS resolution failed"),
      };
    }
    const dnsMs = ctx.now() - dnsStart;
    ctx.capture.markDnsResolved(dnsMs, resolvedIp);

    const ipClass = classifyIp(resolvedIp);
    if (ipClass !== null) {
      return {
        ok: false,
        lastUrl: currentUrl,
        error: {
          kind: "blocked-address",
          message: `Refusing to fetch ${ipClass.class} address ${resolvedIp} (host=${hostname})`,
          url: currentUrl,
        },
      };
    }

    // Issue the pinned-IP request and consume the response.
    const hopResult = await issueHop({
      ctx,
      currentUrl,
      parsed,
      resolvedIp,
      resolvedFamily,
      hop,
    });

    if (hopResult.kind === "redirect") {
      // Append the *current* hop to the chain and follow.
      ctx.capture.addRedirectHop(currentUrl, hopResult.status, hopResult.location);
      // Relative Locations are resolved against the *current* hop's URL.
      let nextUrl;
      try {
        nextUrl = new URL(hopResult.location, parsed).toString();
      } catch {
        return {
          ok: false,
          lastUrl: currentUrl,
          error: {
            kind: "validation",
            message: `web.fetch: redirect Location is not a valid URL: ${hopResult.location}`,
            url: currentUrl,
          },
        };
      }
      if (hop + 1 > MAX_REDIRECT_HOPS) {
        return {
          ok: false,
          lastUrl: nextUrl,
          error: {
            kind: "redirect-limit",
            message: `web.fetch: exceeded ${MAX_REDIRECT_HOPS}-redirect limit (last url=${nextUrl})`,
            url: nextUrl,
          },
        };
      }
      currentUrl = nextUrl;
      continue;
    }

    // Terminal hop: a response `issueHop` accepted and read as a body.
    // Append it to the redirect chain so callers see the complete path.
    if (hopResult.kind === "terminal") {
      ctx.capture.addRedirectHop(currentUrl, hopResult.status);
      return {
        ok: true,
        lastUrl: currentUrl,
        contentType: hopResult.contentType,
        body: hopResult.body,
        bytesReceived: hopResult.bytesReceived,
        truncated: hopResult.truncated,
        ...(hopResult.truncatedAt !== undefined
          ? { truncatedAt: hopResult.truncatedAt }
          : {}),
      };
    }

    // hopResult.kind === "error": binary content, HTTP error, timeout or
    // transport failure, already typed by `issueHop`.
    return {
      ok: false,
      lastUrl: currentUrl,
      error: hopResult.error,
    };
  }
  // Should be unreachable — the loop body either returns or sets
  // currentUrl and continues. Kept as a defensive fallback.
  return {
    ok: false,
    lastUrl: currentUrl,
    error: {
      kind: "redirect-limit",
      message: `web.fetch: exceeded ${MAX_REDIRECT_HOPS}-redirect limit (last url=${currentUrl})`,
      url: currentUrl,
    },
  };
}

/**
 * Run `dnsLookupFn(hostname, { family: 0 })`, but reject as soon as
 * `signal` aborts so a resolver that never answers cannot outlive the
 * pipeline's wall-clock timeout. The abort listener is removed once the
 * lookup settles.
 */
function lookupUnlessAborted(dnsLookupFn, hostname, signal) {
  return new Promise((resolve, reject) => {
    if (signal.aborted) {
      reject(new Error("aborted before DNS resolution"));
      return;
    }
    const onAbort = () => reject(new Error("aborted during DNS resolution"));
    signal.addEventListener("abort", onAbort, { once: true });
    // The async thunk makes a synchronous throw or a non-promise return
    // value from an injected stub behave as it would under a plain `await`.
    (async () => dnsLookupFn(hostname, { family: 0 }))()
      .then(resolve, reject)
      .finally(() => signal.removeEventListener("abort", onAbort));
  });
}
509
/**
 * Issue a single GET request for `parsed` while pinning the TCP
 * connection to `resolvedIp` via a custom `lookup` callback.
 *
 * Response handling, in order:
 *   - redirect status with a non-empty `Location` → `{kind: "redirect"}`
 *   - redirect status without `Location`          → falls through
 *   - binary Content-Type                         → `binary-content` error
 *   - 4xx / 5xx                                   → reads up to
 *     {@link HTTP_ERROR_BODY_PREVIEW_BYTES} bytes for the preview and
 *     returns an `http-error` error (Requirement 6.4)
 *   - anything else                               → reads the body up to
 *     `args.maxBytes` and returns `{kind: "terminal"}`
 *
 * Request errors, body-read errors, and exceptions thrown while decoding
 * the body become `{kind: "error"}`: a `timeout` error when the shared
 * controller has aborted, otherwise a `network` error. The returned
 * promise never rejects.
 *
 * `tcpMs`, `tlsMs` and `ttfbMs` for this hop are recorded on the shared
 * {@link Capture}.
 *
 * @param input `{ ctx, currentUrl, parsed, resolvedIp, resolvedFamily }`
 *              (`hop` is accepted but not used here).
 * @returns Promise of a redirect, terminal, or error hop result.
 */
async function issueHop(input) {
  const { ctx, currentUrl, parsed, resolvedIp, resolvedFamily } = input;
  const isHttps = parsed.protocol === "https:";
  const requestFn = isHttps ? ctx.httpsRequestFn : ctx.httpRequestFn;
  // Baseline for `tcpMs`, and for `ttfbMs` if the response arrives before
  // the request-sent timestamp was recorded.
  const dnsEndedAt = ctx.now();
  const requestOptions = {
    method: "GET",
    signal: ctx.controller.signal,
    headers: {
      "user-agent": DEFAULT_USER_AGENT,
      accept: "*/*",
      // Ask for the body without any content-coding.
      "accept-encoding": "identity",
      // The Host header carries the URL's host even though the socket
      // connects to `resolvedIp`.
      host: parsed.host,
    },
    // Pinned-IP lookup: the socket connects to the exact address the
    // SSRF guard already classified.
    lookup: pinnedLookup(resolvedIp, resolvedFamily),
  };

  return new Promise((resolve) => {
    let req;
    try {
      req = requestFn(parsed, requestOptions);
    } catch (err) {
      resolve({
        kind: "error",
        error: networkError(currentUrl, err),
      });
      return;
    }

    let socketObserved = false;
    let connectAt;
    let requestSentAt;
    let settled = false;

    // Resolve at most once; later events (e.g. an error after the
    // response was handled) are ignored.
    const finish = (outcome) => {
      if (settled) {
        return;
      }
      settled = true;
      resolve(outcome);
    };

    // Shared failure mapping: once the controller has aborted, report a
    // timeout; otherwise report a network error.
    const failWith = (err) => {
      finish({
        kind: "error",
        error: ctx.controller.signal.aborted
          ? timeoutError(currentUrl, ctx.t0, ctx.now)
          : networkError(currentUrl, err),
      });
    };

    req.on("socket", (socket) => {
      if (socketObserved) {
        return;
      }
      socketObserved = true;
      // `dnsMs` is not recorded here: it was already measured around
      // the explicit DNS lookup in the caller.
      socket.once("connect", () => {
        connectAt = ctx.now();
        ctx.capture.markTcpConnected(connectAt - dnsEndedAt);
      });
      if (isHttps) {
        // `secureConnect` is emitted by `tls.TLSSocket` once the
        // handshake completes.
        socket.once("secureConnect", () => {
          const secureAt = ctx.now();
          if (connectAt !== undefined) {
            ctx.capture.markTlsHandshaked(secureAt - connectAt, socket);
          }
        });
      }
    });

    req.on("error", failWith);

    req.on("response", (res) => {
      const ttfbStart = typeof requestSentAt === "number" ? requestSentAt : dnsEndedAt;
      const ttfbMs = ctx.now() - ttfbStart;
      const status = typeof res.statusCode === "number" ? res.statusCode : 0;
      const headers = res.headers;
      ctx.capture.markResponse(status, headers, ttfbMs);

      // Record every Set-Cookie header value individually.
      for (const value of collectSetCookieValues(headers)) {
        ctx.capture.addSetCookieHeader(value);
      }
      const contentType = headerString(headers["content-type"]);

      // Redirect handling (Requirement 2.11/2.14).
      if (REDIRECT_STATUSES.has(status)) {
        const location = headerString(headers["location"]);
        if (typeof location === "string" && location.length > 0) {
          // Drain the response body to free the socket.
          res.resume();
          finish({ kind: "redirect", status, location });
          return;
        }
        // Redirect status without Location: fall through and treat the
        // response like any other.
      }

      // Binary content rejection (Requirements 2.9 + 2.30) before any
      // body bytes are consumed — including in `responseMode="raw"`.
      if (
        typeof contentType === "string" &&
        BINARY_CONTENT_TYPE_PATTERNS.some((re) => re.test(contentType))
      ) {
        // Destroy rather than drain: draining would keep downloading a
        // potentially very large payload after this hop has already
        // reported its outcome.
        try {
          res.destroy();
        } catch {
          // ignore — the response is being abandoned deliberately
        }
        finish({
          kind: "error",
          error: {
            kind: "binary-content",
            message: `Refusing binary content type: ${contentType}`,
            url: currentUrl,
            status,
          },
        });
        return;
      }

      // HTTP error (Requirement 6.4): read a bounded preview and surface
      // as `http-error`. `.catch` (not a second `.then` argument) so an
      // exception while rendering the preview also settles the hop.
      if (status >= 400 && status < 600) {
        readBody(res, HTTP_ERROR_BODY_PREVIEW_BYTES, ctx.controller)
          .then(({ body, truncated, bytesReceived }) => {
            const preview = renderBodyPreview(body, truncated, bytesReceived);
            finish({
              kind: "error",
              error: {
                kind: "http-error",
                message: `${status} ${currentUrl}`,
                status,
                url: currentUrl,
                bodyPreview: preview,
              },
            });
          })
          .catch(failWith);
        return;
      }

      // Accepted terminal response: read up to `maxBytes` and decode.
      // As above, `.catch` also covers an exception thrown by the body
      // classifier, which would otherwise leave this promise pending.
      readBody(res, ctx.args.maxBytes, ctx.controller)
        .then(({ body, truncated, bytesReceived }) => {
          const text = classifyAndDecodeBody({
            mode: ctx.args.responseMode,
            contentType,
            body,
            maxBytes: ctx.args.maxBytes,
          });
          finish({
            kind: "terminal",
            status,
            contentType,
            body: text,
            bytesReceived,
            truncated,
            ...(truncated ? { truncatedAt: bytesReceived } : {}),
          });
        })
        .catch(failWith);
    });

    // Timestamp taken immediately before `req.end()`; used as the start
    // of the `ttfbMs` measurement.
    requestSentAt = ctx.now();
    req.end();
  });
}
726
/**
 * Build a `lookup` function, in the shape Node's `net`/`http`/`https`
 * modules accept, that always answers with `resolvedIp` so the socket
 * connects to the address the SSRF guard already classified. The
 * hostname it is asked about is ignored, and the callback is invoked
 * synchronously.
 *
 * Both `dns.lookup` callback shapes are supported:
 *   - `options.all === true` → `callback(null, [{ address, family }])`.
 *     Node requests this form when it auto-selects the address family;
 *     answering with a bare string there makes the connect fail.
 *   - otherwise              → `callback(null, address, family)`.
 * The two-argument call form `lookup(hostname, callback)` is accepted
 * as well.
 *
 * @param resolvedIp IP address string to pin the connection to.
 * @param family     Address family of `resolvedIp` (4 or 6).
 * @returns A `(hostname, options, callback)` lookup function.
 */
function pinnedLookup(resolvedIp, family) {
  return function lookup(_hostname, options, callback) {
    const done = typeof options === "function" ? options : callback;
    const wantsAll =
      typeof options === "object" && options !== null && options.all === true;
    if (wantsAll) {
      done(null, [{ address: resolvedIp, family }]);
      return;
    }
    done(null, resolvedIp, family);
  };
}
738
/**
 * Gather the `Set-Cookie` values from a response header map as a flat
 * array of strings.
 *
 * @param headers Header map keyed by lower-case header name.
 * @returns The string entries when `set-cookie` is an array (non-string
 *          entries are dropped), a one-element array when it is a single
 *          string, and an empty array otherwise.
 */
function collectSetCookieValues(headers) {
  const raw = headers["set-cookie"];
  if (typeof raw === "string") {
    return [raw];
  }
  if (!Array.isArray(raw)) {
    return [];
  }
  return raw.filter((entry) => typeof entry === "string");
}
752
/**
 * Collapse a header value of type `string | string[] | undefined` into
 * a single string.
 *
 * @param value Raw header value.
 * @returns The string itself, the array entries joined with `", "`, or
 *          `undefined` for any other input.
 */
function headerString(value) {
  if (Array.isArray(value)) {
    return value.join(", ");
  }
  return typeof value === "string" ? value : undefined;
}
764
+ // ---------------------------------------------------------------------------
765
+ // Body streaming + classification
766
+ // ---------------------------------------------------------------------------
767
/**
 * Collect at most `maxBytes` from the readable `res`.
 *
 * As soon as the cap would be exceeded the listeners are detached and
 * the stream is torn down with `res.destroy()`, so the rest of the
 * response is not drained. The third parameter is accepted for interface
 * compatibility but is not used here.
 *
 * Resolves with `{ body, truncated, bytesReceived }`, where `body` is a
 * `Buffer` holding exactly `bytesReceived` bytes. Rejects with the
 * stream's error if `res` emits `"error"` before the read settles. All
 * listeners are removed on every settle path.
 *
 * @param {object} res - Stream emitting `data` / `end` / `error`.
 * @param {number} maxBytes - Maximum number of bytes to keep.
 * @param {unknown} [_controller] - Unused.
 * @returns {Promise<{body: Buffer, truncated: boolean, bytesReceived: number}>}
 */
function readBody(res, maxBytes, _controller) {
    return new Promise((resolve, reject) => {
        const parts = [];
        let received = 0;
        let hitCap = false;
        let done = false;
        const detach = () => {
            res.removeListener("data", handleData);
            res.removeListener("end", handleEnd);
            res.removeListener("error", handleError);
        };
        // Snapshot of everything gathered so far, in the result shape.
        const snapshot = () => ({
            body: Buffer.concat(parts, received),
            truncated: hitCap,
            bytesReceived: received,
        });
        // Shared tail of both "cap reached" paths.
        const stopAtCap = () => {
            hitCap = true;
            detach();
            try {
                res.destroy();
            }
            catch {
                // ignore — the socket is being abandoned deliberately
            }
            done = true;
            resolve(snapshot());
        };
        const handleData = (chunk) => {
            if (done) {
                return;
            }
            const room = maxBytes - received;
            if (room <= 0) {
                // The cap was already full when more data arrived.
                received = maxBytes;
                stopAtCap();
                return;
            }
            if (chunk.byteLength > room) {
                // Keep only the part of the chunk that still fits.
                parts.push(chunk.subarray(0, room));
                received += room;
                stopAtCap();
                return;
            }
            parts.push(chunk);
            received += chunk.byteLength;
        };
        const handleEnd = () => {
            if (done) {
                return;
            }
            done = true;
            detach();
            resolve(snapshot());
        };
        const handleError = (err) => {
            if (done) {
                return;
            }
            done = true;
            detach();
            reject(err);
        };
        res.on("data", handleData);
        res.once("end", handleEnd);
        res.once("error", handleError);
    });
}
854
/**
 * Turn the captured response bytes into the string exposed as the
 * outcome body.
 *
 * The bytes are always decoded as UTF-8 with replacement first. Then:
 *   - `mode === "raw"`: the decoded text is returned as-is
 *     (Requirement 2.29).
 *   - any other mode with a string `contentType` matching
 *     `HTML_CONTENT_TYPE_PATTERN`: the text is passed through
 *     {@link toReadableText} (Requirements 2.4, 2.28).
 *   - otherwise: the decoded text is returned as-is (Requirement 2.5).
 *
 * No truncation happens here; the caller is expected to hand in bytes
 * that are already capped (Property 7).
 *
 * @param {{body: Uint8Array, mode: string, contentType?: string}} input
 * @returns {string} The decoded (and possibly simplified) body text.
 */
function classifyAndDecodeBody(input) {
    const text = decodeUtf8WithReplacement(input.body);
    // The content-type probe only runs outside raw mode.
    const isReadableHtml = input.mode !== "raw" &&
        typeof input.contentType === "string" &&
        HTML_CONTENT_TYPE_PATTERN.test(input.contentType);
    return isReadableHtml ? toReadableText(text) : text;
}
883
/**
 * Decode bytes as UTF-8 without ever throwing: a non-fatal
 * `TextDecoder` substitutes U+FFFD for each invalid sequence.
 *
 * @param {Uint8Array} buf - Bytes to decode.
 * @returns {string} The decoded text.
 */
function decodeUtf8WithReplacement(buf) {
    const lenientDecoder = new TextDecoder("utf-8", { fatal: false });
    return lenientDecoder.decode(buf);
}
891
/**
 * Produce the body preview text attached to `http-error` outcomes
 * (Requirement 6.4).
 *
 * The bytes are decoded as UTF-8 with replacement. When `truncated` is
 * truthy, `TRUNCATION_MARKER` is appended so the reader can tell the
 * response was cut short. No size cap is applied inside this function;
 * presumably the caller has already limited `body` to
 * {@link HTTP_ERROR_BODY_PREVIEW_BYTES} — confirm at the call site.
 *
 * @param {Uint8Array} body - Preview bytes.
 * @param {boolean} truncated - Whether the underlying body was cut short.
 * @param {number} [_bytesReceived] - Unused.
 * @returns {string} The preview text.
 */
function renderBodyPreview(body, truncated, _bytesReceived) {
    const preview = decodeUtf8WithReplacement(body);
    return truncated ? `${preview}${TRUNCATION_MARKER}` : preview;
}
904
/**
 * Assemble the `ok: true` outcome for a completed fetch.
 *
 * The capture is finalised with the elapsed wall-clock time
 * (`input.now() - input.t0`), and the resulting snapshot is handed to
 * {@link assembleMetadata}, which performs redaction and budget
 * enforcement. `truncatedAt` is forwarded only when the caller
 * supplied it.
 *
 * @param {object} input - Captured pipeline state (`args`, `capture`,
 *   `lastUrl`, `contentType`, `body`, `bytesReceived`, `truncated`,
 *   optional `truncatedAt`, plus the `now` / `t0` clock pair).
 * @returns {{ok: true, metadata: object, body: string}}
 */
function buildSuccessOutcome(input) {
    const elapsedMs = input.now() - input.t0;
    const snapshot = input.capture.finalize(elapsedMs);
    const metadataInput = {
        args: input.args,
        captured: snapshot,
        requestedUrl: input.args.url,
        finalUrl: input.lastUrl,
        status: snapshot.status,
        contentType: input.contentType,
        bytesReceived: input.bytesReceived,
        truncated: input.truncated,
    };
    if (input.truncatedAt !== undefined) {
        metadataInput.truncatedAt = input.truncatedAt;
    }
    const metadata = assembleMetadata(metadataInput);
    return { ok: true, metadata, body: input.body };
}
934
/**
 * Assemble the `ok: false` outcome for a failed fetch.
 *
 * A metadata envelope is always produced. When a capture object is
 * available it is finalised and run through {@link assembleMetadata},
 * so values recorded before the failure are kept; otherwise
 * {@link assembleEmptyMetadata} supplies a minimal envelope. The
 * `include*` / `redactSensitive` flags fall back to the module defaults
 * when the caller left them nullish. `includeTls` instead defaults to
 * whether the finalised capture carries TLS details. The body is always
 * the empty string.
 *
 * @param {object} input - Failure context (`error`, `requestedUrl`,
 *   `finalUrl`, `mode`, optional `capture`, optional flags, and the
 *   `now` / `t0` clock pair).
 * @returns {{ok: false, metadata: object, body: string, error: object}}
 */
function errorOutcome(input) {
    const elapsedMs = input.now() - input.t0;
    const snapshot = input.capture === undefined
        ? undefined
        : input.capture.finalize(elapsedMs);
    const args = {
        url: input.requestedUrl,
        maxBytes: DEFAULT_MAX_BYTES,
        includeHeaders: input.includeHeaders ?? DEFAULT_INCLUDE_HEADERS,
        // Captured TLS details imply an https URL whose handshake completed.
        includeTls: input.includeTls ?? (snapshot?.tls !== undefined),
        includeTiming: input.includeTiming ?? DEFAULT_INCLUDE_TIMING,
        includeRedirectChain: input.includeRedirectChain ?? DEFAULT_INCLUDE_REDIRECT_CHAIN,
        responseMode: input.mode,
        redactSensitive: input.redactSensitive ?? DEFAULT_REDACT_SENSITIVE,
    };
    let metadata;
    if (snapshot) {
        metadata = assembleMetadata({
            args,
            captured: snapshot,
            requestedUrl: input.requestedUrl,
            finalUrl: input.finalUrl,
            status: input.error.status ?? snapshot.status ?? 0,
            contentType: undefined,
            bytesReceived: 0,
            truncated: false,
        });
    }
    else {
        metadata = assembleEmptyMetadata({
            args,
            requestedUrl: input.requestedUrl,
            finalUrl: input.finalUrl,
            status: input.error.status ?? 0,
        });
    }
    return {
        ok: false,
        metadata,
        body: "",
        error: input.error,
    };
}
988
/**
 * Produce the metadata envelope from a finalised capture snapshot.
 *
 * Optional sections (headers, TLS, timing, redirect chain) are included
 * only when the matching `include*` flag in `args` is truthy and the
 * snapshot actually holds a value (Requirements 2.15–2.18, 2.24).
 * Headers and cookies pass through `applyToHeaders` / `applyToCookies`
 * before the optional sections are handed to {@link enforceBudget}; the
 * envelope carries whatever sections the budget step returns, together
 * with the reported `metadataBytes`.
 *
 * @param {object} input - `args`, `captured`, `requestedUrl`,
 *   `finalUrl`, `status`, `bytesReceived`, `truncated`, and optional
 *   `contentType` / `truncatedAt`.
 * @returns {object} The assembled metadata envelope.
 */
function assembleMetadata(input) {
    const { args, captured } = input;
    let safeHeaders;
    if (args.includeHeaders && captured.headers !== undefined) {
        safeHeaders = applyToHeaders(captured.headers, args.redactSensitive);
    }
    const safeCookies = applyToCookies(captured.cookies, args.redactSensitive);
    // Build the budget input in a fixed key order, adding only present sections.
    const budgetInput = {};
    if (safeHeaders !== undefined) {
        budgetInput.headers = safeHeaders;
    }
    if (args.includeTls && captured.tls !== undefined) {
        budgetInput.tls = captured.tls;
    }
    if (args.includeTiming && captured.timing !== undefined) {
        budgetInput.timing = captured.timing;
    }
    if (args.includeRedirectChain && captured.redirectChain !== undefined) {
        budgetInput.redirectChain = captured.redirectChain;
    }
    budgetInput.cookies = safeCookies;
    const trimmed = enforceBudget(budgetInput);
    return {
        requestedUrl: input.requestedUrl,
        finalUrl: input.finalUrl,
        status: input.status,
        resolvedIp: captured.resolvedIp,
        finalHostname: captured.finalHostname,
        mode: args.responseMode,
        bytesReceived: input.bytesReceived,
        truncated: input.truncated,
        budget: { metadataBytes: trimmed.metadataBytes, cap: METADATA_BUDGET_BYTES },
        ...(input.contentType !== undefined ? { contentType: input.contentType } : {}),
        ...(input.truncatedAt !== undefined ? { truncatedAt: input.truncatedAt } : {}),
        ...(trimmed.headers !== undefined ? { headers: trimmed.headers } : {}),
        ...(trimmed.tls !== undefined ? { tls: trimmed.tls } : {}),
        ...(trimmed.timing !== undefined ? { timing: trimmed.timing } : {}),
        ...(trimmed.redirectChain !== undefined ? { redirectChain: trimmed.redirectChain } : {}),
        ...(trimmed.cookies !== undefined ? { cookies: trimmed.cookies } : {}),
    };
}
1045
/**
 * Produce a minimal metadata envelope for failures that happened before
 * anything was captured from the transport (argument validation, a
 * blocked scheme on the entry URL, and similar).
 *
 * Sections enabled through `args` are filled with empty placeholders
 * (no headers, all-zero timing, empty redirect chain); cookies are
 * always an empty list. The placeholders still go through
 * {@link enforceBudget} so the `budget` field is populated the same way
 * as in a full envelope.
 *
 * @param {object} input - `args`, `requestedUrl`, `finalUrl`, `status`.
 * @returns {object} The minimal metadata envelope.
 */
function assembleEmptyMetadata(input) {
    const zeroTiming = { dnsMs: 0, tcpMs: 0, ttfbMs: 0, totalMs: 0 };
    const budgetInput = {};
    if (input.args.includeHeaders) {
        budgetInput.headers = {};
    }
    if (input.args.includeTiming) {
        budgetInput.timing = zeroTiming;
    }
    if (input.args.includeRedirectChain) {
        budgetInput.redirectChain = [];
    }
    budgetInput.cookies = [];
    const trimmed = enforceBudget(budgetInput);
    return {
        requestedUrl: input.requestedUrl,
        finalUrl: input.finalUrl,
        status: input.status,
        resolvedIp: "",
        finalHostname: tryHostname(input.finalUrl),
        mode: input.args.responseMode,
        bytesReceived: 0,
        truncated: false,
        budget: { metadataBytes: trimmed.metadataBytes, cap: METADATA_BUDGET_BYTES },
        ...(trimmed.headers !== undefined ? { headers: trimmed.headers } : {}),
        ...(trimmed.timing !== undefined ? { timing: trimmed.timing } : {}),
        ...(trimmed.redirectChain !== undefined ? { redirectChain: trimmed.redirectChain } : {}),
        ...(trimmed.cookies !== undefined ? { cookies: trimmed.cookies } : {}),
    };
}
1081
/**
 * Extract the hostname of `url`, or return `""` when the value cannot
 * be parsed as a URL.
 *
 * @param {string} url - Candidate URL.
 * @returns {string} The hostname, or the empty string on parse failure.
 */
function tryHostname(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return "";
    }
    return parsed.hostname;
}
1090
+ // ---------------------------------------------------------------------------
1091
+ // Error helpers
1092
+ // ---------------------------------------------------------------------------
1093
/**
 * Create the `timeout` error object (Requirement 2.10).
 *
 * The message reports the configured limit (`FETCH_TIMEOUT_MS`, rounded
 * to whole seconds), the last URL that was being fetched, and the
 * elapsed wall-clock time in milliseconds, clamped so it is never
 * negative.
 *
 * @param {string} lastUrl - URL in flight when the timeout fired.
 * @param {number} t0 - Start timestamp, on the same clock as `now()`.
 * @param {() => number} now - Clock function.
 * @returns {{kind: "timeout", message: string, url: string}}
 */
function timeoutError(lastUrl, t0, now) {
    const elapsedMs = Math.max(0, now() - t0);
    const limitSeconds = Math.round(FETCH_TIMEOUT_MS / 1000);
    const message = `web.fetch: timeout after ${limitSeconds}s (last url=${lastUrl}, elapsed=${elapsedMs}ms)`;
    return { kind: "timeout", message, url: lastUrl };
}
1106
/**
 * Create the generic `network` error object used for DNS, connect and
 * TLS failures.
 *
 * The detail text is `err.message` for `Error` instances and
 * `String(err)` for anything else. A non-empty string `prefix` is
 * placed before the detail, followed by `": "`, to tag a more specific
 * category (e.g. "DNS resolution failed").
 *
 * @param {string} lastUrl - URL being fetched when the failure occurred.
 * @param {unknown} err - The underlying failure.
 * @param {string} [prefix] - Optional category label.
 * @returns {{kind: "network", message: string, url: string}}
 */
function networkError(lastUrl, err, prefix) {
    const reason = err instanceof Error ? err.message : String(err);
    let label = "";
    if (typeof prefix === "string" && prefix.length > 0) {
        label = `${prefix}: `;
    }
    return {
        kind: "network",
        message: `web.fetch: ${label}${reason} (url=${lastUrl})`,
        url: lastUrl,
    };
}
1123
+ //# sourceMappingURL=fetch-core.js.map