@j0hanz/superfetch 2.5.3 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +350 -226
  2. package/dist/assets/logo.svg +24837 -24835
  3. package/dist/cache.d.ts +28 -20
  4. package/dist/cache.js +292 -514
  5. package/dist/config.d.ts +41 -7
  6. package/dist/config.js +298 -148
  7. package/dist/crypto.js +25 -12
  8. package/dist/dom-noise-removal.js +379 -421
  9. package/dist/errors.d.ts +2 -2
  10. package/dist/errors.js +25 -8
  11. package/dist/fetch.d.ts +18 -16
  12. package/dist/fetch.js +1132 -526
  13. package/dist/host-normalization.js +40 -10
  14. package/dist/http-native.js +628 -287
  15. package/dist/index.js +67 -7
  16. package/dist/instructions.md +44 -31
  17. package/dist/ip-blocklist.d.ts +8 -0
  18. package/dist/ip-blocklist.js +65 -0
  19. package/dist/json.js +14 -9
  20. package/dist/language-detection.d.ts +2 -11
  21. package/dist/language-detection.js +289 -280
  22. package/dist/markdown-cleanup.d.ts +0 -1
  23. package/dist/markdown-cleanup.js +391 -429
  24. package/dist/mcp-validator.js +4 -2
  25. package/dist/mcp.js +184 -135
  26. package/dist/observability.js +89 -21
  27. package/dist/resources.js +16 -6
  28. package/dist/server-tuning.d.ts +2 -0
  29. package/dist/server-tuning.js +25 -23
  30. package/dist/session.d.ts +1 -0
  31. package/dist/session.js +41 -33
  32. package/dist/tasks.d.ts +2 -0
  33. package/dist/tasks.js +91 -9
  34. package/dist/timer-utils.d.ts +5 -0
  35. package/dist/timer-utils.js +20 -0
  36. package/dist/tools.d.ts +28 -5
  37. package/dist/tools.js +317 -183
  38. package/dist/transform-types.d.ts +5 -1
  39. package/dist/transform.d.ts +3 -2
  40. package/dist/transform.js +1138 -421
  41. package/dist/type-guards.d.ts +1 -0
  42. package/dist/type-guards.js +7 -0
  43. package/dist/workers/transform-child.d.ts +1 -0
  44. package/dist/workers/transform-child.js +118 -0
  45. package/dist/workers/transform-worker.js +87 -78
  46. package/package.json +14 -6
package/dist/fetch.js CHANGED
@@ -1,106 +1,81 @@
1
+ import { Buffer } from 'node:buffer';
1
2
  import { randomUUID } from 'node:crypto';
2
3
  import diagnosticsChannel from 'node:diagnostics_channel';
3
4
  import dns from 'node:dns';
4
- import { BlockList, isIP } from 'node:net';
5
+ import { isIP } from 'node:net';
5
6
  import { performance } from 'node:perf_hooks';
7
+ import { Readable } from 'node:stream';
8
+ import { setTimeout as delay } from 'node:timers/promises';
9
+ import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
6
10
  import { config } from './config.js';
7
11
  import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
12
+ import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
8
13
  import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
9
- import { isObject } from './type-guards.js';
10
- function buildIpv4(parts) {
11
- return parts.join('.');
12
- }
13
- function buildIpv6(parts) {
14
- return parts.map(String).join(':');
15
- }
16
- const IPV6_ZERO = buildIpv6([0, 0, 0, 0, 0, 0, 0, 0]);
17
- const IPV6_LOOPBACK = buildIpv6([0, 0, 0, 0, 0, 0, 0, 1]);
18
- const IPV6_64_FF9B = buildIpv6(['64', 'ff9b', 0, 0, 0, 0, 0, 0]);
19
- const IPV6_64_FF9B_1 = buildIpv6(['64', 'ff9b', 1, 0, 0, 0, 0, 0]);
20
- const IPV6_2001 = buildIpv6(['2001', 0, 0, 0, 0, 0, 0, 0]);
21
- const IPV6_2002 = buildIpv6(['2002', 0, 0, 0, 0, 0, 0, 0]);
22
- const IPV6_FC00 = buildIpv6(['fc00', 0, 0, 0, 0, 0, 0, 0]);
23
- const IPV6_FE80 = buildIpv6(['fe80', 0, 0, 0, 0, 0, 0, 0]);
24
- const IPV6_FF00 = buildIpv6(['ff00', 0, 0, 0, 0, 0, 0, 0]);
25
- const BLOCKED_IPV4_SUBNETS = [
26
- { subnet: buildIpv4([0, 0, 0, 0]), prefix: 8 },
27
- { subnet: buildIpv4([10, 0, 0, 0]), prefix: 8 },
28
- { subnet: buildIpv4([100, 64, 0, 0]), prefix: 10 },
29
- { subnet: buildIpv4([127, 0, 0, 0]), prefix: 8 },
30
- { subnet: buildIpv4([169, 254, 0, 0]), prefix: 16 },
31
- { subnet: buildIpv4([172, 16, 0, 0]), prefix: 12 },
32
- { subnet: buildIpv4([192, 168, 0, 0]), prefix: 16 },
33
- { subnet: buildIpv4([224, 0, 0, 0]), prefix: 4 },
34
- { subnet: buildIpv4([240, 0, 0, 0]), prefix: 4 },
35
- ];
36
- const BLOCKED_IPV6_SUBNETS = [
37
- { subnet: IPV6_ZERO, prefix: 128 },
38
- { subnet: IPV6_LOOPBACK, prefix: 128 },
39
- { subnet: IPV6_64_FF9B, prefix: 96 },
40
- { subnet: IPV6_64_FF9B_1, prefix: 48 },
41
- { subnet: IPV6_2001, prefix: 32 },
42
- { subnet: IPV6_2002, prefix: 16 },
43
- { subnet: IPV6_FC00, prefix: 7 },
44
- { subnet: IPV6_FE80, prefix: 10 },
45
- { subnet: IPV6_FF00, prefix: 8 },
46
- ];
14
+ import { isError, isObject } from './type-guards.js';
15
+ const defaultLogger = {
16
+ debug: logDebug,
17
+ warn: logWarn,
18
+ error: logError,
19
+ };
20
+ const defaultContext = {
21
+ getRequestId,
22
+ getOperationId,
23
+ };
24
+ const defaultRedactor = {
25
+ redact: redactUrl,
26
+ };
27
+ const defaultFetch = (input, init) => globalThis.fetch(input, init);
47
28
  class IpBlocker {
48
- cachedBlockList;
29
+ security;
30
+ blockList = createDefaultBlockList();
31
+ constructor(security) {
32
+ this.security = security;
33
+ }
49
34
  isBlockedIp(candidate) {
50
- if (config.security.blockedHosts.has(candidate))
51
- return true;
52
- const ipType = this.resolveIpType(candidate);
53
- if (!ipType)
35
+ const normalized = candidate.trim().toLowerCase();
36
+ if (!normalized)
54
37
  return false;
55
- const normalized = candidate.toLowerCase();
56
- if (this.isBlockedBySubnetList(normalized, ipType))
38
+ if (this.security.blockedHosts.has(normalized))
57
39
  return true;
58
- return (config.security.blockedIpPattern.test(normalized) ||
59
- config.security.blockedIpv4MappedPattern.test(normalized));
60
- }
61
- resolveIpType(ip) {
62
- const ipType = isIP(ip);
63
- return ipType === 4 || ipType === 6 ? ipType : null;
64
- }
65
- isBlockedBySubnetList(ip, ipType) {
66
- const list = this.getBlockList();
67
- return ipType === 4 ? list.check(ip, 'ipv4') : list.check(ip, 'ipv6');
68
- }
69
- getBlockList() {
70
- if (!this.cachedBlockList) {
71
- const list = new BlockList();
72
- for (const entry of BLOCKED_IPV4_SUBNETS)
73
- list.addSubnet(entry.subnet, entry.prefix, 'ipv4');
74
- for (const entry of BLOCKED_IPV6_SUBNETS)
75
- list.addSubnet(entry.subnet, entry.prefix, 'ipv6');
76
- this.cachedBlockList = list;
77
- }
78
- return this.cachedBlockList;
40
+ const normalizedIp = normalizeIpForBlockList(normalized);
41
+ if (!normalizedIp)
42
+ return false;
43
+ return this.blockList.check(normalizedIp.ip, normalizedIp.family);
79
44
  }
80
45
  }
81
- const ipBlocker = new IpBlocker();
82
- /** Backwards-compatible export */
83
- export function isBlockedIp(ip) {
84
- return ipBlocker.isBlockedIp(ip);
85
- }
86
- /* -------------------------------------------------------------------------------------------------
87
- * URL normalization & hostname policy
88
- * ------------------------------------------------------------------------------------------------- */
89
46
  const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
90
47
  function createValidationError(message) {
91
48
  return createErrorWithCode(message, VALIDATION_ERROR_CODE);
92
49
  }
93
50
  const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
94
51
  class UrlNormalizer {
52
+ constants;
53
+ security;
54
+ ipBlocker;
55
+ blockedHostSuffixes;
56
+ constructor(constants, security, ipBlocker, blockedHostSuffixes) {
57
+ this.constants = constants;
58
+ this.security = security;
59
+ this.ipBlocker = ipBlocker;
60
+ this.blockedHostSuffixes = blockedHostSuffixes;
61
+ }
95
62
  normalize(urlString) {
96
63
  const trimmedUrl = this.requireTrimmedUrl(urlString);
97
- this.assertUrlLength(trimmedUrl);
98
- const url = this.parseUrl(trimmedUrl);
99
- this.assertHttpProtocol(url);
100
- this.assertNoCredentials(url);
64
+ if (trimmedUrl.length > this.constants.maxUrlLength) {
65
+ throw createValidationError(`URL exceeds maximum length of ${this.constants.maxUrlLength} characters`);
66
+ }
67
+ if (!URL.canParse(trimmedUrl)) {
68
+ throw createValidationError('Invalid URL format');
69
+ }
70
+ const url = new URL(trimmedUrl);
71
+ if (url.protocol !== 'http:' && url.protocol !== 'https:') {
72
+ throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
73
+ }
74
+ if (url.username || url.password) {
75
+ throw createValidationError('URLs with embedded credentials are not allowed');
76
+ }
101
77
  const hostname = this.normalizeHostname(url);
102
78
  this.assertHostnameAllowed(hostname);
103
- // Canonicalize hostname to avoid trailing-dot variants and keep url.href consistent.
104
79
  url.hostname = hostname;
105
80
  return { normalizedUrl: url.href, hostname };
106
81
  }
@@ -116,32 +91,13 @@ class UrlNormalizer {
116
91
  throw createValidationError('URL cannot be empty');
117
92
  return trimmed;
118
93
  }
119
- assertUrlLength(url) {
120
- if (url.length <= config.constants.maxUrlLength)
121
- return;
122
- throw createValidationError(`URL exceeds maximum length of ${config.constants.maxUrlLength} characters`);
123
- }
124
- parseUrl(urlString) {
125
- if (!URL.canParse(urlString))
126
- throw createValidationError('Invalid URL format');
127
- return new URL(urlString);
128
- }
129
- assertHttpProtocol(url) {
130
- if (url.protocol === 'http:' || url.protocol === 'https:')
131
- return;
132
- throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
133
- }
134
- assertNoCredentials(url) {
135
- if (!url.username && !url.password)
136
- return;
137
- throw createValidationError('URLs with embedded credentials are not allowed');
138
- }
139
94
  normalizeHostname(url) {
140
95
  let hostname = url.hostname.toLowerCase();
141
96
  while (hostname.endsWith('.'))
142
97
  hostname = hostname.slice(0, -1);
143
- if (!hostname)
98
+ if (!hostname) {
144
99
  throw createValidationError('URL must have a valid hostname');
100
+ }
145
101
  return hostname;
146
102
  }
147
103
  assertHostnameAllowed(hostname) {
@@ -150,80 +106,62 @@ class UrlNormalizer {
150
106
  this.assertNotBlockedHostnameSuffix(hostname);
151
107
  }
152
108
  assertNotBlockedHost(hostname) {
153
- if (!config.security.blockedHosts.has(hostname))
109
+ if (!this.security.blockedHosts.has(hostname))
154
110
  return;
155
111
  throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
156
112
  }
157
113
  assertNotBlockedIp(hostname) {
158
- if (!ipBlocker.isBlockedIp(hostname))
114
+ if (!this.ipBlocker.isBlockedIp(hostname))
159
115
  return;
160
116
  throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
161
117
  }
162
118
  assertNotBlockedHostnameSuffix(hostname) {
163
- const blocked = BLOCKED_HOST_SUFFIXES.some((suffix) => hostname.endsWith(suffix));
119
+ const blocked = this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
164
120
  if (!blocked)
165
121
  return;
166
122
  throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
167
123
  }
168
124
  }
169
- const urlNormalizer = new UrlNormalizer();
170
- /** Backwards-compatible exports */
171
- export function normalizeUrl(urlString) {
172
- return urlNormalizer.normalize(urlString);
173
- }
174
- export function validateAndNormalizeUrl(urlString) {
175
- return urlNormalizer.validateAndNormalize(urlString);
125
+ function getPatternGroup(groups, key) {
126
+ const value = groups[key];
127
+ if (value === undefined)
128
+ return null;
129
+ if (value === '')
130
+ return null;
131
+ return value;
176
132
  }
177
- const GITHUB_BLOB_RULE = {
178
- name: 'github',
179
- pattern: /^https?:\/\/(?:www\.)?github\.com\/([^/]+)\/([^/]+)\/blob\/([^/]+)\/(.+)$/i,
180
- transform: (match) => {
181
- const owner = match[1] ?? '';
182
- const repo = match[2] ?? '';
183
- const branch = match[3] ?? '';
184
- const path = match[4] ?? '';
185
- return `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`;
186
- },
187
- };
188
- const GITHUB_GIST_RULE = {
189
- name: 'github-gist',
190
- pattern: /^https?:\/\/gist\.github\.com\/([^/]+)\/([a-f0-9]+)(?:#file-(.+)|\/raw\/([^/]+))?$/i,
191
- transform: (match) => {
192
- const user = match[1] ?? '';
193
- const gistId = match[2] ?? '';
194
- const hashFile = match[3];
195
- const rawFile = match[4];
196
- const filename = rawFile ?? hashFile?.replace(/-/g, '.');
197
- const filePath = filename ? `/${filename}` : '';
198
- return `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`;
199
- },
200
- };
201
- const GITLAB_BLOB_RULE = {
202
- name: 'gitlab',
203
- pattern: /^(https?:\/\/(?:[^/]+\.)?gitlab\.com\/[^/]+\/[^/]+)\/-\/blob\/([^/]+)\/(.+)$/i,
204
- transform: (match) => {
205
- const baseUrl = match[1] ?? '';
206
- const branch = match[2] ?? '';
207
- const path = match[3] ?? '';
208
- return `${baseUrl}/-/raw/${branch}/${path}`;
209
- },
210
- };
211
- const BITBUCKET_SRC_RULE = {
212
- name: 'bitbucket',
213
- pattern: /^(https?:\/\/(?:www\.)?bitbucket\.org\/[^/]+\/[^/]+)\/src\/([^/]+)\/(.+)$/i,
214
- transform: (match) => {
215
- const baseUrl = match[1] ?? '';
216
- const branch = match[2] ?? '';
217
- const path = match[3] ?? '';
218
- return `${baseUrl}/raw/${branch}/${path}`;
219
- },
220
- };
221
- const TRANSFORM_RULES = [
222
- GITHUB_BLOB_RULE,
223
- GITHUB_GIST_RULE,
224
- GITLAB_BLOB_RULE,
225
- BITBUCKET_SRC_RULE,
133
+ const GITHUB_BLOB_PATTERN = new URLPattern({
134
+ protocol: 'http{s}?',
135
+ hostname: '{:sub.}?github.com',
136
+ pathname: '/:owner/:repo/blob/:branch/:path+',
137
+ });
138
+ const GITHUB_GIST_PATTERN = new URLPattern({
139
+ protocol: 'http{s}?',
140
+ hostname: 'gist.github.com',
141
+ pathname: '/:user/:gistId',
142
+ });
143
+ const GITHUB_GIST_RAW_PATTERN = new URLPattern({
144
+ protocol: 'http{s}?',
145
+ hostname: 'gist.github.com',
146
+ pathname: '/:user/:gistId/raw/:filePath+',
147
+ });
148
+ const GITLAB_BLOB_PATTERNS = [
149
+ new URLPattern({
150
+ protocol: 'http{s}?',
151
+ hostname: 'gitlab.com',
152
+ pathname: '/:base+/-/blob/:branch/:path+',
153
+ }),
154
+ new URLPattern({
155
+ protocol: 'http{s}?',
156
+ hostname: '*:sub.gitlab.com',
157
+ pathname: '/:base+/-/blob/:branch/:path+',
158
+ }),
226
159
  ];
160
+ const BITBUCKET_SRC_PATTERN = new URLPattern({
161
+ protocol: 'http{s}?',
162
+ hostname: '{:sub.}?bitbucket.org',
163
+ pathname: '/:owner/:repo/src/:branch/:path+',
164
+ });
227
165
  const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
228
166
  const RAW_TEXT_EXTENSIONS = new Set([
229
167
  '.md',
@@ -240,33 +178,57 @@ const RAW_TEXT_EXTENSIONS = new Set([
240
178
  '.org',
241
179
  ]);
242
180
  class RawUrlTransformer {
181
+ logger;
182
+ constructor(logger) {
183
+ this.logger = logger;
184
+ }
243
185
  transformToRawUrl(url) {
244
186
  if (!url)
245
187
  return { url, transformed: false };
246
188
  if (this.isRawUrl(url))
247
189
  return { url, transformed: false };
248
- const { base, hash } = this.splitParams(url);
249
- const result = this.applyRules(base, hash);
250
- if (!result)
190
+ let base;
191
+ let hash;
192
+ let parsed;
193
+ try {
194
+ parsed = new URL(url);
195
+ base = parsed.origin + parsed.pathname;
196
+ ({ hash } = parsed);
197
+ }
198
+ catch {
199
+ ({ base, hash } = this.splitParams(url));
200
+ }
201
+ const match = this.tryTransformWithUrl(base, hash, parsed);
202
+ if (!match)
251
203
  return { url, transformed: false };
252
- logDebug('URL transformed to raw content URL', {
253
- platform: result.platform,
204
+ this.logger.debug('URL transformed to raw content URL', {
205
+ platform: match.platform,
254
206
  original: url.substring(0, 100),
255
- transformed: result.url.substring(0, 100),
207
+ transformed: match.url.substring(0, 100),
256
208
  });
257
- return { url: result.url, transformed: true, platform: result.platform };
209
+ return { url: match.url, transformed: true, platform: match.platform };
258
210
  }
259
- isRawTextContentUrl(url) {
260
- if (!url)
211
+ isRawTextContentUrl(urlString) {
212
+ if (!urlString)
261
213
  return false;
262
- if (this.isRawUrl(url))
214
+ if (this.isRawUrl(urlString))
263
215
  return true;
264
- const { base } = this.splitParams(url);
265
- const lowerBase = base.toLowerCase();
266
- const lastDot = lowerBase.lastIndexOf('.');
267
- if (lastDot === -1)
268
- return false;
269
- return RAW_TEXT_EXTENSIONS.has(lowerBase.slice(lastDot));
216
+ try {
217
+ const url = new URL(urlString);
218
+ const pathname = url.pathname.toLowerCase();
219
+ const lastDot = pathname.lastIndexOf('.');
220
+ if (lastDot === -1)
221
+ return false;
222
+ return RAW_TEXT_EXTENSIONS.has(pathname.slice(lastDot));
223
+ }
224
+ catch {
225
+ const { base } = this.splitParams(urlString);
226
+ const lowerBase = base.toLowerCase();
227
+ const lastDot = lowerBase.lastIndexOf('.');
228
+ if (lastDot === -1)
229
+ return false;
230
+ return RAW_TEXT_EXTENSIONS.has(lowerBase.slice(lastDot));
231
+ }
270
232
  }
271
233
  isRawUrl(url) {
272
234
  const lower = url.toLowerCase();
@@ -275,226 +237,340 @@ class RawUrlTransformer {
275
237
  lower.includes('/-/raw/') ||
276
238
  BITBUCKET_RAW_RE.test(lower));
277
239
  }
278
- splitParams(url) {
279
- const hashIndex = url.indexOf('#');
280
- const queryIndex = url.indexOf('?');
281
- const endIndex = Math.min(queryIndex === -1 ? url.length : queryIndex, hashIndex === -1 ? url.length : hashIndex);
282
- const hash = hashIndex !== -1 ? url.slice(hashIndex) : '';
283
- return { base: url.slice(0, endIndex), hash };
284
- }
285
- applyRules(base, hash) {
286
- for (const rule of TRANSFORM_RULES) {
287
- const urlToMatch = rule.name === 'github-gist' && hash.startsWith('#file-')
288
- ? base + hash
289
- : base;
290
- const match = rule.pattern.exec(urlToMatch);
291
- if (match)
292
- return { url: rule.transform(match), platform: rule.name };
293
- }
294
- return null;
295
- }
296
- }
297
- const rawUrlTransformer = new RawUrlTransformer();
298
- /** Backwards-compatible exports */
299
- export function transformToRawUrl(url) {
300
- return rawUrlTransformer.transformToRawUrl(url);
301
- }
302
- export function isRawTextContentUrl(url) {
303
- return rawUrlTransformer.isRawTextContentUrl(url);
304
- }
305
- const DNS_LOOKUP_TIMEOUT_MS = 5000;
306
- class SafeDnsLookup {
307
- lookup(hostname, options, callback) {
308
- const normalizedOptions = this.normalizeOptions(options);
309
- const useAll = Boolean(normalizedOptions.all);
310
- const resolvedFamily = this.resolveFamily(normalizedOptions.family);
311
- const lookupOptions = {
312
- family: normalizedOptions.family,
313
- hints: normalizedOptions.hints,
314
- all: true, // Always request all results; we select based on caller preference.
315
- order: this.resolveOrder(normalizedOptions),
316
- };
317
- const timeout = this.createTimeout(hostname, callback);
318
- const safeCallback = (err, address, family) => {
319
- if (timeout.isDone())
320
- return;
321
- timeout.markDone();
322
- callback(err, address, family);
323
- };
324
- (async () => {
325
- try {
326
- const result = await dns.promises.lookup(hostname, lookupOptions);
327
- const addresses = Array.isArray(result) ? result : [result];
328
- this.handleLookupResult(null, addresses, hostname, resolvedFamily, useAll, safeCallback);
329
- }
330
- catch (error) {
331
- this.handleLookupResult(error, [], hostname, resolvedFamily, useAll, safeCallback);
332
- }
333
- })().catch((error) => {
334
- if (!timeout.isDone()) {
335
- safeCallback(error, []);
336
- }
337
- });
338
- }
339
- normalizeOptions(options) {
340
- return typeof options === 'number' ? { family: options } : options;
341
- }
342
- resolveFamily(family) {
343
- if (family === 'IPv4')
344
- return 4;
345
- if (family === 'IPv6')
346
- return 6;
347
- return family;
348
- }
349
- resolveOrder(options) {
350
- if (options.order)
351
- return options.order;
352
- // legacy `verbatim` option support
353
- if (isObject(options)) {
354
- const legacy = options.verbatim;
355
- if (typeof legacy === 'boolean')
356
- return legacy ? 'verbatim' : 'ipv4first';
357
- }
358
- return 'verbatim';
359
- }
360
- handleLookupResult(error, addresses, hostname, resolvedFamily, useAll, callback) {
361
- if (error) {
362
- callback(error, addresses);
363
- return;
364
- }
365
- const list = this.normalizeResults(addresses, resolvedFamily);
366
- const validationError = this.validateResults(list, hostname);
367
- if (validationError) {
368
- callback(validationError, list);
369
- return;
240
+ splitParams(urlString) {
241
+ try {
242
+ const url = new URL(urlString);
243
+ const base = url.origin + url.pathname;
244
+ return { base, hash: url.hash };
370
245
  }
371
- const selection = this.selectResult(list, useAll, hostname);
372
- if (selection.error) {
373
- callback(selection.error, selection.fallback);
374
- return;
246
+ catch {
247
+ const hashIndex = urlString.indexOf('#');
248
+ const queryIndex = urlString.indexOf('?');
249
+ const endIndex = Math.min(queryIndex === -1 ? urlString.length : queryIndex, hashIndex === -1 ? urlString.length : hashIndex);
250
+ const hash = hashIndex !== -1 ? urlString.slice(hashIndex) : '';
251
+ return { base: urlString.slice(0, endIndex), hash };
375
252
  }
376
- callback(null, selection.address, selection.family);
377
253
  }
378
- normalizeResults(addresses, family) {
379
- if (Array.isArray(addresses))
380
- return addresses;
381
- return [{ address: addresses, family: family ?? 4 }];
382
- }
383
- validateResults(list, hostname) {
384
- if (list.length === 0) {
385
- return createErrorWithCode(`No DNS results returned for ${hostname}`, 'ENODATA');
254
+ tryTransformWithUrl(base, hash, preParsed) {
255
+ let parsed = null;
256
+ if (preParsed?.href.startsWith(base)) {
257
+ parsed = preParsed;
386
258
  }
387
- for (const addr of list) {
388
- if (addr.family !== 4 && addr.family !== 6) {
389
- return createErrorWithCode(`Invalid address family returned for ${hostname}`, 'EINVAL');
390
- }
391
- if (ipBlocker.isBlockedIp(addr.address)) {
392
- return createErrorWithCode(`Blocked IP detected for ${hostname}`, 'EBLOCKED');
393
- }
259
+ else if (URL.canParse(base)) {
260
+ parsed = new URL(base);
394
261
  }
262
+ if (!parsed)
263
+ return null;
264
+ if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
265
+ return null;
266
+ const gist = this.transformGithubGist(base, hash);
267
+ if (gist)
268
+ return gist;
269
+ const github = this.transformGithubBlob(base);
270
+ if (github)
271
+ return github;
272
+ const gitlab = this.transformGitLab(base, parsed.origin);
273
+ if (gitlab)
274
+ return gitlab;
275
+ const bitbucket = this.transformBitbucket(base, parsed.origin);
276
+ if (bitbucket)
277
+ return bitbucket;
395
278
  return null;
396
279
  }
397
- selectResult(list, useAll, hostname) {
398
- if (list.length === 0) {
280
+ transformGithubBlob(url) {
281
+ const match = GITHUB_BLOB_PATTERN.exec(url);
282
+ if (!match)
283
+ return null;
284
+ const groups = match.pathname.groups;
285
+ const owner = getPatternGroup(groups, 'owner');
286
+ const repo = getPatternGroup(groups, 'repo');
287
+ const branch = getPatternGroup(groups, 'branch');
288
+ const path = getPatternGroup(groups, 'path');
289
+ if (!owner || !repo || !branch || !path)
290
+ return null;
291
+ return {
292
+ url: `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`,
293
+ platform: 'github',
294
+ };
295
+ }
296
+ transformGithubGist(url, hash) {
297
+ const rawMatch = GITHUB_GIST_RAW_PATTERN.exec(url);
298
+ if (rawMatch) {
299
+ const groups = rawMatch.pathname.groups;
300
+ const user = getPatternGroup(groups, 'user');
301
+ const gistId = getPatternGroup(groups, 'gistId');
302
+ const filePath = getPatternGroup(groups, 'filePath');
303
+ if (!user || !gistId)
304
+ return null;
305
+ const resolvedFilePath = filePath ? `/${filePath}` : '';
399
306
  return {
400
- error: createErrorWithCode(`No DNS results returned for ${hostname}`, 'ENODATA'),
401
- fallback: [],
402
- address: [],
307
+ url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${resolvedFilePath}`,
308
+ platform: 'github-gist',
403
309
  };
404
310
  }
405
- if (useAll)
406
- return { address: list, fallback: list };
407
- const first = list.at(0);
408
- if (!first) {
311
+ const match = GITHUB_GIST_PATTERN.exec(url);
312
+ if (!match)
313
+ return null;
314
+ const groups = match.pathname.groups;
315
+ const user = getPatternGroup(groups, 'user');
316
+ const gistId = getPatternGroup(groups, 'gistId');
317
+ if (!user || !gistId)
318
+ return null;
319
+ let filePath = '';
320
+ if (hash.startsWith('#file-')) {
321
+ const filename = hash.slice('#file-'.length).replace(/-/g, '.');
322
+ if (filename)
323
+ filePath = `/${filename}`;
324
+ }
325
+ return {
326
+ url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`,
327
+ platform: 'github-gist',
328
+ };
329
+ }
330
+ transformGitLab(url, origin) {
331
+ for (const pattern of GITLAB_BLOB_PATTERNS) {
332
+ const match = pattern.exec(url);
333
+ if (!match)
334
+ continue;
335
+ const groups = match.pathname.groups;
336
+ const base = getPatternGroup(groups, 'base');
337
+ const branch = getPatternGroup(groups, 'branch');
338
+ const path = getPatternGroup(groups, 'path');
339
+ if (!base || !branch || !path)
340
+ return null;
409
341
  return {
410
- error: createErrorWithCode(`No DNS results returned for ${hostname}`, 'ENODATA'),
411
- fallback: [],
412
- address: [],
342
+ url: `${origin}/${base}/-/raw/${branch}/${path}`,
343
+ platform: 'gitlab',
413
344
  };
414
345
  }
415
- return { address: first.address, family: first.family, fallback: list };
346
+ return null;
416
347
  }
417
- createTimeout(hostname, callback) {
418
- let done = false;
419
- const timer = setTimeout(() => {
420
- if (done)
421
- return;
422
- done = true;
423
- callback(createErrorWithCode(`DNS lookup timed out for ${hostname}`, 'ETIMEOUT'), []);
424
- }, DNS_LOOKUP_TIMEOUT_MS);
425
- timer.unref();
348
+ transformBitbucket(url, origin) {
349
+ const match = BITBUCKET_SRC_PATTERN.exec(url);
350
+ if (!match)
351
+ return null;
352
+ const groups = match.pathname.groups;
353
+ const owner = getPatternGroup(groups, 'owner');
354
+ const repo = getPatternGroup(groups, 'repo');
355
+ const branch = getPatternGroup(groups, 'branch');
356
+ const path = getPatternGroup(groups, 'path');
357
+ if (!owner || !repo || !branch || !path)
358
+ return null;
426
359
  return {
427
- isDone: () => done,
428
- markDone: () => {
429
- done = true;
430
- clearTimeout(timer);
431
- },
360
+ url: `${origin}/${owner}/${repo}/raw/${branch}/${path}`,
361
+ platform: 'bitbucket',
432
362
  };
433
363
  }
434
364
  }
435
- const safeDns = new SafeDnsLookup();
436
- async function assertSafeDnsLookup(hostname) {
437
- await new Promise((resolve, reject) => {
438
- safeDns.lookup(hostname, { all: true }, (err) => {
439
- if (err) {
440
- reject(err);
441
- return;
442
- }
443
- resolve();
444
- });
445
- });
446
- }
447
- /* -------------------------------------------------------------------------------------------------
448
- * Fetch error mapping (request-level)
449
- * ------------------------------------------------------------------------------------------------- */
450
- function parseRetryAfter(header) {
451
- if (!header)
452
- return 60;
453
- const parsed = Number.parseInt(header, 10);
454
- return Number.isNaN(parsed) ? 60 : parsed;
365
+ const DNS_LOOKUP_TIMEOUT_MS = 5000;
366
+ const CNAME_LOOKUP_MAX_DEPTH = 5;
367
+ function normalizeDnsName(value) {
368
+ let normalized = value.trim().toLowerCase();
369
+ while (normalized.endsWith('.'))
370
+ normalized = normalized.slice(0, -1);
371
+ return normalized;
455
372
  }
456
- class FetchErrorFactory {
457
- canceled(url) {
458
- return new FetchError('Request was canceled', url, 499, {
459
- reason: 'aborted',
460
- });
373
+ function createAbortRace(signal, onAbort) {
374
+ if (!signal) {
375
+ return { abortPromise: null, cleanup: () => { } };
461
376
  }
462
- timeout(url, timeoutMs) {
463
- return new FetchError(`Request timeout after ${timeoutMs}ms`, url, 504, {
464
- timeout: timeoutMs,
465
- });
377
+ if (signal.aborted) {
378
+ return {
379
+ abortPromise: Promise.reject(onAbort()),
380
+ cleanup: () => { },
381
+ };
466
382
  }
467
- rateLimited(url, retryAfterHeader) {
468
- return new FetchError('Too many requests', url, 429, {
469
- retryAfter: parseRetryAfter(retryAfterHeader),
470
- });
383
+ let abortListener = null;
384
+ const abortPromise = new Promise((_, reject) => {
385
+ abortListener = () => {
386
+ reject(onAbort());
387
+ };
388
+ signal.addEventListener('abort', abortListener, { once: true });
389
+ });
390
+ const cleanup = () => {
391
+ if (!abortListener)
392
+ return;
393
+ try {
394
+ signal.removeEventListener('abort', abortListener);
395
+ }
396
+ catch {
397
+ // Ignore listener cleanup failures; they are non-fatal by design.
398
+ }
399
+ abortListener = null;
400
+ };
401
+ return { abortPromise, cleanup };
402
+ }
403
+ async function withTimeout(promise, timeoutMs, onTimeout, signal, onAbort) {
404
+ const controller = new AbortController();
405
+ const timeoutPromise = delay(timeoutMs, null, {
406
+ ref: false,
407
+ signal: controller.signal,
408
+ })
409
+ .then(() => Promise.reject(onTimeout()))
410
+ .catch((err) => {
411
+ if (isError(err) && err.name === 'AbortError')
412
+ return new Promise(() => { });
413
+ throw err;
414
+ });
415
+ const abortRace = createAbortRace(signal, onAbort ?? (() => new Error('Request was canceled')));
416
+ try {
417
+ return await Promise.race(abortRace.abortPromise
418
+ ? [promise, timeoutPromise, abortRace.abortPromise]
419
+ : [promise, timeoutPromise]);
471
420
  }
472
- http(url, status, statusText) {
473
- return new FetchError(`HTTP ${status}: ${statusText}`, url, status);
421
+ finally {
422
+ controller.abort();
423
+ abortRace.cleanup();
474
424
  }
475
- tooManyRedirects(url) {
476
- return new FetchError('Too many redirects', url);
425
+ }
426
+ function createAbortSignalError() {
427
+ const err = new Error('Request was canceled');
428
+ err.name = 'AbortError';
429
+ return err;
430
+ }
431
/**
 * DNS guard: rejects hostnames that are (or resolve to) an address the
 * injected IP blocker flags, and hostnames whose CNAME chain passes through a
 * blocked hostname.
 */
class SafeDnsResolver {
  ipBlocker;
  security;
  blockedHostSuffixes;

  /**
   * @param {object} ipBlocker - Exposes `isBlockedIp(address)`.
   * @param {object} security - Exposes `blockedHosts` with a `has(hostname)` method.
   * @param {string[]} blockedHostSuffixes - Hostname suffixes treated as blocked.
   */
  constructor(ipBlocker, security, blockedHostSuffixes) {
    this.ipBlocker = ipBlocker;
    this.security = security;
    this.blockedHostSuffixes = blockedHostSuffixes;
  }

  /**
   * Resolves when `hostname` passes every check; otherwise throws.
   *
   * @param {string} hostname - Hostname or IP literal to vet.
   * @param {AbortSignal} [signal] - Optional cancellation signal.
   * @throws An error coded 'EINVAL' (unusable hostname or address family),
   *   'EBLOCKED' (blocked IP or CNAME), 'ENODATA' (empty lookup result) or
   *   'ETIMEOUT' (lookup timeout), or an 'AbortError' when `signal` is aborted.
   */
  async assertSafeHostname(hostname, signal) {
    const normalizedHostname = normalizeDnsName(hostname);
    if (!normalizedHostname) {
      throw createErrorWithCode('Invalid hostname provided', 'EINVAL');
    }
    if (signal?.aborted) {
      throw createAbortSignalError();
    }
    // IP literals need no DNS round-trip; only the block list applies.
    if (isIP(normalizedHostname)) {
      if (this.ipBlocker.isBlockedIp(normalizedHostname)) {
        throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
      }
      return;
    }
    await this.assertNoBlockedCname(normalizedHostname, signal);
    const resultPromise = dns.promises.lookup(normalizedHostname, {
      all: true,
      order: 'verbatim',
    });
    const addresses = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS lookup timed out for ${normalizedHostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
    if (addresses.length === 0) {
      throw createErrorWithCode(`No DNS results returned for ${normalizedHostname}`, 'ENODATA');
    }
    // Every returned address must pass, not just the first one.
    for (const addr of addresses) {
      if (addr.family !== 4 && addr.family !== 6) {
        throw createErrorWithCode(`Invalid address family returned for ${normalizedHostname}`, 'EINVAL');
      }
      if (this.ipBlocker.isBlockedIp(addr.address)) {
        throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
      }
    }
  }

  /**
   * @param {string} hostname - Hostname to test.
   * @returns {boolean} True when `hostname` is in `security.blockedHosts` or
   *   ends with one of the configured suffixes.
   */
  isBlockedHostname(hostname) {
    if (this.security.blockedHosts.has(hostname))
      return true;
    return this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
  }

  /**
   * Walks the CNAME chain of `hostname` for at most CNAME_LOOKUP_MAX_DEPTH
   * hops and throws 'EBLOCKED' if any returned CNAME is a blocked hostname.
   * Only the first CNAME of each answer is followed to the next hop.
   */
  async assertNoBlockedCname(hostname, signal) {
    let current = hostname;
    const seen = new Set();
    for (let depth = 0; depth < CNAME_LOOKUP_MAX_DEPTH; depth += 1) {
      // Stop on an empty name or a loop in the chain.
      if (!current || seen.has(current))
        return;
      seen.add(current);
      const cnames = await this.resolveCname(current, signal);
      if (cnames.length === 0)
        return;
      for (const cname of cnames) {
        if (this.isBlockedHostname(cname)) {
          throw createErrorWithCode(`Blocked DNS CNAME detected for ${hostname}: ${cname}`, 'EBLOCKED');
        }
      }
      current = cnames[0] ?? '';
    }
  }

  /**
   * Looks up the CNAME records of `hostname`.
   *
   * @returns {Promise<string[]>} Normalized, non-empty CNAME targets; an empty
   *   array when the lookup fails for any reason other than cancellation.
   * @throws The 'AbortError' produced when `signal` cancels the lookup.
   */
  async resolveCname(hostname, signal) {
    try {
      const resultPromise = dns.promises.resolveCname(hostname);
      const cnames = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS CNAME lookup timed out for ${hostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
      return cnames
        .map((value) => normalizeDnsName(value))
        .filter((value) => value.length > 0);
    }
    catch (error) {
      if (isError(error) && error.name === 'AbortError') {
        throw error;
      }
      // Fail open: every other failure (no record, timeout, resolver error)
      // is treated as "no CNAME". The address lookup in assertSafeHostname
      // still vets the final IPs.
      return [];
    }
  }
}
491
- const fetchErrors = new FetchErrorFactory();
517
/**
 * Converts a Retry-After header value into a delay in whole seconds.
 *
 * @param {string | null | undefined} header - Raw header value.
 * @returns {number} The numeric delay when the value is purely numeric; the
 *   seconds remaining until the date (never negative) when it parses as a
 *   date; 60 when the header is missing or unparseable.
 */
function parseRetryAfter(header) {
  const fallbackSeconds = 60;
  if (!header)
    return fallbackSeconds;
  const trimmed = header.trim();
  // Only treat the value as a delay when it is entirely numeric. parseInt
  // alone accepts any digit prefix, so "2999-01-01T00:00:00Z" would be read
  // as 2999 seconds instead of as a date.
  if (/^\d+(?:\.\d+)?$/.test(trimmed))
    return Number.parseInt(trimmed, 10);
  const dateMs = Date.parse(trimmed);
  if (Number.isNaN(dateMs))
    return fallbackSeconds;
  const deltaMs = dateMs - Date.now();
  // A date in the past means the caller may retry immediately.
  return deltaMs <= 0 ? 0 : Math.ceil(deltaMs / 1000);
}
533
/**
 * Builds the FetchError reported when a request is canceled: status 499 with
 * details `{ reason: 'aborted' }`.
 */
function createCanceledFetchError(url) {
  const details = { reason: 'aborted' };
  return new FetchError('Request was canceled', url, 499, details);
}
538
/**
 * Builds the FetchError reported when a request exceeds its time budget:
 * status 504 with details `{ timeout: timeoutMs }`.
 */
function createTimeoutFetchError(url, timeoutMs) {
  const message = `Request timeout after ${timeoutMs}ms`;
  const details = { timeout: timeoutMs };
  return new FetchError(message, url, 504, details);
}
543
/**
 * Builds the FetchError for a rate-limited response: status 429 with the
 * parsed Retry-After delay stored under `retryAfter`.
 */
function createRateLimitedFetchError(url, retryAfterHeader) {
  const retryAfter = parseRetryAfter(retryAfterHeader);
  return new FetchError('Too many requests', url, 429, { retryAfter });
}
548
/** Builds the FetchError for a non-success HTTP status, carrying that status. */
function createHttpFetchError(url, status, statusText) {
  const message = `HTTP ${status}: ${statusText}`;
  return new FetchError(message, url, status);
}
551
/** Builds the FetchError raised once the redirect budget is exhausted. */
function createTooManyRedirectsFetchError(url) {
  const message = 'Too many redirects';
  return new FetchError(message, url);
}
554
/** Builds the FetchError raised when a redirect response has no Location header. */
function createMissingRedirectLocationFetchError(url) {
  const message = 'Redirect response missing Location header';
  return new FetchError(message, url);
}
557
/**
 * Builds the FetchError for a network-level failure. The underlying message,
 * when truthy, is kept in the details under `message`; otherwise the details
 * object is empty. No status code is set.
 */
function createNetworkFetchError(url, message) {
  const text = `Network error: Could not reach ${url}`;
  const details = message ? { message } : {};
  return new FetchError(text, url, undefined, details);
}
560
/** Builds a FetchError carrying `message` verbatim, with no status or details. */
function createUnknownFetchError(url, message) {
  const error = new FetchError(message, url);
  return error;
}
563
/**
 * Builds the FetchError reported when a request is aborted while its response
 * body is being read: status 499 with details `{ reason: 'aborted' }`.
 */
function createAbortedFetchError(url) {
  const details = { reason: 'aborted' };
  return new FetchError('Request was aborted during response read', url, 499, details);
}
492
568
  function isAbortError(error) {
493
- return (error instanceof Error &&
569
+ return (isError(error) &&
494
570
  (error.name === 'AbortError' || error.name === 'TimeoutError'));
495
571
  }
496
572
  function isTimeoutError(error) {
497
- return error instanceof Error && error.name === 'TimeoutError';
573
+ return isError(error) && error.name === 'TimeoutError';
498
574
  }
499
575
  function resolveErrorUrl(error, fallback) {
500
576
  if (error instanceof FetchError)
@@ -510,122 +586,165 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
510
586
  const url = resolveErrorUrl(error, fallbackUrl);
511
587
  if (isAbortError(error)) {
512
588
  return isTimeoutError(error)
513
- ? fetchErrors.timeout(url, timeoutMs)
514
- : fetchErrors.canceled(url);
589
+ ? createTimeoutFetchError(url, timeoutMs)
590
+ : createCanceledFetchError(url);
591
+ }
592
+ if (!isError(error))
593
+ return createUnknownFetchError(url, 'Unexpected error');
594
+ if (!isSystemError(error))
595
+ return createNetworkFetchError(url, error.message);
596
+ const { code } = error;
597
+ if (code === 'ETIMEOUT') {
598
+ return new FetchError(error.message, url, 504, { code });
515
599
  }
516
- if (error instanceof Error)
517
- return fetchErrors.network(url, error.message);
518
- return fetchErrors.unknown(url, 'Unexpected error');
600
+ if (code === VALIDATION_ERROR_CODE ||
601
+ code === 'EBADREDIRECT' ||
602
+ code === 'EBLOCKED' ||
603
+ code === 'ENODATA' ||
604
+ code === 'EINVAL') {
605
+ return new FetchError(error.message, url, 400, { code });
606
+ }
607
+ return new FetchError(`Network error: Could not reach ${url}`, url, undefined, {
608
+ code,
609
+ message: error.message,
610
+ });
519
611
  }
520
612
  const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
521
613
  const SLOW_REQUEST_THRESHOLD_MS = 5000;
522
614
  class FetchTelemetry {
615
+ logger;
616
+ context;
617
+ redactor;
618
+ constructor(logger, context, redactor) {
619
+ this.logger = logger;
620
+ this.context = context;
621
+ this.redactor = redactor;
622
+ }
623
+ redact(url) {
624
+ return this.redactor.redact(url);
625
+ }
523
626
  start(url, method) {
524
- const safeUrl = redactUrl(url);
525
- const contextRequestId = getRequestId();
526
- const operationId = getOperationId();
627
+ const safeUrl = this.redactor.redact(url);
628
+ const contextRequestId = this.context.getRequestId();
629
+ const operationId = this.context.getOperationId();
527
630
  const ctx = {
528
631
  requestId: randomUUID(),
529
632
  startTime: performance.now(),
530
633
  url: safeUrl,
531
634
  method: method.toUpperCase(),
532
- ...(contextRequestId ? { contextRequestId } : {}),
533
- ...(operationId ? { operationId } : {}),
534
635
  };
535
- this.publish({
636
+ if (contextRequestId)
637
+ ctx.contextRequestId = contextRequestId;
638
+ if (operationId)
639
+ ctx.operationId = operationId;
640
+ const event = {
536
641
  v: 1,
537
642
  type: 'start',
538
643
  requestId: ctx.requestId,
539
644
  method: ctx.method,
540
645
  url: ctx.url,
541
- ...(ctx.contextRequestId
542
- ? { contextRequestId: ctx.contextRequestId }
543
- : {}),
544
- ...(ctx.operationId ? { operationId: ctx.operationId } : {}),
545
- });
546
- logDebug('HTTP Request', {
646
+ };
647
+ if (ctx.contextRequestId)
648
+ event.contextRequestId = ctx.contextRequestId;
649
+ if (ctx.operationId)
650
+ event.operationId = ctx.operationId;
651
+ this.publish(event);
652
+ const logData = {
547
653
  requestId: ctx.requestId,
548
654
  method: ctx.method,
549
655
  url: ctx.url,
550
- ...(ctx.contextRequestId
551
- ? { contextRequestId: ctx.contextRequestId }
552
- : {}),
553
- ...(ctx.operationId ? { operationId: ctx.operationId } : {}),
554
- });
656
+ };
657
+ if (ctx.contextRequestId)
658
+ logData.contextRequestId = ctx.contextRequestId;
659
+ if (ctx.operationId)
660
+ logData.operationId = ctx.operationId;
661
+ this.logger.debug('HTTP Request', logData);
555
662
  return ctx;
556
663
  }
557
664
  recordResponse(context, response, contentSize) {
558
665
  const duration = performance.now() - context.startTime;
559
666
  const durationLabel = `${Math.round(duration)}ms`;
560
- this.publish({
667
+ const event = {
561
668
  v: 1,
562
669
  type: 'end',
563
670
  requestId: context.requestId,
564
671
  status: response.status,
565
672
  duration,
566
- ...(context.contextRequestId
567
- ? { contextRequestId: context.contextRequestId }
568
- : {}),
569
- ...(context.operationId ? { operationId: context.operationId } : {}),
570
- });
673
+ };
674
+ if (context.contextRequestId)
675
+ event.contextRequestId = context.contextRequestId;
676
+ if (context.operationId)
677
+ event.operationId = context.operationId;
678
+ this.publish(event);
571
679
  const contentType = response.headers.get('content-type') ?? undefined;
572
680
  const contentLengthHeader = response.headers.get('content-length');
573
681
  const size = contentLengthHeader ??
574
682
  (contentSize === undefined ? undefined : String(contentSize));
575
- logDebug('HTTP Response', {
683
+ const logData = {
576
684
  requestId: context.requestId,
577
685
  status: response.status,
578
686
  url: context.url,
579
687
  duration: durationLabel,
580
- ...(context.contextRequestId
581
- ? { contextRequestId: context.contextRequestId }
582
- : {}),
583
- ...(context.operationId ? { operationId: context.operationId } : {}),
584
- ...(contentType ? { contentType } : {}),
585
- ...(size ? { size } : {}),
586
- });
688
+ };
689
+ if (context.contextRequestId)
690
+ logData.contextRequestId = context.contextRequestId;
691
+ if (context.operationId)
692
+ logData.operationId = context.operationId;
693
+ if (contentType)
694
+ logData.contentType = contentType;
695
+ if (size)
696
+ logData.size = size;
697
+ this.logger.debug('HTTP Response', logData);
587
698
  if (duration > SLOW_REQUEST_THRESHOLD_MS) {
588
- logWarn('Slow HTTP request detected', {
699
+ const warnData = {
589
700
  requestId: context.requestId,
590
701
  url: context.url,
591
702
  duration: durationLabel,
592
- ...(context.contextRequestId
593
- ? { contextRequestId: context.contextRequestId }
594
- : {}),
595
- ...(context.operationId ? { operationId: context.operationId } : {}),
596
- });
703
+ };
704
+ if (context.contextRequestId)
705
+ warnData.contextRequestId = context.contextRequestId;
706
+ if (context.operationId)
707
+ warnData.operationId = context.operationId;
708
+ this.logger.warn('Slow HTTP request detected', warnData);
597
709
  }
598
710
  }
599
711
  recordError(context, error, status) {
600
712
  const duration = performance.now() - context.startTime;
601
- const err = error instanceof Error ? error : new Error(String(error));
713
+ const err = isError(error) ? error : new Error(String(error));
602
714
  const code = isSystemError(err) ? err.code : undefined;
603
- this.publish({
715
+ const event = {
604
716
  v: 1,
605
717
  type: 'error',
606
718
  requestId: context.requestId,
607
719
  url: context.url,
608
720
  error: err.message,
609
721
  duration,
610
- ...(code !== undefined ? { code } : {}),
611
- ...(status !== undefined ? { status } : {}),
612
- ...(context.contextRequestId
613
- ? { contextRequestId: context.contextRequestId }
614
- : {}),
615
- ...(context.operationId ? { operationId: context.operationId } : {}),
616
- });
617
- const log = status === 429 ? logWarn : logError;
618
- log('HTTP Request Error', {
722
+ };
723
+ if (code !== undefined)
724
+ event.code = code;
725
+ if (status !== undefined)
726
+ event.status = status;
727
+ if (context.contextRequestId)
728
+ event.contextRequestId = context.contextRequestId;
729
+ if (context.operationId)
730
+ event.operationId = context.operationId;
731
+ this.publish(event);
732
+ const logData = {
619
733
  requestId: context.requestId,
620
734
  url: context.url,
621
735
  status,
622
736
  code,
623
737
  error: err.message,
624
- ...(context.contextRequestId
625
- ? { contextRequestId: context.contextRequestId }
626
- : {}),
627
- ...(context.operationId ? { operationId: context.operationId } : {}),
628
- });
738
+ };
739
+ if (context.contextRequestId)
740
+ logData.contextRequestId = context.contextRequestId;
741
+ if (context.operationId)
742
+ logData.operationId = context.operationId;
743
+ if (status === 429) {
744
+ this.logger.warn('HTTP Request Error', logData);
745
+ return;
746
+ }
747
+ this.logger.error('HTTP Request Error', logData);
629
748
  }
630
749
  publish(event) {
631
750
  if (!fetchChannel.hasSubscribers)
@@ -634,49 +753,50 @@ class FetchTelemetry {
634
753
  fetchChannel.publish(event);
635
754
  }
636
755
  catch {
637
- // Best-effort; subscriber failures must not crash request path.
756
+ // Best-effort telemetry; never crash request path.
638
757
  }
639
758
  }
640
759
  }
641
- const telemetry = new FetchTelemetry();
642
- /** Backwards-compatible exports */
643
- export function startFetchTelemetry(url, method) {
644
- return telemetry.start(url, method);
645
- }
646
- export function recordFetchResponse(context, response, contentSize) {
647
- telemetry.recordResponse(context, response, contentSize);
648
- }
649
- export function recordFetchError(context, error, status) {
650
- telemetry.recordError(context, error, status);
651
- }
652
- /* -------------------------------------------------------------------------------------------------
653
- * Redirect handling
654
- * ------------------------------------------------------------------------------------------------- */
655
760
  const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
656
761
  function isRedirectStatus(status) {
657
762
  return REDIRECT_STATUSES.has(status);
658
763
  }
659
764
  function cancelResponseBody(response) {
660
765
  const cancelPromise = response.body?.cancel();
661
- if (cancelPromise)
662
- cancelPromise.catch(() => {
663
- /* ignore */
664
- });
766
+ if (!cancelPromise)
767
+ return;
768
+ void cancelPromise.catch(() => undefined);
665
769
  }
666
770
  class RedirectFollower {
771
+ fetchFn;
772
+ normalizeUrl;
773
+ preflight;
774
+ constructor(fetchFn, normalizeUrl, preflight) {
775
+ this.fetchFn = fetchFn;
776
+ this.normalizeUrl = normalizeUrl;
777
+ this.preflight = preflight;
778
+ }
667
779
  async fetchWithRedirects(url, init, maxRedirects) {
668
780
  let currentUrl = url;
669
781
  const redirectLimit = Math.max(0, maxRedirects);
670
782
  for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
671
- const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => this.performFetchCycle(currentUrl, init, redirectLimit, redirectCount));
783
+ const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => {
784
+ if (this.preflight) {
785
+ await this.preflight(currentUrl, init.signal ?? undefined);
786
+ }
787
+ return this.performFetchCycle(currentUrl, init, redirectLimit, redirectCount);
788
+ });
672
789
  if (!nextUrl)
673
790
  return { response, url: currentUrl };
674
791
  currentUrl = nextUrl;
675
792
  }
676
- throw fetchErrors.tooManyRedirects(currentUrl);
793
+ throw createTooManyRedirectsFetchError(currentUrl);
677
794
  }
678
795
  async performFetchCycle(currentUrl, init, redirectLimit, redirectCount) {
679
- const response = await fetch(currentUrl, { ...init, redirect: 'manual' });
796
+ const response = await this.fetchFn(currentUrl, {
797
+ ...init,
798
+ redirect: 'manual',
799
+ });
680
800
  if (!isRedirectStatus(response.status))
681
801
  return { response };
682
802
  this.assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount);
@@ -691,23 +811,24 @@ class RedirectFollower {
691
811
  if (redirectCount < redirectLimit)
692
812
  return;
693
813
  cancelResponseBody(response);
694
- throw fetchErrors.tooManyRedirects(currentUrl);
814
+ throw createTooManyRedirectsFetchError(currentUrl);
695
815
  }
696
816
  getRedirectLocation(response, currentUrl) {
697
817
  const location = response.headers.get('location');
698
818
  if (location)
699
819
  return location;
700
820
  cancelResponseBody(response);
701
- throw fetchErrors.missingRedirectLocation(currentUrl);
821
+ throw createMissingRedirectLocationFetchError(currentUrl);
702
822
  }
703
823
  resolveRedirectTarget(baseUrl, location) {
704
- if (!URL.canParse(location, baseUrl))
824
+ if (!URL.canParse(location, baseUrl)) {
705
825
  throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
826
+ }
706
827
  const resolved = new URL(location, baseUrl);
707
828
  if (resolved.username || resolved.password) {
708
829
  throw createErrorWithCode('Redirect target includes credentials', 'EBADREDIRECT');
709
830
  }
710
- return validateAndNormalizeUrl(resolved.href);
831
+ return this.normalizeUrl(resolved.href);
711
832
  }
712
833
  annotateRedirectError(error, url) {
713
834
  if (!isObject(error))
@@ -724,94 +845,266 @@ class RedirectFollower {
724
845
  }
725
846
  }
726
847
  }
727
- const redirectFollower = new RedirectFollower();
728
- /** Backwards-compatible export */
729
- export async function fetchWithRedirects(url, init, maxRedirects) {
730
- return redirectFollower.fetchWithRedirects(url, init, maxRedirects);
848
/**
 * Extracts the charset parameter from a Content-Type header value.
 *
 * @param {string | null | undefined} contentType - Raw header value.
 * @returns {string | undefined} The charset with surrounding whitespace and
 *   one pair of double quotes removed, or undefined when the header is empty
 *   or has no charset parameter.
 */
function getCharsetFromContentType(contentType) {
  const captured = contentType
    ? /charset=([^;]+)/i.exec(contentType)?.[1]
    : undefined;
  if (!captured)
    return undefined;
  const value = captured.trim();
  const isQuoted = value.startsWith('"') && value.endsWith('"');
  return (isQuoted ? value.slice(1, -1) : value).trim();
}
732
- /* -------------------------------------------------------------------------------------------------
733
- * Response reading (max size + abort-aware streaming)
734
- * ------------------------------------------------------------------------------------------------- */
735
- function assertContentLengthWithinLimit(response, url, maxBytes) {
736
- const header = response.headers.get('content-length');
737
- if (!header)
738
- return;
739
- const contentLength = Number.parseInt(header, 10);
740
- if (Number.isNaN(contentLength) || contentLength <= maxBytes)
741
- return;
742
- cancelResponseBody(response);
743
- throw fetchErrors.sizeLimit(url, maxBytes);
861
/**
 * Creates a TextDecoder for `encoding`, falling back to UTF-8 when no
 * encoding is given or the label is rejected by TextDecoder.
 */
function createDecoder(encoding) {
  const utf8Decoder = () => new TextDecoder('utf-8');
  if (!encoding)
    return utf8Decoder();
  try {
    return new TextDecoder(encoding);
  }
  catch {
    return utf8Decoder();
  }
}
871
/** Trims and lower-cases an encoding label; null and undefined become ''. */
function normalizeEncodingLabel(encoding) {
  if (encoding === undefined || encoding === null)
    return '';
  return encoding.trim().toLowerCase();
}
874
/**
 * Reports whether the normalized label starts with 'utf-16' or 'utf-32', or
 * equals 'ucs-2', 'unicodefffe' or 'unicodefeff'.
 */
function isUnicodeWideEncoding(encoding) {
  const label = normalizeEncodingLabel(encoding);
  if (label.startsWith('utf-16') || label.startsWith('utf-32'))
    return true;
  return (label === 'ucs-2' ||
    label === 'unicodefffe' ||
    label === 'unicodefeff');
}
882
+ const BOM_SIGNATURES = [
883
+ // 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
884
+ { bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
885
+ { bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
886
+ { bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
887
+ { bytes: [0xff, 0xfe], encoding: 'utf-16le' },
888
+ { bytes: [0xfe, 0xff], encoding: 'utf-16be' },
889
+ ];
890
/**
 * Returns the encoding of the first BOM_SIGNATURES entry whose bytes prefix
 * `buffer`, or undefined when none matches.
 */
function detectBomEncoding(buffer) {
  const hit = BOM_SIGNATURES.find(({ bytes }) => startsWithBytes(buffer, bytes));
  return hit?.encoding;
}
897
/**
 * Reads an attribute-style value from `input` starting at `startIndex`.
 *
 * A value that opens with a single or double quote runs to the matching
 * closing quote. An unquoted value runs to the first whitespace, '/', '>',
 * quote or ';'.
 *
 * @param {string} input - Text to read from.
 * @param {number} startIndex - Index of the first character of the value.
 * @returns {string} The trimmed value; '' when `startIndex` is out of range
 *   or an opening quote is never closed.
 */
function readQuotedValue(input, startIndex) {
  const first = input[startIndex];
  if (!first)
    return '';
  if (first === '"' || first === "'") {
    const end = input.indexOf(first, startIndex + 1);
    return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
  }
  const tail = input.slice(startIndex);
  // Quotes and ';' must end an unquoted value too. In
  // content="text/html; charset=utf-8" the charset is unquoted but is
  // followed by the closing quote of the enclosing attribute, which would
  // otherwise be kept as part of the value.
  const stop = tail.search(/[\s/>"';]/);
  return (stop === -1 ? tail : tail.slice(0, stop)).trim();
}
910
/**
 * Finds the first case-insensitive 'charset=' in `headSnippet` and returns
 * the value that follows it, lower-cased; undefined when there is no such
 * token or the value is empty.
 */
function extractHtmlCharset(headSnippet) {
  const token = 'charset=';
  const tokenAt = headSnippet.toLowerCase().indexOf(token);
  if (tokenAt === -1)
    return undefined;
  const value = readQuotedValue(headSnippet, tokenAt + token.length);
  if (!value)
    return undefined;
  return value.toLowerCase();
}
920
/**
 * Returns the lower-cased `encoding=` value of the first '<?xml' declaration
 * in `headSnippet`, or undefined when there is no declaration, no encoding
 * attribute, or the value is empty. An unterminated declaration is taken to
 * run to the end of the snippet.
 */
function extractXmlEncoding(headSnippet) {
  const lowered = headSnippet.toLowerCase();
  const declStart = lowered.indexOf('<?xml');
  if (declStart === -1)
    return undefined;
  const declClose = lowered.indexOf('?>', declStart);
  const declaration = headSnippet.slice(declStart, declClose === -1 ? undefined : declClose + 2);
  const token = 'encoding=';
  const tokenAt = declaration.toLowerCase().indexOf(token);
  if (tokenAt === -1)
    return undefined;
  const value = readQuotedValue(declaration, tokenAt + token.length);
  if (!value)
    return undefined;
  return value.toLowerCase();
}
938
/**
 * Scans up to the first 8,192 bytes of `buffer`, read as latin1 text, for an
 * HTML charset declaration and then for an XML encoding declaration.
 *
 * @returns {string | undefined} The declared encoding, or undefined when the
 *   buffer is empty or declares none.
 */
function detectHtmlDeclaredEncoding(buffer) {
  const scanSize = Math.min(buffer.length, 8_192);
  if (scanSize === 0)
    return undefined;
  const head = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize);
  const headSnippet = head.toString('latin1');
  const htmlCharset = extractHtmlCharset(headSnippet);
  return htmlCharset ?? extractXmlEncoding(headSnippet);
}
945
/**
 * Picks the encoding for `sample`, in priority order: a byte-order mark, the
 * caller's `declaredEncoding`, then a declaration found inside the content.
 */
function resolveEncoding(declaredEncoding, sample) {
  return (detectBomEncoding(sample) ||
    declaredEncoding ||
    detectHtmlDeclaredEncoding(sample));
}
953
+ const BINARY_SIGNATURES = [
954
+ [0x25, 0x50, 0x44, 0x46],
955
+ [0x89, 0x50, 0x4e, 0x47],
956
+ [0x47, 0x49, 0x46, 0x38],
957
+ [0xff, 0xd8, 0xff],
958
+ [0x52, 0x49, 0x46, 0x46],
959
+ [0x42, 0x4d],
960
+ [0x49, 0x49, 0x2a, 0x00],
961
+ [0x4d, 0x4d, 0x00, 0x2a],
962
+ [0x00, 0x00, 0x01, 0x00],
963
+ [0x50, 0x4b, 0x03, 0x04],
964
+ [0x1f, 0x8b],
965
+ [0x42, 0x5a, 0x68],
966
+ [0x52, 0x61, 0x72, 0x21],
967
+ [0x37, 0x7a, 0xbc, 0xaf],
968
+ [0x7f, 0x45, 0x4c, 0x46],
969
+ [0x4d, 0x5a],
970
+ [0xcf, 0xfa, 0xed, 0xfe],
971
+ [0x00, 0x61, 0x73, 0x6d],
972
+ [0x1a, 0x45, 0xdf, 0xa3],
973
+ [0x66, 0x74, 0x79, 0x70],
974
+ [0x46, 0x4c, 0x56],
975
+ [0x49, 0x44, 0x33],
976
+ [0xff, 0xfb],
977
+ [0xff, 0xfa],
978
+ [0x4f, 0x67, 0x67, 0x53],
979
+ [0x66, 0x4c, 0x61, 0x43],
980
+ [0x4d, 0x54, 0x68, 0x64],
981
+ [0x77, 0x4f, 0x46, 0x46],
982
+ [0x00, 0x01, 0x00, 0x00],
983
+ [0x4f, 0x54, 0x54, 0x4f],
984
+ [0x53, 0x51, 0x4c, 0x69],
985
+ ];
986
/**
 * Reports whether `buffer` begins with every byte of `signature`, in order.
 * A buffer shorter than the signature never matches.
 */
function startsWithBytes(buffer, signature) {
  if (buffer.length < signature.length)
    return false;
  return signature.every((byte, index) => buffer[index] === byte);
}
996
/** Reports whether any of the first `limit` bytes of `buffer` is 0x00. */
function hasNullByte(buffer, limit) {
  const scanned = buffer.subarray(0, Math.min(buffer.length, limit));
  return scanned.includes(0x00);
}
1000
/**
 * Reports whether `buffer` looks binary: it starts with any BINARY_SIGNATURES
 * entry, or, unless `encoding` is a wide Unicode encoding, it has a null
 * byte within its first 1000 bytes.
 */
function isBinaryContent(buffer, encoding) {
  const hasSignature = BINARY_SIGNATURES.some((signature) => startsWithBytes(buffer, signature));
  if (hasSignature)
    return true;
  if (isUnicodeWideEncoding(encoding))
    return false;
  return hasNullByte(buffer, 1000);
}
745
1007
  class ResponseTextReader {
746
- async read(response, url, maxBytes, signal) {
747
- assertContentLengthWithinLimit(response, url, maxBytes);
1008
+ async read(response, url, maxBytes, signal, encoding) {
1009
+ const { buffer, encoding: effectiveEncoding } = await this.readBuffer(response, url, maxBytes, signal, encoding);
1010
+ const decoder = createDecoder(effectiveEncoding);
1011
+ const text = decoder.decode(buffer);
1012
+ return { text, size: buffer.byteLength };
1013
+ }
1014
+ async readBuffer(response, url, maxBytes, signal, encoding) {
1015
+ if (signal?.aborted) {
1016
+ cancelResponseBody(response);
1017
+ throw createAbortedFetchError(url);
1018
+ }
1019
+ const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
748
1020
  if (!response.body) {
749
- const text = await response.text();
750
- const size = Buffer.byteLength(text);
751
- if (size > maxBytes)
752
- throw fetchErrors.sizeLimit(url, maxBytes);
753
- return { text, size };
1021
+ if (signal?.aborted)
1022
+ throw createCanceledFetchError(url);
1023
+ const arrayBuffer = await response.arrayBuffer();
1024
+ const length = Math.min(arrayBuffer.byteLength, limit);
1025
+ const buffer = new Uint8Array(arrayBuffer, 0, length);
1026
+ const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
1027
+ if (isBinaryContent(buffer, effectiveEncoding)) {
1028
+ throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
1029
+ }
1030
+ return { buffer, encoding: effectiveEncoding, size: buffer.byteLength };
754
1031
  }
755
- return this.readStreamWithLimit(response.body, url, maxBytes, signal);
1032
+ return this.readStreamToBuffer(response.body, url, limit, signal, encoding);
756
1033
  }
757
- async readStreamWithLimit(stream, url, maxBytes, signal) {
758
- const decoder = new TextDecoder();
759
- const parts = [];
1034
+ async readNext(reader, abortPromise) {
1035
+ return abortPromise
1036
+ ? await Promise.race([reader.read(), abortPromise])
1037
+ : await reader.read();
1038
+ }
1039
+ async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
1040
+ let effectiveEncoding = encoding;
1041
+ let decoder = null;
1042
+ const chunks = [];
760
1043
  let total = 0;
761
1044
  const reader = stream.getReader();
1045
+ const abortRace = createAbortRace(signal, () => createAbortedFetchError(url));
762
1046
  try {
763
- await this.throwIfAborted(signal, url, reader);
764
- let result = await reader.read();
1047
+ let result = await this.readNext(reader, abortRace.abortPromise);
1048
+ if (!result.done) {
1049
+ effectiveEncoding =
1050
+ resolveEncoding(encoding, result.value) ?? encoding ?? 'utf-8';
1051
+ decoder = createDecoder(effectiveEncoding);
1052
+ }
1053
+ let checkedBinary = false;
765
1054
  while (!result.done) {
766
- total += result.value.byteLength;
767
- if (total > maxBytes)
768
- throw fetchErrors.sizeLimit(url, maxBytes);
769
- const decoded = decoder.decode(result.value, { stream: true });
770
- if (decoded)
771
- parts.push(decoded);
772
- await this.throwIfAborted(signal, url, reader);
773
- result = await reader.read();
1055
+ const chunk = result.value;
1056
+ if (!checkedBinary) {
1057
+ checkedBinary = true;
1058
+ if (isBinaryContent(chunk, decoder?.encoding)) {
1059
+ await this.cancelReaderQuietly(reader);
1060
+ throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
1061
+ }
1062
+ }
1063
+ const newTotal = total + chunk.length;
1064
+ if (newTotal > maxBytes) {
1065
+ const remaining = maxBytes - total;
1066
+ if (remaining > 0) {
1067
+ chunks.push(chunk.subarray(0, remaining));
1068
+ total += remaining;
1069
+ }
1070
+ await this.cancelReaderQuietly(reader);
1071
+ break;
1072
+ }
1073
+ chunks.push(chunk);
1074
+ total = newTotal;
1075
+ result = await this.readNext(reader, abortRace.abortPromise);
774
1076
  }
775
1077
  }
776
1078
  catch (error) {
777
1079
  await this.cancelReaderQuietly(reader);
778
- if (signal?.aborted)
779
- throw new FetchError('Request was aborted during response read', url, 499, { reason: 'aborted' });
780
- throw error;
1080
+ this.handleReadingError(error, url, signal);
781
1081
  }
782
1082
  finally {
1083
+ abortRace.cleanup();
783
1084
  reader.releaseLock();
784
1085
  }
785
- const final = decoder.decode();
786
- if (final)
787
- parts.push(final);
788
- return { text: parts.join(''), size: total };
1086
+ return {
1087
+ buffer: Buffer.concat(chunks, total),
1088
+ encoding: effectiveEncoding ?? 'utf-8',
1089
+ size: total,
1090
+ };
789
1091
  }
790
- async throwIfAborted(signal, url, reader) {
791
- if (!signal?.aborted)
792
- return;
793
- await this.cancelReaderQuietly(reader);
794
- throw new FetchError('Request was aborted during response read', url, 499, {
795
- reason: 'aborted',
796
- });
1092
+ handleReadingError(error, url, signal) {
1093
+ if (error instanceof FetchError)
1094
+ throw error;
1095
+ if (signal?.aborted)
1096
+ throw createAbortedFetchError(url);
1097
+ throw error;
797
1098
  }
798
1099
  async cancelReaderQuietly(reader) {
799
1100
  try {
800
1101
  await reader.cancel();
801
1102
  }
802
1103
  catch {
803
- // ignore
1104
+ // Ignore cancellation failures; stream teardown must proceed.
804
1105
  }
805
1106
  }
806
1107
  }
807
- const responseReader = new ResponseTextReader();
808
- /** Backwards-compatible export */
809
- export async function readResponseText(response, url, maxBytes, signal) {
810
- return responseReader.read(response, url, maxBytes, signal);
811
- }
812
- /* -------------------------------------------------------------------------------------------------
813
- * HTTP fetcher (headers, signals, response handling)
814
- * ------------------------------------------------------------------------------------------------- */
815
1108
  const DEFAULT_HEADERS = {
816
1109
  'User-Agent': config.fetcher.userAgent,
817
1110
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -820,57 +1113,370 @@ const DEFAULT_HEADERS = {
820
1113
  Connection: 'keep-alive',
821
1114
  };
822
1115
  function buildHeaders() {
823
- return { ...DEFAULT_HEADERS };
1116
+ return DEFAULT_HEADERS;
824
1117
  }
825
1118
  function buildRequestSignal(timeoutMs, external) {
1119
+ if (timeoutMs <= 0)
1120
+ return external;
826
1121
  const timeoutSignal = AbortSignal.timeout(timeoutMs);
827
1122
  return external ? AbortSignal.any([external, timeoutSignal]) : timeoutSignal;
828
1123
  }
829
1124
  function buildRequestInit(headers, signal) {
830
- return { method: 'GET', headers, signal };
1125
+ return {
1126
+ method: 'GET',
1127
+ headers,
1128
+ ...(signal ? { signal } : {}),
1129
+ };
831
1130
  }
832
1131
  function resolveResponseError(response, finalUrl) {
833
1132
  if (response.status === 429) {
834
- return fetchErrors.rateLimited(finalUrl, response.headers.get('retry-after'));
1133
+ return createRateLimitedFetchError(finalUrl, response.headers.get('retry-after'));
835
1134
  }
836
1135
  return response.ok
837
1136
  ? null
838
- : fetchErrors.http(finalUrl, response.status, response.statusText);
1137
+ : createHttpFetchError(finalUrl, response.status, response.statusText);
1138
+ }
1139
+ function resolveMediaType(contentType) {
1140
+ if (!contentType)
1141
+ return null;
1142
+ const semiIndex = contentType.indexOf(';');
1143
+ const mediaType = semiIndex === -1 ? contentType : contentType.slice(0, semiIndex);
1144
+ const trimmed = mediaType.trim();
1145
+ return trimmed ? trimmed.toLowerCase() : null;
1146
+ }
1147
+ const TEXTUAL_MEDIA_TYPES = new Set([
1148
+ 'application/json',
1149
+ 'application/ld+json',
1150
+ 'application/xml',
1151
+ 'application/xhtml+xml',
1152
+ 'application/javascript',
1153
+ 'application/ecmascript',
1154
+ 'application/x-javascript',
1155
+ 'application/x-yaml',
1156
+ 'application/yaml',
1157
+ 'application/markdown',
1158
+ ]);
1159
+ function isTextLikeMediaType(mediaType) {
1160
+ if (mediaType.startsWith('text/'))
1161
+ return true;
1162
+ if (TEXTUAL_MEDIA_TYPES.has(mediaType))
1163
+ return true;
1164
+ return (mediaType.endsWith('+json') ||
1165
+ mediaType.endsWith('+xml') ||
1166
+ mediaType.endsWith('+yaml') ||
1167
+ mediaType.endsWith('+text') ||
1168
+ mediaType.endsWith('+markdown'));
1169
+ }
1170
+ function assertSupportedContentType(contentType, url) {
1171
+ const mediaType = resolveMediaType(contentType);
1172
+ if (!mediaType)
1173
+ return;
1174
+ if (!isTextLikeMediaType(mediaType)) {
1175
+ throw new FetchError(`Unsupported content type: ${mediaType}`, url);
1176
+ }
1177
+ }
1178
+ function extractEncodingTokens(value) {
1179
+ const tokens = [];
1180
+ let i = 0;
1181
+ const len = value.length;
1182
+ while (i < len) {
1183
+ while (i < len &&
1184
+ (value.charCodeAt(i) === 44 || value.charCodeAt(i) <= 32)) {
1185
+ i += 1;
1186
+ }
1187
+ if (i >= len)
1188
+ break;
1189
+ const start = i;
1190
+ while (i < len && value.charCodeAt(i) !== 44)
1191
+ i += 1;
1192
+ const token = value.slice(start, i).trim().toLowerCase();
1193
+ if (token)
1194
+ tokens.push(token);
1195
+ if (i < len && value.charCodeAt(i) === 44)
1196
+ i += 1;
1197
+ }
1198
+ return tokens;
1199
+ }
1200
+ function parseSingleContentEncoding(value) {
1201
+ if (!value)
1202
+ return null;
1203
+ const tokens = extractEncodingTokens(value);
1204
+ if (tokens.length === 0)
1205
+ return null;
1206
+ if (tokens.length > 1)
1207
+ return undefined;
1208
+ return tokens[0] ?? null;
1209
+ }
1210
+ function createUnsupportedContentEncodingError(url, encodingHeader) {
1211
+ return new FetchError(`Unsupported Content-Encoding: ${encodingHeader}`, url, 415, {
1212
+ reason: 'unsupported_content_encoding',
1213
+ encoding: encodingHeader,
1214
+ });
1215
+ }
1216
+ function createPumpedStream(initialChunk, reader) {
1217
+ return new ReadableStream({
1218
+ start(controller) {
1219
+ if (initialChunk.byteLength > 0) {
1220
+ controller.enqueue(initialChunk);
1221
+ }
1222
+ },
1223
+ async pull(controller) {
1224
+ try {
1225
+ const { done, value } = await reader.read();
1226
+ if (done) {
1227
+ controller.close();
1228
+ }
1229
+ else {
1230
+ controller.enqueue(value);
1231
+ }
1232
+ }
1233
+ catch (error) {
1234
+ controller.error(error);
1235
+ }
1236
+ },
1237
+ cancel(reason) {
1238
+ void reader.cancel(reason).catch(() => undefined);
1239
+ },
1240
+ });
1241
+ }
1242
+ function isLikelyCompressed(chunk, encoding) {
1243
+ if (chunk.byteLength === 0)
1244
+ return false;
1245
+ if (encoding === 'gzip') {
1246
+ return chunk.byteLength >= 2 && chunk[0] === 0x1f && chunk[1] === 0x8b;
1247
+ }
1248
+ if (encoding === 'deflate') {
1249
+ if (chunk.byteLength < 2)
1250
+ return false;
1251
+ const byte0 = chunk[0] ?? 0;
1252
+ const byte1 = chunk[1] ?? 0;
1253
+ const cm = byte0 & 0x0f;
1254
+ if (cm !== 8)
1255
+ return false;
1256
+ return (byte0 * 256 + byte1) % 31 === 0;
1257
+ }
1258
+ let nonPrintable = 0;
1259
+ const limit = Math.min(chunk.length, 50);
1260
+ for (let i = 0; i < limit; i += 1) {
1261
+ const b = chunk[i] ?? 0;
1262
+ if (b < 0x09 || (b > 0x0d && b < 0x20) || b === 0x7f)
1263
+ nonPrintable += 1;
1264
+ }
1265
+ return nonPrintable / limit > 0.1;
839
1266
  }
840
- async function handleFetchResponse(response, finalUrl, ctx, signal) {
1267
+ async function decodeResponseIfNeeded(response, url, signal) {
1268
+ const encodingHeader = response.headers.get('content-encoding');
1269
+ const encoding = parseSingleContentEncoding(encodingHeader);
1270
+ if (encoding === null || encoding === 'identity')
1271
+ return response;
1272
+ if (encoding === undefined) {
1273
+ throw createUnsupportedContentEncodingError(url, encodingHeader ?? '');
1274
+ }
1275
+ if (encoding !== 'gzip' && encoding !== 'deflate' && encoding !== 'br') {
1276
+ throw createUnsupportedContentEncodingError(url, encodingHeader ?? encoding);
1277
+ }
1278
+ if (!response.body)
1279
+ return response;
1280
+ // Peek at first chunk to check if actually compressed
1281
+ const reader = response.body.getReader();
1282
+ let initialChunk;
1283
+ try {
1284
+ const { done, value } = await reader.read();
1285
+ if (done) {
1286
+ return new Response(null, {
1287
+ status: response.status,
1288
+ statusText: response.statusText,
1289
+ headers: response.headers,
1290
+ });
1291
+ }
1292
+ initialChunk = value;
1293
+ }
1294
+ catch (error) {
1295
+ // If read fails, throw properly
1296
+ throw new FetchError(`Failed to read response body: ${isError(error) ? error.message : String(error)}`, url, 502);
1297
+ }
1298
+ if (!isLikelyCompressed(initialChunk, encoding)) {
1299
+ const body = createPumpedStream(initialChunk, reader);
1300
+ const headers = new Headers(response.headers);
1301
+ headers.delete('content-encoding');
1302
+ headers.delete('content-length');
1303
+ return new Response(body, {
1304
+ status: response.status,
1305
+ statusText: response.statusText,
1306
+ headers,
1307
+ });
1308
+ }
1309
+ // Set up decompression
1310
+ let decompressor = null;
1311
+ switch (encoding) {
1312
+ case 'gzip':
1313
+ decompressor = createGunzip();
1314
+ break;
1315
+ case 'deflate':
1316
+ decompressor = createInflate();
1317
+ break;
1318
+ case 'br':
1319
+ decompressor = createBrotliDecompress();
1320
+ break;
1321
+ default:
1322
+ // Should have been caught by parseSingleContentEncoding check, but safe fallback
1323
+ decompressor = null;
1324
+ }
1325
+ if (!decompressor) {
1326
+ // Should be unreachable if encoding valid
1327
+ throw createUnsupportedContentEncodingError(url, encodingHeader ?? encoding);
1328
+ }
1329
+ const sourceStream = Readable.fromWeb(createPumpedStream(initialChunk, reader));
1330
+ const decodedNodeStream = sourceStream.pipe(decompressor);
1331
+ const abortHandler = () => {
1332
+ sourceStream.destroy();
1333
+ decompressor.destroy();
1334
+ decodedNodeStream.destroy();
1335
+ };
1336
+ if (signal) {
1337
+ signal.addEventListener('abort', abortHandler, { once: true });
1338
+ }
1339
+ const decodedBody = Readable.toWeb(decodedNodeStream);
1340
+ const headers = new Headers(response.headers);
1341
+ headers.delete('content-encoding');
1342
+ headers.delete('content-length');
1343
+ if (signal) {
1344
+ decodedNodeStream.once('close', () => {
1345
+ signal.removeEventListener('abort', abortHandler);
1346
+ });
1347
+ decodedNodeStream.once('error', () => {
1348
+ signal.removeEventListener('abort', abortHandler);
1349
+ });
1350
+ }
1351
+ return new Response(decodedBody, {
1352
+ status: response.status,
1353
+ statusText: response.statusText,
1354
+ headers,
1355
+ });
1356
+ }
1357
+ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry, reader, maxBytes, mode, signal) {
841
1358
  const responseError = resolveResponseError(response, finalUrl);
842
1359
  if (responseError) {
843
1360
  cancelResponseBody(response);
844
1361
  throw responseError;
845
1362
  }
846
- const { text, size } = await responseReader.read(response, finalUrl, config.fetcher.maxContentLength, signal);
847
- telemetry.recordResponse(ctx, response, size);
848
- return text;
1363
+ const decodedResponse = await decodeResponseIfNeeded(response, finalUrl, signal);
1364
+ const contentType = decodedResponse.headers.get('content-type');
1365
+ assertSupportedContentType(contentType, finalUrl);
1366
+ const declaredEncoding = getCharsetFromContentType(contentType ?? null);
1367
+ if (mode === 'text') {
1368
+ const { text, size } = await reader.read(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
1369
+ telemetry.recordResponse(ctx, decodedResponse, size);
1370
+ return { kind: 'text', text, size };
1371
+ }
1372
+ const { buffer, encoding, size } = await reader.readBuffer(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
1373
+ telemetry.recordResponse(ctx, decodedResponse, size);
1374
+ return { kind: 'buffer', buffer, encoding, size };
1375
+ }
1376
+ function extractHostname(url) {
1377
+ if (!URL.canParse(url)) {
1378
+ throw createErrorWithCode('Invalid URL', 'EINVAL');
1379
+ }
1380
+ return new URL(url).hostname;
1381
+ }
1382
+ function createDnsPreflight(dnsResolver) {
1383
+ return async (url, signal) => {
1384
+ const hostname = extractHostname(url);
1385
+ await dnsResolver.assertSafeHostname(hostname, signal);
1386
+ };
849
1387
  }
850
1388
  class HttpFetcher {
1389
+ fetcherConfig;
1390
+ dnsResolver;
1391
+ redirectFollower;
1392
+ reader;
1393
+ telemetry;
1394
+ constructor(fetcherConfig, dnsResolver, redirectFollower, reader, telemetry) {
1395
+ this.fetcherConfig = fetcherConfig;
1396
+ this.dnsResolver = dnsResolver;
1397
+ this.redirectFollower = redirectFollower;
1398
+ this.reader = reader;
1399
+ this.telemetry = telemetry;
1400
+ }
851
1401
  async fetchNormalizedUrl(normalizedUrl, options) {
852
- const { hostname } = new URL(normalizedUrl);
853
- await assertSafeDnsLookup(hostname);
854
- const timeoutMs = config.fetcher.timeout;
1402
+ return this.fetchNormalized(normalizedUrl, 'text', options);
1403
+ }
1404
+ async fetchNormalizedUrlBuffer(normalizedUrl, options) {
1405
+ return this.fetchNormalized(normalizedUrl, 'buffer', options);
1406
+ }
1407
+ async fetchNormalized(normalizedUrl, mode, options) {
1408
+ const hostname = extractHostname(normalizedUrl);
1409
+ const timeoutMs = this.fetcherConfig.timeout;
855
1410
  const headers = buildHeaders();
856
1411
  const signal = buildRequestSignal(timeoutMs, options?.signal);
857
1412
  const init = buildRequestInit(headers, signal);
858
- const ctx = telemetry.start(normalizedUrl, 'GET');
1413
+ const ctx = this.telemetry.start(normalizedUrl, 'GET');
859
1414
  try {
860
- const { response, url: finalUrl } = await redirectFollower.fetchWithRedirects(normalizedUrl, init, config.fetcher.maxRedirects);
861
- ctx.url = finalUrl;
862
- return await handleFetchResponse(response, finalUrl, ctx, init.signal ?? undefined);
1415
+ await this.dnsResolver.assertSafeHostname(hostname, signal ?? undefined);
1416
+ const { response, url: finalUrl } = await this.redirectFollower.fetchWithRedirects(normalizedUrl, init, this.fetcherConfig.maxRedirects);
1417
+ ctx.url = this.telemetry.redact(finalUrl);
1418
+ const payload = await readAndRecordDecodedResponse(response, finalUrl, ctx, this.telemetry, this.reader, this.fetcherConfig.maxContentLength, mode, init.signal ?? undefined);
1419
+ if (payload.kind === 'text')
1420
+ return payload.text;
1421
+ return { buffer: payload.buffer, encoding: payload.encoding };
863
1422
  }
864
1423
  catch (error) {
865
1424
  const mapped = mapFetchError(error, normalizedUrl, timeoutMs);
866
- ctx.url = mapped.url;
867
- telemetry.recordError(ctx, mapped, mapped.statusCode);
1425
+ ctx.url = this.telemetry.redact(mapped.url);
1426
+ this.telemetry.recordError(ctx, mapped, mapped.statusCode);
868
1427
  throw mapped;
869
1428
  }
870
1429
  }
871
1430
  }
872
- const httpFetcher = new HttpFetcher();
873
- /** Backwards-compatible export */
1431
+ const ipBlocker = new IpBlocker(config.security);
1432
+ const urlNormalizer = new UrlNormalizer(config.constants, config.security, ipBlocker, BLOCKED_HOST_SUFFIXES);
1433
+ const rawUrlTransformer = new RawUrlTransformer(defaultLogger);
1434
+ const dnsResolver = new SafeDnsResolver(ipBlocker, config.security, BLOCKED_HOST_SUFFIXES);
1435
+ const telemetry = new FetchTelemetry(defaultLogger, defaultContext, defaultRedactor);
1436
+ const normalizeRedirectUrl = (url) => urlNormalizer.validateAndNormalize(url);
1437
+ const dnsPreflight = createDnsPreflight(dnsResolver);
1438
+ // Redirect follower with per-hop DNS preflight.
1439
+ const secureRedirectFollower = new RedirectFollower(defaultFetch, normalizeRedirectUrl, dnsPreflight);
1440
+ const responseReader = new ResponseTextReader();
1441
+ const httpFetcher = new HttpFetcher(config.fetcher, dnsResolver, secureRedirectFollower, responseReader, telemetry);
1442
+ export function isBlockedIp(ip) {
1443
+ return ipBlocker.isBlockedIp(ip);
1444
+ }
1445
+ export function normalizeUrl(urlString) {
1446
+ return urlNormalizer.normalize(urlString);
1447
+ }
1448
+ export function validateAndNormalizeUrl(urlString) {
1449
+ return urlNormalizer.validateAndNormalize(urlString);
1450
+ }
1451
+ export function transformToRawUrl(url) {
1452
+ return rawUrlTransformer.transformToRawUrl(url);
1453
+ }
1454
+ export function isRawTextContentUrl(url) {
1455
+ return rawUrlTransformer.isRawTextContentUrl(url);
1456
+ }
1457
+ export function startFetchTelemetry(url, method) {
1458
+ return telemetry.start(url, method);
1459
+ }
1460
+ export function recordFetchResponse(context, response, contentSize) {
1461
+ telemetry.recordResponse(context, response, contentSize);
1462
+ }
1463
+ export function recordFetchError(context, error, status) {
1464
+ telemetry.recordError(context, error, status);
1465
+ }
1466
+ export async function fetchWithRedirects(url, init, maxRedirects) {
1467
+ return secureRedirectFollower.fetchWithRedirects(url, init, maxRedirects);
1468
+ }
1469
+ export async function readResponseText(response, url, maxBytes, signal, encoding) {
1470
+ const decodedResponse = await decodeResponseIfNeeded(response, url, signal);
1471
+ return responseReader.read(decodedResponse, url, maxBytes, signal, encoding);
1472
+ }
1473
+ export async function readResponseBuffer(response, url, maxBytes, signal, encoding) {
1474
+ const decodedResponse = await decodeResponseIfNeeded(response, url, signal);
1475
+ return responseReader.readBuffer(decodedResponse, url, maxBytes, signal, encoding);
1476
+ }
874
1477
  export async function fetchNormalizedUrl(normalizedUrl, options) {
875
1478
  return httpFetcher.fetchNormalizedUrl(normalizedUrl, options);
876
1479
  }
1480
+ export async function fetchNormalizedUrlBuffer(normalizedUrl, options) {
1481
+ return httpFetcher.fetchNormalizedUrlBuffer(normalizedUrl, options);
1482
+ }