@j0hanz/superfetch 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/README.md +139 -46
  2. package/dist/cache.d.ts +42 -0
  3. package/dist/cache.js +565 -0
  4. package/dist/config/env-parsers.d.ts +1 -0
  5. package/dist/config/env-parsers.js +12 -0
  6. package/dist/config/index.d.ts +7 -0
  7. package/dist/config/index.js +20 -8
  8. package/dist/config/types/content.d.ts +1 -0
  9. package/dist/config.d.ts +77 -0
  10. package/dist/config.js +261 -0
  11. package/dist/crypto.d.ts +2 -0
  12. package/dist/crypto.js +32 -0
  13. package/dist/errors.d.ts +10 -0
  14. package/dist/errors.js +28 -0
  15. package/dist/fetch.d.ts +40 -0
  16. package/dist/fetch.js +910 -0
  17. package/dist/http/auth.js +161 -2
  18. package/dist/http/base-middleware.d.ts +7 -0
  19. package/dist/http/base-middleware.js +143 -0
  20. package/dist/http/cors.d.ts +0 -5
  21. package/dist/http/cors.js +0 -6
  22. package/dist/http/download-routes.js +6 -2
  23. package/dist/http/error-handler.d.ts +2 -0
  24. package/dist/http/error-handler.js +55 -0
  25. package/dist/http/host-allowlist.d.ts +3 -0
  26. package/dist/http/host-allowlist.js +117 -0
  27. package/dist/http/mcp-routes.d.ts +8 -2
  28. package/dist/http/mcp-routes.js +101 -8
  29. package/dist/http/mcp-session-eviction.d.ts +3 -0
  30. package/dist/http/mcp-session-eviction.js +24 -0
  31. package/dist/http/mcp-session-init.d.ts +7 -0
  32. package/dist/http/mcp-session-init.js +94 -0
  33. package/dist/http/mcp-session-slots.d.ts +17 -0
  34. package/dist/http/mcp-session-slots.js +55 -0
  35. package/dist/http/mcp-session-transport-init.d.ts +7 -0
  36. package/dist/http/mcp-session-transport-init.js +41 -0
  37. package/dist/http/mcp-session-types.d.ts +5 -0
  38. package/dist/http/mcp-session-types.js +1 -0
  39. package/dist/http/mcp-session.d.ts +9 -9
  40. package/dist/http/mcp-session.js +5 -114
  41. package/dist/http/mcp-sessions.d.ts +41 -0
  42. package/dist/http/mcp-sessions.js +392 -0
  43. package/dist/http/rate-limit.js +2 -2
  44. package/dist/http/server-middleware.d.ts +6 -1
  45. package/dist/http/server-middleware.js +3 -117
  46. package/dist/http/server-shutdown.js +1 -1
  47. package/dist/http/server-tuning.d.ts +9 -0
  48. package/dist/http/server-tuning.js +45 -0
  49. package/dist/http/server.js +206 -9
  50. package/dist/http/session-cleanup.js +8 -5
  51. package/dist/http.d.ts +78 -0
  52. package/dist/http.js +1437 -0
  53. package/dist/index.js +3 -3
  54. package/dist/mcp.d.ts +3 -0
  55. package/dist/mcp.js +94 -0
  56. package/dist/middleware/error-handler.d.ts +1 -1
  57. package/dist/middleware/error-handler.js +31 -30
  58. package/dist/observability.d.ts +16 -0
  59. package/dist/observability.js +78 -0
  60. package/dist/resources/cached-content-params.d.ts +5 -0
  61. package/dist/resources/cached-content-params.js +36 -0
  62. package/dist/resources/cached-content.js +33 -33
  63. package/dist/server.js +21 -6
  64. package/dist/services/cache-events.d.ts +8 -0
  65. package/dist/services/cache-events.js +19 -0
  66. package/dist/services/cache.d.ts +5 -4
  67. package/dist/services/cache.js +49 -45
  68. package/dist/services/context.d.ts +2 -0
  69. package/dist/services/context.js +3 -0
  70. package/dist/services/extractor.d.ts +1 -0
  71. package/dist/services/extractor.js +77 -40
  72. package/dist/services/fetcher/agents.js +1 -1
  73. package/dist/services/fetcher/dns-selection.js +1 -1
  74. package/dist/services/fetcher/interceptors.js +29 -60
  75. package/dist/services/fetcher/redirects.js +12 -4
  76. package/dist/services/fetcher/response.js +18 -8
  77. package/dist/services/fetcher.d.ts +23 -0
  78. package/dist/services/fetcher.js +553 -13
  79. package/dist/services/logger.js +4 -1
  80. package/dist/services/telemetry.d.ts +19 -0
  81. package/dist/services/telemetry.js +43 -0
  82. package/dist/services/transform-worker-pool.d.ts +10 -3
  83. package/dist/services/transform-worker-pool.js +213 -184
  84. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -3
  85. package/dist/tools/handlers/fetch-single.shared.js +131 -2
  86. package/dist/tools/handlers/fetch-url.tool.d.ts +6 -0
  87. package/dist/tools/handlers/fetch-url.tool.js +56 -12
  88. package/dist/tools/index.d.ts +1 -0
  89. package/dist/tools/index.js +13 -1
  90. package/dist/tools/schemas.d.ts +2 -0
  91. package/dist/tools/schemas.js +8 -0
  92. package/dist/tools/utils/content-shaping.js +19 -4
  93. package/dist/tools/utils/content-transform-core.d.ts +5 -0
  94. package/dist/tools/utils/content-transform-core.js +180 -0
  95. package/dist/tools/utils/content-transform-workers.d.ts +1 -0
  96. package/dist/tools/utils/content-transform-workers.js +1 -0
  97. package/dist/tools/utils/content-transform.d.ts +2 -1
  98. package/dist/tools/utils/content-transform.js +37 -136
  99. package/dist/tools/utils/fetch-pipeline.js +47 -56
  100. package/dist/tools/utils/frontmatter.d.ts +3 -0
  101. package/dist/tools/utils/frontmatter.js +73 -0
  102. package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
  103. package/dist/tools/utils/markdown-heuristics.js +19 -0
  104. package/dist/tools/utils/markdown-signals.d.ts +1 -0
  105. package/dist/tools/utils/markdown-signals.js +19 -0
  106. package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
  107. package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
  108. package/dist/tools/utils/raw-markdown.d.ts +6 -0
  109. package/dist/tools/utils/raw-markdown.js +149 -0
  110. package/dist/tools.d.ts +104 -0
  111. package/dist/tools.js +421 -0
  112. package/dist/transform.d.ts +69 -0
  113. package/dist/transform.js +1509 -0
  114. package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
  115. package/dist/transformers/markdown/fenced-code-rule.js +38 -0
  116. package/dist/transformers/markdown/frontmatter.d.ts +2 -0
  117. package/dist/transformers/markdown/frontmatter.js +45 -0
  118. package/dist/transformers/markdown/noise-rule.d.ts +2 -0
  119. package/dist/transformers/markdown/noise-rule.js +80 -0
  120. package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
  121. package/dist/transformers/markdown/turndown-instance.js +19 -0
  122. package/dist/transformers/markdown.d.ts +5 -0
  123. package/dist/transformers/markdown.js +314 -0
  124. package/dist/transformers/markdown.transformer.js +2 -189
  125. package/dist/utils/cancellation.d.ts +1 -0
  126. package/dist/utils/cancellation.js +18 -0
  127. package/dist/utils/code-language-bash.d.ts +1 -0
  128. package/dist/utils/code-language-bash.js +48 -0
  129. package/dist/utils/code-language-core.d.ts +2 -0
  130. package/dist/utils/code-language-core.js +13 -0
  131. package/dist/utils/code-language-detectors.d.ts +5 -0
  132. package/dist/utils/code-language-detectors.js +142 -0
  133. package/dist/utils/code-language-helpers.d.ts +5 -0
  134. package/dist/utils/code-language-helpers.js +62 -0
  135. package/dist/utils/code-language-parsing.d.ts +5 -0
  136. package/dist/utils/code-language-parsing.js +62 -0
  137. package/dist/utils/code-language.js +250 -46
  138. package/dist/utils/error-details.d.ts +3 -0
  139. package/dist/utils/error-details.js +12 -0
  140. package/dist/utils/filename-generator.js +14 -3
  141. package/dist/utils/host-normalizer.d.ts +1 -0
  142. package/dist/utils/host-normalizer.js +37 -0
  143. package/dist/utils/ip-address.d.ts +4 -0
  144. package/dist/utils/ip-address.js +6 -0
  145. package/dist/utils/tool-error-handler.js +12 -17
  146. package/dist/utils/url-redactor.d.ts +1 -0
  147. package/dist/utils/url-redactor.js +13 -0
  148. package/dist/utils/url-validator.js +35 -20
  149. package/dist/workers/transform-worker.js +82 -38
  150. package/package.json +13 -10
@@ -1,9 +1,538 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import diagnosticsChannel from 'node:diagnostics_channel';
3
+ import dns from 'node:dns';
4
+ import os from 'node:os';
5
+ import { performance } from 'node:perf_hooks';
6
+ import { Agent } from 'undici';
1
7
  import { config } from '../config/index.js';
2
- import { dispatcher } from './fetcher/agents.js';
3
- import { createHttpError, createRateLimitError, mapFetchError, } from './fetcher/errors.js';
4
- import { recordFetchError, recordFetchResponse, startFetchTelemetry, } from './fetcher/interceptors.js';
5
- import { fetchWithRedirects } from './fetcher/redirects.js';
6
- import { readResponseText } from './fetcher/response.js';
8
+ import { FetchError } from '../errors/app-error.js';
9
+ import { createErrorWithCode, isSystemError } from '../utils/error-details.js';
10
+ import { isRecord } from '../utils/guards.js';
11
+ import { redactUrl } from '../utils/url-redactor.js';
12
+ import { isBlockedIp, validateAndNormalizeUrl, } from '../utils/url-validator.js';
13
+ import { getOperationId, getRequestId } from './context.js';
14
+ import { logDebug, logError, logWarn } from './logger.js';
15
+ const DNS_LOOKUP_TIMEOUT_MS = 5000;
16
+ function normalizeLookupResults(addresses, family) {
17
+ if (Array.isArray(addresses)) {
18
+ return addresses;
19
+ }
20
+ return [{ address: addresses, family: family ?? 4 }];
21
+ }
22
+ function findBlockedIpError(list, hostname) {
23
+ for (const addr of list) {
24
+ const ip = typeof addr === 'string' ? addr : addr.address;
25
+ if (!isBlockedIp(ip)) {
26
+ continue;
27
+ }
28
+ return createErrorWithCode(`Blocked IP detected for ${hostname}`, 'EBLOCKED');
29
+ }
30
+ return null;
31
+ }
32
+ function findInvalidFamilyError(list, hostname) {
33
+ for (const addr of list) {
34
+ const family = typeof addr === 'string' ? 0 : addr.family;
35
+ if (family === 4 || family === 6)
36
+ continue;
37
+ return createErrorWithCode(`Invalid address family returned for ${hostname}`, 'EINVAL');
38
+ }
39
+ return null;
40
+ }
41
+ function createNoDnsResultsError(hostname) {
42
+ return createErrorWithCode(`No DNS results returned for ${hostname}`, 'ENODATA');
43
+ }
44
+ function createEmptySelection(hostname) {
45
+ return {
46
+ error: createNoDnsResultsError(hostname),
47
+ fallback: [],
48
+ address: [],
49
+ };
50
+ }
51
+ function selectLookupResult(list, useAll, hostname) {
52
+ if (list.length === 0)
53
+ return createEmptySelection(hostname);
54
+ if (useAll)
55
+ return { address: list, fallback: list };
56
+ const first = list.at(0);
57
+ if (!first)
58
+ return createEmptySelection(hostname);
59
+ return {
60
+ address: first.address,
61
+ family: first.family,
62
+ fallback: list,
63
+ };
64
+ }
65
+ function findLookupError(list, hostname) {
66
+ return (findInvalidFamilyError(list, hostname) ?? findBlockedIpError(list, hostname));
67
+ }
68
+ function handleLookupResult(error, addresses, hostname, resolvedFamily, useAll, callback) {
69
+ if (error) {
70
+ callback(error, addresses);
71
+ return;
72
+ }
73
+ const list = normalizeLookupResults(addresses, resolvedFamily);
74
+ const lookupError = findLookupError(list, hostname);
75
+ if (lookupError) {
76
+ callback(lookupError, list);
77
+ return;
78
+ }
79
+ const selection = selectLookupResult(list, useAll, hostname);
80
+ if (selection.error) {
81
+ callback(selection.error, selection.fallback);
82
+ return;
83
+ }
84
+ callback(null, selection.address, selection.family);
85
+ }
86
+ function resolveDns(hostname, options, callback) {
87
+ const { normalizedOptions, useAll, resolvedFamily } = buildLookupContext(options);
88
+ const lookupOptions = buildLookupOptions(normalizedOptions);
89
+ const timeout = createLookupTimeout(hostname, callback);
90
+ const safeCallback = wrapLookupCallback(callback, timeout);
91
+ dns.lookup(hostname, lookupOptions, createLookupCallback(hostname, resolvedFamily, useAll, safeCallback));
92
+ }
93
+ function normalizeLookupOptions(options) {
94
+ return typeof options === 'number' ? { family: options } : options;
95
+ }
96
+ function buildLookupContext(options) {
97
+ const normalizedOptions = normalizeLookupOptions(options);
98
+ return {
99
+ normalizedOptions,
100
+ useAll: Boolean(normalizedOptions.all),
101
+ resolvedFamily: resolveFamily(normalizedOptions.family),
102
+ };
103
+ }
104
+ const DEFAULT_DNS_ORDER = 'verbatim';
105
+ function resolveResultOrder(options) {
106
+ if (options.order)
107
+ return options.order;
108
+ const legacyVerbatim = getLegacyVerbatim(options);
109
+ if (legacyVerbatim !== undefined) {
110
+ return legacyVerbatim ? 'verbatim' : 'ipv4first';
111
+ }
112
+ return DEFAULT_DNS_ORDER;
113
+ }
114
+ function getLegacyVerbatim(options) {
115
+ if (isRecord(options)) {
116
+ const { verbatim } = options;
117
+ return typeof verbatim === 'boolean' ? verbatim : undefined;
118
+ }
119
+ return undefined;
120
+ }
121
+ function buildLookupOptions(normalizedOptions) {
122
+ return {
123
+ family: normalizedOptions.family,
124
+ hints: normalizedOptions.hints,
125
+ all: true,
126
+ order: resolveResultOrder(normalizedOptions),
127
+ };
128
+ }
129
+ function createLookupCallback(hostname, resolvedFamily, useAll, callback) {
130
+ return (err, addresses) => {
131
+ handleLookupResult(err, addresses, hostname, resolvedFamily, useAll, callback);
132
+ };
133
+ }
134
+ function resolveFamily(family) {
135
+ if (family === 'IPv4')
136
+ return 4;
137
+ if (family === 'IPv6')
138
+ return 6;
139
+ return family;
140
+ }
141
+ function createLookupTimeout(hostname, callback) {
142
+ let done = false;
143
+ const timer = setTimeout(() => {
144
+ if (done)
145
+ return;
146
+ done = true;
147
+ callback(createErrorWithCode(`DNS lookup timed out for ${hostname}`, 'ETIMEOUT'), []);
148
+ }, DNS_LOOKUP_TIMEOUT_MS);
149
+ timer.unref();
150
+ return {
151
+ isDone: () => done,
152
+ markDone: () => {
153
+ done = true;
154
+ clearTimeout(timer);
155
+ },
156
+ };
157
+ }
158
+ function wrapLookupCallback(callback, timeout) {
159
+ return (err, address, family) => {
160
+ if (timeout.isDone())
161
+ return;
162
+ timeout.markDone();
163
+ callback(err, address, family);
164
+ };
165
+ }
166
+ function getAgentOptions() {
167
+ const cpuCount = os.availableParallelism();
168
+ return {
169
+ keepAliveTimeout: 60000,
170
+ connections: Math.max(cpuCount * 2, 25),
171
+ pipelining: 1,
172
+ connect: { lookup: resolveDns },
173
+ };
174
+ }
175
+ export const dispatcher = new Agent(getAgentOptions());
176
+ export function destroyAgents() {
177
+ void dispatcher.close();
178
+ }
179
+ function parseRetryAfter(header) {
180
+ if (!header)
181
+ return 60;
182
+ const parsed = parseInt(header, 10);
183
+ return Number.isNaN(parsed) ? 60 : parsed;
184
+ }
185
+ function createCanceledError(url) {
186
+ return new FetchError('Request was canceled', url, 499, {
187
+ reason: 'aborted',
188
+ });
189
+ }
190
+ function createTimeoutError(url, timeoutMs) {
191
+ return new FetchError(`Request timeout after ${timeoutMs}ms`, url, 504, {
192
+ timeout: timeoutMs,
193
+ });
194
+ }
195
+ function createRateLimitError(url, headerValue) {
196
+ const retryAfter = parseRetryAfter(headerValue);
197
+ return new FetchError('Too many requests', url, 429, { retryAfter });
198
+ }
199
+ function createHttpError(url, status, statusText) {
200
+ return new FetchError(`HTTP ${status}: ${statusText}`, url, status);
201
+ }
202
+ function createNetworkError(url, message) {
203
+ const details = message ? { message } : undefined;
204
+ return new FetchError(`Network error: Could not reach ${url}`, url, undefined, details ?? {});
205
+ }
206
+ function createUnknownError(url, message) {
207
+ return new FetchError(message, url);
208
+ }
209
+ function isAbortError(error) {
210
+ return (error instanceof Error &&
211
+ (error.name === 'AbortError' || error.name === 'TimeoutError'));
212
+ }
213
+ function isTimeoutError(error) {
214
+ return error instanceof Error && error.name === 'TimeoutError';
215
+ }
216
+ function getRequestUrl(record) {
217
+ const value = record.requestUrl;
218
+ return typeof value === 'string' ? value : null;
219
+ }
220
+ function resolveErrorUrl(error, fallback) {
221
+ if (error instanceof FetchError)
222
+ return error.url;
223
+ if (!isRecord(error))
224
+ return fallback;
225
+ const requestUrl = getRequestUrl(error);
226
+ if (requestUrl)
227
+ return requestUrl;
228
+ return fallback;
229
+ }
230
+ function mapFetchError(error, fallbackUrl, timeoutMs) {
231
+ if (error instanceof FetchError)
232
+ return error;
233
+ const url = resolveErrorUrl(error, fallbackUrl);
234
+ if (isAbortError(error)) {
235
+ if (isTimeoutError(error)) {
236
+ return createTimeoutError(url, timeoutMs);
237
+ }
238
+ return createCanceledError(url);
239
+ }
240
+ if (error instanceof Error) {
241
+ return createNetworkError(url, error.message);
242
+ }
243
+ return createUnknownError(url, 'Unexpected error');
244
+ }
245
+ const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
246
+ function publishFetchEvent(event) {
247
+ if (!fetchChannel.hasSubscribers)
248
+ return;
249
+ try {
250
+ fetchChannel.publish(event);
251
+ }
252
+ catch {
253
+ // Avoid crashing the publisher if a subscriber throws.
254
+ }
255
+ }
256
+ export function startFetchTelemetry(url, method) {
257
+ const safeUrl = redactUrl(url);
258
+ const contextRequestId = getRequestId();
259
+ const operationId = getOperationId();
260
+ const context = {
261
+ requestId: randomUUID(),
262
+ startTime: performance.now(),
263
+ url: safeUrl,
264
+ method: method.toUpperCase(),
265
+ ...(contextRequestId ? { contextRequestId } : {}),
266
+ ...(operationId ? { operationId } : {}),
267
+ };
268
+ publishFetchEvent({
269
+ v: 1,
270
+ type: 'start',
271
+ requestId: context.requestId,
272
+ method: context.method,
273
+ url: context.url,
274
+ ...(context.contextRequestId
275
+ ? { contextRequestId: context.contextRequestId }
276
+ : {}),
277
+ ...(context.operationId ? { operationId: context.operationId } : {}),
278
+ });
279
+ logDebug('HTTP Request', {
280
+ requestId: context.requestId,
281
+ method: context.method,
282
+ url: context.url,
283
+ ...(context.contextRequestId
284
+ ? { contextRequestId: context.contextRequestId }
285
+ : {}),
286
+ ...(context.operationId ? { operationId: context.operationId } : {}),
287
+ });
288
+ return context;
289
+ }
290
+ export function recordFetchResponse(context, response, contentSize) {
291
+ const duration = performance.now() - context.startTime;
292
+ const durationLabel = `${Math.round(duration)}ms`;
293
+ publishFetchEvent({
294
+ v: 1,
295
+ type: 'end',
296
+ requestId: context.requestId,
297
+ status: response.status,
298
+ duration,
299
+ ...(context.contextRequestId
300
+ ? { contextRequestId: context.contextRequestId }
301
+ : {}),
302
+ ...(context.operationId ? { operationId: context.operationId } : {}),
303
+ });
304
+ const contentType = response.headers.get('content-type');
305
+ const contentLength = response.headers.get('content-length') ??
306
+ (contentSize === undefined ? undefined : String(contentSize));
307
+ logDebug('HTTP Response', {
308
+ requestId: context.requestId,
309
+ status: response.status,
310
+ url: context.url,
311
+ duration: durationLabel,
312
+ ...(context.contextRequestId
313
+ ? { contextRequestId: context.contextRequestId }
314
+ : {}),
315
+ ...(context.operationId ? { operationId: context.operationId } : {}),
316
+ ...(contentType ? { contentType } : {}),
317
+ ...(contentLength ? { size: contentLength } : {}),
318
+ });
319
+ if (duration > 5000) {
320
+ logWarn('Slow HTTP request detected', {
321
+ requestId: context.requestId,
322
+ url: context.url,
323
+ duration: durationLabel,
324
+ ...(context.contextRequestId
325
+ ? { contextRequestId: context.contextRequestId }
326
+ : {}),
327
+ ...(context.operationId ? { operationId: context.operationId } : {}),
328
+ });
329
+ }
330
+ }
331
+ export function recordFetchError(context, error, status) {
332
+ const duration = performance.now() - context.startTime;
333
+ const err = error instanceof Error ? error : new Error(String(error));
334
+ const event = {
335
+ v: 1,
336
+ type: 'error',
337
+ requestId: context.requestId,
338
+ url: context.url,
339
+ error: err.message,
340
+ duration,
341
+ ...(context.contextRequestId
342
+ ? { contextRequestId: context.contextRequestId }
343
+ : {}),
344
+ ...(context.operationId ? { operationId: context.operationId } : {}),
345
+ };
346
+ const code = isSystemError(err) ? err.code : undefined;
347
+ if (code !== undefined) {
348
+ event.code = code;
349
+ }
350
+ if (status !== undefined) {
351
+ event.status = status;
352
+ }
353
+ publishFetchEvent(event);
354
+ const log = status === 429 ? logWarn : logError;
355
+ log('HTTP Request Error', {
356
+ requestId: context.requestId,
357
+ url: context.url,
358
+ status,
359
+ code,
360
+ error: err.message,
361
+ ...(context.contextRequestId
362
+ ? { contextRequestId: context.contextRequestId }
363
+ : {}),
364
+ ...(context.operationId ? { operationId: context.operationId } : {}),
365
+ });
366
+ }
367
+ const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
368
+ function isRedirectStatus(status) {
369
+ return REDIRECT_STATUSES.has(status);
370
+ }
371
+ function cancelResponseBody(response) {
372
+ const cancelPromise = response.body?.cancel();
373
+ if (cancelPromise) {
374
+ cancelPromise.catch(() => {
375
+ // Best-effort cancellation; ignore failures.
376
+ });
377
+ }
378
+ }
379
+ async function performFetchCycle(currentUrl, init, redirectLimit, redirectCount) {
380
+ const response = await fetch(currentUrl, { ...init, redirect: 'manual' });
381
+ if (!isRedirectStatus(response.status)) {
382
+ return { response };
383
+ }
384
+ assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount);
385
+ const location = getRedirectLocation(response, currentUrl);
386
+ cancelResponseBody(response);
387
+ return {
388
+ response,
389
+ nextUrl: resolveRedirectTarget(currentUrl, location),
390
+ };
391
+ }
392
+ function assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount) {
393
+ if (redirectCount < redirectLimit)
394
+ return;
395
+ cancelResponseBody(response);
396
+ throw new FetchError('Too many redirects', currentUrl);
397
+ }
398
+ function getRedirectLocation(response, currentUrl) {
399
+ const location = response.headers.get('location');
400
+ if (location)
401
+ return location;
402
+ cancelResponseBody(response);
403
+ throw new FetchError('Redirect response missing Location header', currentUrl);
404
+ }
405
+ function annotateRedirectError(error, url) {
406
+ if (!isRecord(error))
407
+ return;
408
+ error.requestUrl = url;
409
+ }
410
+ function resolveRedirectTarget(baseUrl, location) {
411
+ if (!URL.canParse(location, baseUrl)) {
412
+ throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
413
+ }
414
+ const resolved = new URL(location, baseUrl);
415
+ if (resolved.username || resolved.password) {
416
+ throw createErrorWithCode('Redirect target includes credentials', 'EBADREDIRECT');
417
+ }
418
+ return validateAndNormalizeUrl(resolved.href);
419
+ }
420
+ export async function fetchWithRedirects(url, init, maxRedirects) {
421
+ let currentUrl = url;
422
+ const redirectLimit = Math.max(0, maxRedirects);
423
+ for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
424
+ const { response, nextUrl } = await performFetchCycleSafely(currentUrl, init, redirectLimit, redirectCount);
425
+ if (!nextUrl) {
426
+ return { response, url: currentUrl };
427
+ }
428
+ currentUrl = nextUrl;
429
+ }
430
+ throw new FetchError('Too many redirects', currentUrl);
431
+ }
432
+ async function performFetchCycleSafely(currentUrl, init, redirectLimit, redirectCount) {
433
+ try {
434
+ return await performFetchCycle(currentUrl, init, redirectLimit, redirectCount);
435
+ }
436
+ catch (error) {
437
+ annotateRedirectError(error, currentUrl);
438
+ throw error;
439
+ }
440
+ }
441
+ function assertContentLengthWithinLimit(response, url, maxBytes) {
442
+ const contentLengthHeader = response.headers.get('content-length');
443
+ if (!contentLengthHeader)
444
+ return;
445
+ const contentLength = Number.parseInt(contentLengthHeader, 10);
446
+ if (Number.isNaN(contentLength) || contentLength <= maxBytes) {
447
+ return;
448
+ }
449
+ cancelResponseBody(response);
450
+ throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
451
+ }
452
+ function createReadState() {
453
+ return {
454
+ decoder: new TextDecoder(),
455
+ parts: [],
456
+ total: 0,
457
+ };
458
+ }
459
+ function appendChunk(state, chunk, maxBytes, url) {
460
+ state.total += chunk.byteLength;
461
+ if (state.total > maxBytes) {
462
+ throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
463
+ }
464
+ const decoded = state.decoder.decode(chunk, { stream: true });
465
+ if (decoded)
466
+ state.parts.push(decoded);
467
+ }
468
+ function finalizeRead(state) {
469
+ const decoded = state.decoder.decode();
470
+ if (decoded)
471
+ state.parts.push(decoded);
472
+ }
473
+ function createAbortError(url) {
474
+ return new FetchError('Request was aborted during response read', url, 499, {
475
+ reason: 'aborted',
476
+ });
477
+ }
478
+ async function cancelReaderQuietly(reader) {
479
+ try {
480
+ await reader.cancel();
481
+ }
482
+ catch {
483
+ // Ignore cancel errors; we're already failing this read.
484
+ }
485
+ }
486
+ async function throwIfAborted(signal, url, reader) {
487
+ if (!signal?.aborted)
488
+ return;
489
+ await cancelReaderQuietly(reader);
490
+ throw createAbortError(url);
491
+ }
492
+ async function handleReadFailure(error, signal, url, reader) {
493
+ const aborted = signal?.aborted ?? false;
494
+ await cancelReaderQuietly(reader);
495
+ if (aborted) {
496
+ throw createAbortError(url);
497
+ }
498
+ throw error;
499
+ }
500
+ async function readAllChunks(reader, state, url, maxBytes, signal) {
501
+ await throwIfAborted(signal, url, reader);
502
+ let result = await reader.read();
503
+ while (!result.done) {
504
+ appendChunk(state, result.value, maxBytes, url);
505
+ await throwIfAborted(signal, url, reader);
506
+ result = await reader.read();
507
+ }
508
+ }
509
+ async function readStreamWithLimit(stream, url, maxBytes, signal) {
510
+ const state = createReadState();
511
+ const reader = stream.getReader();
512
+ try {
513
+ await readAllChunks(reader, state, url, maxBytes, signal);
514
+ }
515
+ catch (error) {
516
+ await handleReadFailure(error, signal, url, reader);
517
+ }
518
+ finally {
519
+ reader.releaseLock();
520
+ }
521
+ finalizeRead(state);
522
+ return { text: state.parts.join(''), size: state.total };
523
+ }
524
+ export async function readResponseText(response, url, maxBytes, signal) {
525
+ assertContentLengthWithinLimit(response, url, maxBytes);
526
+ if (!response.body) {
527
+ const text = await response.text();
528
+ const size = Buffer.byteLength(text);
529
+ if (size > maxBytes) {
530
+ throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
531
+ }
532
+ return { text, size };
533
+ }
534
+ return readStreamWithLimit(response.body, url, maxBytes, signal);
535
+ }
7
536
  const DEFAULT_HEADERS = {
8
537
  'User-Agent': config.fetcher.userAgent,
9
538
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -12,7 +541,7 @@ const DEFAULT_HEADERS = {
12
541
  Connection: 'keep-alive',
13
542
  };
14
543
  function buildHeaders() {
15
- return new Headers(DEFAULT_HEADERS);
544
+ return { ...DEFAULT_HEADERS };
16
545
  }
17
546
  function buildRequestSignal(timeoutMs, external) {
18
547
  const timeoutSignal = AbortSignal.timeout(timeoutMs);
@@ -28,14 +557,25 @@ function buildRequestInit(headers, signal) {
28
557
  dispatcher,
29
558
  };
30
559
  }
560
+ function resolveResponseError(response, finalUrl) {
561
+ return (resolveRateLimitError(response, finalUrl) ??
562
+ resolveHttpError(response, finalUrl));
563
+ }
564
+ function resolveRateLimitError(response, finalUrl) {
565
+ return response.status === 429
566
+ ? createRateLimitError(finalUrl, response.headers.get('retry-after'))
567
+ : null;
568
+ }
569
+ function resolveHttpError(response, finalUrl) {
570
+ return response.ok
571
+ ? null
572
+ : createHttpError(finalUrl, response.status, response.statusText);
573
+ }
31
574
  async function handleFetchResponse(response, finalUrl, telemetry, signal) {
32
- if (response.status === 429) {
33
- void response.body?.cancel();
34
- throw createRateLimitError(finalUrl, response.headers.get('retry-after'));
35
- }
36
- if (!response.ok) {
37
- void response.body?.cancel();
38
- throw createHttpError(finalUrl, response.status, response.statusText);
575
+ const responseError = resolveResponseError(response, finalUrl);
576
+ if (responseError) {
577
+ cancelResponseBody(response);
578
+ throw responseError;
39
579
  }
40
580
  const { text, size } = await readResponseText(response, finalUrl, config.fetcher.maxContentLength, signal);
41
581
  recordFetchResponse(telemetry, response, size);
@@ -1,13 +1,16 @@
1
1
  import { config } from '../config/index.js';
2
- import { getRequestId, getSessionId } from './context.js';
2
+ import { getOperationId, getRequestId, getSessionId } from './context.js';
3
3
  function formatMetadata(meta) {
4
4
  const requestId = getRequestId();
5
5
  const sessionId = getSessionId();
6
+ const operationId = getOperationId();
6
7
  const contextMeta = {};
7
8
  if (requestId)
8
9
  contextMeta.requestId = requestId;
9
10
  if (sessionId)
10
11
  contextMeta.sessionId = sessionId;
12
+ if (operationId)
13
+ contextMeta.operationId = operationId;
11
14
  const merged = { ...contextMeta, ...meta };
12
15
  return Object.keys(merged).length > 0 ? ` ${JSON.stringify(merged)}` : '';
13
16
  }
@@ -0,0 +1,19 @@
1
+ export interface TransformStageEvent {
2
+ v: 1;
3
+ type: 'stage';
4
+ stage: string;
5
+ durationMs: number;
6
+ url: string;
7
+ requestId?: string;
8
+ operationId?: string;
9
+ truncated?: boolean;
10
+ }
11
+ export interface TransformStageContext {
12
+ readonly stage: string;
13
+ readonly startTime: number;
14
+ readonly url: string;
15
+ }
16
+ export declare function startTransformStage(url: string, stage: string): TransformStageContext | null;
17
+ export declare function endTransformStage(context: TransformStageContext | null, options?: {
18
+ truncated?: boolean;
19
+ }): void;
@@ -0,0 +1,43 @@
1
+ import diagnosticsChannel from 'node:diagnostics_channel';
2
+ import { performance } from 'node:perf_hooks';
3
+ import { redactUrl } from '../utils/url-redactor.js';
4
+ import { getOperationId, getRequestId } from './context.js';
5
+ const transformChannel = diagnosticsChannel.channel('superfetch.transform');
6
+ function publishTransformEvent(event) {
7
+ if (!transformChannel.hasSubscribers)
8
+ return;
9
+ try {
10
+ transformChannel.publish(event);
11
+ }
12
+ catch {
13
+ // Avoid crashing the publisher if a subscriber throws.
14
+ }
15
+ }
16
+ export function startTransformStage(url, stage) {
17
+ if (!transformChannel.hasSubscribers)
18
+ return null;
19
+ return {
20
+ stage,
21
+ startTime: performance.now(),
22
+ url: redactUrl(url),
23
+ };
24
+ }
25
+ export function endTransformStage(context, options) {
26
+ if (!context)
27
+ return;
28
+ const requestId = getRequestId();
29
+ const operationId = getOperationId();
30
+ const event = {
31
+ v: 1,
32
+ type: 'stage',
33
+ stage: context.stage,
34
+ durationMs: performance.now() - context.startTime,
35
+ url: context.url,
36
+ ...(requestId ? { requestId } : {}),
37
+ ...(operationId ? { operationId } : {}),
38
+ ...(options?.truncated !== undefined
39
+ ? { truncated: options.truncated }
40
+ : {}),
41
+ };
42
+ publishTransformEvent(event);
43
+ }
@@ -1,4 +1,11 @@
1
1
  import type { MarkdownTransformResult } from '../config/types/content.js';
2
- import type { WorkerTransformRequest } from './transform-worker-types.js';
3
- export declare function transformInWorker(request: Omit<WorkerTransformRequest, 'id'>, signal?: AbortSignal): Promise<MarkdownTransformResult>;
4
- export declare function destroyTransformWorkers(): Promise<void>;
2
+ interface TransformWorkerPool {
3
+ transform(html: string, url: string, options: {
4
+ includeMetadata: boolean;
5
+ signal?: AbortSignal;
6
+ }): Promise<MarkdownTransformResult>;
7
+ close(): Promise<void>;
8
+ }
9
+ export declare function getOrCreateTransformWorkerPool(): TransformWorkerPool;
10
+ export declare function shutdownTransformWorkerPool(): Promise<void>;
11
+ export {};