@j0hanz/superfetch 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/README.md +120 -38
  2. package/dist/cache.d.ts +42 -0
  3. package/dist/cache.js +565 -0
  4. package/dist/config/env-parsers.d.ts +1 -0
  5. package/dist/config/env-parsers.js +12 -0
  6. package/dist/config/index.d.ts +7 -0
  7. package/dist/config/index.js +10 -3
  8. package/dist/config/types/content.d.ts +1 -0
  9. package/dist/config.d.ts +77 -0
  10. package/dist/config.js +261 -0
  11. package/dist/crypto.d.ts +2 -0
  12. package/dist/crypto.js +32 -0
  13. package/dist/errors.d.ts +10 -0
  14. package/dist/errors.js +28 -0
  15. package/dist/fetch.d.ts +40 -0
  16. package/dist/fetch.js +910 -0
  17. package/dist/http/base-middleware.d.ts +7 -0
  18. package/dist/http/base-middleware.js +143 -0
  19. package/dist/http/cors.d.ts +0 -5
  20. package/dist/http/cors.js +0 -6
  21. package/dist/http/download-routes.js +6 -2
  22. package/dist/http/error-handler.d.ts +2 -0
  23. package/dist/http/error-handler.js +55 -0
  24. package/dist/http/mcp-routes.js +2 -2
  25. package/dist/http/mcp-sessions.d.ts +3 -5
  26. package/dist/http/mcp-sessions.js +8 -8
  27. package/dist/http/server-tuning.d.ts +9 -0
  28. package/dist/http/server-tuning.js +45 -0
  29. package/dist/http/server.d.ts +0 -10
  30. package/dist/http/server.js +33 -333
  31. package/dist/http.d.ts +78 -0
  32. package/dist/http.js +1437 -0
  33. package/dist/index.js +3 -3
  34. package/dist/mcp.d.ts +3 -0
  35. package/dist/mcp.js +94 -0
  36. package/dist/observability.d.ts +16 -0
  37. package/dist/observability.js +78 -0
  38. package/dist/server.js +20 -5
  39. package/dist/services/cache.d.ts +1 -1
  40. package/dist/services/context.d.ts +2 -0
  41. package/dist/services/context.js +3 -0
  42. package/dist/services/extractor.d.ts +1 -0
  43. package/dist/services/extractor.js +28 -2
  44. package/dist/services/fetcher.d.ts +2 -0
  45. package/dist/services/fetcher.js +35 -14
  46. package/dist/services/logger.js +4 -1
  47. package/dist/services/telemetry.d.ts +19 -0
  48. package/dist/services/telemetry.js +43 -0
  49. package/dist/services/transform-worker-pool.d.ts +10 -3
  50. package/dist/services/transform-worker-pool.js +213 -184
  51. package/dist/tools/handlers/fetch-url.tool.js +8 -6
  52. package/dist/tools/index.d.ts +1 -0
  53. package/dist/tools/index.js +13 -1
  54. package/dist/tools/schemas.d.ts +2 -0
  55. package/dist/tools/schemas.js +8 -0
  56. package/dist/tools/utils/content-transform-core.d.ts +5 -0
  57. package/dist/tools/utils/content-transform-core.js +180 -0
  58. package/dist/tools/utils/content-transform-workers.d.ts +1 -0
  59. package/dist/tools/utils/content-transform-workers.js +1 -0
  60. package/dist/tools/utils/content-transform.d.ts +3 -5
  61. package/dist/tools/utils/content-transform.js +35 -148
  62. package/dist/tools/utils/raw-markdown.js +15 -1
  63. package/dist/tools.d.ts +104 -0
  64. package/dist/tools.js +421 -0
  65. package/dist/transform.d.ts +69 -0
  66. package/dist/transform.js +1509 -0
  67. package/dist/transformers/markdown.d.ts +4 -1
  68. package/dist/transformers/markdown.js +182 -53
  69. package/dist/utils/cancellation.d.ts +1 -0
  70. package/dist/utils/cancellation.js +18 -0
  71. package/dist/utils/code-language.d.ts +0 -9
  72. package/dist/utils/code-language.js +5 -5
  73. package/dist/utils/host-normalizer.d.ts +1 -0
  74. package/dist/utils/host-normalizer.js +37 -0
  75. package/dist/utils/url-redactor.d.ts +1 -0
  76. package/dist/utils/url-redactor.js +13 -0
  77. package/dist/utils/url-validator.js +8 -5
  78. package/dist/workers/transform-worker.js +82 -38
  79. package/package.json +8 -7
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
  import { parseArgs } from 'node:util';
3
- import { logError } from './services/logger.js';
4
- import { startHttpServer } from './http/server.js';
5
- import { startStdioServer } from './server.js';
3
+ import { startHttpServer } from './http.js';
4
+ import { startStdioServer } from './mcp.js';
5
+ import { logError } from './observability.js';
6
6
  const { values } = parseArgs({
7
7
  options: {
8
8
  stdio: { type: 'boolean', default: false },
package/dist/mcp.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ export declare function createMcpServer(): McpServer;
3
+ export declare function startStdioServer(): Promise<void>;
package/dist/mcp.js ADDED
@@ -0,0 +1,94 @@
1
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
3
+ import { registerCachedContentResource } from './cache.js';
4
+ import { config } from './config.js';
5
+ import { destroyAgents } from './fetch.js';
6
+ import { logError, logInfo } from './observability.js';
7
+ import { registerTools } from './tools.js';
8
+ import { shutdownTransformWorkerPool } from './transform.js';
9
+ function createServerInfo() {
10
+ return {
11
+ name: config.server.name,
12
+ version: config.server.version,
13
+ };
14
+ }
15
+ function createServerCapabilities() {
16
+ return {
17
+ tools: { listChanged: false },
18
+ resources: { listChanged: true, subscribe: true },
19
+ logging: {},
20
+ };
21
+ }
22
+ function createServerInstructions(serverVersion) {
23
+ return `superFetch MCP server |${serverVersion}| A high-performance web content fetching and processing server.`;
24
+ }
25
+ export function createMcpServer() {
26
+ const server = new McpServer(createServerInfo(), {
27
+ capabilities: createServerCapabilities(),
28
+ instructions: createServerInstructions(config.server.version),
29
+ });
30
+ registerTools(server);
31
+ registerCachedContentResource(server);
32
+ return server;
33
+ }
34
+ function attachServerErrorHandler(server) {
35
+ server.server.onerror = (error) => {
36
+ logError('[MCP Error]', error instanceof Error ? error : { error });
37
+ };
38
+ }
39
+ function handleShutdownSignal(server, signal) {
40
+ process.stderr.write(`\n${signal} received, shutting down superFetch MCP server...\n`);
41
+ Promise.resolve()
42
+ .then(async () => {
43
+ destroyAgents();
44
+ await shutdownTransformWorkerPool();
45
+ await server.close();
46
+ })
47
+ .catch((err) => {
48
+ logError('Error during shutdown', err instanceof Error ? err : undefined);
49
+ })
50
+ .finally(() => {
51
+ process.exit(0);
52
+ });
53
+ }
54
+ function createShutdownHandler(server) {
55
+ let shuttingDown = false;
56
+ let initialSignal = null;
57
+ return (signal) => {
58
+ if (shuttingDown) {
59
+ logInfo('Shutdown already in progress; ignoring signal', {
60
+ signal,
61
+ initialSignal,
62
+ });
63
+ return;
64
+ }
65
+ shuttingDown = true;
66
+ initialSignal = signal;
67
+ handleShutdownSignal(server, signal);
68
+ };
69
+ }
70
+ function registerSignalHandlers(handler) {
71
+ process.once('SIGINT', () => {
72
+ handler('SIGINT');
73
+ });
74
+ process.once('SIGTERM', () => {
75
+ handler('SIGTERM');
76
+ });
77
+ }
78
+ async function connectStdioServer(server, transport) {
79
+ try {
80
+ await server.connect(transport);
81
+ logInfo('superFetch MCP server running on stdio');
82
+ }
83
+ catch (error) {
84
+ logError('Failed to start stdio server', error instanceof Error ? error : undefined);
85
+ process.exit(1);
86
+ }
87
+ }
88
+ export async function startStdioServer() {
89
+ const server = createMcpServer();
90
+ const transport = new StdioServerTransport();
91
+ attachServerErrorHandler(server);
92
+ registerSignalHandlers(createShutdownHandler(server));
93
+ await connectStdioServer(server, transport);
94
+ }
package/dist/observability.d.ts ADDED
@@ -0,0 +1,16 @@
1
+ export type LogMetadata = Record<string, unknown>;
2
+ interface RequestContext {
3
+ readonly requestId: string;
4
+ readonly sessionId?: string;
5
+ readonly operationId?: string;
6
+ }
7
+ export declare function runWithRequestContext<T>(context: RequestContext, fn: () => T): T;
8
+ export declare function getRequestId(): string | undefined;
9
+ export declare function getSessionId(): string | undefined;
10
+ export declare function getOperationId(): string | undefined;
11
+ export declare function logInfo(message: string, meta?: LogMetadata): void;
12
+ export declare function logDebug(message: string, meta?: LogMetadata): void;
13
+ export declare function logWarn(message: string, meta?: LogMetadata): void;
14
+ export declare function logError(message: string, error?: Error | LogMetadata): void;
15
+ export declare function redactUrl(rawUrl: string): string;
16
+ export {};
package/dist/observability.js ADDED
@@ -0,0 +1,78 @@
1
+ import { AsyncLocalStorage } from 'node:async_hooks';
2
+ import { config } from './config.js';
3
+ const requestContext = new AsyncLocalStorage();
4
+ export function runWithRequestContext(context, fn) {
5
+ return requestContext.run(context, fn);
6
+ }
7
+ export function getRequestId() {
8
+ return requestContext.getStore()?.requestId;
9
+ }
10
+ export function getSessionId() {
11
+ return requestContext.getStore()?.sessionId;
12
+ }
13
+ export function getOperationId() {
14
+ return requestContext.getStore()?.operationId;
15
+ }
16
+ function formatMetadata(meta) {
17
+ const requestId = getRequestId();
18
+ const sessionId = getSessionId();
19
+ const operationId = getOperationId();
20
+ const contextMeta = {};
21
+ if (requestId)
22
+ contextMeta.requestId = requestId;
23
+ if (sessionId)
24
+ contextMeta.sessionId = sessionId;
25
+ if (operationId)
26
+ contextMeta.operationId = operationId;
27
+ const merged = { ...contextMeta, ...meta };
28
+ return Object.keys(merged).length > 0 ? ` ${JSON.stringify(merged)}` : '';
29
+ }
30
+ function createTimestamp() {
31
+ return new Date().toISOString();
32
+ }
33
+ function formatLogEntry(level, message, meta) {
34
+ return `[${createTimestamp()}] ${level.toUpperCase()}: ${message}${formatMetadata(meta)}`;
35
+ }
36
+ function shouldLog(level) {
37
+ // Debug logs only when LOG_LEVEL=debug
38
+ if (level === 'debug')
39
+ return config.logging.level === 'debug';
40
+ // All other levels always log
41
+ return true;
42
+ }
43
+ export function logInfo(message, meta) {
44
+ if (shouldLog('info')) {
45
+ process.stderr.write(`${formatLogEntry('info', message, meta)}\n`);
46
+ }
47
+ }
48
+ export function logDebug(message, meta) {
49
+ if (shouldLog('debug')) {
50
+ process.stderr.write(`${formatLogEntry('debug', message, meta)}\n`);
51
+ }
52
+ }
53
+ export function logWarn(message, meta) {
54
+ if (shouldLog('warn')) {
55
+ process.stderr.write(`${formatLogEntry('warn', message, meta)}\n`);
56
+ }
57
+ }
58
+ export function logError(message, error) {
59
+ if (!shouldLog('error'))
60
+ return;
61
+ const errorMeta = error instanceof Error
62
+ ? { error: error.message, stack: error.stack }
63
+ : (error ?? {});
64
+ process.stderr.write(`${formatLogEntry('error', message, errorMeta)}\n`);
65
+ }
66
+ export function redactUrl(rawUrl) {
67
+ try {
68
+ const url = new URL(rawUrl);
69
+ url.username = '';
70
+ url.password = '';
71
+ url.hash = '';
72
+ url.search = '';
73
+ return url.toString();
74
+ }
75
+ catch {
76
+ return rawUrl;
77
+ }
78
+ }
package/dist/server.js CHANGED
@@ -3,6 +3,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
3
3
  import { config } from './config/index.js';
4
4
  import { destroyAgents } from './services/fetcher.js';
5
5
  import { logError, logInfo } from './services/logger.js';
6
+ import { shutdownTransformWorkerPool } from './services/transform-worker-pool.js';
6
7
  import { registerTools } from './tools/index.js';
7
8
  import { registerCachedContentResource } from './resources/cached-content.js';
8
9
  function createServerInfo() {
@@ -37,9 +38,12 @@ function attachServerErrorHandler(server) {
37
38
  }
38
39
  function handleShutdownSignal(server, signal) {
39
40
  process.stderr.write(`\n${signal} received, shutting down superFetch MCP server...\n`);
40
- destroyAgents();
41
- server
42
- .close()
41
+ Promise.resolve()
42
+ .then(async () => {
43
+ destroyAgents();
44
+ await shutdownTransformWorkerPool();
45
+ await server.close();
46
+ })
43
47
  .catch((err) => {
44
48
  logError('Error during shutdown', err instanceof Error ? err : undefined);
45
49
  })
@@ -48,15 +52,26 @@ function handleShutdownSignal(server, signal) {
48
52
  });
49
53
  }
50
54
  function createShutdownHandler(server) {
55
+ let shuttingDown = false;
56
+ let initialSignal = null;
51
57
  return (signal) => {
58
+ if (shuttingDown) {
59
+ logInfo('Shutdown already in progress; ignoring signal', {
60
+ signal,
61
+ initialSignal,
62
+ });
63
+ return;
64
+ }
65
+ shuttingDown = true;
66
+ initialSignal = signal;
52
67
  handleShutdownSignal(server, signal);
53
68
  };
54
69
  }
55
70
  function registerSignalHandlers(handler) {
56
- process.on('SIGINT', () => {
71
+ process.once('SIGINT', () => {
57
72
  handler('SIGINT');
58
73
  });
59
- process.on('SIGTERM', () => {
74
+ process.once('SIGTERM', () => {
60
75
  handler('SIGTERM');
61
76
  });
62
77
  }
package/dist/services/cache.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { CacheEntry } from '../config/types/content.js';
2
- export interface CacheUpdateEvent {
2
+ interface CacheUpdateEvent {
3
3
  cacheKey: string;
4
4
  namespace: string;
5
5
  urlHash: string;
package/dist/services/context.d.ts CHANGED
@@ -1,8 +1,10 @@
1
1
  interface RequestContext {
2
2
  readonly requestId: string;
3
3
  readonly sessionId?: string;
4
+ readonly operationId?: string;
4
5
  }
5
6
  export declare function runWithRequestContext<T>(context: RequestContext, fn: () => T): T;
6
7
  export declare function getRequestId(): string | undefined;
7
8
  export declare function getSessionId(): string | undefined;
9
+ export declare function getOperationId(): string | undefined;
8
10
  export {};
package/dist/services/context.js CHANGED
@@ -9,3 +9,6 @@ export function getRequestId() {
9
9
  export function getSessionId() {
10
10
  return requestContext.getStore()?.sessionId;
11
11
  }
12
+ export function getOperationId() {
13
+ return requestContext.getStore()?.operationId;
14
+ }
package/dist/services/extractor.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { ExtractionResult } from '../config/types/content.js';
2
2
  export declare function extractContent(html: string, url: string, options?: {
3
3
  extractArticle?: boolean;
4
+ signal?: AbortSignal;
4
5
  }): ExtractionResult;
package/dist/services/extractor.js CHANGED
@@ -1,10 +1,13 @@
1
1
  import { parseHTML } from 'linkedom';
2
2
  import { Readability } from '@mozilla/readability';
3
+ import { FetchError } from '../errors/app-error.js';
4
+ import { throwIfAborted } from '../utils/cancellation.js';
3
5
  import { getErrorMessage } from '../utils/error-details.js';
4
6
  import { isRecord } from '../utils/guards.js';
5
7
  import { truncateHtml } from '../utils/html-truncator.js';
6
8
  import { logError, logInfo, logWarn } from './logger.js';
7
9
  import { extractMetadata } from './metadata-collector.js';
10
+ import { endTransformStage, startTransformStage } from './telemetry.js';
8
11
  function isReadabilityCompatible(doc) {
9
12
  if (!isRecord(doc))
10
13
  return false;
@@ -64,7 +67,9 @@ function addOptionalField(target, key, value) {
64
67
  return;
65
68
  target[key] = value;
66
69
  }
67
- export function extractContent(html, url, options = { extractArticle: true }) {
70
+ export function extractContent(html, url, options = {
71
+ extractArticle: true,
72
+ }) {
68
73
  if (!isValidInput(html, url)) {
69
74
  return { article: null, metadata: {} };
70
75
  }
@@ -72,15 +77,36 @@ export function extractContent(html, url, options = { extractArticle: true }) {
72
77
  }
73
78
  function tryExtractContent(html, url, options) {
74
79
  try {
80
+ throwIfAborted(options.signal, url, 'extract:begin');
81
+ const parseStage = startTransformStage(url, 'extract:parse');
75
82
  const { document } = parseHTML(truncateHtml(html));
83
+ endTransformStage(parseStage);
84
+ throwIfAborted(options.signal, url, 'extract:parsed');
76
85
  applyBaseUri(document, url);
86
+ const metadataStage = startTransformStage(url, 'extract:metadata');
77
87
  const metadata = extractMetadata(document);
88
+ endTransformStage(metadataStage);
89
+ throwIfAborted(options.signal, url, 'extract:metadata');
90
+ let article;
91
+ if (options.extractArticle) {
92
+ const articleStage = startTransformStage(url, 'extract:article');
93
+ article = resolveArticleExtraction(document, options.extractArticle);
94
+ endTransformStage(articleStage);
95
+ }
96
+ else {
97
+ article = null;
98
+ }
99
+ throwIfAborted(options.signal, url, 'extract:article');
78
100
  return {
79
- article: resolveArticleExtraction(document, options.extractArticle),
101
+ article,
80
102
  metadata,
81
103
  };
82
104
  }
83
105
  catch (error) {
106
+ if (error instanceof FetchError) {
107
+ throw error;
108
+ }
109
+ throwIfAborted(options.signal, url, 'extract:error');
84
110
  logError('Failed to extract content', error instanceof Error ? error : undefined);
85
111
  return { article: null, metadata: {} };
86
112
  }
package/dist/services/fetcher.d.ts CHANGED
@@ -7,6 +7,8 @@ interface FetchTelemetryContext {
7
7
  startTime: number;
8
8
  url: string;
9
9
  method: string;
10
+ contextRequestId?: string;
11
+ operationId?: string;
10
12
  }
11
13
  export declare function startFetchTelemetry(url: string, method: string): FetchTelemetryContext;
12
14
  export declare function recordFetchResponse(context: FetchTelemetryContext, response: Response, contentSize?: number): void;
package/dist/services/fetcher.js CHANGED
@@ -8,7 +8,9 @@ import { config } from '../config/index.js';
8
8
  import { FetchError } from '../errors/app-error.js';
9
9
  import { createErrorWithCode, isSystemError } from '../utils/error-details.js';
10
10
  import { isRecord } from '../utils/guards.js';
11
+ import { redactUrl } from '../utils/url-redactor.js';
11
12
  import { isBlockedIp, validateAndNormalizeUrl, } from '../utils/url-validator.js';
13
+ import { getOperationId, getRequestId } from './context.js';
12
14
  import { logDebug, logError, logWarn } from './logger.js';
13
15
  const DNS_LOOKUP_TIMEOUT_MS = 5000;
14
16
  function normalizeLookupResults(addresses, family) {
@@ -241,19 +243,6 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
241
243
  return createUnknownError(url, 'Unexpected error');
242
244
  }
243
245
  const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
244
- function redactUrl(rawUrl) {
245
- try {
246
- const url = new URL(rawUrl);
247
- url.username = '';
248
- url.password = '';
249
- url.hash = '';
250
- url.search = '';
251
- return url.toString();
252
- }
253
- catch {
254
- return rawUrl;
255
- }
256
- }
257
246
  function publishFetchEvent(event) {
258
247
  if (!fetchChannel.hasSubscribers)
259
248
  return;
@@ -266,11 +255,15 @@ function publishFetchEvent(event) {
266
255
  }
267
256
  export function startFetchTelemetry(url, method) {
268
257
  const safeUrl = redactUrl(url);
258
+ const contextRequestId = getRequestId();
259
+ const operationId = getOperationId();
269
260
  const context = {
270
261
  requestId: randomUUID(),
271
262
  startTime: performance.now(),
272
263
  url: safeUrl,
273
264
  method: method.toUpperCase(),
265
+ ...(contextRequestId ? { contextRequestId } : {}),
266
+ ...(operationId ? { operationId } : {}),
274
267
  };
275
268
  publishFetchEvent({
276
269
  v: 1,
@@ -278,11 +271,19 @@ export function startFetchTelemetry(url, method) {
278
271
  requestId: context.requestId,
279
272
  method: context.method,
280
273
  url: context.url,
274
+ ...(context.contextRequestId
275
+ ? { contextRequestId: context.contextRequestId }
276
+ : {}),
277
+ ...(context.operationId ? { operationId: context.operationId } : {}),
281
278
  });
282
279
  logDebug('HTTP Request', {
283
280
  requestId: context.requestId,
284
281
  method: context.method,
285
282
  url: context.url,
283
+ ...(context.contextRequestId
284
+ ? { contextRequestId: context.contextRequestId }
285
+ : {}),
286
+ ...(context.operationId ? { operationId: context.operationId } : {}),
286
287
  });
287
288
  return context;
288
289
  }
@@ -295,6 +296,10 @@ export function recordFetchResponse(context, response, contentSize) {
295
296
  requestId: context.requestId,
296
297
  status: response.status,
297
298
  duration,
299
+ ...(context.contextRequestId
300
+ ? { contextRequestId: context.contextRequestId }
301
+ : {}),
302
+ ...(context.operationId ? { operationId: context.operationId } : {}),
298
303
  });
299
304
  const contentType = response.headers.get('content-type');
300
305
  const contentLength = response.headers.get('content-length') ??
@@ -304,6 +309,10 @@ export function recordFetchResponse(context, response, contentSize) {
304
309
  status: response.status,
305
310
  url: context.url,
306
311
  duration: durationLabel,
312
+ ...(context.contextRequestId
313
+ ? { contextRequestId: context.contextRequestId }
314
+ : {}),
315
+ ...(context.operationId ? { operationId: context.operationId } : {}),
307
316
  ...(contentType ? { contentType } : {}),
308
317
  ...(contentLength ? { size: contentLength } : {}),
309
318
  });
@@ -312,6 +321,10 @@ export function recordFetchResponse(context, response, contentSize) {
312
321
  requestId: context.requestId,
313
322
  url: context.url,
314
323
  duration: durationLabel,
324
+ ...(context.contextRequestId
325
+ ? { contextRequestId: context.contextRequestId }
326
+ : {}),
327
+ ...(context.operationId ? { operationId: context.operationId } : {}),
315
328
  });
316
329
  }
317
330
  }
@@ -325,6 +338,10 @@ export function recordFetchError(context, error, status) {
325
338
  url: context.url,
326
339
  error: err.message,
327
340
  duration,
341
+ ...(context.contextRequestId
342
+ ? { contextRequestId: context.contextRequestId }
343
+ : {}),
344
+ ...(context.operationId ? { operationId: context.operationId } : {}),
328
345
  };
329
346
  const code = isSystemError(err) ? err.code : undefined;
330
347
  if (code !== undefined) {
@@ -341,6 +358,10 @@ export function recordFetchError(context, error, status) {
341
358
  status,
342
359
  code,
343
360
  error: err.message,
361
+ ...(context.contextRequestId
362
+ ? { contextRequestId: context.contextRequestId }
363
+ : {}),
364
+ ...(context.operationId ? { operationId: context.operationId } : {}),
344
365
  });
345
366
  }
346
367
  const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
@@ -520,7 +541,7 @@ const DEFAULT_HEADERS = {
520
541
  Connection: 'keep-alive',
521
542
  };
522
543
  function buildHeaders() {
523
- return DEFAULT_HEADERS;
544
+ return { ...DEFAULT_HEADERS };
524
545
  }
525
546
  function buildRequestSignal(timeoutMs, external) {
526
547
  const timeoutSignal = AbortSignal.timeout(timeoutMs);
package/dist/services/logger.js CHANGED
@@ -1,13 +1,16 @@
1
1
  import { config } from '../config/index.js';
2
- import { getRequestId, getSessionId } from './context.js';
2
+ import { getOperationId, getRequestId, getSessionId } from './context.js';
3
3
  function formatMetadata(meta) {
4
4
  const requestId = getRequestId();
5
5
  const sessionId = getSessionId();
6
+ const operationId = getOperationId();
6
7
  const contextMeta = {};
7
8
  if (requestId)
8
9
  contextMeta.requestId = requestId;
9
10
  if (sessionId)
10
11
  contextMeta.sessionId = sessionId;
12
+ if (operationId)
13
+ contextMeta.operationId = operationId;
11
14
  const merged = { ...contextMeta, ...meta };
12
15
  return Object.keys(merged).length > 0 ? ` ${JSON.stringify(merged)}` : '';
13
16
  }
package/dist/services/telemetry.d.ts ADDED
@@ -0,0 +1,19 @@
1
+ export interface TransformStageEvent {
2
+ v: 1;
3
+ type: 'stage';
4
+ stage: string;
5
+ durationMs: number;
6
+ url: string;
7
+ requestId?: string;
8
+ operationId?: string;
9
+ truncated?: boolean;
10
+ }
11
+ export interface TransformStageContext {
12
+ readonly stage: string;
13
+ readonly startTime: number;
14
+ readonly url: string;
15
+ }
16
+ export declare function startTransformStage(url: string, stage: string): TransformStageContext | null;
17
+ export declare function endTransformStage(context: TransformStageContext | null, options?: {
18
+ truncated?: boolean;
19
+ }): void;
package/dist/services/telemetry.js ADDED
@@ -0,0 +1,43 @@
1
+ import diagnosticsChannel from 'node:diagnostics_channel';
2
+ import { performance } from 'node:perf_hooks';
3
+ import { redactUrl } from '../utils/url-redactor.js';
4
+ import { getOperationId, getRequestId } from './context.js';
5
+ const transformChannel = diagnosticsChannel.channel('superfetch.transform');
6
+ function publishTransformEvent(event) {
7
+ if (!transformChannel.hasSubscribers)
8
+ return;
9
+ try {
10
+ transformChannel.publish(event);
11
+ }
12
+ catch {
13
+ // Avoid crashing the publisher if a subscriber throws.
14
+ }
15
+ }
16
+ export function startTransformStage(url, stage) {
17
+ if (!transformChannel.hasSubscribers)
18
+ return null;
19
+ return {
20
+ stage,
21
+ startTime: performance.now(),
22
+ url: redactUrl(url),
23
+ };
24
+ }
25
+ export function endTransformStage(context, options) {
26
+ if (!context)
27
+ return;
28
+ const requestId = getRequestId();
29
+ const operationId = getOperationId();
30
+ const event = {
31
+ v: 1,
32
+ type: 'stage',
33
+ stage: context.stage,
34
+ durationMs: performance.now() - context.startTime,
35
+ url: context.url,
36
+ ...(requestId ? { requestId } : {}),
37
+ ...(operationId ? { operationId } : {}),
38
+ ...(options?.truncated !== undefined
39
+ ? { truncated: options.truncated }
40
+ : {}),
41
+ };
42
+ publishTransformEvent(event);
43
+ }
package/dist/services/transform-worker-pool.d.ts CHANGED
@@ -1,4 +1,11 @@
1
1
  import type { MarkdownTransformResult } from '../config/types/content.js';
2
- import type { WorkerTransformRequest } from './transform-worker-types.js';
3
- export declare function transformInWorker(request: Omit<WorkerTransformRequest, 'id'>, signal?: AbortSignal): Promise<MarkdownTransformResult>;
4
- export declare function destroyTransformWorkers(): Promise<void>;
2
+ interface TransformWorkerPool {
3
+ transform(html: string, url: string, options: {
4
+ includeMetadata: boolean;
5
+ signal?: AbortSignal;
6
+ }): Promise<MarkdownTransformResult>;
7
+ close(): Promise<void>;
8
+ }
9
+ export declare function getOrCreateTransformWorkerPool(): TransformWorkerPool;
10
+ export declare function shutdownTransformWorkerPool(): Promise<void>;
11
+ export {};