@j0hanz/superfetch 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -38
- package/dist/cache.d.ts +42 -0
- package/dist/cache.js +565 -0
- package/dist/config/env-parsers.d.ts +1 -0
- package/dist/config/env-parsers.js +12 -0
- package/dist/config/index.d.ts +7 -0
- package/dist/config/index.js +10 -3
- package/dist/config/types/content.d.ts +1 -0
- package/dist/config.d.ts +77 -0
- package/dist/config.js +261 -0
- package/dist/crypto.d.ts +2 -0
- package/dist/crypto.js +32 -0
- package/dist/errors.d.ts +10 -0
- package/dist/errors.js +28 -0
- package/dist/fetch.d.ts +40 -0
- package/dist/fetch.js +910 -0
- package/dist/http/base-middleware.d.ts +7 -0
- package/dist/http/base-middleware.js +143 -0
- package/dist/http/cors.d.ts +0 -5
- package/dist/http/cors.js +0 -6
- package/dist/http/download-routes.js +6 -2
- package/dist/http/error-handler.d.ts +2 -0
- package/dist/http/error-handler.js +55 -0
- package/dist/http/mcp-routes.js +2 -2
- package/dist/http/mcp-sessions.d.ts +3 -5
- package/dist/http/mcp-sessions.js +8 -8
- package/dist/http/server-tuning.d.ts +9 -0
- package/dist/http/server-tuning.js +45 -0
- package/dist/http/server.d.ts +0 -10
- package/dist/http/server.js +33 -333
- package/dist/http.d.ts +78 -0
- package/dist/http.js +1437 -0
- package/dist/index.js +3 -3
- package/dist/mcp.d.ts +3 -0
- package/dist/mcp.js +94 -0
- package/dist/observability.d.ts +16 -0
- package/dist/observability.js +78 -0
- package/dist/server.js +20 -5
- package/dist/services/cache.d.ts +1 -1
- package/dist/services/context.d.ts +2 -0
- package/dist/services/context.js +3 -0
- package/dist/services/extractor.d.ts +1 -0
- package/dist/services/extractor.js +28 -2
- package/dist/services/fetcher.d.ts +2 -0
- package/dist/services/fetcher.js +35 -14
- package/dist/services/logger.js +4 -1
- package/dist/services/telemetry.d.ts +19 -0
- package/dist/services/telemetry.js +43 -0
- package/dist/services/transform-worker-pool.d.ts +10 -3
- package/dist/services/transform-worker-pool.js +213 -184
- package/dist/tools/handlers/fetch-url.tool.js +8 -6
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.js +13 -1
- package/dist/tools/schemas.d.ts +2 -0
- package/dist/tools/schemas.js +8 -0
- package/dist/tools/utils/content-transform-core.d.ts +5 -0
- package/dist/tools/utils/content-transform-core.js +180 -0
- package/dist/tools/utils/content-transform-workers.d.ts +1 -0
- package/dist/tools/utils/content-transform-workers.js +1 -0
- package/dist/tools/utils/content-transform.d.ts +3 -5
- package/dist/tools/utils/content-transform.js +35 -148
- package/dist/tools/utils/raw-markdown.js +15 -1
- package/dist/tools.d.ts +104 -0
- package/dist/tools.js +421 -0
- package/dist/transform.d.ts +69 -0
- package/dist/transform.js +1509 -0
- package/dist/transformers/markdown.d.ts +4 -1
- package/dist/transformers/markdown.js +182 -53
- package/dist/utils/cancellation.d.ts +1 -0
- package/dist/utils/cancellation.js +18 -0
- package/dist/utils/code-language.d.ts +0 -9
- package/dist/utils/code-language.js +5 -5
- package/dist/utils/host-normalizer.d.ts +1 -0
- package/dist/utils/host-normalizer.js +37 -0
- package/dist/utils/url-redactor.d.ts +1 -0
- package/dist/utils/url-redactor.js +13 -0
- package/dist/utils/url-validator.js +8 -5
- package/dist/workers/transform-worker.js +82 -38
- package/package.json +8 -7
package/dist/index.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { parseArgs } from 'node:util';
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
3
|
+
import { startHttpServer } from './http.js';
|
|
4
|
+
import { startStdioServer } from './mcp.js';
|
|
5
|
+
import { logError } from './observability.js';
|
|
6
6
|
const { values } = parseArgs({
|
|
7
7
|
options: {
|
|
8
8
|
stdio: { type: 'boolean', default: false },
|
package/dist/mcp.d.ts
ADDED
package/dist/mcp.js
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
3
|
+
import { registerCachedContentResource } from './cache.js';
|
|
4
|
+
import { config } from './config.js';
|
|
5
|
+
import { destroyAgents } from './fetch.js';
|
|
6
|
+
import { logError, logInfo } from './observability.js';
|
|
7
|
+
import { registerTools } from './tools.js';
|
|
8
|
+
import { shutdownTransformWorkerPool } from './transform.js';
|
|
9
|
+
/**
 * Builds the identity object handed to the MCP server constructor.
 *
 * @returns {{ name: string, version: string }} Name and version read from
 *   `config.server`.
 */
function createServerInfo() {
  const { name, version } = config.server;
  return { name, version };
}
|
|
15
|
+
/**
 * Describes the capabilities this server advertises.
 *
 * @returns {object} A fresh capabilities object on every call: tools without
 *   list-change notifications, resources with list-change notifications and
 *   subscriptions, and an empty logging capability.
 */
function createServerCapabilities() {
  const tools = { listChanged: false };
  const resources = { listChanged: true, subscribe: true };
  const logging = {};
  return { tools, resources, logging };
}
|
|
22
|
+
/**
 * Produces the human-readable instructions string for the server.
 *
 * @param serverVersion Version label embedded between the `|` delimiters.
 * @returns {string} The instructions banner.
 */
function createServerInstructions(serverVersion) {
  const banner = `superFetch MCP server |${serverVersion}|`;
  return `${banner} A high-performance web content fetching and processing server.`;
}
|
|
25
|
+
/**
 * Creates a fully configured MCP server instance.
 *
 * The server is constructed with this module's info, capabilities and
 * instructions, after which the tools and the cached-content resource are
 * registered on it (in that order).
 *
 * @returns {McpServer} The configured, not-yet-connected server.
 */
export function createMcpServer() {
  const info = createServerInfo();
  const options = {
    capabilities: createServerCapabilities(),
    instructions: createServerInstructions(config.server.version),
  };
  const server = new McpServer(info, options);
  registerTools(server);
  registerCachedContentResource(server);
  return server;
}
|
|
34
|
+
/**
 * Installs an `onerror` callback on the underlying protocol server so that
 * reported errors are written to the log.
 *
 * @param server Wrapper whose `.server.onerror` property is assigned.
 */
function attachServerErrorHandler(server) {
  const reportError = (error) => {
    // Error instances are passed through; anything else is wrapped so the
    // logger receives a metadata object.
    const payload = error instanceof Error ? error : { error };
    logError('[MCP Error]', payload);
  };
  server.server.onerror = reportError;
}
|
|
39
|
+
/**
 * Performs the shutdown sequence after a termination signal and then exits.
 *
 * Cleanup steps run in order: destroy the fetch agents, shut down the
 * transform worker pool, close the MCP server. Each step is isolated, so a
 * failure in one is logged and the remaining steps still run. The process
 * always exits with code 0 once the sequence has finished.
 *
 * @param server MCP server whose `close()` is awaited as the last step.
 * @param signal Name of the received signal; only used in the notice written
 *   to stderr.
 */
function handleShutdownSignal(server, signal) {
  process.stderr.write(`\n${signal} received, shutting down superFetch MCP server...\n`);
  // Runs one cleanup step and logs (rather than propagates) its failure.
  const runStep = async (step) => {
    try {
      await step();
    }
    catch (err) {
      // Keep the reason even when a non-Error value was thrown/rejected;
      // stringified so the logger never has to serialize an arbitrary object.
      logError('Error during shutdown', err instanceof Error ? err : { error: String(err) });
    }
  };
  void (async () => {
    await runStep(() => destroyAgents());
    await runStep(() => shutdownTransformWorkerPool());
    await runStep(() => server.close());
  })().finally(() => {
    process.exit(0);
  });
}
|
|
54
|
+
/**
 * Creates a signal callback that starts the shutdown sequence at most once.
 *
 * The first invocation records its signal and delegates to
 * `handleShutdownSignal`; any later invocation is only logged, together with
 * the signal that originally triggered the shutdown.
 *
 * @param server MCP server forwarded to `handleShutdownSignal`.
 * @returns {(signal: string) => void} The guarded handler.
 */
function createShutdownHandler(server) {
  let inProgress = false;
  let firstSignal = null;
  return (signal) => {
    if (!inProgress) {
      inProgress = true;
      firstSignal = signal;
      handleShutdownSignal(server, signal);
      return;
    }
    logInfo('Shutdown already in progress; ignoring signal', {
      signal,
      initialSignal: firstSignal,
    });
  };
}
|
|
70
|
+
/**
 * Subscribes the given handler to SIGINT and SIGTERM.
 *
 * Each subscription uses `process.once`, so the listener for a given signal
 * fires a single time and receives that signal's name.
 *
 * @param handler Callback invoked with `'SIGINT'` or `'SIGTERM'`.
 */
function registerSignalHandlers(handler) {
  for (const signalName of ['SIGINT', 'SIGTERM']) {
    process.once(signalName, () => {
      handler(signalName);
    });
  }
}
|
|
78
|
+
/**
 * Connects the MCP server to the stdio transport.
 *
 * On success an informational message is logged. On failure the error is
 * logged and the process exits with code 1.
 *
 * @param server MCP server to connect.
 * @param transport Transport passed to `server.connect`.
 * @returns {Promise<void>}
 */
async function connectStdioServer(server, transport) {
  try {
    await server.connect(transport);
    logInfo('superFetch MCP server running on stdio');
  }
  catch (error) {
    // Preserve the failure reason even when a non-Error value was thrown;
    // previously such values were logged with no detail at all.
    logError('Failed to start stdio server', error instanceof Error ? error : { error: String(error) });
    process.exit(1);
  }
}
|
|
88
|
+
/**
 * Entry point for stdio mode: builds the MCP server and its stdio transport,
 * wires up error logging and signal-driven shutdown, then connects.
 *
 * @returns {Promise<void>} Settles once the connection attempt has finished.
 */
export async function startStdioServer() {
  const server = createMcpServer();
  const transport = new StdioServerTransport();
  attachServerErrorHandler(server);
  const onShutdownSignal = createShutdownHandler(server);
  registerSignalHandlers(onShutdownSignal);
  await connectStdioServer(server, transport);
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export type LogMetadata = Record<string, unknown>;
|
|
2
|
+
interface RequestContext {
|
|
3
|
+
readonly requestId: string;
|
|
4
|
+
readonly sessionId?: string;
|
|
5
|
+
readonly operationId?: string;
|
|
6
|
+
}
|
|
7
|
+
export declare function runWithRequestContext<T>(context: RequestContext, fn: () => T): T;
|
|
8
|
+
export declare function getRequestId(): string | undefined;
|
|
9
|
+
export declare function getSessionId(): string | undefined;
|
|
10
|
+
export declare function getOperationId(): string | undefined;
|
|
11
|
+
export declare function logInfo(message: string, meta?: LogMetadata): void;
|
|
12
|
+
export declare function logDebug(message: string, meta?: LogMetadata): void;
|
|
13
|
+
export declare function logWarn(message: string, meta?: LogMetadata): void;
|
|
14
|
+
export declare function logError(message: string, error?: Error | LogMetadata): void;
|
|
15
|
+
export declare function redactUrl(rawUrl: string): string;
|
|
16
|
+
export {};
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
2
|
+
import { config } from './config.js';
|
|
3
|
+
const requestContext = new AsyncLocalStorage();
|
|
4
|
+
/**
 * Runs `fn` with `context` installed as the current request context.
 *
 * @param context Value stored in the module's AsyncLocalStorage for the
 *   duration of the call.
 * @param fn Callback to execute.
 * @returns Whatever `fn` returns.
 */
export function runWithRequestContext(context, fn) {
  const outcome = requestContext.run(context, fn);
  return outcome;
}
|
|
7
|
+
/**
 * Reads the request id from the current request context.
 *
 * @returns {string | undefined} The id, or `undefined` when no context is
 *   active.
 */
export function getRequestId() {
  const store = requestContext.getStore();
  return store?.requestId;
}
|
|
10
|
+
/**
 * Reads the session id from the current request context.
 *
 * @returns {string | undefined} The id, or `undefined` when no context is
 *   active or the context carries no session id.
 */
export function getSessionId() {
  const store = requestContext.getStore();
  return store?.sessionId;
}
|
|
13
|
+
/**
 * Reads the operation id from the current request context.
 *
 * @returns {string | undefined} The id, or `undefined` when no context is
 *   active or the context carries no operation id.
 */
export function getOperationId() {
  const store = requestContext.getStore();
  return store?.operationId;
}
|
|
16
|
+
/**
 * Serializes a metadata object without ever throwing.
 *
 * Used only after plain `JSON.stringify` has failed. BigInt values are
 * written as decimal strings, and any object that has already been visited
 * (which includes, but is not limited to, circular references) is replaced
 * by the string `"[Circular]"`.
 */
function stringifyMetadataLossy(value) {
  const visited = new WeakSet();
  try {
    return JSON.stringify(value, (_key, item) => {
      if (typeof item === 'bigint')
        return item.toString();
      if (typeof item === 'object' && item !== null) {
        if (visited.has(item))
          return '[Circular]';
        visited.add(item);
      }
      return item;
    });
  }
  catch {
    // e.g. a throwing toJSON(); still emit something parseable.
    return '{"metadata":"[unserializable]"}';
  }
}
/**
 * Renders the metadata suffix appended to a log line.
 *
 * The request, session and operation ids of the active request context (when
 * present) are merged with the caller's metadata; caller keys win on
 * conflict.
 *
 * @param meta Optional caller-supplied metadata.
 * @returns {string} A leading space followed by the JSON text, or `''` when
 *   there is nothing to report.
 */
function formatMetadata(meta) {
  const requestId = getRequestId();
  const sessionId = getSessionId();
  const operationId = getOperationId();
  const merged = {
    ...(requestId ? { requestId } : {}),
    ...(sessionId ? { sessionId } : {}),
    ...(operationId ? { operationId } : {}),
    ...meta,
  };
  if (Object.keys(merged).length === 0)
    return '';
  try {
    return ` ${JSON.stringify(merged)}`;
  }
  catch {
    // JSON.stringify throws on circular structures and BigInt values; a log
    // call must not surface that as an exception in the code being logged.
    return ` ${stringifyMetadataLossy(merged)}`;
  }
}
|
|
30
|
+
/**
 * Returns the current time as an ISO 8601 string.
 *
 * @returns {string} Result of `Date.prototype.toISOString()` for "now".
 */
function createTimestamp() {
  const now = new Date();
  return now.toISOString();
}
|
|
33
|
+
/**
 * Assembles one log line: `[timestamp] LEVEL: message` followed by the
 * serialized metadata suffix.
 *
 * @param level Log level name; upper-cased in the output.
 * @param message Log message.
 * @param meta Optional metadata passed to `formatMetadata`.
 * @returns {string} The formatted line, without a trailing newline.
 */
function formatLogEntry(level, message, meta) {
  const timestamp = createTimestamp();
  const label = level.toUpperCase();
  const suffix = formatMetadata(meta);
  return `[${timestamp}] ${label}: ${message}${suffix}`;
}
|
|
36
|
+
/**
 * Decides whether a message of the given level is emitted.
 *
 * Debug messages are emitted only when the configured logging level is
 * `'debug'`; every other level is always emitted.
 *
 * @param level Log level name.
 * @returns {boolean}
 */
function shouldLog(level) {
  // The configuration is consulted only for debug messages.
  return level !== 'debug' || config.logging.level === 'debug';
}
|
|
43
|
+
/**
 * Writes an info-level line to stderr.
 *
 * @param message Log message.
 * @param meta Optional metadata appended to the line.
 */
export function logInfo(message, meta) {
  if (!shouldLog('info'))
    return;
  const line = formatLogEntry('info', message, meta);
  process.stderr.write(`${line}\n`);
}
|
|
48
|
+
/**
 * Writes a debug-level line to stderr when debug logging is enabled.
 *
 * @param message Log message.
 * @param meta Optional metadata appended to the line.
 */
export function logDebug(message, meta) {
  if (!shouldLog('debug'))
    return;
  const line = formatLogEntry('debug', message, meta);
  process.stderr.write(`${line}\n`);
}
|
|
53
|
+
/**
 * Writes a warn-level line to stderr.
 *
 * @param message Log message.
 * @param meta Optional metadata appended to the line.
 */
export function logWarn(message, meta) {
  if (!shouldLog('warn'))
    return;
  const line = formatLogEntry('warn', message, meta);
  process.stderr.write(`${line}\n`);
}
|
|
58
|
+
/**
 * Writes an error-level line to stderr.
 *
 * @param message Log message.
 * @param error Either an `Error` (its message and stack are logged) or a
 *   metadata object logged as-is; when omitted, no metadata is added.
 */
export function logError(message, error) {
  if (shouldLog('error')) {
    let details;
    if (error instanceof Error) {
      details = { error: error.message, stack: error.stack };
    }
    else {
      details = error ?? {};
    }
    const line = formatLogEntry('error', message, details);
    process.stderr.write(`${line}\n`);
  }
}
|
|
66
|
+
/**
 * Removes potentially sensitive parts of a URL before it is logged.
 *
 * The username, password, fragment and query string are cleared. Input that
 * cannot be parsed as a URL is returned unchanged.
 *
 * @param rawUrl URL text to sanitize.
 * @returns {string} The sanitized URL, or `rawUrl` itself when parsing fails.
 */
export function redactUrl(rawUrl) {
  try {
    const sanitized = Object.assign(new URL(rawUrl), {
      username: '',
      password: '',
      hash: '',
      search: '',
    });
    return sanitized.toString();
  }
  catch {
    return rawUrl;
  }
}
|
package/dist/server.js
CHANGED
|
@@ -3,6 +3,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|
|
3
3
|
import { config } from './config/index.js';
|
|
4
4
|
import { destroyAgents } from './services/fetcher.js';
|
|
5
5
|
import { logError, logInfo } from './services/logger.js';
|
|
6
|
+
import { shutdownTransformWorkerPool } from './services/transform-worker-pool.js';
|
|
6
7
|
import { registerTools } from './tools/index.js';
|
|
7
8
|
import { registerCachedContentResource } from './resources/cached-content.js';
|
|
8
9
|
function createServerInfo() {
|
|
@@ -37,9 +38,12 @@ function attachServerErrorHandler(server) {
|
|
|
37
38
|
}
|
|
38
39
|
function handleShutdownSignal(server, signal) {
|
|
39
40
|
process.stderr.write(`\n${signal} received, shutting down superFetch MCP server...\n`);
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
41
|
+
Promise.resolve()
|
|
42
|
+
.then(async () => {
|
|
43
|
+
destroyAgents();
|
|
44
|
+
await shutdownTransformWorkerPool();
|
|
45
|
+
await server.close();
|
|
46
|
+
})
|
|
43
47
|
.catch((err) => {
|
|
44
48
|
logError('Error during shutdown', err instanceof Error ? err : undefined);
|
|
45
49
|
})
|
|
@@ -48,15 +52,26 @@ function handleShutdownSignal(server, signal) {
|
|
|
48
52
|
});
|
|
49
53
|
}
|
|
50
54
|
function createShutdownHandler(server) {
|
|
55
|
+
let shuttingDown = false;
|
|
56
|
+
let initialSignal = null;
|
|
51
57
|
return (signal) => {
|
|
58
|
+
if (shuttingDown) {
|
|
59
|
+
logInfo('Shutdown already in progress; ignoring signal', {
|
|
60
|
+
signal,
|
|
61
|
+
initialSignal,
|
|
62
|
+
});
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
shuttingDown = true;
|
|
66
|
+
initialSignal = signal;
|
|
52
67
|
handleShutdownSignal(server, signal);
|
|
53
68
|
};
|
|
54
69
|
}
|
|
55
70
|
function registerSignalHandlers(handler) {
|
|
56
|
-
process.
|
|
71
|
+
process.once('SIGINT', () => {
|
|
57
72
|
handler('SIGINT');
|
|
58
73
|
});
|
|
59
|
-
process.
|
|
74
|
+
process.once('SIGTERM', () => {
|
|
60
75
|
handler('SIGTERM');
|
|
61
76
|
});
|
|
62
77
|
}
|
package/dist/services/cache.d.ts
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
interface RequestContext {
|
|
2
2
|
readonly requestId: string;
|
|
3
3
|
readonly sessionId?: string;
|
|
4
|
+
readonly operationId?: string;
|
|
4
5
|
}
|
|
5
6
|
export declare function runWithRequestContext<T>(context: RequestContext, fn: () => T): T;
|
|
6
7
|
export declare function getRequestId(): string | undefined;
|
|
7
8
|
export declare function getSessionId(): string | undefined;
|
|
9
|
+
export declare function getOperationId(): string | undefined;
|
|
8
10
|
export {};
|
package/dist/services/context.js
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import { parseHTML } from 'linkedom';
|
|
2
2
|
import { Readability } from '@mozilla/readability';
|
|
3
|
+
import { FetchError } from '../errors/app-error.js';
|
|
4
|
+
import { throwIfAborted } from '../utils/cancellation.js';
|
|
3
5
|
import { getErrorMessage } from '../utils/error-details.js';
|
|
4
6
|
import { isRecord } from '../utils/guards.js';
|
|
5
7
|
import { truncateHtml } from '../utils/html-truncator.js';
|
|
6
8
|
import { logError, logInfo, logWarn } from './logger.js';
|
|
7
9
|
import { extractMetadata } from './metadata-collector.js';
|
|
10
|
+
import { endTransformStage, startTransformStage } from './telemetry.js';
|
|
8
11
|
function isReadabilityCompatible(doc) {
|
|
9
12
|
if (!isRecord(doc))
|
|
10
13
|
return false;
|
|
@@ -64,7 +67,9 @@ function addOptionalField(target, key, value) {
|
|
|
64
67
|
return;
|
|
65
68
|
target[key] = value;
|
|
66
69
|
}
|
|
67
|
-
export function extractContent(html, url, options = {
|
|
70
|
+
export function extractContent(html, url, options = {
|
|
71
|
+
extractArticle: true,
|
|
72
|
+
}) {
|
|
68
73
|
if (!isValidInput(html, url)) {
|
|
69
74
|
return { article: null, metadata: {} };
|
|
70
75
|
}
|
|
@@ -72,15 +77,36 @@ export function extractContent(html, url, options = { extractArticle: true }) {
|
|
|
72
77
|
}
|
|
73
78
|
function tryExtractContent(html, url, options) {
|
|
74
79
|
try {
|
|
80
|
+
throwIfAborted(options.signal, url, 'extract:begin');
|
|
81
|
+
const parseStage = startTransformStage(url, 'extract:parse');
|
|
75
82
|
const { document } = parseHTML(truncateHtml(html));
|
|
83
|
+
endTransformStage(parseStage);
|
|
84
|
+
throwIfAborted(options.signal, url, 'extract:parsed');
|
|
76
85
|
applyBaseUri(document, url);
|
|
86
|
+
const metadataStage = startTransformStage(url, 'extract:metadata');
|
|
77
87
|
const metadata = extractMetadata(document);
|
|
88
|
+
endTransformStage(metadataStage);
|
|
89
|
+
throwIfAborted(options.signal, url, 'extract:metadata');
|
|
90
|
+
let article;
|
|
91
|
+
if (options.extractArticle) {
|
|
92
|
+
const articleStage = startTransformStage(url, 'extract:article');
|
|
93
|
+
article = resolveArticleExtraction(document, options.extractArticle);
|
|
94
|
+
endTransformStage(articleStage);
|
|
95
|
+
}
|
|
96
|
+
else {
|
|
97
|
+
article = null;
|
|
98
|
+
}
|
|
99
|
+
throwIfAborted(options.signal, url, 'extract:article');
|
|
78
100
|
return {
|
|
79
|
-
article
|
|
101
|
+
article,
|
|
80
102
|
metadata,
|
|
81
103
|
};
|
|
82
104
|
}
|
|
83
105
|
catch (error) {
|
|
106
|
+
if (error instanceof FetchError) {
|
|
107
|
+
throw error;
|
|
108
|
+
}
|
|
109
|
+
throwIfAborted(options.signal, url, 'extract:error');
|
|
84
110
|
logError('Failed to extract content', error instanceof Error ? error : undefined);
|
|
85
111
|
return { article: null, metadata: {} };
|
|
86
112
|
}
|
|
@@ -7,6 +7,8 @@ interface FetchTelemetryContext {
|
|
|
7
7
|
startTime: number;
|
|
8
8
|
url: string;
|
|
9
9
|
method: string;
|
|
10
|
+
contextRequestId?: string;
|
|
11
|
+
operationId?: string;
|
|
10
12
|
}
|
|
11
13
|
export declare function startFetchTelemetry(url: string, method: string): FetchTelemetryContext;
|
|
12
14
|
export declare function recordFetchResponse(context: FetchTelemetryContext, response: Response, contentSize?: number): void;
|
package/dist/services/fetcher.js
CHANGED
|
@@ -8,7 +8,9 @@ import { config } from '../config/index.js';
|
|
|
8
8
|
import { FetchError } from '../errors/app-error.js';
|
|
9
9
|
import { createErrorWithCode, isSystemError } from '../utils/error-details.js';
|
|
10
10
|
import { isRecord } from '../utils/guards.js';
|
|
11
|
+
import { redactUrl } from '../utils/url-redactor.js';
|
|
11
12
|
import { isBlockedIp, validateAndNormalizeUrl, } from '../utils/url-validator.js';
|
|
13
|
+
import { getOperationId, getRequestId } from './context.js';
|
|
12
14
|
import { logDebug, logError, logWarn } from './logger.js';
|
|
13
15
|
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
14
16
|
function normalizeLookupResults(addresses, family) {
|
|
@@ -241,19 +243,6 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
|
241
243
|
return createUnknownError(url, 'Unexpected error');
|
|
242
244
|
}
|
|
243
245
|
const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
|
|
244
|
-
function redactUrl(rawUrl) {
|
|
245
|
-
try {
|
|
246
|
-
const url = new URL(rawUrl);
|
|
247
|
-
url.username = '';
|
|
248
|
-
url.password = '';
|
|
249
|
-
url.hash = '';
|
|
250
|
-
url.search = '';
|
|
251
|
-
return url.toString();
|
|
252
|
-
}
|
|
253
|
-
catch {
|
|
254
|
-
return rawUrl;
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
246
|
function publishFetchEvent(event) {
|
|
258
247
|
if (!fetchChannel.hasSubscribers)
|
|
259
248
|
return;
|
|
@@ -266,11 +255,15 @@ function publishFetchEvent(event) {
|
|
|
266
255
|
}
|
|
267
256
|
export function startFetchTelemetry(url, method) {
|
|
268
257
|
const safeUrl = redactUrl(url);
|
|
258
|
+
const contextRequestId = getRequestId();
|
|
259
|
+
const operationId = getOperationId();
|
|
269
260
|
const context = {
|
|
270
261
|
requestId: randomUUID(),
|
|
271
262
|
startTime: performance.now(),
|
|
272
263
|
url: safeUrl,
|
|
273
264
|
method: method.toUpperCase(),
|
|
265
|
+
...(contextRequestId ? { contextRequestId } : {}),
|
|
266
|
+
...(operationId ? { operationId } : {}),
|
|
274
267
|
};
|
|
275
268
|
publishFetchEvent({
|
|
276
269
|
v: 1,
|
|
@@ -278,11 +271,19 @@ export function startFetchTelemetry(url, method) {
|
|
|
278
271
|
requestId: context.requestId,
|
|
279
272
|
method: context.method,
|
|
280
273
|
url: context.url,
|
|
274
|
+
...(context.contextRequestId
|
|
275
|
+
? { contextRequestId: context.contextRequestId }
|
|
276
|
+
: {}),
|
|
277
|
+
...(context.operationId ? { operationId: context.operationId } : {}),
|
|
281
278
|
});
|
|
282
279
|
logDebug('HTTP Request', {
|
|
283
280
|
requestId: context.requestId,
|
|
284
281
|
method: context.method,
|
|
285
282
|
url: context.url,
|
|
283
|
+
...(context.contextRequestId
|
|
284
|
+
? { contextRequestId: context.contextRequestId }
|
|
285
|
+
: {}),
|
|
286
|
+
...(context.operationId ? { operationId: context.operationId } : {}),
|
|
286
287
|
});
|
|
287
288
|
return context;
|
|
288
289
|
}
|
|
@@ -295,6 +296,10 @@ export function recordFetchResponse(context, response, contentSize) {
|
|
|
295
296
|
requestId: context.requestId,
|
|
296
297
|
status: response.status,
|
|
297
298
|
duration,
|
|
299
|
+
...(context.contextRequestId
|
|
300
|
+
? { contextRequestId: context.contextRequestId }
|
|
301
|
+
: {}),
|
|
302
|
+
...(context.operationId ? { operationId: context.operationId } : {}),
|
|
298
303
|
});
|
|
299
304
|
const contentType = response.headers.get('content-type');
|
|
300
305
|
const contentLength = response.headers.get('content-length') ??
|
|
@@ -304,6 +309,10 @@ export function recordFetchResponse(context, response, contentSize) {
|
|
|
304
309
|
status: response.status,
|
|
305
310
|
url: context.url,
|
|
306
311
|
duration: durationLabel,
|
|
312
|
+
...(context.contextRequestId
|
|
313
|
+
? { contextRequestId: context.contextRequestId }
|
|
314
|
+
: {}),
|
|
315
|
+
...(context.operationId ? { operationId: context.operationId } : {}),
|
|
307
316
|
...(contentType ? { contentType } : {}),
|
|
308
317
|
...(contentLength ? { size: contentLength } : {}),
|
|
309
318
|
});
|
|
@@ -312,6 +321,10 @@ export function recordFetchResponse(context, response, contentSize) {
|
|
|
312
321
|
requestId: context.requestId,
|
|
313
322
|
url: context.url,
|
|
314
323
|
duration: durationLabel,
|
|
324
|
+
...(context.contextRequestId
|
|
325
|
+
? { contextRequestId: context.contextRequestId }
|
|
326
|
+
: {}),
|
|
327
|
+
...(context.operationId ? { operationId: context.operationId } : {}),
|
|
315
328
|
});
|
|
316
329
|
}
|
|
317
330
|
}
|
|
@@ -325,6 +338,10 @@ export function recordFetchError(context, error, status) {
|
|
|
325
338
|
url: context.url,
|
|
326
339
|
error: err.message,
|
|
327
340
|
duration,
|
|
341
|
+
...(context.contextRequestId
|
|
342
|
+
? { contextRequestId: context.contextRequestId }
|
|
343
|
+
: {}),
|
|
344
|
+
...(context.operationId ? { operationId: context.operationId } : {}),
|
|
328
345
|
};
|
|
329
346
|
const code = isSystemError(err) ? err.code : undefined;
|
|
330
347
|
if (code !== undefined) {
|
|
@@ -341,6 +358,10 @@ export function recordFetchError(context, error, status) {
|
|
|
341
358
|
status,
|
|
342
359
|
code,
|
|
343
360
|
error: err.message,
|
|
361
|
+
...(context.contextRequestId
|
|
362
|
+
? { contextRequestId: context.contextRequestId }
|
|
363
|
+
: {}),
|
|
364
|
+
...(context.operationId ? { operationId: context.operationId } : {}),
|
|
344
365
|
});
|
|
345
366
|
}
|
|
346
367
|
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
|
@@ -520,7 +541,7 @@ const DEFAULT_HEADERS = {
|
|
|
520
541
|
Connection: 'keep-alive',
|
|
521
542
|
};
|
|
522
543
|
function buildHeaders() {
|
|
523
|
-
return DEFAULT_HEADERS;
|
|
544
|
+
return { ...DEFAULT_HEADERS };
|
|
524
545
|
}
|
|
525
546
|
function buildRequestSignal(timeoutMs, external) {
|
|
526
547
|
const timeoutSignal = AbortSignal.timeout(timeoutMs);
|
package/dist/services/logger.js
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import { config } from '../config/index.js';
|
|
2
|
-
import { getRequestId, getSessionId } from './context.js';
|
|
2
|
+
import { getOperationId, getRequestId, getSessionId } from './context.js';
|
|
3
3
|
function formatMetadata(meta) {
|
|
4
4
|
const requestId = getRequestId();
|
|
5
5
|
const sessionId = getSessionId();
|
|
6
|
+
const operationId = getOperationId();
|
|
6
7
|
const contextMeta = {};
|
|
7
8
|
if (requestId)
|
|
8
9
|
contextMeta.requestId = requestId;
|
|
9
10
|
if (sessionId)
|
|
10
11
|
contextMeta.sessionId = sessionId;
|
|
12
|
+
if (operationId)
|
|
13
|
+
contextMeta.operationId = operationId;
|
|
11
14
|
const merged = { ...contextMeta, ...meta };
|
|
12
15
|
return Object.keys(merged).length > 0 ? ` ${JSON.stringify(merged)}` : '';
|
|
13
16
|
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export interface TransformStageEvent {
|
|
2
|
+
v: 1;
|
|
3
|
+
type: 'stage';
|
|
4
|
+
stage: string;
|
|
5
|
+
durationMs: number;
|
|
6
|
+
url: string;
|
|
7
|
+
requestId?: string;
|
|
8
|
+
operationId?: string;
|
|
9
|
+
truncated?: boolean;
|
|
10
|
+
}
|
|
11
|
+
export interface TransformStageContext {
|
|
12
|
+
readonly stage: string;
|
|
13
|
+
readonly startTime: number;
|
|
14
|
+
readonly url: string;
|
|
15
|
+
}
|
|
16
|
+
export declare function startTransformStage(url: string, stage: string): TransformStageContext | null;
|
|
17
|
+
export declare function endTransformStage(context: TransformStageContext | null, options?: {
|
|
18
|
+
truncated?: boolean;
|
|
19
|
+
}): void;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import diagnosticsChannel from 'node:diagnostics_channel';
|
|
2
|
+
import { performance } from 'node:perf_hooks';
|
|
3
|
+
import { redactUrl } from '../utils/url-redactor.js';
|
|
4
|
+
import { getOperationId, getRequestId } from './context.js';
|
|
5
|
+
const transformChannel = diagnosticsChannel.channel('superfetch.transform');
|
|
6
|
+
/**
 * Publishes an event on the transform diagnostics channel.
 *
 * Nothing is published when the channel has no subscribers, and any
 * exception raised while publishing is suppressed.
 *
 * @param event Event payload to publish.
 */
function publishTransformEvent(event) {
  if (transformChannel.hasSubscribers) {
    try {
      transformChannel.publish(event);
    }
    catch {
      // Avoid crashing the publisher if a subscriber throws.
    }
  }
}
|
|
16
|
+
/**
 * Starts timing a transform stage.
 *
 * @param url URL being processed; stored in redacted form.
 * @param stage Stage name.
 * @returns A context holding the stage name, start time and redacted URL, or
 *   `null` when the transform channel has no subscribers.
 */
export function startTransformStage(url, stage) {
  if (transformChannel.hasSubscribers) {
    const startTime = performance.now();
    return { stage, startTime, url: redactUrl(url) };
  }
  return null;
}
|
|
25
|
+
/**
 * Finishes a transform stage and publishes its timing event.
 *
 * The event carries the stage name, elapsed milliseconds and URL from the
 * context, plus the current request/operation ids and the `truncated` flag
 * when those are available.
 *
 * @param context Value returned by `startTransformStage`; a falsy value makes
 *   this a no-op.
 * @param options Optional settings; `truncated` is copied into the event
 *   when it is not `undefined`.
 */
export function endTransformStage(context, options) {
  if (!context)
    return;
  const requestId = getRequestId();
  const operationId = getOperationId();
  const requestPart = requestId ? { requestId } : {};
  const operationPart = operationId ? { operationId } : {};
  const elapsedMs = performance.now() - context.startTime;
  const truncatedPart = options?.truncated !== undefined
    ? { truncated: options.truncated }
    : {};
  publishTransformEvent({
    v: 1,
    type: 'stage',
    stage: context.stage,
    durationMs: elapsedMs,
    url: context.url,
    ...requestPart,
    ...operationPart,
    ...truncatedPart,
  });
}
|
|
@@ -1,4 +1,11 @@
|
|
|
1
1
|
import type { MarkdownTransformResult } from '../config/types/content.js';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
interface TransformWorkerPool {
|
|
3
|
+
transform(html: string, url: string, options: {
|
|
4
|
+
includeMetadata: boolean;
|
|
5
|
+
signal?: AbortSignal;
|
|
6
|
+
}): Promise<MarkdownTransformResult>;
|
|
7
|
+
close(): Promise<void>;
|
|
8
|
+
}
|
|
9
|
+
export declare function getOrCreateTransformWorkerPool(): TransformWorkerPool;
|
|
10
|
+
export declare function shutdownTransformWorkerPool(): Promise<void>;
|
|
11
|
+
export {};
|