@j0hanz/fetch-url-mcp 1.12.0 → 1.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -17
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/auth.js +61 -20
- package/dist/http/helpers.d.ts +1 -1
- package/dist/http/helpers.d.ts.map +1 -1
- package/dist/http/helpers.js +7 -9
- package/dist/http/native.d.ts.map +1 -1
- package/dist/http/native.js +271 -54
- package/dist/http/rate-limit.d.ts.map +1 -1
- package/dist/http/rate-limit.js +2 -1
- package/dist/index.js +5 -4
- package/dist/lib/config.d.ts +1 -1
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/config.js +8 -1
- package/dist/lib/core.d.ts +8 -4
- package/dist/lib/core.d.ts.map +1 -1
- package/dist/lib/core.js +240 -73
- package/dist/lib/fetch-pipeline.d.ts.map +1 -1
- package/dist/lib/fetch-pipeline.js +15 -2
- package/dist/lib/http.d.ts.map +1 -1
- package/dist/lib/http.js +1 -1
- package/dist/lib/mcp-interop.d.ts +15 -3
- package/dist/lib/mcp-interop.d.ts.map +1 -1
- package/dist/lib/mcp-interop.js +92 -23
- package/dist/lib/url.d.ts.map +1 -1
- package/dist/lib/url.js +1 -1
- package/dist/lib/utils.d.ts.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/resources/index.d.ts +4 -0
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +39 -4
- package/dist/schemas.d.ts +5 -5
- package/dist/schemas.d.ts.map +1 -1
- package/dist/schemas.js +7 -9
- package/dist/server.d.ts +3 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +20 -11
- package/dist/tasks/execution.d.ts +1 -1
- package/dist/tasks/execution.d.ts.map +1 -1
- package/dist/tasks/execution.js +72 -25
- package/dist/tasks/handlers.d.ts.map +1 -1
- package/dist/tasks/handlers.js +31 -24
- package/dist/tasks/manager.d.ts +5 -2
- package/dist/tasks/manager.d.ts.map +1 -1
- package/dist/tasks/manager.js +58 -19
- package/dist/tasks/owner.d.ts +5 -0
- package/dist/tasks/owner.d.ts.map +1 -1
- package/dist/tasks/owner.js +15 -7
- package/dist/tasks/registry.d.ts +10 -8
- package/dist/tasks/registry.d.ts.map +1 -1
- package/dist/tasks/registry.js +27 -15
- package/dist/tools/fetch-url.d.ts +2 -0
- package/dist/tools/fetch-url.d.ts.map +1 -1
- package/dist/tools/fetch-url.js +76 -21
- package/dist/transform/dom-prep.d.ts.map +1 -1
- package/dist/transform/dom-prep.js +6 -6
- package/dist/transform/transform.d.ts.map +1 -1
- package/dist/transform/transform.js +17 -14
- package/dist/transform/worker-pool.d.ts.map +1 -1
- package/dist/transform/worker-pool.js +43 -3
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/tasks/registry.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/tasks/registry.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AAGvE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,MAAM,sBAAsB,GAAG,UAAU,GAAG,UAAU,GAAG,WAAW,CAAC;AAE3E,MAAM,WAAW,yBAAyB,CAAC,KAAK,GAAG,OAAO;IACxD,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,CAAC,IAAI,EAAE,OAAO,KAAK,KAAK,CAAC;IACzC,OAAO,EAAE,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,EAAE,gBAAgB,KAAK,OAAO,CAAC,YAAY,CAAC,CAAC;IAC1E,0BAA0B,CAAC,EAAE,CAAC,MAAM,EAAE,YAAY,KAAK,MAAM,GAAG,SAAS,CAAC;IAC1E,WAAW,CAAC,EAAE,sBAAsB,CAAC;IACrC,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAqBD,wBAAgB,uBAAuB,CAAC,KAAK,EAC3C,MAAM,EAAE,SAAS,EACjB,UAAU,EAAE,yBAAyB,CAAC,KAAK,CAAC,GAC3C,IAAI,CAKN;AAED,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,SAAS,EACjB,IAAI,EAAE,MAAM,GACX,IAAI,CAEN;AAED,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,SAAS,EACjB,IAAI,EAAE,MAAM,GACX,yBAAyB,GAAG,SAAS,CAEvC;AAED,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,SAAS,EACjB,IAAI,EAAE,MAAM,GACX,sBAAsB,GAAG,SAAS,CAEpC;AAED,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAE3E;AAED,wBAAgB,6BAA6B,CAAC,MAAM,EAAE,SAAS,GAAG,OAAO,CAExE;AAED,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,SAAS,EACjB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,sBAAsB,GAC9B,IAAI,CAIN"}
|
package/dist/tasks/registry.js
CHANGED
|
@@ -1,27 +1,39 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
import { registerServerLifecycleCleanup } from '../lib/mcp-interop.js';
|
|
2
|
+
const taskCapableToolsByServer = new WeakMap();
|
|
3
|
+
function getServerToolMap(server) {
|
|
4
|
+
let toolMap = taskCapableToolsByServer.get(server);
|
|
5
|
+
if (toolMap)
|
|
6
|
+
return toolMap;
|
|
7
|
+
toolMap = new Map();
|
|
8
|
+
taskCapableToolsByServer.set(server, toolMap);
|
|
9
|
+
registerServerLifecycleCleanup(server, () => {
|
|
10
|
+
taskCapableToolsByServer.delete(server);
|
|
11
|
+
});
|
|
12
|
+
return toolMap;
|
|
13
|
+
}
|
|
14
|
+
export function registerTaskCapableTool(server, descriptor) {
|
|
15
|
+
getServerToolMap(server).set(descriptor.name, {
|
|
4
16
|
...descriptor,
|
|
5
17
|
taskSupport: descriptor.taskSupport ?? 'optional',
|
|
6
18
|
});
|
|
7
19
|
}
|
|
8
|
-
export function unregisterTaskCapableTool(name) {
|
|
9
|
-
|
|
20
|
+
export function unregisterTaskCapableTool(server, name) {
|
|
21
|
+
getServerToolMap(server).delete(name);
|
|
10
22
|
}
|
|
11
|
-
export function getTaskCapableTool(name) {
|
|
12
|
-
return
|
|
23
|
+
export function getTaskCapableTool(server, name) {
|
|
24
|
+
return getServerToolMap(server).get(name);
|
|
13
25
|
}
|
|
14
|
-
export function getTaskCapableToolSupport(name) {
|
|
15
|
-
return
|
|
26
|
+
export function getTaskCapableToolSupport(server, name) {
|
|
27
|
+
return getServerToolMap(server).get(name)?.taskSupport;
|
|
16
28
|
}
|
|
17
|
-
export function hasTaskCapableTool(name) {
|
|
18
|
-
return
|
|
29
|
+
export function hasTaskCapableTool(server, name) {
|
|
30
|
+
return getServerToolMap(server).has(name);
|
|
19
31
|
}
|
|
20
|
-
export function hasRegisteredTaskCapableTools() {
|
|
21
|
-
return
|
|
32
|
+
export function hasRegisteredTaskCapableTools(server) {
|
|
33
|
+
return getServerToolMap(server).size > 0;
|
|
22
34
|
}
|
|
23
|
-
export function setTaskCapableToolSupport(name, support) {
|
|
24
|
-
const descriptor =
|
|
35
|
+
export function setTaskCapableToolSupport(server, name, support) {
|
|
36
|
+
const descriptor = getServerToolMap(server).get(name);
|
|
25
37
|
if (!descriptor)
|
|
26
38
|
return;
|
|
27
39
|
descriptor.taskSupport = support;
|
|
@@ -14,10 +14,12 @@ interface ToolResponseBase {
|
|
|
14
14
|
isError?: boolean;
|
|
15
15
|
}
|
|
16
16
|
export declare const FETCH_URL_TOOL_NAME = "fetch-url";
|
|
17
|
+
export declare function buildFetchUrlContentBlocks(structuredContent: Record<string, unknown>): ContentBlock[];
|
|
17
18
|
export declare function getFetchCompletionStatusMessage(result: ServerResult): string | undefined;
|
|
18
19
|
export declare class FetchUrlProgressPlan {
|
|
19
20
|
private readonly reporter;
|
|
20
21
|
private readonly context;
|
|
22
|
+
private readonly total;
|
|
21
23
|
constructor(reporter: ProgressReporter, context: string);
|
|
22
24
|
reportStart(): void;
|
|
23
25
|
reportStage(stage: SharedFetchStage): void;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AACvE,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAY7B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,EAGL,KAAK,gBAAgB,
|
|
1
|
+
{"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AACvE,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAY7B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,EAGL,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EACtB,MAAM,uBAAuB,CAAC;AAW/B,OAAO,EACL,mBAAmB,EAIpB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAIL,KAAK,sBAAsB,EAE5B,MAAM,sBAAsB,CAAC;AAE9B,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEzD,UAAU,gBAAgB;IACxB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC;IACxD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,eAAO,MAAM,mBAAmB,cAAc,CAAC;AA6F/C,wBAAgB,0BAA0B,CACxC,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACzC,YAAY,EAAE,CAUhB;AAuCD,wBAAgB,+BAA+B,CAC7C,MAAM,EAAE,YAAY,GACnB,MAAM,GAAG,SAAS,CAUpB;AAED,qBAAa,oBAAoB;IAI7B,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAJ1B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAa;gBAGhB,QAAQ,EAAE,gBAAgB,EAC1B,OAAO,EAAE,MAAM;IAGlC,WAAW,IAAI,IAAI;IAInB,WAAW,CAAC,KAAK,EAAE,gBAAgB,GAAG,IAAI;IAM1C,aAAa,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAQxC,aAAa,CAAC,SAAS,EAAE,OAAO,GAAG,IAAI;IAQvC,OAAO,CAAC,QAAQ;CAiCjB;AA8FD,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,aAAa,EACpB,KAAK,CAAC,EAAE,gBAAgB,GACvB,OAAO,CAAC,gBAAgB,CAAC,CAiC3B;AAqBD,MAAM,WAAW,wBAAwB;IACvC,cAAc,EAAE,CAAC,OAAO,EAAE,sBAAsB,KAAK,IAAI,CAAC;CAC3D;AAqBD,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,wBAAwB,CAuCzE"}
|
package/dist/tools/fetch-url.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
|
|
2
|
-
import { config,
|
|
2
|
+
import { config, logError, logInfo, logWarn } from '../lib/core.js';
|
|
3
3
|
import { finalizeInlineMarkdown, markdownTransform, performSharedFetch, withSignal, } from '../lib/fetch-pipeline.js';
|
|
4
|
-
import { createProgressReporter, handleToolError, } from '../lib/mcp-interop.js';
|
|
5
|
-
import { composeAbortSignal, isAbortError, isObject, parseUrlOrNull, toError, } from '../lib/utils.js';
|
|
4
|
+
import { createProgressReporter, handleToolError, registerToolPresentation, } from '../lib/mcp-interop.js';
|
|
5
|
+
import { composeAbortSignal, FetchError, isAbortError, isObject, parseUrlOrNull, toError, } from '../lib/utils.js';
|
|
6
6
|
import { formatZodError } from '../lib/zod.js';
|
|
7
7
|
import { fetchUrlInputSchema, fetchUrlOutputSchema, normalizeExtractedMetadata, normalizePageTitle, } from '../schemas.js';
|
|
8
8
|
import { withRequestContextIfMissing } from '../tasks/owner.js';
|
|
@@ -70,17 +70,23 @@ function validateStructuredContent(structuredContent, inputUrl) {
|
|
|
70
70
|
logWarn('Tool output schema validation failed', {
|
|
71
71
|
url: inputUrl,
|
|
72
72
|
issues,
|
|
73
|
-
});
|
|
73
|
+
}, 'fetch-url');
|
|
74
74
|
throw new McpError(ErrorCode.InternalError, 'fetch-url produced output that does not match its declared outputSchema', { issues });
|
|
75
75
|
}
|
|
76
|
-
function
|
|
77
|
-
|
|
76
|
+
export function buildFetchUrlContentBlocks(structuredContent) {
|
|
77
|
+
const markdown = typeof structuredContent['markdown'] === 'string'
|
|
78
|
+
? structuredContent['markdown']
|
|
79
|
+
: '';
|
|
80
|
+
return [
|
|
81
|
+
{ type: 'text', text: markdown },
|
|
82
|
+
{ type: 'text', text: JSON.stringify(structuredContent) },
|
|
83
|
+
];
|
|
78
84
|
}
|
|
79
85
|
function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
80
86
|
const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
|
|
81
87
|
validateStructuredContent(structuredContent, inputUrl);
|
|
82
88
|
return {
|
|
83
|
-
content:
|
|
89
|
+
content: buildFetchUrlContentBlocks(structuredContent),
|
|
84
90
|
structuredContent,
|
|
85
91
|
};
|
|
86
92
|
}
|
|
@@ -117,40 +123,53 @@ export function getFetchCompletionStatusMessage(result) {
|
|
|
117
123
|
export class FetchUrlProgressPlan {
|
|
118
124
|
reporter;
|
|
119
125
|
context;
|
|
126
|
+
total = Step.DONE;
|
|
120
127
|
constructor(reporter, context) {
|
|
121
128
|
this.reporter = reporter;
|
|
122
129
|
this.context = context;
|
|
123
130
|
}
|
|
124
131
|
reportStart() {
|
|
125
|
-
this.reporter.report(Step.START, 'Preparing request');
|
|
132
|
+
this.reporter.report(Step.START, 'Preparing request', this.total);
|
|
126
133
|
}
|
|
127
134
|
reportStage(stage) {
|
|
128
135
|
const mapped = this.mapStage(stage);
|
|
129
136
|
if (!mapped)
|
|
130
137
|
return;
|
|
131
|
-
this.reporter.report(mapped.step, mapped.message);
|
|
138
|
+
this.reporter.report(mapped.step, mapped.message, this.total);
|
|
132
139
|
}
|
|
133
140
|
reportSuccess(contentSize) {
|
|
134
|
-
this.reporter.report(Step.DONE, buildFetchSuccessSummary(contentSize));
|
|
141
|
+
this.reporter.report(Step.DONE, buildFetchSuccessSummary(contentSize), this.total);
|
|
135
142
|
}
|
|
136
143
|
reportFailure(cancelled) {
|
|
137
|
-
this.reporter.report(Step.DONE, cancelled ? 'Cancelled' : 'Failed');
|
|
144
|
+
this.reporter.report(Step.DONE, cancelled ? 'Cancelled' : 'Failed', this.total);
|
|
138
145
|
}
|
|
139
146
|
mapStage(stage) {
|
|
140
147
|
switch (stage) {
|
|
141
148
|
case 'resolve_url':
|
|
142
|
-
return {
|
|
149
|
+
return {
|
|
150
|
+
step: Step.RESOLVE_URL,
|
|
151
|
+
message: 'Resolving URL',
|
|
152
|
+
};
|
|
143
153
|
case 'fetch_remote':
|
|
144
154
|
return {
|
|
145
155
|
step: Step.FETCH,
|
|
146
156
|
message: `Fetching ${this.context}`,
|
|
147
157
|
};
|
|
148
158
|
case 'response_ready':
|
|
149
|
-
return {
|
|
159
|
+
return {
|
|
160
|
+
step: Step.RESPONSE,
|
|
161
|
+
message: 'Received response',
|
|
162
|
+
};
|
|
150
163
|
case 'transform_start':
|
|
151
|
-
return {
|
|
164
|
+
return {
|
|
165
|
+
step: Step.TRANSFORM,
|
|
166
|
+
message: 'Parsing HTML -> Markdown',
|
|
167
|
+
};
|
|
152
168
|
case 'prepare_output':
|
|
153
|
-
return {
|
|
169
|
+
return {
|
|
170
|
+
step: Step.PREPARE,
|
|
171
|
+
message: 'Fetch completed',
|
|
172
|
+
};
|
|
154
173
|
case 'finalize_output':
|
|
155
174
|
return undefined;
|
|
156
175
|
}
|
|
@@ -182,13 +201,32 @@ function buildFetchOptions(url, signal, progressPlan) {
|
|
|
182
201
|
async function executeFetch(input, extra) {
|
|
183
202
|
const { url } = input;
|
|
184
203
|
const signal = buildToolAbortSignal(extra?.signal);
|
|
204
|
+
const startedAt = performance.now();
|
|
205
|
+
const relatedTaskMeta = extra?._meta?.['io.modelcontextprotocol/related-task'];
|
|
206
|
+
const relatedTask = isObject(relatedTaskMeta) ? relatedTaskMeta : undefined;
|
|
185
207
|
const progressPlan = new FetchUrlProgressPlan(createProgressReporter(extra), formatUrlForDisplay(url));
|
|
186
|
-
logDebug('Fetching URL', { url });
|
|
187
208
|
try {
|
|
209
|
+
logInfo('fetch-url started', {
|
|
210
|
+
inputUrl: url,
|
|
211
|
+
hasProgressToken: extra?._meta?.progressToken !== undefined,
|
|
212
|
+
...(isObject(relatedTask) && typeof relatedTask['taskId'] === 'string'
|
|
213
|
+
? { taskId: relatedTask['taskId'] }
|
|
214
|
+
: {}),
|
|
215
|
+
}, 'fetch-url');
|
|
188
216
|
progressPlan.reportStart();
|
|
189
217
|
const { pipeline, inlineResult } = await performSharedFetch(buildFetchOptions(url, signal, progressPlan));
|
|
218
|
+
const truncated = inlineResult.truncated ?? pipeline.data.truncated;
|
|
219
|
+
logInfo('fetch-url completed', {
|
|
220
|
+
inputUrl: url,
|
|
221
|
+
resolvedUrl: pipeline.url,
|
|
222
|
+
...(pipeline.finalUrl ? { finalUrl: pipeline.finalUrl } : {}),
|
|
223
|
+
contentSize: inlineResult.contentSize,
|
|
224
|
+
durationMs: Math.round(performance.now() - startedAt),
|
|
225
|
+
...(truncated ? { truncated: true } : {}),
|
|
226
|
+
}, 'fetch-url');
|
|
227
|
+
const response = buildResponse(pipeline, inlineResult, url);
|
|
190
228
|
progressPlan.reportSuccess(inlineResult.contentSize);
|
|
191
|
-
return
|
|
229
|
+
return response;
|
|
192
230
|
}
|
|
193
231
|
catch (error) {
|
|
194
232
|
progressPlan.reportFailure(isAbortError(error));
|
|
@@ -196,8 +234,22 @@ async function executeFetch(input, extra) {
|
|
|
196
234
|
}
|
|
197
235
|
}
|
|
198
236
|
export async function fetchUrlToolHandler(input, extra) {
|
|
237
|
+
const startedAt = performance.now();
|
|
199
238
|
return executeFetch(input, extra).catch((error) => {
|
|
200
|
-
|
|
239
|
+
const durationMs = Math.round(performance.now() - startedAt);
|
|
240
|
+
if (error instanceof McpError) {
|
|
241
|
+
logError('fetch-url tool failed', { url: input.url, durationMs, error: toError(error) }, 'fetch-url');
|
|
242
|
+
}
|
|
243
|
+
else if (error instanceof FetchError || isAbortError(error)) {
|
|
244
|
+
logWarn('fetch-url request failed', {
|
|
245
|
+
url: input.url,
|
|
246
|
+
error: toError(error).message,
|
|
247
|
+
durationMs,
|
|
248
|
+
}, 'fetch-url');
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
logError('fetch-url request failed unexpectedly', { url: input.url, error: toError(error).message, durationMs }, 'fetch-url');
|
|
252
|
+
}
|
|
201
253
|
if (error instanceof McpError) {
|
|
202
254
|
throw error;
|
|
203
255
|
}
|
|
@@ -238,13 +290,13 @@ function createTaskCapableDescriptor() {
|
|
|
238
290
|
}
|
|
239
291
|
export function registerTools(server) {
|
|
240
292
|
if (!config.tools.enabled.includes(FETCH_URL_TOOL_NAME)) {
|
|
241
|
-
unregisterTaskCapableTool(FETCH_URL_TOOL_NAME);
|
|
293
|
+
unregisterTaskCapableTool(server, FETCH_URL_TOOL_NAME);
|
|
242
294
|
return {
|
|
243
295
|
setTaskSupport: () => { },
|
|
244
296
|
};
|
|
245
297
|
}
|
|
246
298
|
const descriptor = createTaskCapableDescriptor();
|
|
247
|
-
registerTaskCapableTool(descriptor);
|
|
299
|
+
registerTaskCapableTool(server, descriptor);
|
|
248
300
|
const registeredTool = server.registerTool(TOOL_DEFINITION.name, {
|
|
249
301
|
title: TOOL_DEFINITION.title,
|
|
250
302
|
description: TOOL_DEFINITION.description,
|
|
@@ -254,8 +306,11 @@ export function registerTools(server) {
|
|
|
254
306
|
execution: { taskSupport: 'optional' },
|
|
255
307
|
icons: [TOOL_ICON],
|
|
256
308
|
}, withRequestContextIfMissing(TOOL_DEFINITION.handler));
|
|
309
|
+
registerToolPresentation(server, TOOL_DEFINITION.name, {
|
|
310
|
+
icons: [TOOL_ICON],
|
|
311
|
+
});
|
|
257
312
|
const updateTaskSupport = (support) => {
|
|
258
|
-
setTaskCapableToolSupport(FETCH_URL_TOOL_NAME, support);
|
|
313
|
+
setTaskCapableToolSupport(server, FETCH_URL_TOOL_NAME, support);
|
|
259
314
|
registeredTool.execution = { taskSupport: support };
|
|
260
315
|
};
|
|
261
316
|
updateTaskSupport('optional');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/transform/dom-prep.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/transform/dom-prep.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AA8mBnD,eAAO,MAAM,sBAAsB,QAAmB,CAAC;AAyCvD,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA6B9D;AAuBD,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAY/D;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CASR;AA0CD,qEAAqE;AACrE,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAG5D;AA0RD,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA2BjE;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAE1D;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAY9D;AAWD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAQ3D;AAuDD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAON;AA4BD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR;AA0ED,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,MAAM,GAAG,QAAQ,GAChC,MAAM,CAaR;AAiMD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,EACzB,QAAQ,EAAE,QAAQ,GACjB,QAAQ,GAAG,IAAI,CAsCjB"}
|
|
@@ -457,7 +457,7 @@ function stripNoise(document, signal) {
|
|
|
457
457
|
if (config.noiseRemoval.debug) {
|
|
458
458
|
logDebug('Noise removal audit enabled', {
|
|
459
459
|
categories: [...(context.flags.navFooter ? ['nav-footer'] : [])],
|
|
460
|
-
});
|
|
460
|
+
}, 'transform');
|
|
461
461
|
}
|
|
462
462
|
// Structural Removal
|
|
463
463
|
removeNodes(document.querySelectorAll(context.noiseSelector));
|
|
@@ -1261,23 +1261,23 @@ function passesEmptySectionRatio(articleDoc) {
|
|
|
1261
1261
|
}
|
|
1262
1262
|
export function evaluateArticleContent(article, document) {
|
|
1263
1263
|
if (!passesContentRatioGate(article.textContent.length, document)) {
|
|
1264
|
-
logDebug('FAILED passesContentRatioGate');
|
|
1264
|
+
logDebug('FAILED passesContentRatioGate', undefined, 'transform');
|
|
1265
1265
|
return null;
|
|
1266
1266
|
}
|
|
1267
1267
|
if (!passesRetentionRulesFromHtml(document, article.content)) {
|
|
1268
|
-
logDebug('FAILED passesRetentionRulesFromHtml');
|
|
1268
|
+
logDebug('FAILED passesRetentionRulesFromHtml', undefined, 'transform');
|
|
1269
1269
|
return null;
|
|
1270
1270
|
}
|
|
1271
1271
|
if (hasTruncatedSentences(article.textContent)) {
|
|
1272
|
-
logDebug('FAILED hasTruncatedSentences');
|
|
1272
|
+
logDebug('FAILED hasTruncatedSentences', undefined, 'transform');
|
|
1273
1273
|
return null;
|
|
1274
1274
|
}
|
|
1275
1275
|
const articleDoc = parseHTML(`<!DOCTYPE html><html><body>${article.content}</body></html>`).document;
|
|
1276
1276
|
if (!passesEmptySectionRatio(articleDoc)) {
|
|
1277
1277
|
const headings = articleDoc.querySelectorAll('h1,h2,h3,h4,h5,h6');
|
|
1278
|
-
logDebug(`FAILED passesEmptySectionRatio: ${headings.length} headings
|
|
1278
|
+
logDebug(`FAILED passesEmptySectionRatio: ${headings.length} headings`, undefined, 'transform');
|
|
1279
1279
|
for (const h of headings) {
|
|
1280
|
-
logDebug(`H: ${h.textContent} ${String(hasSectionContent(h))}
|
|
1280
|
+
logDebug(`H: ${h.textContent} ${String(hasSectionContent(h))}`, undefined, 'transform');
|
|
1281
1281
|
}
|
|
1282
1282
|
return null;
|
|
1283
1283
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AA2CA,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,uBAAuB,CAAC;AAkB/B,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AA+BpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;
|
|
1
|
+
{"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AA2CA,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,uBAAuB,CAAC;AAkB/B,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AA+BpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAoMD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AA0bD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAuKD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IACjC,QAAQ,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAC;IAChC,gBAAgB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACxC,GACA,MAAM,CA0BR;AA+DD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAKD,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,qBAAqB,EAAE,OAAO,GAC7B,aAAa,GAAG,SAAS,CAuB3B;AA6bD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAMzB;AAaD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAkH1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAA
uB,CAAC,CAElC;AAED,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,GACrB,CAAC"}
|
|
@@ -67,7 +67,7 @@ class StageTracker {
|
|
|
67
67
|
durationMs: Math.round(durationMs),
|
|
68
68
|
thresholdMs: Math.round(warnThresholdMs),
|
|
69
69
|
url: context.url,
|
|
70
|
-
});
|
|
70
|
+
}, 'transform');
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
73
|
const event = {
|
|
@@ -134,7 +134,7 @@ class StageTracker {
|
|
|
134
134
|
logDebug('Diagnostic channel publish failed', {
|
|
135
135
|
stage: event.stage,
|
|
136
136
|
error: getErrorMessage(error),
|
|
137
|
-
});
|
|
137
|
+
}, 'transform');
|
|
138
138
|
}
|
|
139
139
|
}
|
|
140
140
|
runTrackedSync(url, signal, fn) {
|
|
@@ -195,7 +195,7 @@ function truncateHtml(html, inputTruncated = false) {
|
|
|
195
195
|
size: getUtf8ByteLength(html),
|
|
196
196
|
maxSize,
|
|
197
197
|
truncatedSize: getUtf8ByteLength(content),
|
|
198
|
-
});
|
|
198
|
+
}, 'transform');
|
|
199
199
|
return { html: content, truncated: true };
|
|
200
200
|
}
|
|
201
201
|
const MIN_SPA_CONTENT_LENGTH = 100;
|
|
@@ -334,7 +334,7 @@ function validateReaderability(doc, url, signal) {
|
|
|
334
334
|
if (textLength < MIN_SPA_CONTENT_LENGTH) {
|
|
335
335
|
logWarn('Very minimal server-rendered content detected (< 100 chars). ' +
|
|
336
336
|
'This might be a client-side rendered (SPA) application. ' +
|
|
337
|
-
'Content extraction may be incomplete.', { textLength });
|
|
337
|
+
'Content extraction may be incomplete.', { textLength }, 'transform');
|
|
338
338
|
}
|
|
339
339
|
throwIfAborted(signal, url, 'extract:article:readabilityCheck');
|
|
340
340
|
if (textLength >= MIN_READERABLE_TEXT_LENGTH && !isProbablyReaderable(doc)) {
|
|
@@ -385,7 +385,7 @@ function mapReadabilityResult(parsed) {
|
|
|
385
385
|
// runs later in buildContentSource), so this clone starts from raw HTML.
|
|
386
386
|
function extractArticle(document, url, signal) {
|
|
387
387
|
if (!isReadabilityCompatible(document)) {
|
|
388
|
-
logWarn('Document not compatible with Readability');
|
|
388
|
+
logWarn('Document not compatible with Readability', undefined, 'transform');
|
|
389
389
|
return null;
|
|
390
390
|
}
|
|
391
391
|
try {
|
|
@@ -398,17 +398,17 @@ function extractArticle(document, url, signal) {
|
|
|
398
398
|
return mapReadabilityResult(parsed);
|
|
399
399
|
}
|
|
400
400
|
catch (error) {
|
|
401
|
-
logError('Failed to extract article with Readability', error instanceof Error ? error : undefined);
|
|
401
|
+
logError('Failed to extract article with Readability', error instanceof Error ? error : undefined, 'transform');
|
|
402
402
|
return null;
|
|
403
403
|
}
|
|
404
404
|
}
|
|
405
405
|
function isValidInput(html, url) {
|
|
406
406
|
if (typeof html !== 'string' || html.length === 0) {
|
|
407
|
-
logWarn('extractContent called with invalid HTML input');
|
|
407
|
+
logWarn('extractContent called with invalid HTML input', undefined, 'transform');
|
|
408
408
|
return false;
|
|
409
409
|
}
|
|
410
410
|
if (typeof url !== 'string' || url.length === 0) {
|
|
411
|
-
logWarn('extractContent called with invalid URL');
|
|
411
|
+
logWarn('extractContent called with invalid URL', undefined, 'transform');
|
|
412
412
|
return false;
|
|
413
413
|
}
|
|
414
414
|
return true;
|
|
@@ -421,7 +421,7 @@ function applyBaseUri(document, url) {
|
|
|
421
421
|
logInfo('Failed to set baseURI (non-critical)', {
|
|
422
422
|
url: url.substring(0, 100),
|
|
423
423
|
error: getErrorMessage(error),
|
|
424
|
-
});
|
|
424
|
+
}, 'transform');
|
|
425
425
|
}
|
|
426
426
|
}
|
|
427
427
|
function createEmptyExtractionContext() {
|
|
@@ -477,7 +477,7 @@ function extractContentContext(html, url, options) {
|
|
|
477
477
|
if (error instanceof FetchError)
|
|
478
478
|
throw error;
|
|
479
479
|
throwIfAborted(options.signal, url, 'extract:error');
|
|
480
|
-
logError('Failed to extract content', error instanceof Error ? error : undefined);
|
|
480
|
+
logError('Failed to extract content', error instanceof Error ? error : undefined, 'transform');
|
|
481
481
|
return createEmptyExtractionContext();
|
|
482
482
|
}
|
|
483
483
|
}
|
|
@@ -613,7 +613,7 @@ export function htmlToMarkdown(html, metadata, options) {
|
|
|
613
613
|
catch (error) {
|
|
614
614
|
if (error instanceof FetchError)
|
|
615
615
|
throw error;
|
|
616
|
-
logError('Failed to convert HTML to markdown', error instanceof Error ? error : undefined);
|
|
616
|
+
logError('Failed to convert HTML to markdown', error instanceof Error ? error : undefined, 'transform');
|
|
617
617
|
throw new FetchError('Failed to convert HTML to markdown', url, 500, {
|
|
618
618
|
reason: 'markdown_convert_failed',
|
|
619
619
|
});
|
|
@@ -644,7 +644,7 @@ function tryTransformRawContent(params) {
|
|
|
644
644
|
return null;
|
|
645
645
|
logDebug('Preserving raw markdown content', {
|
|
646
646
|
url: params.url.substring(0, 80),
|
|
647
|
-
});
|
|
647
|
+
}, 'transform');
|
|
648
648
|
const { content, title } = buildRawMarkdownPayload({
|
|
649
649
|
rawContent: params.html,
|
|
650
650
|
url: params.url,
|
|
@@ -989,11 +989,13 @@ async function transformWithWorkerPool(htmlOrBuffer, url, options) {
|
|
|
989
989
|
});
|
|
990
990
|
}
|
|
991
991
|
function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
|
|
992
|
+
const poolStats = getWorkerPoolStats();
|
|
992
993
|
const isQueueFull = error instanceof FetchError && error.details['reason'] === 'queue_full';
|
|
993
994
|
if (isQueueFull) {
|
|
994
995
|
logWarn('Transform worker queue full; falling back to in-process', {
|
|
995
996
|
url: redactUrl(url),
|
|
996
|
-
|
|
997
|
+
...(poolStats ?? {}),
|
|
998
|
+
}, 'transform');
|
|
997
999
|
return transformInputInProcess(htmlOrBuffer, url, options);
|
|
998
1000
|
}
|
|
999
1001
|
throwIfAborted(options.signal, url, 'transform:worker-fallback');
|
|
@@ -1005,7 +1007,8 @@ function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
|
|
|
1005
1007
|
logWarn('Transform worker failed; falling back to in-process', {
|
|
1006
1008
|
url: redactUrl(url),
|
|
1007
1009
|
error: message,
|
|
1008
|
-
|
|
1010
|
+
...(poolStats ?? {}),
|
|
1011
|
+
}, 'transform');
|
|
1009
1012
|
return transformInputInProcess(htmlOrBuffer, url, options);
|
|
1010
1013
|
}
|
|
1011
1014
|
async function runWorkerTransformWithFallback(htmlOrBuffer, url, options) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"worker-pool.d.ts","sourceRoot":"","sources":["../../src/transform/worker-pool.ts"],"names":[],"mappings":"AA2BA,OAAO,KAAK,EACV,uBAAuB,EAKxB,MAAM,YAAY,CAAC;AAqIpB,UAAU,mBAAmB;IAC3B,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC,CAAC;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,aAAa,IAAI,MAAM,CAAC;IACxB,gBAAgB,IAAI,MAAM,CAAC;IAC3B,WAAW,IAAI,MAAM,CAAC;CACvB;AA6JD,cAAM,UAAW,YAAW,mBAAmB;IAC7C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAkC;IAExE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkC;IAC1D,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAChD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAEhD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAgC;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAmC;IAC5D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0B;IAErD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAA6B;gBAEhD,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;IASrC,SAAS,CACb,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC;IAC7B,SAAS,CACb,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GACA,OAAO,CAAC,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"worker-pool.d.ts","sourceRoot":"","sources":["../../src/transform/worker-pool.ts"],"names":[],"mappings":"AA2BA,OAAO,KAAK,EACV,uBAAuB,EAKxB,MAAM,YAAY,CAAC;AAqIpB,UAAU,mBAAmB;IAC3B,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC,CAAC;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,aAAa,IAAI,MAAM,CAAC;IACxB,gBAAgB,IAAI,MAAM,CAAC;IAC3B,WAAW,IAAI,MAAM,CAAC;CACvB;AA6JD,cAAM,UAAW,YAAW,mBAAmB;IAC7C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAkC;IAExE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkC;IAC1D,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAChD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAEhD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAgC;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAmC;IAC5D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0B;IAErD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAA6B;gBAEhD,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;IASrC,SAAS,CACb,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC;IAC7B,SAAS,CACb,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GACA,OAAO,CAAC,uBAAuB,CAAC;IAoDnC,aAAa,IAAI,MAAM;IAIvB,gBAAgB,IAAI,MAAM;IAI1B,WAAW,IAAI,MAAM;IAIrB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAWpB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAqC5B,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,iBAAiB;IAoDzB,OAAO,CAAC,aAAa;YA4BP,aAAa;IA2B3B,OAAO,CAAC,kBAAkB;IAY1B,OAAO,CAAC,WAAW;IA4CnB,OAAO,CAAC,cAAc;IAiCtB,OAAO,CAAC,aAAa;IAkCrB,OAAO,CAAC,eAAe;IAgCvB,OAAO,CAAC,mBAAmB;IA8B3B,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,QAAQ;IAUhB,OAAO,CAAC,QAAQ;IAUhB,OAAO,CAAC,QAAQ;IAShB,OAAO,CAAC,YAAY;IAmBpB,OAAO,CAAC,UAAU;IAgClB,OAAO,CAA
C,iBAAiB;IAgCzB,OAAO,CAAC,gBAAgB;IAyDxB,OAAO,CAAC,YAAY;IAwBpB,OAAO,CAAC,YAAY;IAIpB,OAAO,CAAC,iBAAiB;CAS1B;AAMD,wBAAgB,qBAAqB,IAAI,UAAU,CAclD;AAED,wBAAgB,kBAAkB,IAAI;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB,GAAG,IAAI,CAOP;AAED,wBAAsB,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAIxD"}
|
|
@@ -4,7 +4,7 @@ import process from 'node:process';
|
|
|
4
4
|
import { isSharedArrayBuffer } from 'node:util/types';
|
|
5
5
|
import { isMainThread, isMarkedAsUntransferable, parentPort, Worker, } from 'node:worker_threads';
|
|
6
6
|
import { z } from 'zod';
|
|
7
|
-
import { config, logWarn } from '../lib/core.js';
|
|
7
|
+
import { config, logDebug, logInfo, logWarn } from '../lib/core.js';
|
|
8
8
|
import { createAbortError, createUnrefTimeout, FetchError, getErrorMessage, } from '../lib/utils.js';
|
|
9
9
|
import { formatZodError } from '../lib/zod.js';
|
|
10
10
|
import { extractedMetadataSchema } from '../schemas.js';
|
|
@@ -232,6 +232,13 @@ class WorkerPool {
|
|
|
232
232
|
if (options.signal?.aborted)
|
|
233
233
|
throw createAbortError(url, 'transform:enqueue');
|
|
234
234
|
if (this.queue.depth >= this.queueMax) {
|
|
235
|
+
logWarn('Transform worker queue capacity reached', {
|
|
236
|
+
queueDepth: this.queue.depth,
|
|
237
|
+
queueMax: this.queueMax,
|
|
238
|
+
activeWorkers: this.busyCount,
|
|
239
|
+
capacity: this.capacity,
|
|
240
|
+
url,
|
|
241
|
+
}, 'transform');
|
|
235
242
|
throw new FetchError('Transform worker queue is full', url, HTTP_SERVICE_UNAVAILABLE, {
|
|
236
243
|
reason: 'queue_full',
|
|
237
244
|
stage: 'transform:enqueue',
|
|
@@ -263,6 +270,12 @@ class WorkerPool {
|
|
|
263
270
|
if (this.closed)
|
|
264
271
|
return;
|
|
265
272
|
this.closed = true;
|
|
273
|
+
logInfo('Shutting down transform worker pool', {
|
|
274
|
+
workers: this.workers.length,
|
|
275
|
+
activeWorkers: this.busyCount,
|
|
276
|
+
queueDepth: this.queue.depth,
|
|
277
|
+
inflight: this.inflight.size,
|
|
278
|
+
}, 'transform');
|
|
266
279
|
const terminations = this.workers
|
|
267
280
|
.map((slot) => slot?.worker.terminate().catch(() => undefined))
|
|
268
281
|
.filter((p) => p !== undefined);
|
|
@@ -374,6 +387,10 @@ class WorkerPool {
|
|
|
374
387
|
name,
|
|
375
388
|
...(resourceLimits ? { resourceLimits } : {}),
|
|
376
389
|
});
|
|
390
|
+
logDebug('Spawned transform worker', {
|
|
391
|
+
workerIndex,
|
|
392
|
+
workerName: name,
|
|
393
|
+
}, 'transform');
|
|
377
394
|
worker.unref();
|
|
378
395
|
worker.on('message', (raw) => {
|
|
379
396
|
this.onWorkerMessage(workerIndex, raw);
|
|
@@ -400,7 +417,7 @@ class WorkerPool {
|
|
|
400
417
|
workerIndex,
|
|
401
418
|
workerName: slot.name,
|
|
402
419
|
threadId: slot.worker.threadId,
|
|
403
|
-
});
|
|
420
|
+
}, 'transform');
|
|
404
421
|
if (slot.busy && slot.currentTaskId) {
|
|
405
422
|
try {
|
|
406
423
|
this.failTask(slot.currentTaskId, new FetchError(message, '', HTTP_SERVICE_UNAVAILABLE, {
|
|
@@ -424,6 +441,11 @@ class WorkerPool {
|
|
|
424
441
|
this.restartBackoff.set(workerIndex, attempts + 1);
|
|
425
442
|
if (attempts > 0) {
|
|
426
443
|
const delayMs = Math.min(1000 * 2 ** (attempts - 1), 30_000);
|
|
444
|
+
logWarn('Scheduling transform worker restart with backoff', {
|
|
445
|
+
workerIndex,
|
|
446
|
+
delayMs,
|
|
447
|
+
attempt: attempts + 1,
|
|
448
|
+
}, 'transform');
|
|
427
449
|
setTimeout(() => {
|
|
428
450
|
if (this.closed)
|
|
429
451
|
return;
|
|
@@ -519,7 +541,13 @@ class WorkerPool {
|
|
|
519
541
|
maybeScaleUp() {
|
|
520
542
|
if (this.getQueueDepth() > this.capacity * POOL_SCALE_THRESHOLD &&
|
|
521
543
|
this.capacity < this.maxCapacity) {
|
|
544
|
+
const previousCapacity = this.capacity;
|
|
522
545
|
this.capacity += 1;
|
|
546
|
+
logInfo('Scaled transform worker pool', {
|
|
547
|
+
fromCapacity: previousCapacity,
|
|
548
|
+
toCapacity: this.capacity,
|
|
549
|
+
queueDepth: this.getQueueDepth(),
|
|
550
|
+
}, 'transform');
|
|
523
551
|
}
|
|
524
552
|
}
|
|
525
553
|
drainQueue() {
|
|
@@ -586,6 +614,12 @@ class WorkerPool {
|
|
|
586
614
|
const inflight = this.takeInflight(task.id);
|
|
587
615
|
if (!inflight)
|
|
588
616
|
return;
|
|
617
|
+
logWarn('Transform worker task timed out', {
|
|
618
|
+
taskId: task.id,
|
|
619
|
+
url: task.url,
|
|
620
|
+
workerIndex,
|
|
621
|
+
timeoutMs: this.timeoutMs,
|
|
622
|
+
}, 'transform');
|
|
589
623
|
this.abortAndCleanTask(inflight, new FetchError('Request timeout', task.url, HTTP_GATEWAY_TIMEOUT, {
|
|
590
624
|
reason: 'timeout',
|
|
591
625
|
stage: 'transform:worker-timeout',
|
|
@@ -637,7 +671,13 @@ class WorkerPool {
|
|
|
637
671
|
let workerPool = null;
|
|
638
672
|
export function getOrCreateWorkerPool() {
|
|
639
673
|
const size = config.transform.maxWorkerScale === 0 ? 0 : POOL_MIN_WORKERS;
|
|
640
|
-
|
|
674
|
+
if (!workerPool) {
|
|
675
|
+
workerPool = new WorkerPool(size, DEFAULT_TIMEOUT_MS);
|
|
676
|
+
logInfo('Initialized transform worker pool', {
|
|
677
|
+
initialCapacity: workerPool.getCapacity(),
|
|
678
|
+
timeoutMs: DEFAULT_TIMEOUT_MS,
|
|
679
|
+
}, 'transform');
|
|
680
|
+
}
|
|
641
681
|
return workerPool;
|
|
642
682
|
}
|
|
643
683
|
export function getWorkerPoolStats() {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/fetch-url-mcp",
|
|
3
|
-
"version": "1.12.0",
|
|
3
|
+
"version": "1.12.2",
|
|
4
4
|
"mcpName": "io.github.j0hanz/fetch-url-mcp",
|
|
5
5
|
"description": "An MCP server that fetches web pages and converts them to clean, readable Markdown.",
|
|
6
6
|
"type": "module",
|
|
@@ -74,7 +74,7 @@
|
|
|
74
74
|
"test:coverage": "node scripts/tasks.mjs test --coverage",
|
|
75
75
|
"knip": "knip",
|
|
76
76
|
"knip:fix": "knip --fix",
|
|
77
|
-
"inspector": "npm run build && npx -y @modelcontextprotocol/inspector node dist/index.js
|
|
77
|
+
"inspector": "npm run build && npx -y @modelcontextprotocol/inspector node dist/index.js",
|
|
78
78
|
"prepublishOnly": "npm run lint && npm run type-check && npm run build"
|
|
79
79
|
},
|
|
80
80
|
"dependencies": {
|