@j0hanz/fetch-url-mcp 1.3.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -21
- package/dist/cli.d.ts +3 -3
- package/dist/cli.js +15 -8
- package/dist/http/auth.d.ts +6 -6
- package/dist/http/auth.js +78 -23
- package/dist/http/health.d.ts +1 -2
- package/dist/http/health.js +7 -18
- package/dist/http/helpers.d.ts +3 -11
- package/dist/http/helpers.js +28 -26
- package/dist/http/native.d.ts +0 -1
- package/dist/http/native.js +63 -41
- package/dist/http/rate-limit.d.ts +2 -2
- package/dist/http/rate-limit.js +11 -16
- package/dist/index.d.ts +0 -1
- package/dist/index.js +17 -20
- package/dist/{markdown-cleanup.d.ts → lib/content.d.ts} +4 -2
- package/dist/lib/content.js +1356 -0
- package/dist/lib/core.d.ts +253 -0
- package/dist/lib/core.js +1228 -0
- package/dist/{tool-pipeline.d.ts → lib/fetch-pipeline.d.ts} +1 -3
- package/dist/{tool-pipeline.js → lib/fetch-pipeline.js} +18 -44
- package/dist/{fetch.d.ts → lib/http.d.ts} +7 -9
- package/dist/{fetch.js → lib/http.js} +721 -1004
- package/dist/lib/mcp-tools.d.ts +28 -0
- package/dist/lib/mcp-tools.js +107 -0
- package/dist/{tool-progress.d.ts → lib/progress.d.ts} +0 -2
- package/dist/{tool-progress.js → lib/progress.js} +9 -14
- package/dist/lib/task-handlers.d.ts +5 -0
- package/dist/{mcp.js → lib/task-handlers.js} +95 -31
- package/dist/lib/url.d.ts +70 -0
- package/dist/lib/url.js +686 -0
- package/dist/lib/utils.d.ts +58 -0
- package/dist/lib/utils.js +304 -0
- package/dist/{prompts.d.ts → prompts/index.d.ts} +0 -1
- package/dist/{prompts.js → prompts/index.js} +1 -2
- package/dist/{resources.d.ts → resources/index.d.ts} +0 -1
- package/dist/{resources.js → resources/index.js} +87 -64
- package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -1
- package/dist/{instructions.js → resources/instructions.js} +5 -3
- package/dist/schemas/inputs.d.ts +7 -0
- package/dist/schemas/inputs.js +24 -0
- package/dist/schemas/outputs.d.ts +23 -0
- package/dist/schemas/outputs.js +77 -0
- package/dist/server.d.ts +0 -1
- package/dist/server.js +26 -25
- package/dist/tasks/execution.d.ts +0 -1
- package/dist/tasks/execution.js +106 -70
- package/dist/tasks/manager.d.ts +11 -3
- package/dist/tasks/manager.js +97 -73
- package/dist/tasks/owner.d.ts +3 -3
- package/dist/tasks/owner.js +2 -2
- package/dist/tasks/tool-registry.d.ts +11 -0
- package/dist/tasks/tool-registry.js +13 -0
- package/dist/tools/fetch-url.d.ts +28 -0
- package/dist/{tools.js → tools/fetch-url.js} +95 -147
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +4 -0
- package/dist/transform/html-translators.d.ts +1 -0
- package/dist/transform/html-translators.js +454 -0
- package/dist/transform/metadata.d.ts +4 -0
- package/dist/transform/metadata.js +183 -0
- package/dist/transform/transform.d.ts +0 -1
- package/dist/transform/transform.js +44 -679
- package/dist/transform/types.d.ts +9 -12
- package/dist/transform/types.js +0 -1
- package/dist/transform/worker-pool.d.ts +0 -1
- package/dist/transform/worker-pool.js +7 -16
- package/dist/transform/workers/shared.d.ts +7 -0
- package/dist/transform/workers/shared.js +130 -0
- package/dist/transform/workers/transform-child.d.ts +0 -1
- package/dist/transform/workers/transform-child.js +5 -135
- package/dist/transform/workers/transform-worker.d.ts +0 -1
- package/dist/transform/workers/transform-worker.js +7 -128
- package/package.json +11 -7
- package/dist/cache.d.ts +0 -54
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js +0 -261
- package/dist/cache.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts +0 -141
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js +0 -473
- package/dist/config.js.map +0 -1
- package/dist/crypto.d.ts +0 -4
- package/dist/crypto.d.ts.map +0 -1
- package/dist/crypto.js +0 -56
- package/dist/crypto.js.map +0 -1
- package/dist/dom-noise-removal.d.ts +0 -2
- package/dist/dom-noise-removal.d.ts.map +0 -1
- package/dist/dom-noise-removal.js +0 -494
- package/dist/dom-noise-removal.js.map +0 -1
- package/dist/download.d.ts +0 -4
- package/dist/download.d.ts.map +0 -1
- package/dist/download.js +0 -106
- package/dist/download.js.map +0 -1
- package/dist/errors.d.ts +0 -11
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js +0 -65
- package/dist/errors.js.map +0 -1
- package/dist/examples/mcp-fetch-url-client.js +0 -329
- package/dist/examples/mcp-fetch-url-client.js.map +0 -1
- package/dist/fetch-content.d.ts +0 -5
- package/dist/fetch-content.d.ts.map +0 -1
- package/dist/fetch-content.js +0 -164
- package/dist/fetch-content.js.map +0 -1
- package/dist/fetch-stream.d.ts +0 -5
- package/dist/fetch-stream.d.ts.map +0 -1
- package/dist/fetch-stream.js +0 -29
- package/dist/fetch-stream.js.map +0 -1
- package/dist/fetch.d.ts.map +0 -1
- package/dist/fetch.js.map +0 -1
- package/dist/host-normalization.d.ts +0 -2
- package/dist/host-normalization.d.ts.map +0 -1
- package/dist/host-normalization.js +0 -91
- package/dist/host-normalization.js.map +0 -1
- package/dist/http/auth.d.ts.map +0 -1
- package/dist/http/auth.js.map +0 -1
- package/dist/http/health.d.ts.map +0 -1
- package/dist/http/health.js.map +0 -1
- package/dist/http/helpers.d.ts.map +0 -1
- package/dist/http/helpers.js.map +0 -1
- package/dist/http/native.d.ts.map +0 -1
- package/dist/http/native.js.map +0 -1
- package/dist/http/rate-limit.d.ts.map +0 -1
- package/dist/http/rate-limit.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/instructions.d.ts.map +0 -1
- package/dist/instructions.js.map +0 -1
- package/dist/ip-blocklist.d.ts +0 -9
- package/dist/ip-blocklist.d.ts.map +0 -1
- package/dist/ip-blocklist.js +0 -79
- package/dist/ip-blocklist.js.map +0 -1
- package/dist/json.d.ts +0 -2
- package/dist/json.d.ts.map +0 -1
- package/dist/json.js +0 -45
- package/dist/json.js.map +0 -1
- package/dist/language-detection.d.ts +0 -3
- package/dist/language-detection.d.ts.map +0 -1
- package/dist/language-detection.js +0 -355
- package/dist/language-detection.js.map +0 -1
- package/dist/markdown-cleanup.d.ts.map +0 -1
- package/dist/markdown-cleanup.js +0 -534
- package/dist/markdown-cleanup.js.map +0 -1
- package/dist/mcp-validator.d.ts +0 -17
- package/dist/mcp-validator.d.ts.map +0 -1
- package/dist/mcp-validator.js +0 -45
- package/dist/mcp-validator.js.map +0 -1
- package/dist/mcp.d.ts +0 -4
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/observability.d.ts +0 -23
- package/dist/observability.d.ts.map +0 -1
- package/dist/observability.js +0 -238
- package/dist/observability.js.map +0 -1
- package/dist/prompts.d.ts.map +0 -1
- package/dist/prompts.js.map +0 -1
- package/dist/resources.d.ts.map +0 -1
- package/dist/resources.js.map +0 -1
- package/dist/server-tuning.d.ts +0 -15
- package/dist/server-tuning.d.ts.map +0 -1
- package/dist/server-tuning.js +0 -49
- package/dist/server-tuning.js.map +0 -1
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js.map +0 -1
- package/dist/session.d.ts +0 -42
- package/dist/session.d.ts.map +0 -1
- package/dist/session.js +0 -255
- package/dist/session.js.map +0 -1
- package/dist/tasks/execution.d.ts.map +0 -1
- package/dist/tasks/execution.js.map +0 -1
- package/dist/tasks/manager.d.ts.map +0 -1
- package/dist/tasks/manager.js.map +0 -1
- package/dist/tasks/owner.d.ts.map +0 -1
- package/dist/tasks/owner.js.map +0 -1
- package/dist/timer-utils.d.ts +0 -6
- package/dist/timer-utils.d.ts.map +0 -1
- package/dist/timer-utils.js +0 -27
- package/dist/timer-utils.js.map +0 -1
- package/dist/tool-errors.d.ts +0 -12
- package/dist/tool-errors.d.ts.map +0 -1
- package/dist/tool-errors.js +0 -55
- package/dist/tool-errors.js.map +0 -1
- package/dist/tool-pipeline.d.ts.map +0 -1
- package/dist/tool-pipeline.js.map +0 -1
- package/dist/tool-progress.d.ts.map +0 -1
- package/dist/tool-progress.js.map +0 -1
- package/dist/tools.d.ts +0 -54
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/transform/transform.d.ts.map +0 -1
- package/dist/transform/transform.js.map +0 -1
- package/dist/transform/types.d.ts.map +0 -1
- package/dist/transform/types.js.map +0 -1
- package/dist/transform/worker-pool.d.ts.map +0 -1
- package/dist/transform/worker-pool.js.map +0 -1
- package/dist/transform/workers/transform-child.d.ts.map +0 -1
- package/dist/transform/workers/transform-child.js.map +0 -1
- package/dist/transform/workers/transform-worker.d.ts.map +0 -1
- package/dist/transform/workers/transform-worker.js.map +0 -1
- package/dist/type-guards.d.ts +0 -16
- package/dist/type-guards.d.ts.map +0 -1
- package/dist/type-guards.js +0 -13
- package/dist/type-guards.js.map +0 -1
|
@@ -1,114 +1,17 @@
|
|
|
1
1
|
import { randomUUID } from 'node:crypto';
|
|
2
|
+
import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
|
|
2
3
|
import { z } from 'zod';
|
|
3
|
-
import * as cache from '
|
|
4
|
-
import { config } from '
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import { handleToolError } from '
|
|
8
|
-
import { appendTruncationMarker, markdownTransform, parseCachedMarkdownResult, performSharedFetch, readNestedRecord, readString, serializeMarkdownResult, TRUNCATION_MARKER, withSignal, } from '
|
|
9
|
-
import { createProgressReporter, } from '
|
|
10
|
-
import { isObject } from '
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
export { createProgressReporter, } from './tool-progress.js';
|
|
15
|
-
export const fetchUrlInputSchema = z.strictObject({
|
|
16
|
-
url: z
|
|
17
|
-
.url({ protocol: /^https?$/i })
|
|
18
|
-
.min(1)
|
|
19
|
-
.max(config.constants.maxUrlLength)
|
|
20
|
-
.describe(`Target URL. Max ${config.constants.maxUrlLength} chars.`),
|
|
21
|
-
skipNoiseRemoval: z
|
|
22
|
-
.boolean()
|
|
23
|
-
.optional()
|
|
24
|
-
.describe('Preserve navigation/footers (disable noise filtering).'),
|
|
25
|
-
forceRefresh: z
|
|
26
|
-
.boolean()
|
|
27
|
-
.optional()
|
|
28
|
-
.describe('Bypass cache and fetch fresh content.'),
|
|
29
|
-
maxInlineChars: z
|
|
30
|
-
.number()
|
|
31
|
-
.int()
|
|
32
|
-
.min(0)
|
|
33
|
-
.max(config.constants.maxHtmlSize)
|
|
34
|
-
.optional()
|
|
35
|
-
.describe(`Inline markdown limit (0-${config.constants.maxHtmlSize}, 0=unlimited). Lower of this or global limit applies.`),
|
|
36
|
-
});
|
|
37
|
-
export const fetchUrlOutputSchema = z.strictObject({
|
|
38
|
-
url: z
|
|
39
|
-
.string()
|
|
40
|
-
.min(1)
|
|
41
|
-
.max(config.constants.maxUrlLength)
|
|
42
|
-
.describe('Fetched URL.'),
|
|
43
|
-
inputUrl: z
|
|
44
|
-
.string()
|
|
45
|
-
.max(config.constants.maxUrlLength)
|
|
46
|
-
.optional()
|
|
47
|
-
.describe('Original requested URL.'),
|
|
48
|
-
resolvedUrl: z
|
|
49
|
-
.string()
|
|
50
|
-
.max(config.constants.maxUrlLength)
|
|
51
|
-
.optional()
|
|
52
|
-
.describe('Final URL after raw-content transformations.'),
|
|
53
|
-
finalUrl: z
|
|
54
|
-
.string()
|
|
55
|
-
.max(config.constants.maxUrlLength)
|
|
56
|
-
.optional()
|
|
57
|
-
.describe('Final URL after HTTP redirects.'),
|
|
58
|
-
cacheResourceUri: z
|
|
59
|
-
.string()
|
|
60
|
-
.max(config.constants.maxUrlLength)
|
|
61
|
-
.optional()
|
|
62
|
-
.describe('URI for resources/read to get full markdown.'),
|
|
63
|
-
title: z.string().max(512).optional().describe('Page title.'),
|
|
64
|
-
metadata: z
|
|
65
|
-
.strictObject({
|
|
66
|
-
title: z.string().max(512).optional().describe('Detected page title.'),
|
|
67
|
-
description: z
|
|
68
|
-
.string()
|
|
69
|
-
.max(2048)
|
|
70
|
-
.optional()
|
|
71
|
-
.describe('Detected page description.'),
|
|
72
|
-
author: z.string().max(512).optional().describe('Detected page author.'),
|
|
73
|
-
image: z
|
|
74
|
-
.string()
|
|
75
|
-
.max(config.constants.maxUrlLength)
|
|
76
|
-
.optional()
|
|
77
|
-
.describe('Detected page preview image URL.'),
|
|
78
|
-
favicon: z
|
|
79
|
-
.string()
|
|
80
|
-
.max(config.constants.maxUrlLength)
|
|
81
|
-
.optional()
|
|
82
|
-
.describe('Detected page favicon URL.'),
|
|
83
|
-
publishedAt: z
|
|
84
|
-
.string()
|
|
85
|
-
.max(64)
|
|
86
|
-
.optional()
|
|
87
|
-
.describe('Detected publication date.'),
|
|
88
|
-
modifiedAt: z
|
|
89
|
-
.string()
|
|
90
|
-
.max(64)
|
|
91
|
-
.optional()
|
|
92
|
-
.describe('Detected last modified date.'),
|
|
93
|
-
})
|
|
94
|
-
.optional()
|
|
95
|
-
.describe('Extracted page metadata.'),
|
|
96
|
-
markdown: (config.constants.maxInlineContentChars > 0
|
|
97
|
-
? z.string().max(config.constants.maxInlineContentChars)
|
|
98
|
-
: z.string())
|
|
99
|
-
.optional()
|
|
100
|
-
.describe('Extracted Markdown. May be truncated (check truncated field).'),
|
|
101
|
-
fromCache: z.boolean().optional().describe('True if served from cache.'),
|
|
102
|
-
fetchedAt: z.string().max(64).optional().describe('ISO timestamp of fetch.'),
|
|
103
|
-
contentSize: z
|
|
104
|
-
.number()
|
|
105
|
-
.int()
|
|
106
|
-
.min(0)
|
|
107
|
-
.max(config.constants.maxHtmlSize * 4)
|
|
108
|
-
.optional()
|
|
109
|
-
.describe('Full markdown size before truncation.'),
|
|
110
|
-
truncated: z.boolean().optional().describe('True if markdown was truncated.'),
|
|
111
|
-
});
|
|
4
|
+
import * as cache from '../lib/core.js';
|
|
5
|
+
import { config } from '../lib/core.js';
|
|
6
|
+
import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from '../lib/core.js';
|
|
7
|
+
import { generateSafeFilename } from '../lib/http.js';
|
|
8
|
+
import { handleToolError } from '../lib/mcp-tools.js';
|
|
9
|
+
import { appendTruncationMarker, markdownTransform, parseCachedMarkdownResult, performSharedFetch, readNestedRecord, readString, serializeMarkdownResult, TRUNCATION_MARKER, withSignal, } from '../lib/mcp-tools.js';
|
|
10
|
+
import { createProgressReporter, } from '../lib/mcp-tools.js';
|
|
11
|
+
import { isAbortError, isObject, toError } from '../lib/utils.js';
|
|
12
|
+
import { fetchUrlInputSchema } from '../schemas/inputs.js';
|
|
13
|
+
import { fetchUrlOutputSchema } from '../schemas/outputs.js';
|
|
14
|
+
import { registerTaskCapableTool, unregisterTaskCapableTool, } from '../tasks/tool-registry.js';
|
|
112
15
|
export const FETCH_URL_TOOL_NAME = 'fetch-url';
|
|
113
16
|
const FETCH_URL_TOOL_DESCRIPTION = `
|
|
114
17
|
<role>Web Content Extractor</role>
|
|
@@ -125,6 +28,7 @@ const TOOL_ICON = {
|
|
|
125
28
|
src: 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSJjdXJyZW50Q29sb3IiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMjEgMTV2NGEyIDIgMCAwIDEtMiAySDVhMiAyIDAgMCAxLTItMnYtNCIvPjxwb2x5bGluZSBwb2ludHM9IjcgMTAgMTIgMTUgMTcgMTAiLz48bGluZSB4MT0iMTIiIHkxPSIxNSIgeDI9IjEyIiB5Mj0iMyIvPjwvc3ZnPg==',
|
|
126
29
|
mimeType: 'image/svg+xml',
|
|
127
30
|
};
|
|
31
|
+
const JSON_SCHEMA_DRAFT_2020_12_URI = 'https://json-schema.org/draft/2020-12/schema';
|
|
128
32
|
/* -------------------------------------------------------------------------------------------------
|
|
129
33
|
* Tool response builders
|
|
130
34
|
* ------------------------------------------------------------------------------------------------- */
|
|
@@ -138,7 +42,7 @@ function buildEmbeddedResource(content, url, title) {
|
|
|
138
42
|
if (!content)
|
|
139
43
|
return null;
|
|
140
44
|
const filename = generateSafeFilename(url, title, undefined, '.md');
|
|
141
|
-
const uri =
|
|
45
|
+
const uri = `internal://inline/${encodeURIComponent(filename)}`;
|
|
142
46
|
const resource = {
|
|
143
47
|
uri,
|
|
144
48
|
mimeType: 'text/markdown',
|
|
@@ -178,11 +82,11 @@ function appendIfPresent(items, value) {
|
|
|
178
82
|
/* -------------------------------------------------------------------------------------------------
|
|
179
83
|
* Tool abort signal
|
|
180
84
|
* ------------------------------------------------------------------------------------------------- */
|
|
85
|
+
const HARD_TOOL_TIMEOUT_MS = 300_000;
|
|
181
86
|
function buildToolAbortSignal(extraSignal) {
|
|
182
87
|
const { timeoutMs } = config.tools;
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
const timeoutSignal = AbortSignal.timeout(timeoutMs);
|
|
88
|
+
const effectiveTimeout = timeoutMs > 0 ? timeoutMs : HARD_TOOL_TIMEOUT_MS;
|
|
89
|
+
const timeoutSignal = AbortSignal.timeout(effectiveTimeout);
|
|
186
90
|
if (!extraSignal)
|
|
187
91
|
return timeoutSignal;
|
|
188
92
|
return AbortSignal.any([extraSignal, timeoutSignal]);
|
|
@@ -195,19 +99,21 @@ function truncateStr(value, max) {
|
|
|
195
99
|
return value;
|
|
196
100
|
return value.slice(0, max);
|
|
197
101
|
}
|
|
102
|
+
const METADATA_FIELD_LIMITS = {
|
|
103
|
+
title: 512,
|
|
104
|
+
description: 2048,
|
|
105
|
+
author: 512,
|
|
106
|
+
};
|
|
198
107
|
function truncateMetadata(metadata) {
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
? { author: truncateStr(metadata.author, 512) }
|
|
209
|
-
: {}),
|
|
210
|
-
};
|
|
108
|
+
const result = { ...metadata };
|
|
109
|
+
for (const [field, limit] of Object.entries(METADATA_FIELD_LIMITS)) {
|
|
110
|
+
const key = field;
|
|
111
|
+
const value = result[key];
|
|
112
|
+
if (typeof value === 'string' && value.length > limit) {
|
|
113
|
+
result[key] = value.slice(0, limit);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
return result;
|
|
211
117
|
}
|
|
212
118
|
function buildStructuredContent(pipeline, inlineResult, inputUrl) {
|
|
213
119
|
const cacheResourceUri = resolveCacheResourceUri(pipeline.cacheKey);
|
|
@@ -280,7 +186,7 @@ function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
|
280
186
|
/* -------------------------------------------------------------------------------------------------
|
|
281
187
|
* fetch-url tool implementation
|
|
282
188
|
* ------------------------------------------------------------------------------------------------- */
|
|
283
|
-
|
|
189
|
+
function getUrlContext(urlStr) {
|
|
284
190
|
try {
|
|
285
191
|
const u = new URL(urlStr);
|
|
286
192
|
const host = u.hostname.replace(/^www\./, '');
|
|
@@ -321,11 +227,7 @@ export function getUrlContext(urlStr) {
|
|
|
321
227
|
}
|
|
322
228
|
}
|
|
323
229
|
async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars) {
|
|
324
|
-
const
|
|
325
|
-
if (!progress)
|
|
326
|
-
return;
|
|
327
|
-
progress.report(step, message);
|
|
328
|
-
};
|
|
230
|
+
const contextStr = getUrlContext(url);
|
|
329
231
|
return performSharedFetch({
|
|
330
232
|
url,
|
|
331
233
|
...withSignal(signal),
|
|
@@ -333,48 +235,67 @@ async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefre
|
|
|
333
235
|
...(forceRefresh ? { forceRefresh: true } : {}),
|
|
334
236
|
...(maxInlineChars !== undefined ? { maxInlineChars } : {}),
|
|
335
237
|
transform: async ({ buffer, encoding, truncated }, normalizedUrl) => {
|
|
336
|
-
|
|
337
|
-
reportProgress(2, `fetch-url: ${contextStr} [converting to Markdown]`);
|
|
238
|
+
reportFetchProgress(progress, 2, contextStr, 'converting to Markdown');
|
|
338
239
|
return markdownTransform({ buffer, encoding, ...(truncated ? { truncated } : {}) }, normalizedUrl, signal, skipNoiseRemoval);
|
|
339
240
|
},
|
|
340
241
|
serialize: serializeMarkdownResult,
|
|
341
242
|
deserialize: parseCachedMarkdownResult,
|
|
342
243
|
});
|
|
343
244
|
}
|
|
245
|
+
function buildFetchProgressMessage(context, state) {
|
|
246
|
+
if (state === 'completed' || state === 'cancelled' || state === 'failed') {
|
|
247
|
+
return `fetch-url: ${context} • ${state}`;
|
|
248
|
+
}
|
|
249
|
+
return `fetch-url: ${context} [${state}]`;
|
|
250
|
+
}
|
|
251
|
+
function reportFetchProgress(progress, step, context, state) {
|
|
252
|
+
if (!progress)
|
|
253
|
+
return;
|
|
254
|
+
progress.report(step, buildFetchProgressMessage(context, state));
|
|
255
|
+
}
|
|
344
256
|
async function executeFetch(input, extra) {
|
|
345
257
|
const { url } = input;
|
|
346
258
|
const signal = buildToolAbortSignal(extra?.signal);
|
|
347
259
|
const progress = createProgressReporter(extra);
|
|
348
260
|
const contextStr = getUrlContext(url);
|
|
349
|
-
progress
|
|
261
|
+
reportFetchProgress(progress, 0, contextStr, 'starting');
|
|
350
262
|
logDebug('Fetching URL', { url });
|
|
351
263
|
try {
|
|
352
|
-
progress
|
|
264
|
+
reportFetchProgress(progress, 1, contextStr, 'fetching HTML');
|
|
353
265
|
const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress, input.skipNoiseRemoval, input.forceRefresh, input.maxInlineChars);
|
|
354
266
|
if (pipeline.fromCache) {
|
|
355
|
-
progress
|
|
267
|
+
reportFetchProgress(progress, 3, contextStr, 'loaded from cache');
|
|
356
268
|
}
|
|
357
|
-
progress
|
|
269
|
+
reportFetchProgress(progress, 4, contextStr, 'completed');
|
|
358
270
|
return buildResponse(pipeline, inlineResult, url);
|
|
359
271
|
}
|
|
360
272
|
catch (error) {
|
|
361
|
-
const isAbort = error
|
|
362
|
-
progress
|
|
273
|
+
const isAbort = isAbortError(error);
|
|
274
|
+
reportFetchProgress(progress, 4, contextStr, isAbort ? 'cancelled' : 'failed');
|
|
363
275
|
throw error;
|
|
364
276
|
}
|
|
365
277
|
}
|
|
366
278
|
export async function fetchUrlToolHandler(input, extra) {
|
|
367
279
|
return executeFetch(input, extra).catch((error) => {
|
|
368
|
-
logError('fetch-url tool error', error
|
|
280
|
+
logError('fetch-url tool error', toError(error));
|
|
369
281
|
return handleToolError(error, input.url, 'Failed to fetch URL');
|
|
370
282
|
});
|
|
371
283
|
}
|
|
284
|
+
function withJsonSchema202012(schema) {
|
|
285
|
+
if (typeof schema['$schema'] === 'string')
|
|
286
|
+
return schema;
|
|
287
|
+
return {
|
|
288
|
+
$schema: JSON_SCHEMA_DRAFT_2020_12_URI,
|
|
289
|
+
...schema,
|
|
290
|
+
};
|
|
291
|
+
}
|
|
372
292
|
const TOOL_DEFINITION = {
|
|
373
293
|
name: FETCH_URL_TOOL_NAME,
|
|
374
294
|
title: 'Fetch URL',
|
|
375
295
|
description: FETCH_URL_TOOL_DESCRIPTION,
|
|
376
296
|
inputSchema: fetchUrlInputSchema,
|
|
377
|
-
|
|
297
|
+
// Explicitly mark JSON Schema dialect for MCP clients and static reviews.
|
|
298
|
+
outputSchema: withJsonSchema202012(z.toJSONSchema(fetchUrlOutputSchema)),
|
|
378
299
|
handler: fetchUrlToolHandler,
|
|
379
300
|
execution: {
|
|
380
301
|
taskSupport: 'optional',
|
|
@@ -386,6 +307,18 @@ const TOOL_DEFINITION = {
|
|
|
386
307
|
openWorldHint: true,
|
|
387
308
|
},
|
|
388
309
|
};
|
|
310
|
+
function applyRegisteredToolExecutionMetadata(registeredTool, execution) {
|
|
311
|
+
// SDK workaround: RegisteredTool does not expose `execution` in its public type.
|
|
312
|
+
// Keep the mutation localized to one helper so future SDK upgrades touch one place.
|
|
313
|
+
registeredTool.execution = execution;
|
|
314
|
+
}
|
|
315
|
+
/**
|
|
316
|
+
* Stdio-path guard: ensures a request context (requestId, sessionId) is set
|
|
317
|
+
* in AsyncLocalStorage before invoking the handler. On the HTTP path the SDK
|
|
318
|
+
* populates `extra.requestId`/`extra.requestInfo`, so this is a no-op there.
|
|
319
|
+
* On the stdio path there is no SDK-provided context, so we derive one from
|
|
320
|
+
* the extra fields or generate a fresh UUID.
|
|
321
|
+
*/
|
|
389
322
|
export function withRequestContextIfMissing(handler) {
|
|
390
323
|
return async (params, extra) => {
|
|
391
324
|
const existingRequestId = getRequestId();
|
|
@@ -422,8 +355,25 @@ function resolveSessionIdFromExtra(extra) {
|
|
|
422
355
|
return typeof headerValue === 'string' ? headerValue : undefined;
|
|
423
356
|
}
|
|
424
357
|
export function registerTools(server) {
|
|
425
|
-
if (!config.tools.enabled.includes(FETCH_URL_TOOL_NAME))
|
|
358
|
+
if (!config.tools.enabled.includes(FETCH_URL_TOOL_NAME)) {
|
|
359
|
+
unregisterTaskCapableTool(FETCH_URL_TOOL_NAME);
|
|
426
360
|
return;
|
|
361
|
+
}
|
|
362
|
+
registerTaskCapableTool({
|
|
363
|
+
name: FETCH_URL_TOOL_NAME,
|
|
364
|
+
parseArguments: (args) => {
|
|
365
|
+
const parsed = fetchUrlInputSchema.safeParse(args);
|
|
366
|
+
if (!parsed.success) {
|
|
367
|
+
const flat = z.flattenError(parsed.error);
|
|
368
|
+
const details = Object.entries(flat.fieldErrors)
|
|
369
|
+
.map(([k, v]) => `${k}: ${v.join(', ')}`)
|
|
370
|
+
.join('; ') || flat.formErrors.join('; ');
|
|
371
|
+
throw new McpError(ErrorCode.InvalidParams, `Invalid arguments for ${FETCH_URL_TOOL_NAME}: ${details}`);
|
|
372
|
+
}
|
|
373
|
+
return parsed.data;
|
|
374
|
+
},
|
|
375
|
+
execute: fetchUrlToolHandler,
|
|
376
|
+
});
|
|
427
377
|
const registeredTool = server.registerTool(TOOL_DEFINITION.name, {
|
|
428
378
|
title: TOOL_DEFINITION.title,
|
|
429
379
|
description: TOOL_DEFINITION.description,
|
|
@@ -433,9 +383,7 @@ export function registerTools(server) {
|
|
|
433
383
|
execution: TOOL_DEFINITION.execution,
|
|
434
384
|
icons: [TOOL_ICON],
|
|
435
385
|
}, withRequestContextIfMissing(TOOL_DEFINITION.handler));
|
|
436
|
-
// SDK workaround:
|
|
437
|
-
//
|
|
438
|
-
|
|
439
|
-
registeredTool.execution = TOOL_DEFINITION.execution;
|
|
386
|
+
// SDK typing gap workaround: preserve runtime `execution` metadata until the
|
|
387
|
+
// registered tool type includes this field.
|
|
388
|
+
applyRegisteredToolExecutionMetadata(registeredTool, TOOL_DEFINITION.execution);
|
|
440
389
|
}
|
|
441
|
-
//# sourceMappingURL=tools.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function translateHtmlFragmentToMarkdown(html: string): string;
|