@j0hanz/superfetch 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +356 -223
- package/dist/assets/logo.svg +24837 -24835
- package/dist/cache.d.ts +28 -20
- package/dist/cache.js +292 -514
- package/dist/config.d.ts +41 -7
- package/dist/config.js +298 -148
- package/dist/crypto.js +25 -12
- package/dist/dom-noise-removal.js +379 -421
- package/dist/errors.d.ts +2 -2
- package/dist/errors.js +25 -8
- package/dist/fetch.d.ts +18 -16
- package/dist/fetch.js +1132 -526
- package/dist/host-normalization.js +40 -10
- package/dist/http-native.js +628 -287
- package/dist/index.js +67 -7
- package/dist/instructions.md +44 -30
- package/dist/ip-blocklist.d.ts +8 -0
- package/dist/ip-blocklist.js +65 -0
- package/dist/json.js +14 -9
- package/dist/language-detection.d.ts +2 -11
- package/dist/language-detection.js +289 -280
- package/dist/markdown-cleanup.d.ts +0 -1
- package/dist/markdown-cleanup.js +391 -429
- package/dist/mcp-validator.js +4 -2
- package/dist/mcp.js +184 -135
- package/dist/observability.js +89 -21
- package/dist/resources.js +16 -6
- package/dist/server-tuning.d.ts +2 -0
- package/dist/server-tuning.js +25 -23
- package/dist/session.d.ts +1 -0
- package/dist/session.js +41 -33
- package/dist/tasks.d.ts +2 -0
- package/dist/tasks.js +91 -9
- package/dist/timer-utils.d.ts +5 -0
- package/dist/timer-utils.js +20 -0
- package/dist/tools.d.ts +28 -5
- package/dist/tools.js +317 -183
- package/dist/transform-types.d.ts +5 -1
- package/dist/transform.d.ts +3 -2
- package/dist/transform.js +1138 -421
- package/dist/type-guards.d.ts +1 -0
- package/dist/type-guards.js +7 -0
- package/dist/workers/transform-child.d.ts +1 -0
- package/dist/workers/transform-child.js +118 -0
- package/dist/workers/transform-worker.js +87 -78
- package/package.json +21 -13
package/dist/tools.js
CHANGED
|
@@ -3,19 +3,27 @@ import { z } from 'zod';
|
|
|
3
3
|
import * as cache from './cache.js';
|
|
4
4
|
import { config } from './config.js';
|
|
5
5
|
import { FetchError, getErrorMessage, isSystemError } from './errors.js';
|
|
6
|
-
import {
|
|
6
|
+
import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './fetch.js';
|
|
7
7
|
import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
|
|
8
|
-
import {
|
|
8
|
+
import { transformBufferToMarkdown } from './transform.js';
|
|
9
9
|
import { isObject } from './type-guards.js';
|
|
10
10
|
const TRUNCATION_MARKER = '...[truncated]';
|
|
11
11
|
const FETCH_PROGRESS_TOTAL = 4;
|
|
12
12
|
const PROGRESS_NOTIFICATION_TIMEOUT_MS = 5000;
|
|
13
|
-
const fetchUrlInputSchema = z.strictObject({
|
|
13
|
+
export const fetchUrlInputSchema = z.strictObject({
|
|
14
14
|
url: z
|
|
15
15
|
.url({ protocol: /^https?$/i })
|
|
16
16
|
.min(1)
|
|
17
17
|
.max(config.constants.maxUrlLength)
|
|
18
18
|
.describe('The URL of the webpage to fetch and convert to Markdown'),
|
|
19
|
+
skipNoiseRemoval: z
|
|
20
|
+
.boolean()
|
|
21
|
+
.optional()
|
|
22
|
+
.describe('When true, preserves navigation, footers, and other elements normally filtered as noise'),
|
|
23
|
+
forceRefresh: z
|
|
24
|
+
.boolean()
|
|
25
|
+
.optional()
|
|
26
|
+
.describe('When true, bypasses the cache and fetches fresh content from the URL'),
|
|
19
27
|
});
|
|
20
28
|
const fetchUrlOutputSchema = z.strictObject({
|
|
21
29
|
url: z
|
|
@@ -34,16 +42,29 @@ const fetchUrlOutputSchema = z.strictObject({
|
|
|
34
42
|
.optional()
|
|
35
43
|
.describe('The normalized or transformed URL that was fetched'),
|
|
36
44
|
title: z.string().max(512).optional().describe('Page title'),
|
|
37
|
-
markdown:
|
|
38
|
-
.string()
|
|
39
|
-
.
|
|
45
|
+
markdown: (config.constants.maxInlineContentChars > 0
|
|
46
|
+
? z.string().max(config.constants.maxInlineContentChars)
|
|
47
|
+
: z.string())
|
|
40
48
|
.optional()
|
|
41
49
|
.describe('The extracted content in Markdown format'),
|
|
50
|
+
truncated: z
|
|
51
|
+
.boolean()
|
|
52
|
+
.optional()
|
|
53
|
+
.describe('Whether the returned markdown was truncated'),
|
|
42
54
|
error: z
|
|
43
55
|
.string()
|
|
44
56
|
.max(2048)
|
|
45
57
|
.optional()
|
|
46
58
|
.describe('Error message if the request failed'),
|
|
59
|
+
statusCode: z
|
|
60
|
+
.number()
|
|
61
|
+
.int()
|
|
62
|
+
.optional()
|
|
63
|
+
.describe('HTTP status code for failed requests'),
|
|
64
|
+
details: z
|
|
65
|
+
.record(z.string(), z.unknown())
|
|
66
|
+
.optional()
|
|
67
|
+
.describe('Additional error details when available'),
|
|
47
68
|
});
|
|
48
69
|
export const FETCH_URL_TOOL_NAME = 'fetch-url';
|
|
49
70
|
const FETCH_URL_TOOL_DESCRIPTION = `
|
|
@@ -55,7 +76,7 @@ This tool is useful for:
|
|
|
55
76
|
- Caching content to speed up repeated queries.
|
|
56
77
|
|
|
57
78
|
Limitations:
|
|
58
|
-
-
|
|
79
|
+
- Inline output may be truncated when MAX_INLINE_CONTENT_CHARS is set.
|
|
59
80
|
- Does not execute complex client-side JavaScript interactions.
|
|
60
81
|
`.trim();
|
|
61
82
|
// Specific icon for the fetch-url tool (download cloud / web)
|
|
@@ -63,17 +84,53 @@ const TOOL_ICON = {
|
|
|
63
84
|
src: 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSJjdXJyZW50Q29sb3IiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMjEgMTV2NGEyIDIgMCAwIDEtMiAySDVhMiAyIDAgMCAxLTItMnYtNCIvPjxwb2x5bGluZSBwb2ludHM9IjcgMTAgMTIgMTUgMTcgMTAiLz48bGluZSB4MT0iMTIiIHkxPSIxNSIgeDI9IjEyIiB5Mj0iMyIvPjwvc3ZnPg==',
|
|
64
85
|
mimeType: 'image/svg+xml',
|
|
65
86
|
};
|
|
87
|
+
function asRecord(value) {
|
|
88
|
+
return isObject(value) ? value : undefined;
|
|
89
|
+
}
|
|
90
|
+
function readUnknown(obj, key) {
|
|
91
|
+
const record = asRecord(obj);
|
|
92
|
+
return record ? record[key] : undefined;
|
|
93
|
+
}
|
|
94
|
+
function readString(obj, key) {
|
|
95
|
+
const value = readUnknown(obj, key);
|
|
96
|
+
return typeof value === 'string' ? value : undefined;
|
|
97
|
+
}
|
|
98
|
+
function readNestedRecord(obj, keys) {
|
|
99
|
+
let current = obj;
|
|
100
|
+
for (const key of keys) {
|
|
101
|
+
current = readUnknown(current, key);
|
|
102
|
+
if (current === undefined)
|
|
103
|
+
return undefined;
|
|
104
|
+
}
|
|
105
|
+
return asRecord(current);
|
|
106
|
+
}
|
|
107
|
+
function safeJsonParse(value) {
|
|
108
|
+
try {
|
|
109
|
+
return JSON.parse(value);
|
|
110
|
+
}
|
|
111
|
+
catch {
|
|
112
|
+
return undefined;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
function withSignal(signal) {
|
|
116
|
+
return signal === undefined ? {} : { signal };
|
|
117
|
+
}
|
|
118
|
+
function buildToolAbortSignal(extraSignal) {
|
|
119
|
+
const { timeoutMs } = config.tools;
|
|
120
|
+
if (timeoutMs <= 0)
|
|
121
|
+
return extraSignal;
|
|
122
|
+
const timeoutSignal = AbortSignal.timeout(timeoutMs);
|
|
123
|
+
if (!extraSignal)
|
|
124
|
+
return timeoutSignal;
|
|
125
|
+
return AbortSignal.any([extraSignal, timeoutSignal]);
|
|
126
|
+
}
|
|
66
127
|
/* -------------------------------------------------------------------------------------------------
|
|
67
128
|
* Progress reporting
|
|
68
129
|
* ------------------------------------------------------------------------------------------------- */
|
|
69
130
|
function resolveRelatedTaskMeta(meta) {
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
if (!isObject(related))
|
|
74
|
-
return undefined;
|
|
75
|
-
const { taskId } = related;
|
|
76
|
-
return typeof taskId === 'string' ? { taskId } : undefined;
|
|
131
|
+
const related = readUnknown(meta, 'io.modelcontextprotocol/related-task');
|
|
132
|
+
const taskId = readString(related, 'taskId');
|
|
133
|
+
return taskId ? { taskId } : undefined;
|
|
77
134
|
}
|
|
78
135
|
class ToolProgressReporter {
|
|
79
136
|
token;
|
|
@@ -94,79 +151,138 @@ class ToolProgressReporter {
|
|
|
94
151
|
return new ToolProgressReporter(token, sendNotification, relatedTaskMeta);
|
|
95
152
|
}
|
|
96
153
|
async report(progress, message) {
|
|
154
|
+
const notification = {
|
|
155
|
+
method: 'notifications/progress',
|
|
156
|
+
params: {
|
|
157
|
+
progressToken: this.token,
|
|
158
|
+
progress,
|
|
159
|
+
total: FETCH_PROGRESS_TOTAL,
|
|
160
|
+
message,
|
|
161
|
+
...(this.relatedTaskMeta
|
|
162
|
+
? {
|
|
163
|
+
_meta: {
|
|
164
|
+
'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
|
|
165
|
+
},
|
|
166
|
+
}
|
|
167
|
+
: {}),
|
|
168
|
+
},
|
|
169
|
+
};
|
|
170
|
+
let timeoutId;
|
|
171
|
+
const timeoutPromise = new Promise((resolve) => {
|
|
172
|
+
timeoutId = setTimeout(() => {
|
|
173
|
+
resolve({ timeout: true });
|
|
174
|
+
}, PROGRESS_NOTIFICATION_TIMEOUT_MS);
|
|
175
|
+
timeoutId.unref();
|
|
176
|
+
});
|
|
97
177
|
try {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
}
|
|
105
|
-
const sendPromise = this.sendNotification({
|
|
106
|
-
method: 'notifications/progress',
|
|
107
|
-
params: {
|
|
108
|
-
progressToken: this.token,
|
|
109
|
-
progress,
|
|
110
|
-
total: FETCH_PROGRESS_TOTAL,
|
|
111
|
-
message,
|
|
112
|
-
...(this.relatedTaskMeta
|
|
113
|
-
? {
|
|
114
|
-
_meta: {
|
|
115
|
-
'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
|
|
116
|
-
},
|
|
117
|
-
}
|
|
118
|
-
: {}),
|
|
119
|
-
},
|
|
120
|
-
}).finally(() => {
|
|
121
|
-
if (timeoutId)
|
|
122
|
-
clearTimeout(timeoutId);
|
|
123
|
-
});
|
|
124
|
-
await Promise.race([sendPromise, timeoutPromise]);
|
|
178
|
+
const outcome = await Promise.race([
|
|
179
|
+
this.sendNotification(notification).then(() => ({ ok: true })),
|
|
180
|
+
timeoutPromise,
|
|
181
|
+
]);
|
|
182
|
+
if ('timeout' in outcome) {
|
|
183
|
+
logWarn('Progress notification timed out', { progress, message });
|
|
184
|
+
}
|
|
125
185
|
}
|
|
126
186
|
catch (error) {
|
|
127
|
-
|
|
128
|
-
error.message === 'Progress notification timeout';
|
|
129
|
-
const logMessage = isTimeout
|
|
130
|
-
? 'Progress notification timed out'
|
|
131
|
-
: 'Failed to send progress notification';
|
|
132
|
-
logWarn(logMessage, {
|
|
187
|
+
logWarn('Failed to send progress notification', {
|
|
133
188
|
error: getErrorMessage(error),
|
|
134
189
|
progress,
|
|
135
190
|
message,
|
|
136
191
|
});
|
|
137
192
|
}
|
|
193
|
+
finally {
|
|
194
|
+
if (timeoutId)
|
|
195
|
+
clearTimeout(timeoutId);
|
|
196
|
+
}
|
|
138
197
|
}
|
|
139
198
|
}
|
|
140
199
|
export function createProgressReporter(extra) {
|
|
141
200
|
return ToolProgressReporter.create(extra);
|
|
142
201
|
}
|
|
202
|
+
function getOpenCodeFence(content) {
|
|
203
|
+
const FENCE_PATTERN = /^([ \t]*)(`{3,}|~{3,})/gm;
|
|
204
|
+
let match;
|
|
205
|
+
let inFence = false;
|
|
206
|
+
let fenceChar = null;
|
|
207
|
+
let fenceLength = 0;
|
|
208
|
+
while ((match = FENCE_PATTERN.exec(content)) !== null) {
|
|
209
|
+
const marker = match[2];
|
|
210
|
+
if (!marker)
|
|
211
|
+
continue;
|
|
212
|
+
const [char] = marker;
|
|
213
|
+
if (!char)
|
|
214
|
+
continue;
|
|
215
|
+
const { length } = marker;
|
|
216
|
+
if (!inFence) {
|
|
217
|
+
inFence = true;
|
|
218
|
+
fenceChar = char;
|
|
219
|
+
fenceLength = length;
|
|
220
|
+
}
|
|
221
|
+
else if (char === fenceChar && length >= fenceLength) {
|
|
222
|
+
inFence = false;
|
|
223
|
+
fenceChar = null;
|
|
224
|
+
fenceLength = 0;
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
if (inFence && fenceChar) {
|
|
228
|
+
return { fenceChar, fenceLength };
|
|
229
|
+
}
|
|
230
|
+
return null;
|
|
231
|
+
}
|
|
232
|
+
function findSafeLinkBoundary(content, limit) {
|
|
233
|
+
const lastBracket = content.lastIndexOf('[', limit);
|
|
234
|
+
if (lastBracket === -1)
|
|
235
|
+
return limit;
|
|
236
|
+
const afterBracket = content.substring(lastBracket, limit);
|
|
237
|
+
const closedPattern = /^\[[^\]]*\]\([^)]*\)/;
|
|
238
|
+
if (closedPattern.test(afterBracket))
|
|
239
|
+
return limit;
|
|
240
|
+
const start = lastBracket > 0 && content[lastBracket - 1] === '!'
|
|
241
|
+
? lastBracket - 1
|
|
242
|
+
: lastBracket;
|
|
243
|
+
return start;
|
|
244
|
+
}
|
|
245
|
+
function truncateWithMarker(content, limit, marker) {
|
|
246
|
+
if (content.length <= limit)
|
|
247
|
+
return content;
|
|
248
|
+
const maxContentLength = Math.max(0, limit - marker.length);
|
|
249
|
+
const tentativeContent = content.substring(0, maxContentLength);
|
|
250
|
+
const openFence = getOpenCodeFence(tentativeContent);
|
|
251
|
+
if (openFence) {
|
|
252
|
+
const fenceCloser = `\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`;
|
|
253
|
+
const adjustedLength = Math.max(0, limit - marker.length - fenceCloser.length);
|
|
254
|
+
return `${content.substring(0, adjustedLength)}${fenceCloser}${marker}`;
|
|
255
|
+
}
|
|
256
|
+
const safeBoundary = findSafeLinkBoundary(content, maxContentLength);
|
|
257
|
+
if (safeBoundary < maxContentLength) {
|
|
258
|
+
return `${content.substring(0, safeBoundary)}${marker}`;
|
|
259
|
+
}
|
|
260
|
+
return `${tentativeContent}${marker}`;
|
|
261
|
+
}
|
|
143
262
|
class InlineContentLimiter {
|
|
144
263
|
apply(content, cacheKey) {
|
|
145
264
|
const contentSize = content.length;
|
|
146
265
|
const inlineLimit = config.constants.maxInlineContentChars;
|
|
266
|
+
if (inlineLimit <= 0) {
|
|
267
|
+
return { content, contentSize };
|
|
268
|
+
}
|
|
147
269
|
if (contentSize <= inlineLimit) {
|
|
148
270
|
return { content, contentSize };
|
|
149
271
|
}
|
|
150
|
-
const
|
|
151
|
-
|
|
152
|
-
|
|
272
|
+
const isTruncated = contentSize > inlineLimit;
|
|
273
|
+
const resourceUri = cacheKey && (cache.isEnabled() || isTruncated)
|
|
274
|
+
? cache.toResourceUri(cacheKey)
|
|
275
|
+
: null;
|
|
276
|
+
const truncatedContent = truncateWithMarker(content, inlineLimit, TRUNCATION_MARKER);
|
|
277
|
+
if (resourceUri) {
|
|
278
|
+
return {
|
|
279
|
+
content: truncatedContent,
|
|
280
|
+
contentSize,
|
|
281
|
+
resourceUri,
|
|
282
|
+
resourceMimeType: 'text/markdown',
|
|
283
|
+
truncated: true,
|
|
284
|
+
};
|
|
153
285
|
}
|
|
154
|
-
return {
|
|
155
|
-
contentSize,
|
|
156
|
-
resourceUri,
|
|
157
|
-
resourceMimeType: 'text/markdown',
|
|
158
|
-
};
|
|
159
|
-
}
|
|
160
|
-
resolveResourceUri(cacheKey) {
|
|
161
|
-
if (!cache.isEnabled() || !cacheKey)
|
|
162
|
-
return null;
|
|
163
|
-
return cache.toResourceUri(cacheKey);
|
|
164
|
-
}
|
|
165
|
-
buildTruncatedFallback(content, contentSize, inlineLimit) {
|
|
166
|
-
const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
|
|
167
|
-
const truncatedContent = content.length > inlineLimit
|
|
168
|
-
? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
|
|
169
|
-
: content;
|
|
170
286
|
return {
|
|
171
287
|
content: truncatedContent,
|
|
172
288
|
contentSize,
|
|
@@ -181,13 +297,10 @@ function applyInlineContentLimit(content, cacheKey) {
|
|
|
181
297
|
/* -------------------------------------------------------------------------------------------------
|
|
182
298
|
* Tool response blocks (text + optional resource + optional link)
|
|
183
299
|
* ------------------------------------------------------------------------------------------------- */
|
|
184
|
-
function serializeStructuredContent(structuredContent) {
|
|
185
|
-
return JSON.stringify(structuredContent);
|
|
186
|
-
}
|
|
187
300
|
function buildTextBlock(structuredContent) {
|
|
188
301
|
return {
|
|
189
302
|
type: 'text',
|
|
190
|
-
text:
|
|
303
|
+
text: JSON.stringify(structuredContent),
|
|
191
304
|
};
|
|
192
305
|
}
|
|
193
306
|
function buildResourceLink(inlineResult, name) {
|
|
@@ -208,7 +321,7 @@ function buildEmbeddedResource(content, url, title) {
|
|
|
208
321
|
if (!content)
|
|
209
322
|
return null;
|
|
210
323
|
const filename = cache.generateSafeFilename(url, title, undefined, '.md');
|
|
211
|
-
const uri =
|
|
324
|
+
const uri = new URL(filename, 'file:///').href;
|
|
212
325
|
return {
|
|
213
326
|
type: 'resource',
|
|
214
327
|
resource: {
|
|
@@ -232,21 +345,20 @@ function appendResourceBlocks(params) {
|
|
|
232
345
|
if (link)
|
|
233
346
|
blocks.push(link);
|
|
234
347
|
}
|
|
235
|
-
function buildToolContentBlocks(
|
|
236
|
-
const blocks = [
|
|
348
|
+
function buildToolContentBlocks(params) {
|
|
349
|
+
const blocks = [
|
|
350
|
+
buildTextBlock(params.structuredContent),
|
|
351
|
+
];
|
|
237
352
|
appendResourceBlocks({
|
|
238
353
|
blocks,
|
|
239
|
-
inlineResult,
|
|
240
|
-
resourceName,
|
|
241
|
-
url,
|
|
242
|
-
title,
|
|
243
|
-
fullContent,
|
|
354
|
+
inlineResult: params.inlineResult,
|
|
355
|
+
resourceName: params.resourceName,
|
|
356
|
+
url: params.url,
|
|
357
|
+
title: params.title,
|
|
358
|
+
fullContent: params.fullContent,
|
|
244
359
|
});
|
|
245
360
|
return blocks;
|
|
246
361
|
}
|
|
247
|
-
/* -------------------------------------------------------------------------------------------------
|
|
248
|
-
* Fetch pipeline executor (normalize → raw-transform → cache → fetch → transform → persist)
|
|
249
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
250
362
|
function resolveNormalizedUrl(url) {
|
|
251
363
|
const { normalizedUrl: validatedUrl } = normalizeUrl(url);
|
|
252
364
|
const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
|
|
@@ -260,16 +372,16 @@ function logRawUrlTransformation(resolvedUrl) {
|
|
|
260
372
|
});
|
|
261
373
|
}
|
|
262
374
|
function extractTitle(value) {
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
const { title } = value;
|
|
375
|
+
const record = asRecord(value);
|
|
376
|
+
const title = record ? record.title : undefined;
|
|
266
377
|
return typeof title === 'string' ? title : undefined;
|
|
267
378
|
}
|
|
268
|
-
function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
|
|
269
|
-
const log = reason
|
|
379
|
+
function logCacheMiss(reason, cacheNamespace, normalizedUrl, error) {
|
|
380
|
+
const log = reason.startsWith('deserialize') ? logWarn : logDebug;
|
|
270
381
|
log(`Cache miss due to ${reason}`, {
|
|
271
382
|
namespace: cacheNamespace,
|
|
272
383
|
url: normalizedUrl,
|
|
384
|
+
...(error ? { error: getErrorMessage(error) } : {}),
|
|
273
385
|
});
|
|
274
386
|
}
|
|
275
387
|
function attemptCacheRetrieval(params) {
|
|
@@ -283,7 +395,14 @@ function attemptCacheRetrieval(params) {
|
|
|
283
395
|
logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
|
|
284
396
|
return null;
|
|
285
397
|
}
|
|
286
|
-
|
|
398
|
+
let data;
|
|
399
|
+
try {
|
|
400
|
+
data = deserialize(cached.content);
|
|
401
|
+
}
|
|
402
|
+
catch (error) {
|
|
403
|
+
logCacheMiss('deserialize exception', cacheNamespace, normalizedUrl, error);
|
|
404
|
+
return null;
|
|
405
|
+
}
|
|
287
406
|
if (data === undefined) {
|
|
288
407
|
logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
|
|
289
408
|
return null;
|
|
@@ -298,7 +417,7 @@ function attemptCacheRetrieval(params) {
|
|
|
298
417
|
};
|
|
299
418
|
}
|
|
300
419
|
function persistCache(params) {
|
|
301
|
-
const { cacheKey, data, serialize, normalizedUrl } = params;
|
|
420
|
+
const { cacheKey, data, serialize, normalizedUrl, cacheNamespace, force } = params;
|
|
302
421
|
if (!cacheKey)
|
|
303
422
|
return;
|
|
304
423
|
const serializer = serialize ?? JSON.stringify;
|
|
@@ -307,36 +426,49 @@ function persistCache(params) {
|
|
|
307
426
|
url: normalizedUrl,
|
|
308
427
|
...(title === undefined ? {} : { title }),
|
|
309
428
|
};
|
|
310
|
-
|
|
429
|
+
try {
|
|
430
|
+
cache.set(cacheKey, serializer(data), metadata, force ? { force: true } : undefined);
|
|
431
|
+
}
|
|
432
|
+
catch (error) {
|
|
433
|
+
logWarn('Failed to persist cache entry', {
|
|
434
|
+
namespace: cacheNamespace,
|
|
435
|
+
url: normalizedUrl,
|
|
436
|
+
error: getErrorMessage(error),
|
|
437
|
+
});
|
|
438
|
+
}
|
|
311
439
|
}
|
|
312
440
|
export async function executeFetchPipeline(options) {
|
|
313
441
|
const resolvedUrl = resolveNormalizedUrl(options.url);
|
|
314
442
|
logRawUrlTransformation(resolvedUrl);
|
|
315
443
|
const cacheKey = cache.createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
444
|
+
if (!options.forceRefresh) {
|
|
445
|
+
const cachedResult = attemptCacheRetrieval({
|
|
446
|
+
cacheKey,
|
|
447
|
+
deserialize: options.deserialize,
|
|
448
|
+
cacheNamespace: options.cacheNamespace,
|
|
449
|
+
normalizedUrl: resolvedUrl.normalizedUrl,
|
|
450
|
+
});
|
|
451
|
+
if (cachedResult) {
|
|
452
|
+
return { ...cachedResult, originalUrl: resolvedUrl.originalUrl };
|
|
453
|
+
}
|
|
454
|
+
}
|
|
324
455
|
logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
|
|
325
|
-
const
|
|
326
|
-
const
|
|
327
|
-
const data = await options.transform(html, resolvedUrl.normalizedUrl);
|
|
456
|
+
const { buffer, encoding } = await fetchNormalizedUrlBuffer(resolvedUrl.normalizedUrl, withSignal(options.signal));
|
|
457
|
+
const data = await options.transform({ buffer, encoding }, resolvedUrl.normalizedUrl);
|
|
328
458
|
if (cache.isEnabled()) {
|
|
329
459
|
persistCache({
|
|
330
460
|
cacheKey,
|
|
331
461
|
data,
|
|
332
462
|
serialize: options.serialize,
|
|
333
463
|
normalizedUrl: resolvedUrl.normalizedUrl,
|
|
464
|
+
cacheNamespace: options.cacheNamespace,
|
|
334
465
|
});
|
|
335
466
|
}
|
|
336
467
|
return {
|
|
337
468
|
data,
|
|
338
469
|
fromCache: false,
|
|
339
470
|
url: resolvedUrl.normalizedUrl,
|
|
471
|
+
originalUrl: resolvedUrl.originalUrl,
|
|
340
472
|
fetchedAt: new Date().toISOString(),
|
|
341
473
|
cacheKey,
|
|
342
474
|
};
|
|
@@ -346,22 +478,38 @@ export async function performSharedFetch(options, deps = {}) {
|
|
|
346
478
|
const pipelineOptions = {
|
|
347
479
|
url: options.url,
|
|
348
480
|
cacheNamespace: 'markdown',
|
|
349
|
-
...(options.signal
|
|
481
|
+
...withSignal(options.signal),
|
|
482
|
+
...(options.cacheVary ? { cacheVary: options.cacheVary } : {}),
|
|
483
|
+
...(options.forceRefresh ? { forceRefresh: true } : {}),
|
|
350
484
|
transform: options.transform,
|
|
351
485
|
...(options.serialize ? { serialize: options.serialize } : {}),
|
|
352
486
|
...(options.deserialize ? { deserialize: options.deserialize } : {}),
|
|
353
487
|
};
|
|
354
488
|
const pipeline = await executePipeline(pipelineOptions);
|
|
355
489
|
const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
|
|
490
|
+
if (inlineResult.truncated && !pipeline.fromCache && !cache.isEnabled()) {
|
|
491
|
+
persistCache({
|
|
492
|
+
cacheKey: pipeline.cacheKey ?? null,
|
|
493
|
+
data: pipeline.data,
|
|
494
|
+
serialize: options.serialize,
|
|
495
|
+
normalizedUrl: pipeline.url,
|
|
496
|
+
cacheNamespace: 'markdown',
|
|
497
|
+
force: true,
|
|
498
|
+
});
|
|
499
|
+
}
|
|
356
500
|
return { pipeline, inlineResult };
|
|
357
501
|
}
|
|
358
502
|
/* -------------------------------------------------------------------------------------------------
|
|
359
503
|
* Tool error mapping
|
|
360
504
|
* ------------------------------------------------------------------------------------------------- */
|
|
361
|
-
export function createToolErrorResponse(message, url) {
|
|
505
|
+
export function createToolErrorResponse(message, url, extra) {
|
|
362
506
|
const structuredContent = {
|
|
363
507
|
error: message,
|
|
364
508
|
url,
|
|
509
|
+
...(extra?.statusCode !== undefined
|
|
510
|
+
? { statusCode: extra.statusCode }
|
|
511
|
+
: {}),
|
|
512
|
+
...(extra?.details ? { details: extra.details } : {}),
|
|
365
513
|
};
|
|
366
514
|
return {
|
|
367
515
|
content: [buildTextBlock(structuredContent)],
|
|
@@ -385,57 +533,44 @@ function resolveToolErrorMessage(error, fallbackMessage) {
|
|
|
385
533
|
}
|
|
386
534
|
export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
|
|
387
535
|
const message = resolveToolErrorMessage(error, fallbackMessage);
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
return isObject(parsed) ? parsed : undefined;
|
|
394
|
-
}
|
|
395
|
-
catch {
|
|
396
|
-
return undefined;
|
|
536
|
+
if (error instanceof FetchError) {
|
|
537
|
+
return createToolErrorResponse(message, url, {
|
|
538
|
+
statusCode: error.statusCode,
|
|
539
|
+
details: error.details,
|
|
540
|
+
});
|
|
397
541
|
}
|
|
542
|
+
return createToolErrorResponse(message, url);
|
|
398
543
|
}
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
}
|
|
408
|
-
function resolveOptionalTitle(parsed) {
|
|
409
|
-
const { title } = parsed;
|
|
410
|
-
if (title === undefined)
|
|
411
|
-
return undefined;
|
|
412
|
-
return typeof title === 'string' ? title : undefined;
|
|
413
|
-
}
|
|
414
|
-
function resolveTruncatedFlag(parsed) {
|
|
415
|
-
const { truncated } = parsed;
|
|
416
|
-
return typeof truncated === 'boolean' ? truncated : false;
|
|
417
|
-
}
|
|
544
|
+
const cachedMarkdownSchema = z
|
|
545
|
+
.object({
|
|
546
|
+
markdown: z.string().optional(),
|
|
547
|
+
content: z.string().optional(),
|
|
548
|
+
title: z.string().optional(),
|
|
549
|
+
truncated: z.boolean().optional(),
|
|
550
|
+
})
|
|
551
|
+
.catchall(z.unknown())
|
|
552
|
+
.refine((value) => typeof value.markdown === 'string' || typeof value.content === 'string', { message: 'Missing markdown/content' });
|
|
418
553
|
export function parseCachedMarkdownResult(cached) {
|
|
419
|
-
const parsed =
|
|
420
|
-
|
|
554
|
+
const parsed = safeJsonParse(cached);
|
|
555
|
+
const result = cachedMarkdownSchema.safeParse(parsed);
|
|
556
|
+
if (!result.success)
|
|
421
557
|
return undefined;
|
|
422
|
-
const
|
|
423
|
-
if (
|
|
424
|
-
return undefined;
|
|
425
|
-
const title = resolveOptionalTitle(parsed);
|
|
426
|
-
if (parsed.title !== undefined && title === undefined)
|
|
558
|
+
const markdown = result.data.markdown ?? result.data.content;
|
|
559
|
+
if (typeof markdown !== 'string')
|
|
427
560
|
return undefined;
|
|
428
561
|
return {
|
|
429
|
-
content:
|
|
430
|
-
markdown
|
|
431
|
-
title,
|
|
432
|
-
truncated:
|
|
562
|
+
content: markdown,
|
|
563
|
+
markdown,
|
|
564
|
+
title: result.data.title,
|
|
565
|
+
truncated: result.data.truncated ?? false,
|
|
433
566
|
};
|
|
434
567
|
}
|
|
435
|
-
const markdownTransform = async (
|
|
436
|
-
const result = await
|
|
568
|
+
const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
|
|
569
|
+
const result = await transformBufferToMarkdown(input.buffer, url, {
|
|
437
570
|
includeMetadata: true,
|
|
438
|
-
|
|
571
|
+
encoding: input.encoding,
|
|
572
|
+
...withSignal(signal),
|
|
573
|
+
...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
|
|
439
574
|
});
|
|
440
575
|
return { ...result, content: result.markdown };
|
|
441
576
|
};
|
|
@@ -450,16 +585,25 @@ function serializeMarkdownResult(result) {
|
|
|
450
585
|
* fetch-url tool implementation
|
|
451
586
|
* ------------------------------------------------------------------------------------------------- */
|
|
452
587
|
function buildStructuredContent(pipeline, inlineResult, inputUrl) {
|
|
588
|
+
const truncated = inlineResult.truncated ?? pipeline.data.truncated;
|
|
453
589
|
return {
|
|
454
|
-
url: pipeline.url,
|
|
590
|
+
url: pipeline.originalUrl ?? pipeline.url,
|
|
455
591
|
resolvedUrl: pipeline.url,
|
|
456
592
|
inputUrl,
|
|
457
593
|
title: pipeline.data.title,
|
|
458
594
|
markdown: inlineResult.content,
|
|
595
|
+
...(truncated ? { truncated: true } : {}),
|
|
459
596
|
};
|
|
460
597
|
}
|
|
461
598
|
function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
|
|
462
|
-
return buildToolContentBlocks(
|
|
599
|
+
return buildToolContentBlocks({
|
|
600
|
+
structuredContent,
|
|
601
|
+
inlineResult,
|
|
602
|
+
resourceName: 'Fetched markdown',
|
|
603
|
+
url: pipeline.url,
|
|
604
|
+
...(pipeline.data.title !== undefined && { title: pipeline.data.title }),
|
|
605
|
+
fullContent: pipeline.data.content,
|
|
606
|
+
});
|
|
463
607
|
}
|
|
464
608
|
function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
465
609
|
const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
|
|
@@ -469,15 +613,17 @@ function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
|
469
613
|
structuredContent,
|
|
470
614
|
};
|
|
471
615
|
}
|
|
472
|
-
async function fetchPipeline(url, signal, progress) {
|
|
616
|
+
async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefresh) {
|
|
473
617
|
return performSharedFetch({
|
|
474
618
|
url,
|
|
475
|
-
...(signal
|
|
476
|
-
|
|
619
|
+
...withSignal(signal),
|
|
620
|
+
...(skipNoiseRemoval ? { cacheVary: { skipNoiseRemoval: true } } : {}),
|
|
621
|
+
...(forceRefresh ? { forceRefresh: true } : {}),
|
|
622
|
+
transform: async ({ buffer, encoding }, normalizedUrl) => {
|
|
477
623
|
if (progress) {
|
|
478
|
-
|
|
624
|
+
void progress.report(3, 'Transforming content');
|
|
479
625
|
}
|
|
480
|
-
return markdownTransform(
|
|
626
|
+
return markdownTransform({ buffer, encoding }, normalizedUrl, signal, skipNoiseRemoval);
|
|
481
627
|
},
|
|
482
628
|
serialize: serializeMarkdownResult,
|
|
483
629
|
deserialize: parseCachedMarkdownResult,
|
|
@@ -488,26 +634,19 @@ async function executeFetch(input, extra) {
|
|
|
488
634
|
if (!url) {
|
|
489
635
|
return createToolErrorResponse('URL is required', '');
|
|
490
636
|
}
|
|
491
|
-
const
|
|
492
|
-
const { timeoutMs } = config.tools;
|
|
493
|
-
const signal = timeoutMs > 0
|
|
494
|
-
? AbortSignal.any([
|
|
495
|
-
...(extraSignal ? [extraSignal] : []),
|
|
496
|
-
AbortSignal.timeout(timeoutMs),
|
|
497
|
-
])
|
|
498
|
-
: extraSignal;
|
|
637
|
+
const signal = buildToolAbortSignal(extra?.signal);
|
|
499
638
|
const progress = createProgressReporter(extra);
|
|
500
|
-
|
|
639
|
+
void progress.report(1, 'Validating URL');
|
|
501
640
|
logDebug('Fetching URL', { url });
|
|
502
|
-
|
|
503
|
-
const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress);
|
|
641
|
+
void progress.report(2, 'Fetching content');
|
|
642
|
+
const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress, input.skipNoiseRemoval, input.forceRefresh);
|
|
504
643
|
if (pipeline.fromCache) {
|
|
505
|
-
|
|
644
|
+
void progress.report(3, 'Using cached content');
|
|
506
645
|
}
|
|
507
646
|
if (inlineResult.error) {
|
|
508
647
|
return createToolErrorResponse(inlineResult.error, url);
|
|
509
648
|
}
|
|
510
|
-
|
|
649
|
+
void progress.report(4, 'Finalizing response');
|
|
511
650
|
return buildResponse(pipeline, inlineResult, url);
|
|
512
651
|
}
|
|
513
652
|
export async function fetchUrlToolHandler(input, extra) {
|
|
@@ -564,26 +703,21 @@ function resolveSessionIdFromExtra(extra) {
|
|
|
564
703
|
const { sessionId } = extra;
|
|
565
704
|
if (typeof sessionId === 'string')
|
|
566
705
|
return sessionId;
|
|
567
|
-
const
|
|
568
|
-
|
|
569
|
-
return undefined;
|
|
570
|
-
const { headers } = requestInfo;
|
|
571
|
-
if (!isObject(headers))
|
|
572
|
-
return undefined;
|
|
573
|
-
const headerValue = headers['mcp-session-id'];
|
|
706
|
+
const headers = readNestedRecord(extra, ['requestInfo', 'headers']);
|
|
707
|
+
const headerValue = headers ? headers['mcp-session-id'] : undefined;
|
|
574
708
|
return typeof headerValue === 'string' ? headerValue : undefined;
|
|
575
709
|
}
|
|
576
710
|
export function registerTools(server) {
|
|
577
|
-
if (config.tools.enabled.includes(FETCH_URL_TOOL_NAME))
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
}
|
|
711
|
+
if (!config.tools.enabled.includes(FETCH_URL_TOOL_NAME))
|
|
712
|
+
return;
|
|
713
|
+
server.registerTool(TOOL_DEFINITION.name, {
|
|
714
|
+
title: TOOL_DEFINITION.title,
|
|
715
|
+
description: TOOL_DEFINITION.description,
|
|
716
|
+
inputSchema: TOOL_DEFINITION.inputSchema,
|
|
717
|
+
outputSchema: TOOL_DEFINITION.outputSchema,
|
|
718
|
+
annotations: TOOL_DEFINITION.annotations,
|
|
719
|
+
execution: TOOL_DEFINITION.execution,
|
|
720
|
+
// Use specific tool icon here
|
|
721
|
+
icons: [TOOL_ICON],
|
|
722
|
+
}, withRequestContextIfMissing(TOOL_DEFINITION.handler));
|
|
589
723
|
}
|