@j0hanz/fetch-url-mcp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +570 -0
  2. package/dist/AGENTS.md +115 -0
  3. package/dist/assets/logo.svg +24837 -0
  4. package/dist/cache.d.ts +47 -0
  5. package/dist/cache.js +316 -0
  6. package/dist/cli.d.ts +17 -0
  7. package/dist/cli.js +48 -0
  8. package/dist/config.d.ts +142 -0
  9. package/dist/config.js +480 -0
  10. package/dist/crypto.d.ts +3 -0
  11. package/dist/crypto.js +49 -0
  12. package/dist/dom-noise-removal.d.ts +1 -0
  13. package/dist/dom-noise-removal.js +488 -0
  14. package/dist/errors.d.ts +10 -0
  15. package/dist/errors.js +61 -0
  16. package/dist/fetch.d.ts +42 -0
  17. package/dist/fetch.js +1544 -0
  18. package/dist/host-normalization.d.ts +1 -0
  19. package/dist/host-normalization.js +77 -0
  20. package/dist/http-native.d.ts +5 -0
  21. package/dist/http-native.js +1313 -0
  22. package/dist/index.d.ts +2 -0
  23. package/dist/index.js +91 -0
  24. package/dist/instructions.md +57 -0
  25. package/dist/ip-blocklist.d.ts +8 -0
  26. package/dist/ip-blocklist.js +74 -0
  27. package/dist/json.d.ts +1 -0
  28. package/dist/json.js +34 -0
  29. package/dist/language-detection.d.ts +2 -0
  30. package/dist/language-detection.js +364 -0
  31. package/dist/markdown-cleanup.d.ts +6 -0
  32. package/dist/markdown-cleanup.js +474 -0
  33. package/dist/mcp-validator.d.ts +15 -0
  34. package/dist/mcp-validator.js +44 -0
  35. package/dist/mcp.d.ts +4 -0
  36. package/dist/mcp.js +421 -0
  37. package/dist/observability.d.ts +21 -0
  38. package/dist/observability.js +211 -0
  39. package/dist/prompts.d.ts +7 -0
  40. package/dist/prompts.js +28 -0
  41. package/dist/resources.d.ts +8 -0
  42. package/dist/resources.js +216 -0
  43. package/dist/server-tuning.d.ts +13 -0
  44. package/dist/server-tuning.js +47 -0
  45. package/dist/server.d.ts +4 -0
  46. package/dist/server.js +174 -0
  47. package/dist/session.d.ts +39 -0
  48. package/dist/session.js +218 -0
  49. package/dist/tasks.d.ts +63 -0
  50. package/dist/tasks.js +327 -0
  51. package/dist/timer-utils.d.ts +5 -0
  52. package/dist/timer-utils.js +20 -0
  53. package/dist/tools.d.ts +135 -0
  54. package/dist/tools.js +812 -0
  55. package/dist/transform-types.d.ts +126 -0
  56. package/dist/transform-types.js +5 -0
  57. package/dist/transform.d.ts +36 -0
  58. package/dist/transform.js +2341 -0
  59. package/dist/type-guards.d.ts +14 -0
  60. package/dist/type-guards.js +13 -0
  61. package/dist/workers/transform-child.d.ts +1 -0
  62. package/dist/workers/transform-child.js +136 -0
  63. package/dist/workers/transform-worker.d.ts +1 -0
  64. package/dist/workers/transform-worker.js +128 -0
  65. package/package.json +91 -0
package/dist/tools.js ADDED
@@ -0,0 +1,812 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import { z } from 'zod';
3
+ import * as cache from './cache.js';
4
+ import { config } from './config.js';
5
+ import { FetchError, getErrorMessage, isSystemError } from './errors.js';
6
+ import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './fetch.js';
7
+ import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
8
+ import { transformBufferToMarkdown } from './transform.js';
9
+ import { isObject } from './type-guards.js';
10
/** Suffix appended to markdown whenever the returned content has been cut short. */
const TRUNCATION_MARKER = '...[truncated]';

/** Value sent as `total` in every progress notification. */
const FETCH_PROGRESS_TOTAL = 4;

/** Longest time, in milliseconds, to wait for a single progress notification to be sent. */
const PROGRESS_NOTIFICATION_TIMEOUT_MS = 5_000;
13
/**
 * Field definitions for the fetch-url tool input.
 * Kept separate from the strict-object wrapper so each field reads as one unit.
 */
const fetchUrlInputShape = {
    url: z
        .url({ protocol: /^https?$/i })
        .min(1)
        .max(config.constants.maxUrlLength)
        .describe('The URL of the webpage to fetch and convert to Markdown'),
    skipNoiseRemoval: z
        .boolean()
        .optional()
        .describe('When true, preserves navigation, footers, and other elements normally filtered as noise'),
    forceRefresh: z
        .boolean()
        .optional()
        .describe('When true, bypasses the cache and fetches fresh content from the URL'),
    maxInlineChars: z
        .number()
        .int()
        .min(0)
        .max(config.constants.maxHtmlSize)
        .optional()
        .describe('Optional per-call inline markdown limit. 0 means unlimited. If a global inline limit is configured, the lower value is used.'),
};

/** Strict input schema of the fetch-url tool: unknown keys are rejected. */
export const fetchUrlInputSchema = z.strictObject(fetchUrlInputShape);
35
/**
 * Builds the strict schema describing the structured output of the fetch-url tool.
 *
 * Length limits come from `config.constants`; the repeated
 * "optional bounded string" and "optional boolean" chains are produced by
 * local factories so every field keeps the same chain order
 * (`string().max().optional().describe()`).
 */
function buildFetchUrlOutputSchema() {
    const { maxUrlLength, maxHtmlSize, maxInlineContentChars } = config.constants;
    const optionalText = (maxLength, description) => z.string().max(maxLength).optional().describe(description);
    const optionalFlag = (description) => z.boolean().optional().describe(description);

    const metadataSchema = z
        .strictObject({
            title: optionalText(512, 'Detected page title'),
            description: optionalText(2048, 'Detected page description'),
            author: optionalText(512, 'Detected page author'),
            image: optionalText(maxUrlLength, 'Detected page preview image URL'),
            favicon: optionalText(maxUrlLength, 'Detected page favicon URL'),
            publishedAt: optionalText(64, 'Detected publication date (if present)'),
            modifiedAt: optionalText(64, 'Detected last modified date (if present)'),
        })
        .optional()
        .describe('Detected metadata extracted from page markup');

    // The markdown field is only length-bounded when a global inline limit is configured.
    const markdownText = maxInlineContentChars > 0
        ? z.string().max(maxInlineContentChars)
        : z.string();

    return z.strictObject({
        url: z.string().min(1).max(maxUrlLength).describe('The fetched URL'),
        inputUrl: optionalText(maxUrlLength, 'The original URL provided by the caller'),
        resolvedUrl: optionalText(maxUrlLength, 'The normalized or transformed URL that was fetched'),
        finalUrl: optionalText(maxUrlLength, 'The final response URL after redirects'),
        title: optionalText(512, 'Page title'),
        metadata: metadataSchema,
        markdown: markdownText
            .optional()
            .describe('The extracted content in Markdown format'),
        fromCache: optionalFlag('Whether this response was served from cache'),
        fetchedAt: optionalText(64, 'ISO timestamp of fetch/cache retrieval time'),
        contentSize: z
            .number()
            .int()
            .min(0)
            .max(maxHtmlSize * 4)
            .optional()
            .describe('Full markdown size in characters before inline truncation'),
        truncated: optionalFlag('Whether the returned markdown was truncated'),
        error: optionalText(2048, 'Error message if the request failed'),
        statusCode: z
            .number()
            .int()
            .optional()
            .describe('HTTP status code for failed requests'),
        details: z
            .record(z.string(), z.unknown())
            .optional()
            .describe('Additional error details when available'),
    });
}

/** Strict output schema of the fetch-url tool (success and error shapes share it). */
const fetchUrlOutputSchema = buildFetchUrlOutputSchema();
129
/** Registered name of the tool; also checked against `config.tools.enabled`. */
export const FETCH_URL_TOOL_NAME = 'fetch-url';

/** Human-readable description passed to the server when the tool is registered. */
const FETCH_URL_TOOL_DESCRIPTION = `
Fetches a webpage and converts it to clean Markdown format optimized for LLM context.

This tool is useful for:
- Reading documentation, blog posts, or articles.
- Extracting main content while removing navigation and ads (noise removal).
- Caching content to speed up repeated queries.

Limitations:
- Inline output may be truncated when MAX_INLINE_CONTENT_CHARS is set.
- Does not execute complex client-side JavaScript interactions.
`.trim();
142
// Icon attached to the fetch-url tool registration: an inline, base64-encoded SVG data URI.
const TOOL_ICON_MIME_TYPE = 'image/svg+xml';
const TOOL_ICON = {
    src: `data:${TOOL_ICON_MIME_TYPE};base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSJjdXJyZW50Q29sb3IiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMjEgMTV2NGEyIDIgMCAwIDEtMiAySDVhMiAyIDAgMCAxLTItMnYtNCIvPjxwb2x5bGluZSBwb2ludHM9IjcgMTAgMTIgMTUgMTcgMTAiLz48bGluZSB4MT0iMTIiIHkxPSIxNSIgeDI9IjEyIiB5Mj0iMyIvPjwvc3ZnPg==`,
    mimeType: TOOL_ICON_MIME_TYPE,
};
147
/**
 * Narrows an unknown value to an object.
 * @param {unknown} value - Value to inspect.
 * @returns {object | undefined} `value` itself when `isObject` accepts it, otherwise `undefined`.
 */
function asRecord(value) {
    if (isObject(value)) {
        return value;
    }
    return undefined;
}
150
/**
 * Reads one property from a value that may or may not be an object.
 * @param {unknown} obj - Candidate object.
 * @param {string} key - Property name to read.
 * @returns {unknown} The property value, or `undefined` when `obj` is not an object.
 */
function readUnknown(obj, key) {
    return asRecord(obj)?.[key];
}
154
/**
 * Reads a property and returns it only if it is a string.
 * @param {unknown} obj - Candidate object.
 * @param {string} key - Property name to read.
 * @returns {string | undefined} The string value, or `undefined` for any other type.
 */
function readString(obj, key) {
    const candidate = readUnknown(obj, key);
    if (typeof candidate !== 'string') {
        return undefined;
    }
    return candidate;
}
158
/**
 * Walks a chain of property names and returns the object found at the end.
 * @param {unknown} obj - Root value to start from.
 * @param {string[]} keys - Property names to follow, outermost first.
 * @returns {object | undefined} The nested object, or `undefined` when any step
 *   is missing or the final value is not an object.
 */
function readNestedRecord(obj, keys) {
    let cursor = obj;
    for (let depth = 0; depth < keys.length; depth += 1) {
        cursor = readUnknown(cursor, keys[depth]);
        if (cursor === undefined) {
            // Stop at the first missing step.
            return undefined;
        }
    }
    return asRecord(cursor);
}
167
/**
 * Parses JSON without throwing.
 * @param {string} value - JSON text.
 * @returns {unknown} The parsed value, or `undefined` when `JSON.parse` throws.
 */
function safeJsonParse(value) {
    let parsed;
    try {
        parsed = JSON.parse(value);
    }
    catch {
        // Malformed input is reported as "no value" rather than an exception.
        parsed = undefined;
    }
    return parsed;
}
175
/**
 * Wraps an abort signal in an options fragment suitable for object spreading.
 * @param {AbortSignal | undefined} signal - Signal to forward.
 * @returns {{ signal?: AbortSignal }} `{}` when `signal` is `undefined`, otherwise `{ signal }`.
 */
function withSignal(signal) {
    if (signal === undefined) {
        return {};
    }
    return { signal };
}
178
/**
 * Combines the caller-provided abort signal with the configured tool timeout.
 * @param {AbortSignal | undefined} extraSignal - Signal supplied with the request, if any.
 * @returns {AbortSignal | undefined} `extraSignal` unchanged when `config.tools.timeoutMs`
 *   is zero or negative; otherwise a timeout signal, merged with `extraSignal` when present.
 */
function buildToolAbortSignal(extraSignal) {
    const timeoutMs = config.tools.timeoutMs;
    if (timeoutMs <= 0) {
        return extraSignal;
    }
    const timeoutSignal = AbortSignal.timeout(timeoutMs);
    return extraSignal
        ? AbortSignal.any([extraSignal, timeoutSignal])
        : timeoutSignal;
}
187
+ /* -------------------------------------------------------------------------------------------------
188
+ * Progress reporting
189
+ * ------------------------------------------------------------------------------------------------- */
190
/**
 * Extracts the related-task reference from request metadata.
 * @param {unknown} meta - Request `_meta` value.
 * @returns {{ taskId: string } | undefined} The task reference when
 *   `meta['io.modelcontextprotocol/related-task'].taskId` is a non-empty string.
 */
function resolveRelatedTaskMeta(meta) {
    const relatedTask = readUnknown(meta, 'io.modelcontextprotocol/related-task');
    const taskId = readString(relatedTask, 'taskId');
    if (!taskId) {
        return undefined;
    }
    return { taskId };
}
195
/**
 * Reports tool progress through an optional local callback and, when a
 * progress token and a notification sender are available, through
 * `notifications/progress` messages.
 *
 * Notifications are chained on `reportQueue`, so each one starts only after
 * the previous one has settled or timed out.
 */
class ToolProgressReporter {
    token;
    sendNotification;
    relatedTaskMeta;
    onProgress;
    reportQueue = Promise.resolve();

    constructor(token, sendNotification, relatedTaskMeta, onProgress) {
        Object.assign(this, { token, sendNotification, relatedTaskMeta, onProgress });
    }

    /**
     * Creates a reporter from the request `extra` object.
     * @param {object | undefined} extra - Request context; reads `_meta`,
     *   `sendNotification` and `onProgress`.
     * @returns {{ report: Function }} A no-op reporter when there is neither a
     *   progress token nor an `onProgress` callback, otherwise a real reporter.
     */
    static create(extra) {
        const meta = extra?._meta;
        const token = meta?.progressToken ?? null;
        const sender = extra?.sendNotification;
        const relatedTask = resolveRelatedTaskMeta(meta);
        const callback = extra?.onProgress;
        const hasNothingToReportTo = token === null && !callback;
        if (hasNothingToReportTo) {
            return { report: async () => { } };
        }
        return new ToolProgressReporter(token, sender, relatedTask, callback);
    }

    /**
     * Reports one progress step.
     * @param {number} progress - Current step.
     * @param {string} message - Human-readable status text.
     * @returns {Promise<void>} Resolves once the queued notification has been handled.
     */
    async report(progress, message) {
        this.#invokeCallback(progress, message);
        const send = this.sendNotification;
        if (this.token === null || !send) {
            return;
        }
        const notification = this.#buildNotification(progress, message);
        this.reportQueue = this.reportQueue.then(() => ToolProgressReporter.#deliver(send, notification, progress, message));
        await this.reportQueue;
    }

    /** Calls the local progress callback, logging (not propagating) any exception it throws. */
    #invokeCallback(progress, message) {
        if (!this.onProgress) {
            return;
        }
        try {
            this.onProgress(progress, message);
        }
        catch (error) {
            logWarn('Progress callback failed', {
                error: getErrorMessage(error),
                progress,
                message,
            });
        }
    }

    /** Builds the `notifications/progress` payload, adding related-task metadata when known. */
    #buildNotification(progress, message) {
        const params = {
            progressToken: this.token,
            progress,
            total: FETCH_PROGRESS_TOTAL,
            message,
        };
        if (this.relatedTaskMeta) {
            params._meta = {
                'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
            };
        }
        return { method: 'notifications/progress', params };
    }

    /**
     * Sends one notification, racing it against a timeout.
     * Timeouts and send failures are logged as warnings and never rethrown.
     */
    static async #deliver(send, notification, progress, message) {
        let timer;
        const timedOut = new Promise((resolve) => {
            timer = setTimeout(() => {
                resolve({ timeout: true });
            }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
            // The pending timer must not keep the process alive on its own.
            timer.unref();
        });
        try {
            const sent = send(notification).then(() => ({ ok: true }));
            const outcome = await Promise.race([sent, timedOut]);
            if ('timeout' in outcome) {
                logWarn('Progress notification timed out', { progress, message });
            }
        }
        catch (error) {
            logWarn('Failed to send progress notification', {
                error: getErrorMessage(error),
                progress,
                message,
            });
        }
        finally {
            if (timer) {
                clearTimeout(timer);
            }
        }
    }
}
281
/**
 * Public factory for progress reporters.
 * @param {object | undefined} extra - Request context forwarded to `ToolProgressReporter.create`.
 * @returns {{ report: Function }} The reporter produced by `ToolProgressReporter.create`.
 */
export function createProgressReporter(extra) {
    const reporter = ToolProgressReporter.create(extra);
    return reporter;
}
284
/**
 * Determines whether `content` ends inside an unclosed fenced code block.
 *
 * Scans every line that starts (after optional spaces/tabs) with three or
 * more backticks or tildes. The first such line opens a fence; a later line
 * closes it only when it uses the same character, is at least as long as the
 * opener, and carries nothing but whitespace after the marker. The last
 * condition keeps a line such as "```js" inside an open block from being
 * mistaken for the closing fence.
 *
 * @param {string} content - Markdown text to inspect.
 * @returns {{ fenceChar: string, fenceLength: number } | null} The fence that
 *   is still open at the end of `content`, or `null` when all fences are closed.
 */
function getOpenCodeFence(content) {
    // Group 1: the fence marker. Group 2: the rest of that line (info string, if any).
    const FENCE_LINE = /^[ \t]*(`{3,}|~{3,})(.*)$/gm;
    let openFence = null;
    for (const [, marker, rest] of content.matchAll(FENCE_LINE)) {
        const fenceChar = marker[0];
        if (openFence === null) {
            openFence = { fenceChar, fenceLength: marker.length };
            continue;
        }
        const closesFence = fenceChar === openFence.fenceChar &&
            marker.length >= openFence.fenceLength &&
            rest.trim() === '';
        if (closesFence) {
            openFence = null;
        }
    }
    return openFence;
}
314
/**
 * Finds a cut position at or before `limit` that does not split a Markdown
 * link or image (`[label](target)` / `![alt](target)`).
 *
 * Only the kept prefix `content[0, limit)` is examined. The cut is moved back
 * to the last `[` (or to the `!` directly before it) only when that bracket
 * starts a construct that is still unfinished at the cut:
 *   - the label has no closing `]` yet, within the same paragraph,
 *   - the `(target` part has no closing `)` yet, or
 *   - the cut falls exactly between `]` and `(`.
 * Completed bracket pairs that are not links (for example `[1]` or `[x]`) no
 * longer pull the cut back, which previously discarded everything after them.
 *
 * @param {string} content - Full Markdown text.
 * @param {number} limit - Desired cut position (exclusive end index).
 * @returns {number} A cut position that is never greater than `limit`.
 */
function findSafeLinkBoundary(content, limit) {
    if (limit <= 0) {
        return limit;
    }
    // Do not leave a dangling "!" when the cut falls between "!" and "[" of an image.
    const cutsImageBang = content[limit] === '[' && content[limit - 1] === '!';
    const end = cutsImageBang ? limit - 1 : limit;
    if (end <= 0) {
        return end;
    }
    // Search strictly inside the kept prefix; index `end` itself is already cut away.
    const lastBracket = content.lastIndexOf('[', end - 1);
    if (lastBracket === -1) {
        return end;
    }
    const tail = content.substring(lastBracket, end);
    // "[partial label" - a blank line ends the paragraph, so the bracket cannot be a link start.
    const unfinishedLabel = !tail.includes(']') && !/\n[ \t]*\n/.test(tail);
    // "[label](partial-target"
    const unfinishedTarget = /^\[[^\]]*\]\([^)]*$/.test(tail);
    // "[label]" with the "(" being the first character that is cut away.
    const splitBeforeTarget = /^\[[^\]]*\]$/.test(tail) && content[end] === '(';
    if (!unfinishedLabel && !unfinishedTarget && !splitBeforeTarget) {
        return end;
    }
    const isImage = lastBracket > 0 && content[lastBracket - 1] === '!';
    return isImage ? lastBracket - 1 : lastBracket;
}
327
/**
 * Truncates `content` to roughly `limit` characters and appends `marker`.
 *
 * If the cut falls inside a fenced code block, room is reserved for a closing
 * fence and the fence is closed before the marker. The fence state is checked
 * again after that extra shrink: reserving room can cut off the opening fence
 * itself, in which case appending a "closing" fence would instead open a new
 * block around the marker. Outside code fences the cut is moved back so that
 * a Markdown link or image is not split.
 *
 * The result stays within `limit` whenever `limit` is at least the marker
 * length; the one best-effort case is when the fence found after the shrink
 * needs a longer closer than the one room was reserved for.
 *
 * @param {string} content - Full Markdown text.
 * @param {number} limit - Maximum number of characters to return.
 * @param {string} marker - Text appended to signal truncation.
 * @returns {string} `content` unchanged when it already fits, otherwise the truncated text.
 */
function truncateWithMarker(content, limit, marker) {
    if (content.length <= limit) {
        return content;
    }
    const buildFenceCloser = (fence) => `\n${fence.fenceChar.repeat(fence.fenceLength)}\n`;
    const maxContentLength = Math.max(0, limit - marker.length);
    let cutLength = maxContentLength;
    const openFence = getOpenCodeFence(content.substring(0, maxContentLength));
    if (openFence) {
        // Reserve room for the closing fence in addition to the marker.
        cutLength = Math.max(0, maxContentLength - buildFenceCloser(openFence).length);
        const shortened = content.substring(0, cutLength);
        const fenceAfterCut = getOpenCodeFence(shortened);
        if (fenceAfterCut) {
            return `${shortened}${buildFenceCloser(fenceAfterCut)}${marker}`;
        }
        // The shrink removed the opening fence: fall through and treat the cut as plain text.
    }
    const safeBoundary = findSafeLinkBoundary(content, cutLength);
    return `${content.substring(0, Math.min(safeBoundary, cutLength))}${marker}`;
}
344
/**
 * Appends `marker` to content that was already truncated upstream.
 *
 * When the content ends inside a fenced code block, the fence is closed first
 * and the marker follows it. The link-boundary adjustment is applied only
 * when no fence is open: brackets inside code are not Markdown links, and
 * cutting back to one would also remove the closing fence that was just added.
 *
 * @param {string} content - Markdown text (may be empty).
 * @param {string} marker - Text appended to signal truncation.
 * @returns {string} `marker` alone for empty content, `content` unchanged when
 *   it already ends with `marker`, otherwise the content followed by the marker.
 */
function appendTruncationMarker(content, marker) {
    if (!content) {
        return marker;
    }
    if (content.endsWith(marker)) {
        return content;
    }
    const openFence = getOpenCodeFence(content);
    if (openFence) {
        const fenceCloser = `\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`;
        return `${content}${fenceCloser}${marker}`;
    }
    // Drop a trailing, unfinished link/image so the marker does not end up inside it.
    const safeBoundary = findSafeLinkBoundary(content, content.length);
    if (safeBoundary < content.length) {
        return `${content.substring(0, safeBoundary)}${marker}`;
    }
    return `${content}${marker}`;
}
359
/**
 * Applies the inline-size limit to markdown content, combining the global
 * limit from `config.constants.maxInlineContentChars` with an optional
 * per-call override.
 */
class InlineContentLimiter {
    /**
     * Truncates `content` when it exceeds the effective inline limit.
     * @param {string} content - Markdown to return inline.
     * @param {number | undefined} inlineLimitOverride - Per-call limit, if any.
     * @returns {{ content: string, contentSize: number, truncated?: true }}
     *   `contentSize` is always the length before truncation; `truncated` is
     *   present only when the content was cut.
     */
    apply(content, inlineLimitOverride) {
        const contentSize = content.length;
        const inlineLimit = this.resolveInlineLimit(inlineLimitOverride);
        // A limit of zero or less means "unlimited".
        const fitsInline = inlineLimit <= 0 || contentSize <= inlineLimit;
        if (fitsInline) {
            return { content, contentSize };
        }
        return {
            content: truncateWithMarker(content, inlineLimit, TRUNCATION_MARKER),
            contentSize,
            truncated: true,
        };
    }

    /**
     * Resolves the effective limit from the global setting and the override.
     * @param {number | undefined} inlineLimitOverride - Per-call limit, if any.
     * @returns {number} The global limit when no override is given or the
     *   override is 0 while a global limit exists; the smaller of the two when
     *   both are positive; otherwise the override.
     */
    resolveInlineLimit(inlineLimitOverride) {
        const globalLimit = config.constants.maxInlineContentChars;
        if (inlineLimitOverride === undefined) {
            return globalLimit;
        }
        if (!(globalLimit > 0)) {
            // No global cap configured: the caller's value stands as-is.
            return inlineLimitOverride;
        }
        if (inlineLimitOverride > 0) {
            return Math.min(inlineLimitOverride, globalLimit);
        }
        return inlineLimitOverride === 0 ? globalLimit : inlineLimitOverride;
    }
}
388
/** Shared limiter instance used by `applyInlineContentLimit`. */
const inlineLimiter = new InlineContentLimiter();

/**
 * Applies the inline content limit using the shared limiter.
 * @param {string} content - Markdown to return inline.
 * @param {number | undefined} inlineLimitOverride - Per-call limit, if any.
 * @returns {{ content: string, contentSize: number, truncated?: true }} Result of `InlineContentLimiter#apply`.
 */
function applyInlineContentLimit(content, inlineLimitOverride) {
    const limited = inlineLimiter.apply(content, inlineLimitOverride);
    return limited;
}
392
+ /* -------------------------------------------------------------------------------------------------
393
+ * Tool response blocks (text only)
394
+ * ------------------------------------------------------------------------------------------------- */
395
/**
 * Serializes structured content into a single text content block.
 * @param {unknown} structuredContent - Value to serialize with `JSON.stringify`.
 * @returns {{ type: 'text', text: string }} The text block.
 */
function buildTextBlock(structuredContent) {
    const text = JSON.stringify(structuredContent);
    return { type: 'text', text };
}
401
/**
 * Builds the content-block list for a tool response.
 * @param {unknown} structuredContent - Structured result of the tool.
 * @returns {Array<{ type: 'text', text: string }>} A one-element list holding the JSON text block.
 */
function buildToolContentBlocks(structuredContent) {
    const textBlock = buildTextBlock(structuredContent);
    return [textBlock];
}
404
/**
 * Normalizes a URL and then applies the raw-content URL transformation.
 * @param {string} url - URL supplied by the caller.
 * @returns {{ normalizedUrl: string, originalUrl: string, transformed: boolean }}
 *   `originalUrl` is the output of `normalizeUrl`; `normalizedUrl` and
 *   `transformed` come from `transformToRawUrl`.
 */
function resolveNormalizedUrl(url) {
    const originalUrl = normalizeUrl(url).normalizedUrl;
    const rawResult = transformToRawUrl(originalUrl);
    return {
        normalizedUrl: rawResult.url,
        originalUrl,
        transformed: rawResult.transformed,
    };
}
409
/**
 * Emits a debug log entry when the URL was rewritten to a raw-content URL.
 * @param {{ transformed: boolean, originalUrl: string }} resolvedUrl - Result of `resolveNormalizedUrl`.
 * @returns {void}
 */
function logRawUrlTransformation(resolvedUrl) {
    if (resolvedUrl.transformed) {
        logDebug('Using transformed raw content URL', {
            original: resolvedUrl.originalUrl,
        });
    }
}
416
/**
 * Reads a string `title` property from an arbitrary value.
 * @param {unknown} value - Candidate object.
 * @returns {string | undefined} The title, or `undefined` when absent or not a string.
 */
function extractTitle(value) {
    const candidate = asRecord(value)?.title;
    if (typeof candidate !== 'string') {
        return undefined;
    }
    return candidate;
}
421
/**
 * Logs why a cache lookup could not be used.
 * Reasons starting with "deserialize" are logged as warnings, all others at debug level.
 * @param {string} reason - Short description of the miss.
 * @param {string} cacheNamespace - Cache namespace of the entry.
 * @param {string} normalizedUrl - URL the lookup was made for.
 * @param {unknown} [error] - Error to include in the log context, if any.
 * @returns {void}
 */
function logCacheMiss(reason, cacheNamespace, normalizedUrl, error) {
    const context = { namespace: cacheNamespace, url: normalizedUrl };
    if (error) {
        context.error = getErrorMessage(error);
    }
    const emit = reason.startsWith('deserialize') ? logWarn : logDebug;
    emit(`Cache miss due to ${reason}`, context);
}
429
/**
 * Tries to serve a result from the cache.
 * @param {{ cacheKey: string | null | undefined, deserialize?: Function, cacheNamespace: string, normalizedUrl: string }} params
 *   Lookup key, deserializer for the stored content, and values used for logging.
 * @returns {{ data: unknown, fromCache: true, url: string, fetchedAt: string, cacheKey: string } | null}
 *   The cached result, or `null` when there is no key, no entry, no
 *   deserializer, or deserialization throws or yields `undefined`.
 */
function attemptCacheRetrieval(params) {
    const { cacheKey, deserialize, cacheNamespace, normalizedUrl } = params;
    const cached = cacheKey ? cache.get(cacheKey) : undefined;
    if (!cached) {
        return null;
    }
    const reportMiss = (reason, error) => logCacheMiss(reason, cacheNamespace, normalizedUrl, error);
    if (!deserialize) {
        reportMiss('missing deserializer');
        return null;
    }
    let data;
    try {
        data = deserialize(cached.content);
    }
    catch (error) {
        reportMiss('deserialize exception', error);
        return null;
    }
    if (data === undefined) {
        // The deserializer signals an unusable entry by returning undefined.
        reportMiss('deserialize failure');
        return null;
    }
    logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
    const { fetchedAt } = cached;
    return { data, fromCache: true, url: normalizedUrl, fetchedAt, cacheKey };
}
461
/**
 * Stores a result in the cache; failures are logged as warnings and not rethrown.
 * @param {{ cacheKey: string | null | undefined, data: unknown, serialize?: Function, normalizedUrl: string, cacheNamespace: string, force?: boolean }} params
 *   Entry key and payload. `serialize` defaults to `JSON.stringify`; `force`
 *   is forwarded to `cache.set` as `{ force: true }` when truthy.
 * @returns {void}
 */
function persistCache(params) {
    const { cacheKey, data, serialize, normalizedUrl, cacheNamespace, force } = params;
    if (!cacheKey) {
        return;
    }
    const toText = serialize ?? JSON.stringify;
    const entryMetadata = { url: normalizedUrl };
    const title = extractTitle(data);
    if (title !== undefined) {
        entryMetadata.title = title;
    }
    const setOptions = force ? { force: true } : undefined;
    try {
        cache.set(cacheKey, toText(data), entryMetadata, setOptions);
    }
    catch (error) {
        logWarn('Failed to persist cache entry', {
            namespace: cacheNamespace,
            url: normalizedUrl,
            error: getErrorMessage(error),
        });
    }
}
482
/**
 * Runs the fetch pipeline: normalize the URL, try the cache, otherwise fetch,
 * transform, and (when caching is enabled) store the result.
 *
 * On a fresh fetch the result is cached under the key of the requested URL
 * and, when the response ended at a different final URL that yields a
 * different key, under that key as well.
 *
 * @param {object} options - Reads `url`, `cacheNamespace`, `cacheVary`,
 *   `forceRefresh`, `signal`, `transform`, `serialize` and `deserialize`.
 * @returns {Promise<object>} The pipeline result with `data`, `fromCache`,
 *   `url`, `originalUrl`, `fetchedAt` and `cacheKey`; `finalUrl` is included
 *   only for fresh fetches.
 */
export async function executeFetchPipeline(options) {
    const resolvedUrl = resolveNormalizedUrl(options.url);
    logRawUrlTransformation(resolvedUrl);
    const { normalizedUrl, originalUrl } = resolvedUrl;
    const cacheKey = cache.createCacheKey(options.cacheNamespace, normalizedUrl, options.cacheVary);

    if (!options.forceRefresh) {
        const cachedResult = attemptCacheRetrieval({
            cacheKey,
            deserialize: options.deserialize,
            cacheNamespace: options.cacheNamespace,
            normalizedUrl,
        });
        if (cachedResult) {
            return { ...cachedResult, originalUrl };
        }
    }

    logDebug('Fetching URL', { url: normalizedUrl });
    const fetched = await fetchNormalizedUrlBuffer(normalizedUrl, withSignal(options.signal));
    const { buffer, encoding, truncated, finalUrl } = fetched;
    // Prefer the post-redirect URL for transformation and cache metadata.
    const effectiveUrl = finalUrl || normalizedUrl;
    const transformInput = { buffer, encoding };
    if (truncated) {
        transformInput.truncated = true;
    }
    const data = await options.transform(transformInput, effectiveUrl);

    if (cache.isEnabled()) {
        const store = (key, url) => persistCache({
            cacheKey: key,
            data,
            serialize: options.serialize,
            normalizedUrl: url,
            cacheNamespace: options.cacheNamespace,
        });
        store(cacheKey, effectiveUrl);
        const wasRedirected = finalUrl && finalUrl !== normalizedUrl;
        if (wasRedirected) {
            const finalCacheKey = cache.createCacheKey(options.cacheNamespace, finalUrl, options.cacheVary);
            if (finalCacheKey && finalCacheKey !== cacheKey) {
                store(finalCacheKey, finalUrl);
            }
        }
    }

    return {
        data,
        fromCache: false,
        url: normalizedUrl,
        originalUrl,
        finalUrl,
        fetchedAt: new Date().toISOString(),
        cacheKey,
    };
}
532
/**
 * Runs the fetch pipeline in the 'markdown' cache namespace and applies the
 * inline content limit to the resulting content.
 * @param {object} options - Reads `url`, `signal`, `cacheVary`, `forceRefresh`,
 *   `transform`, `serialize`, `deserialize` and `maxInlineChars`.
 * @param {{ executeFetchPipeline?: Function }} [deps] - Optional replacement
 *   for the pipeline function; defaults to `executeFetchPipeline`.
 * @returns {Promise<{ pipeline: object, inlineResult: object }>} Pipeline
 *   result plus the inline-limited content.
 */
export async function performSharedFetch(options, deps = {}) {
    const runPipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
    const pipelineOptions = {
        url: options.url,
        cacheNamespace: 'markdown',
        ...withSignal(options.signal),
    };
    // Optional settings are only set when provided, so absent keys stay absent.
    if (options.cacheVary) {
        pipelineOptions.cacheVary = options.cacheVary;
    }
    if (options.forceRefresh) {
        pipelineOptions.forceRefresh = true;
    }
    pipelineOptions.transform = options.transform;
    if (options.serialize) {
        pipelineOptions.serialize = options.serialize;
    }
    if (options.deserialize) {
        pipelineOptions.deserialize = options.deserialize;
    }
    const pipeline = await runPipeline(pipelineOptions);
    const inlineResult = applyInlineContentLimit(pipeline.data.content, options.maxInlineChars);
    return { pipeline, inlineResult };
}
548
+ /* -------------------------------------------------------------------------------------------------
549
+ * Tool error mapping
550
+ * ------------------------------------------------------------------------------------------------- */
551
/**
 * Builds an error tool response.
 * @param {string} message - Error message to report.
 * @param {string} url - URL the failed request was made for.
 * @param {{ statusCode?: number, details?: object }} [extra] - Optional status
 *   code (included when not `undefined`) and details (included when truthy).
 * @returns {{ content: object[], structuredContent: object, isError: true }} The error response.
 */
export function createToolErrorResponse(message, url, extra) {
    const structuredContent = { error: message, url };
    if (extra?.statusCode !== undefined) {
        structuredContent.statusCode = extra.statusCode;
    }
    if (extra?.details) {
        structuredContent.details = extra.details;
    }
    return {
        content: [buildTextBlock(structuredContent)],
        structuredContent,
        isError: true,
    };
}
566
/**
 * Checks whether a value is an `Error` that `isSystemError` accepts and whose
 * `code` is 'VALIDATION_ERROR'.
 * @param {unknown} error - Value to test.
 * @returns {boolean} Whether all three conditions hold.
 */
function isValidationError(error) {
    const isErrorInstance = error instanceof Error;
    return (isErrorInstance &&
        isSystemError(error) &&
        error.code === 'VALIDATION_ERROR');
}
571
/**
 * Chooses the message reported to the caller for a failure.
 * @param {unknown} error - The thrown value.
 * @param {string} fallbackMessage - Prefix used for errors that are neither
 *   validation errors nor `FetchError`s.
 * @returns {string} The error's own message for validation errors and
 *   `FetchError`s; otherwise `fallbackMessage` followed by the error message
 *   or "Unknown error".
 */
function resolveToolErrorMessage(error, fallbackMessage) {
    const hasUserFacingMessage = isValidationError(error) || error instanceof FetchError;
    if (hasUserFacingMessage) {
        return error.message;
    }
    const reason = error instanceof Error ? error.message : 'Unknown error';
    return `${fallbackMessage}: ${reason}`;
}
580
/**
 * Converts a thrown value into an error tool response.
 * @param {unknown} error - The thrown value.
 * @param {string} url - URL the failed request was made for.
 * @param {string} [fallbackMessage='Operation failed'] - Prefix for unexpected errors.
 * @returns {{ content: object[], structuredContent: object, isError: true }}
 *   The error response; `FetchError`s also contribute `statusCode` and `details`.
 */
export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
    const message = resolveToolErrorMessage(error, fallbackMessage);
    if (!(error instanceof FetchError)) {
        return createToolErrorResponse(message, url);
    }
    const { statusCode, details } = error;
    return createToolErrorResponse(message, url, { statusCode, details });
}
590
/**
 * Copies the known metadata fields that have a truthy value.
 * @param {object | undefined} metadata - Extracted page metadata.
 * @returns {object | undefined} An object with only the populated fields
 *   (title, description, author, image, favicon, publishedAt, modifiedAt), or
 *   `undefined` when `metadata` is missing or none of the fields is set.
 */
function normalizeExtractedMetadata(metadata) {
    if (!metadata) {
        return undefined;
    }
    const fieldNames = [
        'title',
        'description',
        'author',
        'image',
        'favicon',
        'publishedAt',
        'modifiedAt',
    ];
    const normalized = {};
    for (const field of fieldNames) {
        const value = metadata[field];
        if (value) {
            normalized[field] = value;
        }
    }
    return Object.keys(normalized).length === 0 ? undefined : normalized;
}
606
/**
 * Schema for a cached markdown entry. Unknown top-level keys are accepted,
 * the nested `metadata` object is strict, and at least one of
 * `markdown` / `content` must be a string.
 */
const cachedMarkdownSchema = (() => {
    const optionalString = () => z.string().optional();
    const hasMarkdownBody = (value) => typeof value.markdown === 'string' || typeof value.content === 'string';
    const metadataSchema = z
        .strictObject({
            title: optionalString(),
            description: optionalString(),
            author: optionalString(),
            image: optionalString(),
            favicon: optionalString(),
            publishedAt: optionalString(),
            modifiedAt: optionalString(),
        })
        .optional();
    return z
        .object({
            markdown: optionalString(),
            content: optionalString(),
            title: optionalString(),
            metadata: metadataSchema,
            truncated: z.boolean().optional(),
        })
        .catchall(z.unknown())
        .refine(hasMarkdownBody, { message: 'Missing markdown/content' });
})();
626
/**
 * Restores a markdown result from its cached JSON text.
 * @param {string} cached - Serialized cache content.
 * @returns {{ content: string, markdown: string, title: string | undefined, metadata?: object, truncated: boolean } | undefined}
 *   The restored result, or `undefined` when the text is not valid JSON or
 *   does not satisfy `cachedMarkdownSchema`. `markdown` is preferred over
 *   `content`; `truncated` defaults to `false`.
 */
export function parseCachedMarkdownResult(cached) {
    const { success, data } = cachedMarkdownSchema.safeParse(safeJsonParse(cached));
    if (!success) {
        return undefined;
    }
    const markdown = data.markdown ?? data.content;
    if (typeof markdown !== 'string') {
        return undefined;
    }
    const restored = { content: markdown, markdown, title: data.title };
    const metadata = normalizeExtractedMetadata(data.metadata);
    if (metadata) {
        restored.metadata = metadata;
    }
    restored.truncated = data.truncated ?? false;
    return restored;
}
643
/**
 * Converts a fetched buffer to markdown via `transformBufferToMarkdown`.
 * @param {{ buffer: unknown, encoding: unknown, truncated?: boolean }} input - Fetched payload.
 * @param {string} url - URL passed through to the transformer.
 * @param {AbortSignal | undefined} signal - Abort signal forwarded when defined.
 * @param {boolean | undefined} skipNoiseRemoval - Forwarded as `skipNoiseRemoval: true` when truthy.
 * @returns {Promise<object>} The transformer result with `content` set to its
 *   `markdown` and `truncated` true when either the result or the input was truncated.
 */
const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
    const transformOptions = {
        includeMetadata: true,
        encoding: input.encoding,
        ...withSignal(signal),
    };
    if (skipNoiseRemoval) {
        transformOptions.skipNoiseRemoval = true;
    }
    if (input.truncated) {
        transformOptions.inputTruncated = true;
    }
    const result = await transformBufferToMarkdown(input.buffer, url, transformOptions);
    return {
        ...result,
        content: result.markdown,
        truncated: Boolean(result.truncated || input.truncated),
    };
};
654
/**
 * Serializes the cacheable part of a markdown result.
 * @param {{ markdown: string, title?: string, metadata?: object, truncated?: boolean }} result - Transform result.
 * @returns {string} JSON text containing only `markdown`, `title`, `metadata` and `truncated`.
 */
function serializeMarkdownResult(result) {
    const { markdown, title, metadata, truncated } = result;
    return JSON.stringify({ markdown, title, metadata, truncated });
}
662
+ /* -------------------------------------------------------------------------------------------------
663
+ * fetch-url tool implementation
664
+ * ------------------------------------------------------------------------------------------------- */
665
/**
 * Assembles the structured tool output from the pipeline and inline results.
 *
 * When the pipeline data was truncated upstream but the inline limiter did
 * not truncate, the truncation marker is appended to the markdown here.
 * Keys are added in a fixed order because the object is also serialized to
 * JSON text for the response.
 *
 * @param {object} pipeline - Result of the fetch pipeline.
 * @param {{ content: string, contentSize: number, truncated?: boolean }} inlineResult - Inline-limited content.
 * @param {string} inputUrl - URL exactly as provided by the caller.
 * @returns {object} The structured content.
 */
function buildStructuredContent(pipeline, inlineResult, inputUrl) {
    const { data } = pipeline;
    const truncated = inlineResult.truncated ?? data.truncated;
    const needsMarker = data.truncated &&
        !inlineResult.truncated &&
        typeof inlineResult.content === 'string';
    const markdown = needsMarker
        ? appendTruncationMarker(inlineResult.content, TRUNCATION_MARKER)
        : inlineResult.content;

    const structured = {
        url: pipeline.originalUrl ?? pipeline.url,
        resolvedUrl: pipeline.url,
    };
    if (pipeline.finalUrl) {
        structured.finalUrl = pipeline.finalUrl;
    }
    structured.inputUrl = inputUrl;
    structured.title = data.title;
    if (data.metadata) {
        structured.metadata = data.metadata;
    }
    structured.markdown = markdown;
    structured.fromCache = pipeline.fromCache;
    structured.fetchedAt = pipeline.fetchedAt;
    structured.contentSize = inlineResult.contentSize;
    if (truncated) {
        structured.truncated = true;
    }
    return structured;
}
688
/**
 * Builds the content blocks of a successful fetch-url response.
 * @param {object} structuredContent - Structured tool output.
 * @returns {object[]} The blocks produced by `buildToolContentBlocks`.
 */
function buildFetchUrlContentBlocks(structuredContent) {
    const blocks = buildToolContentBlocks(structuredContent);
    return blocks;
}
691
/**
 * Builds the successful tool response and checks it against the output schema.
 * A schema mismatch is only logged as a warning; the response is returned either way.
 * @param {object} pipeline - Result of the fetch pipeline.
 * @param {object} inlineResult - Inline-limited content.
 * @param {string} inputUrl - URL exactly as provided by the caller.
 * @returns {{ content: object[], structuredContent: object }} The tool response.
 */
function buildResponse(pipeline, inlineResult, inputUrl) {
    const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
    const content = buildFetchUrlContentBlocks(structuredContent);
    const { success, error } = fetchUrlOutputSchema.safeParse(structuredContent);
    if (!success) {
        logWarn('Tool output schema validation failed', {
            url: inputUrl,
            issues: error.issues,
        });
    }
    return { content, structuredContent };
}
707
/**
 * Runs the shared fetch with the markdown transform and cache (de)serializers.
 * @param {string} url - URL to fetch.
 * @param {AbortSignal | undefined} signal - Abort signal for fetch and transform.
 * @param {{ report: Function } | undefined} progress - Progress reporter; step 3 is reported when the transform starts.
 * @param {boolean | undefined} skipNoiseRemoval - Disables noise removal and varies the cache key.
 * @param {boolean | undefined} forceRefresh - Bypasses the cache lookup when truthy.
 * @param {number | undefined} maxInlineChars - Per-call inline limit, forwarded when defined.
 * @returns {Promise<{ pipeline: object, inlineResult: object }>} Result of `performSharedFetch`.
 */
async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars) {
    const transform = async ({ buffer, encoding, truncated }, normalizedUrl) => {
        if (progress) {
            // Fire-and-forget: progress reporting must not delay the transform.
            void progress.report(3, 'Transforming content');
        }
        const transformInput = { buffer, encoding };
        if (truncated) {
            transformInput.truncated = truncated;
        }
        return markdownTransform(transformInput, normalizedUrl, signal, skipNoiseRemoval);
    };
    const request = { url, ...withSignal(signal) };
    if (skipNoiseRemoval) {
        request.cacheVary = { skipNoiseRemoval: true };
    }
    if (forceRefresh) {
        request.forceRefresh = true;
    }
    if (maxInlineChars !== undefined) {
        request.maxInlineChars = maxInlineChars;
    }
    request.transform = transform;
    request.serialize = serializeMarkdownResult;
    request.deserialize = parseCachedMarkdownResult;
    return performSharedFetch(request);
}
724
/**
 * Executes one fetch-url request and reports progress along the way.
 * @param {{ url: string, skipNoiseRemoval?: boolean, forceRefresh?: boolean, maxInlineChars?: number }} input - Tool input.
 * @param {object | undefined} extra - Request context (abort signal, progress token, notification sender).
 * @returns {Promise<object>} The tool response, or an error response when `url` is empty.
 */
async function executeFetch(input, extra) {
    const { url, skipNoiseRemoval, forceRefresh, maxInlineChars } = input;
    if (!url) {
        return createToolErrorResponse('URL is required', '');
    }
    const signal = buildToolAbortSignal(extra?.signal);
    const progress = createProgressReporter(extra);
    // Progress reports are intentionally not awaited.
    void progress.report(1, 'Validating URL');
    logDebug('Fetching URL', { url });
    void progress.report(2, 'Fetching content');
    const fetched = await fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars);
    if (fetched.pipeline.fromCache) {
        void progress.report(3, 'Using cached content');
    }
    void progress.report(4, 'Finalizing response');
    return buildResponse(fetched.pipeline, fetched.inlineResult, url);
}
741
/**
 * Tool handler for fetch-url: runs the fetch and maps any failure to an error response.
 * @param {object} input - Tool input (see `fetchUrlInputSchema`).
 * @param {object | undefined} extra - Request context.
 * @returns {Promise<object>} The tool response; failures are logged and
 *   returned as error responses instead of being thrown.
 */
export async function fetchUrlToolHandler(input, extra) {
    try {
        return await executeFetch(input, extra);
    }
    catch (error) {
        logError('fetch-url tool error', error instanceof Error ? error : undefined);
        return handleToolError(error, input.url, 'Failed to fetch URL');
    }
}
747
/**
 * Complete definition of the fetch-url tool: identity, schemas, handler,
 * task-support mode and behavior annotations. Consumed by `registerTools`.
 */
const TOOL_DEFINITION = {
    name: FETCH_URL_TOOL_NAME,
    title: 'Fetch URL',
    description: FETCH_URL_TOOL_DESCRIPTION,
    inputSchema: fetchUrlInputSchema,
    outputSchema: fetchUrlOutputSchema,
    handler: fetchUrlToolHandler,
    execution: { taskSupport: 'optional' },
    annotations: {
        readOnlyHint: true,
        destructiveHint: false,
        idempotentHint: true,
        openWorldHint: true,
    },
};
764
/**
 * Wraps a handler so it always runs inside a request context.
 *
 * When `getRequestId()` already yields an id, the handler is called directly.
 * Otherwise a context is created whose request id (also used as operation id)
 * comes from `extra` or a fresh UUID, plus the session id when one is found.
 *
 * @param {Function} handler - Handler taking `(params, extra)`.
 * @returns {Function} Async wrapper with the same `(params, extra)` signature.
 */
export function withRequestContextIfMissing(handler) {
    return async (params, extra) => {
        if (getRequestId()) {
            return handler(params, extra);
        }
        const requestId = resolveRequestIdFromExtra(extra) ?? randomUUID();
        const sessionId = resolveSessionIdFromExtra(extra);
        const context = { requestId, operationId: requestId };
        if (sessionId) {
            context.sessionId = sessionId;
        }
        return runWithRequestContext(context, () => handler(params, extra));
    };
}
779
/**
 * Reads the request id from the request context.
 * @param {unknown} extra - Request context.
 * @returns {string | undefined} `extra.requestId` as a string when it is a
 *   string or a number; otherwise `undefined`.
 */
function resolveRequestIdFromExtra(extra) {
    if (!isObject(extra)) {
        return undefined;
    }
    const { requestId } = extra;
    switch (typeof requestId) {
        case 'string':
            return requestId;
        case 'number':
            return String(requestId);
        default:
            return undefined;
    }
}
789
/**
 * Reads the session id from the request context.
 * @param {unknown} extra - Request context.
 * @returns {string | undefined} `extra.sessionId` when it is a string;
 *   otherwise the `mcp-session-id` entry of `extra.requestInfo.headers` when
 *   that is a string; otherwise `undefined`.
 */
function resolveSessionIdFromExtra(extra) {
    if (!isObject(extra)) {
        return undefined;
    }
    const { sessionId } = extra;
    if (typeof sessionId === 'string') {
        return sessionId;
    }
    // Fall back to the session header of the underlying request.
    const headers = readNestedRecord(extra, ['requestInfo', 'headers']);
    const fromHeader = headers ? headers['mcp-session-id'] : undefined;
    if (typeof fromHeader !== 'string') {
        return undefined;
    }
    return fromHeader;
}
799
/**
 * Registers the fetch-url tool on the server, unless its name is missing from
 * `config.tools.enabled`.
 * @param {{ registerTool: Function }} server - Server to register the tool on.
 * @returns {void}
 */
export function registerTools(server) {
    const isEnabled = config.tools.enabled.includes(FETCH_URL_TOOL_NAME);
    if (!isEnabled) {
        return;
    }
    const { name, title, description, inputSchema, outputSchema, annotations, execution, handler } = TOOL_DEFINITION;
    const registration = {
        title,
        description,
        inputSchema,
        outputSchema,
        annotations,
        execution,
        // Tool-specific icon.
        icons: [TOOL_ICON],
    };
    server.registerTool(name, registration, withRequestContextIfMissing(handler));
}