@j0hanz/superfetch 2.5.2 → 2.6.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +356 -223
  2. package/dist/assets/logo.svg +24837 -24835
  3. package/dist/cache.d.ts +28 -20
  4. package/dist/cache.js +292 -514
  5. package/dist/config.d.ts +41 -7
  6. package/dist/config.js +298 -148
  7. package/dist/crypto.js +25 -12
  8. package/dist/dom-noise-removal.js +379 -421
  9. package/dist/errors.d.ts +2 -2
  10. package/dist/errors.js +25 -8
  11. package/dist/fetch.d.ts +18 -16
  12. package/dist/fetch.js +1132 -526
  13. package/dist/host-normalization.js +40 -10
  14. package/dist/http-native.js +628 -287
  15. package/dist/index.js +67 -7
  16. package/dist/instructions.md +44 -30
  17. package/dist/ip-blocklist.d.ts +8 -0
  18. package/dist/ip-blocklist.js +65 -0
  19. package/dist/json.js +14 -9
  20. package/dist/language-detection.d.ts +2 -11
  21. package/dist/language-detection.js +289 -280
  22. package/dist/markdown-cleanup.d.ts +0 -1
  23. package/dist/markdown-cleanup.js +391 -429
  24. package/dist/mcp-validator.js +4 -2
  25. package/dist/mcp.js +184 -135
  26. package/dist/observability.js +89 -21
  27. package/dist/resources.js +16 -6
  28. package/dist/server-tuning.d.ts +2 -0
  29. package/dist/server-tuning.js +25 -23
  30. package/dist/session.d.ts +1 -0
  31. package/dist/session.js +41 -33
  32. package/dist/tasks.d.ts +2 -0
  33. package/dist/tasks.js +91 -9
  34. package/dist/timer-utils.d.ts +5 -0
  35. package/dist/timer-utils.js +20 -0
  36. package/dist/tools.d.ts +28 -5
  37. package/dist/tools.js +317 -183
  38. package/dist/transform-types.d.ts +5 -1
  39. package/dist/transform.d.ts +3 -2
  40. package/dist/transform.js +1138 -421
  41. package/dist/type-guards.d.ts +1 -0
  42. package/dist/type-guards.js +7 -0
  43. package/dist/workers/transform-child.d.ts +1 -0
  44. package/dist/workers/transform-child.js +118 -0
  45. package/dist/workers/transform-worker.js +87 -78
  46. package/package.json +21 -13
package/dist/tools.js CHANGED
@@ -3,19 +3,27 @@ import { z } from 'zod';
3
3
  import * as cache from './cache.js';
4
4
  import { config } from './config.js';
5
5
  import { FetchError, getErrorMessage, isSystemError } from './errors.js';
6
- import { fetchNormalizedUrl, normalizeUrl, transformToRawUrl, } from './fetch.js';
6
+ import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './fetch.js';
7
7
  import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
8
- import { transformHtmlToMarkdown } from './transform.js';
8
+ import { transformBufferToMarkdown } from './transform.js';
9
9
  import { isObject } from './type-guards.js';
10
10
  const TRUNCATION_MARKER = '...[truncated]';
11
11
  const FETCH_PROGRESS_TOTAL = 4;
12
12
  const PROGRESS_NOTIFICATION_TIMEOUT_MS = 5000;
13
- const fetchUrlInputSchema = z.strictObject({
13
+ export const fetchUrlInputSchema = z.strictObject({
14
14
  url: z
15
15
  .url({ protocol: /^https?$/i })
16
16
  .min(1)
17
17
  .max(config.constants.maxUrlLength)
18
18
  .describe('The URL of the webpage to fetch and convert to Markdown'),
19
+ skipNoiseRemoval: z
20
+ .boolean()
21
+ .optional()
22
+ .describe('When true, preserves navigation, footers, and other elements normally filtered as noise'),
23
+ forceRefresh: z
24
+ .boolean()
25
+ .optional()
26
+ .describe('When true, bypasses the cache and fetches fresh content from the URL'),
19
27
  });
20
28
  const fetchUrlOutputSchema = z.strictObject({
21
29
  url: z
@@ -34,16 +42,29 @@ const fetchUrlOutputSchema = z.strictObject({
34
42
  .optional()
35
43
  .describe('The normalized or transformed URL that was fetched'),
36
44
  title: z.string().max(512).optional().describe('Page title'),
37
- markdown: z
38
- .string()
39
- .max(config.constants.maxInlineContentChars)
45
+ markdown: (config.constants.maxInlineContentChars > 0
46
+ ? z.string().max(config.constants.maxInlineContentChars)
47
+ : z.string())
40
48
  .optional()
41
49
  .describe('The extracted content in Markdown format'),
50
+ truncated: z
51
+ .boolean()
52
+ .optional()
53
+ .describe('Whether the returned markdown was truncated'),
42
54
  error: z
43
55
  .string()
44
56
  .max(2048)
45
57
  .optional()
46
58
  .describe('Error message if the request failed'),
59
+ statusCode: z
60
+ .number()
61
+ .int()
62
+ .optional()
63
+ .describe('HTTP status code for failed requests'),
64
+ details: z
65
+ .record(z.string(), z.unknown())
66
+ .optional()
67
+ .describe('Additional error details when available'),
47
68
  });
48
69
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
49
70
  const FETCH_URL_TOOL_DESCRIPTION = `
@@ -55,7 +76,7 @@ This tool is useful for:
55
76
  - Caching content to speed up repeated queries.
56
77
 
57
78
  Limitations:
58
- - Returns truncated content if it exceeds ${config.constants.maxInlineContentChars} characters.
79
+ - Inline output may be truncated when MAX_INLINE_CONTENT_CHARS is set.
59
80
  - Does not execute complex client-side JavaScript interactions.
60
81
  `.trim();
61
82
  // Specific icon for the fetch-url tool (download cloud / web)
@@ -63,17 +84,53 @@ const TOOL_ICON = {
63
84
  src: 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSJjdXJyZW50Q29sb3IiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMjEgMTV2NGEyIDIgMCAwIDEtMiAySDVhMiAyIDAgMCAxLTItMnYtNCIvPjxwb2x5bGluZSBwb2ludHM9IjcgMTAgMTIgMTUgMTcgMTAiLz48bGluZSB4MT0iMTIiIHkxPSIxNSIgeDI9IjEyIiB5Mj0iMyIvPjwvc3ZnPg==',
64
85
  mimeType: 'image/svg+xml',
65
86
  };
87
+ function asRecord(value) {
88
+ return isObject(value) ? value : undefined;
89
+ }
90
+ function readUnknown(obj, key) {
91
+ const record = asRecord(obj);
92
+ return record ? record[key] : undefined;
93
+ }
94
+ function readString(obj, key) {
95
+ const value = readUnknown(obj, key);
96
+ return typeof value === 'string' ? value : undefined;
97
+ }
98
+ function readNestedRecord(obj, keys) {
99
+ let current = obj;
100
+ for (const key of keys) {
101
+ current = readUnknown(current, key);
102
+ if (current === undefined)
103
+ return undefined;
104
+ }
105
+ return asRecord(current);
106
+ }
107
+ function safeJsonParse(value) {
108
+ try {
109
+ return JSON.parse(value);
110
+ }
111
+ catch {
112
+ return undefined;
113
+ }
114
+ }
115
+ function withSignal(signal) {
116
+ return signal === undefined ? {} : { signal };
117
+ }
118
+ function buildToolAbortSignal(extraSignal) {
119
+ const { timeoutMs } = config.tools;
120
+ if (timeoutMs <= 0)
121
+ return extraSignal;
122
+ const timeoutSignal = AbortSignal.timeout(timeoutMs);
123
+ if (!extraSignal)
124
+ return timeoutSignal;
125
+ return AbortSignal.any([extraSignal, timeoutSignal]);
126
+ }
66
127
  /* -------------------------------------------------------------------------------------------------
67
128
  * Progress reporting
68
129
  * ------------------------------------------------------------------------------------------------- */
69
130
  function resolveRelatedTaskMeta(meta) {
70
- if (!meta)
71
- return undefined;
72
- const related = meta['io.modelcontextprotocol/related-task'];
73
- if (!isObject(related))
74
- return undefined;
75
- const { taskId } = related;
76
- return typeof taskId === 'string' ? { taskId } : undefined;
131
+ const related = readUnknown(meta, 'io.modelcontextprotocol/related-task');
132
+ const taskId = readString(related, 'taskId');
133
+ return taskId ? { taskId } : undefined;
77
134
  }
78
135
  class ToolProgressReporter {
79
136
  token;
@@ -94,79 +151,138 @@ class ToolProgressReporter {
94
151
  return new ToolProgressReporter(token, sendNotification, relatedTaskMeta);
95
152
  }
96
153
  async report(progress, message) {
154
+ const notification = {
155
+ method: 'notifications/progress',
156
+ params: {
157
+ progressToken: this.token,
158
+ progress,
159
+ total: FETCH_PROGRESS_TOTAL,
160
+ message,
161
+ ...(this.relatedTaskMeta
162
+ ? {
163
+ _meta: {
164
+ 'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
165
+ },
166
+ }
167
+ : {}),
168
+ },
169
+ };
170
+ let timeoutId;
171
+ const timeoutPromise = new Promise((resolve) => {
172
+ timeoutId = setTimeout(() => {
173
+ resolve({ timeout: true });
174
+ }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
175
+ timeoutId.unref();
176
+ });
97
177
  try {
98
- let timeoutId;
99
- const timeoutPromise = new Promise((_, reject) => {
100
- timeoutId = setTimeout(() => {
101
- reject(new Error('Progress notification timeout'));
102
- }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
103
- timeoutId.unref();
104
- });
105
- const sendPromise = this.sendNotification({
106
- method: 'notifications/progress',
107
- params: {
108
- progressToken: this.token,
109
- progress,
110
- total: FETCH_PROGRESS_TOTAL,
111
- message,
112
- ...(this.relatedTaskMeta
113
- ? {
114
- _meta: {
115
- 'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
116
- },
117
- }
118
- : {}),
119
- },
120
- }).finally(() => {
121
- if (timeoutId)
122
- clearTimeout(timeoutId);
123
- });
124
- await Promise.race([sendPromise, timeoutPromise]);
178
+ const outcome = await Promise.race([
179
+ this.sendNotification(notification).then(() => ({ ok: true })),
180
+ timeoutPromise,
181
+ ]);
182
+ if ('timeout' in outcome) {
183
+ logWarn('Progress notification timed out', { progress, message });
184
+ }
125
185
  }
126
186
  catch (error) {
127
- const isTimeout = error instanceof Error &&
128
- error.message === 'Progress notification timeout';
129
- const logMessage = isTimeout
130
- ? 'Progress notification timed out'
131
- : 'Failed to send progress notification';
132
- logWarn(logMessage, {
187
+ logWarn('Failed to send progress notification', {
133
188
  error: getErrorMessage(error),
134
189
  progress,
135
190
  message,
136
191
  });
137
192
  }
193
+ finally {
194
+ if (timeoutId)
195
+ clearTimeout(timeoutId);
196
+ }
138
197
  }
139
198
  }
140
199
  export function createProgressReporter(extra) {
141
200
  return ToolProgressReporter.create(extra);
142
201
  }
202
+ function getOpenCodeFence(content) {
203
+ const FENCE_PATTERN = /^([ \t]*)(`{3,}|~{3,})/gm;
204
+ let match;
205
+ let inFence = false;
206
+ let fenceChar = null;
207
+ let fenceLength = 0;
208
+ while ((match = FENCE_PATTERN.exec(content)) !== null) {
209
+ const marker = match[2];
210
+ if (!marker)
211
+ continue;
212
+ const [char] = marker;
213
+ if (!char)
214
+ continue;
215
+ const { length } = marker;
216
+ if (!inFence) {
217
+ inFence = true;
218
+ fenceChar = char;
219
+ fenceLength = length;
220
+ }
221
+ else if (char === fenceChar && length >= fenceLength) {
222
+ inFence = false;
223
+ fenceChar = null;
224
+ fenceLength = 0;
225
+ }
226
+ }
227
+ if (inFence && fenceChar) {
228
+ return { fenceChar, fenceLength };
229
+ }
230
+ return null;
231
+ }
232
+ function findSafeLinkBoundary(content, limit) {
233
+ const lastBracket = content.lastIndexOf('[', limit);
234
+ if (lastBracket === -1)
235
+ return limit;
236
+ const afterBracket = content.substring(lastBracket, limit);
237
+ const closedPattern = /^\[[^\]]*\]\([^)]*\)/;
238
+ if (closedPattern.test(afterBracket))
239
+ return limit;
240
+ const start = lastBracket > 0 && content[lastBracket - 1] === '!'
241
+ ? lastBracket - 1
242
+ : lastBracket;
243
+ return start;
244
+ }
245
+ function truncateWithMarker(content, limit, marker) {
246
+ if (content.length <= limit)
247
+ return content;
248
+ const maxContentLength = Math.max(0, limit - marker.length);
249
+ const tentativeContent = content.substring(0, maxContentLength);
250
+ const openFence = getOpenCodeFence(tentativeContent);
251
+ if (openFence) {
252
+ const fenceCloser = `\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`;
253
+ const adjustedLength = Math.max(0, limit - marker.length - fenceCloser.length);
254
+ return `${content.substring(0, adjustedLength)}${fenceCloser}${marker}`;
255
+ }
256
+ const safeBoundary = findSafeLinkBoundary(content, maxContentLength);
257
+ if (safeBoundary < maxContentLength) {
258
+ return `${content.substring(0, safeBoundary)}${marker}`;
259
+ }
260
+ return `${tentativeContent}${marker}`;
261
+ }
143
262
  class InlineContentLimiter {
144
263
  apply(content, cacheKey) {
145
264
  const contentSize = content.length;
146
265
  const inlineLimit = config.constants.maxInlineContentChars;
266
+ if (inlineLimit <= 0) {
267
+ return { content, contentSize };
268
+ }
147
269
  if (contentSize <= inlineLimit) {
148
270
  return { content, contentSize };
149
271
  }
150
- const resourceUri = this.resolveResourceUri(cacheKey);
151
- if (!resourceUri) {
152
- return this.buildTruncatedFallback(content, contentSize, inlineLimit);
272
+ const isTruncated = contentSize > inlineLimit;
273
+ const resourceUri = cacheKey && (cache.isEnabled() || isTruncated)
274
+ ? cache.toResourceUri(cacheKey)
275
+ : null;
276
+ const truncatedContent = truncateWithMarker(content, inlineLimit, TRUNCATION_MARKER);
277
+ if (resourceUri) {
278
+ return {
279
+ content: truncatedContent,
280
+ contentSize,
281
+ resourceUri,
282
+ resourceMimeType: 'text/markdown',
283
+ truncated: true,
284
+ };
153
285
  }
154
- return {
155
- contentSize,
156
- resourceUri,
157
- resourceMimeType: 'text/markdown',
158
- };
159
- }
160
- resolveResourceUri(cacheKey) {
161
- if (!cache.isEnabled() || !cacheKey)
162
- return null;
163
- return cache.toResourceUri(cacheKey);
164
- }
165
- buildTruncatedFallback(content, contentSize, inlineLimit) {
166
- const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
167
- const truncatedContent = content.length > inlineLimit
168
- ? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
169
- : content;
170
286
  return {
171
287
  content: truncatedContent,
172
288
  contentSize,
@@ -181,13 +297,10 @@ function applyInlineContentLimit(content, cacheKey) {
181
297
  /* -------------------------------------------------------------------------------------------------
182
298
  * Tool response blocks (text + optional resource + optional link)
183
299
  * ------------------------------------------------------------------------------------------------- */
184
- function serializeStructuredContent(structuredContent) {
185
- return JSON.stringify(structuredContent);
186
- }
187
300
  function buildTextBlock(structuredContent) {
188
301
  return {
189
302
  type: 'text',
190
- text: serializeStructuredContent(structuredContent),
303
+ text: JSON.stringify(structuredContent),
191
304
  };
192
305
  }
193
306
  function buildResourceLink(inlineResult, name) {
@@ -208,7 +321,7 @@ function buildEmbeddedResource(content, url, title) {
208
321
  if (!content)
209
322
  return null;
210
323
  const filename = cache.generateSafeFilename(url, title, undefined, '.md');
211
- const uri = `file:///${filename}`;
324
+ const uri = new URL(filename, 'file:///').href;
212
325
  return {
213
326
  type: 'resource',
214
327
  resource: {
@@ -232,21 +345,20 @@ function appendResourceBlocks(params) {
232
345
  if (link)
233
346
  blocks.push(link);
234
347
  }
235
- function buildToolContentBlocks(structuredContent, _fromCache, inlineResult, resourceName, _cacheKey, fullContent, url, title) {
236
- const blocks = [buildTextBlock(structuredContent)];
348
+ function buildToolContentBlocks(params) {
349
+ const blocks = [
350
+ buildTextBlock(params.structuredContent),
351
+ ];
237
352
  appendResourceBlocks({
238
353
  blocks,
239
- inlineResult,
240
- resourceName,
241
- url,
242
- title,
243
- fullContent,
354
+ inlineResult: params.inlineResult,
355
+ resourceName: params.resourceName,
356
+ url: params.url,
357
+ title: params.title,
358
+ fullContent: params.fullContent,
244
359
  });
245
360
  return blocks;
246
361
  }
247
- /* -------------------------------------------------------------------------------------------------
248
- * Fetch pipeline executor (normalize → raw-transform → cache → fetch → transform → persist)
249
- * ------------------------------------------------------------------------------------------------- */
250
362
  function resolveNormalizedUrl(url) {
251
363
  const { normalizedUrl: validatedUrl } = normalizeUrl(url);
252
364
  const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
@@ -260,16 +372,16 @@ function logRawUrlTransformation(resolvedUrl) {
260
372
  });
261
373
  }
262
374
  function extractTitle(value) {
263
- if (!isObject(value))
264
- return undefined;
265
- const { title } = value;
375
+ const record = asRecord(value);
376
+ const title = record ? record.title : undefined;
266
377
  return typeof title === 'string' ? title : undefined;
267
378
  }
268
- function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
269
- const log = reason === 'deserialize failure' ? logWarn : logDebug;
379
+ function logCacheMiss(reason, cacheNamespace, normalizedUrl, error) {
380
+ const log = reason.startsWith('deserialize') ? logWarn : logDebug;
270
381
  log(`Cache miss due to ${reason}`, {
271
382
  namespace: cacheNamespace,
272
383
  url: normalizedUrl,
384
+ ...(error ? { error: getErrorMessage(error) } : {}),
273
385
  });
274
386
  }
275
387
  function attemptCacheRetrieval(params) {
@@ -283,7 +395,14 @@ function attemptCacheRetrieval(params) {
283
395
  logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
284
396
  return null;
285
397
  }
286
- const data = deserialize(cached.content);
398
+ let data;
399
+ try {
400
+ data = deserialize(cached.content);
401
+ }
402
+ catch (error) {
403
+ logCacheMiss('deserialize exception', cacheNamespace, normalizedUrl, error);
404
+ return null;
405
+ }
287
406
  if (data === undefined) {
288
407
  logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
289
408
  return null;
@@ -298,7 +417,7 @@ function attemptCacheRetrieval(params) {
298
417
  };
299
418
  }
300
419
  function persistCache(params) {
301
- const { cacheKey, data, serialize, normalizedUrl } = params;
420
+ const { cacheKey, data, serialize, normalizedUrl, cacheNamespace, force } = params;
302
421
  if (!cacheKey)
303
422
  return;
304
423
  const serializer = serialize ?? JSON.stringify;
@@ -307,36 +426,49 @@ function persistCache(params) {
307
426
  url: normalizedUrl,
308
427
  ...(title === undefined ? {} : { title }),
309
428
  };
310
- cache.set(cacheKey, serializer(data), metadata);
429
+ try {
430
+ cache.set(cacheKey, serializer(data), metadata, force ? { force: true } : undefined);
431
+ }
432
+ catch (error) {
433
+ logWarn('Failed to persist cache entry', {
434
+ namespace: cacheNamespace,
435
+ url: normalizedUrl,
436
+ error: getErrorMessage(error),
437
+ });
438
+ }
311
439
  }
312
440
  export async function executeFetchPipeline(options) {
313
441
  const resolvedUrl = resolveNormalizedUrl(options.url);
314
442
  logRawUrlTransformation(resolvedUrl);
315
443
  const cacheKey = cache.createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
316
- const cachedResult = attemptCacheRetrieval({
317
- cacheKey,
318
- deserialize: options.deserialize,
319
- cacheNamespace: options.cacheNamespace,
320
- normalizedUrl: resolvedUrl.normalizedUrl,
321
- });
322
- if (cachedResult)
323
- return cachedResult;
444
+ if (!options.forceRefresh) {
445
+ const cachedResult = attemptCacheRetrieval({
446
+ cacheKey,
447
+ deserialize: options.deserialize,
448
+ cacheNamespace: options.cacheNamespace,
449
+ normalizedUrl: resolvedUrl.normalizedUrl,
450
+ });
451
+ if (cachedResult) {
452
+ return { ...cachedResult, originalUrl: resolvedUrl.originalUrl };
453
+ }
454
+ }
324
455
  logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
325
- const fetchOptions = options.signal === undefined ? {} : { signal: options.signal };
326
- const html = await fetchNormalizedUrl(resolvedUrl.normalizedUrl, fetchOptions);
327
- const data = await options.transform(html, resolvedUrl.normalizedUrl);
456
+ const { buffer, encoding } = await fetchNormalizedUrlBuffer(resolvedUrl.normalizedUrl, withSignal(options.signal));
457
+ const data = await options.transform({ buffer, encoding }, resolvedUrl.normalizedUrl);
328
458
  if (cache.isEnabled()) {
329
459
  persistCache({
330
460
  cacheKey,
331
461
  data,
332
462
  serialize: options.serialize,
333
463
  normalizedUrl: resolvedUrl.normalizedUrl,
464
+ cacheNamespace: options.cacheNamespace,
334
465
  });
335
466
  }
336
467
  return {
337
468
  data,
338
469
  fromCache: false,
339
470
  url: resolvedUrl.normalizedUrl,
471
+ originalUrl: resolvedUrl.originalUrl,
340
472
  fetchedAt: new Date().toISOString(),
341
473
  cacheKey,
342
474
  };
@@ -346,22 +478,38 @@ export async function performSharedFetch(options, deps = {}) {
346
478
  const pipelineOptions = {
347
479
  url: options.url,
348
480
  cacheNamespace: 'markdown',
349
- ...(options.signal === undefined ? {} : { signal: options.signal }),
481
+ ...withSignal(options.signal),
482
+ ...(options.cacheVary ? { cacheVary: options.cacheVary } : {}),
483
+ ...(options.forceRefresh ? { forceRefresh: true } : {}),
350
484
  transform: options.transform,
351
485
  ...(options.serialize ? { serialize: options.serialize } : {}),
352
486
  ...(options.deserialize ? { deserialize: options.deserialize } : {}),
353
487
  };
354
488
  const pipeline = await executePipeline(pipelineOptions);
355
489
  const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
490
+ if (inlineResult.truncated && !pipeline.fromCache && !cache.isEnabled()) {
491
+ persistCache({
492
+ cacheKey: pipeline.cacheKey ?? null,
493
+ data: pipeline.data,
494
+ serialize: options.serialize,
495
+ normalizedUrl: pipeline.url,
496
+ cacheNamespace: 'markdown',
497
+ force: true,
498
+ });
499
+ }
356
500
  return { pipeline, inlineResult };
357
501
  }
358
502
  /* -------------------------------------------------------------------------------------------------
359
503
  * Tool error mapping
360
504
  * ------------------------------------------------------------------------------------------------- */
361
- export function createToolErrorResponse(message, url) {
505
+ export function createToolErrorResponse(message, url, extra) {
362
506
  const structuredContent = {
363
507
  error: message,
364
508
  url,
509
+ ...(extra?.statusCode !== undefined
510
+ ? { statusCode: extra.statusCode }
511
+ : {}),
512
+ ...(extra?.details ? { details: extra.details } : {}),
365
513
  };
366
514
  return {
367
515
  content: [buildTextBlock(structuredContent)],
@@ -385,57 +533,44 @@ function resolveToolErrorMessage(error, fallbackMessage) {
385
533
  }
386
534
  export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
387
535
  const message = resolveToolErrorMessage(error, fallbackMessage);
388
- return createToolErrorResponse(message, url);
389
- }
390
- function parseJsonRecord(input) {
391
- try {
392
- const parsed = JSON.parse(input);
393
- return isObject(parsed) ? parsed : undefined;
394
- }
395
- catch {
396
- return undefined;
536
+ if (error instanceof FetchError) {
537
+ return createToolErrorResponse(message, url, {
538
+ statusCode: error.statusCode,
539
+ details: error.details,
540
+ });
397
541
  }
542
+ return createToolErrorResponse(message, url);
398
543
  }
399
- function resolveMarkdownContent(parsed) {
400
- const { markdown } = parsed;
401
- if (typeof markdown === 'string')
402
- return markdown;
403
- const { content } = parsed;
404
- if (typeof content === 'string')
405
- return content;
406
- return undefined;
407
- }
408
- function resolveOptionalTitle(parsed) {
409
- const { title } = parsed;
410
- if (title === undefined)
411
- return undefined;
412
- return typeof title === 'string' ? title : undefined;
413
- }
414
- function resolveTruncatedFlag(parsed) {
415
- const { truncated } = parsed;
416
- return typeof truncated === 'boolean' ? truncated : false;
417
- }
544
+ const cachedMarkdownSchema = z
545
+ .object({
546
+ markdown: z.string().optional(),
547
+ content: z.string().optional(),
548
+ title: z.string().optional(),
549
+ truncated: z.boolean().optional(),
550
+ })
551
+ .catchall(z.unknown())
552
+ .refine((value) => typeof value.markdown === 'string' || typeof value.content === 'string', { message: 'Missing markdown/content' });
418
553
  export function parseCachedMarkdownResult(cached) {
419
- const parsed = parseJsonRecord(cached);
420
- if (!parsed)
554
+ const parsed = safeJsonParse(cached);
555
+ const result = cachedMarkdownSchema.safeParse(parsed);
556
+ if (!result.success)
421
557
  return undefined;
422
- const resolvedContent = resolveMarkdownContent(parsed);
423
- if (resolvedContent === undefined)
424
- return undefined;
425
- const title = resolveOptionalTitle(parsed);
426
- if (parsed.title !== undefined && title === undefined)
558
+ const markdown = result.data.markdown ?? result.data.content;
559
+ if (typeof markdown !== 'string')
427
560
  return undefined;
428
561
  return {
429
- content: resolvedContent,
430
- markdown: resolvedContent,
431
- title,
432
- truncated: resolveTruncatedFlag(parsed),
562
+ content: markdown,
563
+ markdown,
564
+ title: result.data.title,
565
+ truncated: result.data.truncated ?? false,
433
566
  };
434
567
  }
435
- const markdownTransform = async (html, url, signal) => {
436
- const result = await transformHtmlToMarkdown(html, url, {
568
+ const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
569
+ const result = await transformBufferToMarkdown(input.buffer, url, {
437
570
  includeMetadata: true,
438
- ...(signal === undefined ? {} : { signal }),
571
+ encoding: input.encoding,
572
+ ...withSignal(signal),
573
+ ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
439
574
  });
440
575
  return { ...result, content: result.markdown };
441
576
  };
@@ -450,16 +585,25 @@ function serializeMarkdownResult(result) {
450
585
  * fetch-url tool implementation
451
586
  * ------------------------------------------------------------------------------------------------- */
452
587
  function buildStructuredContent(pipeline, inlineResult, inputUrl) {
588
+ const truncated = inlineResult.truncated ?? pipeline.data.truncated;
453
589
  return {
454
- url: pipeline.url,
590
+ url: pipeline.originalUrl ?? pipeline.url,
455
591
  resolvedUrl: pipeline.url,
456
592
  inputUrl,
457
593
  title: pipeline.data.title,
458
594
  markdown: inlineResult.content,
595
+ ...(truncated ? { truncated: true } : {}),
459
596
  };
460
597
  }
461
598
  function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
462
- return buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown', pipeline.cacheKey, pipeline.data.content, pipeline.url, pipeline.data.title);
599
+ return buildToolContentBlocks({
600
+ structuredContent,
601
+ inlineResult,
602
+ resourceName: 'Fetched markdown',
603
+ url: pipeline.url,
604
+ ...(pipeline.data.title !== undefined && { title: pipeline.data.title }),
605
+ fullContent: pipeline.data.content,
606
+ });
463
607
  }
464
608
  function buildResponse(pipeline, inlineResult, inputUrl) {
465
609
  const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
@@ -469,15 +613,17 @@ function buildResponse(pipeline, inlineResult, inputUrl) {
469
613
  structuredContent,
470
614
  };
471
615
  }
472
- async function fetchPipeline(url, signal, progress) {
616
+ async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefresh) {
473
617
  return performSharedFetch({
474
618
  url,
475
- ...(signal === undefined ? {} : { signal }),
476
- transform: async (html, normalizedUrl) => {
619
+ ...withSignal(signal),
620
+ ...(skipNoiseRemoval ? { cacheVary: { skipNoiseRemoval: true } } : {}),
621
+ ...(forceRefresh ? { forceRefresh: true } : {}),
622
+ transform: async ({ buffer, encoding }, normalizedUrl) => {
477
623
  if (progress) {
478
- await progress.report(3, 'Transforming content');
624
+ void progress.report(3, 'Transforming content');
479
625
  }
480
- return markdownTransform(html, normalizedUrl, signal);
626
+ return markdownTransform({ buffer, encoding }, normalizedUrl, signal, skipNoiseRemoval);
481
627
  },
482
628
  serialize: serializeMarkdownResult,
483
629
  deserialize: parseCachedMarkdownResult,
@@ -488,26 +634,19 @@ async function executeFetch(input, extra) {
488
634
  if (!url) {
489
635
  return createToolErrorResponse('URL is required', '');
490
636
  }
491
- const { signal: extraSignal } = extra ?? {};
492
- const { timeoutMs } = config.tools;
493
- const signal = timeoutMs > 0
494
- ? AbortSignal.any([
495
- ...(extraSignal ? [extraSignal] : []),
496
- AbortSignal.timeout(timeoutMs),
497
- ])
498
- : extraSignal;
637
+ const signal = buildToolAbortSignal(extra?.signal);
499
638
  const progress = createProgressReporter(extra);
500
- await progress.report(1, 'Validating URL');
639
+ void progress.report(1, 'Validating URL');
501
640
  logDebug('Fetching URL', { url });
502
- await progress.report(2, 'Fetching content');
503
- const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress);
641
+ void progress.report(2, 'Fetching content');
642
+ const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress, input.skipNoiseRemoval, input.forceRefresh);
504
643
  if (pipeline.fromCache) {
505
- await progress.report(3, 'Using cached content');
644
+ void progress.report(3, 'Using cached content');
506
645
  }
507
646
  if (inlineResult.error) {
508
647
  return createToolErrorResponse(inlineResult.error, url);
509
648
  }
510
- await progress.report(4, 'Finalizing response');
649
+ void progress.report(4, 'Finalizing response');
511
650
  return buildResponse(pipeline, inlineResult, url);
512
651
  }
513
652
  export async function fetchUrlToolHandler(input, extra) {
@@ -564,26 +703,21 @@ function resolveSessionIdFromExtra(extra) {
564
703
  const { sessionId } = extra;
565
704
  if (typeof sessionId === 'string')
566
705
  return sessionId;
567
- const { requestInfo } = extra;
568
- if (!isObject(requestInfo))
569
- return undefined;
570
- const { headers } = requestInfo;
571
- if (!isObject(headers))
572
- return undefined;
573
- const headerValue = headers['mcp-session-id'];
706
+ const headers = readNestedRecord(extra, ['requestInfo', 'headers']);
707
+ const headerValue = headers ? headers['mcp-session-id'] : undefined;
574
708
  return typeof headerValue === 'string' ? headerValue : undefined;
575
709
  }
576
710
  export function registerTools(server) {
577
- if (config.tools.enabled.includes(FETCH_URL_TOOL_NAME)) {
578
- server.registerTool(TOOL_DEFINITION.name, {
579
- title: TOOL_DEFINITION.title,
580
- description: TOOL_DEFINITION.description,
581
- inputSchema: TOOL_DEFINITION.inputSchema,
582
- outputSchema: TOOL_DEFINITION.outputSchema,
583
- annotations: TOOL_DEFINITION.annotations,
584
- execution: TOOL_DEFINITION.execution,
585
- // Use specific tool icon here
586
- icons: [TOOL_ICON],
587
- }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
588
- }
711
+ if (!config.tools.enabled.includes(FETCH_URL_TOOL_NAME))
712
+ return;
713
+ server.registerTool(TOOL_DEFINITION.name, {
714
+ title: TOOL_DEFINITION.title,
715
+ description: TOOL_DEFINITION.description,
716
+ inputSchema: TOOL_DEFINITION.inputSchema,
717
+ outputSchema: TOOL_DEFINITION.outputSchema,
718
+ annotations: TOOL_DEFINITION.annotations,
719
+ execution: TOOL_DEFINITION.execution,
720
+ // Use specific tool icon here
721
+ icons: [TOOL_ICON],
722
+ }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
589
723
  }