@j0hanz/superfetch 2.4.3 → 2.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tools.js CHANGED
@@ -15,7 +15,7 @@ const fetchUrlInputSchema = z.strictObject({
15
15
  .url({ protocol: /^https?$/i })
16
16
  .min(1)
17
17
  .max(config.constants.maxUrlLength)
18
- .describe('The URL to fetch'),
18
+ .describe('The URL of the webpage to fetch and convert to Markdown'),
19
19
  });
20
20
  const fetchUrlOutputSchema = z.strictObject({
21
21
  url: z
@@ -46,55 +46,130 @@ const fetchUrlOutputSchema = z.strictObject({
46
46
  .describe('Error message if the request failed'),
47
47
  });
48
48
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
49
- export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
49
+ export const FETCH_URL_TOOL_DESCRIPTION = `
50
+ Fetches a webpage and converts it to clean Markdown format optimized for LLM context.
51
+
52
+ This tool is useful for:
53
+ - Reading documentation, blog posts, or articles.
54
+ - Extracting main content while removing navigation and ads (noise removal).
55
+ - Caching content to speed up repeated queries.
56
+
57
+ Limitations:
58
+ - Returns truncated content if it exceeds ${config.constants.maxInlineContentChars} characters.
59
+ - Does not execute complex client-side JavaScript interactions.
60
+ `.trim();
61
+ // Specific icon for the fetch-url tool (download cloud / web)
62
+ const TOOL_ICON = {
63
+ src: 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSJjdXJyZW50Q29sb3IiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMjEgMTV2NGEyIDIgMCAwIDEtMiAySDVhMiAyIDAgMCAxLTItMnYtNCIvPjxwb2x5bGluZSBwb2ludHM9IjcgMTAgMTIgMTUgMTcgMTAiLz48bGluZSB4MT0iMTIiIHkxPSIxNSIgeDI9IjEyIiB5Mj0iMyIvPjwvc3ZnPg==',
64
+ mimeType: 'image/svg+xml',
65
+ };
66
+ /* -------------------------------------------------------------------------------------------------
67
+ * Progress reporting
68
+ * ------------------------------------------------------------------------------------------------- */
69
+ class ToolProgressReporter {
70
+ token;
71
+ sendNotification;
72
+ constructor(token, sendNotification) {
73
+ this.token = token;
74
+ this.sendNotification = sendNotification;
75
+ }
76
+ static create(extra) {
77
+ const token = extra?._meta?.progressToken ?? null;
78
+ const sendNotification = extra?.sendNotification;
79
+ if (token === null || !sendNotification) {
80
+ return { report: async () => { } };
81
+ }
82
+ return new ToolProgressReporter(token, sendNotification);
83
+ }
84
+ async report(progress, message) {
85
+ try {
86
+ await Promise.race([
87
+ this.sendNotification({
88
+ method: 'notifications/progress',
89
+ params: {
90
+ progressToken: this.token,
91
+ progress,
92
+ total: FETCH_PROGRESS_TOTAL,
93
+ message,
94
+ },
95
+ }),
96
+ new Promise((_, reject) => {
97
+ setTimeout(() => {
98
+ reject(new Error('Progress notification timeout'));
99
+ }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
100
+ }),
101
+ ]);
102
+ }
103
+ catch (error) {
104
+ const isTimeout = error instanceof Error &&
105
+ error.message === 'Progress notification timeout';
106
+ const logMessage = isTimeout
107
+ ? 'Progress notification timed out'
108
+ : 'Failed to send progress notification';
109
+ logWarn(logMessage, {
110
+ error: getErrorMessage(error),
111
+ progress,
112
+ message,
113
+ });
114
+ }
115
+ }
116
+ }
50
117
  export function createProgressReporter(extra) {
51
- const token = extra?._meta?.progressToken ?? null;
52
- const sendNotification = extra?.sendNotification;
53
- if (token === null || !sendNotification) {
54
- return { report: async () => { } };
118
+ return ToolProgressReporter.create(extra);
119
+ }
120
+ class InlineContentLimiter {
121
+ apply(content, cacheKey) {
122
+ const contentSize = content.length;
123
+ const inlineLimit = config.constants.maxInlineContentChars;
124
+ if (contentSize <= inlineLimit) {
125
+ return { content, contentSize };
126
+ }
127
+ const resourceUri = this.resolveResourceUri(cacheKey);
128
+ if (!resourceUri) {
129
+ return this.buildTruncatedFallback(content, contentSize, inlineLimit);
130
+ }
131
+ return {
132
+ contentSize,
133
+ resourceUri,
134
+ resourceMimeType: 'text/markdown',
135
+ };
136
+ }
137
+ resolveResourceUri(cacheKey) {
138
+ if (!cache.isEnabled() || !cacheKey)
139
+ return null;
140
+ return cache.toResourceUri(cacheKey);
141
+ }
142
+ buildTruncatedFallback(content, contentSize, inlineLimit) {
143
+ const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
144
+ const truncatedContent = content.length > inlineLimit
145
+ ? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
146
+ : content;
147
+ return {
148
+ content: truncatedContent,
149
+ contentSize,
150
+ truncated: true,
151
+ };
55
152
  }
56
- return {
57
- report: async (progress, message) => {
58
- try {
59
- await Promise.race([
60
- sendNotification({
61
- method: 'notifications/progress',
62
- params: {
63
- progressToken: token,
64
- progress,
65
- total: FETCH_PROGRESS_TOTAL,
66
- message,
67
- },
68
- }),
69
- new Promise((_, reject) => {
70
- setTimeout(() => {
71
- reject(new Error('Progress notification timeout'));
72
- }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
73
- }),
74
- ]);
75
- }
76
- catch (error) {
77
- const isTimeout = error instanceof Error &&
78
- error.message === 'Progress notification timeout';
79
- const logMessage = isTimeout
80
- ? 'Progress notification timed out'
81
- : 'Failed to send progress notification';
82
- logWarn(logMessage, {
83
- error: getErrorMessage(error),
84
- progress,
85
- message,
86
- });
87
- }
88
- },
89
- };
90
153
  }
154
+ const inlineLimiter = new InlineContentLimiter();
155
+ function applyInlineContentLimit(content, cacheKey) {
156
+ return inlineLimiter.apply(content, cacheKey);
157
+ }
158
+ /* -------------------------------------------------------------------------------------------------
159
+ * Tool response blocks (text + optional resource + optional link)
160
+ * ------------------------------------------------------------------------------------------------- */
91
161
  function serializeStructuredContent(structuredContent) {
92
162
  return JSON.stringify(structuredContent);
93
163
  }
164
+ function buildTextBlock(structuredContent) {
165
+ return {
166
+ type: 'text',
167
+ text: serializeStructuredContent(structuredContent),
168
+ };
169
+ }
94
170
  function buildResourceLink(inlineResult, name) {
95
- if (!inlineResult.resourceUri) {
171
+ if (!inlineResult.resourceUri)
96
172
  return null;
97
- }
98
173
  const block = {
99
174
  type: 'resource_link',
100
175
  uri: inlineResult.resourceUri,
@@ -107,9 +182,8 @@ function buildResourceLink(inlineResult, name) {
107
182
  return block;
108
183
  }
109
184
  function buildEmbeddedResource(content, url, title) {
110
- if (!content) {
185
+ if (!content)
111
186
  return null;
112
- }
113
187
  const filename = cache.generateSafeFilename(url, title, undefined, '.md');
114
188
  const uri = `file:///${filename}`;
115
189
  return {
@@ -121,28 +195,21 @@ function buildEmbeddedResource(content, url, title) {
121
195
  },
122
196
  };
123
197
  }
124
- function appendResourceBlocks({ blocks, inlineResult, resourceName, url, title, fullContent, }) {
198
+ function appendResourceBlocks(params) {
199
+ const { blocks, inlineResult, resourceName, url, title, fullContent } = params;
125
200
  const contentToEmbed = config.runtime.httpMode
126
201
  ? inlineResult.content
127
202
  : (fullContent ?? inlineResult.content);
128
203
  if (contentToEmbed && url) {
129
- const embeddedResource = buildEmbeddedResource(contentToEmbed, url, title);
130
- if (embeddedResource) {
131
- blocks.push(embeddedResource);
132
- }
133
- }
134
- const resourceLink = buildResourceLink(inlineResult, resourceName);
135
- if (resourceLink) {
136
- blocks.push(resourceLink);
204
+ const embedded = buildEmbeddedResource(contentToEmbed, url, title);
205
+ if (embedded)
206
+ blocks.push(embedded);
137
207
  }
208
+ const link = buildResourceLink(inlineResult, resourceName);
209
+ if (link)
210
+ blocks.push(link);
138
211
  }
139
- function buildTextBlock(structuredContent) {
140
- return {
141
- type: 'text',
142
- text: serializeStructuredContent(structuredContent),
143
- };
144
- }
145
- function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, url, title) {
212
+ function buildToolContentBlocks(structuredContent, _fromCache, inlineResult, resourceName, _cacheKey, fullContent, url, title) {
146
213
  const blocks = [buildTextBlock(structuredContent)];
147
214
  appendResourceBlocks({
148
215
  blocks,
@@ -154,39 +221,36 @@ function buildToolContentBlocks(structuredContent, fromCache, inlineResult, reso
154
221
  });
155
222
  return blocks;
156
223
  }
157
- function applyInlineContentLimit(content, cacheKey) {
158
- const contentSize = content.length;
159
- const inlineLimit = config.constants.maxInlineContentChars;
160
- if (contentSize <= inlineLimit) {
161
- return { content, contentSize };
162
- }
163
- const resourceUri = resolveResourceUri(cacheKey);
164
- if (!resourceUri) {
165
- return buildTruncatedFallback(content, contentSize, inlineLimit);
166
- }
167
- return {
168
- contentSize,
169
- resourceUri,
170
- resourceMimeType: 'text/markdown',
171
- };
224
+ /* -------------------------------------------------------------------------------------------------
225
+ * Fetch pipeline executor (normalize → raw-transform → cache → fetch → transform → persist)
226
+ * ------------------------------------------------------------------------------------------------- */
227
+ function resolveNormalizedUrl(url) {
228
+ const { normalizedUrl: validatedUrl } = normalizeUrl(url);
229
+ const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
230
+ return { normalizedUrl, originalUrl: validatedUrl, transformed };
172
231
  }
173
- function resolveResourceUri(cacheKey) {
174
- if (!cache.isEnabled() || !cacheKey)
175
- return null;
176
- return cache.toResourceUri(cacheKey);
232
+ function logRawUrlTransformation(resolvedUrl) {
233
+ if (!resolvedUrl.transformed)
234
+ return;
235
+ logDebug('Using transformed raw content URL', {
236
+ original: resolvedUrl.originalUrl,
237
+ });
177
238
  }
178
- function buildTruncatedFallback(content, contentSize, inlineLimit) {
179
- const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
180
- const truncatedContent = content.length > inlineLimit
181
- ? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
182
- : content;
183
- return {
184
- content: truncatedContent,
185
- contentSize,
186
- truncated: true,
187
- };
239
+ function extractTitle(value) {
240
+ if (!isObject(value))
241
+ return undefined;
242
+ const { title } = value;
243
+ return typeof title === 'string' ? title : undefined;
244
+ }
245
+ function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
246
+ const log = reason === 'deserialize failure' ? logWarn : logDebug;
247
+ log(`Cache miss due to ${reason}`, {
248
+ namespace: cacheNamespace,
249
+ url: normalizedUrl,
250
+ });
188
251
  }
189
- function attemptCacheRetrieval({ cacheKey, deserialize, cacheNamespace, normalizedUrl, }) {
252
+ function attemptCacheRetrieval(params) {
253
+ const { cacheKey, deserialize, cacheNamespace, normalizedUrl } = params;
190
254
  if (!cacheKey)
191
255
  return null;
192
256
  const cached = cache.get(cacheKey);
@@ -210,10 +274,17 @@ function attemptCacheRetrieval({ cacheKey, deserialize, cacheNamespace, normaliz
210
274
  cacheKey,
211
275
  };
212
276
  }
213
- function resolveNormalizedUrl(url) {
214
- const { normalizedUrl: validatedUrl } = normalizeUrl(url);
215
- const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
216
- return { normalizedUrl, originalUrl: validatedUrl, transformed };
277
+ function persistCache(params) {
278
+ const { cacheKey, data, serialize, normalizedUrl } = params;
279
+ if (!cacheKey)
280
+ return;
281
+ const serializer = serialize ?? JSON.stringify;
282
+ const title = extractTitle(data);
283
+ const metadata = {
284
+ url: normalizedUrl,
285
+ ...(title === undefined ? {} : { title }),
286
+ };
287
+ cache.set(cacheKey, serializer(data), metadata);
217
288
  }
218
289
  export async function executeFetchPipeline(options) {
219
290
  const resolvedUrl = resolveNormalizedUrl(options.url);
@@ -247,38 +318,6 @@ export async function executeFetchPipeline(options) {
247
318
  cacheKey,
248
319
  };
249
320
  }
250
- function persistCache({ cacheKey, data, serialize, normalizedUrl, }) {
251
- if (!cacheKey)
252
- return;
253
- const serializer = serialize ?? JSON.stringify;
254
- const title = extractTitle(data);
255
- const metadata = {
256
- url: normalizedUrl,
257
- ...(title === undefined ? {} : { title }),
258
- };
259
- cache.set(cacheKey, serializer(data), metadata);
260
- }
261
- function extractTitle(value) {
262
- if (!isObject(value))
263
- return undefined;
264
- const { title } = value;
265
- return typeof title === 'string' ? title : undefined;
266
- }
267
- function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
268
- // Deserialize failures indicate unexpected data; surface at warn level.
269
- const log = reason === 'deserialize failure' ? logWarn : logDebug;
270
- log(`Cache miss due to ${reason}`, {
271
- namespace: cacheNamespace,
272
- url: normalizedUrl,
273
- });
274
- }
275
- function logRawUrlTransformation(resolvedUrl) {
276
- if (!resolvedUrl.transformed)
277
- return;
278
- logDebug('Using transformed raw content URL', {
279
- original: resolvedUrl.originalUrl,
280
- });
281
- }
282
321
  export async function performSharedFetch(options, deps = {}) {
283
322
  const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
284
323
  const pipelineOptions = {
@@ -293,6 +332,9 @@ export async function performSharedFetch(options, deps = {}) {
293
332
  const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
294
333
  return { pipeline, inlineResult };
295
334
  }
335
+ /* -------------------------------------------------------------------------------------------------
336
+ * Tool error mapping
337
+ * ------------------------------------------------------------------------------------------------- */
296
338
  export function createToolErrorResponse(message, url) {
297
339
  const structuredContent = {
298
340
  error: message,
@@ -304,10 +346,6 @@ export function createToolErrorResponse(message, url) {
304
346
  isError: true,
305
347
  };
306
348
  }
307
- export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
308
- const message = resolveToolErrorMessage(error, fallbackMessage);
309
- return createToolErrorResponse(message, url);
310
- }
311
349
  function isValidationError(error) {
312
350
  return (error instanceof Error &&
313
351
  isSystemError(error) &&
@@ -322,6 +360,10 @@ function resolveToolErrorMessage(error, fallbackMessage) {
322
360
  }
323
361
  return `${fallbackMessage}: Unknown error`;
324
362
  }
363
+ export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
364
+ const message = resolveToolErrorMessage(error, fallbackMessage);
365
+ return createToolErrorResponse(message, url);
366
+ }
325
367
  function parseJsonRecord(input) {
326
368
  try {
327
369
  const parsed = JSON.parse(input);
@@ -381,6 +423,9 @@ function serializeMarkdownResult(result) {
381
423
  truncated: result.truncated,
382
424
  });
383
425
  }
426
+ /* -------------------------------------------------------------------------------------------------
427
+ * fetch-url tool implementation
428
+ * ------------------------------------------------------------------------------------------------- */
384
429
  function buildStructuredContent(pipeline, inlineResult, inputUrl) {
385
430
  return {
386
431
  url: pipeline.url,
@@ -393,6 +438,14 @@ function buildStructuredContent(pipeline, inlineResult, inputUrl) {
393
438
  function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
394
439
  return buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown', pipeline.cacheKey, pipeline.data.content, pipeline.url, pipeline.data.title);
395
440
  }
441
+ function buildResponse(pipeline, inlineResult, inputUrl) {
442
+ const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
443
+ const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
444
+ return {
445
+ content,
446
+ structuredContent,
447
+ };
448
+ }
396
449
  async function fetchPipeline(url, signal, progress) {
397
450
  return performSharedFetch({
398
451
  url,
@@ -407,14 +460,6 @@ async function fetchPipeline(url, signal, progress) {
407
460
  deserialize: parseCachedMarkdownResult,
408
461
  });
409
462
  }
410
- function buildResponse(pipeline, inlineResult, inputUrl) {
411
- const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
412
- const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
413
- return {
414
- content,
415
- structuredContent,
416
- };
417
- }
418
463
  async function executeFetch(input, extra) {
419
464
  const { url } = input;
420
465
  if (!url) {
@@ -432,6 +477,7 @@ async function executeFetch(input, extra) {
432
477
  await progress.report(1, 'Validating URL');
433
478
  logDebug('Fetching URL', { url });
434
479
  await progress.report(2, 'Fetching content');
480
+ await progress.report(2, 'Fetching content'); // preserve existing behavior
435
481
  const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress);
436
482
  if (pipeline.fromCache) {
437
483
  await progress.report(3, 'Using cached content');
@@ -482,13 +528,16 @@ function resolveRequestIdFromExtra(extra) {
482
528
  return String(requestId);
483
529
  return undefined;
484
530
  }
485
- export function registerTools(server, serverIcons) {
486
- server.registerTool(TOOL_DEFINITION.name, {
487
- title: TOOL_DEFINITION.title,
488
- description: TOOL_DEFINITION.description,
489
- inputSchema: TOOL_DEFINITION.inputSchema,
490
- outputSchema: TOOL_DEFINITION.outputSchema,
491
- annotations: TOOL_DEFINITION.annotations,
492
- ...(serverIcons ? { icons: serverIcons } : {}),
493
- }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
531
+ export function registerTools(server) {
532
+ if (config.tools.enabled.includes(FETCH_URL_TOOL_NAME)) {
533
+ server.registerTool(TOOL_DEFINITION.name, {
534
+ title: TOOL_DEFINITION.title,
535
+ description: TOOL_DEFINITION.description,
536
+ inputSchema: TOOL_DEFINITION.inputSchema,
537
+ outputSchema: TOOL_DEFINITION.outputSchema,
538
+ annotations: TOOL_DEFINITION.annotations,
539
+ // Use specific tool icon here
540
+ icons: [TOOL_ICON],
541
+ }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
542
+ }
494
543
  }
@@ -3,10 +3,12 @@ export interface StageBudget {
3
3
  totalBudgetMs: number;
4
4
  elapsedMs: number;
5
5
  }
6
+ /** Backwards-compatible exports */
6
7
  export declare function startTransformStage(url: string, stage: string, budget?: StageBudget): TransformStageContext | null;
7
8
  export declare function endTransformStage(context: TransformStageContext | null, options?: {
8
9
  truncated?: boolean;
9
10
  }): number;
11
+ /** Backwards-compatible export */
10
12
  export declare function extractContent(html: string, url: string, options?: {
11
13
  extractArticle?: boolean;
12
14
  signal?: AbortSignal;
@@ -21,11 +23,11 @@ export declare function isExtractionSufficient(article: ExtractedArticle | null,
21
23
  export declare function determineContentExtractionSource(article: ExtractedArticle | null): article is ExtractedArticle;
22
24
  export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
23
25
  export declare function transformHtmlToMarkdownInProcess(html: string, url: string, options: TransformOptions): MarkdownTransformResult;
24
- export declare function shutdownTransformWorkerPool(): Promise<void>;
25
26
  export interface TransformPoolStats {
26
27
  queueDepth: number;
27
28
  activeWorkers: number;
28
29
  capacity: number;
29
30
  }
30
31
  export declare function getTransformPoolStats(): TransformPoolStats | null;
32
+ export declare function shutdownTransformWorkerPool(): Promise<void>;
31
33
  export declare function transformHtmlToMarkdown(html: string, url: string, options: TransformOptions): Promise<MarkdownTransformResult>;