@j0hanz/superfetch 2.4.3 → 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/tools.js CHANGED
@@ -47,54 +47,113 @@ const fetchUrlOutputSchema = z.strictObject({
47
47
  });
48
48
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
49
49
  export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
50
+ /* -------------------------------------------------------------------------------------------------
51
+ * Progress reporting
52
+ * ------------------------------------------------------------------------------------------------- */
53
+ class ToolProgressReporter {
54
+ token;
55
+ sendNotification;
56
+ constructor(token, sendNotification) {
57
+ this.token = token;
58
+ this.sendNotification = sendNotification;
59
+ }
60
+ static create(extra) {
61
+ const token = extra?._meta?.progressToken ?? null;
62
+ const sendNotification = extra?.sendNotification;
63
+ if (token === null || !sendNotification) {
64
+ return { report: async () => { } };
65
+ }
66
+ return new ToolProgressReporter(token, sendNotification);
67
+ }
68
+ async report(progress, message) {
69
+ try {
70
+ await Promise.race([
71
+ this.sendNotification({
72
+ method: 'notifications/progress',
73
+ params: {
74
+ progressToken: this.token,
75
+ progress,
76
+ total: FETCH_PROGRESS_TOTAL,
77
+ message,
78
+ },
79
+ }),
80
+ new Promise((_, reject) => {
81
+ setTimeout(() => {
82
+ reject(new Error('Progress notification timeout'));
83
+ }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
84
+ }),
85
+ ]);
86
+ }
87
+ catch (error) {
88
+ const isTimeout = error instanceof Error &&
89
+ error.message === 'Progress notification timeout';
90
+ const logMessage = isTimeout
91
+ ? 'Progress notification timed out'
92
+ : 'Failed to send progress notification';
93
+ logWarn(logMessage, {
94
+ error: getErrorMessage(error),
95
+ progress,
96
+ message,
97
+ });
98
+ }
99
+ }
100
+ }
50
101
  export function createProgressReporter(extra) {
51
- const token = extra?._meta?.progressToken ?? null;
52
- const sendNotification = extra?.sendNotification;
53
- if (token === null || !sendNotification) {
54
- return { report: async () => { } };
102
+ return ToolProgressReporter.create(extra);
103
+ }
104
+ class InlineContentLimiter {
105
+ apply(content, cacheKey) {
106
+ const contentSize = content.length;
107
+ const inlineLimit = config.constants.maxInlineContentChars;
108
+ if (contentSize <= inlineLimit) {
109
+ return { content, contentSize };
110
+ }
111
+ const resourceUri = this.resolveResourceUri(cacheKey);
112
+ if (!resourceUri) {
113
+ return this.buildTruncatedFallback(content, contentSize, inlineLimit);
114
+ }
115
+ return {
116
+ contentSize,
117
+ resourceUri,
118
+ resourceMimeType: 'text/markdown',
119
+ };
120
+ }
121
+ resolveResourceUri(cacheKey) {
122
+ if (!cache.isEnabled() || !cacheKey)
123
+ return null;
124
+ return cache.toResourceUri(cacheKey);
125
+ }
126
+ buildTruncatedFallback(content, contentSize, inlineLimit) {
127
+ const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
128
+ const truncatedContent = content.length > inlineLimit
129
+ ? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
130
+ : content;
131
+ return {
132
+ content: truncatedContent,
133
+ contentSize,
134
+ truncated: true,
135
+ };
55
136
  }
56
- return {
57
- report: async (progress, message) => {
58
- try {
59
- await Promise.race([
60
- sendNotification({
61
- method: 'notifications/progress',
62
- params: {
63
- progressToken: token,
64
- progress,
65
- total: FETCH_PROGRESS_TOTAL,
66
- message,
67
- },
68
- }),
69
- new Promise((_, reject) => {
70
- setTimeout(() => {
71
- reject(new Error('Progress notification timeout'));
72
- }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
73
- }),
74
- ]);
75
- }
76
- catch (error) {
77
- const isTimeout = error instanceof Error &&
78
- error.message === 'Progress notification timeout';
79
- const logMessage = isTimeout
80
- ? 'Progress notification timed out'
81
- : 'Failed to send progress notification';
82
- logWarn(logMessage, {
83
- error: getErrorMessage(error),
84
- progress,
85
- message,
86
- });
87
- }
88
- },
89
- };
90
137
  }
138
+ const inlineLimiter = new InlineContentLimiter();
139
+ function applyInlineContentLimit(content, cacheKey) {
140
+ return inlineLimiter.apply(content, cacheKey);
141
+ }
142
+ /* -------------------------------------------------------------------------------------------------
143
+ * Tool response blocks (text + optional resource + optional link)
144
+ * ------------------------------------------------------------------------------------------------- */
91
145
  function serializeStructuredContent(structuredContent) {
92
146
  return JSON.stringify(structuredContent);
93
147
  }
148
+ function buildTextBlock(structuredContent) {
149
+ return {
150
+ type: 'text',
151
+ text: serializeStructuredContent(structuredContent),
152
+ };
153
+ }
94
154
  function buildResourceLink(inlineResult, name) {
95
- if (!inlineResult.resourceUri) {
155
+ if (!inlineResult.resourceUri)
96
156
  return null;
97
- }
98
157
  const block = {
99
158
  type: 'resource_link',
100
159
  uri: inlineResult.resourceUri,
@@ -107,9 +166,8 @@ function buildResourceLink(inlineResult, name) {
107
166
  return block;
108
167
  }
109
168
  function buildEmbeddedResource(content, url, title) {
110
- if (!content) {
169
+ if (!content)
111
170
  return null;
112
- }
113
171
  const filename = cache.generateSafeFilename(url, title, undefined, '.md');
114
172
  const uri = `file:///${filename}`;
115
173
  return {
@@ -121,28 +179,21 @@ function buildEmbeddedResource(content, url, title) {
121
179
  },
122
180
  };
123
181
  }
124
- function appendResourceBlocks({ blocks, inlineResult, resourceName, url, title, fullContent, }) {
182
+ function appendResourceBlocks(params) {
183
+ const { blocks, inlineResult, resourceName, url, title, fullContent } = params;
125
184
  const contentToEmbed = config.runtime.httpMode
126
185
  ? inlineResult.content
127
186
  : (fullContent ?? inlineResult.content);
128
187
  if (contentToEmbed && url) {
129
- const embeddedResource = buildEmbeddedResource(contentToEmbed, url, title);
130
- if (embeddedResource) {
131
- blocks.push(embeddedResource);
132
- }
188
+ const embedded = buildEmbeddedResource(contentToEmbed, url, title);
189
+ if (embedded)
190
+ blocks.push(embedded);
133
191
  }
134
- const resourceLink = buildResourceLink(inlineResult, resourceName);
135
- if (resourceLink) {
136
- blocks.push(resourceLink);
137
- }
138
- }
139
- function buildTextBlock(structuredContent) {
140
- return {
141
- type: 'text',
142
- text: serializeStructuredContent(structuredContent),
143
- };
192
+ const link = buildResourceLink(inlineResult, resourceName);
193
+ if (link)
194
+ blocks.push(link);
144
195
  }
145
- function buildToolContentBlocks(structuredContent, fromCache, inlineResult, resourceName, cacheKey, fullContent, url, title) {
196
+ function buildToolContentBlocks(structuredContent, _fromCache, inlineResult, resourceName, _cacheKey, fullContent, url, title) {
146
197
  const blocks = [buildTextBlock(structuredContent)];
147
198
  appendResourceBlocks({
148
199
  blocks,
@@ -154,39 +205,36 @@ function buildToolContentBlocks(structuredContent, fromCache, inlineResult, reso
154
205
  });
155
206
  return blocks;
156
207
  }
157
- function applyInlineContentLimit(content, cacheKey) {
158
- const contentSize = content.length;
159
- const inlineLimit = config.constants.maxInlineContentChars;
160
- if (contentSize <= inlineLimit) {
161
- return { content, contentSize };
162
- }
163
- const resourceUri = resolveResourceUri(cacheKey);
164
- if (!resourceUri) {
165
- return buildTruncatedFallback(content, contentSize, inlineLimit);
166
- }
167
- return {
168
- contentSize,
169
- resourceUri,
170
- resourceMimeType: 'text/markdown',
171
- };
208
+ /* -------------------------------------------------------------------------------------------------
209
+ * Fetch pipeline executor (normalize → raw-transform → cache → fetch → transform → persist)
210
+ * ------------------------------------------------------------------------------------------------- */
211
+ function resolveNormalizedUrl(url) {
212
+ const { normalizedUrl: validatedUrl } = normalizeUrl(url);
213
+ const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
214
+ return { normalizedUrl, originalUrl: validatedUrl, transformed };
172
215
  }
173
- function resolveResourceUri(cacheKey) {
174
- if (!cache.isEnabled() || !cacheKey)
175
- return null;
176
- return cache.toResourceUri(cacheKey);
216
+ function logRawUrlTransformation(resolvedUrl) {
217
+ if (!resolvedUrl.transformed)
218
+ return;
219
+ logDebug('Using transformed raw content URL', {
220
+ original: resolvedUrl.originalUrl,
221
+ });
177
222
  }
178
- function buildTruncatedFallback(content, contentSize, inlineLimit) {
179
- const maxContentLength = Math.max(0, inlineLimit - TRUNCATION_MARKER.length);
180
- const truncatedContent = content.length > inlineLimit
181
- ? `${content.substring(0, maxContentLength)}${TRUNCATION_MARKER}`
182
- : content;
183
- return {
184
- content: truncatedContent,
185
- contentSize,
186
- truncated: true,
187
- };
223
+ function extractTitle(value) {
224
+ if (!isObject(value))
225
+ return undefined;
226
+ const { title } = value;
227
+ return typeof title === 'string' ? title : undefined;
228
+ }
229
+ function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
230
+ const log = reason === 'deserialize failure' ? logWarn : logDebug;
231
+ log(`Cache miss due to ${reason}`, {
232
+ namespace: cacheNamespace,
233
+ url: normalizedUrl,
234
+ });
188
235
  }
189
- function attemptCacheRetrieval({ cacheKey, deserialize, cacheNamespace, normalizedUrl, }) {
236
+ function attemptCacheRetrieval(params) {
237
+ const { cacheKey, deserialize, cacheNamespace, normalizedUrl } = params;
190
238
  if (!cacheKey)
191
239
  return null;
192
240
  const cached = cache.get(cacheKey);
@@ -210,10 +258,17 @@ function attemptCacheRetrieval({ cacheKey, deserialize, cacheNamespace, normaliz
210
258
  cacheKey,
211
259
  };
212
260
  }
213
- function resolveNormalizedUrl(url) {
214
- const { normalizedUrl: validatedUrl } = normalizeUrl(url);
215
- const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
216
- return { normalizedUrl, originalUrl: validatedUrl, transformed };
261
+ function persistCache(params) {
262
+ const { cacheKey, data, serialize, normalizedUrl } = params;
263
+ if (!cacheKey)
264
+ return;
265
+ const serializer = serialize ?? JSON.stringify;
266
+ const title = extractTitle(data);
267
+ const metadata = {
268
+ url: normalizedUrl,
269
+ ...(title === undefined ? {} : { title }),
270
+ };
271
+ cache.set(cacheKey, serializer(data), metadata);
217
272
  }
218
273
  export async function executeFetchPipeline(options) {
219
274
  const resolvedUrl = resolveNormalizedUrl(options.url);
@@ -247,38 +302,6 @@ export async function executeFetchPipeline(options) {
247
302
  cacheKey,
248
303
  };
249
304
  }
250
- function persistCache({ cacheKey, data, serialize, normalizedUrl, }) {
251
- if (!cacheKey)
252
- return;
253
- const serializer = serialize ?? JSON.stringify;
254
- const title = extractTitle(data);
255
- const metadata = {
256
- url: normalizedUrl,
257
- ...(title === undefined ? {} : { title }),
258
- };
259
- cache.set(cacheKey, serializer(data), metadata);
260
- }
261
- function extractTitle(value) {
262
- if (!isObject(value))
263
- return undefined;
264
- const { title } = value;
265
- return typeof title === 'string' ? title : undefined;
266
- }
267
- function logCacheMiss(reason, cacheNamespace, normalizedUrl) {
268
- // Deserialize failures indicate unexpected data; surface at warn level.
269
- const log = reason === 'deserialize failure' ? logWarn : logDebug;
270
- log(`Cache miss due to ${reason}`, {
271
- namespace: cacheNamespace,
272
- url: normalizedUrl,
273
- });
274
- }
275
- function logRawUrlTransformation(resolvedUrl) {
276
- if (!resolvedUrl.transformed)
277
- return;
278
- logDebug('Using transformed raw content URL', {
279
- original: resolvedUrl.originalUrl,
280
- });
281
- }
282
305
  export async function performSharedFetch(options, deps = {}) {
283
306
  const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
284
307
  const pipelineOptions = {
@@ -293,6 +316,9 @@ export async function performSharedFetch(options, deps = {}) {
293
316
  const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.cacheKey ?? null);
294
317
  return { pipeline, inlineResult };
295
318
  }
319
+ /* -------------------------------------------------------------------------------------------------
320
+ * Tool error mapping
321
+ * ------------------------------------------------------------------------------------------------- */
296
322
  export function createToolErrorResponse(message, url) {
297
323
  const structuredContent = {
298
324
  error: message,
@@ -304,10 +330,6 @@ export function createToolErrorResponse(message, url) {
304
330
  isError: true,
305
331
  };
306
332
  }
307
- export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
308
- const message = resolveToolErrorMessage(error, fallbackMessage);
309
- return createToolErrorResponse(message, url);
310
- }
311
333
  function isValidationError(error) {
312
334
  return (error instanceof Error &&
313
335
  isSystemError(error) &&
@@ -322,6 +344,10 @@ function resolveToolErrorMessage(error, fallbackMessage) {
322
344
  }
323
345
  return `${fallbackMessage}: Unknown error`;
324
346
  }
347
+ export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
348
+ const message = resolveToolErrorMessage(error, fallbackMessage);
349
+ return createToolErrorResponse(message, url);
350
+ }
325
351
  function parseJsonRecord(input) {
326
352
  try {
327
353
  const parsed = JSON.parse(input);
@@ -381,6 +407,9 @@ function serializeMarkdownResult(result) {
381
407
  truncated: result.truncated,
382
408
  });
383
409
  }
410
+ /* -------------------------------------------------------------------------------------------------
411
+ * fetch-url tool implementation
412
+ * ------------------------------------------------------------------------------------------------- */
384
413
  function buildStructuredContent(pipeline, inlineResult, inputUrl) {
385
414
  return {
386
415
  url: pipeline.url,
@@ -393,6 +422,14 @@ function buildStructuredContent(pipeline, inlineResult, inputUrl) {
393
422
  function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
394
423
  return buildToolContentBlocks(structuredContent, pipeline.fromCache, inlineResult, 'Fetched markdown', pipeline.cacheKey, pipeline.data.content, pipeline.url, pipeline.data.title);
395
424
  }
425
+ function buildResponse(pipeline, inlineResult, inputUrl) {
426
+ const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
427
+ const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
428
+ return {
429
+ content,
430
+ structuredContent,
431
+ };
432
+ }
396
433
  async function fetchPipeline(url, signal, progress) {
397
434
  return performSharedFetch({
398
435
  url,
@@ -407,14 +444,6 @@ async function fetchPipeline(url, signal, progress) {
407
444
  deserialize: parseCachedMarkdownResult,
408
445
  });
409
446
  }
410
- function buildResponse(pipeline, inlineResult, inputUrl) {
411
- const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
412
- const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
413
- return {
414
- content,
415
- structuredContent,
416
- };
417
- }
418
447
  async function executeFetch(input, extra) {
419
448
  const { url } = input;
420
449
  if (!url) {
@@ -432,6 +461,7 @@ async function executeFetch(input, extra) {
432
461
  await progress.report(1, 'Validating URL');
433
462
  logDebug('Fetching URL', { url });
434
463
  await progress.report(2, 'Fetching content');
464
+ await progress.report(2, 'Fetching content'); // preserve existing behavior
435
465
  const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress);
436
466
  if (pipeline.fromCache) {
437
467
  await progress.report(3, 'Using cached content');
@@ -3,10 +3,12 @@ export interface StageBudget {
3
3
  totalBudgetMs: number;
4
4
  elapsedMs: number;
5
5
  }
6
+ /** Backwards-compatible exports */
6
7
  export declare function startTransformStage(url: string, stage: string, budget?: StageBudget): TransformStageContext | null;
7
8
  export declare function endTransformStage(context: TransformStageContext | null, options?: {
8
9
  truncated?: boolean;
9
10
  }): number;
11
+ /** Backwards-compatible export */
10
12
  export declare function extractContent(html: string, url: string, options?: {
11
13
  extractArticle?: boolean;
12
14
  signal?: AbortSignal;
@@ -21,11 +23,11 @@ export declare function isExtractionSufficient(article: ExtractedArticle | null,
21
23
  export declare function determineContentExtractionSource(article: ExtractedArticle | null): article is ExtractedArticle;
22
24
  export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
23
25
  export declare function transformHtmlToMarkdownInProcess(html: string, url: string, options: TransformOptions): MarkdownTransformResult;
24
- export declare function shutdownTransformWorkerPool(): Promise<void>;
25
26
  export interface TransformPoolStats {
26
27
  queueDepth: number;
27
28
  activeWorkers: number;
28
29
  capacity: number;
29
30
  }
30
31
  export declare function getTransformPoolStats(): TransformPoolStats | null;
32
+ export declare function shutdownTransformWorkerPool(): Promise<void>;
31
33
  export declare function transformHtmlToMarkdown(html: string, url: string, options: TransformOptions): Promise<MarkdownTransformResult>;