@j0hanz/fetch-url-mcp 1.1.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/README.md +18 -0
  2. package/dist/cache.d.ts +9 -3
  3. package/dist/cache.d.ts.map +1 -0
  4. package/dist/cache.js +44 -110
  5. package/dist/cache.js.map +1 -0
  6. package/dist/cli.d.ts +1 -0
  7. package/dist/cli.d.ts.map +1 -0
  8. package/dist/cli.js +9 -4
  9. package/dist/cli.js.map +1 -0
  10. package/dist/config.d.ts +2 -3
  11. package/dist/config.d.ts.map +1 -0
  12. package/dist/config.js +18 -25
  13. package/dist/config.js.map +1 -0
  14. package/dist/crypto.d.ts +1 -0
  15. package/dist/crypto.d.ts.map +1 -0
  16. package/dist/crypto.js +1 -0
  17. package/dist/crypto.js.map +1 -0
  18. package/dist/dom-noise-removal.d.ts +2 -1
  19. package/dist/dom-noise-removal.d.ts.map +1 -0
  20. package/dist/dom-noise-removal.js +8 -4
  21. package/dist/dom-noise-removal.js.map +1 -0
  22. package/dist/download.d.ts +4 -0
  23. package/dist/download.d.ts.map +1 -0
  24. package/dist/download.js +106 -0
  25. package/dist/download.js.map +1 -0
  26. package/dist/errors.d.ts +1 -0
  27. package/dist/errors.d.ts.map +1 -0
  28. package/dist/errors.js +1 -0
  29. package/dist/errors.js.map +1 -0
  30. package/dist/examples/mcp-fetch-url-client.js +329 -0
  31. package/dist/examples/mcp-fetch-url-client.js.map +1 -0
  32. package/dist/fetch-content.d.ts +1 -0
  33. package/dist/fetch-content.d.ts.map +1 -0
  34. package/dist/fetch-content.js +14 -14
  35. package/dist/fetch-content.js.map +1 -0
  36. package/dist/fetch-stream.d.ts +1 -0
  37. package/dist/fetch-stream.d.ts.map +1 -0
  38. package/dist/fetch-stream.js +6 -3
  39. package/dist/fetch-stream.js.map +1 -0
  40. package/dist/fetch.d.ts +1 -0
  41. package/dist/fetch.d.ts.map +1 -0
  42. package/dist/fetch.js +178 -126
  43. package/dist/fetch.js.map +1 -0
  44. package/dist/host-normalization.d.ts +1 -0
  45. package/dist/host-normalization.d.ts.map +1 -0
  46. package/dist/host-normalization.js +19 -6
  47. package/dist/host-normalization.js.map +1 -0
  48. package/dist/http/auth.d.ts +35 -0
  49. package/dist/http/auth.d.ts.map +1 -0
  50. package/dist/http/auth.js +283 -0
  51. package/dist/http/auth.js.map +1 -0
  52. package/dist/http/health.d.ts +7 -0
  53. package/dist/http/health.d.ts.map +1 -0
  54. package/dist/http/health.js +166 -0
  55. package/dist/http/health.js.map +1 -0
  56. package/dist/http/helpers.d.ts +58 -0
  57. package/dist/http/helpers.d.ts.map +1 -0
  58. package/dist/http/helpers.js +372 -0
  59. package/dist/http/helpers.js.map +1 -0
  60. package/dist/{http-native.d.ts → http/native.d.ts} +1 -0
  61. package/dist/http/native.d.ts.map +1 -0
  62. package/dist/http/native.js +529 -0
  63. package/dist/http/native.js.map +1 -0
  64. package/dist/http/rate-limit.d.ts +13 -0
  65. package/dist/http/rate-limit.d.ts.map +1 -0
  66. package/dist/http/rate-limit.js +81 -0
  67. package/dist/http/rate-limit.js.map +1 -0
  68. package/dist/index.d.ts +1 -0
  69. package/dist/index.d.ts.map +1 -0
  70. package/dist/index.js +2 -1
  71. package/dist/index.js.map +1 -0
  72. package/dist/instructions.d.ts +2 -0
  73. package/dist/instructions.d.ts.map +1 -0
  74. package/dist/instructions.js +108 -0
  75. package/dist/instructions.js.map +1 -0
  76. package/dist/ip-blocklist.d.ts +1 -0
  77. package/dist/ip-blocklist.d.ts.map +1 -0
  78. package/dist/ip-blocklist.js +2 -0
  79. package/dist/ip-blocklist.js.map +1 -0
  80. package/dist/json.d.ts +2 -1
  81. package/dist/json.d.ts.map +1 -0
  82. package/dist/json.js +19 -6
  83. package/dist/json.js.map +1 -0
  84. package/dist/language-detection.d.ts +1 -0
  85. package/dist/language-detection.d.ts.map +1 -0
  86. package/dist/language-detection.js +1 -0
  87. package/dist/language-detection.js.map +1 -0
  88. package/dist/markdown-cleanup.d.ts +2 -1
  89. package/dist/markdown-cleanup.d.ts.map +1 -0
  90. package/dist/markdown-cleanup.js +51 -52
  91. package/dist/markdown-cleanup.js.map +1 -0
  92. package/dist/mcp-validator.d.ts +1 -0
  93. package/dist/mcp-validator.d.ts.map +1 -0
  94. package/dist/mcp-validator.js +16 -8
  95. package/dist/mcp-validator.js.map +1 -0
  96. package/dist/mcp.d.ts +2 -2
  97. package/dist/mcp.d.ts.map +1 -0
  98. package/dist/mcp.js +17 -333
  99. package/dist/mcp.js.map +1 -0
  100. package/dist/observability.d.ts +2 -0
  101. package/dist/observability.d.ts.map +1 -0
  102. package/dist/observability.js +30 -5
  103. package/dist/observability.js.map +1 -0
  104. package/dist/prompts.d.ts +1 -0
  105. package/dist/prompts.d.ts.map +1 -0
  106. package/dist/prompts.js +15 -3
  107. package/dist/prompts.js.map +1 -0
  108. package/dist/resources.d.ts +1 -0
  109. package/dist/resources.d.ts.map +1 -0
  110. package/dist/resources.js +30 -23
  111. package/dist/resources.js.map +1 -0
  112. package/dist/server-tuning.d.ts +1 -0
  113. package/dist/server-tuning.d.ts.map +1 -0
  114. package/dist/server-tuning.js +11 -15
  115. package/dist/server-tuning.js.map +1 -0
  116. package/dist/server.d.ts +1 -0
  117. package/dist/server.d.ts.map +1 -0
  118. package/dist/server.js +23 -23
  119. package/dist/server.js.map +1 -0
  120. package/dist/session.d.ts +1 -0
  121. package/dist/session.d.ts.map +1 -0
  122. package/dist/session.js +55 -28
  123. package/dist/session.js.map +1 -0
  124. package/dist/tasks/execution.d.ts +42 -0
  125. package/dist/tasks/execution.d.ts.map +1 -0
  126. package/dist/tasks/execution.js +232 -0
  127. package/dist/tasks/execution.js.map +1 -0
  128. package/dist/{tasks.d.ts → tasks/manager.d.ts} +6 -0
  129. package/dist/tasks/manager.d.ts.map +1 -0
  130. package/dist/{tasks.js → tasks/manager.js} +86 -37
  131. package/dist/tasks/manager.js.map +1 -0
  132. package/dist/tasks/owner.d.ts +33 -0
  133. package/dist/tasks/owner.d.ts.map +1 -0
  134. package/dist/tasks/owner.js +99 -0
  135. package/dist/tasks/owner.js.map +1 -0
  136. package/dist/timer-utils.d.ts +1 -0
  137. package/dist/timer-utils.d.ts.map +1 -0
  138. package/dist/timer-utils.js +12 -5
  139. package/dist/timer-utils.js.map +1 -0
  140. package/dist/tool-errors.d.ts +12 -0
  141. package/dist/tool-errors.d.ts.map +1 -0
  142. package/dist/tool-errors.js +52 -0
  143. package/dist/tool-errors.js.map +1 -0
  144. package/dist/tool-pipeline.d.ts +72 -0
  145. package/dist/tool-pipeline.d.ts.map +1 -0
  146. package/dist/tool-pipeline.js +407 -0
  147. package/dist/tool-pipeline.js.map +1 -0
  148. package/dist/tool-progress.d.ts +32 -0
  149. package/dist/tool-progress.d.ts.map +1 -0
  150. package/dist/tool-progress.js +123 -0
  151. package/dist/tool-progress.js.map +1 -0
  152. package/dist/tools.d.ts +35 -111
  153. package/dist/tools.d.ts.map +1 -0
  154. package/dist/tools.js +93 -544
  155. package/dist/tools.js.map +1 -0
  156. package/dist/{transform.d.ts → transform/transform.d.ts} +2 -1
  157. package/dist/transform/transform.d.ts.map +1 -0
  158. package/dist/{transform.js → transform/transform.js} +76 -776
  159. package/dist/transform/transform.js.map +1 -0
  160. package/dist/{transform-types.d.ts → transform/types.d.ts} +1 -0
  161. package/dist/transform/types.d.ts.map +1 -0
  162. package/dist/{transform-types.js → transform/types.js} +1 -0
  163. package/dist/transform/types.js.map +1 -0
  164. package/dist/transform/worker-pool.d.ts +93 -0
  165. package/dist/transform/worker-pool.d.ts.map +1 -0
  166. package/dist/transform/worker-pool.js +759 -0
  167. package/dist/transform/worker-pool.js.map +1 -0
  168. package/dist/transform/workers/transform-child.d.ts +2 -0
  169. package/dist/transform/workers/transform-child.d.ts.map +1 -0
  170. package/dist/{workers → transform/workers}/transform-child.js +3 -1
  171. package/dist/transform/workers/transform-child.js.map +1 -0
  172. package/dist/transform/workers/transform-worker.d.ts +2 -0
  173. package/dist/transform/workers/transform-worker.d.ts.map +1 -0
  174. package/dist/{workers → transform/workers}/transform-worker.js +2 -1
  175. package/dist/transform/workers/transform-worker.js.map +1 -0
  176. package/dist/type-guards.d.ts +1 -0
  177. package/dist/type-guards.d.ts.map +1 -0
  178. package/dist/type-guards.js +1 -0
  179. package/dist/type-guards.js.map +1 -0
  180. package/package.json +6 -7
  181. package/dist/AGENTS.md +0 -152
  182. package/dist/http-native.js +0 -1320
  183. package/dist/instructions.md +0 -113
  184. package/dist/workers/transform-child.d.ts +0 -1
  185. package/dist/workers/transform-worker.d.ts +0 -1
package/dist/tools.js CHANGED
@@ -2,20 +2,22 @@ import { randomUUID } from 'node:crypto';
2
2
  import { z } from 'zod';
3
3
  import * as cache from './cache.js';
4
4
  import { config } from './config.js';
5
- import { FetchError, getErrorMessage, isSystemError } from './errors.js';
6
- import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './fetch.js';
5
+ import { generateSafeFilename } from './download.js';
7
6
  import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
8
- import { transformBufferToMarkdown } from './transform.js';
7
+ import { createToolErrorResponse, handleToolError } from './tool-errors.js';
8
+ import { appendTruncationMarker, markdownTransform, parseCachedMarkdownResult, performSharedFetch, readNestedRecord, readString, serializeMarkdownResult, TRUNCATION_MARKER, withSignal, } from './tool-pipeline.js';
9
+ import { createProgressReporter, } from './tool-progress.js';
9
10
  import { isObject } from './type-guards.js';
10
- const TRUNCATION_MARKER = '...[truncated]';
11
- const FETCH_PROGRESS_TOTAL = 4;
12
- const PROGRESS_NOTIFICATION_TIMEOUT_MS = 5000;
11
+ // Re-export public API so existing consumers keep working.
12
+ export { createToolErrorResponse, handleToolError } from './tool-errors.js';
13
+ export { executeFetchPipeline, parseCachedMarkdownResult, performSharedFetch, } from './tool-pipeline.js';
14
+ export { createProgressReporter, } from './tool-progress.js';
13
15
  export const fetchUrlInputSchema = z.strictObject({
14
16
  url: z
15
17
  .url({ protocol: /^https?$/i })
16
18
  .min(1)
17
19
  .max(config.constants.maxUrlLength)
18
- .describe('The URL of the webpage to fetch and convert to Markdown'),
20
+ .describe(`The URL of the webpage to fetch and convert to Markdown. Max ${config.constants.maxUrlLength} characters.`),
19
21
  skipNoiseRemoval: z
20
22
  .boolean()
21
23
  .optional()
@@ -30,9 +32,9 @@ export const fetchUrlInputSchema = z.strictObject({
30
32
  .min(0)
31
33
  .max(config.constants.maxHtmlSize)
32
34
  .optional()
33
- .describe('Optional per-call inline markdown limit. 0 means unlimited. If a global inline limit is configured, the lower value is used.'),
35
+ .describe(`Optional per-call inline markdown limit (0 to ${config.constants.maxHtmlSize}). 0 means unlimited. If a global inline limit is configured, the lower value is used.`),
34
36
  });
35
- const fetchUrlOutputSchema = z.strictObject({
37
+ export const fetchUrlOutputSchema = z.strictObject({
36
38
  url: z
37
39
  .string()
38
40
  .min(1)
@@ -95,7 +97,7 @@ const fetchUrlOutputSchema = z.strictObject({
95
97
  ? z.string().max(config.constants.maxInlineContentChars)
96
98
  : z.string())
97
99
  .optional()
98
- .describe('The extracted content in Markdown format'),
100
+ .describe('The extracted content in Markdown format. May be truncated if exceeding inline limits; check "truncated" field'),
99
101
  fromCache: z
100
102
  .boolean()
101
103
  .optional()
@@ -116,20 +118,6 @@ const fetchUrlOutputSchema = z.strictObject({
116
118
  .boolean()
117
119
  .optional()
118
120
  .describe('Whether the returned markdown was truncated'),
119
- error: z
120
- .string()
121
- .max(2048)
122
- .optional()
123
- .describe('Error message if the request failed'),
124
- statusCode: z
125
- .number()
126
- .int()
127
- .optional()
128
- .describe('HTTP status code for failed requests'),
129
- details: z
130
- .record(z.string(), z.unknown())
131
- .optional()
132
- .describe('Additional error details when available'),
133
121
  });
134
122
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
135
123
  const FETCH_URL_TOOL_DESCRIPTION = `
@@ -140,262 +128,21 @@ This tool is useful for:
140
128
  - Extracting main content while removing navigation and ads (noise removal).
141
129
  - Caching content to speed up repeated queries.
142
130
 
131
+ Key behaviors:
132
+ - GitHub, GitLab, and Bitbucket URLs are auto-transformed to raw content endpoints; check resolvedUrl.
133
+ - If truncated is true in the response, use cacheResourceUri with resources/read to retrieve the full content.
134
+ - For long-running fetches or large pages, invoke with task: {} to get a taskId and poll tasks/get until complete.
135
+
143
136
  Limitations:
144
- - Inline output may be truncated when MAX_INLINE_CONTENT_CHARS is set.
145
- - Does not execute complex client-side JavaScript interactions.
137
+ - Does not execute client-side JavaScript; JS-rendered pages may be incomplete.
138
+ - If the error code is queue_full, the worker pool is busy — retry the call using task mode (task: {}) instead.
146
139
  `.trim();
147
- // Specific icon for the fetch-url tool (download cloud / web)
148
140
  const TOOL_ICON = {
149
141
  src: 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSJjdXJyZW50Q29sb3IiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMjEgMTV2NGEyIDIgMCAwIDEtMiAySDVhMiAyIDAgMCAxLTItMnYtNCIvPjxwb2x5bGluZSBwb2ludHM9IjcgMTAgMTIgMTUgMTcgMTAiLz48bGluZSB4MT0iMTIiIHkxPSIxNSIgeDI9IjEyIiB5Mj0iMyIvPjwvc3ZnPg==',
150
142
  mimeType: 'image/svg+xml',
151
143
  };
152
- function asRecord(value) {
153
- return isObject(value) ? value : undefined;
154
- }
155
- function readUnknown(obj, key) {
156
- const record = asRecord(obj);
157
- return record ? record[key] : undefined;
158
- }
159
- function readString(obj, key) {
160
- const value = readUnknown(obj, key);
161
- return typeof value === 'string' ? value : undefined;
162
- }
163
- function readNestedRecord(obj, keys) {
164
- let current = obj;
165
- for (const key of keys) {
166
- current = readUnknown(current, key);
167
- if (current === undefined)
168
- return undefined;
169
- }
170
- return asRecord(current);
171
- }
172
- function safeJsonParse(value) {
173
- try {
174
- return JSON.parse(value);
175
- }
176
- catch {
177
- return undefined;
178
- }
179
- }
180
- function withSignal(signal) {
181
- return signal === undefined ? {} : { signal };
182
- }
183
- function buildToolAbortSignal(extraSignal) {
184
- const { timeoutMs } = config.tools;
185
- if (timeoutMs <= 0)
186
- return extraSignal;
187
- const timeoutSignal = AbortSignal.timeout(timeoutMs);
188
- if (!extraSignal)
189
- return timeoutSignal;
190
- return AbortSignal.any([extraSignal, timeoutSignal]);
191
- }
192
144
  /* -------------------------------------------------------------------------------------------------
193
- * Progress reporting
194
- * ------------------------------------------------------------------------------------------------- */
195
- function resolveRelatedTaskMeta(meta) {
196
- const related = readUnknown(meta, 'io.modelcontextprotocol/related-task');
197
- const taskId = readString(related, 'taskId');
198
- return taskId ? { taskId } : undefined;
199
- }
200
- class ToolProgressReporter {
201
- token;
202
- sendNotification;
203
- relatedTaskMeta;
204
- onProgress;
205
- reportQueue = Promise.resolve();
206
- constructor(token, sendNotification, relatedTaskMeta, onProgress) {
207
- this.token = token;
208
- this.sendNotification = sendNotification;
209
- this.relatedTaskMeta = relatedTaskMeta;
210
- this.onProgress = onProgress;
211
- }
212
- static create(extra) {
213
- const token = extra?._meta?.progressToken ?? null;
214
- const sendNotification = extra?.sendNotification;
215
- const relatedTaskMeta = resolveRelatedTaskMeta(extra?._meta);
216
- const onProgress = extra?.onProgress;
217
- if (token === null && !onProgress) {
218
- return { report: async () => { } };
219
- }
220
- return new ToolProgressReporter(token, sendNotification, relatedTaskMeta, onProgress);
221
- }
222
- async report(progress, message) {
223
- if (this.onProgress) {
224
- try {
225
- this.onProgress(progress, message);
226
- }
227
- catch (error) {
228
- logWarn('Progress callback failed', {
229
- error: getErrorMessage(error),
230
- progress,
231
- message,
232
- });
233
- }
234
- }
235
- if (this.token === null || !this.sendNotification)
236
- return;
237
- const { sendNotification } = this;
238
- const notification = {
239
- method: 'notifications/progress',
240
- params: {
241
- progressToken: this.token,
242
- progress,
243
- total: FETCH_PROGRESS_TOTAL,
244
- message,
245
- ...(this.relatedTaskMeta
246
- ? {
247
- _meta: {
248
- 'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
249
- },
250
- }
251
- : {}),
252
- },
253
- };
254
- this.reportQueue = this.reportQueue.then(async () => {
255
- let timeoutId;
256
- const timeoutPromise = new Promise((resolve) => {
257
- timeoutId = setTimeout(() => {
258
- resolve({ timeout: true });
259
- }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
260
- timeoutId.unref();
261
- });
262
- try {
263
- const outcome = await Promise.race([
264
- sendNotification(notification).then(() => ({ ok: true })),
265
- timeoutPromise,
266
- ]);
267
- if ('timeout' in outcome) {
268
- logWarn('Progress notification timed out', { progress, message });
269
- }
270
- }
271
- catch (error) {
272
- logWarn('Failed to send progress notification', {
273
- error: getErrorMessage(error),
274
- progress,
275
- message,
276
- });
277
- }
278
- finally {
279
- if (timeoutId)
280
- clearTimeout(timeoutId);
281
- }
282
- });
283
- await this.reportQueue;
284
- }
285
- }
286
- export function createProgressReporter(extra) {
287
- return ToolProgressReporter.create(extra);
288
- }
289
- function getOpenCodeFence(content) {
290
- const FENCE_PATTERN = /^([ \t]*)(`{3,}|~{3,})/gm;
291
- let match;
292
- let inFence = false;
293
- let fenceChar = null;
294
- let fenceLength = 0;
295
- while ((match = FENCE_PATTERN.exec(content)) !== null) {
296
- const marker = match[2];
297
- if (!marker)
298
- continue;
299
- const [char] = marker;
300
- if (!char)
301
- continue;
302
- const { length } = marker;
303
- if (!inFence) {
304
- inFence = true;
305
- fenceChar = char;
306
- fenceLength = length;
307
- }
308
- else if (char === fenceChar && length >= fenceLength) {
309
- inFence = false;
310
- fenceChar = null;
311
- fenceLength = 0;
312
- }
313
- }
314
- if (inFence && fenceChar) {
315
- return { fenceChar, fenceLength };
316
- }
317
- return null;
318
- }
319
- function findSafeLinkBoundary(content, limit) {
320
- const lastBracket = content.lastIndexOf('[', limit);
321
- if (lastBracket === -1)
322
- return limit;
323
- const afterBracket = content.substring(lastBracket, limit);
324
- const closedPattern = /^\[[^\]]*\]\([^)]*\)/;
325
- if (closedPattern.test(afterBracket))
326
- return limit;
327
- const start = lastBracket > 0 && content[lastBracket - 1] === '!'
328
- ? lastBracket - 1
329
- : lastBracket;
330
- return start;
331
- }
332
- function truncateWithMarker(content, limit, marker) {
333
- if (content.length <= limit)
334
- return content;
335
- const maxContentLength = Math.max(0, limit - marker.length);
336
- const tentativeContent = content.substring(0, maxContentLength);
337
- const openFence = getOpenCodeFence(tentativeContent);
338
- if (openFence) {
339
- const fenceCloser = `\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`;
340
- const adjustedLength = Math.max(0, limit - marker.length - fenceCloser.length);
341
- return `${content.substring(0, adjustedLength)}${fenceCloser}${marker}`;
342
- }
343
- const safeBoundary = findSafeLinkBoundary(content, maxContentLength);
344
- if (safeBoundary < maxContentLength) {
345
- return `${content.substring(0, safeBoundary)}${marker}`;
346
- }
347
- return `${tentativeContent}${marker}`;
348
- }
349
- function appendTruncationMarker(content, marker) {
350
- if (!content)
351
- return marker;
352
- if (content.endsWith(marker))
353
- return content;
354
- const openFence = getOpenCodeFence(content);
355
- const contentWithFence = openFence
356
- ? `${content}\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`
357
- : content;
358
- const safeBoundary = findSafeLinkBoundary(contentWithFence, contentWithFence.length);
359
- if (safeBoundary < contentWithFence.length) {
360
- return `${contentWithFence.substring(0, safeBoundary)}${marker}`;
361
- }
362
- return `${contentWithFence}${marker}`;
363
- }
364
- class InlineContentLimiter {
365
- apply(content, inlineLimitOverride) {
366
- const contentSize = content.length;
367
- const inlineLimit = this.resolveInlineLimit(inlineLimitOverride);
368
- if (inlineLimit <= 0) {
369
- return { content, contentSize };
370
- }
371
- if (contentSize <= inlineLimit) {
372
- return { content, contentSize };
373
- }
374
- const truncatedContent = truncateWithMarker(content, inlineLimit, TRUNCATION_MARKER);
375
- return {
376
- content: truncatedContent,
377
- contentSize,
378
- truncated: true,
379
- };
380
- }
381
- resolveInlineLimit(inlineLimitOverride) {
382
- const globalLimit = config.constants.maxInlineContentChars;
383
- if (inlineLimitOverride === undefined)
384
- return globalLimit;
385
- if (globalLimit > 0 && inlineLimitOverride > 0) {
386
- return Math.min(inlineLimitOverride, globalLimit);
387
- }
388
- if (globalLimit > 0 && inlineLimitOverride === 0)
389
- return globalLimit;
390
- return inlineLimitOverride;
391
- }
392
- }
393
- const inlineLimiter = new InlineContentLimiter();
394
- function applyInlineContentLimit(content, inlineLimitOverride) {
395
- return inlineLimiter.apply(content, inlineLimitOverride);
396
- }
397
- /* -------------------------------------------------------------------------------------------------
398
- * Tool response blocks (text + optional embedded resource)
145
+ * Tool response builders
399
146
  * ------------------------------------------------------------------------------------------------- */
400
147
  function buildTextBlock(structuredContent) {
401
148
  return {
@@ -406,7 +153,7 @@ function buildTextBlock(structuredContent) {
406
153
  function buildEmbeddedResource(content, url, title) {
407
154
  if (!content)
408
155
  return null;
409
- const filename = cache.generateSafeFilename(url, title, undefined, '.md');
156
+ const filename = generateSafeFilename(url, title, undefined, '.md');
410
157
  const uri = new URL(filename, 'file:///').href;
411
158
  const resource = {
412
159
  uri,
@@ -436,275 +183,48 @@ function buildCacheResourceLink(cacheResourceUri, contentSize, fetchedAt) {
436
183
  }
437
184
  function buildToolContentBlocks(structuredContent, resourceLink, embeddedResource) {
438
185
  const blocks = [buildTextBlock(structuredContent)];
439
- if (resourceLink) {
440
- blocks.push(resourceLink);
441
- }
442
- if (embeddedResource) {
443
- blocks.push(embeddedResource);
444
- }
186
+ appendIfPresent(blocks, resourceLink);
187
+ appendIfPresent(blocks, embeddedResource);
445
188
  return blocks;
446
189
  }
447
- function resolveNormalizedUrl(url) {
448
- const { normalizedUrl: validatedUrl } = normalizeUrl(url);
449
- const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
450
- return { normalizedUrl, originalUrl: validatedUrl, transformed };
451
- }
452
- function logRawUrlTransformation(resolvedUrl) {
453
- if (!resolvedUrl.transformed)
454
- return;
455
- logDebug('Using transformed raw content URL', {
456
- original: resolvedUrl.originalUrl,
457
- });
458
- }
459
- function extractTitle(value) {
460
- const record = asRecord(value);
461
- const title = record ? record['title'] : undefined;
462
- return typeof title === 'string' ? title : undefined;
463
- }
464
- function logCacheMiss(reason, cacheNamespace, normalizedUrl, error) {
465
- const log = reason.startsWith('deserialize') ? logWarn : logDebug;
466
- log(`Cache miss due to ${reason}`, {
467
- namespace: cacheNamespace,
468
- url: normalizedUrl,
469
- ...(error ? { error: getErrorMessage(error) } : {}),
470
- });
471
- }
472
- function attemptCacheRetrieval(params) {
473
- const { cacheKey, deserialize, cacheNamespace, normalizedUrl } = params;
474
- if (!cacheKey)
475
- return null;
476
- const cached = cache.get(cacheKey);
477
- if (!cached)
478
- return null;
479
- if (!deserialize) {
480
- logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
481
- return null;
482
- }
483
- let data;
484
- try {
485
- data = deserialize(cached.content);
486
- }
487
- catch (error) {
488
- logCacheMiss('deserialize exception', cacheNamespace, normalizedUrl, error);
489
- return null;
490
- }
491
- if (data === undefined) {
492
- logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
493
- return null;
494
- }
495
- logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
496
- return {
497
- data,
498
- fromCache: true,
499
- url: normalizedUrl,
500
- fetchedAt: cached.fetchedAt,
501
- cacheKey,
502
- };
503
- }
504
- function persistCache(params) {
505
- const { cacheKey, data, serialize, normalizedUrl, cacheNamespace, force } = params;
506
- if (!cacheKey)
507
- return;
508
- const serializer = serialize ?? JSON.stringify;
509
- const title = extractTitle(data);
510
- const metadata = {
511
- url: normalizedUrl,
512
- ...(title === undefined ? {} : { title }),
513
- };
514
- try {
515
- cache.set(cacheKey, serializer(data), metadata, force ? { force: true } : undefined);
516
- }
517
- catch (error) {
518
- logWarn('Failed to persist cache entry', {
519
- namespace: cacheNamespace,
520
- url: normalizedUrl,
521
- error: getErrorMessage(error),
522
- });
523
- }
524
- }
525
- export async function executeFetchPipeline(options) {
526
- const resolvedUrl = resolveNormalizedUrl(options.url);
527
- logRawUrlTransformation(resolvedUrl);
528
- const cacheKey = cache.createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
529
- if (!options.forceRefresh) {
530
- const cachedResult = attemptCacheRetrieval({
531
- cacheKey,
532
- deserialize: options.deserialize,
533
- cacheNamespace: options.cacheNamespace,
534
- normalizedUrl: resolvedUrl.normalizedUrl,
535
- });
536
- if (cachedResult) {
537
- return { ...cachedResult, originalUrl: resolvedUrl.originalUrl };
538
- }
539
- }
540
- logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
541
- const { buffer, encoding, truncated, finalUrl } = await fetchNormalizedUrlBuffer(resolvedUrl.normalizedUrl, withSignal(options.signal));
542
- const transformUrl = finalUrl || resolvedUrl.normalizedUrl;
543
- const data = await options.transform({ buffer, encoding, ...(truncated ? { truncated: true } : {}) }, transformUrl);
544
- if (cache.isEnabled()) {
545
- persistCache({
546
- cacheKey,
547
- data,
548
- serialize: options.serialize,
549
- normalizedUrl: finalUrl || resolvedUrl.normalizedUrl,
550
- cacheNamespace: options.cacheNamespace,
551
- });
552
- if (finalUrl && finalUrl !== resolvedUrl.normalizedUrl) {
553
- const finalCacheKey = cache.createCacheKey(options.cacheNamespace, finalUrl, options.cacheVary);
554
- if (finalCacheKey && finalCacheKey !== cacheKey) {
555
- persistCache({
556
- cacheKey: finalCacheKey,
557
- data,
558
- serialize: options.serialize,
559
- normalizedUrl: finalUrl,
560
- cacheNamespace: options.cacheNamespace,
561
- });
562
- }
563
- }
564
- }
565
- return {
566
- data,
567
- fromCache: false,
568
- url: resolvedUrl.normalizedUrl,
569
- originalUrl: resolvedUrl.originalUrl,
570
- finalUrl,
571
- fetchedAt: new Date().toISOString(),
572
- cacheKey,
573
- };
574
- }
575
- export async function performSharedFetch(options, deps = {}) {
576
- const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
577
- const pipelineOptions = {
578
- url: options.url,
579
- cacheNamespace: 'markdown',
580
- ...withSignal(options.signal),
581
- ...(options.cacheVary ? { cacheVary: options.cacheVary } : {}),
582
- ...(options.forceRefresh ? { forceRefresh: true } : {}),
583
- transform: options.transform,
584
- ...(options.serialize ? { serialize: options.serialize } : {}),
585
- ...(options.deserialize ? { deserialize: options.deserialize } : {}),
586
- };
587
- const pipeline = await executePipeline(pipelineOptions);
588
- const inlineResult = applyInlineContentLimit(pipeline.data.content, options.maxInlineChars);
589
- return { pipeline, inlineResult };
190
+ function appendIfPresent(items, value) {
191
+ if (value !== null && value !== undefined)
192
+ items.push(value);
590
193
  }
591
194
  /* -------------------------------------------------------------------------------------------------
592
- * Tool error mapping
195
+ * Tool abort signal
593
196
  * ------------------------------------------------------------------------------------------------- */
594
- export function createToolErrorResponse(message, url, extra) {
595
- const structuredContent = {
596
- error: message,
597
- url,
598
- ...(extra?.statusCode !== undefined
599
- ? { statusCode: extra.statusCode }
600
- : {}),
601
- ...(extra?.details ? { details: extra.details } : {}),
602
- };
603
- return {
604
- content: [buildTextBlock(structuredContent)],
605
- structuredContent,
606
- isError: true,
607
- };
608
- }
609
- function isValidationError(error) {
610
- return (error instanceof Error &&
611
- isSystemError(error) &&
612
- error.code === 'VALIDATION_ERROR');
613
- }
614
- function resolveToolErrorMessage(error, fallbackMessage) {
615
- if (isValidationError(error) || error instanceof FetchError) {
616
- return error.message;
617
- }
618
- if (error instanceof Error) {
619
- return `${fallbackMessage}: ${error.message}`;
620
- }
621
- return `${fallbackMessage}: Unknown error`;
622
- }
623
- export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
624
- const message = resolveToolErrorMessage(error, fallbackMessage);
625
- if (error instanceof FetchError) {
626
- return createToolErrorResponse(message, url, {
627
- statusCode: error.statusCode,
628
- details: error.details,
629
- });
630
- }
631
- return createToolErrorResponse(message, url);
197
+ function buildToolAbortSignal(extraSignal) {
198
+ const { timeoutMs } = config.tools;
199
+ if (timeoutMs <= 0)
200
+ return extraSignal;
201
+ const timeoutSignal = AbortSignal.timeout(timeoutMs);
202
+ if (!extraSignal)
203
+ return timeoutSignal;
204
+ return AbortSignal.any([extraSignal, timeoutSignal]);
632
205
  }
633
- function normalizeExtractedMetadata(metadata) {
634
- if (!metadata)
635
- return undefined;
636
- const normalized = {
637
- ...(metadata.title ? { title: metadata.title } : {}),
638
- ...(metadata.description ? { description: metadata.description } : {}),
639
- ...(metadata.author ? { author: metadata.author } : {}),
640
- ...(metadata.image ? { image: metadata.image } : {}),
641
- ...(metadata.favicon ? { favicon: metadata.favicon } : {}),
642
- ...(metadata.publishedAt ? { publishedAt: metadata.publishedAt } : {}),
643
- ...(metadata.modifiedAt ? { modifiedAt: metadata.modifiedAt } : {}),
644
- };
645
- if (Object.keys(normalized).length === 0)
646
- return undefined;
647
- return normalized;
206
+ /* -------------------------------------------------------------------------------------------------
207
+ * Structured response assembly
208
+ * ------------------------------------------------------------------------------------------------- */
209
+ function truncateStr(value, max) {
210
+ if (value === undefined || value.length <= max)
211
+ return value;
212
+ return value.slice(0, max);
648
213
  }
649
- const cachedMarkdownSchema = z
650
- .object({
651
- markdown: z.string().optional(),
652
- content: z.string().optional(),
653
- title: z.string().optional(),
654
- metadata: z
655
- .strictObject({
656
- title: z.string().optional(),
657
- description: z.string().optional(),
658
- author: z.string().optional(),
659
- image: z.string().optional(),
660
- favicon: z.string().optional(),
661
- publishedAt: z.string().optional(),
662
- modifiedAt: z.string().optional(),
663
- })
664
- .optional(),
665
- truncated: z.boolean().optional(),
666
- })
667
- .catchall(z.unknown())
668
- .refine((value) => typeof value.markdown === 'string' || typeof value.content === 'string', { message: 'Missing markdown/content' });
669
- export function parseCachedMarkdownResult(cached) {
670
- const parsed = safeJsonParse(cached);
671
- const result = cachedMarkdownSchema.safeParse(parsed);
672
- if (!result.success)
673
- return undefined;
674
- const markdown = result.data.markdown ?? result.data.content;
675
- if (typeof markdown !== 'string')
676
- return undefined;
677
- const metadata = normalizeExtractedMetadata(result.data.metadata);
214
+ function truncateMetadata(metadata) {
678
215
  return {
679
- content: markdown,
680
- markdown,
681
- title: result.data.title,
682
- ...(metadata ? { metadata } : {}),
683
- truncated: result.data.truncated ?? false,
216
+ ...metadata,
217
+ ...(metadata.title !== undefined
218
+ ? { title: truncateStr(metadata.title, 512) }
219
+ : {}),
220
+ ...(metadata.description !== undefined
221
+ ? { description: truncateStr(metadata.description, 2048) }
222
+ : {}),
223
+ ...(metadata.author !== undefined
224
+ ? { author: truncateStr(metadata.author, 512) }
225
+ : {}),
684
226
  };
685
227
  }
686
- const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
687
- const result = await transformBufferToMarkdown(input.buffer, url, {
688
- includeMetadata: true,
689
- encoding: input.encoding,
690
- ...withSignal(signal),
691
- ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
692
- ...(input.truncated ? { inputTruncated: true } : {}),
693
- });
694
- const truncated = Boolean(result.truncated || input.truncated);
695
- return { ...result, content: result.markdown, truncated };
696
- };
697
- function serializeMarkdownResult(result) {
698
- return JSON.stringify({
699
- markdown: result.markdown,
700
- title: result.title,
701
- metadata: result.metadata,
702
- truncated: result.truncated,
703
- });
704
- }
705
- /* -------------------------------------------------------------------------------------------------
706
- * fetch-url tool implementation
707
- * ------------------------------------------------------------------------------------------------- */
708
228
  function buildStructuredContent(pipeline, inlineResult, inputUrl) {
709
229
  const cacheResourceUri = resolveCacheResourceUri(pipeline.cacheKey);
710
230
  const truncated = inlineResult.truncated ?? pipeline.data.truncated;
@@ -716,8 +236,8 @@ function buildStructuredContent(pipeline, inlineResult, inputUrl) {
716
236
  ...(pipeline.finalUrl ? { finalUrl: pipeline.finalUrl } : {}),
717
237
  ...(cacheResourceUri ? { cacheResourceUri } : {}),
718
238
  inputUrl,
719
- title: pipeline.data.title,
720
- ...(metadata ? { metadata } : {}),
239
+ title: truncateStr(pipeline.data.title, 512),
240
+ ...(metadata ? { metadata: truncateMetadata(metadata) } : {}),
721
241
  markdown,
722
242
  fromCache: pipeline.fromCache,
723
243
  fetchedAt: pipeline.fetchedAt,
@@ -758,7 +278,6 @@ function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
758
278
  function buildResponse(pipeline, inlineResult, inputUrl) {
759
279
  const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
760
280
  const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
761
- // Runtime validation guard: verify output matches schema
762
281
  const validation = fetchUrlOutputSchema.safeParse(structuredContent);
763
282
  if (!validation.success) {
764
283
  logWarn('Tool output schema validation failed', {
@@ -771,6 +290,26 @@ function buildResponse(pipeline, inlineResult, inputUrl) {
771
290
  structuredContent,
772
291
  };
773
292
  }
293
+ /* -------------------------------------------------------------------------------------------------
294
+ * fetch-url tool implementation
295
+ * ------------------------------------------------------------------------------------------------- */
296
+ export function getUrlContext(urlStr) {
297
+ try {
298
+ const u = new URL(urlStr);
299
+ const host = u.hostname.replace(/^www\./, '');
300
+ const path = u.pathname;
301
+ if (path === '/' || path === '')
302
+ return host;
303
+ let basename = path.split('/').filter(Boolean).pop();
304
+ if (basename && basename.length > 20) {
305
+ basename = `${basename.substring(0, 17)}...`;
306
+ }
307
+ return basename ? `${host}/…/${basename}` : host;
308
+ }
309
+ catch {
310
+ return 'unknown';
311
+ }
312
+ }
774
313
  async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars) {
775
314
  return performSharedFetch({
776
315
  url,
@@ -780,7 +319,8 @@ async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefre
780
319
  ...(maxInlineChars !== undefined ? { maxInlineChars } : {}),
781
320
  transform: async ({ buffer, encoding, truncated }, normalizedUrl) => {
782
321
  if (progress) {
783
- void progress.report(3, 'Transforming content');
322
+ const contextStr = getUrlContext(url);
323
+ void progress.report(2, `fetch-url: ${contextStr} [transforming]`);
784
324
  }
785
325
  return markdownTransform({ buffer, encoding, ...(truncated ? { truncated } : {}) }, normalizedUrl, signal, skipNoiseRemoval);
786
326
  },
@@ -795,15 +335,23 @@ async function executeFetch(input, extra) {
795
335
  }
796
336
  const signal = buildToolAbortSignal(extra?.signal);
797
337
  const progress = createProgressReporter(extra);
798
- void progress.report(1, 'Validating URL');
338
+ const contextStr = getUrlContext(url);
339
+ void progress.report(0, `fetch-url: ${contextStr} [starting]`);
799
340
  logDebug('Fetching URL', { url });
800
- void progress.report(2, 'Fetching content');
801
- const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress, input.skipNoiseRemoval, input.forceRefresh, input.maxInlineChars);
802
- if (pipeline.fromCache) {
803
- void progress.report(3, 'Using cached content');
341
+ try {
342
+ void progress.report(1, `fetch-url: ${contextStr} [fetching]`);
343
+ const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress, input.skipNoiseRemoval, input.forceRefresh, input.maxInlineChars);
344
+ if (pipeline.fromCache) {
345
+ void progress.report(3, `fetch-url: ${contextStr} [using cache]`);
346
+ }
347
+ void progress.report(4, `fetch-url: ${contextStr} • success`);
348
+ return buildResponse(pipeline, inlineResult, url);
349
+ }
350
+ catch (error) {
351
+ const isAbort = error instanceof Error && error.name === 'AbortError';
352
+ void progress.report(4, `fetch-url: ${contextStr} • ${isAbort ? 'cancelled' : 'failed'}`);
353
+ throw error;
804
354
  }
805
- void progress.report(4, 'Finalizing response');
806
- return buildResponse(pipeline, inlineResult, url);
807
355
  }
808
356
  export async function fetchUrlToolHandler(input, extra) {
809
357
  return executeFetch(input, extra).catch((error) => {
@@ -877,3 +425,4 @@ export function registerTools(server) {
877
425
  }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
878
426
  registeredTool.execution = TOOL_DEFINITION.execution;
879
427
  }
428
+ //# sourceMappingURL=tools.js.map