@j0hanz/fetch-url-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/dist/cache.d.ts +9 -3
  2. package/dist/cache.d.ts.map +1 -0
  3. package/dist/cache.js +44 -110
  4. package/dist/cache.js.map +1 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.d.ts.map +1 -0
  7. package/dist/cli.js +9 -4
  8. package/dist/cli.js.map +1 -0
  9. package/dist/config.d.ts +2 -3
  10. package/dist/config.d.ts.map +1 -0
  11. package/dist/config.js +18 -25
  12. package/dist/config.js.map +1 -0
  13. package/dist/crypto.d.ts +1 -0
  14. package/dist/crypto.d.ts.map +1 -0
  15. package/dist/crypto.js +1 -0
  16. package/dist/crypto.js.map +1 -0
  17. package/dist/dom-noise-removal.d.ts +2 -1
  18. package/dist/dom-noise-removal.d.ts.map +1 -0
  19. package/dist/dom-noise-removal.js +8 -4
  20. package/dist/dom-noise-removal.js.map +1 -0
  21. package/dist/download.d.ts +4 -0
  22. package/dist/download.d.ts.map +1 -0
  23. package/dist/download.js +106 -0
  24. package/dist/download.js.map +1 -0
  25. package/dist/errors.d.ts +1 -0
  26. package/dist/errors.d.ts.map +1 -0
  27. package/dist/errors.js +1 -0
  28. package/dist/errors.js.map +1 -0
  29. package/dist/examples/mcp-fetch-url-client.js +19 -3
  30. package/dist/examples/mcp-fetch-url-client.js.map +1 -1
  31. package/dist/fetch-content.d.ts +1 -0
  32. package/dist/fetch-content.d.ts.map +1 -0
  33. package/dist/fetch-content.js +14 -14
  34. package/dist/fetch-content.js.map +1 -0
  35. package/dist/fetch-stream.d.ts +1 -0
  36. package/dist/fetch-stream.d.ts.map +1 -0
  37. package/dist/fetch-stream.js +6 -3
  38. package/dist/fetch-stream.js.map +1 -0
  39. package/dist/fetch.d.ts +1 -0
  40. package/dist/fetch.d.ts.map +1 -0
  41. package/dist/fetch.js +120 -51
  42. package/dist/fetch.js.map +1 -0
  43. package/dist/host-normalization.d.ts +1 -0
  44. package/dist/host-normalization.d.ts.map +1 -0
  45. package/dist/host-normalization.js +19 -6
  46. package/dist/host-normalization.js.map +1 -0
  47. package/dist/http/auth.d.ts +35 -0
  48. package/dist/http/auth.d.ts.map +1 -0
  49. package/dist/http/auth.js +283 -0
  50. package/dist/http/auth.js.map +1 -0
  51. package/dist/http/health.d.ts +7 -0
  52. package/dist/http/health.d.ts.map +1 -0
  53. package/dist/http/health.js +166 -0
  54. package/dist/http/health.js.map +1 -0
  55. package/dist/http/helpers.d.ts +58 -0
  56. package/dist/http/helpers.d.ts.map +1 -0
  57. package/dist/http/helpers.js +372 -0
  58. package/dist/http/helpers.js.map +1 -0
  59. package/dist/{http-native.d.ts → http/native.d.ts} +1 -0
  60. package/dist/http/native.d.ts.map +1 -0
  61. package/dist/http/native.js +529 -0
  62. package/dist/http/native.js.map +1 -0
  63. package/dist/http/rate-limit.d.ts +13 -0
  64. package/dist/http/rate-limit.d.ts.map +1 -0
  65. package/dist/http/rate-limit.js +81 -0
  66. package/dist/http/rate-limit.js.map +1 -0
  67. package/dist/index.d.ts +1 -0
  68. package/dist/index.d.ts.map +1 -0
  69. package/dist/index.js +2 -1
  70. package/dist/index.js.map +1 -0
  71. package/dist/instructions.d.ts +2 -0
  72. package/dist/instructions.d.ts.map +1 -0
  73. package/dist/instructions.js +108 -0
  74. package/dist/instructions.js.map +1 -0
  75. package/dist/ip-blocklist.d.ts +1 -0
  76. package/dist/ip-blocklist.d.ts.map +1 -0
  77. package/dist/ip-blocklist.js +2 -0
  78. package/dist/ip-blocklist.js.map +1 -0
  79. package/dist/json.d.ts +2 -1
  80. package/dist/json.d.ts.map +1 -0
  81. package/dist/json.js +19 -6
  82. package/dist/json.js.map +1 -0
  83. package/dist/language-detection.d.ts +1 -0
  84. package/dist/language-detection.d.ts.map +1 -0
  85. package/dist/language-detection.js +1 -0
  86. package/dist/language-detection.js.map +1 -0
  87. package/dist/markdown-cleanup.d.ts +2 -1
  88. package/dist/markdown-cleanup.d.ts.map +1 -0
  89. package/dist/markdown-cleanup.js +51 -52
  90. package/dist/markdown-cleanup.js.map +1 -0
  91. package/dist/mcp-validator.d.ts +1 -0
  92. package/dist/mcp-validator.d.ts.map +1 -0
  93. package/dist/mcp-validator.js +16 -8
  94. package/dist/mcp-validator.js.map +1 -0
  95. package/dist/mcp.d.ts +2 -2
  96. package/dist/mcp.d.ts.map +1 -0
  97. package/dist/mcp.js +17 -333
  98. package/dist/mcp.js.map +1 -0
  99. package/dist/observability.d.ts +2 -0
  100. package/dist/observability.d.ts.map +1 -0
  101. package/dist/observability.js +30 -5
  102. package/dist/observability.js.map +1 -0
  103. package/dist/prompts.d.ts +1 -0
  104. package/dist/prompts.d.ts.map +1 -0
  105. package/dist/prompts.js +15 -3
  106. package/dist/prompts.js.map +1 -0
  107. package/dist/resources.d.ts +1 -0
  108. package/dist/resources.d.ts.map +1 -0
  109. package/dist/resources.js +30 -23
  110. package/dist/resources.js.map +1 -0
  111. package/dist/server-tuning.d.ts +1 -0
  112. package/dist/server-tuning.d.ts.map +1 -0
  113. package/dist/server-tuning.js +11 -15
  114. package/dist/server-tuning.js.map +1 -0
  115. package/dist/server.d.ts +1 -0
  116. package/dist/server.d.ts.map +1 -0
  117. package/dist/server.js +23 -23
  118. package/dist/server.js.map +1 -0
  119. package/dist/session.d.ts +1 -0
  120. package/dist/session.d.ts.map +1 -0
  121. package/dist/session.js +55 -28
  122. package/dist/session.js.map +1 -0
  123. package/dist/tasks/execution.d.ts +42 -0
  124. package/dist/tasks/execution.d.ts.map +1 -0
  125. package/dist/tasks/execution.js +232 -0
  126. package/dist/tasks/execution.js.map +1 -0
  127. package/dist/{tasks.d.ts → tasks/manager.d.ts} +6 -0
  128. package/dist/tasks/manager.d.ts.map +1 -0
  129. package/dist/{tasks.js → tasks/manager.js} +86 -37
  130. package/dist/tasks/manager.js.map +1 -0
  131. package/dist/tasks/owner.d.ts +33 -0
  132. package/dist/tasks/owner.d.ts.map +1 -0
  133. package/dist/tasks/owner.js +99 -0
  134. package/dist/tasks/owner.js.map +1 -0
  135. package/dist/timer-utils.d.ts +1 -0
  136. package/dist/timer-utils.d.ts.map +1 -0
  137. package/dist/timer-utils.js +12 -5
  138. package/dist/timer-utils.js.map +1 -0
  139. package/dist/tool-errors.d.ts +12 -0
  140. package/dist/tool-errors.d.ts.map +1 -0
  141. package/dist/tool-errors.js +52 -0
  142. package/dist/tool-errors.js.map +1 -0
  143. package/dist/tool-pipeline.d.ts +72 -0
  144. package/dist/tool-pipeline.d.ts.map +1 -0
  145. package/dist/tool-pipeline.js +407 -0
  146. package/dist/tool-pipeline.js.map +1 -0
  147. package/dist/tool-progress.d.ts +32 -0
  148. package/dist/tool-progress.d.ts.map +1 -0
  149. package/dist/tool-progress.js +123 -0
  150. package/dist/tool-progress.js.map +1 -0
  151. package/dist/tools.d.ts +35 -111
  152. package/dist/tools.d.ts.map +1 -0
  153. package/dist/tools.js +93 -566
  154. package/dist/tools.js.map +1 -0
  155. package/dist/{transform.d.ts → transform/transform.d.ts} +2 -1
  156. package/dist/transform/transform.d.ts.map +1 -0
  157. package/dist/{transform.js → transform/transform.js} +73 -769
  158. package/dist/transform/transform.js.map +1 -0
  159. package/dist/{transform-types.d.ts → transform/types.d.ts} +1 -0
  160. package/dist/transform/types.d.ts.map +1 -0
  161. package/dist/{transform-types.js → transform/types.js} +1 -0
  162. package/dist/transform/types.js.map +1 -0
  163. package/dist/transform/worker-pool.d.ts +93 -0
  164. package/dist/transform/worker-pool.d.ts.map +1 -0
  165. package/dist/transform/worker-pool.js +759 -0
  166. package/dist/transform/worker-pool.js.map +1 -0
  167. package/dist/transform/workers/transform-child.d.ts +2 -0
  168. package/dist/transform/workers/transform-child.d.ts.map +1 -0
  169. package/dist/{workers → transform/workers}/transform-child.js +3 -1
  170. package/dist/transform/workers/transform-child.js.map +1 -0
  171. package/dist/transform/workers/transform-worker.d.ts +2 -0
  172. package/dist/transform/workers/transform-worker.d.ts.map +1 -0
  173. package/dist/{workers → transform/workers}/transform-worker.js +2 -1
  174. package/dist/transform/workers/transform-worker.js.map +1 -0
  175. package/dist/type-guards.d.ts +1 -0
  176. package/dist/type-guards.d.ts.map +1 -0
  177. package/dist/type-guards.js +1 -0
  178. package/dist/type-guards.js.map +1 -0
  179. package/package.json +6 -7
  180. package/dist/AGENTS.md +0 -152
  181. package/dist/http-native.js +0 -1320
  182. package/dist/instructions.md +0 -113
  183. package/dist/workers/transform-child.d.ts +0 -1
  184. package/dist/workers/transform-worker.d.ts +0 -1
package/dist/tools.js CHANGED
@@ -2,20 +2,22 @@ import { randomUUID } from 'node:crypto';
2
2
  import { z } from 'zod';
3
3
  import * as cache from './cache.js';
4
4
  import { config } from './config.js';
5
- import { FetchError, getErrorMessage, isSystemError } from './errors.js';
6
- import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './fetch.js';
5
+ import { generateSafeFilename } from './download.js';
7
6
  import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
8
- import { transformBufferToMarkdown } from './transform.js';
7
+ import { createToolErrorResponse, handleToolError } from './tool-errors.js';
8
+ import { appendTruncationMarker, markdownTransform, parseCachedMarkdownResult, performSharedFetch, readNestedRecord, readString, serializeMarkdownResult, TRUNCATION_MARKER, withSignal, } from './tool-pipeline.js';
9
+ import { createProgressReporter, } from './tool-progress.js';
9
10
  import { isObject } from './type-guards.js';
10
- const TRUNCATION_MARKER = '...[truncated]';
11
- const FETCH_PROGRESS_TOTAL = 4;
12
- const PROGRESS_NOTIFICATION_TIMEOUT_MS = 5000;
11
+ // Re-export public API so existing consumers keep working.
12
+ export { createToolErrorResponse, handleToolError } from './tool-errors.js';
13
+ export { executeFetchPipeline, parseCachedMarkdownResult, performSharedFetch, } from './tool-pipeline.js';
14
+ export { createProgressReporter, } from './tool-progress.js';
13
15
  export const fetchUrlInputSchema = z.strictObject({
14
16
  url: z
15
17
  .url({ protocol: /^https?$/i })
16
18
  .min(1)
17
19
  .max(config.constants.maxUrlLength)
18
- .describe('The URL of the webpage to fetch and convert to Markdown'),
20
+ .describe(`The URL of the webpage to fetch and convert to Markdown. Max ${config.constants.maxUrlLength} characters.`),
19
21
  skipNoiseRemoval: z
20
22
  .boolean()
21
23
  .optional()
@@ -30,9 +32,9 @@ export const fetchUrlInputSchema = z.strictObject({
30
32
  .min(0)
31
33
  .max(config.constants.maxHtmlSize)
32
34
  .optional()
33
- .describe('Optional per-call inline markdown limit. 0 means unlimited. If a global inline limit is configured, the lower value is used.'),
35
+ .describe(`Optional per-call inline markdown limit (0 to ${config.constants.maxHtmlSize}). 0 means unlimited. If a global inline limit is configured, the lower value is used.`),
34
36
  });
35
- const fetchUrlOutputSchema = z.strictObject({
37
+ export const fetchUrlOutputSchema = z.strictObject({
36
38
  url: z
37
39
  .string()
38
40
  .min(1)
@@ -95,7 +97,7 @@ const fetchUrlOutputSchema = z.strictObject({
95
97
  ? z.string().max(config.constants.maxInlineContentChars)
96
98
  : z.string())
97
99
  .optional()
98
- .describe('The extracted content in Markdown format'),
100
+ .describe('The extracted content in Markdown format. May be truncated if exceeding inline limits; check "truncated" field'),
99
101
  fromCache: z
100
102
  .boolean()
101
103
  .optional()
@@ -116,20 +118,6 @@ const fetchUrlOutputSchema = z.strictObject({
116
118
  .boolean()
117
119
  .optional()
118
120
  .describe('Whether the returned markdown was truncated'),
119
- error: z
120
- .string()
121
- .max(2048)
122
- .optional()
123
- .describe('Error message if the request failed'),
124
- statusCode: z
125
- .number()
126
- .int()
127
- .optional()
128
- .describe('HTTP status code for failed requests'),
129
- details: z
130
- .record(z.string(), z.unknown())
131
- .optional()
132
- .describe('Additional error details when available'),
133
121
  });
134
122
  export const FETCH_URL_TOOL_NAME = 'fetch-url';
135
123
  const FETCH_URL_TOOL_DESCRIPTION = `
@@ -140,262 +128,21 @@ This tool is useful for:
140
128
  - Extracting main content while removing navigation and ads (noise removal).
141
129
  - Caching content to speed up repeated queries.
142
130
 
131
+ Key behaviors:
132
+ - GitHub, GitLab, and Bitbucket URLs are auto-transformed to raw content endpoints; check resolvedUrl.
133
+ - If truncated is true in the response, use cacheResourceUri with resources/read to retrieve the full content.
134
+ - For long-running fetches or large pages, invoke with task: {} to get a taskId and poll tasks/get until complete.
135
+
143
136
  Limitations:
144
- - Inline output may be truncated when MAX_INLINE_CONTENT_CHARS is set.
145
- - Does not execute complex client-side JavaScript interactions.
137
+ - Does not execute client-side JavaScript; JS-rendered pages may be incomplete.
138
+ - If the error code is queue_full, the worker pool is busy — retry the call using task mode (task: {}) instead.
146
139
  `.trim();
147
- // Specific icon for the fetch-url tool (download cloud / web)
148
140
  const TOOL_ICON = {
149
141
  src: 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSJjdXJyZW50Q29sb3IiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMjEgMTV2NGEyIDIgMCAwIDEtMiAySDVhMiAyIDAgMCAxLTItMnYtNCIvPjxwb2x5bGluZSBwb2ludHM9IjcgMTAgMTIgMTUgMTcgMTAiLz48bGluZSB4MT0iMTIiIHkxPSIxNSIgeDI9IjEyIiB5Mj0iMyIvPjwvc3ZnPg==',
150
142
  mimeType: 'image/svg+xml',
151
143
  };
152
- function asRecord(value) {
153
- return isObject(value) ? value : undefined;
154
- }
155
- function readUnknown(obj, key) {
156
- const record = asRecord(obj);
157
- return record ? record[key] : undefined;
158
- }
159
- function readString(obj, key) {
160
- const value = readUnknown(obj, key);
161
- return typeof value === 'string' ? value : undefined;
162
- }
163
- function readNestedRecord(obj, keys) {
164
- let current = obj;
165
- for (const key of keys) {
166
- current = readUnknown(current, key);
167
- if (current === undefined)
168
- return undefined;
169
- }
170
- return asRecord(current);
171
- }
172
- function safeJsonParse(value) {
173
- try {
174
- return JSON.parse(value);
175
- }
176
- catch {
177
- return undefined;
178
- }
179
- }
180
- function withSignal(signal) {
181
- return signal === undefined ? {} : { signal };
182
- }
183
- function buildToolAbortSignal(extraSignal) {
184
- const { timeoutMs } = config.tools;
185
- if (timeoutMs <= 0)
186
- return extraSignal;
187
- const timeoutSignal = AbortSignal.timeout(timeoutMs);
188
- if (!extraSignal)
189
- return timeoutSignal;
190
- return AbortSignal.any([extraSignal, timeoutSignal]);
191
- }
192
- /* -------------------------------------------------------------------------------------------------
193
- * Progress reporting
194
- * ------------------------------------------------------------------------------------------------- */
195
- function resolveRelatedTaskMeta(meta) {
196
- const related = readUnknown(meta, 'io.modelcontextprotocol/related-task');
197
- const taskId = readString(related, 'taskId');
198
- return taskId ? { taskId } : undefined;
199
- }
200
- class ToolProgressReporter {
201
- token;
202
- sendNotification;
203
- relatedTaskMeta;
204
- onProgress;
205
- reportQueue = Promise.resolve();
206
- constructor(token, sendNotification, relatedTaskMeta, onProgress) {
207
- this.token = token;
208
- this.sendNotification = sendNotification;
209
- this.relatedTaskMeta = relatedTaskMeta;
210
- this.onProgress = onProgress;
211
- }
212
- static create(extra) {
213
- const token = extra?._meta?.progressToken ?? null;
214
- const sendNotification = extra?.sendNotification;
215
- const relatedTaskMeta = resolveRelatedTaskMeta(extra?._meta);
216
- const onProgress = extra?.onProgress;
217
- if (token === null && !onProgress) {
218
- return { report: async () => { } };
219
- }
220
- return new ToolProgressReporter(token, sendNotification, relatedTaskMeta, onProgress);
221
- }
222
- async report(progress, message) {
223
- if (this.onProgress) {
224
- try {
225
- this.onProgress(progress, message);
226
- }
227
- catch (error) {
228
- logWarn('Progress callback failed', {
229
- error: getErrorMessage(error),
230
- progress,
231
- message,
232
- });
233
- }
234
- }
235
- if (this.token === null || !this.sendNotification)
236
- return;
237
- const { sendNotification } = this;
238
- const notification = {
239
- method: 'notifications/progress',
240
- params: {
241
- progressToken: this.token,
242
- progress,
243
- total: FETCH_PROGRESS_TOTAL,
244
- message,
245
- ...(this.relatedTaskMeta
246
- ? {
247
- _meta: {
248
- 'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
249
- },
250
- }
251
- : {}),
252
- },
253
- };
254
- this.reportQueue = this.reportQueue.then(async () => {
255
- let timeoutId;
256
- const timeoutPromise = new Promise((resolve) => {
257
- timeoutId = setTimeout(() => {
258
- resolve({ timeout: true });
259
- }, PROGRESS_NOTIFICATION_TIMEOUT_MS);
260
- timeoutId.unref();
261
- });
262
- try {
263
- const outcome = await Promise.race([
264
- sendNotification(notification).then(() => ({ ok: true })),
265
- timeoutPromise,
266
- ]);
267
- if ('timeout' in outcome) {
268
- logWarn('Progress notification timed out', { progress, message });
269
- }
270
- }
271
- catch (error) {
272
- logWarn('Failed to send progress notification', {
273
- error: getErrorMessage(error),
274
- progress,
275
- message,
276
- });
277
- }
278
- finally {
279
- if (timeoutId)
280
- clearTimeout(timeoutId);
281
- }
282
- });
283
- await this.reportQueue;
284
- }
285
- }
286
- export function createProgressReporter(extra) {
287
- return ToolProgressReporter.create(extra);
288
- }
289
- function getOpenCodeFence(content) {
290
- const FENCE_PATTERN = /^([ \t]*)(`{3,}|~{3,})/gm;
291
- let match;
292
- let inFence = false;
293
- let fenceChar = null;
294
- let fenceLength = 0;
295
- while ((match = FENCE_PATTERN.exec(content)) !== null) {
296
- const marker = match[2];
297
- if (!marker)
298
- continue;
299
- const [char] = marker;
300
- if (!char)
301
- continue;
302
- const { length } = marker;
303
- if (!inFence) {
304
- inFence = true;
305
- fenceChar = char;
306
- fenceLength = length;
307
- }
308
- else if (char === fenceChar && length >= fenceLength) {
309
- inFence = false;
310
- fenceChar = null;
311
- fenceLength = 0;
312
- }
313
- }
314
- if (inFence && fenceChar) {
315
- return { fenceChar, fenceLength };
316
- }
317
- return null;
318
- }
319
- function findSafeLinkBoundary(content, limit) {
320
- const lastBracket = content.lastIndexOf('[', limit);
321
- if (lastBracket === -1)
322
- return limit;
323
- const afterBracket = content.substring(lastBracket, limit);
324
- const closedPattern = /^\[[^\]]*\]\([^)]*\)/;
325
- if (closedPattern.test(afterBracket))
326
- return limit;
327
- const start = lastBracket > 0 && content[lastBracket - 1] === '!'
328
- ? lastBracket - 1
329
- : lastBracket;
330
- return start;
331
- }
332
- function truncateWithMarker(content, limit, marker) {
333
- if (content.length <= limit)
334
- return content;
335
- const maxContentLength = Math.max(0, limit - marker.length);
336
- const tentativeContent = content.substring(0, maxContentLength);
337
- const openFence = getOpenCodeFence(tentativeContent);
338
- if (openFence) {
339
- const fenceCloser = `\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`;
340
- const adjustedLength = Math.max(0, limit - marker.length - fenceCloser.length);
341
- return `${content.substring(0, adjustedLength)}${fenceCloser}${marker}`;
342
- }
343
- const safeBoundary = findSafeLinkBoundary(content, maxContentLength);
344
- if (safeBoundary < maxContentLength) {
345
- return `${content.substring(0, safeBoundary)}${marker}`;
346
- }
347
- return `${tentativeContent}${marker}`;
348
- }
349
- function appendTruncationMarker(content, marker) {
350
- if (!content)
351
- return marker;
352
- if (content.endsWith(marker))
353
- return content;
354
- const openFence = getOpenCodeFence(content);
355
- const contentWithFence = openFence
356
- ? `${content}\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`
357
- : content;
358
- const safeBoundary = findSafeLinkBoundary(contentWithFence, contentWithFence.length);
359
- if (safeBoundary < contentWithFence.length) {
360
- return `${contentWithFence.substring(0, safeBoundary)}${marker}`;
361
- }
362
- return `${contentWithFence}${marker}`;
363
- }
364
- class InlineContentLimiter {
365
- apply(content, inlineLimitOverride) {
366
- const contentSize = content.length;
367
- const inlineLimit = this.resolveInlineLimit(inlineLimitOverride);
368
- if (inlineLimit <= 0) {
369
- return { content, contentSize };
370
- }
371
- if (contentSize <= inlineLimit) {
372
- return { content, contentSize };
373
- }
374
- const truncatedContent = truncateWithMarker(content, inlineLimit, TRUNCATION_MARKER);
375
- return {
376
- content: truncatedContent,
377
- contentSize,
378
- truncated: true,
379
- };
380
- }
381
- resolveInlineLimit(inlineLimitOverride) {
382
- const globalLimit = config.constants.maxInlineContentChars;
383
- if (inlineLimitOverride === undefined)
384
- return globalLimit;
385
- if (globalLimit > 0 && inlineLimitOverride > 0) {
386
- return Math.min(inlineLimitOverride, globalLimit);
387
- }
388
- if (globalLimit > 0 && inlineLimitOverride === 0)
389
- return globalLimit;
390
- return inlineLimitOverride;
391
- }
392
- }
393
- const inlineLimiter = new InlineContentLimiter();
394
- function applyInlineContentLimit(content, inlineLimitOverride) {
395
- return inlineLimiter.apply(content, inlineLimitOverride);
396
- }
397
144
  /* -------------------------------------------------------------------------------------------------
398
- * Tool response blocks (text + optional embedded resource)
145
+ * Tool response builders
399
146
  * ------------------------------------------------------------------------------------------------- */
400
147
  function buildTextBlock(structuredContent) {
401
148
  return {
@@ -406,7 +153,7 @@ function buildTextBlock(structuredContent) {
406
153
  function buildEmbeddedResource(content, url, title) {
407
154
  if (!content)
408
155
  return null;
409
- const filename = cache.generateSafeFilename(url, title, undefined, '.md');
156
+ const filename = generateSafeFilename(url, title, undefined, '.md');
410
157
  const uri = new URL(filename, 'file:///').href;
411
158
  const resource = {
412
159
  uri,
@@ -436,297 +183,48 @@ function buildCacheResourceLink(cacheResourceUri, contentSize, fetchedAt) {
436
183
  }
437
184
  function buildToolContentBlocks(structuredContent, resourceLink, embeddedResource) {
438
185
  const blocks = [buildTextBlock(structuredContent)];
439
- if (resourceLink) {
440
- blocks.push(resourceLink);
441
- }
442
- if (embeddedResource) {
443
- blocks.push(embeddedResource);
444
- }
186
+ appendIfPresent(blocks, resourceLink);
187
+ appendIfPresent(blocks, embeddedResource);
445
188
  return blocks;
446
189
  }
447
- function resolveNormalizedUrl(url) {
448
- const { normalizedUrl: validatedUrl } = normalizeUrl(url);
449
- const transformedResult = transformToRawUrl(validatedUrl);
450
- if (!transformedResult.transformed) {
451
- return {
452
- normalizedUrl: validatedUrl,
453
- originalUrl: validatedUrl,
454
- transformed: false,
455
- };
456
- }
457
- // Re-validate transformed URLs so blocked-host and length policies still apply.
458
- const { normalizedUrl: transformedUrl } = normalizeUrl(transformedResult.url);
459
- return {
460
- normalizedUrl: transformedUrl,
461
- originalUrl: validatedUrl,
462
- transformed: true,
463
- };
464
- }
465
- function logRawUrlTransformation(resolvedUrl) {
466
- if (!resolvedUrl.transformed)
467
- return;
468
- logDebug('Using transformed raw content URL', {
469
- original: resolvedUrl.originalUrl,
470
- });
471
- }
472
- function extractTitle(value) {
473
- const record = asRecord(value);
474
- const title = record ? record['title'] : undefined;
475
- return typeof title === 'string' ? title : undefined;
476
- }
477
- function logCacheMiss(reason, cacheNamespace, normalizedUrl, error) {
478
- const log = reason.startsWith('deserialize') ? logWarn : logDebug;
479
- log(`Cache miss due to ${reason}`, {
480
- namespace: cacheNamespace,
481
- url: normalizedUrl,
482
- ...(error ? { error: getErrorMessage(error) } : {}),
483
- });
484
- }
485
- function attemptCacheRetrieval(params) {
486
- const { cacheKey, deserialize, cacheNamespace, normalizedUrl } = params;
487
- if (!cacheKey)
488
- return null;
489
- const cached = cache.get(cacheKey);
490
- if (!cached)
491
- return null;
492
- if (!deserialize) {
493
- logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
494
- return null;
495
- }
496
- let data;
497
- try {
498
- data = deserialize(cached.content);
499
- }
500
- catch (error) {
501
- logCacheMiss('deserialize exception', cacheNamespace, normalizedUrl, error);
502
- return null;
503
- }
504
- if (data === undefined) {
505
- logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
506
- return null;
507
- }
508
- logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
509
- const finalUrl = cached.url !== normalizedUrl ? cached.url : undefined;
510
- return {
511
- data,
512
- fromCache: true,
513
- url: normalizedUrl,
514
- ...(finalUrl ? { finalUrl } : {}),
515
- fetchedAt: cached.fetchedAt,
516
- cacheKey,
517
- };
518
- }
519
- function persistCache(params) {
520
- const { cacheKey, data, serialize, normalizedUrl, cacheNamespace, force } = params;
521
- if (!cacheKey)
522
- return;
523
- const serializer = serialize ?? JSON.stringify;
524
- const title = extractTitle(data);
525
- const metadata = {
526
- url: normalizedUrl,
527
- ...(title === undefined ? {} : { title }),
528
- };
529
- try {
530
- cache.set(cacheKey, serializer(data), metadata, force ? { force: true } : undefined);
531
- }
532
- catch (error) {
533
- logWarn('Failed to persist cache entry', {
534
- namespace: cacheNamespace,
535
- url: normalizedUrl,
536
- error: getErrorMessage(error),
537
- });
538
- }
539
- }
540
- export async function executeFetchPipeline(options) {
541
- const resolvedUrl = resolveNormalizedUrl(options.url);
542
- logRawUrlTransformation(resolvedUrl);
543
- const cacheKey = cache.createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
544
- if (!options.forceRefresh) {
545
- const cachedResult = attemptCacheRetrieval({
546
- cacheKey,
547
- deserialize: options.deserialize,
548
- cacheNamespace: options.cacheNamespace,
549
- normalizedUrl: resolvedUrl.normalizedUrl,
550
- });
551
- if (cachedResult) {
552
- return { ...cachedResult, originalUrl: resolvedUrl.originalUrl };
553
- }
554
- }
555
- logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
556
- const { buffer, encoding, truncated, finalUrl } = await fetchNormalizedUrlBuffer(resolvedUrl.normalizedUrl, withSignal(options.signal));
557
- const transformUrl = finalUrl || resolvedUrl.normalizedUrl;
558
- const data = await options.transform({ buffer, encoding, ...(truncated ? { truncated: true } : {}) }, transformUrl);
559
- if (cache.isEnabled()) {
560
- persistCache({
561
- cacheKey,
562
- data,
563
- serialize: options.serialize,
564
- normalizedUrl: finalUrl || resolvedUrl.normalizedUrl,
565
- cacheNamespace: options.cacheNamespace,
566
- });
567
- if (finalUrl && finalUrl !== resolvedUrl.normalizedUrl) {
568
- const finalCacheKey = cache.createCacheKey(options.cacheNamespace, finalUrl, options.cacheVary);
569
- if (finalCacheKey && finalCacheKey !== cacheKey) {
570
- persistCache({
571
- cacheKey: finalCacheKey,
572
- data,
573
- serialize: options.serialize,
574
- normalizedUrl: finalUrl,
575
- cacheNamespace: options.cacheNamespace,
576
- });
577
- }
578
- }
579
- }
580
- return {
581
- data,
582
- fromCache: false,
583
- url: resolvedUrl.normalizedUrl,
584
- originalUrl: resolvedUrl.originalUrl,
585
- finalUrl,
586
- fetchedAt: new Date().toISOString(),
587
- cacheKey,
588
- };
589
- }
590
- export async function performSharedFetch(options, deps = {}) {
591
- const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
592
- const pipelineOptions = {
593
- url: options.url,
594
- cacheNamespace: 'markdown',
595
- ...withSignal(options.signal),
596
- ...(options.cacheVary ? { cacheVary: options.cacheVary } : {}),
597
- ...(options.forceRefresh ? { forceRefresh: true } : {}),
598
- transform: options.transform,
599
- ...(options.serialize ? { serialize: options.serialize } : {}),
600
- ...(options.deserialize ? { deserialize: options.deserialize } : {}),
601
- };
602
- const pipeline = await executePipeline(pipelineOptions);
603
- const inlineResult = applyInlineContentLimit(pipeline.data.content, options.maxInlineChars);
604
- return { pipeline, inlineResult };
190
+ function appendIfPresent(items, value) {
191
+ if (value !== null && value !== undefined)
192
+ items.push(value);
605
193
  }
606
194
  /* -------------------------------------------------------------------------------------------------
607
- * Tool error mapping
195
+ * Tool abort signal
608
196
  * ------------------------------------------------------------------------------------------------- */
609
- export function createToolErrorResponse(message, url, extra) {
610
- const structuredContent = {
611
- error: message,
612
- url,
613
- ...(extra?.statusCode !== undefined
614
- ? { statusCode: extra.statusCode }
615
- : {}),
616
- ...(extra?.details ? { details: extra.details } : {}),
617
- };
618
- return {
619
- content: [buildTextBlock(structuredContent)],
620
- structuredContent,
621
- isError: true,
622
- };
623
- }
624
- function isValidationError(error) {
625
- return (error instanceof Error &&
626
- isSystemError(error) &&
627
- error.code === 'VALIDATION_ERROR');
628
- }
629
- function resolveToolErrorMessage(error, fallbackMessage) {
630
- if (isValidationError(error) || error instanceof FetchError) {
631
- return error.message;
632
- }
633
- if (error instanceof Error) {
634
- return `${fallbackMessage}: ${error.message}`;
635
- }
636
- return `${fallbackMessage}: Unknown error`;
637
- }
638
- export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
639
- const message = resolveToolErrorMessage(error, fallbackMessage);
640
- if (error instanceof FetchError) {
641
- return createToolErrorResponse(message, url, {
642
- statusCode: error.statusCode,
643
- details: error.details,
644
- });
645
- }
646
- return createToolErrorResponse(message, url);
197
+ function buildToolAbortSignal(extraSignal) {
198
+ const { timeoutMs } = config.tools;
199
+ if (timeoutMs <= 0)
200
+ return extraSignal;
201
+ const timeoutSignal = AbortSignal.timeout(timeoutMs);
202
+ if (!extraSignal)
203
+ return timeoutSignal;
204
+ return AbortSignal.any([extraSignal, timeoutSignal]);
647
205
  }
648
- function normalizeExtractedMetadata(metadata) {
649
- if (!metadata)
650
- return undefined;
651
- const normalized = {
652
- ...(metadata.title ? { title: metadata.title } : {}),
653
- ...(metadata.description ? { description: metadata.description } : {}),
654
- ...(metadata.author ? { author: metadata.author } : {}),
655
- ...(metadata.image ? { image: metadata.image } : {}),
656
- ...(metadata.favicon ? { favicon: metadata.favicon } : {}),
657
- ...(metadata.publishedAt ? { publishedAt: metadata.publishedAt } : {}),
658
- ...(metadata.modifiedAt ? { modifiedAt: metadata.modifiedAt } : {}),
659
- };
660
- if (Object.keys(normalized).length === 0)
661
- return undefined;
662
- return normalized;
206
+ /* -------------------------------------------------------------------------------------------------
207
+ * Structured response assembly
208
+ * ------------------------------------------------------------------------------------------------- */
209
+ function truncateStr(value, max) {
210
+ if (value === undefined || value.length <= max)
211
+ return value;
212
+ return value.slice(0, max);
663
213
  }
664
- const cachedMarkdownSchema = z
665
- .object({
666
- markdown: z.string().optional(),
667
- content: z.string().optional(),
668
- title: z.string().optional(),
669
- metadata: z
670
- .strictObject({
671
- title: z.string().optional(),
672
- description: z.string().optional(),
673
- author: z.string().optional(),
674
- image: z.string().optional(),
675
- favicon: z.string().optional(),
676
- publishedAt: z.string().optional(),
677
- modifiedAt: z.string().optional(),
678
- })
679
- .optional(),
680
- truncated: z.boolean().optional(),
681
- })
682
- .catchall(z.unknown())
683
- .refine((value) => typeof value.markdown === 'string' || typeof value.content === 'string', { message: 'Missing markdown/content' });
684
- export function parseCachedMarkdownResult(cached) {
685
- const parsed = safeJsonParse(cached);
686
- const result = cachedMarkdownSchema.safeParse(parsed);
687
- if (!result.success)
688
- return undefined;
689
- const markdown = result.data.markdown ?? result.data.content;
690
- if (typeof markdown !== 'string')
691
- return undefined;
692
- const metadata = normalizeExtractedMetadata(result.data.metadata);
693
- const truncated = result.data.truncated ?? false;
694
- const persistedMarkdown = truncated
695
- ? appendTruncationMarker(markdown, TRUNCATION_MARKER)
696
- : markdown;
214
+ function truncateMetadata(metadata) {
697
215
  return {
698
- content: persistedMarkdown,
699
- markdown: persistedMarkdown,
700
- title: result.data.title,
701
- ...(metadata ? { metadata } : {}),
702
- truncated,
216
+ ...metadata,
217
+ ...(metadata.title !== undefined
218
+ ? { title: truncateStr(metadata.title, 512) }
219
+ : {}),
220
+ ...(metadata.description !== undefined
221
+ ? { description: truncateStr(metadata.description, 2048) }
222
+ : {}),
223
+ ...(metadata.author !== undefined
224
+ ? { author: truncateStr(metadata.author, 512) }
225
+ : {}),
703
226
  };
704
227
  }
705
- const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
706
- const result = await transformBufferToMarkdown(input.buffer, url, {
707
- includeMetadata: true,
708
- encoding: input.encoding,
709
- ...withSignal(signal),
710
- ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
711
- ...(input.truncated ? { inputTruncated: true } : {}),
712
- });
713
- const truncated = Boolean(result.truncated || input.truncated);
714
- return { ...result, content: result.markdown, truncated };
715
- };
716
- function serializeMarkdownResult(result) {
717
- const persistedMarkdown = result.truncated
718
- ? appendTruncationMarker(result.markdown, TRUNCATION_MARKER)
719
- : result.markdown;
720
- return JSON.stringify({
721
- markdown: persistedMarkdown,
722
- title: result.title,
723
- metadata: result.metadata,
724
- truncated: result.truncated,
725
- });
726
- }
727
- /* -------------------------------------------------------------------------------------------------
728
- * fetch-url tool implementation
729
- * ------------------------------------------------------------------------------------------------- */
730
228
  function buildStructuredContent(pipeline, inlineResult, inputUrl) {
731
229
  const cacheResourceUri = resolveCacheResourceUri(pipeline.cacheKey);
732
230
  const truncated = inlineResult.truncated ?? pipeline.data.truncated;
@@ -738,8 +236,8 @@ function buildStructuredContent(pipeline, inlineResult, inputUrl) {
738
236
  ...(pipeline.finalUrl ? { finalUrl: pipeline.finalUrl } : {}),
739
237
  ...(cacheResourceUri ? { cacheResourceUri } : {}),
740
238
  inputUrl,
741
- title: pipeline.data.title,
742
- ...(metadata ? { metadata } : {}),
239
+ title: truncateStr(pipeline.data.title, 512),
240
+ ...(metadata ? { metadata: truncateMetadata(metadata) } : {}),
743
241
  markdown,
744
242
  fromCache: pipeline.fromCache,
745
243
  fetchedAt: pipeline.fetchedAt,
@@ -780,7 +278,6 @@ function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
780
278
  function buildResponse(pipeline, inlineResult, inputUrl) {
781
279
  const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
782
280
  const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
783
- // Runtime validation guard: verify output matches schema
784
281
  const validation = fetchUrlOutputSchema.safeParse(structuredContent);
785
282
  if (!validation.success) {
786
283
  logWarn('Tool output schema validation failed', {
@@ -793,6 +290,26 @@ function buildResponse(pipeline, inlineResult, inputUrl) {
793
290
  structuredContent,
794
291
  };
795
292
  }
293
+ /* -------------------------------------------------------------------------------------------------
294
+ * fetch-url tool implementation
295
+ * ------------------------------------------------------------------------------------------------- */
296
+ export function getUrlContext(urlStr) {
297
+ try {
298
+ const u = new URL(urlStr);
299
+ const host = u.hostname.replace(/^www\./, '');
300
+ const path = u.pathname;
301
+ if (path === '/' || path === '')
302
+ return host;
303
+ let basename = path.split('/').filter(Boolean).pop();
304
+ if (basename && basename.length > 20) {
305
+ basename = `${basename.substring(0, 17)}...`;
306
+ }
307
+ return basename ? `${host}/…/${basename}` : host;
308
+ }
309
+ catch {
310
+ return 'unknown';
311
+ }
312
+ }
796
313
  async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars) {
797
314
  return performSharedFetch({
798
315
  url,
@@ -802,7 +319,8 @@ async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefre
802
319
  ...(maxInlineChars !== undefined ? { maxInlineChars } : {}),
803
320
  transform: async ({ buffer, encoding, truncated }, normalizedUrl) => {
804
321
  if (progress) {
805
- void progress.report(3, 'Transforming content');
322
+ const contextStr = getUrlContext(url);
323
+ void progress.report(2, `fetch-url: ${contextStr} [transforming]`);
806
324
  }
807
325
  return markdownTransform({ buffer, encoding, ...(truncated ? { truncated } : {}) }, normalizedUrl, signal, skipNoiseRemoval);
808
326
  },
@@ -817,15 +335,23 @@ async function executeFetch(input, extra) {
817
335
  }
818
336
  const signal = buildToolAbortSignal(extra?.signal);
819
337
  const progress = createProgressReporter(extra);
820
- void progress.report(1, 'Validating URL');
338
+ const contextStr = getUrlContext(url);
339
+ void progress.report(0, `fetch-url: ${contextStr} [starting]`);
821
340
  logDebug('Fetching URL', { url });
822
- void progress.report(2, 'Fetching content');
823
- const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress, input.skipNoiseRemoval, input.forceRefresh, input.maxInlineChars);
824
- if (pipeline.fromCache) {
825
- void progress.report(3, 'Using cached content');
341
+ try {
342
+ void progress.report(1, `fetch-url: ${contextStr} [fetching]`);
343
+ const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress, input.skipNoiseRemoval, input.forceRefresh, input.maxInlineChars);
344
+ if (pipeline.fromCache) {
345
+ void progress.report(3, `fetch-url: ${contextStr} [using cache]`);
346
+ }
347
+ void progress.report(4, `fetch-url: ${contextStr} • success`);
348
+ return buildResponse(pipeline, inlineResult, url);
349
+ }
350
+ catch (error) {
351
+ const isAbort = error instanceof Error && error.name === 'AbortError';
352
+ void progress.report(4, `fetch-url: ${contextStr} • ${isAbort ? 'cancelled' : 'failed'}`);
353
+ throw error;
826
354
  }
827
- void progress.report(4, 'Finalizing response');
828
- return buildResponse(pipeline, inlineResult, url);
829
355
  }
830
356
  export async function fetchUrlToolHandler(input, extra) {
831
357
  return executeFetch(input, extra).catch((error) => {
@@ -899,3 +425,4 @@ export function registerTools(server) {
899
425
  }, withRequestContextIfMissing(TOOL_DEFINITION.handler));
900
426
  registeredTool.execution = TOOL_DEFINITION.execution;
901
427
  }
428
+ //# sourceMappingURL=tools.js.map