@j0hanz/fetch-url-mcp 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.d.ts +9 -3
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +44 -110
- package/dist/cache.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +9 -4
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +2 -3
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +18 -25
- package/dist/config.js.map +1 -0
- package/dist/crypto.d.ts +1 -0
- package/dist/crypto.d.ts.map +1 -0
- package/dist/crypto.js +1 -0
- package/dist/crypto.js.map +1 -0
- package/dist/dom-noise-removal.d.ts +2 -1
- package/dist/dom-noise-removal.d.ts.map +1 -0
- package/dist/dom-noise-removal.js +8 -4
- package/dist/dom-noise-removal.js.map +1 -0
- package/dist/download.d.ts +4 -0
- package/dist/download.d.ts.map +1 -0
- package/dist/download.js +106 -0
- package/dist/download.js.map +1 -0
- package/dist/errors.d.ts +1 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +1 -0
- package/dist/errors.js.map +1 -0
- package/dist/examples/mcp-fetch-url-client.js +19 -3
- package/dist/examples/mcp-fetch-url-client.js.map +1 -1
- package/dist/fetch-content.d.ts +1 -0
- package/dist/fetch-content.d.ts.map +1 -0
- package/dist/fetch-content.js +14 -14
- package/dist/fetch-content.js.map +1 -0
- package/dist/fetch-stream.d.ts +1 -0
- package/dist/fetch-stream.d.ts.map +1 -0
- package/dist/fetch-stream.js +6 -3
- package/dist/fetch-stream.js.map +1 -0
- package/dist/fetch.d.ts +1 -0
- package/dist/fetch.d.ts.map +1 -0
- package/dist/fetch.js +120 -51
- package/dist/fetch.js.map +1 -0
- package/dist/host-normalization.d.ts +1 -0
- package/dist/host-normalization.d.ts.map +1 -0
- package/dist/host-normalization.js +19 -6
- package/dist/host-normalization.js.map +1 -0
- package/dist/http/auth.d.ts +35 -0
- package/dist/http/auth.d.ts.map +1 -0
- package/dist/http/auth.js +283 -0
- package/dist/http/auth.js.map +1 -0
- package/dist/http/health.d.ts +7 -0
- package/dist/http/health.d.ts.map +1 -0
- package/dist/http/health.js +166 -0
- package/dist/http/health.js.map +1 -0
- package/dist/http/helpers.d.ts +58 -0
- package/dist/http/helpers.d.ts.map +1 -0
- package/dist/http/helpers.js +372 -0
- package/dist/http/helpers.js.map +1 -0
- package/dist/{http-native.d.ts → http/native.d.ts} +1 -0
- package/dist/http/native.d.ts.map +1 -0
- package/dist/http/native.js +529 -0
- package/dist/http/native.js.map +1 -0
- package/dist/http/rate-limit.d.ts +13 -0
- package/dist/http/rate-limit.d.ts.map +1 -0
- package/dist/http/rate-limit.js +81 -0
- package/dist/http/rate-limit.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -0
- package/dist/instructions.d.ts +2 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +108 -0
- package/dist/instructions.js.map +1 -0
- package/dist/ip-blocklist.d.ts +1 -0
- package/dist/ip-blocklist.d.ts.map +1 -0
- package/dist/ip-blocklist.js +2 -0
- package/dist/ip-blocklist.js.map +1 -0
- package/dist/json.d.ts +2 -1
- package/dist/json.d.ts.map +1 -0
- package/dist/json.js +19 -6
- package/dist/json.js.map +1 -0
- package/dist/language-detection.d.ts +1 -0
- package/dist/language-detection.d.ts.map +1 -0
- package/dist/language-detection.js +1 -0
- package/dist/language-detection.js.map +1 -0
- package/dist/markdown-cleanup.d.ts +2 -1
- package/dist/markdown-cleanup.d.ts.map +1 -0
- package/dist/markdown-cleanup.js +51 -52
- package/dist/markdown-cleanup.js.map +1 -0
- package/dist/mcp-validator.d.ts +1 -0
- package/dist/mcp-validator.d.ts.map +1 -0
- package/dist/mcp-validator.js +16 -8
- package/dist/mcp-validator.js.map +1 -0
- package/dist/mcp.d.ts +2 -2
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +17 -333
- package/dist/mcp.js.map +1 -0
- package/dist/observability.d.ts +2 -0
- package/dist/observability.d.ts.map +1 -0
- package/dist/observability.js +30 -5
- package/dist/observability.js.map +1 -0
- package/dist/prompts.d.ts +1 -0
- package/dist/prompts.d.ts.map +1 -0
- package/dist/prompts.js +15 -3
- package/dist/prompts.js.map +1 -0
- package/dist/resources.d.ts +1 -0
- package/dist/resources.d.ts.map +1 -0
- package/dist/resources.js +30 -23
- package/dist/resources.js.map +1 -0
- package/dist/server-tuning.d.ts +1 -0
- package/dist/server-tuning.d.ts.map +1 -0
- package/dist/server-tuning.js +11 -15
- package/dist/server-tuning.js.map +1 -0
- package/dist/server.d.ts +1 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +23 -23
- package/dist/server.js.map +1 -0
- package/dist/session.d.ts +1 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/session.js +55 -28
- package/dist/session.js.map +1 -0
- package/dist/tasks/execution.d.ts +42 -0
- package/dist/tasks/execution.d.ts.map +1 -0
- package/dist/tasks/execution.js +232 -0
- package/dist/tasks/execution.js.map +1 -0
- package/dist/{tasks.d.ts → tasks/manager.d.ts} +6 -0
- package/dist/tasks/manager.d.ts.map +1 -0
- package/dist/{tasks.js → tasks/manager.js} +86 -37
- package/dist/tasks/manager.js.map +1 -0
- package/dist/tasks/owner.d.ts +33 -0
- package/dist/tasks/owner.d.ts.map +1 -0
- package/dist/tasks/owner.js +99 -0
- package/dist/tasks/owner.js.map +1 -0
- package/dist/timer-utils.d.ts +1 -0
- package/dist/timer-utils.d.ts.map +1 -0
- package/dist/timer-utils.js +12 -5
- package/dist/timer-utils.js.map +1 -0
- package/dist/tool-errors.d.ts +12 -0
- package/dist/tool-errors.d.ts.map +1 -0
- package/dist/tool-errors.js +52 -0
- package/dist/tool-errors.js.map +1 -0
- package/dist/tool-pipeline.d.ts +72 -0
- package/dist/tool-pipeline.d.ts.map +1 -0
- package/dist/tool-pipeline.js +407 -0
- package/dist/tool-pipeline.js.map +1 -0
- package/dist/tool-progress.d.ts +32 -0
- package/dist/tool-progress.d.ts.map +1 -0
- package/dist/tool-progress.js +123 -0
- package/dist/tool-progress.js.map +1 -0
- package/dist/tools.d.ts +35 -111
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +93 -566
- package/dist/tools.js.map +1 -0
- package/dist/{transform.d.ts → transform/transform.d.ts} +2 -1
- package/dist/transform/transform.d.ts.map +1 -0
- package/dist/{transform.js → transform/transform.js} +73 -769
- package/dist/transform/transform.js.map +1 -0
- package/dist/{transform-types.d.ts → transform/types.d.ts} +1 -0
- package/dist/transform/types.d.ts.map +1 -0
- package/dist/{transform-types.js → transform/types.js} +1 -0
- package/dist/transform/types.js.map +1 -0
- package/dist/transform/worker-pool.d.ts +93 -0
- package/dist/transform/worker-pool.d.ts.map +1 -0
- package/dist/transform/worker-pool.js +759 -0
- package/dist/transform/worker-pool.js.map +1 -0
- package/dist/transform/workers/transform-child.d.ts +2 -0
- package/dist/transform/workers/transform-child.d.ts.map +1 -0
- package/dist/{workers → transform/workers}/transform-child.js +3 -1
- package/dist/transform/workers/transform-child.js.map +1 -0
- package/dist/transform/workers/transform-worker.d.ts +2 -0
- package/dist/transform/workers/transform-worker.d.ts.map +1 -0
- package/dist/{workers → transform/workers}/transform-worker.js +2 -1
- package/dist/transform/workers/transform-worker.js.map +1 -0
- package/dist/type-guards.d.ts +1 -0
- package/dist/type-guards.d.ts.map +1 -0
- package/dist/type-guards.js +1 -0
- package/dist/type-guards.js.map +1 -0
- package/package.json +6 -7
- package/dist/AGENTS.md +0 -152
- package/dist/http-native.js +0 -1320
- package/dist/instructions.md +0 -113
- package/dist/workers/transform-child.d.ts +0 -1
- package/dist/workers/transform-worker.d.ts +0 -1
package/dist/tools.js
CHANGED
|
@@ -2,20 +2,22 @@ import { randomUUID } from 'node:crypto';
|
|
|
2
2
|
import { z } from 'zod';
|
|
3
3
|
import * as cache from './cache.js';
|
|
4
4
|
import { config } from './config.js';
|
|
5
|
-
import {
|
|
6
|
-
import { fetchNormalizedUrlBuffer, normalizeUrl, transformToRawUrl, } from './fetch.js';
|
|
5
|
+
import { generateSafeFilename } from './download.js';
|
|
7
6
|
import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
|
|
8
|
-
import {
|
|
7
|
+
import { createToolErrorResponse, handleToolError } from './tool-errors.js';
|
|
8
|
+
import { appendTruncationMarker, markdownTransform, parseCachedMarkdownResult, performSharedFetch, readNestedRecord, readString, serializeMarkdownResult, TRUNCATION_MARKER, withSignal, } from './tool-pipeline.js';
|
|
9
|
+
import { createProgressReporter, } from './tool-progress.js';
|
|
9
10
|
import { isObject } from './type-guards.js';
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
11
|
+
// Re-export public API so existing consumers keep working.
|
|
12
|
+
export { createToolErrorResponse, handleToolError } from './tool-errors.js';
|
|
13
|
+
export { executeFetchPipeline, parseCachedMarkdownResult, performSharedFetch, } from './tool-pipeline.js';
|
|
14
|
+
export { createProgressReporter, } from './tool-progress.js';
|
|
13
15
|
export const fetchUrlInputSchema = z.strictObject({
|
|
14
16
|
url: z
|
|
15
17
|
.url({ protocol: /^https?$/i })
|
|
16
18
|
.min(1)
|
|
17
19
|
.max(config.constants.maxUrlLength)
|
|
18
|
-
.describe(
|
|
20
|
+
.describe(`The URL of the webpage to fetch and convert to Markdown. Max ${config.constants.maxUrlLength} characters.`),
|
|
19
21
|
skipNoiseRemoval: z
|
|
20
22
|
.boolean()
|
|
21
23
|
.optional()
|
|
@@ -30,9 +32,9 @@ export const fetchUrlInputSchema = z.strictObject({
|
|
|
30
32
|
.min(0)
|
|
31
33
|
.max(config.constants.maxHtmlSize)
|
|
32
34
|
.optional()
|
|
33
|
-
.describe(
|
|
35
|
+
.describe(`Optional per-call inline markdown limit (0 to ${config.constants.maxHtmlSize}). 0 means unlimited. If a global inline limit is configured, the lower value is used.`),
|
|
34
36
|
});
|
|
35
|
-
const fetchUrlOutputSchema = z.strictObject({
|
|
37
|
+
export const fetchUrlOutputSchema = z.strictObject({
|
|
36
38
|
url: z
|
|
37
39
|
.string()
|
|
38
40
|
.min(1)
|
|
@@ -95,7 +97,7 @@ const fetchUrlOutputSchema = z.strictObject({
|
|
|
95
97
|
? z.string().max(config.constants.maxInlineContentChars)
|
|
96
98
|
: z.string())
|
|
97
99
|
.optional()
|
|
98
|
-
.describe('The extracted content in Markdown format'),
|
|
100
|
+
.describe('The extracted content in Markdown format. May be truncated if exceeding inline limits; check "truncated" field'),
|
|
99
101
|
fromCache: z
|
|
100
102
|
.boolean()
|
|
101
103
|
.optional()
|
|
@@ -116,20 +118,6 @@ const fetchUrlOutputSchema = z.strictObject({
|
|
|
116
118
|
.boolean()
|
|
117
119
|
.optional()
|
|
118
120
|
.describe('Whether the returned markdown was truncated'),
|
|
119
|
-
error: z
|
|
120
|
-
.string()
|
|
121
|
-
.max(2048)
|
|
122
|
-
.optional()
|
|
123
|
-
.describe('Error message if the request failed'),
|
|
124
|
-
statusCode: z
|
|
125
|
-
.number()
|
|
126
|
-
.int()
|
|
127
|
-
.optional()
|
|
128
|
-
.describe('HTTP status code for failed requests'),
|
|
129
|
-
details: z
|
|
130
|
-
.record(z.string(), z.unknown())
|
|
131
|
-
.optional()
|
|
132
|
-
.describe('Additional error details when available'),
|
|
133
121
|
});
|
|
134
122
|
export const FETCH_URL_TOOL_NAME = 'fetch-url';
|
|
135
123
|
const FETCH_URL_TOOL_DESCRIPTION = `
|
|
@@ -140,262 +128,21 @@ This tool is useful for:
|
|
|
140
128
|
- Extracting main content while removing navigation and ads (noise removal).
|
|
141
129
|
- Caching content to speed up repeated queries.
|
|
142
130
|
|
|
131
|
+
Key behaviors:
|
|
132
|
+
- GitHub, GitLab, and Bitbucket URLs are auto-transformed to raw content endpoints; check resolvedUrl.
|
|
133
|
+
- If truncated is true in the response, use cacheResourceUri with resources/read to retrieve the full content.
|
|
134
|
+
- For long-running fetches or large pages, invoke with task: {} to get a taskId and poll tasks/get until complete.
|
|
135
|
+
|
|
143
136
|
Limitations:
|
|
144
|
-
-
|
|
145
|
-
-
|
|
137
|
+
- Does not execute client-side JavaScript; JS-rendered pages may be incomplete.
|
|
138
|
+
- If the error code is queue_full, the worker pool is busy — retry the call using task mode (task: {}) instead.
|
|
146
139
|
`.trim();
|
|
147
|
-
// Specific icon for the fetch-url tool (download cloud / web)
|
|
148
140
|
const TOOL_ICON = {
|
|
149
141
|
src: 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSJjdXJyZW50Q29sb3IiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMjEgMTV2NGEyIDIgMCAwIDEtMiAySDVhMiAyIDAgMCAxLTItMnYtNCIvPjxwb2x5bGluZSBwb2ludHM9IjcgMTAgMTIgMTUgMTcgMTAiLz48bGluZSB4MT0iMTIiIHkxPSIxNSIgeDI9IjEyIiB5Mj0iMyIvPjwvc3ZnPg==',
|
|
150
142
|
mimeType: 'image/svg+xml',
|
|
151
143
|
};
|
|
152
|
-
function asRecord(value) {
|
|
153
|
-
return isObject(value) ? value : undefined;
|
|
154
|
-
}
|
|
155
|
-
function readUnknown(obj, key) {
|
|
156
|
-
const record = asRecord(obj);
|
|
157
|
-
return record ? record[key] : undefined;
|
|
158
|
-
}
|
|
159
|
-
function readString(obj, key) {
|
|
160
|
-
const value = readUnknown(obj, key);
|
|
161
|
-
return typeof value === 'string' ? value : undefined;
|
|
162
|
-
}
|
|
163
|
-
function readNestedRecord(obj, keys) {
|
|
164
|
-
let current = obj;
|
|
165
|
-
for (const key of keys) {
|
|
166
|
-
current = readUnknown(current, key);
|
|
167
|
-
if (current === undefined)
|
|
168
|
-
return undefined;
|
|
169
|
-
}
|
|
170
|
-
return asRecord(current);
|
|
171
|
-
}
|
|
172
|
-
function safeJsonParse(value) {
|
|
173
|
-
try {
|
|
174
|
-
return JSON.parse(value);
|
|
175
|
-
}
|
|
176
|
-
catch {
|
|
177
|
-
return undefined;
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
function withSignal(signal) {
|
|
181
|
-
return signal === undefined ? {} : { signal };
|
|
182
|
-
}
|
|
183
|
-
function buildToolAbortSignal(extraSignal) {
|
|
184
|
-
const { timeoutMs } = config.tools;
|
|
185
|
-
if (timeoutMs <= 0)
|
|
186
|
-
return extraSignal;
|
|
187
|
-
const timeoutSignal = AbortSignal.timeout(timeoutMs);
|
|
188
|
-
if (!extraSignal)
|
|
189
|
-
return timeoutSignal;
|
|
190
|
-
return AbortSignal.any([extraSignal, timeoutSignal]);
|
|
191
|
-
}
|
|
192
|
-
/* -------------------------------------------------------------------------------------------------
|
|
193
|
-
* Progress reporting
|
|
194
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
195
|
-
function resolveRelatedTaskMeta(meta) {
|
|
196
|
-
const related = readUnknown(meta, 'io.modelcontextprotocol/related-task');
|
|
197
|
-
const taskId = readString(related, 'taskId');
|
|
198
|
-
return taskId ? { taskId } : undefined;
|
|
199
|
-
}
|
|
200
|
-
class ToolProgressReporter {
|
|
201
|
-
token;
|
|
202
|
-
sendNotification;
|
|
203
|
-
relatedTaskMeta;
|
|
204
|
-
onProgress;
|
|
205
|
-
reportQueue = Promise.resolve();
|
|
206
|
-
constructor(token, sendNotification, relatedTaskMeta, onProgress) {
|
|
207
|
-
this.token = token;
|
|
208
|
-
this.sendNotification = sendNotification;
|
|
209
|
-
this.relatedTaskMeta = relatedTaskMeta;
|
|
210
|
-
this.onProgress = onProgress;
|
|
211
|
-
}
|
|
212
|
-
static create(extra) {
|
|
213
|
-
const token = extra?._meta?.progressToken ?? null;
|
|
214
|
-
const sendNotification = extra?.sendNotification;
|
|
215
|
-
const relatedTaskMeta = resolveRelatedTaskMeta(extra?._meta);
|
|
216
|
-
const onProgress = extra?.onProgress;
|
|
217
|
-
if (token === null && !onProgress) {
|
|
218
|
-
return { report: async () => { } };
|
|
219
|
-
}
|
|
220
|
-
return new ToolProgressReporter(token, sendNotification, relatedTaskMeta, onProgress);
|
|
221
|
-
}
|
|
222
|
-
async report(progress, message) {
|
|
223
|
-
if (this.onProgress) {
|
|
224
|
-
try {
|
|
225
|
-
this.onProgress(progress, message);
|
|
226
|
-
}
|
|
227
|
-
catch (error) {
|
|
228
|
-
logWarn('Progress callback failed', {
|
|
229
|
-
error: getErrorMessage(error),
|
|
230
|
-
progress,
|
|
231
|
-
message,
|
|
232
|
-
});
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
if (this.token === null || !this.sendNotification)
|
|
236
|
-
return;
|
|
237
|
-
const { sendNotification } = this;
|
|
238
|
-
const notification = {
|
|
239
|
-
method: 'notifications/progress',
|
|
240
|
-
params: {
|
|
241
|
-
progressToken: this.token,
|
|
242
|
-
progress,
|
|
243
|
-
total: FETCH_PROGRESS_TOTAL,
|
|
244
|
-
message,
|
|
245
|
-
...(this.relatedTaskMeta
|
|
246
|
-
? {
|
|
247
|
-
_meta: {
|
|
248
|
-
'io.modelcontextprotocol/related-task': this.relatedTaskMeta,
|
|
249
|
-
},
|
|
250
|
-
}
|
|
251
|
-
: {}),
|
|
252
|
-
},
|
|
253
|
-
};
|
|
254
|
-
this.reportQueue = this.reportQueue.then(async () => {
|
|
255
|
-
let timeoutId;
|
|
256
|
-
const timeoutPromise = new Promise((resolve) => {
|
|
257
|
-
timeoutId = setTimeout(() => {
|
|
258
|
-
resolve({ timeout: true });
|
|
259
|
-
}, PROGRESS_NOTIFICATION_TIMEOUT_MS);
|
|
260
|
-
timeoutId.unref();
|
|
261
|
-
});
|
|
262
|
-
try {
|
|
263
|
-
const outcome = await Promise.race([
|
|
264
|
-
sendNotification(notification).then(() => ({ ok: true })),
|
|
265
|
-
timeoutPromise,
|
|
266
|
-
]);
|
|
267
|
-
if ('timeout' in outcome) {
|
|
268
|
-
logWarn('Progress notification timed out', { progress, message });
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
catch (error) {
|
|
272
|
-
logWarn('Failed to send progress notification', {
|
|
273
|
-
error: getErrorMessage(error),
|
|
274
|
-
progress,
|
|
275
|
-
message,
|
|
276
|
-
});
|
|
277
|
-
}
|
|
278
|
-
finally {
|
|
279
|
-
if (timeoutId)
|
|
280
|
-
clearTimeout(timeoutId);
|
|
281
|
-
}
|
|
282
|
-
});
|
|
283
|
-
await this.reportQueue;
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
export function createProgressReporter(extra) {
|
|
287
|
-
return ToolProgressReporter.create(extra);
|
|
288
|
-
}
|
|
289
|
-
function getOpenCodeFence(content) {
|
|
290
|
-
const FENCE_PATTERN = /^([ \t]*)(`{3,}|~{3,})/gm;
|
|
291
|
-
let match;
|
|
292
|
-
let inFence = false;
|
|
293
|
-
let fenceChar = null;
|
|
294
|
-
let fenceLength = 0;
|
|
295
|
-
while ((match = FENCE_PATTERN.exec(content)) !== null) {
|
|
296
|
-
const marker = match[2];
|
|
297
|
-
if (!marker)
|
|
298
|
-
continue;
|
|
299
|
-
const [char] = marker;
|
|
300
|
-
if (!char)
|
|
301
|
-
continue;
|
|
302
|
-
const { length } = marker;
|
|
303
|
-
if (!inFence) {
|
|
304
|
-
inFence = true;
|
|
305
|
-
fenceChar = char;
|
|
306
|
-
fenceLength = length;
|
|
307
|
-
}
|
|
308
|
-
else if (char === fenceChar && length >= fenceLength) {
|
|
309
|
-
inFence = false;
|
|
310
|
-
fenceChar = null;
|
|
311
|
-
fenceLength = 0;
|
|
312
|
-
}
|
|
313
|
-
}
|
|
314
|
-
if (inFence && fenceChar) {
|
|
315
|
-
return { fenceChar, fenceLength };
|
|
316
|
-
}
|
|
317
|
-
return null;
|
|
318
|
-
}
|
|
319
|
-
function findSafeLinkBoundary(content, limit) {
|
|
320
|
-
const lastBracket = content.lastIndexOf('[', limit);
|
|
321
|
-
if (lastBracket === -1)
|
|
322
|
-
return limit;
|
|
323
|
-
const afterBracket = content.substring(lastBracket, limit);
|
|
324
|
-
const closedPattern = /^\[[^\]]*\]\([^)]*\)/;
|
|
325
|
-
if (closedPattern.test(afterBracket))
|
|
326
|
-
return limit;
|
|
327
|
-
const start = lastBracket > 0 && content[lastBracket - 1] === '!'
|
|
328
|
-
? lastBracket - 1
|
|
329
|
-
: lastBracket;
|
|
330
|
-
return start;
|
|
331
|
-
}
|
|
332
|
-
function truncateWithMarker(content, limit, marker) {
|
|
333
|
-
if (content.length <= limit)
|
|
334
|
-
return content;
|
|
335
|
-
const maxContentLength = Math.max(0, limit - marker.length);
|
|
336
|
-
const tentativeContent = content.substring(0, maxContentLength);
|
|
337
|
-
const openFence = getOpenCodeFence(tentativeContent);
|
|
338
|
-
if (openFence) {
|
|
339
|
-
const fenceCloser = `\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`;
|
|
340
|
-
const adjustedLength = Math.max(0, limit - marker.length - fenceCloser.length);
|
|
341
|
-
return `${content.substring(0, adjustedLength)}${fenceCloser}${marker}`;
|
|
342
|
-
}
|
|
343
|
-
const safeBoundary = findSafeLinkBoundary(content, maxContentLength);
|
|
344
|
-
if (safeBoundary < maxContentLength) {
|
|
345
|
-
return `${content.substring(0, safeBoundary)}${marker}`;
|
|
346
|
-
}
|
|
347
|
-
return `${tentativeContent}${marker}`;
|
|
348
|
-
}
|
|
349
|
-
function appendTruncationMarker(content, marker) {
|
|
350
|
-
if (!content)
|
|
351
|
-
return marker;
|
|
352
|
-
if (content.endsWith(marker))
|
|
353
|
-
return content;
|
|
354
|
-
const openFence = getOpenCodeFence(content);
|
|
355
|
-
const contentWithFence = openFence
|
|
356
|
-
? `${content}\n${openFence.fenceChar.repeat(openFence.fenceLength)}\n`
|
|
357
|
-
: content;
|
|
358
|
-
const safeBoundary = findSafeLinkBoundary(contentWithFence, contentWithFence.length);
|
|
359
|
-
if (safeBoundary < contentWithFence.length) {
|
|
360
|
-
return `${contentWithFence.substring(0, safeBoundary)}${marker}`;
|
|
361
|
-
}
|
|
362
|
-
return `${contentWithFence}${marker}`;
|
|
363
|
-
}
|
|
364
|
-
class InlineContentLimiter {
|
|
365
|
-
apply(content, inlineLimitOverride) {
|
|
366
|
-
const contentSize = content.length;
|
|
367
|
-
const inlineLimit = this.resolveInlineLimit(inlineLimitOverride);
|
|
368
|
-
if (inlineLimit <= 0) {
|
|
369
|
-
return { content, contentSize };
|
|
370
|
-
}
|
|
371
|
-
if (contentSize <= inlineLimit) {
|
|
372
|
-
return { content, contentSize };
|
|
373
|
-
}
|
|
374
|
-
const truncatedContent = truncateWithMarker(content, inlineLimit, TRUNCATION_MARKER);
|
|
375
|
-
return {
|
|
376
|
-
content: truncatedContent,
|
|
377
|
-
contentSize,
|
|
378
|
-
truncated: true,
|
|
379
|
-
};
|
|
380
|
-
}
|
|
381
|
-
resolveInlineLimit(inlineLimitOverride) {
|
|
382
|
-
const globalLimit = config.constants.maxInlineContentChars;
|
|
383
|
-
if (inlineLimitOverride === undefined)
|
|
384
|
-
return globalLimit;
|
|
385
|
-
if (globalLimit > 0 && inlineLimitOverride > 0) {
|
|
386
|
-
return Math.min(inlineLimitOverride, globalLimit);
|
|
387
|
-
}
|
|
388
|
-
if (globalLimit > 0 && inlineLimitOverride === 0)
|
|
389
|
-
return globalLimit;
|
|
390
|
-
return inlineLimitOverride;
|
|
391
|
-
}
|
|
392
|
-
}
|
|
393
|
-
const inlineLimiter = new InlineContentLimiter();
|
|
394
|
-
function applyInlineContentLimit(content, inlineLimitOverride) {
|
|
395
|
-
return inlineLimiter.apply(content, inlineLimitOverride);
|
|
396
|
-
}
|
|
397
144
|
/* -------------------------------------------------------------------------------------------------
|
|
398
|
-
* Tool response
|
|
145
|
+
* Tool response builders
|
|
399
146
|
* ------------------------------------------------------------------------------------------------- */
|
|
400
147
|
function buildTextBlock(structuredContent) {
|
|
401
148
|
return {
|
|
@@ -406,7 +153,7 @@ function buildTextBlock(structuredContent) {
|
|
|
406
153
|
function buildEmbeddedResource(content, url, title) {
|
|
407
154
|
if (!content)
|
|
408
155
|
return null;
|
|
409
|
-
const filename =
|
|
156
|
+
const filename = generateSafeFilename(url, title, undefined, '.md');
|
|
410
157
|
const uri = new URL(filename, 'file:///').href;
|
|
411
158
|
const resource = {
|
|
412
159
|
uri,
|
|
@@ -436,297 +183,48 @@ function buildCacheResourceLink(cacheResourceUri, contentSize, fetchedAt) {
|
|
|
436
183
|
}
|
|
437
184
|
function buildToolContentBlocks(structuredContent, resourceLink, embeddedResource) {
|
|
438
185
|
const blocks = [buildTextBlock(structuredContent)];
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
}
|
|
442
|
-
if (embeddedResource) {
|
|
443
|
-
blocks.push(embeddedResource);
|
|
444
|
-
}
|
|
186
|
+
appendIfPresent(blocks, resourceLink);
|
|
187
|
+
appendIfPresent(blocks, embeddedResource);
|
|
445
188
|
return blocks;
|
|
446
189
|
}
|
|
447
|
-
function
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
if (!transformedResult.transformed) {
|
|
451
|
-
return {
|
|
452
|
-
normalizedUrl: validatedUrl,
|
|
453
|
-
originalUrl: validatedUrl,
|
|
454
|
-
transformed: false,
|
|
455
|
-
};
|
|
456
|
-
}
|
|
457
|
-
// Re-validate transformed URLs so blocked-host and length policies still apply.
|
|
458
|
-
const { normalizedUrl: transformedUrl } = normalizeUrl(transformedResult.url);
|
|
459
|
-
return {
|
|
460
|
-
normalizedUrl: transformedUrl,
|
|
461
|
-
originalUrl: validatedUrl,
|
|
462
|
-
transformed: true,
|
|
463
|
-
};
|
|
464
|
-
}
|
|
465
|
-
function logRawUrlTransformation(resolvedUrl) {
|
|
466
|
-
if (!resolvedUrl.transformed)
|
|
467
|
-
return;
|
|
468
|
-
logDebug('Using transformed raw content URL', {
|
|
469
|
-
original: resolvedUrl.originalUrl,
|
|
470
|
-
});
|
|
471
|
-
}
|
|
472
|
-
function extractTitle(value) {
|
|
473
|
-
const record = asRecord(value);
|
|
474
|
-
const title = record ? record['title'] : undefined;
|
|
475
|
-
return typeof title === 'string' ? title : undefined;
|
|
476
|
-
}
|
|
477
|
-
function logCacheMiss(reason, cacheNamespace, normalizedUrl, error) {
|
|
478
|
-
const log = reason.startsWith('deserialize') ? logWarn : logDebug;
|
|
479
|
-
log(`Cache miss due to ${reason}`, {
|
|
480
|
-
namespace: cacheNamespace,
|
|
481
|
-
url: normalizedUrl,
|
|
482
|
-
...(error ? { error: getErrorMessage(error) } : {}),
|
|
483
|
-
});
|
|
484
|
-
}
|
|
485
|
-
function attemptCacheRetrieval(params) {
|
|
486
|
-
const { cacheKey, deserialize, cacheNamespace, normalizedUrl } = params;
|
|
487
|
-
if (!cacheKey)
|
|
488
|
-
return null;
|
|
489
|
-
const cached = cache.get(cacheKey);
|
|
490
|
-
if (!cached)
|
|
491
|
-
return null;
|
|
492
|
-
if (!deserialize) {
|
|
493
|
-
logCacheMiss('missing deserializer', cacheNamespace, normalizedUrl);
|
|
494
|
-
return null;
|
|
495
|
-
}
|
|
496
|
-
let data;
|
|
497
|
-
try {
|
|
498
|
-
data = deserialize(cached.content);
|
|
499
|
-
}
|
|
500
|
-
catch (error) {
|
|
501
|
-
logCacheMiss('deserialize exception', cacheNamespace, normalizedUrl, error);
|
|
502
|
-
return null;
|
|
503
|
-
}
|
|
504
|
-
if (data === undefined) {
|
|
505
|
-
logCacheMiss('deserialize failure', cacheNamespace, normalizedUrl);
|
|
506
|
-
return null;
|
|
507
|
-
}
|
|
508
|
-
logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
|
|
509
|
-
const finalUrl = cached.url !== normalizedUrl ? cached.url : undefined;
|
|
510
|
-
return {
|
|
511
|
-
data,
|
|
512
|
-
fromCache: true,
|
|
513
|
-
url: normalizedUrl,
|
|
514
|
-
...(finalUrl ? { finalUrl } : {}),
|
|
515
|
-
fetchedAt: cached.fetchedAt,
|
|
516
|
-
cacheKey,
|
|
517
|
-
};
|
|
518
|
-
}
|
|
519
|
-
function persistCache(params) {
|
|
520
|
-
const { cacheKey, data, serialize, normalizedUrl, cacheNamespace, force } = params;
|
|
521
|
-
if (!cacheKey)
|
|
522
|
-
return;
|
|
523
|
-
const serializer = serialize ?? JSON.stringify;
|
|
524
|
-
const title = extractTitle(data);
|
|
525
|
-
const metadata = {
|
|
526
|
-
url: normalizedUrl,
|
|
527
|
-
...(title === undefined ? {} : { title }),
|
|
528
|
-
};
|
|
529
|
-
try {
|
|
530
|
-
cache.set(cacheKey, serializer(data), metadata, force ? { force: true } : undefined);
|
|
531
|
-
}
|
|
532
|
-
catch (error) {
|
|
533
|
-
logWarn('Failed to persist cache entry', {
|
|
534
|
-
namespace: cacheNamespace,
|
|
535
|
-
url: normalizedUrl,
|
|
536
|
-
error: getErrorMessage(error),
|
|
537
|
-
});
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
export async function executeFetchPipeline(options) {
|
|
541
|
-
const resolvedUrl = resolveNormalizedUrl(options.url);
|
|
542
|
-
logRawUrlTransformation(resolvedUrl);
|
|
543
|
-
const cacheKey = cache.createCacheKey(options.cacheNamespace, resolvedUrl.normalizedUrl, options.cacheVary);
|
|
544
|
-
if (!options.forceRefresh) {
|
|
545
|
-
const cachedResult = attemptCacheRetrieval({
|
|
546
|
-
cacheKey,
|
|
547
|
-
deserialize: options.deserialize,
|
|
548
|
-
cacheNamespace: options.cacheNamespace,
|
|
549
|
-
normalizedUrl: resolvedUrl.normalizedUrl,
|
|
550
|
-
});
|
|
551
|
-
if (cachedResult) {
|
|
552
|
-
return { ...cachedResult, originalUrl: resolvedUrl.originalUrl };
|
|
553
|
-
}
|
|
554
|
-
}
|
|
555
|
-
logDebug('Fetching URL', { url: resolvedUrl.normalizedUrl });
|
|
556
|
-
const { buffer, encoding, truncated, finalUrl } = await fetchNormalizedUrlBuffer(resolvedUrl.normalizedUrl, withSignal(options.signal));
|
|
557
|
-
const transformUrl = finalUrl || resolvedUrl.normalizedUrl;
|
|
558
|
-
const data = await options.transform({ buffer, encoding, ...(truncated ? { truncated: true } : {}) }, transformUrl);
|
|
559
|
-
if (cache.isEnabled()) {
|
|
560
|
-
persistCache({
|
|
561
|
-
cacheKey,
|
|
562
|
-
data,
|
|
563
|
-
serialize: options.serialize,
|
|
564
|
-
normalizedUrl: finalUrl || resolvedUrl.normalizedUrl,
|
|
565
|
-
cacheNamespace: options.cacheNamespace,
|
|
566
|
-
});
|
|
567
|
-
if (finalUrl && finalUrl !== resolvedUrl.normalizedUrl) {
|
|
568
|
-
const finalCacheKey = cache.createCacheKey(options.cacheNamespace, finalUrl, options.cacheVary);
|
|
569
|
-
if (finalCacheKey && finalCacheKey !== cacheKey) {
|
|
570
|
-
persistCache({
|
|
571
|
-
cacheKey: finalCacheKey,
|
|
572
|
-
data,
|
|
573
|
-
serialize: options.serialize,
|
|
574
|
-
normalizedUrl: finalUrl,
|
|
575
|
-
cacheNamespace: options.cacheNamespace,
|
|
576
|
-
});
|
|
577
|
-
}
|
|
578
|
-
}
|
|
579
|
-
}
|
|
580
|
-
return {
|
|
581
|
-
data,
|
|
582
|
-
fromCache: false,
|
|
583
|
-
url: resolvedUrl.normalizedUrl,
|
|
584
|
-
originalUrl: resolvedUrl.originalUrl,
|
|
585
|
-
finalUrl,
|
|
586
|
-
fetchedAt: new Date().toISOString(),
|
|
587
|
-
cacheKey,
|
|
588
|
-
};
|
|
589
|
-
}
|
|
590
|
-
export async function performSharedFetch(options, deps = {}) {
|
|
591
|
-
const executePipeline = deps.executeFetchPipeline ?? executeFetchPipeline;
|
|
592
|
-
const pipelineOptions = {
|
|
593
|
-
url: options.url,
|
|
594
|
-
cacheNamespace: 'markdown',
|
|
595
|
-
...withSignal(options.signal),
|
|
596
|
-
...(options.cacheVary ? { cacheVary: options.cacheVary } : {}),
|
|
597
|
-
...(options.forceRefresh ? { forceRefresh: true } : {}),
|
|
598
|
-
transform: options.transform,
|
|
599
|
-
...(options.serialize ? { serialize: options.serialize } : {}),
|
|
600
|
-
...(options.deserialize ? { deserialize: options.deserialize } : {}),
|
|
601
|
-
};
|
|
602
|
-
const pipeline = await executePipeline(pipelineOptions);
|
|
603
|
-
const inlineResult = applyInlineContentLimit(pipeline.data.content, options.maxInlineChars);
|
|
604
|
-
return { pipeline, inlineResult };
|
|
190
|
+
function appendIfPresent(items, value) {
|
|
191
|
+
if (value !== null && value !== undefined)
|
|
192
|
+
items.push(value);
|
|
605
193
|
}
|
|
606
194
|
/* -------------------------------------------------------------------------------------------------
|
|
607
|
-
* Tool
|
|
195
|
+
* Tool abort signal
|
|
608
196
|
* ------------------------------------------------------------------------------------------------- */
|
|
609
|
-
|
|
610
|
-
const
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
};
|
|
618
|
-
return {
|
|
619
|
-
content: [buildTextBlock(structuredContent)],
|
|
620
|
-
structuredContent,
|
|
621
|
-
isError: true,
|
|
622
|
-
};
|
|
623
|
-
}
|
|
624
|
-
function isValidationError(error) {
|
|
625
|
-
return (error instanceof Error &&
|
|
626
|
-
isSystemError(error) &&
|
|
627
|
-
error.code === 'VALIDATION_ERROR');
|
|
628
|
-
}
|
|
629
|
-
function resolveToolErrorMessage(error, fallbackMessage) {
|
|
630
|
-
if (isValidationError(error) || error instanceof FetchError) {
|
|
631
|
-
return error.message;
|
|
632
|
-
}
|
|
633
|
-
if (error instanceof Error) {
|
|
634
|
-
return `${fallbackMessage}: ${error.message}`;
|
|
635
|
-
}
|
|
636
|
-
return `${fallbackMessage}: Unknown error`;
|
|
637
|
-
}
|
|
638
|
-
export function handleToolError(error, url, fallbackMessage = 'Operation failed') {
|
|
639
|
-
const message = resolveToolErrorMessage(error, fallbackMessage);
|
|
640
|
-
if (error instanceof FetchError) {
|
|
641
|
-
return createToolErrorResponse(message, url, {
|
|
642
|
-
statusCode: error.statusCode,
|
|
643
|
-
details: error.details,
|
|
644
|
-
});
|
|
645
|
-
}
|
|
646
|
-
return createToolErrorResponse(message, url);
|
|
197
|
+
function buildToolAbortSignal(extraSignal) {
|
|
198
|
+
const { timeoutMs } = config.tools;
|
|
199
|
+
if (timeoutMs <= 0)
|
|
200
|
+
return extraSignal;
|
|
201
|
+
const timeoutSignal = AbortSignal.timeout(timeoutMs);
|
|
202
|
+
if (!extraSignal)
|
|
203
|
+
return timeoutSignal;
|
|
204
|
+
return AbortSignal.any([extraSignal, timeoutSignal]);
|
|
647
205
|
}
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
...(metadata.image ? { image: metadata.image } : {}),
|
|
656
|
-
...(metadata.favicon ? { favicon: metadata.favicon } : {}),
|
|
657
|
-
...(metadata.publishedAt ? { publishedAt: metadata.publishedAt } : {}),
|
|
658
|
-
...(metadata.modifiedAt ? { modifiedAt: metadata.modifiedAt } : {}),
|
|
659
|
-
};
|
|
660
|
-
if (Object.keys(normalized).length === 0)
|
|
661
|
-
return undefined;
|
|
662
|
-
return normalized;
|
|
206
|
+
/* -------------------------------------------------------------------------------------------------
|
|
207
|
+
* Structured response assembly
|
|
208
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
209
|
+
function truncateStr(value, max) {
|
|
210
|
+
if (value === undefined || value.length <= max)
|
|
211
|
+
return value;
|
|
212
|
+
return value.slice(0, max);
|
|
663
213
|
}
|
|
664
|
-
|
|
665
|
-
.object({
|
|
666
|
-
markdown: z.string().optional(),
|
|
667
|
-
content: z.string().optional(),
|
|
668
|
-
title: z.string().optional(),
|
|
669
|
-
metadata: z
|
|
670
|
-
.strictObject({
|
|
671
|
-
title: z.string().optional(),
|
|
672
|
-
description: z.string().optional(),
|
|
673
|
-
author: z.string().optional(),
|
|
674
|
-
image: z.string().optional(),
|
|
675
|
-
favicon: z.string().optional(),
|
|
676
|
-
publishedAt: z.string().optional(),
|
|
677
|
-
modifiedAt: z.string().optional(),
|
|
678
|
-
})
|
|
679
|
-
.optional(),
|
|
680
|
-
truncated: z.boolean().optional(),
|
|
681
|
-
})
|
|
682
|
-
.catchall(z.unknown())
|
|
683
|
-
.refine((value) => typeof value.markdown === 'string' || typeof value.content === 'string', { message: 'Missing markdown/content' });
|
|
684
|
-
export function parseCachedMarkdownResult(cached) {
|
|
685
|
-
const parsed = safeJsonParse(cached);
|
|
686
|
-
const result = cachedMarkdownSchema.safeParse(parsed);
|
|
687
|
-
if (!result.success)
|
|
688
|
-
return undefined;
|
|
689
|
-
const markdown = result.data.markdown ?? result.data.content;
|
|
690
|
-
if (typeof markdown !== 'string')
|
|
691
|
-
return undefined;
|
|
692
|
-
const metadata = normalizeExtractedMetadata(result.data.metadata);
|
|
693
|
-
const truncated = result.data.truncated ?? false;
|
|
694
|
-
const persistedMarkdown = truncated
|
|
695
|
-
? appendTruncationMarker(markdown, TRUNCATION_MARKER)
|
|
696
|
-
: markdown;
|
|
214
|
+
function truncateMetadata(metadata) {
|
|
697
215
|
return {
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
216
|
+
...metadata,
|
|
217
|
+
...(metadata.title !== undefined
|
|
218
|
+
? { title: truncateStr(metadata.title, 512) }
|
|
219
|
+
: {}),
|
|
220
|
+
...(metadata.description !== undefined
|
|
221
|
+
? { description: truncateStr(metadata.description, 2048) }
|
|
222
|
+
: {}),
|
|
223
|
+
...(metadata.author !== undefined
|
|
224
|
+
? { author: truncateStr(metadata.author, 512) }
|
|
225
|
+
: {}),
|
|
703
226
|
};
|
|
704
227
|
}
|
|
705
|
-
const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
|
|
706
|
-
const result = await transformBufferToMarkdown(input.buffer, url, {
|
|
707
|
-
includeMetadata: true,
|
|
708
|
-
encoding: input.encoding,
|
|
709
|
-
...withSignal(signal),
|
|
710
|
-
...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
|
|
711
|
-
...(input.truncated ? { inputTruncated: true } : {}),
|
|
712
|
-
});
|
|
713
|
-
const truncated = Boolean(result.truncated || input.truncated);
|
|
714
|
-
return { ...result, content: result.markdown, truncated };
|
|
715
|
-
};
|
|
716
|
-
function serializeMarkdownResult(result) {
|
|
717
|
-
const persistedMarkdown = result.truncated
|
|
718
|
-
? appendTruncationMarker(result.markdown, TRUNCATION_MARKER)
|
|
719
|
-
: result.markdown;
|
|
720
|
-
return JSON.stringify({
|
|
721
|
-
markdown: persistedMarkdown,
|
|
722
|
-
title: result.title,
|
|
723
|
-
metadata: result.metadata,
|
|
724
|
-
truncated: result.truncated,
|
|
725
|
-
});
|
|
726
|
-
}
|
|
727
|
-
/* -------------------------------------------------------------------------------------------------
|
|
728
|
-
* fetch-url tool implementation
|
|
729
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
730
228
|
function buildStructuredContent(pipeline, inlineResult, inputUrl) {
|
|
731
229
|
const cacheResourceUri = resolveCacheResourceUri(pipeline.cacheKey);
|
|
732
230
|
const truncated = inlineResult.truncated ?? pipeline.data.truncated;
|
|
@@ -738,8 +236,8 @@ function buildStructuredContent(pipeline, inlineResult, inputUrl) {
|
|
|
738
236
|
...(pipeline.finalUrl ? { finalUrl: pipeline.finalUrl } : {}),
|
|
739
237
|
...(cacheResourceUri ? { cacheResourceUri } : {}),
|
|
740
238
|
inputUrl,
|
|
741
|
-
title: pipeline.data.title,
|
|
742
|
-
...(metadata ? { metadata } : {}),
|
|
239
|
+
title: truncateStr(pipeline.data.title, 512),
|
|
240
|
+
...(metadata ? { metadata: truncateMetadata(metadata) } : {}),
|
|
743
241
|
markdown,
|
|
744
242
|
fromCache: pipeline.fromCache,
|
|
745
243
|
fetchedAt: pipeline.fetchedAt,
|
|
@@ -780,7 +278,6 @@ function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
|
|
|
780
278
|
function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
781
279
|
const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
|
|
782
280
|
const content = buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult);
|
|
783
|
-
// Runtime validation guard: verify output matches schema
|
|
784
281
|
const validation = fetchUrlOutputSchema.safeParse(structuredContent);
|
|
785
282
|
if (!validation.success) {
|
|
786
283
|
logWarn('Tool output schema validation failed', {
|
|
@@ -793,6 +290,26 @@ function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
|
793
290
|
structuredContent,
|
|
794
291
|
};
|
|
795
292
|
}
|
|
293
|
+
/* -------------------------------------------------------------------------------------------------
|
|
294
|
+
* fetch-url tool implementation
|
|
295
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
296
|
+
export function getUrlContext(urlStr) {
|
|
297
|
+
try {
|
|
298
|
+
const u = new URL(urlStr);
|
|
299
|
+
const host = u.hostname.replace(/^www\./, '');
|
|
300
|
+
const path = u.pathname;
|
|
301
|
+
if (path === '/' || path === '')
|
|
302
|
+
return host;
|
|
303
|
+
let basename = path.split('/').filter(Boolean).pop();
|
|
304
|
+
if (basename && basename.length > 20) {
|
|
305
|
+
basename = `${basename.substring(0, 17)}...`;
|
|
306
|
+
}
|
|
307
|
+
return basename ? `${host}/…/${basename}` : host;
|
|
308
|
+
}
|
|
309
|
+
catch {
|
|
310
|
+
return 'unknown';
|
|
311
|
+
}
|
|
312
|
+
}
|
|
796
313
|
async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars) {
|
|
797
314
|
return performSharedFetch({
|
|
798
315
|
url,
|
|
@@ -802,7 +319,8 @@ async function fetchPipeline(url, signal, progress, skipNoiseRemoval, forceRefre
|
|
|
802
319
|
...(maxInlineChars !== undefined ? { maxInlineChars } : {}),
|
|
803
320
|
transform: async ({ buffer, encoding, truncated }, normalizedUrl) => {
|
|
804
321
|
if (progress) {
|
|
805
|
-
|
|
322
|
+
const contextStr = getUrlContext(url);
|
|
323
|
+
void progress.report(2, `fetch-url: ${contextStr} [transforming]`);
|
|
806
324
|
}
|
|
807
325
|
return markdownTransform({ buffer, encoding, ...(truncated ? { truncated } : {}) }, normalizedUrl, signal, skipNoiseRemoval);
|
|
808
326
|
},
|
|
@@ -817,15 +335,23 @@ async function executeFetch(input, extra) {
|
|
|
817
335
|
}
|
|
818
336
|
const signal = buildToolAbortSignal(extra?.signal);
|
|
819
337
|
const progress = createProgressReporter(extra);
|
|
820
|
-
|
|
338
|
+
const contextStr = getUrlContext(url);
|
|
339
|
+
void progress.report(0, `fetch-url: ${contextStr} [starting]`);
|
|
821
340
|
logDebug('Fetching URL', { url });
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
341
|
+
try {
|
|
342
|
+
void progress.report(1, `fetch-url: ${contextStr} [fetching]`);
|
|
343
|
+
const { pipeline, inlineResult } = await fetchPipeline(url, signal, progress, input.skipNoiseRemoval, input.forceRefresh, input.maxInlineChars);
|
|
344
|
+
if (pipeline.fromCache) {
|
|
345
|
+
void progress.report(3, `fetch-url: ${contextStr} [using cache]`);
|
|
346
|
+
}
|
|
347
|
+
void progress.report(4, `fetch-url: ${contextStr} • success`);
|
|
348
|
+
return buildResponse(pipeline, inlineResult, url);
|
|
349
|
+
}
|
|
350
|
+
catch (error) {
|
|
351
|
+
const isAbort = error instanceof Error && error.name === 'AbortError';
|
|
352
|
+
void progress.report(4, `fetch-url: ${contextStr} • ${isAbort ? 'cancelled' : 'failed'}`);
|
|
353
|
+
throw error;
|
|
826
354
|
}
|
|
827
|
-
void progress.report(4, 'Finalizing response');
|
|
828
|
-
return buildResponse(pipeline, inlineResult, url);
|
|
829
355
|
}
|
|
830
356
|
export async function fetchUrlToolHandler(input, extra) {
|
|
831
357
|
return executeFetch(input, extra).catch((error) => {
|
|
@@ -899,3 +425,4 @@ export function registerTools(server) {
|
|
|
899
425
|
}, withRequestContextIfMissing(TOOL_DEFINITION.handler));
|
|
900
426
|
registeredTool.execution = TOOL_DEFINITION.execution;
|
|
901
427
|
}
|
|
428
|
+
//# sourceMappingURL=tools.js.map
|