@j0hanz/fetch-url-mcp 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/http/auth.js +4 -1
- package/dist/http/native.js +14 -2
- package/dist/lib/content.js +1 -1
- package/dist/lib/core.d.ts +2 -0
- package/dist/lib/core.js +7 -24
- package/dist/lib/http.d.ts +2 -0
- package/dist/lib/http.js +60 -45
- package/dist/lib/net-utils.d.ts +3 -0
- package/dist/lib/net-utils.js +21 -0
- package/dist/lib/task-handlers.js +3 -0
- package/dist/lib/types.d.ts +4 -0
- package/dist/lib/types.js +1 -0
- package/dist/lib/url.d.ts +1 -1
- package/dist/lib/url.js +4 -23
- package/dist/prompts/index.d.ts +1 -5
- package/dist/resources/index.d.ts +1 -5
- package/dist/resources/index.js +1 -0
- package/dist/resources/instructions.js +3 -3
- package/dist/schemas/outputs.d.ts +0 -1
- package/dist/schemas/outputs.js +0 -5
- package/dist/server.js +5 -7
- package/dist/tasks/execution.js +2 -1
- package/dist/tasks/manager.js +2 -0
- package/dist/tools/fetch-url.js +8 -73
- package/dist/transform/transform.js +19 -8
- package/package.json +2 -2
package/dist/http/auth.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Buffer } from 'node:buffer';
|
|
2
2
|
import { randomBytes } from 'node:crypto';
|
|
3
3
|
import { InvalidTokenError, ServerError, } from '@modelcontextprotocol/sdk/server/auth/errors.js';
|
|
4
|
-
import { config } from '../lib/core.js';
|
|
4
|
+
import { config, logWarn } from '../lib/core.js';
|
|
5
5
|
import { normalizeHost } from '../lib/url.js';
|
|
6
6
|
import { hmacSha256Hex, timingSafeEqualUtf8 } from '../lib/utils.js';
|
|
7
7
|
import { isObject } from '../lib/utils.js';
|
|
@@ -186,6 +186,9 @@ export function ensureMcpProtocolVersion(req, res, options) {
|
|
|
186
186
|
// Permissive backward-compat fallback: clients predating MCP 2025-03-26 do not
|
|
187
187
|
// send MCP-Protocol-Version. Accepting requests without the header keeps older
|
|
188
188
|
// integrations working. Pass requireHeader: true to enforce strict version checking.
|
|
189
|
+
logWarn('MCP-Protocol-Version header missing; defaulting to permissive fallback', {
|
|
190
|
+
remoteAddress: req.socket.remoteAddress,
|
|
191
|
+
});
|
|
189
192
|
return true;
|
|
190
193
|
}
|
|
191
194
|
sendError(res, -32600, 'Missing MCP-Protocol-Version header');
|
package/dist/http/native.js
CHANGED
|
@@ -139,6 +139,11 @@ class McpSessionGateway {
|
|
|
139
139
|
sendError(ctx.res, -32600, 'Session not found', 404);
|
|
140
140
|
return;
|
|
141
141
|
}
|
|
142
|
+
const fingerprint = buildAuthFingerprint(ctx.auth);
|
|
143
|
+
if (!fingerprint || session.authFingerprint !== fingerprint) {
|
|
144
|
+
sendError(ctx.res, -32600, 'Session not found', 404);
|
|
145
|
+
return;
|
|
146
|
+
}
|
|
142
147
|
if (!ensureMcpProtocolVersion(ctx.req, ctx.res, {
|
|
143
148
|
requireHeader: true,
|
|
144
149
|
expectedVersion: session.negotiatedProtocolVersion,
|
|
@@ -147,7 +152,9 @@ class McpSessionGateway {
|
|
|
147
152
|
}
|
|
148
153
|
const acceptHeader = getHeaderValue(ctx.req, 'accept');
|
|
149
154
|
if (!acceptsEventStream(acceptHeader)) {
|
|
150
|
-
sendJson(ctx.res,
|
|
155
|
+
sendJson(ctx.res, 406, {
|
|
156
|
+
error: 'Not Acceptable: expected text/event-stream',
|
|
157
|
+
});
|
|
151
158
|
return;
|
|
152
159
|
}
|
|
153
160
|
this.store.touch(sessionId);
|
|
@@ -164,6 +171,11 @@ class McpSessionGateway {
|
|
|
164
171
|
sendError(ctx.res, -32600, 'Session not found', 404);
|
|
165
172
|
return;
|
|
166
173
|
}
|
|
174
|
+
const fingerprint = buildAuthFingerprint(ctx.auth);
|
|
175
|
+
if (!fingerprint || session.authFingerprint !== fingerprint) {
|
|
176
|
+
sendError(ctx.res, -32600, 'Session not found', 404);
|
|
177
|
+
return;
|
|
178
|
+
}
|
|
167
179
|
if (!ensureMcpProtocolVersion(ctx.req, ctx.res, {
|
|
168
180
|
requireHeader: true,
|
|
169
181
|
expectedVersion: session.negotiatedProtocolVersion,
|
|
@@ -172,7 +184,7 @@ class McpSessionGateway {
|
|
|
172
184
|
}
|
|
173
185
|
await session.transport.close();
|
|
174
186
|
this.cleanupSessionRecord(sessionId, 'session-delete');
|
|
175
|
-
|
|
187
|
+
sendJson(ctx.res, 200, { status: 'closed' });
|
|
176
188
|
}
|
|
177
189
|
async getOrCreateTransport(ctx, requestId) {
|
|
178
190
|
const sessionId = getMcpSessionId(ctx.req);
|
package/dist/lib/content.js
CHANGED
|
@@ -19,7 +19,7 @@ const HEADER_NOISE_PATTERN = /\b(site-header|masthead|topbar|navbar|nav(?:bar)?|
|
|
|
19
19
|
const FIXED_OR_HIGH_Z_PATTERN = /\b(?:fixed|sticky|z-(?:4\d|50)|isolate)\b/;
|
|
20
20
|
const SKIP_URL_PREFIXES = [
|
|
21
21
|
'#',
|
|
22
|
-
'
|
|
22
|
+
'javascript:',
|
|
23
23
|
'mailto:',
|
|
24
24
|
'tel:',
|
|
25
25
|
'data:',
|
package/dist/lib/core.d.ts
CHANGED
|
@@ -131,6 +131,7 @@ export declare const config: {
|
|
|
131
131
|
allowedHosts: Set<string>;
|
|
132
132
|
apiKey: string | undefined;
|
|
133
133
|
allowRemote: boolean;
|
|
134
|
+
allowLocalFetch: boolean;
|
|
134
135
|
};
|
|
135
136
|
auth: AuthConfig;
|
|
136
137
|
rateLimit: {
|
|
@@ -187,6 +188,7 @@ export declare function keys(): readonly string[];
|
|
|
187
188
|
export declare function getEntryMeta(cacheKey: string): {
|
|
188
189
|
url: string;
|
|
189
190
|
title?: string;
|
|
191
|
+
fetchedAt?: string;
|
|
190
192
|
} | undefined;
|
|
191
193
|
export declare function isEnabled(): boolean;
|
|
192
194
|
type LogMetadata = Record<string, unknown>;
|
package/dist/lib/core.js
CHANGED
|
@@ -4,11 +4,11 @@ import { accessSync, constants as fsConstants, readFileSync } from 'node:fs';
|
|
|
4
4
|
import { findPackageJSON } from 'node:module';
|
|
5
5
|
import { isIP } from 'node:net';
|
|
6
6
|
import process from 'node:process';
|
|
7
|
-
import { domainToASCII } from 'node:url';
|
|
8
7
|
import { inspect, stripVTControlCharacters } from 'node:util';
|
|
9
8
|
import {} from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
10
9
|
import {} from '@modelcontextprotocol/sdk/server/streamableHttp.js';
|
|
11
10
|
import { z } from 'zod';
|
|
11
|
+
import { buildIpv4, normalizeHostname, stripTrailingDots, } from './net-utils.js';
|
|
12
12
|
import { getErrorMessage, isAbortError, sha256Hex, stableStringify as stableJsonStringify, startAbortableIntervalLoop, } from './utils.js';
|
|
13
13
|
export const serverVersion = readServerVersion(import.meta.url);
|
|
14
14
|
const LOG_LEVELS = ['debug', 'info', 'warn', 'error'];
|
|
@@ -57,31 +57,11 @@ function loadEnvFileIfAvailable() {
|
|
|
57
57
|
}
|
|
58
58
|
loadEnvFileIfAvailable();
|
|
59
59
|
const { env } = process;
|
|
60
|
-
function buildIpv4(parts) {
|
|
61
|
-
return parts.join('.');
|
|
62
|
-
}
|
|
63
|
-
function stripTrailingDots(value) {
|
|
64
|
-
let result = value;
|
|
65
|
-
while (result.endsWith('.'))
|
|
66
|
-
result = result.slice(0, -1);
|
|
67
|
-
return result;
|
|
68
|
-
}
|
|
69
60
|
function formatHostForUrl(hostname) {
|
|
70
61
|
if (hostname.includes(':') && !hostname.startsWith('['))
|
|
71
62
|
return `[${hostname}]`;
|
|
72
63
|
return hostname;
|
|
73
64
|
}
|
|
74
|
-
function normalizeHostname(value) {
|
|
75
|
-
const trimmed = value.trim();
|
|
76
|
-
if (!trimmed)
|
|
77
|
-
return null;
|
|
78
|
-
const lowered = trimmed.toLowerCase();
|
|
79
|
-
const ipType = isIP(lowered);
|
|
80
|
-
if (ipType)
|
|
81
|
-
return stripTrailingDots(lowered);
|
|
82
|
-
const ascii = domainToASCII(lowered);
|
|
83
|
-
return ascii ? stripTrailingDots(ascii) : null;
|
|
84
|
-
}
|
|
85
65
|
function normalizeHostValue(value) {
|
|
86
66
|
const raw = value.trim();
|
|
87
67
|
if (!raw)
|
|
@@ -458,6 +438,7 @@ export const config = {
|
|
|
458
438
|
allowedHosts: parseAllowedHosts(env['ALLOWED_HOSTS']),
|
|
459
439
|
apiKey: env['API_KEY'],
|
|
460
440
|
allowRemote,
|
|
441
|
+
allowLocalFetch: parseBoolean(env['ALLOW_LOCAL_FETCH'], false),
|
|
461
442
|
},
|
|
462
443
|
auth: buildAuthConfig(baseUrl),
|
|
463
444
|
rateLimit: {
|
|
@@ -705,9 +686,11 @@ export function getEntryMeta(cacheKey) {
|
|
|
705
686
|
const entry = store.peek(cacheKey);
|
|
706
687
|
if (!entry)
|
|
707
688
|
return undefined;
|
|
708
|
-
return
|
|
709
|
-
|
|
710
|
-
|
|
689
|
+
return {
|
|
690
|
+
url: entry.url,
|
|
691
|
+
...(entry.title !== undefined ? { title: entry.title } : {}),
|
|
692
|
+
...(entry.fetchedAt ? { fetchedAt: entry.fetchedAt } : {}),
|
|
693
|
+
};
|
|
711
694
|
}
|
|
712
695
|
export function isEnabled() {
|
|
713
696
|
return store.isEnabled();
|
package/dist/lib/http.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { type ServerResponse } from 'node:http';
|
|
2
|
+
import { Agent } from 'undici';
|
|
2
3
|
import { type TransformResult } from './url.js';
|
|
3
4
|
export declare function generateSafeFilename(url: string, title?: string, hashFallback?: string, extension?: string): string;
|
|
4
5
|
export declare function handleDownload(res: ServerResponse, namespace: string, hash: string): void;
|
|
@@ -27,6 +28,7 @@ export declare function recordFetchError(context: FetchTelemetryContext, error:
|
|
|
27
28
|
export declare function fetchWithRedirects(url: string, init: RequestInit, maxRedirects: number): Promise<{
|
|
28
29
|
response: Response;
|
|
29
30
|
url: string;
|
|
31
|
+
agent?: Agent;
|
|
30
32
|
}>;
|
|
31
33
|
export declare function readResponseText(response: Response, url: string, maxBytes: number, signal?: AbortSignal, encoding?: string): Promise<{
|
|
32
34
|
text: string;
|
package/dist/lib/http.js
CHANGED
|
@@ -15,7 +15,7 @@ import { Agent } from 'undici';
|
|
|
15
15
|
import { z } from 'zod';
|
|
16
16
|
import { get as cacheGet, config, getOperationId, getRequestId, logDebug, logError, logWarn, parseCachedPayload, redactUrl, resolveCachedPayloadContent, } from './core.js';
|
|
17
17
|
import { BLOCKED_HOST_SUFFIXES, createDnsPreflight, IpBlocker, RawUrlTransformer, SafeDnsResolver, UrlNormalizer, VALIDATION_ERROR_CODE, } from './url.js';
|
|
18
|
-
import { createErrorWithCode, FetchError, isError, isObject, isSystemError, toError, } from './utils.js';
|
|
18
|
+
import { createErrorWithCode, FetchError, isAbortError, isError, isObject, isSystemError, toError, } from './utils.js';
|
|
19
19
|
const FILENAME_RULES = {
|
|
20
20
|
MAX_LEN: 200,
|
|
21
21
|
UNSAFE_CHARS: /[<>:"/\\|?*\p{C}]/gu,
|
|
@@ -315,10 +315,6 @@ function createFetchError(input, url) {
|
|
|
315
315
|
return new FetchError(input.message ?? 'Unexpected error', url);
|
|
316
316
|
}
|
|
317
317
|
}
|
|
318
|
-
function isAbortError(error) {
|
|
319
|
-
return (isError(error) &&
|
|
320
|
-
(error.name === 'AbortError' || error.name === 'TimeoutError'));
|
|
321
|
-
}
|
|
322
318
|
function isTimeoutError(error) {
|
|
323
319
|
return isError(error) && error.name === 'TimeoutError';
|
|
324
320
|
}
|
|
@@ -334,7 +330,7 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
|
334
330
|
if (error instanceof FetchError)
|
|
335
331
|
return error;
|
|
336
332
|
const url = resolveErrorUrl(error, fallbackUrl);
|
|
337
|
-
if (isAbortError(error)) {
|
|
333
|
+
if (isAbortError(error) || isTimeoutError(error)) {
|
|
338
334
|
return isTimeoutError(error)
|
|
339
335
|
? createFetchError({ kind: 'timeout', timeout: timeoutMs }, url)
|
|
340
336
|
: createFetchError({ kind: 'canceled' }, url);
|
|
@@ -386,16 +382,26 @@ class RedirectFollower {
|
|
|
386
382
|
async fetchWithRedirects(url, init, maxRedirects) {
|
|
387
383
|
let currentUrl = url;
|
|
388
384
|
const redirectLimit = Math.max(0, maxRedirects);
|
|
385
|
+
const visited = new Set();
|
|
389
386
|
for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
|
|
390
|
-
|
|
387
|
+
if (visited.has(currentUrl)) {
|
|
388
|
+
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
389
|
+
}
|
|
390
|
+
visited.add(currentUrl);
|
|
391
|
+
const { response, nextUrl, agent: returnedAgent, } = await this.withRedirectErrorContext(currentUrl, async () => {
|
|
391
392
|
let ipAddress;
|
|
392
393
|
if (this.preflight) {
|
|
393
394
|
ipAddress = await this.preflight(currentUrl, init.signal ?? undefined);
|
|
394
395
|
}
|
|
395
396
|
return this.performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress);
|
|
396
397
|
});
|
|
397
|
-
if (!nextUrl)
|
|
398
|
-
return {
|
|
398
|
+
if (!nextUrl) {
|
|
399
|
+
return {
|
|
400
|
+
response,
|
|
401
|
+
url: currentUrl,
|
|
402
|
+
...(returnedAgent ? { agent: returnedAgent } : {}),
|
|
403
|
+
};
|
|
404
|
+
}
|
|
399
405
|
currentUrl = nextUrl;
|
|
400
406
|
}
|
|
401
407
|
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
@@ -405,9 +411,10 @@ class RedirectFollower {
|
|
|
405
411
|
...init,
|
|
406
412
|
redirect: 'manual',
|
|
407
413
|
};
|
|
414
|
+
let agent;
|
|
408
415
|
if (ipAddress) {
|
|
409
416
|
const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
|
|
410
|
-
|
|
417
|
+
agent = new Agent({
|
|
411
418
|
connect: {
|
|
412
419
|
lookup: (hostname, options, callback) => {
|
|
413
420
|
const family = isIP(ipAddress) === 6 ? 6 : 4;
|
|
@@ -428,25 +435,36 @@ class RedirectFollower {
|
|
|
428
435
|
});
|
|
429
436
|
fetchInit.dispatcher = agent;
|
|
430
437
|
}
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
438
|
+
let closeAgent = true;
|
|
439
|
+
try {
|
|
440
|
+
const response = await this.fetchFn(currentUrl, fetchInit);
|
|
441
|
+
// Only follow redirects if the status code indicates a redirect and there's a Location header.
|
|
442
|
+
if (!isRedirectStatus(response.status)) {
|
|
443
|
+
closeAgent = false;
|
|
444
|
+
return { response, ...(agent ? { agent } : {}) };
|
|
445
|
+
}
|
|
446
|
+
if (redirectCount >= redirectLimit) {
|
|
447
|
+
cancelResponseBody(response);
|
|
448
|
+
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
449
|
+
}
|
|
450
|
+
const location = this.getRedirectLocation(response, currentUrl);
|
|
435
451
|
cancelResponseBody(response);
|
|
436
|
-
|
|
452
|
+
const nextUrl = this.resolveRedirectTarget(currentUrl, location);
|
|
453
|
+
const parsedNextUrl = new URL(nextUrl);
|
|
454
|
+
if (parsedNextUrl.protocol !== 'http:' &&
|
|
455
|
+
parsedNextUrl.protocol !== 'https:') {
|
|
456
|
+
throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
|
|
457
|
+
}
|
|
458
|
+
return {
|
|
459
|
+
response,
|
|
460
|
+
nextUrl,
|
|
461
|
+
};
|
|
437
462
|
}
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
if (parsedNextUrl.protocol !== 'http:' &&
|
|
443
|
-
parsedNextUrl.protocol !== 'https:') {
|
|
444
|
-
throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
|
|
463
|
+
finally {
|
|
464
|
+
if (closeAgent) {
|
|
465
|
+
await agent?.close();
|
|
466
|
+
}
|
|
445
467
|
}
|
|
446
|
-
return {
|
|
447
|
-
response,
|
|
448
|
-
nextUrl,
|
|
449
|
-
};
|
|
450
468
|
}
|
|
451
469
|
getRedirectLocation(response, currentUrl) {
|
|
452
470
|
const location = response.headers.get('location');
|
|
@@ -694,16 +712,8 @@ async function decodeResponseIfNeeded(response, url, signal) {
|
|
|
694
712
|
clearAbortListener();
|
|
695
713
|
abortDecodePipeline();
|
|
696
714
|
void decodedReader.cancel(error).catch(() => undefined);
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
encoding: encodingHeader ?? encodings.join(','),
|
|
700
|
-
error: isError(error) ? error.message : String(error),
|
|
701
|
-
});
|
|
702
|
-
return new Response(passthroughBranch, {
|
|
703
|
-
status: response.status,
|
|
704
|
-
statusText: response.statusText,
|
|
705
|
-
headers,
|
|
706
|
-
});
|
|
715
|
+
void passthroughBranch.cancel().catch(() => undefined);
|
|
716
|
+
throw new FetchError(`Content-Encoding decode failed for ${redactUrl(url)}: ${isError(error) ? error.message : String(error)}`, url);
|
|
707
717
|
}
|
|
708
718
|
}
|
|
709
719
|
class ResponseTextReader {
|
|
@@ -851,17 +861,16 @@ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry,
|
|
|
851
861
|
cancelResponseBody(response);
|
|
852
862
|
throw responseError;
|
|
853
863
|
}
|
|
854
|
-
const
|
|
855
|
-
const contentType = decodedResponse.headers.get('content-type');
|
|
864
|
+
const contentType = response.headers.get('content-type');
|
|
856
865
|
assertSupportedContentType(contentType, finalUrl);
|
|
857
866
|
const declaredEncoding = getCharsetFromContentType(contentType ?? null);
|
|
858
867
|
if (mode === 'text') {
|
|
859
|
-
const { text, size, truncated } = await reader.read(
|
|
860
|
-
telemetry.recordResponse(ctx,
|
|
868
|
+
const { text, size, truncated } = await reader.read(response, finalUrl, maxBytes, signal, declaredEncoding);
|
|
869
|
+
telemetry.recordResponse(ctx, response, size);
|
|
861
870
|
return { kind: 'text', text, size, truncated };
|
|
862
871
|
}
|
|
863
|
-
const { buffer, encoding, size, truncated } = await reader.readBuffer(
|
|
864
|
-
telemetry.recordResponse(ctx,
|
|
872
|
+
const { buffer, encoding, size, truncated } = await reader.readBuffer(response, finalUrl, maxBytes, signal, declaredEncoding);
|
|
873
|
+
telemetry.recordResponse(ctx, response, size);
|
|
865
874
|
return { kind: 'buffer', buffer, encoding, size, truncated };
|
|
866
875
|
}
|
|
867
876
|
function isReadableStreamLike(value) {
|
|
@@ -1053,8 +1062,10 @@ class HttpFetcher {
|
|
|
1053
1062
|
const signal = buildRequestSignal(timeoutMs, options?.signal);
|
|
1054
1063
|
const init = buildRequestInit(headers, signal);
|
|
1055
1064
|
const ctx = this.telemetry.start(normalizedUrl, 'GET');
|
|
1065
|
+
let agent;
|
|
1056
1066
|
try {
|
|
1057
|
-
const { response, url: finalUrl } = await this.redirectFollower.fetchWithRedirects(normalizedUrl, init, this.fetcherConfig.maxRedirects);
|
|
1067
|
+
const { response, url: finalUrl, agent: returnedAgent, } = await this.redirectFollower.fetchWithRedirects(normalizedUrl, init, this.fetcherConfig.maxRedirects);
|
|
1068
|
+
agent = returnedAgent;
|
|
1058
1069
|
ctx.url = this.telemetry.redact(finalUrl);
|
|
1059
1070
|
return await this.readPayload(response, finalUrl, ctx, mode, init.signal ?? undefined);
|
|
1060
1071
|
}
|
|
@@ -1064,6 +1075,9 @@ class HttpFetcher {
|
|
|
1064
1075
|
this.telemetry.recordError(ctx, mapped, mapped.statusCode);
|
|
1065
1076
|
throw mapped;
|
|
1066
1077
|
}
|
|
1078
|
+
finally {
|
|
1079
|
+
await agent?.close();
|
|
1080
|
+
}
|
|
1067
1081
|
}
|
|
1068
1082
|
async readPayload(response, finalUrl, ctx, mode, signal) {
|
|
1069
1083
|
try {
|
|
@@ -1087,8 +1101,9 @@ const DEFAULT_HEADERS = {
|
|
|
1087
1101
|
'User-Agent': config.fetcher.userAgent,
|
|
1088
1102
|
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
1089
1103
|
'Accept-Language': 'en-US,en;q=0.5',
|
|
1090
|
-
|
|
1091
|
-
|
|
1104
|
+
// Accept-Encoding and Connection are forbidden Fetch API headers.
|
|
1105
|
+
// The undici-based globalThis.fetch manages content negotiation and
|
|
1106
|
+
// decompression transparently per the Fetch spec.
|
|
1092
1107
|
};
|
|
1093
1108
|
function buildHeaders() {
|
|
1094
1109
|
return DEFAULT_HEADERS;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { isIP } from 'node:net';
|
|
2
|
+
import { domainToASCII } from 'node:url';
|
|
3
|
+
export function buildIpv4(parts) {
|
|
4
|
+
return parts.join('.');
|
|
5
|
+
}
|
|
6
|
+
export function stripTrailingDots(value) {
|
|
7
|
+
let result = value;
|
|
8
|
+
while (result.endsWith('.'))
|
|
9
|
+
result = result.slice(0, -1);
|
|
10
|
+
return result;
|
|
11
|
+
}
|
|
12
|
+
export function normalizeHostname(value) {
|
|
13
|
+
const trimmed = value.trim();
|
|
14
|
+
if (!trimmed)
|
|
15
|
+
return null;
|
|
16
|
+
const lowered = trimmed.toLowerCase();
|
|
17
|
+
if (isIP(lowered))
|
|
18
|
+
return stripTrailingDots(lowered);
|
|
19
|
+
const ascii = domainToASCII(lowered);
|
|
20
|
+
return ascii ? stripTrailingDots(ascii) : null;
|
|
21
|
+
}
|
|
@@ -136,6 +136,7 @@ function resolveOwnerScopedExtra(extra) {
|
|
|
136
136
|
};
|
|
137
137
|
}
|
|
138
138
|
function getSdkCallToolHandler(server) {
|
|
139
|
+
// S-2: see tests/sdk-compat-guard.test.ts
|
|
139
140
|
const maybeHandlers = Reflect.get(server.server, '_requestHandlers');
|
|
140
141
|
if (!(maybeHandlers instanceof Map))
|
|
141
142
|
return null;
|
|
@@ -209,6 +210,8 @@ export function registerTaskHandlers(server) {
|
|
|
209
210
|
...(task.statusMessage ? { statusMessage: task.statusMessage } : {}),
|
|
210
211
|
});
|
|
211
212
|
}
|
|
213
|
+
// Forward-compat: input_required is a valid MCP task status but not currently
|
|
214
|
+
// produced by any tool in this server. Kept for future spec support.
|
|
212
215
|
if (task.status === 'input_required') {
|
|
213
216
|
throw new McpError(ErrorCode.InvalidRequest, 'Task requires additional input', { taskId: task.taskId, status: 'input_required' });
|
|
214
217
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/lib/url.d.ts
CHANGED
package/dist/lib/url.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import dns from 'node:dns';
|
|
2
2
|
import { BlockList, isIP, SocketAddress } from 'node:net';
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
3
|
+
import { config, logDebug } from './core.js';
|
|
4
|
+
import { buildIpv4, normalizeHostname } from './net-utils.js';
|
|
5
5
|
import { createErrorWithCode, isError, isSystemError } from './utils.js';
|
|
6
6
|
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
7
7
|
const CNAME_LOOKUP_MAX_DEPTH = 5;
|
|
@@ -76,7 +76,7 @@ export class SafeDnsResolver {
|
|
|
76
76
|
if (isCloudMetadataHost(normalizedHostname)) {
|
|
77
77
|
throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Cloud metadata endpoints are not allowed`, 'EBLOCKED');
|
|
78
78
|
}
|
|
79
|
-
if (
|
|
79
|
+
if (!isLocalFetchAllowed() &&
|
|
80
80
|
this.ipBlocker.isBlockedIp(normalizedHostname)) {
|
|
81
81
|
throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
|
|
82
82
|
}
|
|
@@ -227,15 +227,6 @@ function normalizeBracketedIpv6(value) {
|
|
|
227
227
|
return null;
|
|
228
228
|
return normalizeHostname(ipv6);
|
|
229
229
|
}
|
|
230
|
-
function normalizeHostname(value) {
|
|
231
|
-
const trimmed = trimToNull(value)?.toLowerCase();
|
|
232
|
-
if (!trimmed)
|
|
233
|
-
return null;
|
|
234
|
-
if (isIP(trimmed))
|
|
235
|
-
return stripTrailingDots(trimmed);
|
|
236
|
-
const ascii = domainToASCII(trimmed);
|
|
237
|
-
return ascii ? stripTrailingDots(ascii) : null;
|
|
238
|
-
}
|
|
239
230
|
function parseHostWithUrl(value) {
|
|
240
231
|
const candidateUrl = `http://${value}`;
|
|
241
232
|
if (!URL.canParse(candidateUrl))
|
|
@@ -252,16 +243,6 @@ function trimToNull(value) {
|
|
|
252
243
|
const trimmed = value.trim();
|
|
253
244
|
return trimmed ? trimmed : null;
|
|
254
245
|
}
|
|
255
|
-
function stripTrailingDots(value) {
|
|
256
|
-
// Keep loop (rather than regex) to preserve exact behavior and avoid hidden allocations.
|
|
257
|
-
let result = value;
|
|
258
|
-
while (result.endsWith('.'))
|
|
259
|
-
result = result.slice(0, -1);
|
|
260
|
-
return result;
|
|
261
|
-
}
|
|
262
|
-
function buildIpv4(parts) {
|
|
263
|
-
return parts.join('.');
|
|
264
|
-
}
|
|
265
246
|
function buildIpv6(parts) {
|
|
266
247
|
return parts.map(String).join(':');
|
|
267
248
|
}
|
|
@@ -590,7 +571,7 @@ function isCloudMetadataHost(hostname) {
|
|
|
590
571
|
return normalized !== null && CLOUD_METADATA_HOSTS.has(normalized.ip);
|
|
591
572
|
}
|
|
592
573
|
function isLocalFetchAllowed() {
|
|
593
|
-
return
|
|
574
|
+
return config.security.allowLocalFetch;
|
|
594
575
|
}
|
|
595
576
|
export class IpBlocker {
|
|
596
577
|
security;
|
package/dist/prompts/index.d.ts
CHANGED
|
@@ -1,7 +1,3 @@
|
|
|
1
1
|
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
-
|
|
3
|
-
src: string;
|
|
4
|
-
mimeType: string;
|
|
5
|
-
}
|
|
2
|
+
import type { IconInfo } from '../lib/types.js';
|
|
6
3
|
export declare function registerGetHelpPrompt(server: McpServer, instructions: string, iconInfo?: IconInfo): void;
|
|
7
|
-
export {};
|
|
@@ -1,8 +1,4 @@
|
|
|
1
1
|
import { type McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
-
|
|
3
|
-
src: string;
|
|
4
|
-
mimeType: string;
|
|
5
|
-
}
|
|
2
|
+
import type { IconInfo } from '../lib/types.js';
|
|
6
3
|
export declare function registerInstructionResource(server: McpServer, instructions: string, iconInfo?: IconInfo): void;
|
|
7
4
|
export declare function registerCacheResourceTemplate(server: McpServer, iconInfo?: IconInfo): void;
|
|
8
|
-
export {};
|
package/dist/resources/index.js
CHANGED
|
@@ -9,12 +9,12 @@ export function buildServerInstructions() {
|
|
|
9
9
|
|
|
10
10
|
<capabilities>
|
|
11
11
|
- Tools: \`${FETCH_URL_TOOL_NAME}\` (READ-ONLY).
|
|
12
|
-
- Resources: \`internal://
|
|
12
|
+
- Resources: \`internal://instructions\` (server usage guidance).
|
|
13
13
|
- Prompts: \`get-help\` (returns these instructions).
|
|
14
14
|
</capabilities>
|
|
15
15
|
|
|
16
16
|
<workflows>
|
|
17
|
-
1. Standard: Call \`${FETCH_URL_TOOL_NAME}\` -> Read \`markdown\`. If \`truncated: true\`,
|
|
17
|
+
1. Standard: Call \`${FETCH_URL_TOOL_NAME}\` -> Read \`markdown\`. If \`truncated: true\`, retry with \`forceRefresh: true\`.
|
|
18
18
|
2. Fresh: Set \`forceRefresh: true\` to bypass cache.
|
|
19
19
|
3. Full-Fidelity: Set \`skipNoiseRemoval: true\` to preserve nav/footers.
|
|
20
20
|
4. Async: Add \`task: { ttl: <ms> }\` to \`tools/call\` -> Poll \`tasks/get\` -> Call \`tasks/result\`.
|
|
@@ -24,7 +24,7 @@ export function buildServerInstructions() {
|
|
|
24
24
|
- Blocked: localhost, private IPs (10.x, 172.16-31.x, 192.168.x), metadata endpoints (169.254.169.254), .local/.internal.
|
|
25
25
|
- Limits: Max HTML ${maxHtmlSizeMb}MB. Max ${config.fetcher.maxRedirects} redirects.
|
|
26
26
|
- Cache: ${config.cache.maxKeys} entries, ${cacheSizeMb}MB, ${cacheTtlHours}h TTL.
|
|
27
|
-
- Cache scope: process-local and ephemeral.
|
|
27
|
+
- Cache scope: process-local and ephemeral.
|
|
28
28
|
- No JS: Client-side rendered pages may be incomplete.
|
|
29
29
|
- Binary: Not supported.
|
|
30
30
|
- Batch JSON-RPC: Array requests (\`[{...}]\`) are rejected with HTTP 400.
|
|
@@ -4,7 +4,6 @@ export declare const fetchUrlOutputSchema: z.ZodObject<{
|
|
|
4
4
|
inputUrl: z.ZodOptional<z.ZodString>;
|
|
5
5
|
resolvedUrl: z.ZodOptional<z.ZodString>;
|
|
6
6
|
finalUrl: z.ZodOptional<z.ZodString>;
|
|
7
|
-
cacheResourceUri: z.ZodOptional<z.ZodString>;
|
|
8
7
|
title: z.ZodOptional<z.ZodString>;
|
|
9
8
|
metadata: z.ZodOptional<z.ZodObject<{
|
|
10
9
|
title: z.ZodOptional<z.ZodString>;
|
package/dist/schemas/outputs.js
CHANGED
|
@@ -21,11 +21,6 @@ export const fetchUrlOutputSchema = z.strictObject({
|
|
|
21
21
|
.max(config.constants.maxUrlLength)
|
|
22
22
|
.optional()
|
|
23
23
|
.describe('Final URL after HTTP redirects.'),
|
|
24
|
-
cacheResourceUri: z
|
|
25
|
-
.string()
|
|
26
|
-
.max(config.constants.maxUrlLength)
|
|
27
|
-
.optional()
|
|
28
|
-
.describe('URI for resources/read to get full markdown.'),
|
|
29
24
|
title: z.string().max(512).optional().describe('Page title.'),
|
|
30
25
|
metadata: z
|
|
31
26
|
.strictObject({
|
package/dist/server.js
CHANGED
|
@@ -8,10 +8,13 @@ import { logError, logInfo, setLogLevel, setMcpServer } from './lib/core.js';
|
|
|
8
8
|
import { abortAllTaskExecutions, registerTaskHandlers, } from './lib/mcp-tools.js';
|
|
9
9
|
import { toError } from './lib/utils.js';
|
|
10
10
|
import { registerGetHelpPrompt } from './prompts/index.js';
|
|
11
|
-
import {
|
|
11
|
+
import { registerInstructionResource } from './resources/index.js';
|
|
12
12
|
import { buildServerInstructions } from './resources/instructions.js';
|
|
13
13
|
import { registerAllTools } from './tools/index.js';
|
|
14
14
|
import { shutdownTransformWorkerPool } from './transform/transform.js';
|
|
15
|
+
/* -------------------------------------------------------------------------------------------------
|
|
16
|
+
* Icons + server info
|
|
17
|
+
* ------------------------------------------------------------------------------------------------- */
|
|
15
18
|
async function getLocalIconInfo() {
|
|
16
19
|
const name = 'logo.svg';
|
|
17
20
|
const mime = 'image/svg+xml';
|
|
@@ -31,13 +34,9 @@ const serverInstructions = buildServerInstructions();
|
|
|
31
34
|
function createServerCapabilities() {
|
|
32
35
|
return {
|
|
33
36
|
logging: {},
|
|
34
|
-
resources: {
|
|
35
|
-
subscribe: true,
|
|
36
|
-
listChanged: true,
|
|
37
|
-
},
|
|
37
|
+
resources: { subscribe: true, listChanged: true },
|
|
38
38
|
tools: {},
|
|
39
39
|
prompts: {},
|
|
40
|
-
completions: {},
|
|
41
40
|
tasks: {
|
|
42
41
|
list: {},
|
|
43
42
|
cancel: {},
|
|
@@ -85,7 +84,6 @@ async function createMcpServerWithOptions(options) {
|
|
|
85
84
|
registerAllTools(server);
|
|
86
85
|
registerGetHelpPrompt(server, serverInstructions, localIcon);
|
|
87
86
|
registerInstructionResource(server, serverInstructions, localIcon);
|
|
88
|
-
registerCacheResourceTemplate(server, localIcon);
|
|
89
87
|
// NOTE: Internally patches server.close and server.server.onclose for cleanup
|
|
90
88
|
// callbacks, and intercepts tools/call via Reflect.get on private SDK state.
|
|
91
89
|
// See src/lib/task-handlers.ts for risk documentation (S-2, S-3).
|
package/dist/tasks/execution.js
CHANGED
|
@@ -6,6 +6,7 @@ import { isObject } from '../lib/utils.js';
|
|
|
6
6
|
import { taskManager, } from './manager.js';
|
|
7
7
|
import { compact, tryReadToolStructuredError, } from './owner.js';
|
|
8
8
|
import { getTaskCapableTool, hasTaskCapableTool, } from './tool-registry.js';
|
|
9
|
+
const TASK_NOT_FOUND_ERROR_CODE = RESOURCE_NOT_FOUND_ERROR_CODE;
|
|
9
10
|
/* -------------------------------------------------------------------------------------------------
|
|
10
11
|
* Abort-controller management for in-flight task executions
|
|
11
12
|
* ------------------------------------------------------------------------------------------------- */
|
|
@@ -113,7 +114,7 @@ function buildTaskStatusNotificationParams(task) {
|
|
|
113
114
|
* Validation helpers
|
|
114
115
|
* ------------------------------------------------------------------------------------------------- */
|
|
115
116
|
export function throwTaskNotFound() {
|
|
116
|
-
throw new McpError(
|
|
117
|
+
throw new McpError(TASK_NOT_FOUND_ERROR_CODE, 'Task not found');
|
|
117
118
|
}
|
|
118
119
|
function resolveTaskCapableTool(name) {
|
|
119
120
|
const descriptor = getTaskCapableTool(name);
|
package/dist/tasks/manager.js
CHANGED
|
@@ -69,6 +69,8 @@ class TaskManager {
|
|
|
69
69
|
applyTaskUpdate(task, updates) {
|
|
70
70
|
Object.assign(task, updates);
|
|
71
71
|
task.lastUpdatedAt = new Date().toISOString();
|
|
72
|
+
// Slide TTL window on every activity so long-running tasks don't expire mid-flight.
|
|
73
|
+
task._createdAtMs = Date.now();
|
|
72
74
|
}
|
|
73
75
|
cancelActiveTask(task, statusMessage) {
|
|
74
76
|
this.applyTaskUpdate(task, {
|
package/dist/tools/fetch-url.js
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
import { randomUUID } from 'node:crypto';
|
|
2
2
|
import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
|
|
3
3
|
import { z } from 'zod';
|
|
4
|
-
import * as cache from '../lib/core.js';
|
|
5
4
|
import { config } from '../lib/core.js';
|
|
6
5
|
import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from '../lib/core.js';
|
|
7
|
-
import { generateSafeFilename } from '../lib/http.js';
|
|
8
6
|
import { handleToolError } from '../lib/mcp-tools.js';
|
|
9
|
-
import { appendTruncationMarker, markdownTransform, parseCachedMarkdownResult, performSharedFetch, readNestedRecord,
|
|
7
|
+
import { appendTruncationMarker, markdownTransform, parseCachedMarkdownResult, performSharedFetch, readNestedRecord, serializeMarkdownResult, TRUNCATION_MARKER, withSignal, } from '../lib/mcp-tools.js';
|
|
10
8
|
import { createProgressReporter, } from '../lib/mcp-tools.js';
|
|
11
9
|
import { isAbortError, isObject, toError } from '../lib/utils.js';
|
|
12
10
|
import { fetchUrlInputSchema } from '../schemas/inputs.js';
|
|
@@ -19,7 +17,7 @@ const FETCH_URL_TOOL_DESCRIPTION = `
|
|
|
19
17
|
<constraints>
|
|
20
18
|
- READ-ONLY. No JavaScript execution.
|
|
21
19
|
- GitHub/GitLab/Bitbucket URLs auto-transform to raw endpoints (check resolvedUrl).
|
|
22
|
-
- If truncated=true,
|
|
20
|
+
- If truncated=true, full content is available in the next fetch with forceRefresh.
|
|
23
21
|
- For large pages/timeouts, use task mode (task: {}).
|
|
24
22
|
- If error queue_full, retry with task mode.
|
|
25
23
|
</constraints>
|
|
@@ -38,47 +36,6 @@ function buildTextBlock(structuredContent) {
|
|
|
38
36
|
text: JSON.stringify(structuredContent),
|
|
39
37
|
};
|
|
40
38
|
}
|
|
41
|
-
function buildEmbeddedResource(content, url, title) {
|
|
42
|
-
if (!content)
|
|
43
|
-
return null;
|
|
44
|
-
const filename = generateSafeFilename(url, title, undefined, '.md');
|
|
45
|
-
const uri = `internal://inline/${encodeURIComponent(filename)}`;
|
|
46
|
-
const resource = {
|
|
47
|
-
uri,
|
|
48
|
-
mimeType: 'text/markdown',
|
|
49
|
-
text: content,
|
|
50
|
-
};
|
|
51
|
-
return {
|
|
52
|
-
type: 'resource',
|
|
53
|
-
resource,
|
|
54
|
-
};
|
|
55
|
-
}
|
|
56
|
-
function buildCacheResourceLink(cacheResourceUri, contentSize, fetchedAt) {
|
|
57
|
-
return {
|
|
58
|
-
type: 'resource_link',
|
|
59
|
-
uri: cacheResourceUri,
|
|
60
|
-
name: 'cached-markdown',
|
|
61
|
-
title: 'Cached Fetch Output',
|
|
62
|
-
description: 'Read full markdown via resources/read.',
|
|
63
|
-
mimeType: 'text/markdown',
|
|
64
|
-
...(contentSize > 0 ? { size: contentSize } : {}),
|
|
65
|
-
annotations: {
|
|
66
|
-
audience: ['assistant'],
|
|
67
|
-
priority: 0.8,
|
|
68
|
-
lastModified: fetchedAt,
|
|
69
|
-
},
|
|
70
|
-
};
|
|
71
|
-
}
|
|
72
|
-
function buildToolContentBlocks(structuredContent, resourceLink, embeddedResource) {
|
|
73
|
-
const blocks = [buildTextBlock(structuredContent)];
|
|
74
|
-
appendIfPresent(blocks, resourceLink);
|
|
75
|
-
appendIfPresent(blocks, embeddedResource);
|
|
76
|
-
return blocks;
|
|
77
|
-
}
|
|
78
|
-
function appendIfPresent(items, value) {
|
|
79
|
-
if (value !== null && value !== undefined)
|
|
80
|
-
items.push(value);
|
|
81
|
-
}
|
|
82
39
|
/* -------------------------------------------------------------------------------------------------
|
|
83
40
|
* Tool abort signal
|
|
84
41
|
* ------------------------------------------------------------------------------------------------- */
|
|
@@ -116,15 +73,15 @@ function truncateMetadata(metadata) {
|
|
|
116
73
|
return result;
|
|
117
74
|
}
|
|
118
75
|
function buildStructuredContent(pipeline, inlineResult, inputUrl) {
|
|
119
|
-
const cacheResourceUri = resolveCacheResourceUri(pipeline.cacheKey);
|
|
120
76
|
const truncated = inlineResult.truncated ?? pipeline.data.truncated;
|
|
121
|
-
const
|
|
77
|
+
const rawMarkdown = applyTruncationMarker(inlineResult.content, pipeline.data.truncated);
|
|
78
|
+
const maxChars = config.constants.maxInlineContentChars;
|
|
79
|
+
const markdown = maxChars > 0 ? truncateStr(rawMarkdown, maxChars) : rawMarkdown;
|
|
122
80
|
const { metadata } = pipeline.data;
|
|
123
81
|
return {
|
|
124
82
|
url: pipeline.originalUrl ?? pipeline.url,
|
|
125
83
|
resolvedUrl: pipeline.url,
|
|
126
84
|
...(pipeline.finalUrl ? { finalUrl: pipeline.finalUrl } : {}),
|
|
127
|
-
...(cacheResourceUri ? { cacheResourceUri } : {}),
|
|
128
85
|
inputUrl,
|
|
129
86
|
title: truncateStr(pipeline.data.title, 512),
|
|
130
87
|
...(metadata ? { metadata: truncateMetadata(metadata) } : {}),
|
|
@@ -140,34 +97,12 @@ function applyTruncationMarker(content, truncated) {
|
|
|
140
97
|
return content;
|
|
141
98
|
return appendTruncationMarker(content, TRUNCATION_MARKER);
|
|
142
99
|
}
|
|
143
|
-
function
|
|
144
|
-
|
|
145
|
-
return undefined;
|
|
146
|
-
if (!cache.isEnabled())
|
|
147
|
-
return undefined;
|
|
148
|
-
if (!cache.get(cacheKey))
|
|
149
|
-
return undefined;
|
|
150
|
-
const parsed = cache.parseCacheKey(cacheKey);
|
|
151
|
-
if (!parsed)
|
|
152
|
-
return undefined;
|
|
153
|
-
return `internal://cache/${encodeURIComponent(parsed.namespace)}/${encodeURIComponent(parsed.urlHash)}`;
|
|
154
|
-
}
|
|
155
|
-
function buildFetchUrlContentBlocks(structuredContent, pipeline, inlineResult) {
|
|
156
|
-
const cacheResourceUri = readString(structuredContent, 'cacheResourceUri');
|
|
157
|
-
const contentToEmbed = config.runtime.httpMode
|
|
158
|
-
? inlineResult.content
|
|
159
|
-
: pipeline.data.content;
|
|
160
|
-
const resourceLink = cacheResourceUri
|
|
161
|
-
? buildCacheResourceLink(cacheResourceUri, inlineResult.contentSize, pipeline.fetchedAt)
|
|
162
|
-
: null;
|
|
163
|
-
const embedded = contentToEmbed && pipeline.url
|
|
164
|
-
? buildEmbeddedResource(contentToEmbed, pipeline.url, pipeline.data.title)
|
|
165
|
-
: null;
|
|
166
|
-
return buildToolContentBlocks(structuredContent, resourceLink, embedded);
|
|
100
|
+
function buildFetchUrlContentBlocks(structuredContent) {
|
|
101
|
+
return [buildTextBlock(structuredContent)];
|
|
167
102
|
}
|
|
168
103
|
function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
169
104
|
const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
|
|
170
|
-
const content = buildFetchUrlContentBlocks(structuredContent
|
|
105
|
+
const content = buildFetchUrlContentBlocks(structuredContent);
|
|
171
106
|
const validation = fetchUrlOutputSchema.safeParse(structuredContent);
|
|
172
107
|
if (!validation.success) {
|
|
173
108
|
logWarn('Tool output schema validation failed', {
|
|
@@ -9,7 +9,7 @@ import { config } from '../lib/core.js';
|
|
|
9
9
|
import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../lib/core.js';
|
|
10
10
|
import { isRawTextContentUrl } from '../lib/http.js';
|
|
11
11
|
import { createAbortError, throwIfAborted } from '../lib/utils.js';
|
|
12
|
-
import { FetchError, getErrorMessage } from '../lib/utils.js';
|
|
12
|
+
import { FetchError, getErrorMessage, toError } from '../lib/utils.js';
|
|
13
13
|
import { isObject } from '../lib/utils.js';
|
|
14
14
|
import { translateHtmlFragmentToMarkdown } from './html-translators.js';
|
|
15
15
|
import { extractMetadata, extractMetadataFromHead, mergeMetadata, } from './metadata.js';
|
|
@@ -178,13 +178,22 @@ function trimUtf8Buffer(buffer, maxBytes) {
|
|
|
178
178
|
return buffer.subarray(0, end);
|
|
179
179
|
}
|
|
180
180
|
function trimDanglingTagFragment(content) {
|
|
181
|
-
|
|
182
|
-
|
|
181
|
+
let result = content;
|
|
182
|
+
// Trim dangling HTML entity (e.g. "&" cut before ";")
|
|
183
|
+
const lastAmp = result.lastIndexOf('&');
|
|
184
|
+
if (lastAmp !== -1 && lastAmp > result.length - 10) {
|
|
185
|
+
const tail = result.slice(lastAmp + 1);
|
|
186
|
+
if (!tail.includes(';') && /^[#a-zA-Z][a-zA-Z0-9]*$/.test(tail)) {
|
|
187
|
+
result = result.substring(0, lastAmp);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
const lastOpen = result.lastIndexOf('<');
|
|
191
|
+
const lastClose = result.lastIndexOf('>');
|
|
183
192
|
if (lastOpen > lastClose) {
|
|
184
|
-
if (lastOpen ===
|
|
185
|
-
return
|
|
193
|
+
if (lastOpen === result.length - 1) {
|
|
194
|
+
return result.substring(0, lastOpen);
|
|
186
195
|
}
|
|
187
|
-
const code =
|
|
196
|
+
const code = result.codePointAt(lastOpen + 1);
|
|
188
197
|
if (code !== undefined &&
|
|
189
198
|
(code === 47 || // '/'
|
|
190
199
|
code === 33 || // '!'
|
|
@@ -192,10 +201,10 @@ function trimDanglingTagFragment(content) {
|
|
|
192
201
|
(code >= 65 && code <= 90) || // A-Z
|
|
193
202
|
(code >= 97 && code <= 122)) // a-z
|
|
194
203
|
) {
|
|
195
|
-
return
|
|
204
|
+
return result.substring(0, lastOpen);
|
|
196
205
|
}
|
|
197
206
|
}
|
|
198
|
-
return
|
|
207
|
+
return result;
|
|
199
208
|
}
|
|
200
209
|
function truncateHtml(html, inputTruncated = false) {
|
|
201
210
|
const maxSize = config.constants.maxHtmlSize;
|
|
@@ -1047,6 +1056,8 @@ function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
|
|
|
1047
1056
|
abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
|
|
1048
1057
|
if (error instanceof FetchError)
|
|
1049
1058
|
throw error;
|
|
1059
|
+
if (!(error instanceof Error))
|
|
1060
|
+
throw toError(error);
|
|
1050
1061
|
const message = getErrorMessage(error);
|
|
1051
1062
|
logWarn('Transform worker failed; falling back to in-process', {
|
|
1052
1063
|
url: redactUrl(url),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/fetch-url-mcp",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"mcpName": "io.github.j0hanz/fetch-url-mcp",
|
|
5
5
|
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
|
|
6
6
|
"type": "module",
|
|
@@ -81,7 +81,7 @@
|
|
|
81
81
|
"eslint": "^10.0.2",
|
|
82
82
|
"eslint-config-prettier": "^10.1.8",
|
|
83
83
|
"eslint-plugin-de-morgan": "^2.1.1",
|
|
84
|
-
"eslint-plugin-depend": "^1.
|
|
84
|
+
"eslint-plugin-depend": "^1.5.0",
|
|
85
85
|
"eslint-plugin-unused-imports": "^4.4.1",
|
|
86
86
|
"knip": "^5.85.0",
|
|
87
87
|
"prettier": "^3.8.1",
|