@j0hanz/fetch-url-mcp 1.5.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/core.d.ts +1 -0
- package/dist/lib/core.js +1 -0
- package/dist/lib/http.js +40 -41
- package/dist/lib/url.d.ts +1 -1
- package/dist/lib/url.js +3 -3
- package/dist/transform/transform.js +19 -8
- package/package.json +1 -1
package/dist/lib/core.d.ts
CHANGED
package/dist/lib/core.js
CHANGED
|
@@ -458,6 +458,7 @@ export const config = {
|
|
|
458
458
|
allowedHosts: parseAllowedHosts(env['ALLOWED_HOSTS']),
|
|
459
459
|
apiKey: env['API_KEY'],
|
|
460
460
|
allowRemote,
|
|
461
|
+
allowLocalFetch: parseBoolean(env['ALLOW_LOCAL_FETCH'], false),
|
|
461
462
|
},
|
|
462
463
|
auth: buildAuthConfig(baseUrl),
|
|
463
464
|
rateLimit: {
|
package/dist/lib/http.js
CHANGED
|
@@ -15,7 +15,7 @@ import { Agent } from 'undici';
|
|
|
15
15
|
import { z } from 'zod';
|
|
16
16
|
import { get as cacheGet, config, getOperationId, getRequestId, logDebug, logError, logWarn, parseCachedPayload, redactUrl, resolveCachedPayloadContent, } from './core.js';
|
|
17
17
|
import { BLOCKED_HOST_SUFFIXES, createDnsPreflight, IpBlocker, RawUrlTransformer, SafeDnsResolver, UrlNormalizer, VALIDATION_ERROR_CODE, } from './url.js';
|
|
18
|
-
import { createErrorWithCode, FetchError, isError, isObject, isSystemError, toError, } from './utils.js';
|
|
18
|
+
import { createErrorWithCode, FetchError, isAbortError, isError, isObject, isSystemError, toError, } from './utils.js';
|
|
19
19
|
const FILENAME_RULES = {
|
|
20
20
|
MAX_LEN: 200,
|
|
21
21
|
UNSAFE_CHARS: /[<>:"/\\|?*\p{C}]/gu,
|
|
@@ -315,10 +315,6 @@ function createFetchError(input, url) {
|
|
|
315
315
|
return new FetchError(input.message ?? 'Unexpected error', url);
|
|
316
316
|
}
|
|
317
317
|
}
|
|
318
|
-
function isAbortError(error) {
|
|
319
|
-
return (isError(error) &&
|
|
320
|
-
(error.name === 'AbortError' || error.name === 'TimeoutError'));
|
|
321
|
-
}
|
|
322
318
|
function isTimeoutError(error) {
|
|
323
319
|
return isError(error) && error.name === 'TimeoutError';
|
|
324
320
|
}
|
|
@@ -334,7 +330,7 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
|
334
330
|
if (error instanceof FetchError)
|
|
335
331
|
return error;
|
|
336
332
|
const url = resolveErrorUrl(error, fallbackUrl);
|
|
337
|
-
if (isAbortError(error)) {
|
|
333
|
+
if (isAbortError(error) || isTimeoutError(error)) {
|
|
338
334
|
return isTimeoutError(error)
|
|
339
335
|
? createFetchError({ kind: 'timeout', timeout: timeoutMs }, url)
|
|
340
336
|
: createFetchError({ kind: 'canceled' }, url);
|
|
@@ -386,7 +382,12 @@ class RedirectFollower {
|
|
|
386
382
|
async fetchWithRedirects(url, init, maxRedirects) {
|
|
387
383
|
let currentUrl = url;
|
|
388
384
|
const redirectLimit = Math.max(0, maxRedirects);
|
|
385
|
+
const visited = new Set();
|
|
389
386
|
for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
|
|
387
|
+
if (visited.has(currentUrl)) {
|
|
388
|
+
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
389
|
+
}
|
|
390
|
+
visited.add(currentUrl);
|
|
390
391
|
const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => {
|
|
391
392
|
let ipAddress;
|
|
392
393
|
if (this.preflight) {
|
|
@@ -405,9 +406,10 @@ class RedirectFollower {
|
|
|
405
406
|
...init,
|
|
406
407
|
redirect: 'manual',
|
|
407
408
|
};
|
|
409
|
+
let agent;
|
|
408
410
|
if (ipAddress) {
|
|
409
411
|
const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
|
|
410
|
-
|
|
412
|
+
agent = new Agent({
|
|
411
413
|
connect: {
|
|
412
414
|
lookup: (hostname, options, callback) => {
|
|
413
415
|
const family = isIP(ipAddress) === 6 ? 6 : 4;
|
|
@@ -428,25 +430,30 @@ class RedirectFollower {
|
|
|
428
430
|
});
|
|
429
431
|
fetchInit.dispatcher = agent;
|
|
430
432
|
}
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
433
|
+
try {
|
|
434
|
+
const response = await this.fetchFn(currentUrl, fetchInit);
|
|
435
|
+
if (!isRedirectStatus(response.status))
|
|
436
|
+
return { response };
|
|
437
|
+
if (redirectCount >= redirectLimit) {
|
|
438
|
+
cancelResponseBody(response);
|
|
439
|
+
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
440
|
+
}
|
|
441
|
+
const location = this.getRedirectLocation(response, currentUrl);
|
|
435
442
|
cancelResponseBody(response);
|
|
436
|
-
|
|
443
|
+
const nextUrl = this.resolveRedirectTarget(currentUrl, location);
|
|
444
|
+
const parsedNextUrl = new URL(nextUrl);
|
|
445
|
+
if (parsedNextUrl.protocol !== 'http:' &&
|
|
446
|
+
parsedNextUrl.protocol !== 'https:') {
|
|
447
|
+
throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
|
|
448
|
+
}
|
|
449
|
+
return {
|
|
450
|
+
response,
|
|
451
|
+
nextUrl,
|
|
452
|
+
};
|
|
437
453
|
}
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
const nextUrl = this.resolveRedirectTarget(currentUrl, location);
|
|
441
|
-
const parsedNextUrl = new URL(nextUrl);
|
|
442
|
-
if (parsedNextUrl.protocol !== 'http:' &&
|
|
443
|
-
parsedNextUrl.protocol !== 'https:') {
|
|
444
|
-
throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
|
|
454
|
+
finally {
|
|
455
|
+
await agent?.close();
|
|
445
456
|
}
|
|
446
|
-
return {
|
|
447
|
-
response,
|
|
448
|
-
nextUrl,
|
|
449
|
-
};
|
|
450
457
|
}
|
|
451
458
|
getRedirectLocation(response, currentUrl) {
|
|
452
459
|
const location = response.headers.get('location');
|
|
@@ -694,16 +701,8 @@ async function decodeResponseIfNeeded(response, url, signal) {
|
|
|
694
701
|
clearAbortListener();
|
|
695
702
|
abortDecodePipeline();
|
|
696
703
|
void decodedReader.cancel(error).catch(() => undefined);
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
encoding: encodingHeader ?? encodings.join(','),
|
|
700
|
-
error: isError(error) ? error.message : String(error),
|
|
701
|
-
});
|
|
702
|
-
return new Response(passthroughBranch, {
|
|
703
|
-
status: response.status,
|
|
704
|
-
statusText: response.statusText,
|
|
705
|
-
headers,
|
|
706
|
-
});
|
|
704
|
+
void passthroughBranch.cancel().catch(() => undefined);
|
|
705
|
+
throw new FetchError(`Content-Encoding decode failed for ${redactUrl(url)}: ${isError(error) ? error.message : String(error)}`, url);
|
|
707
706
|
}
|
|
708
707
|
}
|
|
709
708
|
class ResponseTextReader {
|
|
@@ -851,17 +850,16 @@ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry,
|
|
|
851
850
|
cancelResponseBody(response);
|
|
852
851
|
throw responseError;
|
|
853
852
|
}
|
|
854
|
-
const
|
|
855
|
-
const contentType = decodedResponse.headers.get('content-type');
|
|
853
|
+
const contentType = response.headers.get('content-type');
|
|
856
854
|
assertSupportedContentType(contentType, finalUrl);
|
|
857
855
|
const declaredEncoding = getCharsetFromContentType(contentType ?? null);
|
|
858
856
|
if (mode === 'text') {
|
|
859
|
-
const { text, size, truncated } = await reader.read(
|
|
860
|
-
telemetry.recordResponse(ctx,
|
|
857
|
+
const { text, size, truncated } = await reader.read(response, finalUrl, maxBytes, signal, declaredEncoding);
|
|
858
|
+
telemetry.recordResponse(ctx, response, size);
|
|
861
859
|
return { kind: 'text', text, size, truncated };
|
|
862
860
|
}
|
|
863
|
-
const { buffer, encoding, size, truncated } = await reader.readBuffer(
|
|
864
|
-
telemetry.recordResponse(ctx,
|
|
861
|
+
const { buffer, encoding, size, truncated } = await reader.readBuffer(response, finalUrl, maxBytes, signal, declaredEncoding);
|
|
862
|
+
telemetry.recordResponse(ctx, response, size);
|
|
865
863
|
return { kind: 'buffer', buffer, encoding, size, truncated };
|
|
866
864
|
}
|
|
867
865
|
function isReadableStreamLike(value) {
|
|
@@ -1087,8 +1085,9 @@ const DEFAULT_HEADERS = {
|
|
|
1087
1085
|
'User-Agent': config.fetcher.userAgent,
|
|
1088
1086
|
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
1089
1087
|
'Accept-Language': 'en-US,en;q=0.5',
|
|
1090
|
-
|
|
1091
|
-
|
|
1088
|
+
// Accept-Encoding and Connection are forbidden Fetch API headers.
|
|
1089
|
+
// The undici-based globalThis.fetch manages content negotiation and
|
|
1090
|
+
// decompression transparently per the Fetch spec.
|
|
1092
1091
|
};
|
|
1093
1092
|
function buildHeaders() {
|
|
1094
1093
|
return DEFAULT_HEADERS;
|
package/dist/lib/url.d.ts
CHANGED
package/dist/lib/url.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import dns from 'node:dns';
|
|
2
2
|
import { BlockList, isIP, SocketAddress } from 'node:net';
|
|
3
3
|
import { domainToASCII } from 'node:url';
|
|
4
|
-
import { logDebug } from './core.js';
|
|
4
|
+
import { config, logDebug } from './core.js';
|
|
5
5
|
import { createErrorWithCode, isError, isSystemError } from './utils.js';
|
|
6
6
|
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
7
7
|
const CNAME_LOOKUP_MAX_DEPTH = 5;
|
|
@@ -76,7 +76,7 @@ export class SafeDnsResolver {
|
|
|
76
76
|
if (isCloudMetadataHost(normalizedHostname)) {
|
|
77
77
|
throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Cloud metadata endpoints are not allowed`, 'EBLOCKED');
|
|
78
78
|
}
|
|
79
|
-
if (
|
|
79
|
+
if (!isLocalFetchAllowed() &&
|
|
80
80
|
this.ipBlocker.isBlockedIp(normalizedHostname)) {
|
|
81
81
|
throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
|
|
82
82
|
}
|
|
@@ -590,7 +590,7 @@ function isCloudMetadataHost(hostname) {
|
|
|
590
590
|
return normalized !== null && CLOUD_METADATA_HOSTS.has(normalized.ip);
|
|
591
591
|
}
|
|
592
592
|
function isLocalFetchAllowed() {
|
|
593
|
-
return
|
|
593
|
+
return config.security.allowLocalFetch;
|
|
594
594
|
}
|
|
595
595
|
export class IpBlocker {
|
|
596
596
|
security;
|
package/dist/transform/transform.js
CHANGED
|
@@ -9,7 +9,7 @@ import { config } from '../lib/core.js';
|
|
|
9
9
|
import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../lib/core.js';
|
|
10
10
|
import { isRawTextContentUrl } from '../lib/http.js';
|
|
11
11
|
import { createAbortError, throwIfAborted } from '../lib/utils.js';
|
|
12
|
-
import { FetchError, getErrorMessage } from '../lib/utils.js';
|
|
12
|
+
import { FetchError, getErrorMessage, toError } from '../lib/utils.js';
|
|
13
13
|
import { isObject } from '../lib/utils.js';
|
|
14
14
|
import { translateHtmlFragmentToMarkdown } from './html-translators.js';
|
|
15
15
|
import { extractMetadata, extractMetadataFromHead, mergeMetadata, } from './metadata.js';
|
|
@@ -178,13 +178,22 @@ function trimUtf8Buffer(buffer, maxBytes) {
|
|
|
178
178
|
return buffer.subarray(0, end);
|
|
179
179
|
}
|
|
180
180
|
function trimDanglingTagFragment(content) {
|
|
181
|
-
|
|
182
|
-
|
|
181
|
+
let result = content;
|
|
182
|
+
// Trim dangling HTML entity (e.g. "&" cut before ";")
|
|
183
|
+
const lastAmp = result.lastIndexOf('&');
|
|
184
|
+
if (lastAmp !== -1 && lastAmp > result.length - 10) {
|
|
185
|
+
const tail = result.slice(lastAmp + 1);
|
|
186
|
+
if (!tail.includes(';') && /^[#a-zA-Z][a-zA-Z0-9]*$/.test(tail)) {
|
|
187
|
+
result = result.substring(0, lastAmp);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
const lastOpen = result.lastIndexOf('<');
|
|
191
|
+
const lastClose = result.lastIndexOf('>');
|
|
183
192
|
if (lastOpen > lastClose) {
|
|
184
|
-
if (lastOpen ===
|
|
185
|
-
return
|
|
193
|
+
if (lastOpen === result.length - 1) {
|
|
194
|
+
return result.substring(0, lastOpen);
|
|
186
195
|
}
|
|
187
|
-
const code =
|
|
196
|
+
const code = result.codePointAt(lastOpen + 1);
|
|
188
197
|
if (code !== undefined &&
|
|
189
198
|
(code === 47 || // '/'
|
|
190
199
|
code === 33 || // '!'
|
|
@@ -192,10 +201,10 @@ function trimDanglingTagFragment(content) {
|
|
|
192
201
|
(code >= 65 && code <= 90) || // A-Z
|
|
193
202
|
(code >= 97 && code <= 122)) // a-z
|
|
194
203
|
) {
|
|
195
|
-
return
|
|
204
|
+
return result.substring(0, lastOpen);
|
|
196
205
|
}
|
|
197
206
|
}
|
|
198
|
-
return
|
|
207
|
+
return result;
|
|
199
208
|
}
|
|
200
209
|
function truncateHtml(html, inputTruncated = false) {
|
|
201
210
|
const maxSize = config.constants.maxHtmlSize;
|
|
@@ -1047,6 +1056,8 @@ function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
|
|
|
1047
1056
|
abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
|
|
1048
1057
|
if (error instanceof FetchError)
|
|
1049
1058
|
throw error;
|
|
1059
|
+
if (!(error instanceof Error))
|
|
1060
|
+
throw toError(error);
|
|
1050
1061
|
const message = getErrorMessage(error);
|
|
1051
1062
|
logWarn('Transform worker failed; falling back to in-process', {
|
|
1052
1063
|
url: redactUrl(url),
|
package/package.json
CHANGED