@j0hanz/fetch-url-mcp 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -131,6 +131,7 @@ export declare const config: {
131
131
  allowedHosts: Set<string>;
132
132
  apiKey: string | undefined;
133
133
  allowRemote: boolean;
134
+ allowLocalFetch: boolean;
134
135
  };
135
136
  auth: AuthConfig;
136
137
  rateLimit: {
package/dist/lib/core.js CHANGED
@@ -458,6 +458,7 @@ export const config = {
458
458
  allowedHosts: parseAllowedHosts(env['ALLOWED_HOSTS']),
459
459
  apiKey: env['API_KEY'],
460
460
  allowRemote,
461
+ allowLocalFetch: parseBoolean(env['ALLOW_LOCAL_FETCH'], false),
461
462
  },
462
463
  auth: buildAuthConfig(baseUrl),
463
464
  rateLimit: {
package/dist/lib/http.js CHANGED
@@ -15,7 +15,7 @@ import { Agent } from 'undici';
15
15
  import { z } from 'zod';
16
16
  import { get as cacheGet, config, getOperationId, getRequestId, logDebug, logError, logWarn, parseCachedPayload, redactUrl, resolveCachedPayloadContent, } from './core.js';
17
17
  import { BLOCKED_HOST_SUFFIXES, createDnsPreflight, IpBlocker, RawUrlTransformer, SafeDnsResolver, UrlNormalizer, VALIDATION_ERROR_CODE, } from './url.js';
18
- import { createErrorWithCode, FetchError, isError, isObject, isSystemError, toError, } from './utils.js';
18
+ import { createErrorWithCode, FetchError, isAbortError, isError, isObject, isSystemError, toError, } from './utils.js';
19
19
  const FILENAME_RULES = {
20
20
  MAX_LEN: 200,
21
21
  UNSAFE_CHARS: /[<>:"/\\|?*\p{C}]/gu,
@@ -315,10 +315,6 @@ function createFetchError(input, url) {
315
315
  return new FetchError(input.message ?? 'Unexpected error', url);
316
316
  }
317
317
  }
318
- function isAbortError(error) {
319
- return (isError(error) &&
320
- (error.name === 'AbortError' || error.name === 'TimeoutError'));
321
- }
322
318
  function isTimeoutError(error) {
323
319
  return isError(error) && error.name === 'TimeoutError';
324
320
  }
@@ -334,7 +330,7 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
334
330
  if (error instanceof FetchError)
335
331
  return error;
336
332
  const url = resolveErrorUrl(error, fallbackUrl);
337
- if (isAbortError(error)) {
333
+ if (isAbortError(error) || isTimeoutError(error)) {
338
334
  return isTimeoutError(error)
339
335
  ? createFetchError({ kind: 'timeout', timeout: timeoutMs }, url)
340
336
  : createFetchError({ kind: 'canceled' }, url);
@@ -386,7 +382,12 @@ class RedirectFollower {
386
382
  async fetchWithRedirects(url, init, maxRedirects) {
387
383
  let currentUrl = url;
388
384
  const redirectLimit = Math.max(0, maxRedirects);
385
+ const visited = new Set();
389
386
  for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
387
+ if (visited.has(currentUrl)) {
388
+ throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
389
+ }
390
+ visited.add(currentUrl);
390
391
  const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => {
391
392
  let ipAddress;
392
393
  if (this.preflight) {
@@ -405,9 +406,10 @@ class RedirectFollower {
405
406
  ...init,
406
407
  redirect: 'manual',
407
408
  };
409
+ let agent;
408
410
  if (ipAddress) {
409
411
  const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
410
- const agent = new Agent({
412
+ agent = new Agent({
411
413
  connect: {
412
414
  lookup: (hostname, options, callback) => {
413
415
  const family = isIP(ipAddress) === 6 ? 6 : 4;
@@ -428,25 +430,30 @@ class RedirectFollower {
428
430
  });
429
431
  fetchInit.dispatcher = agent;
430
432
  }
431
- const response = await this.fetchFn(currentUrl, fetchInit);
432
- if (!isRedirectStatus(response.status))
433
- return { response };
434
- if (redirectCount >= redirectLimit) {
433
+ try {
434
+ const response = await this.fetchFn(currentUrl, fetchInit);
435
+ if (!isRedirectStatus(response.status))
436
+ return { response };
437
+ if (redirectCount >= redirectLimit) {
438
+ cancelResponseBody(response);
439
+ throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
440
+ }
441
+ const location = this.getRedirectLocation(response, currentUrl);
435
442
  cancelResponseBody(response);
436
- throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
443
+ const nextUrl = this.resolveRedirectTarget(currentUrl, location);
444
+ const parsedNextUrl = new URL(nextUrl);
445
+ if (parsedNextUrl.protocol !== 'http:' &&
446
+ parsedNextUrl.protocol !== 'https:') {
447
+ throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
448
+ }
449
+ return {
450
+ response,
451
+ nextUrl,
452
+ };
437
453
  }
438
- const location = this.getRedirectLocation(response, currentUrl);
439
- cancelResponseBody(response);
440
- const nextUrl = this.resolveRedirectTarget(currentUrl, location);
441
- const parsedNextUrl = new URL(nextUrl);
442
- if (parsedNextUrl.protocol !== 'http:' &&
443
- parsedNextUrl.protocol !== 'https:') {
444
- throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
454
+ finally {
455
+ await agent?.close();
445
456
  }
446
- return {
447
- response,
448
- nextUrl,
449
- };
450
457
  }
451
458
  getRedirectLocation(response, currentUrl) {
452
459
  const location = response.headers.get('location');
@@ -694,16 +701,8 @@ async function decodeResponseIfNeeded(response, url, signal) {
694
701
  clearAbortListener();
695
702
  abortDecodePipeline();
696
703
  void decodedReader.cancel(error).catch(() => undefined);
697
- logDebug('Content-Encoding decode failed; using passthrough body', {
698
- url: redactUrl(url),
699
- encoding: encodingHeader ?? encodings.join(','),
700
- error: isError(error) ? error.message : String(error),
701
- });
702
- return new Response(passthroughBranch, {
703
- status: response.status,
704
- statusText: response.statusText,
705
- headers,
706
- });
704
+ void passthroughBranch.cancel().catch(() => undefined);
705
+ throw new FetchError(`Content-Encoding decode failed for ${redactUrl(url)}: ${isError(error) ? error.message : String(error)}`, url);
707
706
  }
708
707
  }
709
708
  class ResponseTextReader {
@@ -851,17 +850,16 @@ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry,
851
850
  cancelResponseBody(response);
852
851
  throw responseError;
853
852
  }
854
- const decodedResponse = await decodeResponseIfNeeded(response, finalUrl, signal);
855
- const contentType = decodedResponse.headers.get('content-type');
853
+ const contentType = response.headers.get('content-type');
856
854
  assertSupportedContentType(contentType, finalUrl);
857
855
  const declaredEncoding = getCharsetFromContentType(contentType ?? null);
858
856
  if (mode === 'text') {
859
- const { text, size, truncated } = await reader.read(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
860
- telemetry.recordResponse(ctx, decodedResponse, size);
857
+ const { text, size, truncated } = await reader.read(response, finalUrl, maxBytes, signal, declaredEncoding);
858
+ telemetry.recordResponse(ctx, response, size);
861
859
  return { kind: 'text', text, size, truncated };
862
860
  }
863
- const { buffer, encoding, size, truncated } = await reader.readBuffer(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
864
- telemetry.recordResponse(ctx, decodedResponse, size);
861
+ const { buffer, encoding, size, truncated } = await reader.readBuffer(response, finalUrl, maxBytes, signal, declaredEncoding);
862
+ telemetry.recordResponse(ctx, response, size);
865
863
  return { kind: 'buffer', buffer, encoding, size, truncated };
866
864
  }
867
865
  function isReadableStreamLike(value) {
@@ -1087,8 +1085,9 @@ const DEFAULT_HEADERS = {
1087
1085
  'User-Agent': config.fetcher.userAgent,
1088
1086
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
1089
1087
  'Accept-Language': 'en-US,en;q=0.5',
1090
- 'Accept-Encoding': 'gzip, deflate, br',
1091
- Connection: 'keep-alive',
1088
+ // Accept-Encoding and Connection are forbidden Fetch API headers.
1089
+ // The undici-based globalThis.fetch manages content negotiation and
1090
+ // decompression transparently per the Fetch spec.
1092
1091
  };
1093
1092
  function buildHeaders() {
1094
1093
  return DEFAULT_HEADERS;
package/dist/lib/url.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { BlockList } from 'node:net';
2
- import { type config } from './core.js';
2
+ import { config } from './core.js';
3
3
  export declare class SafeDnsResolver {
4
4
  private readonly ipBlocker;
5
5
  private readonly security;
package/dist/lib/url.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import dns from 'node:dns';
2
2
  import { BlockList, isIP, SocketAddress } from 'node:net';
3
3
  import { domainToASCII } from 'node:url';
4
- import { logDebug } from './core.js';
4
+ import { config, logDebug } from './core.js';
5
5
  import { createErrorWithCode, isError, isSystemError } from './utils.js';
6
6
  const DNS_LOOKUP_TIMEOUT_MS = 5000;
7
7
  const CNAME_LOOKUP_MAX_DEPTH = 5;
@@ -76,7 +76,7 @@ export class SafeDnsResolver {
76
76
  if (isCloudMetadataHost(normalizedHostname)) {
77
77
  throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Cloud metadata endpoints are not allowed`, 'EBLOCKED');
78
78
  }
79
- if (process.env['ALLOW_LOCAL_FETCH'] !== 'true' &&
79
+ if (!isLocalFetchAllowed() &&
80
80
  this.ipBlocker.isBlockedIp(normalizedHostname)) {
81
81
  throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
82
82
  }
@@ -590,7 +590,7 @@ function isCloudMetadataHost(hostname) {
590
590
  return normalized !== null && CLOUD_METADATA_HOSTS.has(normalized.ip);
591
591
  }
592
592
  function isLocalFetchAllowed() {
593
- return process.env['ALLOW_LOCAL_FETCH'] === 'true';
593
+ return config.security.allowLocalFetch;
594
594
  }
595
595
  export class IpBlocker {
596
596
  security;
@@ -9,7 +9,7 @@ import { config } from '../lib/core.js';
9
9
  import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../lib/core.js';
10
10
  import { isRawTextContentUrl } from '../lib/http.js';
11
11
  import { createAbortError, throwIfAborted } from '../lib/utils.js';
12
- import { FetchError, getErrorMessage } from '../lib/utils.js';
12
+ import { FetchError, getErrorMessage, toError } from '../lib/utils.js';
13
13
  import { isObject } from '../lib/utils.js';
14
14
  import { translateHtmlFragmentToMarkdown } from './html-translators.js';
15
15
  import { extractMetadata, extractMetadataFromHead, mergeMetadata, } from './metadata.js';
@@ -178,13 +178,22 @@ function trimUtf8Buffer(buffer, maxBytes) {
178
178
  return buffer.subarray(0, end);
179
179
  }
180
180
  function trimDanglingTagFragment(content) {
181
- const lastOpen = content.lastIndexOf('<');
182
- const lastClose = content.lastIndexOf('>');
181
+ let result = content;
182
+ // Trim dangling HTML entity (e.g. "&amp" cut before ";")
183
+ const lastAmp = result.lastIndexOf('&');
184
+ if (lastAmp !== -1 && lastAmp > result.length - 10) {
185
+ const tail = result.slice(lastAmp + 1);
186
+ if (!tail.includes(';') && /^[#a-zA-Z][a-zA-Z0-9]*$/.test(tail)) {
187
+ result = result.substring(0, lastAmp);
188
+ }
189
+ }
190
+ const lastOpen = result.lastIndexOf('<');
191
+ const lastClose = result.lastIndexOf('>');
183
192
  if (lastOpen > lastClose) {
184
- if (lastOpen === content.length - 1) {
185
- return content.substring(0, lastOpen);
193
+ if (lastOpen === result.length - 1) {
194
+ return result.substring(0, lastOpen);
186
195
  }
187
- const code = content.codePointAt(lastOpen + 1);
196
+ const code = result.codePointAt(lastOpen + 1);
188
197
  if (code !== undefined &&
189
198
  (code === 47 || // '/'
190
199
  code === 33 || // '!'
@@ -192,10 +201,10 @@ function trimDanglingTagFragment(content) {
192
201
  (code >= 65 && code <= 90) || // A-Z
193
202
  (code >= 97 && code <= 122)) // a-z
194
203
  ) {
195
- return content.substring(0, lastOpen);
204
+ return result.substring(0, lastOpen);
196
205
  }
197
206
  }
198
- return content;
207
+ return result;
199
208
  }
200
209
  function truncateHtml(html, inputTruncated = false) {
201
210
  const maxSize = config.constants.maxHtmlSize;
@@ -1047,6 +1056,8 @@ function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
1047
1056
  abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
1048
1057
  if (error instanceof FetchError)
1049
1058
  throw error;
1059
+ if (!(error instanceof Error))
1060
+ throw toError(error);
1050
1061
  const message = getErrorMessage(error);
1051
1062
  logWarn('Transform worker failed; falling back to in-process', {
1052
1063
  url: redactUrl(url),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/fetch-url-mcp",
3
- "version": "1.5.0",
3
+ "version": "1.5.1",
4
4
  "mcpName": "io.github.j0hanz/fetch-url-mcp",
5
5
  "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",