npm - @j0hanz/fetch-url-mcp - Versions diffs - 1.1.2 → 1.1.3 - Mend

@j0hanz/fetch-url-mcp 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/AGENTS.md CHANGED Viewed

@@ -34,7 +34,7 @@
 - `tests/` — Unit/integration tests (46+ test files) using Node.js built-in test runner
 - `scripts/` — Build & test orchestration (`tasks.mjs`)
 - `assets/` — Server icon (`logo.svg`)
-- `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → test → build → publish to npm, MCP Registry, Docker)
+- `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → type-check:tests → test → build → publish to npm, MCP Registry, Docker)
 > Ignore: `dist/`, `node_modules/`, `coverage/`, `.cache/`, `.tsbuildinfo`
@@ -49,6 +49,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
 - **Start:** `npm run start` → `node dist/index.js` (see `package.json`)
 - **Build:** `npm run build` → `node scripts/tasks.mjs build` — cleans `dist/`, compiles TS, validates `instructions.md`, copies assets, sets executable bit (see `scripts/tasks.mjs`, `package.json`)
 - **Type-check:** `npm run type-check` → `tsc -p tsconfig.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
+- **Type-check (tests):** `npm run type-check:tests` → build output + `tsc -p tsconfig.tests.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
 - **Lint:** `npm run lint` → `eslint .` (see `package.json`, `.github/workflows/release.yml`)
 - **Lint (fix):** `npm run lint:fix` → `eslint . --fix` (see `package.json`)
 - **Format:** `npm run format` → `prettier --write .` (see `package.json`)
@@ -135,7 +136,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
   - Config values temporarily overridden per test with `try/finally` cleanup (observed in `tests/fetch-url-tool.test.ts`)
   - Worker pool shutdown in `after()` hooks for clean teardown (observed in `tests/fetch-url-tool.test.ts`)
   - No external services (DB/containers) required for tests
-- **CI validation order:** `lint` → `type-check` → `test` → `build` (see `.github/workflows/release.yml`)
+- **CI validation order:** `lint` → `type-check` → `type-check:tests` → `test` → `build` (see `.github/workflows/release.yml`)
 ## 7) Common Pitfalls (Verified Only)

package/dist/fetch-content.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+export declare function getCharsetFromContentType(contentType: string | null): string | undefined;
+export declare function decodeBuffer(buffer: Uint8Array, encoding: string): string;
+export declare function resolveEncoding(declaredEncoding: string | undefined, sample: Uint8Array): string | undefined;
+export declare function isBinaryContent(buffer: Uint8Array, encoding?: string): boolean;

package/dist/fetch-content.js ADDED Viewed

@@ -0,0 +1,163 @@
+import { Buffer } from 'node:buffer';
+export function getCharsetFromContentType(contentType) {
+    if (!contentType)
+        return undefined;
+    const match = /charset=([^;]+)/i.exec(contentType);
+    const charsetGroup = match?.[1];
+    if (!charsetGroup)
+        return undefined;
+    let charset = charsetGroup.trim();
+    if (charset.startsWith('"') && charset.endsWith('"')) {
+        charset = charset.slice(1, -1);
+    }
+    return charset.trim();
+}
+function createDecoder(encoding) {
+    if (!encoding)
+        return new TextDecoder('utf-8');
+    try {
+        return new TextDecoder(encoding);
+    }
+    catch {
+        return new TextDecoder('utf-8');
+    }
+}
+export function decodeBuffer(buffer, encoding) {
+    return createDecoder(encoding).decode(buffer);
+}
+function normalizeEncodingLabel(encoding) {
+    return encoding?.trim().toLowerCase() ?? '';
+}
+function isUnicodeWideEncoding(encoding) {
+    const normalized = normalizeEncodingLabel(encoding);
+    return (normalized.startsWith('utf-16') ||
+        normalized.startsWith('utf-32') ||
+        normalized === 'ucs-2' ||
+        normalized === 'unicodefffe' ||
+        normalized === 'unicodefeff');
+}
+const BOM_SIGNATURES = [
+    // 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
+    { bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
+    { bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
+    { bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
+    { bytes: [0xff, 0xfe], encoding: 'utf-16le' },
+    { bytes: [0xfe, 0xff], encoding: 'utf-16be' },
+];
+function startsWithBytes(buffer, signature) {
+    const sigLen = signature.length;
+    if (buffer.length < sigLen)
+        return false;
+    for (let i = 0; i < sigLen; i += 1) {
+        if (buffer[i] !== signature[i])
+            return false;
+    }
+    return true;
+}
+function detectBomEncoding(buffer) {
+    for (const { bytes, encoding } of BOM_SIGNATURES) {
+        if (startsWithBytes(buffer, bytes))
+            return encoding;
+    }
+    return undefined;
+}
+function readQuotedValue(input, startIndex) {
+    const first = input[startIndex];
+    if (!first)
+        return '';
+    const quoted = first === '"' || first === "'";
+    if (quoted) {
+        const end = input.indexOf(first, startIndex + 1);
+        return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
+    }
+    const tail = input.slice(startIndex);
+    const stop = tail.search(/[\s/>]/);
+    return (stop === -1 ? tail : tail.slice(0, stop)).trim();
+}
+function extractHtmlCharset(headSnippet) {
+    const lower = headSnippet.toLowerCase();
+    const charsetToken = 'charset=';
+    const charsetIdx = lower.indexOf(charsetToken);
+    if (charsetIdx === -1)
+        return undefined;
+    const valueStart = charsetIdx + charsetToken.length;
+    const charset = readQuotedValue(headSnippet, valueStart);
+    return charset ? charset.toLowerCase() : undefined;
+}
+function extractXmlEncoding(headSnippet) {
+    const lower = headSnippet.toLowerCase();
+    const xmlStart = lower.indexOf('<?xml');
+    if (xmlStart === -1)
+        return undefined;
+    const xmlEnd = lower.indexOf('?>', xmlStart);
+    const declaration = xmlEnd === -1
+        ? headSnippet.slice(xmlStart)
+        : headSnippet.slice(xmlStart, xmlEnd + 2);
+    const declarationLower = declaration.toLowerCase();
+    const encodingToken = 'encoding=';
+    const encodingIdx = declarationLower.indexOf(encodingToken);
+    if (encodingIdx === -1)
+        return undefined;
+    const valueStart = encodingIdx + encodingToken.length;
+    const encoding = readQuotedValue(declaration, valueStart);
+    return encoding ? encoding.toLowerCase() : undefined;
+}
+function detectHtmlDeclaredEncoding(buffer) {
+    const scanSize = Math.min(buffer.length, 8_192);
+    if (scanSize === 0)
+        return undefined;
+    const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
+    return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
+}
+export function resolveEncoding(declaredEncoding, sample) {
+    const bomEncoding = detectBomEncoding(sample);
+    if (bomEncoding)
+        return bomEncoding;
+    if (declaredEncoding)
+        return declaredEncoding;
+    return detectHtmlDeclaredEncoding(sample);
+}
+const BINARY_SIGNATURES = [
+    [0x25, 0x50, 0x44, 0x46],
+    [0x89, 0x50, 0x4e, 0x47],
+    [0x47, 0x49, 0x46, 0x38],
+    [0xff, 0xd8, 0xff],
+    [0x52, 0x49, 0x46, 0x46],
+    [0x42, 0x4d],
+    [0x49, 0x49, 0x2a, 0x00],
+    [0x4d, 0x4d, 0x00, 0x2a],
+    [0x00, 0x00, 0x01, 0x00],
+    [0x50, 0x4b, 0x03, 0x04],
+    [0x1f, 0x8b],
+    [0x42, 0x5a, 0x68],
+    [0x52, 0x61, 0x72, 0x21],
+    [0x37, 0x7a, 0xbc, 0xaf],
+    [0x7f, 0x45, 0x4c, 0x46],
+    [0x4d, 0x5a],
+    [0xcf, 0xfa, 0xed, 0xfe],
+    [0x00, 0x61, 0x73, 0x6d],
+    [0x1a, 0x45, 0xdf, 0xa3],
+    [0x66, 0x74, 0x79, 0x70],
+    [0x46, 0x4c, 0x56],
+    [0x49, 0x44, 0x33],
+    [0xff, 0xfb],
+    [0xff, 0xfa],
+    [0x4f, 0x67, 0x67, 0x53],
+    [0x66, 0x4c, 0x61, 0x43],
+    [0x4d, 0x54, 0x68, 0x64],
+    [0x77, 0x4f, 0x46, 0x46],
+    [0x00, 0x01, 0x00, 0x00],
+    [0x4f, 0x54, 0x54, 0x4f],
+    [0x53, 0x51, 0x4c, 0x69],
+];
+function hasNullByte(buffer, limit) {
+    const checkLen = Math.min(buffer.length, limit);
+    return buffer.subarray(0, checkLen).includes(0x00);
+}
+export function isBinaryContent(buffer, encoding) {
+    for (const signature of BINARY_SIGNATURES) {
+        if (startsWithBytes(buffer, signature))
+            return true;
+    }
+    return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
+}

package/dist/fetch-stream.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import { Readable } from 'node:stream';
+import type { ReadableStream as NodeReadableStream } from 'node:stream/web';
+export declare function toNodeReadableStream(stream: ReadableStream<Uint8Array>, url: string, stage: string): NodeReadableStream<Uint8Array>;
+export declare function toWebReadableStream(stream: Readable, url: string, stage: string): ReadableStream<Uint8Array>;

package/dist/fetch-stream.js ADDED Viewed

@@ -0,0 +1,28 @@
+import { Readable } from 'node:stream';
+import { FetchError } from './errors.js';
+import { isObject } from './type-guards.js';
+function isReadableStreamLike(value) {
+    if (!isObject(value))
+        return false;
+    return (typeof value['getReader'] === 'function' &&
+        typeof value['cancel'] === 'function' &&
+        typeof value['tee'] === 'function' &&
+        typeof value['locked'] === 'boolean');
+}
+function assertReadableStreamLike(stream, url, stage) {
+    if (isReadableStreamLike(stream))
+        return;
+    throw new FetchError('Invalid response stream', url, 500, {
+        reason: 'invalid_stream',
+        stage,
+    });
+}
+export function toNodeReadableStream(stream, url, stage) {
+    assertReadableStreamLike(stream, url, stage);
+    return stream;
+}
+export function toWebReadableStream(stream, url, stage) {
+    const converted = Readable.toWeb(stream);
+    assertReadableStreamLike(converted, url, stage);
+    return converted;
+}

package/dist/fetch.js CHANGED Viewed

@@ -10,6 +10,8 @@ import { finished, pipeline } from 'node:stream/promises';
 import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
 import { config } from './config.js';
 import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
+import { decodeBuffer, getCharsetFromContentType, isBinaryContent, resolveEncoding, } from './fetch-content.js';
+import { toNodeReadableStream, toWebReadableStream } from './fetch-stream.js';
 import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
 import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
 import { isError, isObject } from './type-guards.js';
@@ -26,23 +28,6 @@ const defaultRedactor = {
     redact: redactUrl,
 };
 const defaultFetch = (input, init) => globalThis.fetch(input, init);
-function assertReadableStreamLike(stream, url, stage) {
-    if (isObject(stream) && typeof stream['getReader'] === 'function')
-        return;
-    throw new FetchError('Invalid response stream', url, 500, {
-        reason: 'invalid_stream',
-        stage,
-    });
-}
-function toNodeReadableStream(stream, url, stage) {
-    assertReadableStreamLike(stream, url, stage);
-    return stream;
-}
-function toWebReadableStream(stream, url, stage) {
-    const converted = Readable.toWeb(stream);
-    assertReadableStreamLike(converted, url, stage);
-    return converted;
-}
 class IpBlocker {
     security;
     blockList = createDefaultBlockList();
@@ -871,168 +856,6 @@ class RedirectFollower {
         }
     }
 }
-function getCharsetFromContentType(contentType) {
-    if (!contentType)
-        return undefined;
-    const match = /charset=([^;]+)/i.exec(contentType);
-    const charsetGroup = match?.[1];
-    if (!charsetGroup)
-        return undefined;
-    let charset = charsetGroup.trim();
-    if (charset.startsWith('"') && charset.endsWith('"')) {
-        charset = charset.slice(1, -1);
-    }
-    return charset.trim();
-}
-function createDecoder(encoding) {
-    if (!encoding)
-        return new TextDecoder('utf-8');
-    try {
-        return new TextDecoder(encoding);
-    }
-    catch {
-        return new TextDecoder('utf-8');
-    }
-}
-function decodeBuffer(buffer, encoding) {
-    return createDecoder(encoding).decode(buffer);
-}
-function normalizeEncodingLabel(encoding) {
-    return encoding?.trim().toLowerCase() ?? '';
-}
-function isUnicodeWideEncoding(encoding) {
-    const normalized = normalizeEncodingLabel(encoding);
-    return (normalized.startsWith('utf-16') ||
-        normalized.startsWith('utf-32') ||
-        normalized === 'ucs-2' ||
-        normalized === 'unicodefffe' ||
-        normalized === 'unicodefeff');
-}
-const BOM_SIGNATURES = [
-    // 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
-    { bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
-    { bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
-    { bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
-    { bytes: [0xff, 0xfe], encoding: 'utf-16le' },
-    { bytes: [0xfe, 0xff], encoding: 'utf-16be' },
-];
-function detectBomEncoding(buffer) {
-    for (const { bytes, encoding } of BOM_SIGNATURES) {
-        if (startsWithBytes(buffer, bytes))
-            return encoding;
-    }
-    return undefined;
-}
-function readQuotedValue(input, startIndex) {
-    const first = input[startIndex];
-    if (!first)
-        return '';
-    const quoted = first === '"' || first === "'";
-    if (quoted) {
-        const end = input.indexOf(first, startIndex + 1);
-        return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
-    }
-    const tail = input.slice(startIndex);
-    const stop = tail.search(/[\s/>]/);
-    return (stop === -1 ? tail : tail.slice(0, stop)).trim();
-}
-function extractHtmlCharset(headSnippet) {
-    const lower = headSnippet.toLowerCase();
-    const charsetToken = 'charset=';
-    const charsetIdx = lower.indexOf(charsetToken);
-    if (charsetIdx === -1)
-        return undefined;
-    const valueStart = charsetIdx + charsetToken.length;
-    const charset = readQuotedValue(headSnippet, valueStart);
-    return charset ? charset.toLowerCase() : undefined;
-}
-function extractXmlEncoding(headSnippet) {
-    const lower = headSnippet.toLowerCase();
-    const xmlStart = lower.indexOf('<?xml');
-    if (xmlStart === -1)
-        return undefined;
-    const xmlEnd = lower.indexOf('?>', xmlStart);
-    const declaration = xmlEnd === -1
-        ? headSnippet.slice(xmlStart)
-        : headSnippet.slice(xmlStart, xmlEnd + 2);
-    const declarationLower = declaration.toLowerCase();
-    const encodingToken = 'encoding=';
-    const encodingIdx = declarationLower.indexOf(encodingToken);
-    if (encodingIdx === -1)
-        return undefined;
-    const valueStart = encodingIdx + encodingToken.length;
-    const encoding = readQuotedValue(declaration, valueStart);
-    return encoding ? encoding.toLowerCase() : undefined;
-}
-function detectHtmlDeclaredEncoding(buffer) {
-    const scanSize = Math.min(buffer.length, 8_192);
-    if (scanSize === 0)
-        return undefined;
-    const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
-    return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
-}
-function resolveEncoding(declaredEncoding, sample) {
-    const bomEncoding = detectBomEncoding(sample);
-    if (bomEncoding)
-        return bomEncoding;
-    if (declaredEncoding)
-        return declaredEncoding;
-    return detectHtmlDeclaredEncoding(sample);
-}
-const BINARY_SIGNATURES = [
-    [0x25, 0x50, 0x44, 0x46],
-    [0x89, 0x50, 0x4e, 0x47],
-    [0x47, 0x49, 0x46, 0x38],
-    [0xff, 0xd8, 0xff],
-    [0x52, 0x49, 0x46, 0x46],
-    [0x42, 0x4d],
-    [0x49, 0x49, 0x2a, 0x00],
-    [0x4d, 0x4d, 0x00, 0x2a],
-    [0x00, 0x00, 0x01, 0x00],
-    [0x50, 0x4b, 0x03, 0x04],
-    [0x1f, 0x8b],
-    [0x42, 0x5a, 0x68],
-    [0x52, 0x61, 0x72, 0x21],
-    [0x37, 0x7a, 0xbc, 0xaf],
-    [0x7f, 0x45, 0x4c, 0x46],
-    [0x4d, 0x5a],
-    [0xcf, 0xfa, 0xed, 0xfe],
-    [0x00, 0x61, 0x73, 0x6d],
-    [0x1a, 0x45, 0xdf, 0xa3],
-    [0x66, 0x74, 0x79, 0x70],
-    [0x46, 0x4c, 0x56],
-    [0x49, 0x44, 0x33],
-    [0xff, 0xfb],
-    [0xff, 0xfa],
-    [0x4f, 0x67, 0x67, 0x53],
-    [0x66, 0x4c, 0x61, 0x43],
-    [0x4d, 0x54, 0x68, 0x64],
-    [0x77, 0x4f, 0x46, 0x46],
-    [0x00, 0x01, 0x00, 0x00],
-    [0x4f, 0x54, 0x54, 0x4f],
-    [0x53, 0x51, 0x4c, 0x69],
-];
-function startsWithBytes(buffer, signature) {
-    const sigLen = signature.length;
-    if (buffer.length < sigLen)
-        return false;
-    for (let i = 0; i < sigLen; i += 1) {
-        if (buffer[i] !== signature[i])
-            return false;
-    }
-    return true;
-}
-function hasNullByte(buffer, limit) {
-    const checkLen = Math.min(buffer.length, limit);
-    return buffer.subarray(0, checkLen).includes(0x00);
-}
-function isBinaryContent(buffer, encoding) {
-    for (const signature of BINARY_SIGNATURES) {
-        if (startsWithBytes(buffer, signature))
-            return true;
-    }
-    return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
-}
 class ResponseTextReader {
     async read(response, url, maxBytes, signal, encoding) {
         const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@j0hanz/fetch-url-mcp",
-  "version": "1.1.2",
+  "version": "1.1.3",
   "mcpName": "io.github.j0hanz/fetch-url-mcp",
   "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
   "type": "module",
@@ -52,6 +52,7 @@
     "start": "node dist/index.js",
     "format": "prettier --write .",
     "type-check": "node scripts/tasks.mjs type-check",
+    "type-check:tests": "node scripts/tasks.mjs type-check:tests",
     "type-check:diagnostics": "tsc --noEmit --extendedDiagnostics",
     "type-check:trace": "node -e \"require('fs').rmSync('.ts-trace',{recursive:true,force:true})\" && tsc --noEmit --generateTrace .ts-trace",
     "lint": "eslint .",