npm - @j0hanz/superfetch - Versions diffs - 2.2.2 → 2.4.0 - Mend

@j0hanz/superfetch 2.2.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86)

package/README.md +358 -363
package/dist/assets/logo.svg +24835 -0
package/dist/cache.d.ts +0 -1
package/dist/cache.js +71 -29
package/dist/config.d.ts +2 -1
package/dist/config.js +11 -7
package/dist/crypto.d.ts +0 -1
package/dist/crypto.js +0 -1
package/dist/dom-noise-removal.d.ts +0 -1
package/dist/dom-noise-removal.js +50 -45
package/dist/errors.d.ts +0 -1
package/dist/errors.js +0 -1
package/dist/fetch.d.ts +0 -1
package/dist/fetch.js +61 -54
package/dist/host-normalization.d.ts +1 -0
package/dist/host-normalization.js +47 -0
package/dist/http-native.d.ts +0 -1
package/dist/http-native.js +92 -28
package/dist/index.d.ts +0 -1
package/dist/index.js +0 -1
package/dist/instructions.md +41 -41
package/dist/json.d.ts +0 -1
package/dist/json.js +0 -1
package/dist/language-detection.d.ts +0 -1
package/dist/language-detection.js +10 -2
package/dist/markdown-cleanup.d.ts +6 -13
package/dist/markdown-cleanup.js +252 -34
package/dist/mcp-validator.d.ts +14 -0
package/dist/mcp-validator.js +22 -0
package/dist/mcp.d.ts +0 -1
package/dist/mcp.js +20 -10
package/dist/observability.d.ts +2 -1
package/dist/observability.js +30 -3
package/dist/server-tuning.d.ts +9 -0
package/dist/server-tuning.js +30 -0
package/dist/{http-utils.d.ts → session.d.ts} +0 -25
package/dist/{http-utils.js → session.js} +11 -104
package/dist/tools.d.ts +5 -4
package/dist/tools.js +46 -41
package/dist/transform-types.d.ts +38 -1
package/dist/transform-types.js +0 -1
package/dist/transform.d.ts +12 -7
package/dist/transform.js +205 -344
package/dist/type-guards.d.ts +0 -1
package/dist/type-guards.js +0 -1
package/dist/workers/transform-worker.d.ts +0 -1
package/dist/workers/transform-worker.js +29 -19
package/package.json +84 -85
package/dist/cache.d.ts.map +0 -1
package/dist/cache.js.map +0 -1
package/dist/config.d.ts.map +0 -1
package/dist/config.js.map +0 -1
package/dist/crypto.d.ts.map +0 -1
package/dist/crypto.js.map +0 -1
package/dist/dom-noise-removal.d.ts.map +0 -1
package/dist/dom-noise-removal.js.map +0 -1
package/dist/errors.d.ts.map +0 -1
package/dist/errors.js.map +0 -1
package/dist/fetch.d.ts.map +0 -1
package/dist/fetch.js.map +0 -1
package/dist/http-native.d.ts.map +0 -1
package/dist/http-native.js.map +0 -1
package/dist/http-utils.d.ts.map +0 -1
package/dist/http-utils.js.map +0 -1
package/dist/index.d.ts.map +0 -1
package/dist/index.js.map +0 -1
package/dist/json.d.ts.map +0 -1
package/dist/json.js.map +0 -1
package/dist/language-detection.d.ts.map +0 -1
package/dist/language-detection.js.map +0 -1
package/dist/markdown-cleanup.d.ts.map +0 -1
package/dist/markdown-cleanup.js.map +0 -1
package/dist/mcp.d.ts.map +0 -1
package/dist/mcp.js.map +0 -1
package/dist/observability.d.ts.map +0 -1
package/dist/observability.js.map +0 -1
package/dist/tools.d.ts.map +0 -1
package/dist/tools.js.map +0 -1
package/dist/transform-types.d.ts.map +0 -1
package/dist/transform-types.js.map +0 -1
package/dist/transform.d.ts.map +0 -1
package/dist/transform.js.map +0 -1
package/dist/type-guards.d.ts.map +0 -1
package/dist/type-guards.js.map +0 -1
package/dist/workers/transform-worker.d.ts.map +0 -1
package/dist/workers/transform-worker.js.map +0 -1

package/dist/http-native.js CHANGED Viewed

@@ -5,13 +5,51 @@ import { URL, URLSearchParams } from 'node:url';
 import { InvalidTokenError, ServerError, } from '@modelcontextprotocol/sdk/server/auth/errors.js';
 import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
 import { isInitializeRequest } from '@modelcontextprotocol/sdk/types.js';
-import { handleDownload } from './cache.js';
-import { config, enableHttpMode } from './config.js';
+import { keys as cacheKeys, handleDownload } from './cache.js';
+import { config, enableHttpMode, serverVersion } from './config.js';
 import { timingSafeEqualUtf8 } from './crypto.js';
-import { acceptsEventStream, applyHttpServerTuning, composeCloseHandlers, createSessionStore, createSlotTracker, drainConnectionsOnShutdown, ensureSessionCapacity, isJsonRpcBatchRequest, isMcpRequestBody, normalizeHost, reserveSessionSlot, startSessionCleanupLoop, } from './http-utils.js';
+import { normalizeHost } from './host-normalization.js';
+import { acceptsEventStream, isJsonRpcBatchRequest, isMcpRequestBody, } from './mcp-validator.js';
 import { createMcpServer } from './mcp.js';
 import { logError, logInfo, logWarn } from './observability.js';
+import { applyHttpServerTuning, drainConnectionsOnShutdown, } from './server-tuning.js';
+import { composeCloseHandlers, createSessionStore, createSlotTracker, ensureSessionCapacity, reserveSessionSlot, startSessionCleanupLoop, } from './session.js';
+import { getTransformPoolStats } from './transform.js';
 import { isObject } from './type-guards.js';
+function createTransportAdapter(transportImpl) {
+    const noopOnClose = () => { };
+    const noopOnError = () => { };
+    const noopOnMessage = () => { };
+    let oncloseHandler = noopOnClose;
+    let onerrorHandler = noopOnError;
+    let onmessageHandler = noopOnMessage;
+    return {
+        start: () => transportImpl.start(),
+        send: (message, options) => transportImpl.send(message, options),
+        close: () => transportImpl.close(),
+        get onclose() {
+            return oncloseHandler;
+        },
+        set onclose(handler) {
+            oncloseHandler = handler;
+            transportImpl.onclose = handler;
+        },
+        get onerror() {
+            return onerrorHandler;
+        },
+        set onerror(handler) {
+            onerrorHandler = handler;
+            transportImpl.onerror = handler;
+        },
+        get onmessage() {
+            return onmessageHandler;
+        },
+        set onmessage(handler) {
+            onmessageHandler = handler;
+            transportImpl.onmessage = handler;
+        },
+    };
+}
 function shimResponse(res) {
     const shim = res;
     shim.status = function (code) {
@@ -144,26 +182,26 @@ function resolveOriginHost(origin) {
         return null;
     }
 }
+function rejectHostRequest(res, status, message) {
+    res.status(status).json({ error: message });
+    return false;
+}
 function validateHostAndOrigin(req, res) {
     const host = resolveHostHeader(req);
     if (!host) {
-        res.status(400).json({ error: 'Missing or invalid Host header' });
-        return false;
+        return rejectHostRequest(res, 400, 'Missing or invalid Host header');
     }
     if (!ALLOWED_HOSTS.has(host)) {
-        res.status(403).json({ error: 'Host not allowed' });
-        return false;
+        return rejectHostRequest(res, 403, 'Host not allowed');
     }
     const originHeader = getHeaderValue(req, 'origin');
     if (originHeader) {
         const originHost = resolveOriginHost(originHeader);
         if (!originHost) {
-            res.status(403).json({ error: 'Invalid Origin header' });
-            return false;
+            return rejectHostRequest(res, 403, 'Invalid Origin header');
         }
         if (!ALLOWED_HOSTS.has(originHost)) {
-            res.status(403).json({ error: 'Origin not allowed' });
-            return false;
+            return rejectHostRequest(res, 403, 'Origin not allowed');
         }
     }
     return true;
@@ -318,24 +356,35 @@ async function verifyWithIntrospection(token) {
         throw new InvalidTokenError('Token is inactive');
     return buildIntrospectionAuthInfo(token, payload);
 }
+function resolveBearerToken(authHeader) {
+    const [type, token] = authHeader.split(' ');
+    if (type !== 'Bearer' || !token) {
+        throw new InvalidTokenError('Invalid Authorization header format');
+    }
+    return token;
+}
+function authenticateWithToken(token) {
+    return config.auth.mode === 'oauth'
+        ? verifyWithIntrospection(token)
+        : Promise.resolve(verifyStaticToken(token));
+}
+function authenticateWithApiKey(req) {
+    const apiKey = getHeaderValue(req, 'x-api-key');
+    if (apiKey && config.auth.mode === 'static') {
+        return verifyStaticToken(apiKey);
+    }
+    if (apiKey && config.auth.mode === 'oauth') {
+        throw new InvalidTokenError('X-API-Key not supported for OAuth');
+    }
+    throw new InvalidTokenError('Missing Authorization header');
+}
 async function authenticate(req) {
     const authHeader = req.headers.authorization;
     if (!authHeader) {
-        const apiKey = getHeaderValue(req, 'x-api-key');
-        if (apiKey && config.auth.mode === 'static') {
-            return verifyStaticToken(apiKey);
-        }
-        if (apiKey && config.auth.mode === 'oauth') {
-            throw new InvalidTokenError('X-API-Key not supported for OAuth');
-        }
-        throw new InvalidTokenError('Missing Authorization header');
+        return authenticateWithApiKey(req);
     }
-    const [type, token] = authHeader.split(' ');
-    if (type !== 'Bearer' || !token)
-        throw new InvalidTokenError('Invalid Authorization header format');
-    if (config.auth.mode === 'oauth')
-        return verifyWithIntrospection(token);
-    return verifyStaticToken(token);
+    const token = resolveBearerToken(authHeader);
+    return authenticateWithToken(token);
 }
 // --- MCP Routes ---
 function sendError(res, code, message, status = 400, id = null) {
@@ -394,7 +443,8 @@ async function createNewSession(store, mcpServer, res, requestId) {
             tracker.releaseSlot();
     };
     try {
-        await mcpServer.connect(transportImpl);
+        const transport = createTransportAdapter(transportImpl);
+        await mcpServer.connect(transport);
     }
     catch (err) {
         clearTimeout(initTimeout);
@@ -531,7 +581,22 @@ async function dispatchRequest(req, res, url, ctx) {
     const { method } = req;
     try {
         if (method === 'GET' && path === '/health') {
-            res.status(200).json({ status: 'ok' });
+            const poolStats = getTransformPoolStats();
+            res.status(200).json({
+                status: 'ok',
+                version: serverVersion,
+                uptime: Math.floor(process.uptime()),
+                timestamp: new Date().toISOString(),
+                stats: {
+                    activeSessions: ctx.store.size(),
+                    cacheKeys: cacheKeys().length,
+                    workerPool: poolStats ?? {
+                        queueDepth: 0,
+                        activeWorkers: 0,
+                        capacity: 0,
+                    },
+                },
+            });
             return;
         }
         if (!(await authenticateRequest(req, res))) {
@@ -642,4 +707,3 @@ async function handleRequest(rawReq, rawRes, rateLimiter, ctx) {
     // 5. Routing
     await dispatchRequest(req, res, url, ctx);
 }
-//# sourceMappingURL=http-native.js.map

package/dist/index.d.ts CHANGED Viewed

@@ -1,3 +1,2 @@
 #!/usr/bin/env node
 export {};
-//# sourceMappingURL=index.d.ts.map

package/dist/index.js CHANGED Viewed

@@ -52,4 +52,3 @@ catch (error) {
     process.stderr.write(`Failed to start server: ${message}\n`);
     process.exit(1);
 }
-//# sourceMappingURL=index.js.map

package/dist/instructions.md CHANGED Viewed

@@ -1,41 +1,41 @@
-# superFetch Instructions
-> Guidance for the Agent: These instructions are available as a resource (`internal://instructions`) or prompt (`get-help`). Load them when you are unsure about tool usage.
-## 1. Core Capability
-- **Domain:** Fetch public http(s) URLs, extract readable content, and return clean Markdown.
-- **Primary Resources:** `fetch-url` output (`markdown`, `title`, `url`) and cache resources (`superfetch://cache/markdown/{urlHash}`).
-## 2. The "Golden Path" Workflows (Critical)
-_Describe the standard order of operations using ONLY tools that exist._
-### Workflow A: Fetch and Read
-1. Call `fetch-url` with `url`.
-2. Read `structuredContent.markdown` and `structuredContent.title` from the result.
-3. If content is truncated (look for `...[truncated]`), follow the returned `resource_link` URI.
-   > Constraint: Never guess resource URIs. Use the returned `resource_link` or list resources first.
-### Workflow B: Retrieve Cached Content
-1. List resources to find available cached pages (`superfetch://cache/...`).
-2. Read the specific `superfetch://cache/markdown/{urlHash}` URI.
-## 3. Tool Nuances & Gotchas
-_Do NOT repeat JSON schema. Focus on behavior and pitfalls._
-- **`fetch-url`**
-  - **Purpose:** Fetches a webpage and converts it to clean Markdown format.
-  - **Inputs:** `url` (Must be public http/https. Private patterns like localhost/127.0.0.1 are blocked).
-  - **Side effects:** Open world network request; writes to internal LRU cache.
-  - **Latency/limits:** Network-bound. Large content exceeds inline limits and returns a `resource_link`.
-  - **Common failure modes:** `VALIDATION_ERROR` (private/blocked URL), `FETCH_ERROR` (network timeout/404).
-## 4. Error Handling Strategy
-- **`VALIDATION_ERROR`**: Ensure the URL is valid and publicly accessible.
-- **`FETCH_ERROR`**: Retry once. If persistent, the site may be blocking automated requests.
-- **Truncation**: If `isError` is false but content ends in `...[truncated]`, you MUST read the provided `resource_link` URI to get the full markdown.
+# superFetch Instructions
+> Guidance for the Agent: These instructions are available as a resource (`internal://instructions`) or prompt (`get-help`). Load them when you are unsure about tool usage.
+## 1. Core Capability
+- **Domain:** Fetch public http(s) URLs, extract readable content, and return clean Markdown.
+- **Primary Resources:** `fetch-url` output (`markdown`, `title`, `url`) and cache resources (`superfetch://cache/markdown/{urlHash}`).
+## 2. The "Golden Path" Workflows (Critical)
+_Describe the standard order of operations using ONLY tools that exist._
+### Workflow A: Fetch and Read
+1. Call `fetch-url` with `url`.
+2. Read `structuredContent.markdown` and `structuredContent.title` from the result.
+3. If content is truncated (look for `...[truncated]`), follow the returned `resource_link` URI.
+   > Constraint: Never guess resource URIs. Use the returned `resource_link` or list resources first.
+### Workflow B: Retrieve Cached Content
+1. List resources to find available cached pages (`superfetch://cache/...`).
+2. Read the specific `superfetch://cache/markdown/{urlHash}` URI.
+## 3. Tool Nuances & Gotchas
+_Do NOT repeat JSON schema. Focus on behavior and pitfalls._
+- **`fetch-url`**
+  - **Purpose:** Fetches a webpage and converts it to clean Markdown format.
+  - **Inputs:** `url` (Must be public http/https. Private patterns like localhost/127.0.0.1 are blocked).
+  - **Side effects:** Open world network request; writes to internal LRU cache.
+  - **Latency/limits:** Network-bound. Large content exceeds inline limits and returns a `resource_link`.
+  - **Common failure modes:** `VALIDATION_ERROR` (private/blocked URL), `FETCH_ERROR` (network timeout/404).
+## 4. Error Handling Strategy
+- **`VALIDATION_ERROR`**: Ensure the URL is valid and publicly accessible.
+- **`FETCH_ERROR`**: Retry once. If persistent, the site may be blocking automated requests.
+- **Truncation**: If `isError` is false but content ends in `...[truncated]`, you MUST read the provided `resource_link` URI to get the full markdown.

package/dist/json.d.ts CHANGED Viewed

@@ -1,2 +1 @@
 export declare function stableStringify(obj: unknown, depth?: number, seen?: WeakSet<object>): string;
-//# sourceMappingURL=json.d.ts.map

package/dist/json.js CHANGED Viewed

@@ -27,4 +27,3 @@ export function stableStringify(obj, depth = 0, seen = new WeakSet()) {
     const processed = processValue(obj, depth, seen);
     return JSON.stringify(processed);
 }
-//# sourceMappingURL=json.js.map

package/dist/language-detection.d.ts CHANGED Viewed

@@ -10,4 +10,3 @@ export declare function detectLanguageFromCode(code: string): string | undefined
  * Resolve language from HTML attributes (class name and data-language).
  */
 export declare function resolveLanguageFromAttributes(className: string, dataLang: string): string | undefined;
-//# sourceMappingURL=language-detection.d.ts.map

package/dist/language-detection.js CHANGED Viewed

@@ -6,7 +6,16 @@
  * Check if source contains the given word as a standalone word (not part of another word).
  */
 function containsWord(source, word) {
-    return new RegExp(`\\b${word}\\b`).test(source);
+    return getWordRegex(word).test(source);
+}
+const WORD_REGEX_CACHE = new Map();
+function getWordRegex(word) {
+    const cached = WORD_REGEX_CACHE.get(word);
+    if (cached)
+        return cached;
+    const compiled = new RegExp(`\\b${word}\\b`);
+    WORD_REGEX_CACHE.set(word, compiled);
+    return compiled;
 }
 /**
  * Extract language from class name (e.g., "language-typescript", "lang-js", "hljs javascript").
@@ -280,4 +289,3 @@ export function resolveLanguageFromAttributes(className, dataLang) {
     const classMatch = extractLanguageFromClassName(className);
     return classMatch ?? resolveLanguageFromDataAttribute(dataLang);
 }
-//# sourceMappingURL=language-detection.js.map

package/dist/markdown-cleanup.d.ts CHANGED Viewed

@@ -1,19 +1,12 @@
-/**
- * Markdown cleanup utilities for post-processing converted content.
- *
- * Goals:
- * - Never mutate fenced code blocks (``` / ~~~) content.
- * - Keep rules localized and readable.
- * - Avoid multi-pass regexes that accidentally hit code blocks.
- */
-/**
- * Clean up common markdown artifacts and formatting issues.
- * IMPORTANT: All rules are applied ONLY outside fenced code blocks.
- */
+import type { MetadataBlock } from './transform-types.js';
 export declare function cleanupMarkdownArtifacts(content: string): string;
+export declare function extractTitleFromRawMarkdown(content: string): string | undefined;
+export declare function addSourceToMarkdown(content: string, url: string): string;
+export declare function isRawTextContent(content: string): boolean;
+export declare function isLikelyHtmlContent(content: string): boolean;
+export declare function buildMetadataFooter(metadata?: MetadataBlock, fallbackUrl?: string): string;
 /**
  * Promote standalone lines that look like headings to proper markdown headings.
  * Fence-aware: never modifies content inside fenced code blocks.
  */
 export declare function promoteOrphanHeadings(markdown: string): string;
-//# sourceMappingURL=markdown-cleanup.d.ts.map

package/dist/markdown-cleanup.js CHANGED Viewed

@@ -1,11 +1,4 @@
-/**
- * Markdown cleanup utilities for post-processing converted content.
- *
- * Goals:
- * - Never mutate fenced code blocks (``` / ~~~) content.
- * - Keep rules localized and readable.
- * - Avoid multi-pass regexes that accidentally hit code blocks.
- */
+import { config } from './config.js';
 // ─────────────────────────────────────────────────────────────────────────────
 // Fence state helpers
 // ─────────────────────────────────────────────────────────────────────────────
@@ -76,15 +69,6 @@ function splitByFences(content) {
     }
     return segments;
 }
-/**
- * Apply a transformation function only to non-fenced content.
- */
-function mapOutsideFences(content, transform) {
-    const segments = splitByFences(content);
-    return segments
-        .map((seg) => (seg.inFence ? seg.content : transform(seg.content)))
-        .join('\n');
-}
 // ─────────────────────────────────────────────────────────────────────────────
 // Cleanup rules (OUTSIDE fences only)
 // ─────────────────────────────────────────────────────────────────────────────
@@ -165,28 +149,263 @@ function normalizeListsAndSpacing(text) {
     // Collapse excessive blank lines
     return text.replace(/\n{3,}/g, '\n\n');
 }
+const CLEANUP_STEPS = [
+    fixOrphanHeadings,
+    removeEmptyHeadings,
+    removeSkipLinksAndEmptyAnchors,
+    ensureBlankLineAfterHeadings,
+    removeTocBlocks,
+    tidyLinksAndEscapes,
+    normalizeListsAndSpacing,
+];
 // ─────────────────────────────────────────────────────────────────────────────
 // Public API
 // ─────────────────────────────────────────────────────────────────────────────
-/**
- * Clean up common markdown artifacts and formatting issues.
- * IMPORTANT: All rules are applied ONLY outside fenced code blocks.
- */
+function getLastLine(text) {
+    const index = text.lastIndexOf('\n');
+    return index === -1 ? text : text.slice(index + 1);
+}
 export function cleanupMarkdownArtifacts(content) {
     if (!content)
         return '';
-    const cleaned = mapOutsideFences(content, (outside) => {
-        let text = outside;
-        text = fixOrphanHeadings(text);
-        text = removeEmptyHeadings(text);
-        text = removeSkipLinksAndEmptyAnchors(text);
-        text = ensureBlankLineAfterHeadings(text);
-        text = removeTocBlocks(text);
-        text = tidyLinksAndEscapes(text);
-        text = normalizeListsAndSpacing(text);
-        return text;
-    });
-    return cleaned.trim();
+    const segments = splitByFences(content);
+    return segments
+        .map((seg, index) => {
+        if (seg.inFence)
+            return seg.content;
+        const prevSeg = segments[index - 1];
+        const prevLineContext = prevSeg ? getLastLine(prevSeg.content) : '';
+        const lines = seg.content.split('\n');
+        const promotedLines = [];
+        for (let i = 0; i < lines.length; i += 1) {
+            const line = lines[i] ?? '';
+            const prevLine = i > 0 ? (lines[i - 1] ?? '') : prevLineContext;
+            promotedLines.push(processNonFencedLine(line, prevLine));
+        }
+        const promoted = promotedLines.join('\n');
+        return CLEANUP_STEPS.reduce((text, step) => step(text), promoted);
+    })
+        .join('\n')
+        .trim();
+}
+// ─────────────────────────────────────────────────────────────────────────────
+// Raw markdown handling + metadata footer
+// ─────────────────────────────────────────────────────────────────────────────
+const HEADING_PATTERN = /^#{1,6}\s/m;
+const LIST_PATTERN = /^(?:[-*+])\s/m;
+const HTML_DOCUMENT_PATTERN = /^(<!doctype|<html)/i;
+function containsMarkdownHeading(content) {
+    return HEADING_PATTERN.test(content);
+}
+function containsMarkdownList(content) {
+    return LIST_PATTERN.test(content);
+}
+function containsFencedCodeBlock(content) {
+    const first = content.indexOf('```');
+    if (first === -1)
+        return false;
+    return content.includes('```', first + 3);
+}
+function looksLikeMarkdown(content) {
+    return (containsMarkdownHeading(content) ||
+        containsMarkdownList(content) ||
+        containsFencedCodeBlock(content));
+}
+function detectLineEnding(content) {
+    return content.includes('\r\n') ? '\r\n' : '\n';
+}
+const FRONTMATTER_DELIMITER = '---';
+function findFrontmatterLines(content) {
+    const lineEnding = detectLineEnding(content);
+    const lines = content.split(lineEnding);
+    if (lines[0] !== FRONTMATTER_DELIMITER)
+        return null;
+    const endIndex = lines.indexOf(FRONTMATTER_DELIMITER, 1);
+    if (endIndex === -1)
+        return null;
+    return { lineEnding, lines, endIndex };
+}
+function stripOptionalQuotes(value) {
+    const trimmed = value.trim();
+    if (trimmed.length < 2)
+        return trimmed;
+    const first = trimmed[0];
+    const last = trimmed[trimmed.length - 1];
+    if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
+        return trimmed.slice(1, -1).trim();
+    }
+    return trimmed;
+}
+function parseFrontmatterEntry(line) {
+    const trimmed = line.trim();
+    if (!trimmed)
+        return null;
+    const separatorIndex = trimmed.indexOf(':');
+    if (separatorIndex <= 0)
+        return null;
+    const key = trimmed.slice(0, separatorIndex).trim().toLowerCase();
+    const value = trimmed.slice(separatorIndex + 1);
+    return { key, value };
+}
+function isTitleKey(key) {
+    return key === 'title' || key === 'name';
+}
+function extractTitleFromHeading(content) {
+    const lineEnding = detectLineEnding(content);
+    const lines = content.split(lineEnding);
+    for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed)
+            continue;
+        let index = 0;
+        while (index < trimmed.length && trimmed[index] === '#') {
+            index += 1;
+        }
+        if (index === 0 || index > 6)
+            return undefined;
+        const nextChar = trimmed[index];
+        if (nextChar !== ' ' && nextChar !== '\t')
+            return undefined;
+        const heading = trimmed.slice(index).trim();
+        return heading.length > 0 ? heading : undefined;
+    }
+    return undefined;
+}
+export function extractTitleFromRawMarkdown(content) {
+    const frontmatter = findFrontmatterLines(content);
+    if (!frontmatter) {
+        return extractTitleFromHeading(content);
+    }
+    const { lines, endIndex } = frontmatter;
+    const entry = lines
+        .slice(1, endIndex)
+        .map((line) => parseFrontmatterEntry(line))
+        .find((parsed) => parsed !== null && isTitleKey(parsed.key));
+    if (!entry)
+        return undefined;
+    const value = stripOptionalQuotes(entry.value);
+    return value || undefined;
+}
+function hasMarkdownSourceLine(content) {
+    const lineEnding = detectLineEnding(content);
+    const lines = content.split(lineEnding);
+    const limit = Math.min(lines.length, 50);
+    for (let index = 0; index < limit; index += 1) {
+        const line = lines[index];
+        if (!line)
+            continue;
+        if (line.trimStart().toLowerCase().startsWith('source:')) {
+            return true;
+        }
+    }
+    return false;
+}
+function addSourceToMarkdownMarkdownFormat(content, url) {
+    if (hasMarkdownSourceLine(content))
+        return content;
+    const lineEnding = detectLineEnding(content);
+    const lines = content.split(lineEnding);
+    const firstNonEmptyIndex = lines.findIndex((line) => line.trim().length > 0);
+    if (firstNonEmptyIndex !== -1) {
+        const firstLine = lines[firstNonEmptyIndex];
+        if (firstLine && /^#{1,6}\s+/.test(firstLine.trim())) {
+            const insertAt = firstNonEmptyIndex + 1;
+            const updated = [
+                ...lines.slice(0, insertAt),
+                '',
+                `Source: ${url}`,
+                '',
+                ...lines.slice(insertAt),
+            ];
+            return updated.join(lineEnding);
+        }
+    }
+    return [`Source: ${url}`, '', content].join(lineEnding);
+}
+export function addSourceToMarkdown(content, url) {
+    const frontmatter = findFrontmatterLines(content);
+    if (config.transform.metadataFormat === 'markdown' && !frontmatter) {
+        return addSourceToMarkdownMarkdownFormat(content, url);
+    }
+    if (!frontmatter) {
+        return `---\nsource: "${url}"\n---\n\n${content}`;
+    }
+    const { lineEnding, lines, endIndex } = frontmatter;
+    const bodyLines = lines.slice(1, endIndex);
+    const hasSource = bodyLines.some((line) => line.trimStart().toLowerCase().startsWith('source:'));
+    if (hasSource)
+        return content;
+    const updatedLines = [
+        lines[0],
+        ...bodyLines,
+        `source: "${url}"`,
+        ...lines.slice(endIndex),
+    ];
+    return updatedLines.join(lineEnding);
+}
+function hasFrontmatter(trimmed) {
+    return trimmed.startsWith('---\n') || trimmed.startsWith('---\r\n');
+}
+function looksLikeHtmlDocument(trimmed) {
+    return HTML_DOCUMENT_PATTERN.test(trimmed);
+}
+function countCommonHtmlTags(content) {
+    const matches = content.match(/<(html|head|body|div|span|script|style|meta|link)\b/gi) ??
+        [];
+    return matches.length;
+}
+export function isRawTextContent(content) {
+    const trimmed = content.trim();
+    const isHtmlDocument = looksLikeHtmlDocument(trimmed);
+    const hasMarkdownFrontmatter = hasFrontmatter(trimmed);
+    const hasTooManyHtmlTags = countCommonHtmlTags(content) > 2;
+    const isMarkdown = looksLikeMarkdown(content);
+    return (!isHtmlDocument &&
+        (hasMarkdownFrontmatter || (!hasTooManyHtmlTags && isMarkdown)));
+}
+export function isLikelyHtmlContent(content) {
+    const trimmed = content.trim();
+    if (!trimmed)
+        return false;
+    if (looksLikeHtmlDocument(trimmed))
+        return true;
+    return countCommonHtmlTags(content) > 2;
+}
+function formatFetchedDate(isoString) {
+    try {
+        const date = new Date(isoString);
+        const day = String(date.getDate()).padStart(2, '0');
+        const month = String(date.getMonth() + 1).padStart(2, '0');
+        const year = date.getFullYear();
+        return `${day}-${month}-${year}`;
+    }
+    catch {
+        return isoString;
+    }
+}
+export function buildMetadataFooter(metadata, fallbackUrl) {
+    if (!metadata)
+        return '';
+    const lines = ['---', ''];
+    const url = metadata.url || fallbackUrl;
+    const parts = [];
+    if (metadata.title)
+        parts.push(`_${metadata.title}_`);
+    if (metadata.author)
+        parts.push(`_${metadata.author}_`);
+    if (url)
+        parts.push(`[_Original Source_](${url})`);
+    if (metadata.fetchedAt) {
+        const formattedDate = formatFetchedDate(metadata.fetchedAt);
+        parts.push(`_${formattedDate}_`);
+    }
+    if (parts.length > 0) {
+        lines.push(` ${parts.join(' | ')}`);
+    }
+    if (metadata.description) {
+        lines.push(` <sub>${metadata.description}</sub>`);
+    }
+    return lines.join('\n');
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // Heading Promotion (fence-aware)
@@ -280,4 +499,3 @@ export function promoteOrphanHeadings(markdown) {
     }
     return result.join('\n');
 }
-//# sourceMappingURL=markdown-cleanup.js.map

package/dist/mcp-validator.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+export type JsonRpcId = string | number | null;
+export interface McpRequestParams {
+    _meta?: Record<string, unknown>;
+    [key: string]: unknown;
+}
+export interface McpRequestBody {
+    jsonrpc: '2.0';
+    method: string;
+    id?: JsonRpcId;
+    params?: McpRequestParams;
+}
+export declare function isJsonRpcBatchRequest(body: unknown): boolean;
+export declare function isMcpRequestBody(body: unknown): body is McpRequestBody;
+export declare function acceptsEventStream(header: string | null | undefined): boolean;