npm - @j0hanz/superfetch - Versions diffs - 2.5.2 → 2.6.0 - Mend

@j0hanz/superfetch 2.5.2 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/README.md +356 -223
package/dist/assets/logo.svg +24837 -24835
package/dist/cache.d.ts +28 -20
package/dist/cache.js +292 -514
package/dist/config.d.ts +41 -7
package/dist/config.js +298 -148
package/dist/crypto.js +25 -12
package/dist/dom-noise-removal.js +379 -421
package/dist/errors.d.ts +2 -2
package/dist/errors.js +25 -8
package/dist/fetch.d.ts +18 -16
package/dist/fetch.js +1132 -526
package/dist/host-normalization.js +40 -10
package/dist/http-native.js +628 -287
package/dist/index.js +67 -7
package/dist/instructions.md +44 -30
package/dist/ip-blocklist.d.ts +8 -0
package/dist/ip-blocklist.js +65 -0
package/dist/json.js +14 -9
package/dist/language-detection.d.ts +2 -11
package/dist/language-detection.js +289 -280
package/dist/markdown-cleanup.d.ts +0 -1
package/dist/markdown-cleanup.js +391 -429
package/dist/mcp-validator.js +4 -2
package/dist/mcp.js +184 -135
package/dist/observability.js +89 -21
package/dist/resources.js +16 -6
package/dist/server-tuning.d.ts +2 -0
package/dist/server-tuning.js +25 -23
package/dist/session.d.ts +1 -0
package/dist/session.js +41 -33
package/dist/tasks.d.ts +2 -0
package/dist/tasks.js +91 -9
package/dist/timer-utils.d.ts +5 -0
package/dist/timer-utils.js +20 -0
package/dist/tools.d.ts +28 -5
package/dist/tools.js +317 -183
package/dist/transform-types.d.ts +5 -1
package/dist/transform.d.ts +3 -2
package/dist/transform.js +1138 -421
package/dist/type-guards.d.ts +1 -0
package/dist/type-guards.js +7 -0
package/dist/workers/transform-child.d.ts +1 -0
package/dist/workers/transform-child.js +118 -0
package/dist/workers/transform-worker.js +87 -78
package/package.json +21 -13

package/dist/markdown-cleanup.js CHANGED Viewed

@@ -1,483 +1,448 @@
 import { config } from './config.js';
-/* -------------------------------------------------------------------------------------------------
- * Fences
- * ------------------------------------------------------------------------------------------------- */
-function isFenceStart(line) {
-    const trimmed = line.trimStart();
-    return trimmed.startsWith('```') || trimmed.startsWith('~~~');
-}
-function extractFenceMarker(line) {
-    const trimmed = line.trimStart();
-    const match = /^(`{3,}|~{3,})/.exec(trimmed);
-    return match?.[1] ?? '```';
-}
-function isFenceEnd(line, marker) {
-    const trimmed = line.trimStart();
-    return (trimmed.startsWith(marker) && trimmed.slice(marker.length).trim() === '');
-}
-function initialFenceState() {
-    return { inFence: false, marker: '' };
-}
-function advanceFenceState(line, state) {
-    if (!state.inFence && isFenceStart(line)) {
-        state.inFence = true;
-        state.marker = extractFenceMarker(line);
-        return;
-    }
-    if (state.inFence && isFenceEnd(line, state.marker)) {
-        state.inFence = false;
-        state.marker = '';
-    }
+// --- Constants & Regex ---
+const MAX_LINE_LENGTH = 80;
+const REGEX = {
+    HEADING_MARKER: /^#{1,6}\s/m,
+    HEADING_STRICT: /^#{1,6}\s+/m,
+    EMPTY_HEADING_LINE: /^#{1,6}[ \t\u00A0]*$/,
+    FENCE_START: /^\s*(`{3,}|~{3,})/,
+    LIST_MARKER: /^(?:[-*+])\s/m,
+    TOC_LINK: /^- \[[^\]]+\]\(#[^)]+\)\s*$/,
+    TOC_HEADING: /^(?:#{1,6}\s+)?(?:table of contents|contents)\s*$/i,
+    HTML_DOC_START: /^(<!doctype|<html)/i,
+    COMBINED_LINE_REMOVALS: /^(?:\[Skip to (?:main )?(?:content|navigation)\]\(#[^)]*\)|\[Skip link\]\(#[^)]*\)|Was this page helpful\??)\s*$/gim,
+    ZERO_WIDTH_ANCHOR: /\[(?:\s|\u200B)*\]\(#[^)]*\)[ \t]*/g,
+    CONCATENATED_PROPS: /([a-z_][a-z0-9_]{0,30}\??:\s+)([\u0022\u201C][^\u0022\u201C\u201D]*[\u0022\u201D])([a-z_][a-z0-9_]{0,30}\??:)/g,
+    DOUBLE_NEWLINE_REDUCER: /\n{3,}/g,
+    SOURCE_KEY: /^source:\s/im,
+    HEADING_SPACING: /(^#{1,6}\s[^\n]*)\n([^\n])/gm,
+    HEADING_CODE_BLOCK: /(^#{1,6}\s+\w+)```/gm,
+    HEADING_CAMEL_CASE: /(^#{1,6}\s+\w*[A-Z])([A-Z][a-z])/gm,
+    SPACING_LINK_FIX: /\]\(([^)]+)\)\[/g,
+    SPACING_ADJ_COMBINED: /(?:\]\([^)]+\)|`[^`]+`)(?=[A-Za-z0-9])/g,
+    SPACING_CODE_DASH: /(`[^`]+`)\s*\\-\s*/g,
+    SPACING_ESCAPES: /\\([[\].])/g,
+    SPACING_URL_ENC: /\]\([^)]*%5[Ff][^)]*\)/g,
+    SPACING_LIST_NUM_COMBINED: /^((?![-*+] |\d+\. |[ \t]).+)\n((?:[-*+]|\d+\.) )/gm,
+    TYPEDOC: /(`+)(?:(?!\1)[\s\S])*?\1|\s?\/\\?\*[\s\S]*?\\?\*\//g,
+};
+const HEADING_KEYWORDS = new Set(config.markdownCleanup.headingKeywords.map((value) => value.toLocaleLowerCase(config.i18n.locale)));
+const SPECIAL_PREFIXES = /^(?:example|note|tip|warning|important|caution):\s+\S/i;
+// --- Helper Functions ---
+function getLineEnding(content) {
+    return content.includes('\r\n') ? '\r\n' : '\n';
 }
-class FencedSegmenter {
-    split(content) {
-        const lines = content.split('\n');
-        const segments = [];
-        const state = initialFenceState();
-        let current = [];
-        let currentIsFence = false;
-        for (const line of lines) {
-            // Transition into fence: flush outside segment first.
-            if (!state.inFence && isFenceStart(line)) {
-                if (current.length > 0) {
-                    segments.push({
-                        content: current.join('\n'),
-                        inFence: currentIsFence,
-                    });
-                    current = [];
-                }
-                currentIsFence = true;
-                current.push(line);
-                advanceFenceState(line, state);
-                continue;
-            }
-            current.push(line);
-            const wasInFence = state.inFence;
-            advanceFenceState(line, state);
-            // Transition out of fence: flush fence segment.
-            if (wasInFence && !state.inFence) {
-                segments.push({ content: current.join('\n'), inFence: true });
-                current = [];
-                currentIsFence = false;
-            }
-        }
-        if (current.length > 0) {
-            segments.push({ content: current.join('\n'), inFence: currentIsFence });
-        }
-        return segments;
+function hasFollowingContent(lines, startIndex) {
+    // Optimization: Bound lookahead to avoid checking too many lines in huge files
+    const max = Math.min(lines.length, startIndex + 50);
+    for (let i = startIndex + 1; i < max; i++) {
+        const line = lines[i];
+        if (line && line.trim().length > 0)
+            return true;
     }
+    return false;
 }
-const fencedSegmenter = new FencedSegmenter();
-/* -------------------------------------------------------------------------------------------------
- * Orphan heading promotion
- * ------------------------------------------------------------------------------------------------- */
-const HEADING_KEYWORDS = new Set([
-    'overview',
-    'introduction',
-    'summary',
-    'conclusion',
-    'prerequisites',
-    'requirements',
-    'installation',
-    'configuration',
-    'usage',
-    'features',
-    'limitations',
-    'troubleshooting',
-    'faq',
-    'resources',
-    'references',
-    'changelog',
-    'license',
-    'acknowledgments',
-    'appendix',
-]);
-class OrphanHeadingPromoter {
-    shouldPromote(line, prevLine) {
-        const isPrecededByBlank = prevLine.trim() === '';
-        if (!isPrecededByBlank)
+// Optimized Heuristics
+function isTitleCaseOrKeyword(trimmed) {
+    // Quick check for length to avoid regex on long strings
+    if (trimmed.length > MAX_LINE_LENGTH)
+        return false;
+    // Single word optimization
+    if (!trimmed.includes(' ')) {
+        if (!/^[A-Z]/.test(trimmed))
             return false;
-        return this.isLikelyHeadingLine(line);
+        return HEADING_KEYWORDS.has(trimmed.toLocaleLowerCase(config.i18n.locale));
     }
-    format(line) {
-        const trimmed = line.trim();
-        const isExample = /^example:\s/i.test(trimmed);
-        const prefix = isExample ? '### ' : '## ';
-        return prefix + trimmed;
+    // Split limited number of words
+    const words = trimmed.split(/\s+/);
+    const len = words.length;
+    if (len < 2 || len > 6)
+        return false;
+    let capitalizedCount = 0;
+    for (let i = 0; i < len; i++) {
+        const w = words[i];
+        if (!w)
+            continue;
+        const isCap = /^[A-Z][a-z]*$/.test(w);
+        if (isCap)
+            capitalizedCount++;
+        else if (!/^(?:and|or|the|of|in|for|to|a)$/i.test(w))
+            return false;
     }
-    processLine(line, prevLine) {
-        if (this.shouldPromote(line, prevLine)) {
-            return this.format(line);
+    return capitalizedCount >= 2;
+}
+function getHeadingPrefix(trimmed) {
+    if (trimmed.length > MAX_LINE_LENGTH)
+        return null;
+    // Fast path: Check common markdown markers first
+    const firstChar = trimmed.charCodeAt(0);
+    // # (35), - (45), * (42), + (43), digit (48-57), [ (91)
+    if (firstChar === 35 ||
+        firstChar === 45 ||
+        firstChar === 42 ||
+        firstChar === 43 ||
+        firstChar === 91 ||
+        (firstChar >= 48 && firstChar <= 57)) {
+        if (REGEX.HEADING_MARKER.test(trimmed) ||
+            REGEX.LIST_MARKER.test(trimmed) ||
+            /^\d+\.\s/.test(trimmed) ||
+            /^\[.*\]\(.*\)$/.test(trimmed)) {
+            return null;
         }
-        return line;
     }
-    isLikelyHeadingLine(line) {
-        const trimmed = line.trim();
-        if (!trimmed || trimmed.length > 80)
-            return false;
-        if (/^#{1,6}\s/.test(trimmed))
-            return false;
-        if (/^[-*+•]\s/.test(trimmed) || /^\d+\.\s/.test(trimmed))
-            return false;
-        if (/[.!?]$/.test(trimmed))
-            return false;
-        if (/^\[.*\]\(.*\)$/.test(trimmed))
-            return false;
-        if (/^(?:example|note|tip|warning|important|caution):\s+\S/i.test(trimmed)) {
+    if (SPECIAL_PREFIXES.test(trimmed)) {
+        return /^example:\s/i.test(trimmed) ? '### ' : '## ';
+    }
+    const lastChar = trimmed.charCodeAt(trimmed.length - 1);
+    // . (46), ! (33), ? (63)
+    if (lastChar === 46 || lastChar === 33 || lastChar === 63)
+        return null;
+    return isTitleCaseOrKeyword(trimmed) ? '## ' : null;
+}
+// Optimized TOC detection
+function hasTocBlock(lines, headingIndex) {
+    const lookaheadMax = Math.min(lines.length, headingIndex + 8);
+    for (let i = headingIndex + 1; i < lookaheadMax; i++) {
+        const line = lines[i];
+        if (!line || line.trim().length === 0)
+            continue;
+        if (REGEX.TOC_LINK.test(line))
             return true;
-        }
-        const words = trimmed.split(/\s+/);
-        if (words.length >= 2 && words.length <= 6) {
-            const isTitleCase = words.every((w) => /^[A-Z][a-z]*$/.test(w) || /^(?:and|or|the|of|in|for|to|a)$/i.test(w));
-            if (isTitleCase)
-                return true;
-        }
-        if (words.length === 1) {
-            const lower = trimmed.toLowerCase();
-            if (HEADING_KEYWORDS.has(lower) && /^[A-Z]/.test(trimmed))
-                return true;
-        }
-        return false;
     }
+    return false;
 }
-const orphanHeadingPromoter = new OrphanHeadingPromoter();
-/* -------------------------------------------------------------------------------------------------
- * Cleanup rules (OUTSIDE fences only)
- * ------------------------------------------------------------------------------------------------- */
-function removeEmptyHeadings(text) {
-    return text.replace(/^#{1,6}[ \t\u00A0]*$\r?\n?/gm, '');
-}
-function fixOrphanHeadings(text) {
-    // Pattern: hashes on their own line, blank line, then a "heading-like" line.
-    return text.replace(/^(.*?)(#{1,6})\s*(?:\r?\n){2}([A-Z][^\r\n]+?)(?:\r?\n)/gm, (_match, prefix, hashes, heading) => {
-        if (heading.length > 150)
-            return _match;
-        const trimmedPrefix = prefix.trim();
-        if (trimmedPrefix === '') {
-            return `${hashes} ${heading}\n\n`;
-        }
-        return `${trimmedPrefix}\n\n${hashes} ${heading}\n\n`;
-    });
-}
-function removeSkipLinksAndEmptyAnchors(text) {
-    const zeroWidthAnchorLink = /\[(?:\s|\u200B)*\]\(#[^)]*\)[ \t]*/g;
-    return text
-        .replace(zeroWidthAnchorLink, '')
-        .replace(/^\[Skip to (?:main )?content\]\(#[^)]*\)\s*$/gim, '')
-        .replace(/^\[Skip to (?:main )?navigation\]\(#[^)]*\)\s*$/gim, '')
-        .replace(/^\[Skip link\]\(#[^)]*\)\s*$/gim, '');
+function skipTocLines(lines, startIndex) {
+    for (let i = startIndex; i < lines.length; i++) {
+        const line = lines[i];
+        if (!line)
+            continue;
+        if (line.trim().length === 0)
+            continue;
+        if (!REGEX.TOC_LINK.test(line))
+            return i;
+    }
+    return lines.length;
 }
-function ensureBlankLineAfterHeadings(text) {
-    // Heading followed immediately by a fence marker
-    text = text.replace(/(^#{1,6}\s+\w+)```/gm, '$1\n\n```');
-    // Heuristic: Some converters jam words together after a heading
-    text = text.replace(/(^#{1,6}\s+\w*[A-Z])([A-Z][a-z])/gm, '$1\n\n$2');
-    // Any heading line should be followed by a blank line before body
-    return text.replace(/(^#{1,6}\s[^\n]*)\n([^\n])/gm, '$1\n\n$2');
+// --- Main Processing Logic ---
+function tryPromoteOrphan(lines, i, trimmed) {
+    const prevLine = lines[i - 1];
+    const isOrphan = i === 0 || !prevLine || prevLine.trim().length === 0;
+    if (!isOrphan)
+        return null;
+    const prefix = getHeadingPrefix(trimmed);
+    if (!prefix)
+        return null;
+    const isTitleCaseOnly = prefix === '## ' &&
+        !SPECIAL_PREFIXES.test(trimmed) &&
+        trimmed.includes(' ');
+    if (isTitleCaseOnly && !hasFollowingContent(lines, i))
+        return null;
+    return `${prefix}${trimmed}`;
 }
-/**
- * Remove markdown TOC blocks of the form:
- * - [Title](#anchor)
- * outside fenced code blocks.
- */
-function removeTocBlocks(text) {
-    const tocLine = /^- \[[^\]]+\]\(#[^)]+\)\s*$/;
-    const lines = text.split('\n');
-    const out = [];
-    let skipping = false;
-    for (let i = 0; i < lines.length; i += 1) {
-        const line = lines[i] ?? '';
-        const prev = i > 0 ? (lines[i - 1] ?? '') : '';
-        const next = i < lines.length - 1 ? (lines[i + 1] ?? '') : '';
-        if (tocLine.test(line)) {
-            const prevIsToc = tocLine.test(prev) || prev.trim() === '';
-            const nextIsToc = tocLine.test(next) || next.trim() === '';
-            if (prevIsToc || nextIsToc) {
-                skipping = true;
-                continue;
-            }
-        }
-        if (skipping) {
-            if (line.trim() === '') {
-                skipping = false;
-            }
+function shouldSkipAsToc(lines, i, trimmed, removeToc) {
+    if (removeToc && REGEX.TOC_HEADING.test(trimmed) && hasTocBlock(lines, i)) {
+        return skipTocLines(lines, i + 1);
+    }
+    return null;
+}
+function preprocessLines(lines) {
+    const processedLines = [];
+    const len = lines.length;
+    const promote = config.markdownCleanup.promoteOrphanHeadings;
+    const removeToc = config.markdownCleanup.removeTocBlocks;
+    let skipUntil = -1;
+    for (let i = 0; i < len; i++) {
+        if (i < skipUntil)
+            continue;
+        let line = lines[i];
+        if (line === undefined)
+            continue;
+        const trimmed = line.trim();
+        if (REGEX.EMPTY_HEADING_LINE.test(trimmed))
+            continue;
+        const tocSkip = shouldSkipAsToc(lines, i, trimmed, removeToc);
+        if (tocSkip !== null) {
+            skipUntil = tocSkip;
             continue;
         }
-        out.push(line);
+        if (promote && trimmed.length > 0) {
+            const promoted = tryPromoteOrphan(lines, i, trimmed);
+            if (promoted)
+                line = promoted;
+        }
+        processedLines.push(line);
     }
-    return out.join('\n');
-}
-function tidyLinksAndEscapes(text) {
-    return text
-        .replace(/\]\(([^)]+)\)\[/g, ']($1)\n\n[')
-        .replace(/^Was this page helpful\??\s*$/gim, '')
-        .replace(/(`[^`]+`)\s*\\-\s*/g, '$1 - ')
-        .replace(/\\([[]])/g, '$1');
+    return processedLines.join('\n');
 }
-function normalizeListsAndSpacing(text) {
-    // Ensure blank line before list starts (bullet/ordered)
-    text = text.replace(/([^\n])\n([-*+] )/g, '$1\n\n$2');
-    text = text.replace(/(\S)\n(\d+\. )/g, '$1\n\n$2');
-    // Collapse excessive blank lines
-    return text.replace(/\n{3,}/g, '\n\n');
+// Process a block of non-fence lines
+function processTextBuffer(lines) {
+    if (lines.length === 0)
+        return '';
+    const text = preprocessLines(lines);
+    return applyGlobalRegexes(text);
 }
-function fixConcatenatedProperties(text) {
-    const quotedValuePattern = /([a-z_][a-z0-9_]{0,30}\??:\s+)([\u0022\u201C][^\u0022\u201C\u201D]*[\u0022\u201D])([a-z_][a-z0-9_]{0,30}\??:)/g;
+function applyGlobalRegexes(text) {
     let result = text;
-    let iterations = 0;
-    const maxIterations = 3;
-    while (iterations < maxIterations) {
-        const before = result;
-        result = result.replace(quotedValuePattern, '$1$2\n\n$3');
-        if (result === before) {
+    // fixAndSpaceHeadings
+    result = result
+        .replace(REGEX.HEADING_SPACING, '$1\n\n$2')
+        .replace(REGEX.HEADING_CODE_BLOCK, '$1\n\n```')
+        .replace(REGEX.HEADING_CAMEL_CASE, '$1\n\n$2');
+    // removeTypeDocComments
+    if (config.markdownCleanup.removeTypeDocComments) {
+        result = result.replace(REGEX.TYPEDOC, (match) => match.startsWith('`') ? match : '');
+    }
+    if (config.markdownCleanup.removeSkipLinks) {
+        result = result
+            .replace(REGEX.ZERO_WIDTH_ANCHOR, '')
+            .replace(REGEX.COMBINED_LINE_REMOVALS, '');
+    }
+    // normalizeSpacing
+    result = result
+        .replace(REGEX.SPACING_LINK_FIX, ']($1)\n\n[')
+        .replace(REGEX.SPACING_ADJ_COMBINED, '$& ')
+        .replace(REGEX.SPACING_CODE_DASH, '$1 - ')
+        .replace(REGEX.SPACING_ESCAPES, '$1')
+        .replace(REGEX.SPACING_URL_ENC, (m) => m.replace(/%5[Ff]/g, '_'))
+        .replace(REGEX.SPACING_LIST_NUM_COMBINED, '$1\n\n$2')
+        .replace(REGEX.DOUBLE_NEWLINE_REDUCER, '\n\n');
+    // fixProperties
+    for (let k = 0; k < 3; k++) {
+        const next = result.replace(REGEX.CONCATENATED_PROPS, '$1$2\n\n$3');
+        if (next === result)
             break;
-        }
-        iterations++;
+        result = next;
     }
     return result;
 }
-const CLEANUP_STEPS = [
-    fixOrphanHeadings,
-    removeEmptyHeadings,
-    removeSkipLinksAndEmptyAnchors,
-    ensureBlankLineAfterHeadings,
-    removeTocBlocks,
-    tidyLinksAndEscapes,
-    normalizeListsAndSpacing,
-    fixConcatenatedProperties,
-];
-function getLastLine(text) {
-    const index = text.lastIndexOf('\n');
-    return index === -1 ? text : text.slice(index + 1);
-}
-class MarkdownCleanupPipeline {
-    cleanup(markdown) {
-        if (!markdown)
-            return '';
-        const segments = fencedSegmenter.split(markdown);
-        const cleaned = segments
-            .map((seg, index) => {
-            if (seg.inFence)
-                return seg.content;
-            const prevSeg = segments[index - 1];
-            const prevLineContext = prevSeg ? getLastLine(prevSeg.content) : '';
-            const lines = seg.content.split('\n');
-            const promotedLines = [];
-            for (let i = 0; i < lines.length; i += 1) {
-                const line = lines[i] ?? '';
-                const prevLine = i > 0 ? (lines[i - 1] ?? '') : prevLineContext;
-                promotedLines.push(orphanHeadingPromoter.processLine(line, prevLine));
-            }
-            const promoted = promotedLines.join('\n');
-            return CLEANUP_STEPS.reduce((text, step) => step(text), promoted);
-        })
-            .join('\n')
-            .trim();
-        return cleaned;
+function findNextLine(content, lastIndex, len) {
+    let nextIndex = content.indexOf('\n', lastIndex);
+    let line;
+    if (nextIndex === -1) {
+        line = content.slice(lastIndex);
+        nextIndex = len;
     }
+    else {
+        if (nextIndex > lastIndex && content.charCodeAt(nextIndex - 1) === 13) {
+            line = content.slice(lastIndex, nextIndex - 1);
+        }
+        else {
+            line = content.slice(lastIndex, nextIndex);
+        }
+        nextIndex++; // Skip \n
+    }
+    return { line, nextIndex };
 }
-const markdownCleanupPipeline = new MarkdownCleanupPipeline();
-export function cleanupMarkdownArtifacts(content) {
-    return markdownCleanupPipeline.cleanup(content);
-}
-/* -------------------------------------------------------------------------------------------------
- * Raw markdown handling + metadata footer
- * ------------------------------------------------------------------------------------------------- */
-const HEADING_PATTERN = /^#{1,6}\s/m;
-const LIST_PATTERN = /^(?:[-*+])\s/m;
-const HTML_DOCUMENT_PATTERN = /^(<!doctype|<html)/i;
-function containsMarkdownHeading(content) {
-    return HEADING_PATTERN.test(content);
-}
-function containsMarkdownList(content) {
-    return LIST_PATTERN.test(content);
+function checkFenceStart(line) {
+    const match = REGEX.FENCE_START.exec(line);
+    return match ? (match[1] ?? '```') : null;
 }
-function containsFencedCodeBlock(content) {
-    const first = content.indexOf('```');
-    if (first === -1)
-        return false;
-    return content.includes('```', first + 3);
+function isFenceClosure(trimmed, marker) {
+    return (trimmed.startsWith(marker) && trimmed.slice(marker.length).trim() === '');
 }
-function looksLikeMarkdown(content) {
-    return (containsMarkdownHeading(content) ||
-        containsMarkdownList(content) ||
-        containsFencedCodeBlock(content));
+function handleFencedLine(line, trimmed, fenceMarker, segments) {
+    segments.push(line);
+    return isFenceClosure(trimmed, fenceMarker) ? null : fenceMarker;
 }
-function detectLineEnding(content) {
-    return content.includes('\r\n') ? '\r\n' : '\n';
+function handleUnfencedLine(line, segments, buffer) {
+    const newMarker = checkFenceStart(line);
+    if (!newMarker) {
+        buffer.push(line);
+        return { fenceMarker: null, buffer };
+    }
+    if (buffer.length > 0) {
+        segments.push(processTextBuffer(buffer));
+        buffer = [];
+    }
+    segments.push(line);
+    return { fenceMarker: newMarker, buffer };
 }
-const FRONTMATTER_DELIMITER = '---';
-class RawMarkdownFrontmatter {
-    find(content) {
-        const lineEnding = detectLineEnding(content);
-        const lines = content.split(lineEnding);
-        if (lines[0] !== FRONTMATTER_DELIMITER)
-            return null;
-        const endIndex = lines.indexOf(FRONTMATTER_DELIMITER, 1);
-        if (endIndex === -1)
-            return null;
-        return { lineEnding, lines, endIndex };
+export function cleanupMarkdownArtifacts(content) {
+    if (!content)
+        return '';
+    const len = content.length;
+    let lastIndex = 0;
+    let fenceMarker = null;
+    const segments = [];
+    let buffer = [];
+    while (lastIndex < len) {
+        const { line, nextIndex } = findNextLine(content, lastIndex, len);
+        const trimmed = line.trimStart();
+        if (fenceMarker) {
+            fenceMarker = handleFencedLine(line, trimmed, fenceMarker, segments);
+        }
+        else {
+            ({ fenceMarker, buffer } = handleUnfencedLine(line, segments, buffer));
+        }
+        lastIndex = nextIndex;
     }
-    hasFrontmatter(trimmed) {
-        return trimmed.startsWith('---\n') || trimmed.startsWith('---\r\n');
+    if (buffer.length > 0) {
+        segments.push(processTextBuffer(buffer));
     }
+    return segments.join('\n').trim();
 }
-const frontmatter = new RawMarkdownFrontmatter();
-function stripOptionalQuotes(value) {
-    const trimmed = value.trim();
-    if (trimmed.length < 2)
-        return trimmed;
-    const first = trimmed[0];
-    const last = trimmed[trimmed.length - 1];
-    if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
-        return trimmed.slice(1, -1).trim();
+function detectFrontmatter(content) {
+    const len = content.length;
+    if (len < 4)
+        return null;
+    let lineEnding = null;
+    let fenceLen = 0;
+    if (content.startsWith('---\n')) {
+        lineEnding = '\n';
+        fenceLen = 4;
     }
-    return trimmed;
+    else if (content.startsWith('---\r\n')) {
+        lineEnding = '\r\n';
+        fenceLen = 5;
+    }
+    if (!lineEnding)
+        return null;
+    const fence = `---${lineEnding}`;
+    const closeIndex = content.indexOf(fence, fenceLen);
+    if (closeIndex === -1)
+        return null;
+    return {
+        start: 0,
+        end: closeIndex + fenceLen,
+        linesStart: fenceLen,
+        linesEnd: closeIndex,
+        lineEnding,
+    };
 }
 function parseFrontmatterEntry(line) {
     const trimmed = line.trim();
-    if (!trimmed)
-        return null;
-    const separatorIndex = trimmed.indexOf(':');
-    if (separatorIndex <= 0)
+    const idx = trimmed.indexOf(':');
+    if (!trimmed || idx <= 0)
         return null;
-    const key = trimmed.slice(0, separatorIndex).trim().toLowerCase();
-    const value = trimmed.slice(separatorIndex + 1);
-    return { key, value };
-}
-function isTitleKey(key) {
-    return key === 'title' || key === 'name';
-}
-function extractTitleFromHeading(content) {
-    const lineEnding = detectLineEnding(content);
-    const lines = content.split(lineEnding);
-    for (const line of lines) {
-        const trimmed = line.trim();
-        if (!trimmed)
-            continue;
-        let index = 0;
-        while (index < trimmed.length && trimmed[index] === '#') {
-            index += 1;
+    return {
+        key: trimmed.slice(0, idx).trim().toLowerCase(),
+        value: trimmed.slice(idx + 1).trim(),
+    };
+}
+function stripFrontmatterQuotes(val) {
+    const first = val.charAt(0);
+    const last = val.charAt(val.length - 1);
+    if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
+        return val.slice(1, -1).trim();
+    }
+    return val;
+}
+function scanFrontmatterForTitle(content, fm) {
+    const fmBody = content.slice(fm.linesStart, fm.linesEnd);
+    let lastIdx = 0;
+    while (lastIdx < fmBody.length) {
+        let nextIdx = fmBody.indexOf(fm.lineEnding, lastIdx);
+        if (nextIdx === -1)
+            nextIdx = fmBody.length;
+        const line = fmBody.slice(lastIdx, nextIdx);
+        const entry = parseFrontmatterEntry(line);
+        if (entry) {
+            if (entry.key === 'title' || entry.key === 'name') {
+                const cleaned = stripFrontmatterQuotes(entry.value);
+                if (cleaned)
+                    return cleaned;
+            }
         }
-        if (index === 0 || index > 6)
-            return undefined;
-        const nextChar = trimmed[index];
-        if (nextChar !== ' ' && nextChar !== '\t')
-            return undefined;
-        const heading = trimmed.slice(index).trim();
-        return heading.length > 0 ? heading : undefined;
+        lastIdx = nextIdx + fm.lineEnding.length;
     }
     return undefined;
 }
-export function extractTitleFromRawMarkdown(content) {
-    const fm = frontmatter.find(content);
-    if (!fm) {
-        return extractTitleFromHeading(content);
-    }
-    const { lines, endIndex } = fm;
-    const entry = lines
-        .slice(1, endIndex)
-        .map((line) => parseFrontmatterEntry(line))
-        .find((parsed) => parsed !== null && isTitleKey(parsed.key));
-    if (!entry)
-        return undefined;
-    const value = stripOptionalQuotes(entry.value);
-    return value || undefined;
-}
-function hasMarkdownSourceLine(content) {
-    const lineEnding = detectLineEnding(content);
-    const lines = content.split(lineEnding);
-    const limit = Math.min(lines.length, 50);
-    for (let index = 0; index < limit; index += 1) {
-        const line = lines[index];
-        if (!line)
-            continue;
-        if (line.trimStart().toLowerCase().startsWith('source:')) {
-            return true;
+function scanBodyForTitle(content) {
+    const len = content.length;
+    let scanIndex = 0;
+    const LIMIT = 5000;
+    const maxScan = Math.min(len, LIMIT);
+    while (scanIndex < maxScan) {
+        let nextIndex = content.indexOf('\n', scanIndex);
+        if (nextIndex === -1)
+            nextIndex = len;
+        let line = content.slice(scanIndex, nextIndex);
+        if (line.endsWith('\r'))
+            line = line.slice(0, -1);
+        const trimmed = line.trim();
+        if (trimmed) {
+            if (REGEX.HEADING_STRICT.test(trimmed)) {
+                return trimmed.replace(REGEX.HEADING_MARKER, '').trim() || undefined;
+            }
+            return undefined;
         }
+        scanIndex = nextIndex + 1;
     }
-    return false;
+    return undefined;
 }
-function addSourceToMarkdownAsMarkdown(content, url) {
-    if (hasMarkdownSourceLine(content))
-        return content;
-    const lineEnding = detectLineEnding(content);
-    const lines = content.split(lineEnding);
-    const firstNonEmptyIndex = lines.findIndex((line) => line.trim().length > 0);
-    if (firstNonEmptyIndex !== -1) {
-        const firstLine = lines[firstNonEmptyIndex];
-        if (firstLine && /^#{1,6}\s+/.test(firstLine.trim())) {
-            const insertAt = firstNonEmptyIndex + 1;
-            const updated = [
-                ...lines.slice(0, insertAt),
-                '',
-                `Source: ${url}`,
-                '',
-                ...lines.slice(insertAt),
-            ];
-            return updated.join(lineEnding);
-        }
+export function extractTitleFromRawMarkdown(content) {
+    const fm = detectFrontmatter(content);
+    if (fm) {
+        const title = scanFrontmatterForTitle(content, fm);
+        if (title)
+            return title;
     }
-    return [`Source: ${url}`, '', content].join(lineEnding);
+    return scanBodyForTitle(content);
 }
 export function addSourceToMarkdown(content, url) {
-    const fm = frontmatter.find(content);
-    if (config.transform.metadataFormat === 'markdown' && !fm) {
-        return addSourceToMarkdownAsMarkdown(content, url);
+    const fm = detectFrontmatter(content);
+    const useMarkdownFormat = config.transform.metadataFormat === 'markdown';
+    if (useMarkdownFormat && !fm) {
+        if (REGEX.SOURCE_KEY.test(content))
+            return content;
+        const lineEnding = getLineEnding(content);
+        const firstH1Match = REGEX.HEADING_MARKER.exec(content);
+        if (firstH1Match) {
+            const h1Index = firstH1Match.index;
+            const lineEndIndex = content.indexOf(lineEnding, h1Index);
+            const insertPos = lineEndIndex === -1 ? content.length : lineEndIndex + lineEnding.length;
+            const injection = `${lineEnding}Source: ${url}${lineEnding}`;
+            return content.slice(0, insertPos) + injection + content.slice(insertPos);
+        }
+        return `Source: ${url}${lineEnding}${lineEnding}${content}`;
     }
     if (!fm) {
-        // Preserve existing behavior: always uses LF even if content uses CRLF.
-        return `---\nsource: "${url}"\n---\n\n${content}`;
+        const lineEnding = getLineEnding(content);
+        const escapedUrl = url.replace(/"/g, '\\"');
+        return `---${lineEnding}source: "${escapedUrl}"${lineEnding}---${lineEnding}${lineEnding}${content}`;
     }
-    const { lineEnding, lines, endIndex } = fm;
-    const bodyLines = lines.slice(1, endIndex);
-    const hasSource = bodyLines.some((line) => line.trimStart().toLowerCase().startsWith('source:'));
-    if (hasSource)
+    const fmBody = content.slice(fm.linesStart, fm.linesEnd);
+    if (REGEX.SOURCE_KEY.test(fmBody))
         return content;
-    const updatedLines = [
-        lines[0],
-        ...bodyLines,
-        `source: "${url}"`,
-        ...lines.slice(endIndex),
-    ];
-    return updatedLines.join(lineEnding);
-}
-function looksLikeHtmlDocument(trimmed) {
-    return HTML_DOCUMENT_PATTERN.test(trimmed);
-}
-function countCommonHtmlTags(content) {
-    const matches = content.match(/<(html|head|body|div|span|script|style|meta|link)\b/gi) ??
-        [];
-    return matches.length;
+    const escapedUrl = url.replace(/"/g, '\\"');
+    const injection = `source: "${escapedUrl}"${fm.lineEnding}`;
+    return content.slice(0, fm.linesEnd) + injection + content.slice(fm.linesEnd);
+}
+function countCommonTags(content, limit) {
+    if (limit <= 0)
+        return 0;
+    const regex = /<(html|head|body|div|span|script|style|meta|link)\b/gi;
+    let count = 0;
+    while (regex.exec(content)) {
+        count += 1;
+        if (count > limit)
+            break;
+    }
+    return count;
 }
 export function isRawTextContent(content) {
     const trimmed = content.trim();
-    const isHtmlDocument = looksLikeHtmlDocument(trimmed);
-    const hasMarkdownFrontmatter = frontmatter.hasFrontmatter(trimmed);
-    const hasTooManyHtmlTags = countCommonHtmlTags(content) > 2;
-    const isMarkdown = looksLikeMarkdown(content);
-    return (!isHtmlDocument &&
-        (hasMarkdownFrontmatter || (!hasTooManyHtmlTags && isMarkdown)));
-}
-export function isLikelyHtmlContent(content) {
-    const trimmed = content.trim();
-    if (!trimmed)
+    if (REGEX.HTML_DOC_START.test(trimmed))
         return false;
-    if (looksLikeHtmlDocument(trimmed))
+    if (detectFrontmatter(trimmed) !== null)
         return true;
-    return countCommonHtmlTags(content) > 2;
-}
-function formatFetchedDate(isoString) {
-    try {
-        const date = new Date(isoString);
-        const day = String(date.getDate()).padStart(2, '0');
-        const month = String(date.getMonth() + 1).padStart(2, '0');
-        const year = date.getFullYear();
-        return `${day}-${month}-${year}`;
-    }
-    catch {
-        return isoString;
-    }
+    const tagCount = countCommonTags(content, 5);
+    if (tagCount > 5)
+        return false;
+    return (REGEX.HEADING_MARKER.test(content) ||
+        REGEX.LIST_MARKER.test(content) ||
+        content.includes('```'));
+}
+function formatFetchedAt(value) {
+    const date = new Date(value);
+    if (Number.isNaN(date.getTime()))
+        return value;
+    const formatter = new Intl.DateTimeFormat(config.i18n.locale, {
+        day: '2-digit',
+        month: '2-digit',
+        year: 'numeric',
+    });
+    return formatter.format(date);
 }
 export function buildMetadataFooter(metadata, fallbackUrl) {
     if (!metadata)
@@ -492,14 +457,11 @@ export function buildMetadataFooter(metadata, fallbackUrl) {
     if (url)
         parts.push(`[_Original Source_](${url})`);
     if (metadata.fetchedAt) {
-        const formattedDate = formatFetchedDate(metadata.fetchedAt);
-        parts.push(`_${formattedDate}_`);
+        parts.push(`_${formatFetchedAt(metadata.fetchedAt)}_`);
     }
-    if (parts.length > 0) {
+    if (parts.length > 0)
         lines.push(` ${parts.join(' | ')}`);
-    }
-    if (metadata.description) {
+    if (metadata.description)
         lines.push(` <sub>${metadata.description}</sub>`);
-    }
     return lines.join('\n');
 }