npm - @humanspeak/svelte-markdown - Versions diffs - 1.2.0 → 1.4.0 - Mend

@humanspeak/svelte-markdown 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +61 -2
package/dist/Parser.svelte +92 -33
package/dist/Parser.svelte.d.ts +3 -0
package/dist/SvelteMarkdown.svelte +269 -28
package/dist/SvelteMarkdown.svelte.d.ts +5 -2
package/dist/index.d.ts +12 -2
package/dist/index.js +9 -0
package/dist/renderers/Image.svelte +7 -1
package/dist/renderers/Link.svelte +1 -1
package/dist/types.d.ts +34 -4
package/dist/utils/incremental-parser.d.ts +9 -4
package/dist/utils/incremental-parser.js +92 -12
package/dist/utils/sanitize.d.ts +69 -0
package/dist/utils/sanitize.js +104 -0
package/dist/utils/stream-benchmark.d.ts +26 -0
package/dist/utils/stream-benchmark.js +53 -0
package/package.json +21 -21

package/dist/utils/incremental-parser.js CHANGED Viewed

@@ -9,6 +9,10 @@
  * @module incremental-parser
  */
 import { lexAndClean } from './parse-and-cache.js';
+const CLOSED_FENCE_RE = /^ {0,3}(`{3,}|~{3,}).*\n[\s\S]*\n {0,3}\1[ \t]*\n*$/;
+const LINK_REFERENCE_RE = /\[[^\]\n]+\]\[[^\]\n]*\]/;
+const SHORTCUT_REFERENCE_RE = /\[[^\]\n]+\](?![[(])/; // Excludes inline links/images and full refs
+const REFERENCE_DEFINITION_RE = /^\s{0,3}\[[^\]\n]+\]:/m;
 /**
  * Streaming-optimized parser that performs full re-parses but diffs results
  * against the previous token array to minimize DOM updates.
@@ -33,8 +37,12 @@ import { lexAndClean } from './parse-and-cache.js';
 export class IncrementalParser {
     /** Previous parse result for diffing */
     prevTokens = [];
+    /** Previous full source string for append-only tail reparsing */
+    prevSource = '';
     /** Parser options passed to the Marked lexer */
     options;
+    /** Whether caller-supplied parser hooks make tail-window reparsing unsafe */
+    tailWindowDisabled;
     /**
      * Creates a new incremental parser instance.
      *
@@ -42,7 +50,77 @@ export class IncrementalParser {
      */
     constructor(options) {
         this.options = options;
+        const exts = options.extensions;
+        const hasExtensionTokenizers = (exts?.block != null && exts.block.length > 0) ||
+            (exts?.inline != null && exts.inline.length > 0);
+        this.tailWindowDisabled =
+            typeof options.walkTokens === 'function' ||
+                options.tokenizer != null ||
+                hasExtensionTokenizers;
     }
+    getTailWindowBoundary = () => {
+        if (this.prevTokens.length === 0) {
+            return { prefixCount: 0, reparseOffset: 0 };
+        }
+        let offset = 0;
+        for (let i = 0; i < this.prevTokens.length - 1; i++) {
+            offset += this.prevTokens[i].raw.length;
+        }
+        const lastToken = this.prevTokens[this.prevTokens.length - 1];
+        if (this.isStableAtSourceEnd(lastToken)) {
+            return {
+                prefixCount: this.prevTokens.length,
+                reparseOffset: this.prevSource.length
+            };
+        }
+        return {
+            prefixCount: this.prevTokens.length - 1,
+            reparseOffset: offset
+        };
+    };
+    isStableAtSourceEnd = (token) => {
+        if (token.type === 'space')
+            return false;
+        if (token.raw.endsWith('\n\n'))
+            return true;
+        switch (token.type) {
+            case 'heading':
+            case 'hr':
+                return token.raw.endsWith('\n');
+            case 'code':
+                return CLOSED_FENCE_RE.test(token.raw);
+            default:
+                return false;
+        }
+    };
+    hasAppendSensitiveReferenceSyntax = (source) => {
+        if (!source.includes('[') || !source.includes(']'))
+            return false;
+        return (LINK_REFERENCE_RE.test(source) ||
+            SHORTCUT_REFERENCE_RE.test(source) ||
+            REFERENCE_DEFINITION_RE.test(source));
+    };
+    canUseTailWindow = (source, boundary) => {
+        if (this.tailWindowDisabled)
+            return false;
+        if (this.prevSource === '' || this.prevTokens.length === 0)
+            return false;
+        if (!source.startsWith(this.prevSource))
+            return false;
+        if (boundary.reparseOffset <= 0)
+            return false;
+        const stablePrefix = this.prevSource.slice(0, boundary.reparseOffset);
+        if (this.hasAppendSensitiveReferenceSyntax(stablePrefix))
+            return false;
+        return true;
+    };
+    parseSource = (source, boundary) => {
+        if (!this.canUseTailWindow(source, boundary)) {
+            return lexAndClean(source, this.options, false);
+        }
+        const tailTokens = lexAndClean(source.slice(boundary.reparseOffset), this.options, false);
+        return [...this.prevTokens.slice(0, boundary.prefixCount), ...tailTokens];
+    };
     /**
      * Parses the full source and diffs against the previous result.
      *
@@ -50,26 +128,28 @@ export class IncrementalParser {
      * @returns The new tokens and the index where they diverge from the previous parse
      */
     update = (source) => {
-        const newTokens = lexAndClean(source, this.options, false);
+        const boundary = this.getTailWindowBoundary();
+        const newTokens = this.parseSource(source, boundary);
         // Apply walkTokens if configured
         if (typeof this.options.walkTokens === 'function') {
             newTokens.forEach(this.options.walkTokens);
         }
+        // Reference definitions can change inline children without changing raw,
+        // so force a full rerender when reference syntax is present
+        const referenceSensitive = this.hasAppendSensitiveReferenceSyntax(this.prevSource) ||
+            this.hasAppendSensitiveReferenceSyntax(source);
         // Find first divergence point by comparing raw strings
         let divergeAt = 0;
-        const minLen = Math.min(this.prevTokens.length, newTokens.length);
-        while (divergeAt < minLen) {
-            if (this.prevTokens[divergeAt].raw !== newTokens[divergeAt].raw)
-                break;
-            divergeAt++;
+        if (!referenceSensitive) {
+            const minLen = Math.min(this.prevTokens.length, newTokens.length);
+            while (divergeAt < minLen) {
+                if (this.prevTokens[divergeAt].raw !== newTokens[divergeAt].raw)
+                    break;
+                divergeAt++;
+            }
         }
+        this.prevSource = source;
         this.prevTokens = newTokens;
         return { tokens: newTokens, divergeAt };
     };
-    /**
-     * Resets the parser state. Call this when starting a new stream.
-     */
-    reset = () => {
-        this.prevTokens = [];
-    };
 }

package/dist/utils/sanitize.d.ts ADDED Viewed

@@ -0,0 +1,69 @@
+/**
+ * URL and HTML attribute sanitization utilities for XSS prevention.
+ *
+ * These functions are applied in the Parser before tokens reach any
+ * renderer component or snippet, ensuring custom renderers cannot
+ * bypass sanitization.
+ *
+ * @see https://github.com/humanspeak/svelte-markdown/issues/272
+ * @packageDocumentation
+ */
+/**
+ * Context passed to sanitization functions so users can apply
+ * different rules per markdown token type or HTML tag.
+ *
+ * - For markdown links: `{ type: 'link', tag: 'a' }`
+ * - For markdown images: `{ type: 'image', tag: 'img' }`
+ * - For HTML tags: `{ type: 'html', tag: 'a' | 'img' | 'div' | ... }`
+ */
+export interface SanitizeContext {
+    /** The markdown token type. */
+    type: 'link' | 'image' | 'html';
+    /** The HTML tag name being rendered (e.g. `'a'`, `'img'`, `'div'`). */
+    tag: string;
+}
+export type SanitizeUrlFn = (_url: string, _context: SanitizeContext) => string;
+export type SanitizeAttributesFn = (_attributes: Record<string, string>, _context: SanitizeContext, _sanitizeUrl: SanitizeUrlFn) => Record<string, string>;
+/**
+ * Sanitizes a URL against a protocol allowlist.
+ *
+ * Allows `http:`, `https:`, `mailto:`, `tel:`, and relative URLs
+ * (starting with `/`, `#`, `?`, `.`, or no protocol). Blocks everything
+ * else including `javascript:`, `data:`, `vbscript:`, etc.
+ *
+ * Protocol matching is case-insensitive; leading whitespace is stripped from the returned URL.
+ *
+ * The `context` parameter provides the token type and HTML tag name,
+ * enabling per-element policies in custom overrides.
+ */
+export declare const defaultSanitizeUrl: (url: string, _context: SanitizeContext) => string;
+/**
+ * Passthrough URL sanitizer that allows all URLs unchanged.
+ *
+ * Use this to disable URL sanitization entirely:
+ * ```svelte
+ * <SvelteMarkdown source={markdown} sanitizeUrl={unsanitizedUrl} />
+ * ```
+ */
+export declare const unsanitizedUrl: SanitizeUrlFn;
+/**
+ * Passthrough attribute sanitizer that allows all attributes unchanged.
+ *
+ * Use this to disable attribute sanitization entirely:
+ * ```svelte
+ * <SvelteMarkdown source={markdown} sanitizeAttributes={unsanitizedAttributes} />
+ * ```
+ */
+export declare const unsanitizedAttributes: SanitizeAttributesFn;
+/**
+ * Sanitizes an HTML attribute object by:
+ * 1. Removing all event handler attributes (`on*`) and `srcdoc`
+ * 2. Running URL-bearing attributes through the sanitizer (dropping any that sanitize to an empty string)
+ *
+ * The `context` parameter provides the HTML tag name, enabling
+ * per-element policies in custom overrides (e.g. stricter rules
+ * for `<iframe>` than `<a>`).
+ *
+ * Returns a new object; does not mutate the input.
+ */
+export declare const defaultSanitizeAttributes: SanitizeAttributesFn;

package/dist/utils/sanitize.js ADDED Viewed

@@ -0,0 +1,104 @@
+/**
+ * URL and HTML attribute sanitization utilities for XSS prevention.
+ *
+ * These functions are applied in the Parser before tokens reach any
+ * renderer component or snippet, ensuring custom renderers cannot
+ * bypass sanitization.
+ *
+ * @see https://github.com/humanspeak/svelte-markdown/issues/272
+ * @packageDocumentation
+ */
+/** Protocols considered safe for href/src attributes. */
+const SAFE_PROTOCOLS = new Set(['http:', 'https:', 'mailto:', 'tel:']);
+/**
+ * URL attributes in HTML that should be run through the sanitizer.
+ * Covers standard attributes that can trigger navigation or resource loading.
+ */
+const URL_ATTRIBUTES = new Set(['href', 'src', 'action', 'formaction', 'cite', 'data', 'poster']);
+/** Fast-path: most URLs are http/https — avoid `new URL()` for these. */
+const SAFE_PREFIX_RE = /^https?:/i;
+const LEADING_WS_RE = /^\s+/;
+const RELATIVE_RE = /^[#/?.]/;
+/**
+ * Sanitizes a URL against a protocol allowlist.
+ *
+ * Allows `http:`, `https:`, `mailto:`, `tel:`, and relative URLs
+ * (starting with `/`, `#`, `?`, `.`, or no protocol). Blocks everything
+ * else including `javascript:`, `data:`, `vbscript:`, etc.
+ *
+ * Protocol matching is case-insensitive; leading whitespace is stripped from the returned URL.
+ *
+ * The `context` parameter provides the token type and HTML tag name,
+ * enabling per-element policies in custom overrides.
+ */
+export const defaultSanitizeUrl = (url, _context) => {
+    if (!url)
+        return '';
+    const trimmed = url.replace(LEADING_WS_RE, '');
+    // Relative URLs are safe: #anchor, /path, ?query, ./relative, ../parent
+    if (RELATIVE_RE.test(trimmed))
+        return trimmed;
+    // No colon means no protocol — safe relative URL
+    if (!trimmed.includes(':'))
+        return trimmed;
+    // Fast-path for http/https — avoids new URL() allocation
+    if (SAFE_PREFIX_RE.test(trimmed))
+        return trimmed;
+    try {
+        const parsed = new URL(trimmed, 'http://localhost');
+        if (SAFE_PROTOCOLS.has(parsed.protocol))
+            return trimmed;
+    }
+    catch {
+        // Malformed URL — block it
+    }
+    return '';
+};
+/**
+ * Passthrough URL sanitizer that allows all URLs unchanged.
+ *
+ * Use this to disable URL sanitization entirely:
+ * ```svelte
+ * <SvelteMarkdown source={markdown} sanitizeUrl={unsanitizedUrl} />
+ * ```
+ */
+export const unsanitizedUrl = (url) => url;
+/**
+ * Passthrough attribute sanitizer that allows all attributes unchanged.
+ *
+ * Use this to disable attribute sanitization entirely:
+ * ```svelte
+ * <SvelteMarkdown source={markdown} sanitizeAttributes={unsanitizedAttributes} />
+ * ```
+ */
+export const unsanitizedAttributes = (attributes) => attributes;
+/**
+ * Sanitizes an HTML attribute object by:
+ * 1. Removing all event handler attributes (`on*`) and `srcdoc`
+ * 2. Running URL-bearing attributes through the sanitizer (dropping any that sanitize to an empty string)
+ *
+ * The `context` parameter provides the HTML tag name, enabling
+ * per-element policies in custom overrides (e.g. stricter rules
+ * for `<iframe>` than `<a>`).
+ *
+ * Returns a new object; does not mutate the input.
+ */
+export const defaultSanitizeAttributes = (attributes, context, sanitizeUrl) => {
+    const result = {};
+    for (const [key, value] of Object.entries(attributes)) {
+        const lower = key.toLowerCase();
+        // Strip event handlers (onclick, onerror, onload, etc.)
+        // Strip srcdoc — allows arbitrary HTML/script execution in iframes
+        if (lower.startsWith('on') || lower === 'srcdoc')
+            continue;
+        // Sanitize URL-bearing attributes
+        if (URL_ATTRIBUTES.has(lower)) {
+            const sanitized = sanitizeUrl(value, context);
+            if (sanitized)
+                result[key] = sanitized;
+            continue;
+        }
+        result[key] = value;
+    }
+    return result;
+};

package/dist/utils/stream-benchmark.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+import type { SvelteMarkdownOptions } from '../types.js';
+import type { Token } from './markdown-parser.js';
+export interface StreamBenchmarkResult {
+    totalChars: number;
+    chunkCount: number;
+    totalParseMs: number;
+    peakParseMs: number;
+    p95ParseMs: number;
+    finalTokens: Token[];
+    parseDurationsMs: number[];
+}
+/**
+ * Benchmarks incremental parsing performance by simulating streaming chunk appends.
+ *
+ * @param chunks - Array of string chunks to append sequentially
+ * @param options - SvelteMarkdown parser options forwarded to IncrementalParser
+ * @returns Benchmark results including per-chunk timing, peak, and p95 parse durations
+ *
+ * @example
+ * ```ts
+ * const chunks = ['# Hello ', 'world, ', 'this is a test.']
+ * const result = benchmarkAppendStream(chunks, { gfm: true })
+ * console.log(result.p95ParseMs, result.peakParseMs)
+ * ```
+ */
+export declare const benchmarkAppendStream: (chunks: string[], options: SvelteMarkdownOptions) => StreamBenchmarkResult;

package/dist/utils/stream-benchmark.js ADDED Viewed

@@ -0,0 +1,53 @@
+import { IncrementalParser } from './incremental-parser.js';
+/**
+ * Calculates the p-th percentile of a numeric array.
+ *
+ * @param values - Array of numeric values
+ * @param p - Percentile to calculate (0-1, e.g., 0.95 for 95th percentile)
+ * @returns The value at the specified percentile, or 0 if array is empty
+ */
+const percentile = (values, p) => {
+    if (values.length === 0)
+        return 0;
+    const sorted = [...values].sort((a, b) => a - b);
+    const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(sorted.length * p) - 1));
+    return sorted[index];
+};
+/**
+ * Benchmarks incremental parsing performance by simulating streaming chunk appends.
+ *
+ * @param chunks - Array of string chunks to append sequentially
+ * @param options - SvelteMarkdown parser options forwarded to IncrementalParser
+ * @returns Benchmark results including per-chunk timing, peak, and p95 parse durations
+ *
+ * @example
+ * ```ts
+ * const chunks = ['# Hello ', 'world, ', 'this is a test.']
+ * const result = benchmarkAppendStream(chunks, { gfm: true })
+ * console.log(result.p95ParseMs, result.peakParseMs)
+ * ```
+ */
+export const benchmarkAppendStream = (chunks, options) => {
+    const parser = new IncrementalParser(options);
+    const parseDurationsMs = [];
+    let source = '';
+    let finalTokens = [];
+    for (const chunk of chunks) {
+        source += chunk;
+        const start = performance.now();
+        const result = parser.update(source);
+        const elapsed = performance.now() - start;
+        parseDurationsMs.push(elapsed);
+        finalTokens = result.tokens;
+    }
+    const totalParseMs = parseDurationsMs.reduce((sum, duration) => sum + duration, 0);
+    return {
+        totalChars: source.length,
+        chunkCount: chunks.length,
+        totalParseMs,
+        peakParseMs: parseDurationsMs.length > 0 ? Math.max(...parseDurationsMs) : 0,
+        p95ParseMs: percentile(parseDurationsMs, 0.95),
+        finalTokens,
+        parseDurationsMs
+    };
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@humanspeak/svelte-markdown",
-  "version": "1.2.0",
+  "version": "1.4.0",
   "description": "Fast, customizable markdown renderer for Svelte with built-in caching, TypeScript support, and Svelte 5 runes",
   "keywords": [
     "svelte",
@@ -70,48 +70,48 @@
     "@humanspeak/memory-cache": "^1.0.6",
     "github-slugger": "^2.0.0",
     "htmlparser2": "^12.0.0",
-    "marked": "^17.0.5"
+    "marked": "^18.0.0"
   },
   "devDependencies": {
-    "@eslint/compat": "^2.0.3",
+    "@eslint/compat": "^2.0.4",
     "@eslint/js": "^10.0.1",
-    "@playwright/cli": "^0.1.1",
-    "@playwright/test": "^1.58.2",
+    "@playwright/cli": "^0.1.5",
+    "@playwright/test": "^1.59.1",
     "@sveltejs/adapter-auto": "^7.0.1",
-    "@sveltejs/kit": "^2.55.0",
+    "@sveltejs/kit": "^2.56.1",
     "@sveltejs/package": "^2.5.7",
     "@sveltejs/vite-plugin-svelte": "^7.0.0",
     "@testing-library/jest-dom": "^6.9.1",
     "@testing-library/svelte": "^5.3.1",
     "@testing-library/user-event": "^14.6.1",
     "@types/katex": "^0.16.8",
-    "@types/node": "^25.5.0",
-    "@typescript-eslint/eslint-plugin": "^8.57.2",
-    "@typescript-eslint/parser": "^8.57.2",
-    "@vitest/coverage-v8": "^4.1.1",
-    "eslint": "^10.1.0",
+    "@types/node": "^25.5.2",
+    "@typescript-eslint/eslint-plugin": "^8.58.0",
+    "@typescript-eslint/parser": "^8.58.0",
+    "@vitest/coverage-v8": "^4.1.3",
+    "eslint": "^10.2.0",
     "eslint-config-prettier": "^10.1.8",
     "eslint-plugin-import": "^2.32.0",
-    "eslint-plugin-svelte": "^3.16.0",
+    "eslint-plugin-svelte": "^3.17.0",
     "eslint-plugin-unused-imports": "^4.4.1",
     "globals": "^17.4.0",
     "husky": "^9.1.7",
-    "jsdom": "^29.0.1",
-    "katex": "^0.16.41",
-    "marked-katex-extension": "^5.1.7",
-    "mermaid": "^11.13.0",
+    "jsdom": "^29.0.2",
+    "katex": "^0.16.45",
+    "marked-katex-extension": "^5.1.8",
+    "mermaid": "^11.14.0",
     "mprocs": "^0.9.2",
     "prettier": "^3.8.1",
     "prettier-plugin-organize-imports": "^4.3.0",
     "prettier-plugin-svelte": "^3.5.1",
     "prettier-plugin-tailwindcss": "^0.7.2",
     "publint": "^0.3.18",
-    "svelte": "^5.55.0",
-    "svelte-check": "^4.4.5",
+    "svelte": "^5.55.1",
+    "svelte-check": "^4.4.6",
     "typescript": "^6.0.2",
-    "typescript-eslint": "^8.57.2",
-    "vite": "^8.0.2",
-    "vitest": "^4.1.1"
+    "typescript-eslint": "^8.58.0",
+    "vite": "^8.0.7",
+    "vitest": "^4.1.3"
   },
   "peerDependencies": {
     "mermaid": ">=10.0.0",