@humanspeak/svelte-markdown 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,10 @@
9
9
  * @module incremental-parser
10
10
  */
11
11
  import { lexAndClean } from './parse-and-cache.js';
12
+ const CLOSED_FENCE_RE = /^ {0,3}(`{3,}|~{3,}).*\n[\s\S]*\n {0,3}\1[ \t]*\n*$/; // Whole-string match: opening fence line, body, then a final line repeating the exact same fence (only spaces/tabs/newlines may follow)
13
+ const LINK_REFERENCE_RE = /\[[^\]\n]+\]\[[^\]\n]*\]/; // Full or collapsed reference on one line: [text][label] or [text][]
14
+ const SHORTCUT_REFERENCE_RE = /\[[^\]\n]+\](?![[(])/; // Excludes inline links/images and full refs
15
+ const REFERENCE_DEFINITION_RE = /^\s{0,3}\[[^\]\n]+\]:/m; // Reference definition at the start of any line (multiline flag): [label]:
12
16
  /**
13
17
  * Streaming-optimized parser that performs full re-parses but diffs results
14
18
  * against the previous token array to minimize DOM updates.
@@ -33,8 +37,12 @@ import { lexAndClean } from './parse-and-cache.js';
33
37
  export class IncrementalParser {
34
38
  /** Previous parse result for diffing */
35
39
  prevTokens = [];
40
+ /** Previous full source string for append-only tail reparsing */
41
+ prevSource = '';
36
42
  /** Parser options passed to the Marked lexer */
37
43
  options;
44
+ /** Whether caller-supplied parser hooks make tail-window reparsing unsafe */
45
+ tailWindowDisabled;
38
46
  /**
39
47
  * Creates a new incremental parser instance.
40
48
  *
@@ -42,7 +50,77 @@ export class IncrementalParser {
42
50
  */
43
51
  constructor(options) {
44
52
  this.options = options;
53
+ const exts = options.extensions;
54
+ const hasExtensionTokenizers = (exts?.block != null && exts.block.length > 0) ||
55
+ (exts?.inline != null && exts.inline.length > 0);
56
+ this.tailWindowDisabled =
57
+ typeof options.walkTokens === 'function' ||
58
+ options.tokenizer != null ||
59
+ hasExtensionTokenizers;
45
60
  }
61
+ getTailWindowBoundary = () => {
62
+ if (this.prevTokens.length === 0) {
63
+ return { prefixCount: 0, reparseOffset: 0 };
64
+ }
65
+ let offset = 0;
66
+ for (let i = 0; i < this.prevTokens.length - 1; i++) {
67
+ offset += this.prevTokens[i].raw.length;
68
+ }
69
+ const lastToken = this.prevTokens[this.prevTokens.length - 1];
70
+ if (this.isStableAtSourceEnd(lastToken)) {
71
+ return {
72
+ prefixCount: this.prevTokens.length,
73
+ reparseOffset: this.prevSource.length
74
+ };
75
+ }
76
+ return {
77
+ prefixCount: this.prevTokens.length - 1,
78
+ reparseOffset: offset
79
+ };
80
+ };
81
+ isStableAtSourceEnd = (token) => {
82
+ if (token.type === 'space')
83
+ return false;
84
+ if (token.raw.endsWith('\n\n'))
85
+ return true;
86
+ switch (token.type) {
87
+ case 'heading':
88
+ case 'hr':
89
+ return token.raw.endsWith('\n');
90
+ case 'code':
91
+ return CLOSED_FENCE_RE.test(token.raw);
92
+ default:
93
+ return false;
94
+ }
95
+ };
96
+ hasAppendSensitiveReferenceSyntax = (source) => {
97
+ if (!source.includes('[') || !source.includes(']'))
98
+ return false;
99
+ return (LINK_REFERENCE_RE.test(source) ||
100
+ SHORTCUT_REFERENCE_RE.test(source) ||
101
+ REFERENCE_DEFINITION_RE.test(source));
102
+ };
103
+ canUseTailWindow = (source, boundary) => {
104
+ if (this.tailWindowDisabled)
105
+ return false;
106
+ if (this.prevSource === '' || this.prevTokens.length === 0)
107
+ return false;
108
+ if (!source.startsWith(this.prevSource))
109
+ return false;
110
+ if (boundary.reparseOffset <= 0)
111
+ return false;
112
+ const stablePrefix = this.prevSource.slice(0, boundary.reparseOffset);
113
+ if (this.hasAppendSensitiveReferenceSyntax(stablePrefix))
114
+ return false;
115
+ return true;
116
+ };
117
+ parseSource = (source, boundary) => {
118
+ if (!this.canUseTailWindow(source, boundary)) {
119
+ return lexAndClean(source, this.options, false);
120
+ }
121
+ const tailTokens = lexAndClean(source.slice(boundary.reparseOffset), this.options, false);
122
+ return [...this.prevTokens.slice(0, boundary.prefixCount), ...tailTokens];
123
+ };
46
124
  /**
47
125
  * Parses the full source and diffs against the previous result.
48
126
  *
@@ -50,26 +128,28 @@ export class IncrementalParser {
50
128
  * @returns The new tokens and the index where they diverge from the previous parse
51
129
  */
52
130
  update = (source) => {
53
- const newTokens = lexAndClean(source, this.options, false);
131
+ const boundary = this.getTailWindowBoundary();
132
+ const newTokens = this.parseSource(source, boundary);
54
133
  // Apply walkTokens if configured
55
134
  if (typeof this.options.walkTokens === 'function') {
56
135
  newTokens.forEach(this.options.walkTokens);
57
136
  }
137
+ // Reference definitions can change inline children without changing raw,
138
+ // so force a full rerender when reference syntax is present
139
+ const referenceSensitive = this.hasAppendSensitiveReferenceSyntax(this.prevSource) ||
140
+ this.hasAppendSensitiveReferenceSyntax(source);
58
141
  // Find first divergence point by comparing raw strings
59
142
  let divergeAt = 0;
60
- const minLen = Math.min(this.prevTokens.length, newTokens.length);
61
- while (divergeAt < minLen) {
62
- if (this.prevTokens[divergeAt].raw !== newTokens[divergeAt].raw)
63
- break;
64
- divergeAt++;
143
+ if (!referenceSensitive) {
144
+ const minLen = Math.min(this.prevTokens.length, newTokens.length);
145
+ while (divergeAt < minLen) {
146
+ if (this.prevTokens[divergeAt].raw !== newTokens[divergeAt].raw)
147
+ break;
148
+ divergeAt++;
149
+ }
65
150
  }
151
+ this.prevSource = source;
66
152
  this.prevTokens = newTokens;
67
153
  return { tokens: newTokens, divergeAt };
68
154
  };
69
- /**
70
- * Resets the parser state. Call this when starting a new stream.
71
- */
72
- reset = () => {
73
- this.prevTokens = [];
74
- };
75
155
  }
@@ -0,0 +1,69 @@
1
+ /**
2
+ * URL and HTML attribute sanitization utilities for XSS prevention.
3
+ *
4
+ * These functions are applied in the Parser before tokens reach any
5
+ * renderer component or snippet, ensuring custom renderers cannot
6
+ * bypass sanitization.
7
+ *
8
+ * @see https://github.com/humanspeak/svelte-markdown/issues/272
9
+ * @packageDocumentation
10
+ */
11
+ /**
12
+ * Context passed to sanitization functions so users can apply
13
+ * different rules per markdown token type or HTML tag.
14
+ *
15
+ * - For markdown links: `{ type: 'link', tag: 'a' }`
16
+ * - For markdown images: `{ type: 'image', tag: 'img' }`
17
+ * - For HTML tags: `{ type: 'html', tag: 'a' | 'img' | 'div' | ... }`
18
+ */
19
+ export interface SanitizeContext {
20
+ /** The markdown token type. */
21
+ type: 'link' | 'image' | 'html';
22
+ /** The HTML tag name being rendered (e.g. `'a'`, `'img'`, `'div'`). */
23
+ tag: string;
24
+ }
25
+ export type SanitizeUrlFn = (_url: string, _context: SanitizeContext) => string;
26
+ export type SanitizeAttributesFn = (_attributes: Record<string, string>, _context: SanitizeContext, _sanitizeUrl: SanitizeUrlFn) => Record<string, string>;
27
+ /**
28
+ * Sanitizes a URL against a protocol allowlist.
29
+ *
30
+ * Allows `http:`, `https:`, `mailto:`, `tel:`, and relative URLs
31
+ * (starting with `/`, `#`, `?`, or no protocol). Blocks everything
32
+ * else including `javascript:`, `data:`, `vbscript:`, etc.
33
+ *
34
+ * Handles mixed-case protocols and leading whitespace.
35
+ *
36
+ * The `context` parameter provides the token type and HTML tag name,
37
+ * enabling per-element policies in custom overrides.
38
+ */
39
+ export declare const defaultSanitizeUrl: (url: string, _context: SanitizeContext) => string;
40
+ /**
41
+ * Passthrough URL sanitizer that allows all URLs unchanged.
42
+ *
43
+ * Use this to disable URL sanitization entirely:
44
+ * ```svelte
45
+ * <SvelteMarkdown source={markdown} sanitizeUrl={unsanitizedUrl} />
46
+ * ```
47
+ */
48
+ export declare const unsanitizedUrl: SanitizeUrlFn;
49
+ /**
50
+ * Passthrough attribute sanitizer that allows all attributes unchanged.
51
+ *
52
+ * Use this to disable attribute sanitization entirely:
53
+ * ```svelte
54
+ * <SvelteMarkdown source={markdown} sanitizeAttributes={unsanitizedAttributes} />
55
+ * ```
56
+ */
57
+ export declare const unsanitizedAttributes: SanitizeAttributesFn;
58
+ /**
59
+ * Sanitizes an HTML attribute object by:
60
+ * 1. Removing all event handler attributes (`on*`) and `srcdoc`
61
+ * 2. Running URL-bearing attributes through the sanitizer
62
+ *
63
+ * The `context` parameter provides the HTML tag name, enabling
64
+ * per-element policies in custom overrides (e.g. stricter rules
65
+ * for `<iframe>` than `<a>`).
66
+ *
67
+ * Returns a new object; does not mutate the input.
68
+ */
69
+ export declare const defaultSanitizeAttributes: SanitizeAttributesFn;
@@ -0,0 +1,104 @@
1
+ /**
2
+ * URL and HTML attribute sanitization utilities for XSS prevention.
3
+ *
4
+ * These functions are applied in the Parser before tokens reach any
5
+ * renderer component or snippet, ensuring custom renderers cannot
6
+ * bypass sanitization.
7
+ *
8
+ * @see https://github.com/humanspeak/svelte-markdown/issues/272
9
+ * @packageDocumentation
10
+ */
11
/** Protocols considered safe for href/src attributes. */
const SAFE_PROTOCOLS = new Set(['http:', 'https:', 'mailto:', 'tel:']);
/**
 * URL attributes in HTML that should be run through the sanitizer.
 * Covers standard attributes that can trigger navigation or resource loading.
 * Names are compared after lower-casing. `xlink:href` is the legacy SVG
 * spelling of `href`; it is listed so a `javascript:` URL cannot slip past
 * the sanitizer under that name.
 */
const URL_ATTRIBUTES = new Set([
    'href',
    'xlink:href',
    'src',
    'action',
    'formaction',
    'cite',
    'data',
    'poster'
]);
/** Fast-path: most URLs are http/https — avoid `new URL()` for these. */
const SAFE_PREFIX_RE = /^https?:/i;
/** Leading whitespace that is dropped before the URL is inspected. */
const LEADING_WS_RE = /^\s+/;
/** First character of a URL that is treated as relative: `#`, `/`, `?` or `.`. */
const RELATIVE_RE = /^[#/?.]/;
22
/**
 * Default URL sanitizer: keeps a URL only when it is relative or uses an
 * allowlisted protocol.
 *
 * Accepted as-is (after stripping leading whitespace):
 * - URLs whose first character is `#`, `/`, `?` or `.`
 * - URLs without any `:` (no protocol at all)
 * - URLs starting with `http:` / `https:` (case-insensitive)
 * - URLs whose parsed protocol is in the safe-protocol set
 *
 * Everything else, including URLs that fail to parse, yields `''`.
 *
 * @param url - The URL to check; an empty/falsy value yields `''`
 * @param _context - Token type and tag name; unused here, available to overrides
 * @returns The URL without leading whitespace, or `''` when it is rejected
 */
export const defaultSanitizeUrl = (url, _context) => {
    if (!url) {
        return '';
    }
    const candidate = url.replace(LEADING_WS_RE, '');
    // Relative forms (#anchor, /path, ?query, ./relative, ../parent) and
    // anything without a colon carry no protocol to vet
    const isRelative = RELATIVE_RE.test(candidate) || !candidate.includes(':');
    // http/https are recognised by prefix so no URL object is allocated
    if (isRelative || SAFE_PREFIX_RE.test(candidate)) {
        return candidate;
    }
    let protocol;
    try {
        ({ protocol } = new URL(candidate, 'http://localhost'));
    }
    catch {
        // Malformed URL — block it
        return '';
    }
    return SAFE_PROTOCOLS.has(protocol) ? candidate : '';
};
57
/**
 * Identity URL sanitizer: hands every URL back untouched.
 *
 * Pass it to turn URL sanitization off completely:
 * ```svelte
 * <SvelteMarkdown source={markdown} sanitizeUrl={unsanitizedUrl} />
 * ```
 *
 * @param url - Any URL
 * @returns The same value that was passed in
 */
export const unsanitizedUrl = (url) => {
    return url;
};
66
/**
 * Identity attribute sanitizer: returns the attribute object it was given,
 * without copying or filtering it.
 *
 * Pass it to turn attribute sanitization off completely:
 * ```svelte
 * <SvelteMarkdown source={markdown} sanitizeAttributes={unsanitizedAttributes} />
 * ```
 *
 * @param attributes - Attribute name/value map
 * @returns The very same object
 */
export const unsanitizedAttributes = (attributes) => {
    return attributes;
};
75
/**
 * Default attribute sanitizer. Builds a fresh attribute object in which:
 * 1. attributes whose lower-cased name starts with `on`, and `srcdoc`,
 *    are left out
 * 2. URL-bearing attributes are replaced by the result of `sanitizeUrl`
 *    and left out entirely when that result is empty
 * 3. every other attribute is copied unchanged
 *
 * The input object is never modified.
 *
 * @param attributes - Attribute name/value map to filter
 * @param context - Token type and tag name, forwarded to `sanitizeUrl`
 * @param sanitizeUrl - URL sanitizer applied to URL-bearing attributes
 * @returns A new object holding only the attributes that were kept
 */
export const defaultSanitizeAttributes = (attributes, context, sanitizeUrl) => {
    const kept = {};
    Object.entries(attributes).forEach(([name, rawValue]) => {
        const normalized = name.toLowerCase();
        // Event handlers (onclick, onerror, onload, etc.) and srcdoc, which
        // allows arbitrary HTML/script execution in iframes, are dropped
        const isBlocked = normalized.startsWith('on') || normalized === 'srcdoc';
        if (isBlocked) {
            return;
        }
        if (!URL_ATTRIBUTES.has(normalized)) {
            kept[name] = rawValue;
            return;
        }
        // URL-bearing attribute: keep it only if the sanitizer returns something
        const safeUrl = sanitizeUrl(rawValue, context);
        if (safeUrl) {
            kept[name] = safeUrl;
        }
    });
    return kept;
};
@@ -0,0 +1,26 @@
1
+ import type { SvelteMarkdownOptions } from '../types.js';
2
+ import type { Token } from './markdown-parser.js';
3
+ export interface StreamBenchmarkResult {
4
+ totalChars: number;
5
+ chunkCount: number;
6
+ totalParseMs: number;
7
+ peakParseMs: number;
8
+ p95ParseMs: number;
9
+ finalTokens: Token[];
10
+ parseDurationsMs: number[];
11
+ }
12
+ /**
13
+ * Benchmarks incremental parsing performance by simulating streaming chunk appends.
14
+ *
15
+ * @param chunks - Array of string chunks to append sequentially
16
+ * @param options - SvelteMarkdown parser options forwarded to IncrementalParser
17
+ * @returns Benchmark results including per-chunk timing, peak, and p95 parse durations
18
+ *
19
+ * @example
20
+ * ```ts
21
+ * const chunks = ['# Hello ', 'world, ', 'this is a test.']
22
+ * const result = benchmarkAppendStream(chunks, { gfm: true })
23
+ * console.log(result.p95ParseMs, result.peakParseMs)
24
+ * ```
25
+ */
26
+ export declare const benchmarkAppendStream: (chunks: string[], options: SvelteMarkdownOptions) => StreamBenchmarkResult;
@@ -0,0 +1,53 @@
1
+ import { IncrementalParser } from './incremental-parser.js';
2
/**
 * Calculates the p-th percentile of a numeric array with the nearest-rank
 * method: the result is always one of the input values.
 *
 * @param values - Array of numeric values (not modified; a sorted copy is used)
 * @param p - Percentile to calculate (0-1, e.g., 0.95 for 95th percentile);
 *   values outside that range are clamped to the smallest/largest element
 * @returns The value at the specified percentile, or 0 if array is empty
 */
const percentile = (values, p) => {
    if (values.length === 0) {
        return 0;
    }
    const sorted = [...values].sort((a, b) => a - b);
    // `length * p` is subject to floating-point error: 100 * 0.07 evaluates to
    // 7.000000000000001, which Math.ceil would round up to rank 8 instead of 7.
    // A small tolerance keeps exact ranks exact.
    const rankTolerance = 1e-9;
    const rank = Math.ceil(sorted.length * p - rankTolerance);
    const index = Math.min(sorted.length - 1, Math.max(0, rank - 1));
    return sorted[index];
};
16
/**
 * Benchmarks incremental parsing performance by simulating streaming chunk appends.
 *
 * Each chunk is appended to the accumulated source and the whole source is
 * handed to `IncrementalParser.update()`; only that call is timed.
 *
 * @param chunks - Array of string chunks to append sequentially
 * @param options - SvelteMarkdown parser options forwarded to IncrementalParser
 * @returns Benchmark results including per-chunk timing, peak, and p95 parse durations
 *
 * @example
 * ```ts
 * const chunks = ['# Hello ', 'world, ', 'this is a test.']
 * const result = benchmarkAppendStream(chunks, { gfm: true })
 * console.log(result.p95ParseMs, result.peakParseMs)
 * ```
 */
export const benchmarkAppendStream = (chunks, options) => {
    const parser = new IncrementalParser(options);
    const parseDurationsMs = [];
    let source = '';
    let finalTokens = [];
    let totalParseMs = 0;
    // Tracked inside the loop instead of `Math.max(...parseDurationsMs)`:
    // spreading a very large array into a call can exceed the engine's
    // argument limit and throw a RangeError.
    let peakParseMs = 0;
    for (const chunk of chunks) {
        source += chunk;
        const start = performance.now();
        const { tokens } = parser.update(source);
        const elapsed = performance.now() - start;
        parseDurationsMs.push(elapsed);
        totalParseMs += elapsed;
        if (elapsed > peakParseMs) {
            peakParseMs = elapsed;
        }
        finalTokens = tokens;
    }
    return {
        totalChars: source.length,
        chunkCount: chunks.length,
        totalParseMs,
        peakParseMs,
        p95ParseMs: percentile(parseDurationsMs, 0.95),
        finalTokens,
        parseDurationsMs
    };
};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@humanspeak/svelte-markdown",
3
- "version": "1.2.0",
3
+ "version": "1.4.0",
4
4
  "description": "Fast, customizable markdown renderer for Svelte with built-in caching, TypeScript support, and Svelte 5 runes",
5
5
  "keywords": [
6
6
  "svelte",
@@ -70,48 +70,48 @@
70
70
  "@humanspeak/memory-cache": "^1.0.6",
71
71
  "github-slugger": "^2.0.0",
72
72
  "htmlparser2": "^12.0.0",
73
- "marked": "^17.0.5"
73
+ "marked": "^18.0.0"
74
74
  },
75
75
  "devDependencies": {
76
- "@eslint/compat": "^2.0.3",
76
+ "@eslint/compat": "^2.0.4",
77
77
  "@eslint/js": "^10.0.1",
78
- "@playwright/cli": "^0.1.1",
79
- "@playwright/test": "^1.58.2",
78
+ "@playwright/cli": "^0.1.5",
79
+ "@playwright/test": "^1.59.1",
80
80
  "@sveltejs/adapter-auto": "^7.0.1",
81
- "@sveltejs/kit": "^2.55.0",
81
+ "@sveltejs/kit": "^2.56.1",
82
82
  "@sveltejs/package": "^2.5.7",
83
83
  "@sveltejs/vite-plugin-svelte": "^7.0.0",
84
84
  "@testing-library/jest-dom": "^6.9.1",
85
85
  "@testing-library/svelte": "^5.3.1",
86
86
  "@testing-library/user-event": "^14.6.1",
87
87
  "@types/katex": "^0.16.8",
88
- "@types/node": "^25.5.0",
89
- "@typescript-eslint/eslint-plugin": "^8.57.2",
90
- "@typescript-eslint/parser": "^8.57.2",
91
- "@vitest/coverage-v8": "^4.1.1",
92
- "eslint": "^10.1.0",
88
+ "@types/node": "^25.5.2",
89
+ "@typescript-eslint/eslint-plugin": "^8.58.0",
90
+ "@typescript-eslint/parser": "^8.58.0",
91
+ "@vitest/coverage-v8": "^4.1.3",
92
+ "eslint": "^10.2.0",
93
93
  "eslint-config-prettier": "^10.1.8",
94
94
  "eslint-plugin-import": "^2.32.0",
95
- "eslint-plugin-svelte": "^3.16.0",
95
+ "eslint-plugin-svelte": "^3.17.0",
96
96
  "eslint-plugin-unused-imports": "^4.4.1",
97
97
  "globals": "^17.4.0",
98
98
  "husky": "^9.1.7",
99
- "jsdom": "^29.0.1",
100
- "katex": "^0.16.41",
101
- "marked-katex-extension": "^5.1.7",
102
- "mermaid": "^11.13.0",
99
+ "jsdom": "^29.0.2",
100
+ "katex": "^0.16.45",
101
+ "marked-katex-extension": "^5.1.8",
102
+ "mermaid": "^11.14.0",
103
103
  "mprocs": "^0.9.2",
104
104
  "prettier": "^3.8.1",
105
105
  "prettier-plugin-organize-imports": "^4.3.0",
106
106
  "prettier-plugin-svelte": "^3.5.1",
107
107
  "prettier-plugin-tailwindcss": "^0.7.2",
108
108
  "publint": "^0.3.18",
109
- "svelte": "^5.55.0",
110
- "svelte-check": "^4.4.5",
109
+ "svelte": "^5.55.1",
110
+ "svelte-check": "^4.4.6",
111
111
  "typescript": "^6.0.2",
112
- "typescript-eslint": "^8.57.2",
113
- "vite": "^8.0.2",
114
- "vitest": "^4.1.1"
112
+ "typescript-eslint": "^8.58.0",
113
+ "vite": "^8.0.7",
114
+ "vitest": "^4.1.3"
115
115
  },
116
116
  "peerDependencies": {
117
117
  "mermaid": ">=10.0.0",