flux-md 0.14.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,109 @@ Notable changes to flux-md. Format based on
4
4
  [Keep a Changelog](https://keepachangelog.com/); this project aims to follow
5
5
  [Semantic Versioning](https://semver.org/).
6
6
 
7
+ ## 0.15.1 — 2026-06-22
8
+
9
+ ### Security
10
+
11
+ - **XSS — dangerous-scheme autolinks are neutralized.** A CommonMark URI autolink
12
+ (`<javascript:alert(1)>`, `<vbscript:…>`, `<file:…>`) previously emitted a live
13
+ `href`, because autolinks bypassed the scheme allowlist that regular links go
14
+ through. They now route through the same decode-stable dangerous-scheme filter:
15
+ the `href` becomes `#` while the visible link text is unchanged. `file:` is now
16
+ blocked everywhere (links, autolinks, URL attributes) — it has no legitimate use
17
+ in rendered untrusted markdown and is a local-resource / phishing vector in
18
+ privileged contexts (Electron, extensions, `file://` origins).
19
+ - **Component-tag / `htmlToReact` attribute hardening.** Sanitized attributes now
20
+ also drop React-meaningful names (`dangerouslySetInnerHTML`, `ref`, `key`,
21
+ `defaultValue`, `defaultChecked`, `suppressHydrationWarning`, …) so a hostile
22
+ attribute can't crash the render tree or smuggle in a prop. Attribute→prop
23
+ lookup maps are prototype-free (`Object.create(null)`), and only HTML / `data-`
24
+ / `aria-` attribute names are forwarded to React.
25
+
26
+ ### Fixed
27
+
28
+ - **ReDoS / quadratic blow-ups on untrusted input.**
29
+ - Highlighter (`hi.ts`): the JS/TS regex-literal and bash double-quoted-string
30
+ patterns could backtrack quadratically on crafted code blocks; both rewritten
31
+ to linear forms, plus a 50,000-character (`code.length`, UTF-16 code units) per-block size guard.
32
+ - URL scheme check: the decode-to-fixpoint loop (Rust `is_dangerous_scheme` and
33
+ JS `safeUrl`) is capped at 8 passes — still catches multi-encoded
34
+ `javascript&amp;amp;#58;` payloads, no longer O(n²) on `&amp;`-spam.
35
+ - Inline parser: nested / unbalanced link-bracket scanning is bounded
36
+ (depth + length caps); GFM extended-autolink trailing-paren trimming is now
37
+ linear instead of recounting the span each iteration.
38
+
39
+ ### Changed
40
+
41
+ - **`flux-md/server` uses a literal `import("node:fs/promises")`** instead of a
42
+ variable specifier, resolving the `dynamicRequire` supply-chain signal. Behavior
43
+ is unchanged — still a Node-only, `file:`-guarded branch.
44
+ - Added a **`## Security`** / supply-chain-transparency section to the README and a
45
+ documented **`socket.yml`** covering the inherent `nativeCode` / `networkAccess`
46
+ / `filesystemAccess` signals (the WebAssembly core and the opt-in
47
+ `<flux-markdown src>` fetch).
48
+
49
+ ### Performance
50
+
51
+ - **No redundant re-renders / rebuilds on no-op updates.**
52
+ - `<flux-markdown>` ignores a `setAttribute` whose value didn't change (a host
53
+ framework re-applying identical attributes no longer tears down the self-owned
54
+ client and reparses the whole document), and the `components` / `sanitize`
55
+ property setters skip the remount when assigned the same identity.
56
+ - `FluxClient.reset()` no longer notifies subscribers when the store was already
57
+ empty — skips a wasted, output-identical render pass.
58
+ - Documented that `sanitize` (like `components`) should be memoized/hoisted in
59
+ React, so a fresh closure each render doesn't bust the per-block memo.
60
+ - Added render-count / node-reuse / no-remount regression tests across the React,
61
+ DOM, store, custom-element, and Vue bindings, locking in that committed blocks
62
+ never re-render or rebuild as the stream grows (only the streaming tail does).
63
+
64
+ ### Known limitations
65
+
66
+ - Streaming a single very large **unclosed** block (a multi-megabyte indented code
67
+ block, open HTML block, or footnote-disarmed list delivered across many chunks)
68
+ is still O(n²) in the uncommitted-tail length. A bounded incremental cache for
69
+ these resumable containers is tracked as follow-up; finalized / closed blocks and
70
+ all other inputs are unaffected.
71
+
72
+ ## 0.15.0 — 2026-06-17
73
+
74
+ ### Added
75
+
76
+ - **Safe raw-HTML sanitizer (`htmlAllowlist` / `dropHtmlTags`)** — render a safe
77
+ subset of *inline* raw HTML (`<br>`, `<sub>`, `<sup>`, `<mark>`, …) **without**
78
+ `unsafeHtml`. Setting either list (even to `[]`) engages it: `htmlAllowlist`
79
+ non-empty renders only those tags (others escaped); **empty allows all tags
80
+ except a built-in, non-overridable dangerous set** (`script`, `style`,
81
+ `iframe`, `object`, `embed`, `form`, `svg`, `xmp`, `plaintext`, …);
82
+ `dropHtmlTags` removes tags entirely. Every rendered tag's attributes are
83
+ sanitized — `on*` handlers and `style` (a CSS beacon / clickjacking vector)
84
+ dropped, dangerous URL schemes (incl. multi-encoded) → `#`. Inline-scoped;
85
+ block-level raw HTML stays escaped. Matching is case-insensitive.
86
+
87
+ ### Fixed
88
+
89
+ - **HTML comments are dropped instead of escaped to visible text.** `<!--mk:id-->`
90
+ (a common LLM marker) previously rendered as a literal `&lt;!--…--&gt;` run or a
91
+ `<pre><code>` block; it now has no visible representation, in every mode except
92
+ bare `unsafeHtml` pass-through (which keeps it verbatim for CommonMark fidelity —
93
+ the browser ignores it either way). A comment-led block with trailing content
94
+ keeps that content (only comment-*only* blocks are dropped).
95
+
96
+ ### Security
97
+
98
+ - The dangerous-tag set is **non-overridable** (allowlisting `script`/`iframe`/`svg`
99
+ still drops them), `style` is stripped from every sanitized/component tag, and
100
+ raw-text elements (`xmp`/`plaintext`/`noembed`/`noframes`/`listing`) are blocked
101
+ in allow-all mode — closing CSS-exfiltration / clickjacking / DOM-corruption
102
+ vectors found in adversarial review. The React `htmlToReact` path mirrors the
103
+ `style` value-filter as defense-in-depth (safe declarations like `text-align`
104
+ still pass).
105
+
106
+ Feature-off output is byte-identical except HTML comments now drop (the
107
+ CommonMark/GFM suites run with `unsafeHtml` on, so the 652/GFM floors are
108
+ unaffected).
109
+
7
110
  ## 0.14.0 — 2026-06-17
8
111
 
9
112
  ### Added
package/README.md CHANGED
@@ -558,6 +558,8 @@ const client = new FluxClient({
558
558
  unsafeHtml: false, // pass raw HTML through (default false — keep it false for untrusted input)
559
559
  componentTags: ["Thinking", "Callout"], // BLOCK custom tags w/ markdown inside (default none)
560
560
  inlineComponentTags: ["tik", "cite"], // INLINE custom tags (chips/citations) w/ markdown inside (default none)
561
+ htmlAllowlist: ["br", "sub", "sup"], // safe raw-HTML sanitizer: [] = allow all but dangerous; list = only those (default off)
562
+ dropHtmlTags: [], // tags removed entirely (comments always dropped when sanitizing; default off)
561
563
  blockData: true, // opt-in structured kind.data per block (default false — see "Structured block data")
562
564
  },
563
565
  });
@@ -592,6 +594,9 @@ When to enable each flag:
592
594
  - `inlineComponentTags: ["tik", …]` — same idea for **inline** custom elements
593
595
  that sit inside a paragraph, heading, list item, or **table cell** (ticker
594
596
  chips, citations, `@mentions`). See [Inline component tags](#inline-component-tags).
597
+ - `htmlAllowlist` / `dropHtmlTags` — render a **safe subset of raw HTML** (e.g.
598
+ `<br>`, `<sub>`, `<sup>`) natively without `unsafeHtml`, drop specific tags, and
599
+ drop HTML comments. See [Safe raw HTML](#safe-raw-html).
595
600
 
596
601
  **Footnotes** (`gfmFootnotes`) work in streaming with one honest caveat: a
597
602
  `[^1]` reference renders speculatively the moment it's seen (committed blocks
@@ -826,6 +831,37 @@ surrounding content.
826
831
  > renders inline-in-cells too — `inlineComponentTags` simply replaces that
827
832
  > workaround with first-class inline elements.
828
833
 
834
+ ### Safe raw HTML
835
+
836
+ LLMs emit a little raw HTML — `<br>`, `<sub>`/`<sup>`, `<mark>`, and HTML comments
837
+ as markers (`<!--mk:id-->`). `unsafeHtml` is all-or-nothing; instead opt into a
838
+ **sanitizer** that renders a safe subset natively. Setting `htmlAllowlist` and/or
839
+ `dropHtmlTags` (even to `[]`) engages it:
840
+
841
+ ```ts
842
+ // Render only these inline tags; escape everything else:
843
+ new FluxClient({ config: { htmlAllowlist: ["br", "sub", "sup", "mark"] } });
844
+
845
+ // Or allow everything except a built-in dangerous set:
846
+ new FluxClient({ config: { htmlAllowlist: [] } });
847
+ ```
848
+
849
+ - **HTML comments are dropped** — no more `<!--mk:id-->` surfacing as escaped text
850
+ — in every mode except bare `unsafeHtml` pass-through.
851
+ - **`htmlAllowlist: ["br", …]`** renders only those inline tags; everything else is
852
+ escaped. **`htmlAllowlist: []`** (empty) allows *all* tags **except a built-in
853
+ dangerous set** (`script`, `style`, `iframe`, `object`, `embed`, `form`, `svg`,
854
+ `xmp`, `plaintext`, … — **non-overridable**: allowlisting one still drops it).
855
+ - **`dropHtmlTags: ["mk", …]`** removes those tags entirely (markup gone; inner
856
+ text stays as inert text).
857
+ - Every rendered tag's **attributes are sanitized**: `on*` handlers and `style`
858
+ (a CSS beacon / clickjacking vector) are dropped, and dangerous URL schemes
859
+ (`javascript:`, …, including multi-encoded) become `#`.
860
+ - **Scope:** *inline* raw HTML. Block-level raw HTML stays escaped for now (use
861
+ `unsafeHtml` **without** the sanitizer to render block HTML — when the sanitizer
862
+ is engaged, block HTML stays escaped even if `unsafeHtml` is also on). Tag
863
+ matching is case-insensitive.
864
+
829
865
  ### Types
830
866
 
831
867
  ```ts
@@ -959,6 +995,43 @@ genuinely hostile content where CSS-overlay/clickjacking matters, render inside
959
995
  a sandboxed `<iframe>` instead — sanitization stops injection, not every
960
996
  visual-overlay trick.
961
997
 
998
+ ### Supply-chain transparency
999
+
1000
+ flux-md is **zero runtime dependency** — no third-party packages are pulled in
1001
+ at runtime. The parsing core is Rust compiled to WebAssembly, reproducibly
1002
+ buildable from `crates/flux-md-core/` via `bun run build:wasm`.
1003
+
1004
+ **Native code (WebAssembly).** The shipped `flux_md_core_bg.wasm` (~200 KB) is
1005
+ first-party, built from the Rust source in this repo, and runs inside a sandboxed
1006
+ Web Worker (browser) or Node worker thread. Supply-chain scanners such as
1007
+ [Socket.dev](https://socket.dev) will flag it as `nativeCode` — this is accurate
1008
+ and expected. The WASM is not a vendored third-party binary; it is reproducible
1009
+ from source.
1010
+
1011
+ **Network access.** flux-md performs network I/O in exactly two scenarios, both
1012
+ caller-driven:
1013
+
1014
+ - `<flux-markdown src="URL">` — the Web Component fetches the URL you supply and
1015
+ streams the response. No URL is ever chosen by flux-md itself.
1016
+ - The wasm-bindgen glue (`wasm/flux_md_core.js`) loads the co-located `.wasm`
1017
+ asset via `fetch(new URL("…_bg.wasm", import.meta.url))` — bundlers resolve
1018
+ this to a local build artifact, not a remote endpoint.
1019
+
1020
+ flux-md has no telemetry, no analytics, and no first-party remote endpoints.
1021
+ Socket will flag the `networkAccess` signal — it is accurate and expected. In
1022
+ privileged contexts (browser extensions, Electron, environments where the
1023
+ same-origin policy may not apply), treat the `src` attribute value as you would
1024
+ any external URL and allowlist it in your CSP / security policy.
1025
+
1026
+ **Filesystem access (Node/SSR only).** `flux-md/server` reads the package's
1027
+ own `.wasm` file off disk on Node.js (Node's `fetch` cannot load `file://`
1028
+ URLs). This is a Node-only path; it reads only the package-internal asset and
1029
+ never touches caller-supplied paths. Socket will flag `filesystemAccess` — also
1030
+ accurate and expected.
1031
+
1032
+ The `socket.yml` at the repository root documents these signals with their
1033
+ justifications for Socket's GitHub app.
1034
+
962
1035
  ## Scaling
963
1036
 
964
1037
  `FluxClient`s share a **worker pool** (`getDefaultPool()`), so concurrency
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "flux-md",
3
- "version": "0.14.0",
3
+ "version": "0.15.1",
4
4
  "description": "Zero-dep streaming markdown for the browser. Rust→WASM core, Web Worker per stream, incremental parse with speculative closure.",
5
5
  "type": "module",
6
6
  "sideEffects": ["./src/worker.ts", "./src/styles.css"],
package/src/client.ts CHANGED
@@ -271,6 +271,15 @@ export function getDefaultPool(): FluxPool {
271
271
  return defaultPool;
272
272
  }
273
273
 
274
+ /** TEST-ONLY: drop the process-wide default pool so the next {@link getDefaultPool}
275
+ * rebuilds it (lazily, with the current global `Worker`). Lets a test file that
276
+ * drives the default pool start from a clean, deterministic state regardless of
277
+ * which other file warmed it first in bun's shared test process. Not part of the
278
+ * public API and not meant to be called by normal runtime code (it only nulls the cached pool reference). */
279
+ export function __resetDefaultPool(): void {
280
+ defaultPool = null;
281
+ }
282
+
274
283
  // --------------------------------------------------------------------------
275
284
  // Client
276
285
  // --------------------------------------------------------------------------
@@ -514,6 +523,11 @@ export class FluxClient {
514
523
  }
515
524
 
516
525
  reset() {
526
+ // Only notify subscribers if there was content to clear: resetting an
527
+ // already-empty store leaves the view empty either way, so skip the no-op
528
+ // emit (which would otherwise drive every subscriber through a wasted,
529
+ // output-identical render pass).
530
+ const hadContent = this.store.snapshot.length > 0;
517
531
  this.store = emptyBlockStore();
518
532
  this.appendedBytes = 0;
519
533
  this.patchCount = 0;
@@ -527,7 +541,7 @@ export class FluxClient {
527
541
  // Same streamId + worker — the worker frees and lazily recreates the parser.
528
542
  const pw = this.ensureAcquired();
529
543
  this.pool.send(pw, { type: "reset", streamId: this.streamId });
530
- this.emit();
544
+ if (hadContent) this.emit();
531
545
  }
532
546
 
533
547
  destroy() {
package/src/element.ts CHANGED
@@ -90,6 +90,7 @@ export function defineFluxMarkdown(tag = "flux-markdown"): void {
90
90
  return this.#components;
91
91
  }
92
92
  set components(value: DomComponents | undefined) {
93
+ if (value === this.#components) return; // no-op re-assign: don't remount
93
94
  this.#components = value;
94
95
  if (this.#connected) this.#remount();
95
96
  }
@@ -98,6 +99,7 @@ export function defineFluxMarkdown(tag = "flux-markdown"): void {
98
99
  return this.#sanitize;
99
100
  }
100
101
  set sanitize(value: ((html: string) => string) | undefined) {
102
+ if (value === this.#sanitize) return; // no-op re-assign: don't remount
101
103
  this.#sanitize = value;
102
104
  if (this.#connected) this.#remount();
103
105
  }
@@ -155,10 +157,16 @@ export function defineFluxMarkdown(tag = "flux-markdown"): void {
155
157
  }
156
158
  }
157
159
 
158
- attributeChangedCallback(name: string, _old: string | null, _new: string | null): void {
160
+ attributeChangedCallback(name: string, oldValue: string | null, newValue: string | null): void {
159
161
  // attributeChangedCallback fires before connectedCallback for attributes
160
162
  // present at upgrade; ignore until connected so config reads happen once.
161
163
  if (!this.#connected) return;
164
+ // setAttribute fires this on EVERY set, including setting an attribute to
165
+ // its current value (common when a host framework re-applies the same
166
+ // attrs on re-render). A no-op value change must not tear down the client
167
+ // and reparse the whole document — only a genuine change proceeds.
168
+ // (Attribute removal yields null, distinct from an empty string.)
169
+ if (oldValue === newValue) return;
162
170
 
163
171
  if (name === "markdown" || name === "src") {
164
172
  // One-shot content source change — only for a self-owned client. A
package/src/hi.ts CHANGED
@@ -55,7 +55,7 @@ const jsPats: Pat[] = [
55
55
  ["str", /"(?:\\.|[^"\\\n])*"/y],
56
56
  ["str", /'(?:\\.|[^'\\\n])*'/y],
57
57
  ["str", /`(?:\\.|[^`\\])*`/y],
58
- ["rx", /\/(?:\\.|\[(?:\\.|[^\]\\])*\]|[^/\\\n])+\/[gimsuy]*/y],
58
+ ["rx", /\/(?![*/])(?:\\.|[^/\\\n])+\/[gimsuy]*/y],
59
59
  ["num", /\b(?:0x[\da-fA-F_]+|0b[01_]+|0o[0-7_]+|\d[\d_]*(?:\.\d[\d_]*)?(?:[eE][+-]?\d+)?)\b/y],
60
60
  ["ident", /[A-Za-z_$][\w$]*/y],
61
61
  ["pun", /[+\-*/=<>!&|^~?:;,.[\](){}]/y],
@@ -103,7 +103,7 @@ const goPats: Pat[] = [
103
103
 
104
104
  const bashPats: Pat[] = [
105
105
  ["com", /#[^\n]*/y],
106
- ["str", /"(?:\\.|\$\([^)]*\)|[^"\\])*"/y],
106
+ ["str", /"(?:\\.|[^"\\])*"/y],
107
107
  ["str", /'[^']*'/y],
108
108
  ["var", /\$\{[^}]+\}|\$\w+|\$[*@#?!$0-9]/y],
109
109
  ["num", /\b\d+\b/y],
@@ -187,6 +187,9 @@ function escapeHtml(s: string): string {
187
187
  }
188
188
 
189
189
  export function highlight(code: string, lang: string): string {
190
+ // Defense-in-depth: never tokenize a pathologically huge block on the main
191
+ // thread — fall back to plain escaped text.
192
+ if (code.length > 50_000) return escapeHtml(code);
190
193
  const conf = LANGS[lang.toLowerCase()];
191
194
  if (!conf) return escapeHtml(code);
192
195
 
@@ -10,7 +10,10 @@ const VOID = new Set([
10
10
  // Attribute name → React prop name, for the handful that differ. Anything not
11
11
  // listed passes through verbatim (React forwards data-*/aria-* and lowercase
12
12
  // attributes unchanged).
13
- const ATTR_MAP: Record<string, string> = {
13
+ // Prototype-free map so an attribute named `constructor`/`hasOwnProperty`/etc.
14
+ // returns undefined (and the `?? name` fallback fires) rather than resolving to
15
+ // an inherited Object.prototype member.
16
+ const ATTR_MAP: Record<string, string> = Object.assign(Object.create(null), {
14
17
  class: "className",
15
18
  for: "htmlFor",
16
19
  colspan: "colSpan",
@@ -26,7 +29,7 @@ const ATTR_MAP: Record<string, string> = {
26
29
  crossorigin: "crossOrigin",
27
30
  enterkeyhint: "enterKeyHint",
28
31
  inputmode: "inputMode",
29
- };
32
+ });
30
33
 
31
34
  // URL-bearing attributes whose value must be scheme-checked. `htmlToReact` is
32
35
  // exported and may be handed untrusted HTML directly; React happily renders a
@@ -34,6 +37,19 @@ const ATTR_MAP: Record<string, string> = {
34
37
  // defense-in-depth — the core's own output is already sanitized.
35
38
  const URL_ATTRS = new Set(["href", "src", "xlink:href", "formaction", "action", "poster", "data"]);
36
39
 
40
+ // React-meaningful prop names that must never be forwarded from (possibly
41
+ // untrusted) HTML attributes: `dangerouslySetInnerHTML` as a prop crashes the
42
+ // whole render tree (DoS), and ref/key/defaultValue/etc. are injectable.
43
+ const PROP_DENY = new Set([
44
+ "dangerouslysetinnerhtml", "ref", "key", "defaultvalue", "defaultchecked",
45
+ "suppresshydrationwarning", "suppresscontenteditablewarning",
46
+ ]);
47
+
48
+ // Only forward attribute names made of ASCII letters, digits and `-` (so
49
+ // `__proto__` is dropped; the `i` flag still admits camelCase / `constructor`). The
50
+ // explicit ATTR_MAP renames and `xlink:href` are allowed past this gate.
51
+ const SAFE_ATTR_NAME = /^[a-z][a-z0-9-]*$/i;
52
+
37
53
  /** Replace a dangerous-scheme URL with "#". Mirrors the Rust `is_dangerous_scheme`:
38
54
  * strip control chars (C0, DEL, C1 — matching Rust char::is_control),
39
55
  * lowercase, then match. The strip affects only the probe, never output. */
@@ -42,8 +58,10 @@ function safeUrl(value: string): string {
42
58
  // reaches the DOM, so peel layers to a fixpoint before the scheme check —
43
59
  // catches `javascript&#58;` and double-encoded `javascript&amp;#58;`. Only the
44
60
  // probe is decoded; the returned value is untouched (safe URLs stay verbatim).
61
+ // Cap at 8 iterations: far beyond any legit URL (browsers entity-decode an
62
+ // href once), and bounds the loop so a hostile value can't make it quadratic.
45
63
  let decoded = value;
46
- for (let prev = ""; decoded !== prev; ) {
64
+ for (let i = 0, prev = ""; i < 8 && decoded !== prev; i++) {
47
65
  prev = decoded;
48
66
  decoded = decodeEntities(decoded);
49
67
  }
@@ -110,6 +128,27 @@ export function parseStyle(css: string): Record<string, string> {
110
128
  return out;
111
129
  }
112
130
 
131
+ // CSS values that beacon/exfiltrate (`url(`), execute (legacy `expression(`,
132
+ // `-moz-binding`, `behavior:`), or pull external resources (`@import`,
133
+ // `image-set(`). Defense-in-depth: the core sanitizer already drops `style`, but
134
+ // `htmlToReact` is exported and may be handed untrusted HTML directly.
135
+ const DANGEROUS_CSS_VALUE = /url\(|expression\(|image-set\(|-moz-binding|@import|behavior\s*:/i;
136
+
137
+ /** Strip CSS declarations that can beacon/exfiltrate, execute, or overlay the
138
+ * viewport (`position: fixed/sticky` → clickjacking). Safe declarations
139
+ * (`text-align`, `color`, …) — including flux's own table-alignment style —
140
+ * pass through untouched. */
141
+ function safeStyle(style: Record<string, string>): Record<string, string> {
142
+ const out: Record<string, string> = {};
143
+ for (const k in style) {
144
+ const v = style[k];
145
+ if (DANGEROUS_CSS_VALUE.test(v)) continue;
146
+ if (k.toLowerCase() === "position" && /\b(?:fixed|sticky)\b/i.test(v)) continue;
147
+ out[k] = v;
148
+ }
149
+ return out;
150
+ }
151
+
113
152
  /** Parse one opening tag starting at `start` (the `<`). */
114
153
  function parseOpenTag(html: string, start: number) {
115
154
  let i = start + 1;
@@ -244,8 +283,11 @@ function attrsToProps(tag: string, attrs: Record<string, string | true>, key: st
244
283
  // React drops most lowercase `on*` attrs — this also covers casings and
245
284
  // future React behavior.
246
285
  if (lower.startsWith("on")) continue;
286
+ // Reject React-meaningful names that would crash the render tree or inject
287
+ // internals (dangerouslySetInnerHTML, ref, key, defaultValue, …).
288
+ if (PROP_DENY.has(lower)) continue;
247
289
  if (lower === "style" && typeof value === "string") {
248
- props.style = parseStyle(value);
290
+ props.style = safeStyle(parseStyle(value));
249
291
  continue;
250
292
  }
251
293
  // Neutralize dangerous-scheme URLs (javascript:, vbscript:, data:text/html).
@@ -259,6 +301,10 @@ function attrsToProps(tag: string, attrs: Record<string, string | true>, key: st
259
301
  props.defaultChecked = value === true ? true : value;
260
302
  continue;
261
303
  }
304
+ // Restrict forwarded ORIGINAL names to ASCII letters, digits and `-`
305
+ // (plus the ATTR_MAP renames and xlink:href handled above) so `__proto__`
306
+ // can never become a React prop. The name test is case-insensitive, so mixed-case names and `constructor` still pass.
307
+ if (!(lower in ATTR_MAP) && !SAFE_ATTR_NAME.test(name)) continue;
262
308
  props[ATTR_MAP[lower] ?? name] = value;
263
309
  }
264
310
  return props;
package/src/react.tsx CHANGED
@@ -100,6 +100,10 @@ interface FluxMarkdownProps {
100
100
  * `unsafeHtml` on. flux-md stays zero-dep — you bring the sanitizer. The
101
101
  * built-in code/math renderers operate on already-escaped content and are not
102
102
  * run through it. When omitted, rendering is byte-identical and zero-cost.
103
+ *
104
+ * **Memoize / hoist this** (same trap as `components`): a fresh closure each
105
+ * render busts the per-block memo, so every block re-sanitizes and re-parses
106
+ * on every patch instead of only the streaming tail.
103
107
  */
104
108
  sanitize?: (html: string) => string;
105
109
  /** Appended to the root's `className` (the `flux-md` class is always present). */
@@ -362,14 +366,40 @@ export function blockKindProps(block: Block, components?: Components): BlockComp
362
366
  return props;
363
367
  }
364
368
 
365
- const REACT_ATTR_NAME: Record<string, string> = { class: "className", for: "htmlFor" };
369
+ // Prototype-free so a key like `constructor`/`hasOwnProperty` returns undefined
370
+ // (and the `?? k` fallback fires) instead of an inherited Object.prototype member.
371
+ const REACT_ATTR_NAME: Record<string, string> = Object.assign(Object.create(null), {
372
+ class: "className",
373
+ for: "htmlFor",
374
+ });
375
+
376
+ // React-meaningful prop names that must never survive into a user override's
377
+ // attrs object (dangerouslySetInnerHTML crashes the render tree; ref/key/etc.
378
+ // inject internals). Mirrors html-to-react's PROP_DENY.
379
+ const ATTR_DENY = new Set([
380
+ "dangerouslysetinnerhtml", "ref", "key", "defaultvalue", "defaultchecked",
381
+ "suppresshydrationwarning", "suppresscontenteditablewarning",
382
+ ]);
383
+
384
+ // Forward only names made of ASCII letters, digits and `-` (the REACT_ATTR_NAME
385
+ // renames pass too), so `__proto__` never reaches a prop; the `i` flag still admits mixed-case names and `constructor`.
386
+ const SAFE_ATTR_NAME = /^[a-z][a-z0-9-]*$/i;
366
387
 
367
388
  /** Convert sanitized HTML attribute pairs into a React-spreadable object,
368
389
  * renaming the two names React requires (`class`→`className`, `for`→`htmlFor`).
369
- * Other names (including `data-*` / `aria-*`) pass through unchanged. */
390
+ * Other names (including `data-*` / `aria-*`) pass through unchanged. Drops
391
+ * inline event handlers and React-meaningful/unsafe names as defense-in-depth
392
+ * (the Rust `sanitize_attrs` is the primary gate; this keeps the React layer
393
+ * safe on its own when attrs are handed to user override components). */
370
394
  function reactAttrs(pairs: [string, string][]): Record<string, string> {
371
395
  const out: Record<string, string> = {};
372
- for (const [k, v] of pairs) out[REACT_ATTR_NAME[k] ?? k] = v;
396
+ for (const [k, v] of pairs) {
397
+ const lower = k.toLowerCase();
398
+ if (lower.startsWith("on")) continue;
399
+ if (ATTR_DENY.has(lower)) continue;
400
+ if (!(lower in REACT_ATTR_NAME) && !SAFE_ATTR_NAME.test(k)) continue;
401
+ out[REACT_ATTR_NAME[lower] ?? k] = v;
402
+ }
373
403
  return out;
374
404
  }
375
405
 
package/src/server.tsx CHANGED
@@ -58,11 +58,11 @@ export function initFlux(opts?: { wasm?: BufferSource | WebAssembly.Module }): P
58
58
  initPromise = (async () => {
59
59
  const wasmUrl = new URL("./wasm/flux_md_core_bg.wasm", import.meta.url);
60
60
  if (wasmUrl.protocol === "file:") {
61
- // Node: read the bytes (Node's fetch can't load file://). A non-literal
62
- // specifier keeps `node:fs` out of web bundles and off tsc's module graph
63
- // (no @types/node needed to compile this source).
64
- const nodeFs = "node:fs/promises";
65
- const { readFile } = await import(nodeFs);
61
+ // Node: read the bytes (Node's fetch can't load file://). The literal
62
+ // `node:` specifier is externalized by bundlers, so node:fs never reaches
63
+ // a web bundle (this branch is also file:-only, never true in browsers).
64
+ // @ts-ignore no @types/node in this package; node:fs/promises is a builtin.
65
+ const { readFile } = await import("node:fs/promises");
66
66
  initFluxSync(await readFile(wasmUrl));
67
67
  } else {
68
68
  await initWasmAsync({ module_or_path: wasmUrl });
@@ -87,6 +87,12 @@ function makeParser(config?: ParserConfig): FluxParser {
87
87
  p.setUnsafeHtml(config?.unsafeHtml ?? false);
88
88
  p.setComponentTags(config?.componentTags ?? []);
89
89
  p.setInlineComponentTags(config?.inlineComponentTags ?? []);
90
+ // Engage the safe raw-HTML sanitizer when either list is provided (even []).
91
+ p.setHtmlSanitize(
92
+ config?.htmlAllowlist !== undefined || config?.dropHtmlTags !== undefined,
93
+ config?.htmlAllowlist ?? [],
94
+ config?.dropHtmlTags ?? [],
95
+ );
90
96
  p.setBlockData(config?.blockData ?? false);
91
97
  return p;
92
98
  }
package/src/types-core.ts CHANGED
@@ -261,6 +261,26 @@ export interface ParserConfig {
261
261
  * `componentTags`. Empty/omitted = off.
262
262
  */
263
263
  inlineComponentTags?: string[];
264
+ /**
265
+ * Opt-in **safe raw-HTML allowlist**. Setting this (even to `[]`) engages a
266
+ * sanitizer that renders a safe subset of *inline* raw HTML **without**
267
+ * `unsafeHtml`: an **empty** array means "allow all tags except a built-in
268
+ * dangerous set" (`script`, `style`, `iframe`, `object`, `embed`, `form`,
269
+ * `input`, `svg`, …); a **non-empty** array renders only those tags (e.g.
270
+ * `["br","sub","sup"]`) and escapes the rest. Every rendered tag's attributes
271
+ * are sanitized (event handlers dropped, dangerous URL schemes → `#`), and HTML
272
+ * comments are dropped. Block-level raw HTML stays escaped (sanitize is
273
+ * inline-scoped for now). Unset/omitted = off (raw HTML handling unchanged).
274
+ * Matching is case-insensitive. See also {@link dropHtmlTags}.
275
+ */
276
+ htmlAllowlist?: string[];
277
+ /**
278
+ * Tags removed entirely (markup dropped; any text between an open/close pair
279
+ * stays as inert text) — e.g. app marker tags, or belt-and-suspenders
280
+ * `["script","style"]`. Setting this (even to `[]`) also engages the safe
281
+ * raw-HTML sanitizer (see {@link htmlAllowlist}). Case-insensitive.
282
+ */
283
+ dropHtmlTags?: string[];
264
284
  /**
265
285
  * Opt-in structured table data. When on, a `Table` block's `kind.data` is
266
286
  * populated with `{ headers, rows, aligns }` (each cell `{ text, html }`) so a
@@ -71,6 +71,15 @@ export class FluxParser {
71
71
  * `<div class="math math-display">` for a KaTeX pass on the JS side.
72
72
  */
73
73
  setGfmMath(on: boolean): void;
74
+ /**
75
+ * Engage the safe raw-HTML sanitizer. When `on`, inline raw HTML renders
76
+ * sanitized without full unsafe HTML: `allow` empty = allow all tags except
77
+ * a built-in dangerous set (`script`, `style`, `iframe`, …); `allow`
78
+ * non-empty = only those render (others escaped); `drop` tags are removed
79
+ * entirely; HTML comments are dropped; every rendered tag's attributes are
80
+ * sanitized. Off by default (raw-HTML handling unchanged).
81
+ */
82
+ setHtmlSanitize(on: boolean, allow: string[], drop: string[]): void;
74
83
  /**
75
84
  * Set the opt-in INLINE component-tag allowlist (e.g. `["tik", "cite"]`).
76
85
  * An allowlisted inline `<tik>…</tik>` (or self-closing `<tik/>`) renders as
@@ -105,6 +114,7 @@ export interface InitOutput {
105
114
  readonly fluxparser_setGfmAutolinks: (a: number, b: number) => void;
106
115
  readonly fluxparser_setGfmFootnotes: (a: number, b: number) => void;
107
116
  readonly fluxparser_setGfmMath: (a: number, b: number) => void;
117
+ readonly fluxparser_setHtmlSanitize: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
108
118
  readonly fluxparser_setInlineComponentTags: (a: number, b: number, c: number) => void;
109
119
  readonly fluxparser_setUnsafeHtml: (a: number, b: number) => void;
110
120
  readonly __wbindgen_export: (a: number, b: number) => number;
@@ -171,6 +171,24 @@ export class FluxParser {
171
171
  setGfmMath(on) {
172
172
  wasm.fluxparser_setGfmMath(this.__wbg_ptr, on);
173
173
  }
174
+ /**
175
+ * Engage the safe raw-HTML sanitizer. When `on`, inline raw HTML renders
176
+ * sanitized without full unsafe HTML: `allow` empty = allow all tags except
177
+ * a built-in dangerous set (`script`, `style`, `iframe`, …); `allow`
178
+ * non-empty = only those render (others escaped); `drop` tags are removed
179
+ * entirely; HTML comments are dropped; every rendered tag's attributes are
180
+ * sanitized. Off by default (raw-HTML handling unchanged).
181
+ * @param {boolean} on
182
+ * @param {string[]} allow
183
+ * @param {string[]} drop
184
+ */
185
+ setHtmlSanitize(on, allow, drop) {
186
+ const ptr0 = passArrayJsValueToWasm0(allow, wasm.__wbindgen_export);
187
+ const len0 = WASM_VECTOR_LEN;
188
+ const ptr1 = passArrayJsValueToWasm0(drop, wasm.__wbindgen_export);
189
+ const len1 = WASM_VECTOR_LEN;
190
+ wasm.fluxparser_setHtmlSanitize(this.__wbg_ptr, on, ptr0, len0, ptr1, len1);
191
+ }
174
192
  /**
175
193
  * Set the opt-in INLINE component-tag allowlist (e.g. `["tik", "cite"]`).
176
194
  * An allowlisted inline `<tik>…</tik>` (or self-closing `<tik/>`) renders as
Binary file
@@ -16,6 +16,7 @@ export const fluxparser_setGfmAlerts: (a: number, b: number) => void;
16
16
  export const fluxparser_setGfmAutolinks: (a: number, b: number) => void;
17
17
  export const fluxparser_setGfmFootnotes: (a: number, b: number) => void;
18
18
  export const fluxparser_setGfmMath: (a: number, b: number) => void;
19
+ export const fluxparser_setHtmlSanitize: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
19
20
  export const fluxparser_setInlineComponentTags: (a: number, b: number, c: number) => void;
20
21
  export const fluxparser_setUnsafeHtml: (a: number, b: number) => void;
21
22
  export const __wbindgen_export: (a: number, b: number) => number;
@@ -2,7 +2,7 @@
2
2
  "name": "flux-md-core",
3
3
  "type": "module",
4
4
  "description": "Incremental, streaming-aware markdown parser with speculative closure",
5
- "version": "0.14.0",
5
+ "version": "0.15.0",
6
6
  "license": "MIT",
7
7
  "files": [
8
8
  "flux_md_core_bg.wasm",
package/src/worker.ts CHANGED
@@ -31,6 +31,12 @@ const core = new WorkerCore({
31
31
  p.setUnsafeHtml(c?.unsafeHtml ?? false);
32
32
  p.setComponentTags(c?.componentTags ?? []);
33
33
  p.setInlineComponentTags(c?.inlineComponentTags ?? []);
34
+ // Engage the safe raw-HTML sanitizer when either list is provided (even []).
35
+ p.setHtmlSanitize(
36
+ c?.htmlAllowlist !== undefined || c?.dropHtmlTags !== undefined,
37
+ c?.htmlAllowlist ?? [],
38
+ c?.dropHtmlTags ?? [],
39
+ );
34
40
  p.setBlockData(c?.blockData ?? false);
35
41
  return p;
36
42
  },