flux-md 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,44 @@ Notable changes to flux-md. Format based on
4
4
  [Keep a Changelog](https://keepachangelog.com/); this project aims to follow
5
5
  [Semantic Versioning](https://semver.org/).
6
6
 
7
+ ## 0.15.0 — 2026-06-17
8
+
9
+ ### Added
10
+
11
+ - **Safe raw-HTML sanitizer (`htmlAllowlist` / `dropHtmlTags`)** — render a safe
12
+ subset of *inline* raw HTML (`<br>`, `<sub>`, `<sup>`, `<mark>`, …) **without**
13
+ `unsafeHtml`. Setting either list (even to `[]`) engages it: `htmlAllowlist`
14
+ non-empty renders only those tags (others escaped); **empty allows all tags
15
+ except a built-in, non-overridable dangerous set** (`script`, `style`,
16
+ `iframe`, `object`, `embed`, `form`, `svg`, `xmp`, `plaintext`, …);
17
+ `dropHtmlTags` removes tags entirely. Every rendered tag's attributes are
18
+ sanitized — `on*` handlers and `style` (a CSS beacon / clickjacking vector)
19
+ dropped, dangerous URL schemes (incl. multi-encoded) → `#`. Inline-scoped;
20
+ block-level raw HTML stays escaped. Matching is case-insensitive.
21
+
22
+ ### Fixed
23
+
24
+ - **HTML comments are dropped instead of escaped to visible text.** `<!--mk:id-->`
25
+ (a common LLM marker) previously rendered as a literal `&lt;!--…--&gt;` run or a
26
+ `<pre><code>` block; it now has no visible representation, in every mode except
27
+ bare `unsafeHtml` pass-through (which keeps it verbatim for CommonMark fidelity —
28
+ the browser ignores it either way). A comment-led block with trailing content
29
+ keeps that content (only comment-*only* blocks are dropped).
30
+
31
+ ### Security
32
+
33
+ - The dangerous-tag set is **non-overridable** (allowlisting `script`/`iframe`/`svg`
34
+ still drops them), `style` is stripped from every sanitized/component tag, and
35
+ raw-text elements (`xmp`/`plaintext`/`noembed`/`noframes`/`listing`) are blocked
36
+ in allow-all mode — closing CSS-exfiltration / clickjacking / DOM-corruption
37
+ vectors found in adversarial review. The React `htmlToReact` path also applies a
38
+ `style` value-filter as defense-in-depth (safe declarations like `text-align`
39
+ still pass).
40
+
41
+ Feature-off output is byte-identical except HTML comments now drop (the
42
+ CommonMark/GFM suites run with `unsafeHtml` on, so the CommonMark (652-example) and GFM conformance floors are
43
+ unaffected).
44
+
7
45
  ## 0.14.0 — 2026-06-17
8
46
 
9
47
  ### Added
package/README.md CHANGED
@@ -558,6 +558,8 @@ const client = new FluxClient({
558
558
  unsafeHtml: false, // pass raw HTML through (default false — keep it false for untrusted input)
559
559
  componentTags: ["Thinking", "Callout"], // BLOCK custom tags w/ markdown inside (default none)
560
560
  inlineComponentTags: ["tik", "cite"], // INLINE custom tags (chips/citations) w/ markdown inside (default none)
561
+ htmlAllowlist: ["br", "sub", "sup"], // safe raw-HTML sanitizer: [] = allow all but dangerous; list = only those (default off)
562
+ dropHtmlTags: [], // tags removed entirely (comments always dropped when sanitizing; default off)
561
563
  blockData: true, // opt-in structured kind.data per block (default false — see "Structured block data")
562
564
  },
563
565
  });
@@ -592,6 +594,9 @@ When to enable each flag:
592
594
  - `inlineComponentTags: ["tik", …]` — same idea for **inline** custom elements
593
595
  that sit inside a paragraph, heading, list item, or **table cell** (ticker
594
596
  chips, citations, `@mentions`). See [Inline component tags](#inline-component-tags).
597
+ - `htmlAllowlist` / `dropHtmlTags` — render a **safe subset of raw HTML** (e.g.
598
+ `<br>`, `<sub>`, `<sup>`) natively without `unsafeHtml`, drop specific tags, and
599
+ drop HTML comments. See [Safe raw HTML](#safe-raw-html).
595
600
 
596
601
  **Footnotes** (`gfmFootnotes`) work in streaming with one honest caveat: a
597
602
  `[^1]` reference renders speculatively the moment it's seen (committed blocks
@@ -826,6 +831,37 @@ surrounding content.
826
831
  > renders inline-in-cells too — `inlineComponentTags` simply replaces that
827
832
  > workaround with first-class inline elements.
828
833
 
834
+ ### Safe raw HTML
835
+
836
+ LLMs emit a little raw HTML — `<br>`, `<sub>`/`<sup>`, `<mark>`, and HTML comments
837
+ as markers (`<!--mk:id-->`). `unsafeHtml` is all-or-nothing; instead opt into a
838
+ **sanitizer** that renders a safe subset natively. Setting `htmlAllowlist` and/or
839
+ `dropHtmlTags` (even to `[]`) engages it:
840
+
841
+ ```ts
842
+ // Render only these inline tags; escape everything else:
843
+ new FluxClient({ config: { htmlAllowlist: ["br", "sub", "sup", "mark"] } });
844
+
845
+ // Or allow everything except a built-in dangerous set:
846
+ new FluxClient({ config: { htmlAllowlist: [] } });
847
+ ```
848
+
849
+ - **HTML comments are dropped** — no more `<!--mk:id-->` surfacing as escaped text
850
+ — in every mode except bare `unsafeHtml` pass-through.
851
+ - **`htmlAllowlist: ["br", …]`** renders only those inline tags; everything else is
852
+ escaped. **`htmlAllowlist: []`** (empty) allows *all* tags **except a built-in
853
+ dangerous set** (`script`, `style`, `iframe`, `object`, `embed`, `form`, `svg`,
854
+ `xmp`, `plaintext`, … — **non-overridable**: allowlisting one still drops it).
855
+ - **`dropHtmlTags: ["mk", …]`** removes those tags entirely (markup gone; inner
856
+ text stays as inert text).
857
+ - Every rendered tag's **attributes are sanitized**: `on*` handlers and `style`
858
+ (a CSS beacon / clickjacking vector) are dropped, and dangerous URL schemes
859
+ (`javascript:`, …, including multi-encoded) become `#`.
860
+ - **Scope:** *inline* raw HTML. Block-level raw HTML stays escaped for now (use
861
+ `unsafeHtml` **without** the sanitizer to render block HTML — when the sanitizer
862
+ is engaged, block HTML stays escaped even if `unsafeHtml` is also on). Tag
863
+ matching is case-insensitive.
864
+
829
865
  ### Types
830
866
 
831
867
  ```ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "flux-md",
3
- "version": "0.14.0",
3
+ "version": "0.15.0",
4
4
  "description": "Zero-dep streaming markdown for the browser. Rust→WASM core, Web Worker per stream, incremental parse with speculative closure.",
5
5
  "type": "module",
6
6
  "sideEffects": ["./src/worker.ts", "./src/styles.css"],
@@ -110,6 +110,27 @@ export function parseStyle(css: string): Record<string, string> {
110
110
  return out;
111
111
  }
112
112
 
113
// CSS values that beacon/exfiltrate (`url(`), execute (legacy `expression(`,
// `-moz-binding`, `behavior:`), or pull external resources (`@import`,
// `image-set(`). Defense-in-depth: the core sanitizer already drops `style`, but
// `htmlToReact` is exported and may be handed untrusted HTML directly.
const DANGEROUS_CSS_VALUE = /url\(|expression\(|image-set\(|-moz-binding|@import|behavior\s*:/i;

// Property names that are dangerous regardless of their value. Compared with
// dashes removed and lowercased, so `-moz-binding`, `MozBinding`, and
// `mozBinding` all match whichever key form the style parser produced.
const DANGEROUS_CSS_PROPS = new Set(["behavior", "mozbinding"]);

// `position` keywords that pin an element over the viewport (clickjacking).
const OVERLAY_POSITION = /\b(?:fixed|sticky)\b/i;

/** Return `value` as a CSS tokenizer would read it: comments removed and
 *  backslash escapes decoded (`\75rl(` → `url(`, `fi\78 ed` → `fixed`). Used
 *  only for *matching*; the original text is what gets emitted. */
function decodeCssEscapes(value: string): string {
  return value
    .replace(/\/\*[\s\S]*?\*\//g, "")
    .replace(
      // Hex escape: 1–6 hex digits plus one optional trailing whitespace char;
      // otherwise a backslash makes the next (non-newline) character literal.
      /\\([0-9a-f]{1,6})[ \t\r\n\f]?|\\([^\r\n\f])/gi,
      (_match: string, hex?: string, literal?: string): string => {
        if (hex === undefined) return literal ?? "";
        const cp = parseInt(hex, 16);
        // NUL, surrogates, and out-of-range code points decode to U+FFFD.
        const invalid = cp === 0 || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff);
        return invalid ? "\ufffd" : String.fromCodePoint(cp);
      },
    );
}

/** Strip CSS declarations that can beacon/exfiltrate, execute, or overlay the
 *  viewport (`position: fixed/sticky` → clickjacking). Safe declarations
 *  (`text-align`, `color`, …) — including flux's own table-alignment style —
 *  pass through untouched.
 *
 *  Each value is checked both verbatim and with CSS escapes/comments decoded,
 *  so an escaped spelling of a blocked token cannot slip past the filter.
 *  Declarations whose *property* is `behavior` / `-moz-binding` are dropped
 *  outright.
 *
 *  @param style Parsed declarations (property → value).
 *  @returns A new object holding only the declarations considered safe; the
 *           input is not mutated. */
function safeStyle(style: Record<string, string>): Record<string, string> {
  const out: Record<string, string> = {};
  for (const [k, v] of Object.entries(style)) {
    const prop = k.replace(/-/g, "").toLowerCase();
    if (DANGEROUS_CSS_PROPS.has(prop)) continue;

    const decoded = decodeCssEscapes(v);
    // Test the raw text too, so this filter never accepts something the
    // plain value check would have rejected (e.g. a token inside a comment).
    if (DANGEROUS_CSS_VALUE.test(v) || DANGEROUS_CSS_VALUE.test(decoded)) continue;
    if (prop === "position" && (OVERLAY_POSITION.test(v) || OVERLAY_POSITION.test(decoded))) continue;

    out[k] = v;
  }
  return out;
}
133
+
113
134
  /** Parse one opening tag starting at `start` (the `<`). */
114
135
  function parseOpenTag(html: string, start: number) {
115
136
  let i = start + 1;
@@ -245,7 +266,7 @@ function attrsToProps(tag: string, attrs: Record<string, string | true>, key: st
245
266
  // future React behavior.
246
267
  if (lower.startsWith("on")) continue;
247
268
  if (lower === "style" && typeof value === "string") {
248
- props.style = parseStyle(value);
269
+ props.style = safeStyle(parseStyle(value));
249
270
  continue;
250
271
  }
251
272
  // Neutralize dangerous-scheme URLs (javascript:, vbscript:, data:text/html).
package/src/server.tsx CHANGED
@@ -87,6 +87,12 @@ function makeParser(config?: ParserConfig): FluxParser {
87
87
  p.setUnsafeHtml(config?.unsafeHtml ?? false);
88
88
  p.setComponentTags(config?.componentTags ?? []);
89
89
  p.setInlineComponentTags(config?.inlineComponentTags ?? []);
90
+ // Engage the safe raw-HTML sanitizer when either list is provided (even []).
91
+ p.setHtmlSanitize(
92
+ config?.htmlAllowlist !== undefined || config?.dropHtmlTags !== undefined,
93
+ config?.htmlAllowlist ?? [],
94
+ config?.dropHtmlTags ?? [],
95
+ );
90
96
  p.setBlockData(config?.blockData ?? false);
91
97
  return p;
92
98
  }
package/src/types-core.ts CHANGED
@@ -261,6 +261,26 @@ export interface ParserConfig {
261
261
  * `componentTags`. Empty/omitted = off.
262
262
  */
263
263
  inlineComponentTags?: string[];
264
+ /**
265
+ * Opt-in **safe raw-HTML allowlist**. Setting this (even to `[]`) engages a
266
+ * sanitizer that renders a safe subset of *inline* raw HTML **without**
267
+ * `unsafeHtml`: an **empty** array means "allow all tags except a built-in
268
+ * dangerous set" (`script`, `style`, `iframe`, `object`, `embed`, `form`,
269
+ * `input`, `svg`, …; non-overridable — allowlisting one still drops it); a **non-empty** array renders only those tags (e.g.
270
+ * `["br","sub","sup"]`) and escapes the rest. Every rendered tag's attributes
271
+ * are sanitized (`on*` event handlers and `style` dropped, dangerous URL schemes → `#`), and HTML
272
+ * comments are dropped. Block-level raw HTML stays escaped (sanitize is
273
+ * inline-scoped for now). Unset/omitted = off (raw HTML handling unchanged).
274
+ * Matching is case-insensitive. See also {@link dropHtmlTags}.
275
+ */
276
+ htmlAllowlist?: string[];
277
+ /**
278
+ * Tags removed entirely (markup dropped; any text between an open/close pair
279
+ * stays as inert text) — e.g. app marker tags, or belt-and-suspenders
280
+ * `["script","style"]`. Setting this (even to `[]`) also engages the safe
281
+ * raw-HTML sanitizer (see {@link htmlAllowlist}). Case-insensitive.
282
+ */
283
+ dropHtmlTags?: string[];
264
284
  /**
265
285
  * Opt-in structured table data. When on, a `Table` block's `kind.data` is
266
286
  * populated with `{ headers, rows, aligns }` (each cell `{ text, html }`) so a
@@ -71,6 +71,15 @@ export class FluxParser {
71
71
  * `<div class="math math-display">` for a KaTeX pass on the JS side.
72
72
  */
73
73
  setGfmMath(on: boolean): void;
74
+ /**
75
+ * Engage the safe raw-HTML sanitizer. When `on`, inline raw HTML renders
76
+ * sanitized without full unsafe HTML: `allow` empty = allow all tags except
77
+ * a built-in dangerous set (`script`, `style`, `iframe`, …); `allow`
78
+ * non-empty = only those render (others escaped); `drop` tags are removed
79
+ * entirely; HTML comments are dropped; every rendered tag's attributes are
80
+ * sanitized. Off by default (raw-HTML handling unchanged).
81
+ */
82
+ setHtmlSanitize(on: boolean, allow: string[], drop: string[]): void;
74
83
  /**
75
84
  * Set the opt-in INLINE component-tag allowlist (e.g. `["tik", "cite"]`).
76
85
  * An allowlisted inline `<tik>…</tik>` (or self-closing `<tik/>`) renders as
@@ -105,6 +114,7 @@ export interface InitOutput {
105
114
  readonly fluxparser_setGfmAutolinks: (a: number, b: number) => void;
106
115
  readonly fluxparser_setGfmFootnotes: (a: number, b: number) => void;
107
116
  readonly fluxparser_setGfmMath: (a: number, b: number) => void;
117
+ readonly fluxparser_setHtmlSanitize: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
108
118
  readonly fluxparser_setInlineComponentTags: (a: number, b: number, c: number) => void;
109
119
  readonly fluxparser_setUnsafeHtml: (a: number, b: number) => void;
110
120
  readonly __wbindgen_export: (a: number, b: number) => number;
@@ -171,6 +171,24 @@ export class FluxParser {
171
171
  setGfmMath(on) {
172
172
  wasm.fluxparser_setGfmMath(this.__wbg_ptr, on);
173
173
  }
174
+ /**
175
+ * Engage the safe raw-HTML sanitizer. When `on`, inline raw HTML renders
176
+ * sanitized without full unsafe HTML: `allow` empty = allow all tags except
177
+ * a built-in dangerous set (`script`, `style`, `iframe`, …); `allow`
178
+ * non-empty = only those render (others escaped); `drop` tags are removed
179
+ * entirely; HTML comments are dropped; every rendered tag's attributes are
180
+ * sanitized. Off by default (raw-HTML handling unchanged).
181
+ * @param {boolean} on
182
+ * @param {string[]} allow
183
+ * @param {string[]} drop
184
+ */
185
+ setHtmlSanitize(on, allow, drop) {
186
+ const ptr0 = passArrayJsValueToWasm0(allow, wasm.__wbindgen_export);
187
+ const len0 = WASM_VECTOR_LEN;
188
+ const ptr1 = passArrayJsValueToWasm0(drop, wasm.__wbindgen_export);
189
+ const len1 = WASM_VECTOR_LEN;
190
+ wasm.fluxparser_setHtmlSanitize(this.__wbg_ptr, on, ptr0, len0, ptr1, len1);
191
+ }
174
192
  /**
175
193
  * Set the opt-in INLINE component-tag allowlist (e.g. `["tik", "cite"]`).
176
194
  * An allowlisted inline `<tik>…</tik>` (or self-closing `<tik/>`) renders as
Binary file
@@ -16,6 +16,7 @@ export const fluxparser_setGfmAlerts: (a: number, b: number) => void;
16
16
  export const fluxparser_setGfmAutolinks: (a: number, b: number) => void;
17
17
  export const fluxparser_setGfmFootnotes: (a: number, b: number) => void;
18
18
  export const fluxparser_setGfmMath: (a: number, b: number) => void;
19
+ export const fluxparser_setHtmlSanitize: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
19
20
  export const fluxparser_setInlineComponentTags: (a: number, b: number, c: number) => void;
20
21
  export const fluxparser_setUnsafeHtml: (a: number, b: number) => void;
21
22
  export const __wbindgen_export: (a: number, b: number) => number;
@@ -2,7 +2,7 @@
2
2
  "name": "flux-md-core",
3
3
  "type": "module",
4
4
  "description": "Incremental, streaming-aware markdown parser with speculative closure",
5
- "version": "0.14.0",
5
+ "version": "0.15.0",
6
6
  "license": "MIT",
7
7
  "files": [
8
8
  "flux_md_core_bg.wasm",
package/src/worker.ts CHANGED
@@ -31,6 +31,12 @@ const core = new WorkerCore({
31
31
  p.setUnsafeHtml(c?.unsafeHtml ?? false);
32
32
  p.setComponentTags(c?.componentTags ?? []);
33
33
  p.setInlineComponentTags(c?.inlineComponentTags ?? []);
34
+ // Engage the safe raw-HTML sanitizer when either list is provided (even []).
35
+ p.setHtmlSanitize(
36
+ c?.htmlAllowlist !== undefined || c?.dropHtmlTags !== undefined,
37
+ c?.htmlAllowlist ?? [],
38
+ c?.dropHtmlTags ?? [],
39
+ );
34
40
  p.setBlockData(c?.blockData ?? false);
35
41
  return p;
36
42
  },