flux-md 0.14.0 → 0.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +103 -0
- package/README.md +73 -0
- package/package.json +1 -1
- package/src/client.ts +15 -1
- package/src/element.ts +9 -1
- package/src/hi.ts +5 -2
- package/src/html-to-react.ts +50 -4
- package/src/react.tsx +33 -3
- package/src/server.tsx +11 -5
- package/src/types-core.ts +20 -0
- package/src/wasm/flux_md_core.d.ts +10 -0
- package/src/wasm/flux_md_core.js +18 -0
- package/src/wasm/flux_md_core_bg.wasm +0 -0
- package/src/wasm/flux_md_core_bg.wasm.d.ts +1 -0
- package/src/wasm/package.json +1 -1
- package/src/worker.ts +6 -0
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,109 @@ Notable changes to flux-md. Format based on
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/); this project aims to follow
|
|
5
5
|
[Semantic Versioning](https://semver.org/).
|
|
6
6
|
|
|
7
|
+
## 0.15.1 — 2026-06-22
|
|
8
|
+
|
|
9
|
+
### Security
|
|
10
|
+
|
|
11
|
+
- **XSS — dangerous-scheme autolinks are neutralized.** A CommonMark URI autolink
|
|
12
|
+
(`<javascript:alert(1)>`, `<vbscript:…>`, `<file:…>`) previously emitted a live
|
|
13
|
+
`href`, because autolinks bypassed the scheme allowlist that regular links go
|
|
14
|
+
through. They now route through the same decode-stable dangerous-scheme filter:
|
|
15
|
+
the `href` becomes `#` while the visible link text is unchanged. `file:` is now
|
|
16
|
+
blocked everywhere (links, autolinks, URL attributes) — it has no legitimate use
|
|
17
|
+
in rendered untrusted markdown and is a local-resource / phishing vector in
|
|
18
|
+
privileged contexts (Electron, extensions, `file://` origins).
|
|
19
|
+
- **Component-tag / `htmlToReact` attribute hardening.** Sanitized attributes now
|
|
20
|
+
also drop React-meaningful names (`dangerouslySetInnerHTML`, `ref`, `key`,
|
|
21
|
+
`defaultValue`, `defaultChecked`, `suppressHydrationWarning`, …) so a hostile
|
|
22
|
+
attribute can't crash the render tree or smuggle in a prop. Attribute→prop
|
|
23
|
+
lookup maps are prototype-free (`Object.create(null)`), and only HTML / `data-`
|
|
24
|
+
/ `aria-` attribute names are forwarded to React.
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- **ReDoS / quadratic blow-ups on untrusted input.**
|
|
29
|
+
- Highlighter (`hi.ts`): the JS/TS regex-literal and bash double-quoted-string
|
|
30
|
+
patterns could backtrack quadratically on crafted code blocks; both rewritten
|
|
31
|
+
to linear forms, plus a 50 KB per-block size guard.
|
|
32
|
+
- URL scheme check: the decode-to-fixpoint loop (Rust `is_dangerous_scheme` and
|
|
33
|
+
JS `safeUrl`) is capped at 8 passes — still catches multi-encoded
|
|
34
|
+
`javascript&amp;#58;` payloads, no longer O(n²) on `&`-spam.
|
|
35
|
+
- Inline parser: nested / unbalanced link-bracket scanning is bounded
|
|
36
|
+
(depth + length caps); GFM extended-autolink trailing-paren trimming is now
|
|
37
|
+
linear instead of recounting the span each iteration.
|
|
38
|
+
|
|
39
|
+
### Changed
|
|
40
|
+
|
|
41
|
+
- **`flux-md/server` uses a literal `import("node:fs/promises")`** instead of a
|
|
42
|
+
variable specifier, resolving the `dynamicRequire` supply-chain signal. Behavior
|
|
43
|
+
is unchanged — still a Node-only, `file:`-guarded branch.
|
|
44
|
+
- Added a **`## Security`** / supply-chain-transparency section to the README and a
|
|
45
|
+
documented **`socket.yml`** covering the inherent `nativeCode` / `networkAccess`
|
|
46
|
+
/ `filesystemAccess` signals (the WebAssembly core and the opt-in
|
|
47
|
+
`<flux-markdown src>` fetch).
|
|
48
|
+
|
|
49
|
+
### Performance
|
|
50
|
+
|
|
51
|
+
- **No redundant re-renders / rebuilds on no-op updates.**
|
|
52
|
+
- `<flux-markdown>` ignores a `setAttribute` whose value didn't change (a host
|
|
53
|
+
framework re-applying identical attributes no longer tears down the self-owned
|
|
54
|
+
client and reparses the whole document), and the `components` / `sanitize`
|
|
55
|
+
property setters skip the remount when assigned the same identity.
|
|
56
|
+
- `FluxClient.reset()` no longer notifies subscribers when the store was already
|
|
57
|
+
empty — skips a wasted, output-identical render pass.
|
|
58
|
+
- Documented that `sanitize` (like `components`) should be memoized/hoisted in
|
|
59
|
+
React, so a fresh closure each render doesn't bust the per-block memo.
|
|
60
|
+
- Added render-count / node-reuse / no-remount regression tests across the React,
|
|
61
|
+
DOM, store, custom-element, and Vue bindings, locking in that committed blocks
|
|
62
|
+
never re-render or rebuild as the stream grows (only the streaming tail does).
|
|
63
|
+
|
|
64
|
+
### Known limitations
|
|
65
|
+
|
|
66
|
+
- Streaming a single very large **unclosed** block (a multi-megabyte indented code
|
|
67
|
+
block, open HTML block, or footnote-disarmed list delivered across many chunks)
|
|
68
|
+
is still O(n²) in the uncommitted-tail length. A bounded incremental cache for
|
|
69
|
+
these resumable containers is tracked as follow-up; finalized / closed blocks and
|
|
70
|
+
all other inputs are unaffected.
|
|
71
|
+
|
|
72
|
+
## 0.15.0 — 2026-06-17
|
|
73
|
+
|
|
74
|
+
### Added
|
|
75
|
+
|
|
76
|
+
- **Safe raw-HTML sanitizer (`htmlAllowlist` / `dropHtmlTags`)** — render a safe
|
|
77
|
+
subset of *inline* raw HTML (`<br>`, `<sub>`, `<sup>`, `<mark>`, …) **without**
|
|
78
|
+
`unsafeHtml`. Setting either list (even to `[]`) engages it: `htmlAllowlist`
|
|
79
|
+
non-empty renders only those tags (others escaped); **empty allows all tags
|
|
80
|
+
except a built-in, non-overridable dangerous set** (`script`, `style`,
|
|
81
|
+
`iframe`, `object`, `embed`, `form`, `svg`, `xmp`, `plaintext`, …);
|
|
82
|
+
`dropHtmlTags` removes tags entirely. Every rendered tag's attributes are
|
|
83
|
+
sanitized — `on*` handlers and `style` (a CSS beacon / clickjacking vector)
|
|
84
|
+
dropped, dangerous URL schemes (incl. multi-encoded) → `#`. Inline-scoped;
|
|
85
|
+
block-level raw HTML stays escaped. Matching is case-insensitive.
|
|
86
|
+
|
|
87
|
+
### Fixed
|
|
88
|
+
|
|
89
|
+
- **HTML comments are dropped instead of escaped to visible text.** `<!--mk:id-->`
|
|
90
|
+
(a common LLM marker) previously rendered as a literal `<!--…-->` run or a
|
|
91
|
+
`<pre><code>` block; it now has no visible representation, in every mode except
|
|
92
|
+
bare `unsafeHtml` pass-through (which keeps it verbatim for CommonMark fidelity —
|
|
93
|
+
the browser ignores it either way). A comment-led block with trailing content
|
|
94
|
+
keeps that content (only comment-*only* blocks are dropped).
|
|
95
|
+
|
|
96
|
+
### Security
|
|
97
|
+
|
|
98
|
+
- The dangerous-tag set is **non-overridable** (allowlisting `script`/`iframe`/`svg`
|
|
99
|
+
still drops them), `style` is stripped from every sanitized/component tag, and
|
|
100
|
+
raw-text elements (`xmp`/`plaintext`/`noembed`/`noframes`/`listing`) are blocked
|
|
101
|
+
in allow-all mode — closing CSS-exfiltration / clickjacking / DOM-corruption
|
|
102
|
+
vectors found in adversarial review. The React `htmlToReact` path mirrors the
|
|
103
|
+
`style` value-filter as defense-in-depth (safe declarations like `text-align`
|
|
104
|
+
still pass).
|
|
105
|
+
|
|
106
|
+
Feature-off output is byte-identical except HTML comments now drop (the
|
|
107
|
+
CommonMark/GFM suites run with `unsafeHtml` on, so the 652/GFM floors are
|
|
108
|
+
unaffected).
|
|
109
|
+
|
|
7
110
|
## 0.14.0 — 2026-06-17
|
|
8
111
|
|
|
9
112
|
### Added
|
package/README.md
CHANGED
|
@@ -558,6 +558,8 @@ const client = new FluxClient({
|
|
|
558
558
|
unsafeHtml: false, // pass raw HTML through (default false — keep it false for untrusted input)
|
|
559
559
|
componentTags: ["Thinking", "Callout"], // BLOCK custom tags w/ markdown inside (default none)
|
|
560
560
|
inlineComponentTags: ["tik", "cite"], // INLINE custom tags (chips/citations) w/ markdown inside (default none)
|
|
561
|
+
htmlAllowlist: ["br", "sub", "sup"], // safe raw-HTML sanitizer: [] = allow all but dangerous; list = only those (default off)
|
|
562
|
+
dropHtmlTags: [], // tags removed entirely (comments always dropped when sanitizing; default off)
|
|
561
563
|
blockData: true, // opt-in structured kind.data per block (default false — see "Structured block data")
|
|
562
564
|
},
|
|
563
565
|
});
|
|
@@ -592,6 +594,9 @@ When to enable each flag:
|
|
|
592
594
|
- `inlineComponentTags: ["tik", …]` — same idea for **inline** custom elements
|
|
593
595
|
that sit inside a paragraph, heading, list item, or **table cell** (ticker
|
|
594
596
|
chips, citations, `@mentions`). See [Inline component tags](#inline-component-tags).
|
|
597
|
+
- `htmlAllowlist` / `dropHtmlTags` — render a **safe subset of raw HTML** (e.g.
|
|
598
|
+
`<br>`, `<sub>`, `<sup>`) natively without `unsafeHtml`, drop specific tags, and
|
|
599
|
+
drop HTML comments. See [Safe raw HTML](#safe-raw-html).
|
|
595
600
|
|
|
596
601
|
**Footnotes** (`gfmFootnotes`) work in streaming with one honest caveat: a
|
|
597
602
|
`[^1]` reference renders speculatively the moment it's seen (committed blocks
|
|
@@ -826,6 +831,37 @@ surrounding content.
|
|
|
826
831
|
> renders inline-in-cells too — `inlineComponentTags` simply replaces that
|
|
827
832
|
> workaround with first-class inline elements.
|
|
828
833
|
|
|
834
|
+
### Safe raw HTML
|
|
835
|
+
|
|
836
|
+
LLMs emit a little raw HTML — `<br>`, `<sub>`/`<sup>`, `<mark>`, and HTML comments
|
|
837
|
+
as markers (`<!--mk:id-->`). `unsafeHtml` is all-or-nothing; instead opt into a
|
|
838
|
+
**sanitizer** that renders a safe subset natively. Setting `htmlAllowlist` and/or
|
|
839
|
+
`dropHtmlTags` (even to `[]`) engages it:
|
|
840
|
+
|
|
841
|
+
```ts
|
|
842
|
+
// Render only these inline tags; escape everything else:
|
|
843
|
+
new FluxClient({ config: { htmlAllowlist: ["br", "sub", "sup", "mark"] } });
|
|
844
|
+
|
|
845
|
+
// Or allow everything except a built-in dangerous set:
|
|
846
|
+
new FluxClient({ config: { htmlAllowlist: [] } });
|
|
847
|
+
```
|
|
848
|
+
|
|
849
|
+
- **HTML comments are dropped** — no more `<!--mk:id-->` surfacing as escaped text
|
|
850
|
+
— in every mode except bare `unsafeHtml` pass-through.
|
|
851
|
+
- **`htmlAllowlist: ["br", …]`** renders only those inline tags; everything else is
|
|
852
|
+
escaped. **`htmlAllowlist: []`** (empty) allows *all* tags **except a built-in
|
|
853
|
+
dangerous set** (`script`, `style`, `iframe`, `object`, `embed`, `form`, `svg`,
|
|
854
|
+
`xmp`, `plaintext`, … — **non-overridable**: allowlisting one still drops it).
|
|
855
|
+
- **`dropHtmlTags: ["mk", …]`** removes those tags entirely (markup gone; inner
|
|
856
|
+
text stays as inert text).
|
|
857
|
+
- Every rendered tag's **attributes are sanitized**: `on*` handlers and `style`
|
|
858
|
+
(a CSS beacon / clickjacking vector) are dropped, and dangerous URL schemes
|
|
859
|
+
(`javascript:`, …, including multi-encoded) become `#`.
|
|
860
|
+
- **Scope:** *inline* raw HTML. Block-level raw HTML stays escaped for now (use
|
|
861
|
+
`unsafeHtml` **without** the sanitizer to render block HTML — when the sanitizer
|
|
862
|
+
is engaged, block HTML stays escaped even if `unsafeHtml` is also on). Tag
|
|
863
|
+
matching is case-insensitive.
|
|
864
|
+
|
|
829
865
|
### Types
|
|
830
866
|
|
|
831
867
|
```ts
|
|
@@ -959,6 +995,43 @@ genuinely hostile content where CSS-overlay/clickjacking matters, render inside
|
|
|
959
995
|
a sandboxed `<iframe>` instead — sanitization stops injection, not every
|
|
960
996
|
visual-overlay trick.
|
|
961
997
|
|
|
998
|
+
### Supply-chain transparency
|
|
999
|
+
|
|
1000
|
+
flux-md has **zero runtime dependencies** — no third-party packages are pulled in
|
|
1001
|
+
at runtime. The parsing core is Rust compiled to WebAssembly, reproducibly
|
|
1002
|
+
buildable from `crates/flux-md-core/` via `bun run build:wasm`.
|
|
1003
|
+
|
|
1004
|
+
**Native code (WebAssembly).** The shipped `flux_md_core_bg.wasm` (~200 KB) is
|
|
1005
|
+
first-party, built from the Rust source in this repo, and runs inside a sandboxed
|
|
1006
|
+
Web Worker (browser) or Node worker thread. Supply-chain scanners such as
|
|
1007
|
+
[Socket.dev](https://socket.dev) will flag it as `nativeCode` — this is accurate
|
|
1008
|
+
and expected. The WASM is not a vendored third-party binary; it is reproducible
|
|
1009
|
+
from source.
|
|
1010
|
+
|
|
1011
|
+
**Network access.** flux-md performs network I/O in exactly two scenarios, both
|
|
1012
|
+
caller-driven:
|
|
1013
|
+
|
|
1014
|
+
- `<flux-markdown src="URL">` — the Web Component fetches the URL you supply and
|
|
1015
|
+
streams the response. No URL is ever chosen by flux-md itself.
|
|
1016
|
+
- The wasm-bindgen glue (`wasm/flux_md_core.js`) loads the co-located `.wasm`
|
|
1017
|
+
asset via `fetch(new URL("…_bg.wasm", import.meta.url))` — bundlers resolve
|
|
1018
|
+
this to a local build artifact, not a remote endpoint.
|
|
1019
|
+
|
|
1020
|
+
flux-md has no telemetry, no analytics, and no first-party remote endpoints.
|
|
1021
|
+
Socket will flag the `networkAccess` signal — it is accurate and expected. In
|
|
1022
|
+
privileged contexts (browser extensions, Electron, environments where the
|
|
1023
|
+
same-origin policy may not apply), treat the `src` attribute value as you would
|
|
1024
|
+
any external URL and allowlist it in your CSP / security policy.
|
|
1025
|
+
|
|
1026
|
+
**Filesystem access (Node/SSR only).** `flux-md/server` reads the package's
|
|
1027
|
+
own `.wasm` file off disk on Node.js (Node's `fetch` cannot load `file://`
|
|
1028
|
+
URLs). This is a Node-only path; it reads only the package-internal asset and
|
|
1029
|
+
never touches caller-supplied paths. Socket will flag `filesystemAccess` — also
|
|
1030
|
+
accurate and expected.
|
|
1031
|
+
|
|
1032
|
+
The `socket.yml` at the repository root documents these signals with their
|
|
1033
|
+
justifications for Socket's GitHub app.
|
|
1034
|
+
|
|
962
1035
|
## Scaling
|
|
963
1036
|
|
|
964
1037
|
`FluxClient`s share a **worker pool** (`getDefaultPool()`), so concurrency
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "flux-md",
|
|
3
|
-
"version": "0.14.0",
|
|
3
|
+
"version": "0.15.1",
|
|
4
4
|
"description": "Zero-dep streaming markdown for the browser. Rust→WASM core, Web Worker per stream, incremental parse with speculative closure.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": ["./src/worker.ts", "./src/styles.css"],
|
package/src/client.ts
CHANGED
|
@@ -271,6 +271,15 @@ export function getDefaultPool(): FluxPool {
|
|
|
271
271
|
return defaultPool;
|
|
272
272
|
}
|
|
273
273
|
|
|
274
|
+
/** TEST-ONLY: drop the process-wide default pool so the next {@link getDefaultPool}
|
|
275
|
+
* rebuilds it (lazily, with the current global `Worker`). Lets a test file that
|
|
276
|
+
* drives the default pool start from a clean, deterministic state regardless of
|
|
277
|
+
* which other file warmed it first in bun's shared test process. Not part of the
|
|
278
|
+
* public API and not intended for normal runtime use. */
|
|
279
|
+
export function __resetDefaultPool(): void {
|
|
280
|
+
defaultPool = null;
|
|
281
|
+
}
|
|
282
|
+
|
|
274
283
|
// --------------------------------------------------------------------------
|
|
275
284
|
// Client
|
|
276
285
|
// --------------------------------------------------------------------------
|
|
@@ -514,6 +523,11 @@ export class FluxClient {
|
|
|
514
523
|
}
|
|
515
524
|
|
|
516
525
|
reset() {
|
|
526
|
+
// Only notify subscribers if there was content to clear: resetting an
|
|
527
|
+
// already-empty store leaves the view empty either way, so skip the no-op
|
|
528
|
+
// emit (which would otherwise drive every subscriber through a wasted,
|
|
529
|
+
// output-identical render pass).
|
|
530
|
+
const hadContent = this.store.snapshot.length > 0;
|
|
517
531
|
this.store = emptyBlockStore();
|
|
518
532
|
this.appendedBytes = 0;
|
|
519
533
|
this.patchCount = 0;
|
|
@@ -527,7 +541,7 @@ export class FluxClient {
|
|
|
527
541
|
// Same streamId + worker — the worker frees and lazily recreates the parser.
|
|
528
542
|
const pw = this.ensureAcquired();
|
|
529
543
|
this.pool.send(pw, { type: "reset", streamId: this.streamId });
|
|
530
|
-
this.emit();
|
|
544
|
+
if (hadContent) this.emit();
|
|
531
545
|
}
|
|
532
546
|
|
|
533
547
|
destroy() {
|
package/src/element.ts
CHANGED
|
@@ -90,6 +90,7 @@ export function defineFluxMarkdown(tag = "flux-markdown"): void {
|
|
|
90
90
|
return this.#components;
|
|
91
91
|
}
|
|
92
92
|
set components(value: DomComponents | undefined) {
|
|
93
|
+
if (value === this.#components) return; // no-op re-assign: don't remount
|
|
93
94
|
this.#components = value;
|
|
94
95
|
if (this.#connected) this.#remount();
|
|
95
96
|
}
|
|
@@ -98,6 +99,7 @@ export function defineFluxMarkdown(tag = "flux-markdown"): void {
|
|
|
98
99
|
return this.#sanitize;
|
|
99
100
|
}
|
|
100
101
|
set sanitize(value: ((html: string) => string) | undefined) {
|
|
102
|
+
if (value === this.#sanitize) return; // no-op re-assign: don't remount
|
|
101
103
|
this.#sanitize = value;
|
|
102
104
|
if (this.#connected) this.#remount();
|
|
103
105
|
}
|
|
@@ -155,10 +157,16 @@ export function defineFluxMarkdown(tag = "flux-markdown"): void {
|
|
|
155
157
|
}
|
|
156
158
|
}
|
|
157
159
|
|
|
158
|
-
attributeChangedCallback(name: string,
|
|
160
|
+
attributeChangedCallback(name: string, oldValue: string | null, newValue: string | null): void {
|
|
159
161
|
// attributeChangedCallback fires before connectedCallback for attributes
|
|
160
162
|
// present at upgrade; ignore until connected so config reads happen once.
|
|
161
163
|
if (!this.#connected) return;
|
|
164
|
+
// setAttribute fires this on EVERY set, including setting an attribute to
|
|
165
|
+
// its current value (common when a host framework re-applies the same
|
|
166
|
+
// attrs on re-render). A no-op value change must not tear down the client
|
|
167
|
+
// and reparse the whole document — only a genuine change proceeds.
|
|
168
|
+
// (Attribute removal yields null, distinct from an empty string.)
|
|
169
|
+
if (oldValue === newValue) return;
|
|
162
170
|
|
|
163
171
|
if (name === "markdown" || name === "src") {
|
|
164
172
|
// One-shot content source change — only for a self-owned client. A
|
package/src/hi.ts
CHANGED
|
@@ -55,7 +55,7 @@ const jsPats: Pat[] = [
|
|
|
55
55
|
["str", /"(?:\\.|[^"\\\n])*"/y],
|
|
56
56
|
["str", /'(?:\\.|[^'\\\n])*'/y],
|
|
57
57
|
["str", /`(?:\\.|[^`\\])*`/y],
|
|
58
|
-
["rx", /\/(
|
|
58
|
+
["rx", /\/(?![*/])(?:\\.|[^/\\\n])+\/[gimsuy]*/y],
|
|
59
59
|
["num", /\b(?:0x[\da-fA-F_]+|0b[01_]+|0o[0-7_]+|\d[\d_]*(?:\.\d[\d_]*)?(?:[eE][+-]?\d+)?)\b/y],
|
|
60
60
|
["ident", /[A-Za-z_$][\w$]*/y],
|
|
61
61
|
["pun", /[+\-*/=<>!&|^~?:;,.[\](){}]/y],
|
|
@@ -103,7 +103,7 @@ const goPats: Pat[] = [
|
|
|
103
103
|
|
|
104
104
|
const bashPats: Pat[] = [
|
|
105
105
|
["com", /#[^\n]*/y],
|
|
106
|
-
["str", /"(
|
|
106
|
+
["str", /"(?:\\.|[^"\\])*"/y],
|
|
107
107
|
["str", /'[^']*'/y],
|
|
108
108
|
["var", /\$\{[^}]+\}|\$\w+|\$[*@#?!$0-9]/y],
|
|
109
109
|
["num", /\b\d+\b/y],
|
|
@@ -187,6 +187,9 @@ function escapeHtml(s: string): string {
|
|
|
187
187
|
}
|
|
188
188
|
|
|
189
189
|
export function highlight(code: string, lang: string): string {
|
|
190
|
+
// Defense-in-depth: never tokenize a pathologically huge block on the main
|
|
191
|
+
// thread — fall back to plain escaped text.
|
|
192
|
+
if (code.length > 50_000) return escapeHtml(code);
|
|
190
193
|
const conf = LANGS[lang.toLowerCase()];
|
|
191
194
|
if (!conf) return escapeHtml(code);
|
|
192
195
|
|
package/src/html-to-react.ts
CHANGED
|
@@ -10,7 +10,10 @@ const VOID = new Set([
|
|
|
10
10
|
// Attribute name → React prop name, for the handful that differ. Anything not
|
|
11
11
|
// listed passes through verbatim (React forwards data-*/aria-* and lowercase
|
|
12
12
|
// attributes unchanged).
|
|
13
|
-
|
|
13
|
+
// Prototype-free map so an attribute named `constructor`/`hasOwnProperty`/etc.
|
|
14
|
+
// returns undefined (and the `?? name` fallback fires) rather than resolving to
|
|
15
|
+
// an inherited Object.prototype member.
|
|
16
|
+
const ATTR_MAP: Record<string, string> = Object.assign(Object.create(null), {
|
|
14
17
|
class: "className",
|
|
15
18
|
for: "htmlFor",
|
|
16
19
|
colspan: "colSpan",
|
|
@@ -26,7 +29,7 @@ const ATTR_MAP: Record<string, string> = {
|
|
|
26
29
|
crossorigin: "crossOrigin",
|
|
27
30
|
enterkeyhint: "enterKeyHint",
|
|
28
31
|
inputmode: "inputMode",
|
|
29
|
-
};
|
|
32
|
+
});
|
|
30
33
|
|
|
31
34
|
// URL-bearing attributes whose value must be scheme-checked. `htmlToReact` is
|
|
32
35
|
// exported and may be handed untrusted HTML directly; React happily renders a
|
|
@@ -34,6 +37,19 @@ const ATTR_MAP: Record<string, string> = {
|
|
|
34
37
|
// defense-in-depth — the core's own output is already sanitized.
|
|
35
38
|
const URL_ATTRS = new Set(["href", "src", "xlink:href", "formaction", "action", "poster", "data"]);
|
|
36
39
|
|
|
40
|
+
// React-meaningful prop names that must never be forwarded from (possibly
|
|
41
|
+
// untrusted) HTML attributes: `dangerouslySetInnerHTML` as a prop crashes the
|
|
42
|
+
// whole render tree (DoS), and ref/key/defaultValue/etc. are injectable.
|
|
43
|
+
const PROP_DENY = new Set([
|
|
44
|
+
"dangerouslysetinnerhtml", "ref", "key", "defaultvalue", "defaultchecked",
|
|
45
|
+
"suppresshydrationwarning", "suppresscontenteditablewarning",
|
|
46
|
+
]);
|
|
47
|
+
|
|
48
|
+
// Only forward attribute names that are a plain HTML attribute identifier
|
|
49
|
+
// (letters, digits, hyphens — so `__proto__` never reaches React props; the `i` flag still admits camelCase and bare words like `constructor`). The
|
|
50
|
+
// explicit ATTR_MAP renames and `xlink:href` are allowed past this gate.
|
|
51
|
+
const SAFE_ATTR_NAME = /^[a-z][a-z0-9-]*$/i;
|
|
52
|
+
|
|
37
53
|
/** Replace a dangerous-scheme URL with "#". Mirrors the Rust `is_dangerous_scheme`:
|
|
38
54
|
* strip control chars (C0, DEL, C1 — matching Rust char::is_control),
|
|
39
55
|
* lowercase, then match. The strip affects only the probe, never output. */
|
|
@@ -42,8 +58,10 @@ function safeUrl(value: string): string {
|
|
|
42
58
|
// reaches the DOM, so peel layers to a fixpoint before the scheme check —
|
|
43
59
|
// catches `javascript&#58;` and double-encoded `javascript&amp;#58;`. Only the
|
|
44
60
|
// probe is decoded; the returned value is untouched (safe URLs stay verbatim).
|
|
61
|
+
// Cap at 8 iterations: far beyond any legit URL (browsers entity-decode an
|
|
62
|
+
// href once), and bounds the loop so a hostile value can't make it quadratic.
|
|
45
63
|
let decoded = value;
|
|
46
|
-
for (let prev = ""; decoded !== prev; ) {
|
|
64
|
+
for (let i = 0, prev = ""; i < 8 && decoded !== prev; i++) {
|
|
47
65
|
prev = decoded;
|
|
48
66
|
decoded = decodeEntities(decoded);
|
|
49
67
|
}
|
|
@@ -110,6 +128,27 @@ export function parseStyle(css: string): Record<string, string> {
|
|
|
110
128
|
return out;
|
|
111
129
|
}
|
|
112
130
|
|
|
131
|
+
// CSS values that beacon/exfiltrate (`url(`), execute (legacy `expression(`,
|
|
132
|
+
// `-moz-binding`, `behavior:`), or pull external resources (`@import`,
|
|
133
|
+
// `image-set(`). Defense-in-depth: the core sanitizer already drops `style`, but
|
|
134
|
+
// `htmlToReact` is exported and may be handed untrusted HTML directly.
|
|
135
|
+
const DANGEROUS_CSS_VALUE = /url\(|expression\(|image-set\(|-moz-binding|@import|behavior\s*:/i;
|
|
136
|
+
|
|
137
|
+
/** Strip CSS declarations that can beacon/exfiltrate, execute, or overlay the
|
|
138
|
+
* viewport (`position: fixed/sticky` → clickjacking). Safe declarations
|
|
139
|
+
* (`text-align`, `color`, …) — including flux's own table-alignment style —
|
|
140
|
+
* pass through untouched. */
|
|
141
|
+
function safeStyle(style: Record<string, string>): Record<string, string> {
|
|
142
|
+
const out: Record<string, string> = {};
|
|
143
|
+
for (const k in style) {
|
|
144
|
+
const v = style[k];
|
|
145
|
+
if (DANGEROUS_CSS_VALUE.test(v)) continue;
|
|
146
|
+
if (k.toLowerCase() === "position" && /\b(?:fixed|sticky)\b/i.test(v)) continue;
|
|
147
|
+
out[k] = v;
|
|
148
|
+
}
|
|
149
|
+
return out;
|
|
150
|
+
}
|
|
151
|
+
|
|
113
152
|
/** Parse one opening tag starting at `start` (the `<`). */
|
|
114
153
|
function parseOpenTag(html: string, start: number) {
|
|
115
154
|
let i = start + 1;
|
|
@@ -244,8 +283,11 @@ function attrsToProps(tag: string, attrs: Record<string, string | true>, key: st
|
|
|
244
283
|
// React drops most lowercase `on*` attrs — this also covers casings and
|
|
245
284
|
// future React behavior.
|
|
246
285
|
if (lower.startsWith("on")) continue;
|
|
286
|
+
// Reject React-meaningful names that would crash the render tree or inject
|
|
287
|
+
// internals (dangerouslySetInnerHTML, ref, key, defaultValue, …).
|
|
288
|
+
if (PROP_DENY.has(lower)) continue;
|
|
247
289
|
if (lower === "style" && typeof value === "string") {
|
|
248
|
-
props.style = parseStyle(value);
|
|
290
|
+
props.style = safeStyle(parseStyle(value));
|
|
249
291
|
continue;
|
|
250
292
|
}
|
|
251
293
|
// Neutralize dangerous-scheme URLs (javascript:, vbscript:, data:text/html).
|
|
@@ -259,6 +301,10 @@ function attrsToProps(tag: string, attrs: Record<string, string | true>, key: st
|
|
|
259
301
|
props.defaultChecked = value === true ? true : value;
|
|
260
302
|
continue;
|
|
261
303
|
}
|
|
304
|
+
// Restrict forwarded ORIGINAL names to a plain HTML attribute identifier
|
|
305
|
+
// (plus the ATTR_MAP renames and xlink:href handled above) so weird casings
|
|
306
|
+
// / `__proto__` / `constructor` can never become a React prop.
|
|
307
|
+
if (!(lower in ATTR_MAP) && !SAFE_ATTR_NAME.test(name)) continue;
|
|
262
308
|
props[ATTR_MAP[lower] ?? name] = value;
|
|
263
309
|
}
|
|
264
310
|
return props;
|
package/src/react.tsx
CHANGED
|
@@ -100,6 +100,10 @@ interface FluxMarkdownProps {
|
|
|
100
100
|
* `unsafeHtml` on. flux-md stays zero-dep — you bring the sanitizer. The
|
|
101
101
|
* built-in code/math renderers operate on already-escaped content and are not
|
|
102
102
|
* run through it. When omitted, rendering is byte-identical and zero-cost.
|
|
103
|
+
*
|
|
104
|
+
* **Memoize / hoist this** (same trap as `components`): a fresh closure each
|
|
105
|
+
* render busts the per-block memo, so every block re-sanitizes and re-parses
|
|
106
|
+
* on every patch instead of only the streaming tail.
|
|
103
107
|
*/
|
|
104
108
|
sanitize?: (html: string) => string;
|
|
105
109
|
/** Appended to the root's `className` (the `flux-md` class is always present). */
|
|
@@ -362,14 +366,40 @@ export function blockKindProps(block: Block, components?: Components): BlockComp
|
|
|
362
366
|
return props;
|
|
363
367
|
}
|
|
364
368
|
|
|
365
|
-
|
|
369
|
+
// Prototype-free so a key like `constructor`/`hasOwnProperty` returns undefined
|
|
370
|
+
// (and the `?? k` fallback fires) instead of an inherited Object.prototype member.
|
|
371
|
+
const REACT_ATTR_NAME: Record<string, string> = Object.assign(Object.create(null), {
|
|
372
|
+
class: "className",
|
|
373
|
+
for: "htmlFor",
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
// React-meaningful prop names that must never survive into a user override's
|
|
377
|
+
// attrs object (dangerouslySetInnerHTML crashes the render tree; ref/key/etc.
|
|
378
|
+
// inject internals). Mirrors html-to-react's PROP_DENY.
|
|
379
|
+
const ATTR_DENY = new Set([
|
|
380
|
+
"dangerouslysetinnerhtml", "ref", "key", "defaultvalue", "defaultchecked",
|
|
381
|
+
"suppresshydrationwarning", "suppresscontenteditablewarning",
|
|
382
|
+
]);
|
|
383
|
+
|
|
384
|
+
// Forward only plain HTML attribute identifiers (the REACT_ATTR_NAME renames
|
|
385
|
+
// pass too), so names with other characters (e.g. `__proto__`) never reach a prop; the `i` flag still admits any casing and bare words like `constructor`.
|
|
386
|
+
const SAFE_ATTR_NAME = /^[a-z][a-z0-9-]*$/i;
|
|
366
387
|
|
|
367
388
|
/** Convert sanitized HTML attribute pairs into a React-spreadable object,
|
|
368
389
|
* renaming the two names React requires (`class`→`className`, `for`→`htmlFor`).
|
|
369
|
-
* Other names (including `data-*` / `aria-*`) pass through unchanged.
|
|
390
|
+
* Other names (including `data-*` / `aria-*`) pass through unchanged. Drops
|
|
391
|
+
* inline event handlers and React-meaningful/unsafe names as defense-in-depth
|
|
392
|
+
* (the Rust `sanitize_attrs` is the primary gate; this keeps the React layer
|
|
393
|
+
* safe on its own when attrs are handed to user override components). */
|
|
370
394
|
function reactAttrs(pairs: [string, string][]): Record<string, string> {
|
|
371
395
|
const out: Record<string, string> = {};
|
|
372
|
-
for (const [k, v] of pairs)
|
|
396
|
+
for (const [k, v] of pairs) {
|
|
397
|
+
const lower = k.toLowerCase();
|
|
398
|
+
if (lower.startsWith("on")) continue;
|
|
399
|
+
if (ATTR_DENY.has(lower)) continue;
|
|
400
|
+
if (!(lower in REACT_ATTR_NAME) && !SAFE_ATTR_NAME.test(k)) continue;
|
|
401
|
+
out[REACT_ATTR_NAME[lower] ?? k] = v;
|
|
402
|
+
}
|
|
373
403
|
return out;
|
|
374
404
|
}
|
|
375
405
|
|
package/src/server.tsx
CHANGED
|
@@ -58,11 +58,11 @@ export function initFlux(opts?: { wasm?: BufferSource | WebAssembly.Module }): P
|
|
|
58
58
|
initPromise = (async () => {
|
|
59
59
|
const wasmUrl = new URL("./wasm/flux_md_core_bg.wasm", import.meta.url);
|
|
60
60
|
if (wasmUrl.protocol === "file:") {
|
|
61
|
-
// Node: read the bytes (Node's fetch can't load file://).
|
|
62
|
-
//
|
|
63
|
-
// (
|
|
64
|
-
|
|
65
|
-
const { readFile } = await import(
|
|
61
|
+
// Node: read the bytes (Node's fetch can't load file://). The literal
|
|
62
|
+
// `node:` specifier is externalized by bundlers, so node:fs never reaches
|
|
63
|
+
// a web bundle (this branch is also file:-only, never true in browsers).
|
|
64
|
+
// @ts-ignore — no @types/node in this package; node:fs/promises is a builtin.
|
|
65
|
+
const { readFile } = await import("node:fs/promises");
|
|
66
66
|
initFluxSync(await readFile(wasmUrl));
|
|
67
67
|
} else {
|
|
68
68
|
await initWasmAsync({ module_or_path: wasmUrl });
|
|
@@ -87,6 +87,12 @@ function makeParser(config?: ParserConfig): FluxParser {
|
|
|
87
87
|
p.setUnsafeHtml(config?.unsafeHtml ?? false);
|
|
88
88
|
p.setComponentTags(config?.componentTags ?? []);
|
|
89
89
|
p.setInlineComponentTags(config?.inlineComponentTags ?? []);
|
|
90
|
+
// Engage the safe raw-HTML sanitizer when either list is provided (even []).
|
|
91
|
+
p.setHtmlSanitize(
|
|
92
|
+
config?.htmlAllowlist !== undefined || config?.dropHtmlTags !== undefined,
|
|
93
|
+
config?.htmlAllowlist ?? [],
|
|
94
|
+
config?.dropHtmlTags ?? [],
|
|
95
|
+
);
|
|
90
96
|
p.setBlockData(config?.blockData ?? false);
|
|
91
97
|
return p;
|
|
92
98
|
}
|
package/src/types-core.ts
CHANGED
|
@@ -261,6 +261,26 @@ export interface ParserConfig {
|
|
|
261
261
|
* `componentTags`. Empty/omitted = off.
|
|
262
262
|
*/
|
|
263
263
|
inlineComponentTags?: string[];
|
|
264
|
+
/**
|
|
265
|
+
* Opt-in **safe raw-HTML allowlist**. Setting this (even to `[]`) engages a
|
|
266
|
+
* sanitizer that renders a safe subset of *inline* raw HTML **without**
|
|
267
|
+
* `unsafeHtml`: an **empty** array means "allow all tags except a built-in
|
|
268
|
+
* dangerous set" (`script`, `style`, `iframe`, `object`, `embed`, `form`,
|
|
269
|
+
* `input`, `svg`, …); a **non-empty** array renders only those tags (e.g.
|
|
270
|
+
* `["br","sub","sup"]`) and escapes the rest. Every rendered tag's attributes
|
|
271
|
+
* are sanitized (event handlers dropped, dangerous URL schemes → `#`), and HTML
|
|
272
|
+
* comments are dropped. Block-level raw HTML stays escaped (sanitize is
|
|
273
|
+
* inline-scoped for now). Unset/omitted = off (raw HTML handling unchanged).
|
|
274
|
+
* Matching is case-insensitive. See also {@link dropHtmlTags}.
|
|
275
|
+
*/
|
|
276
|
+
htmlAllowlist?: string[];
|
|
277
|
+
/**
|
|
278
|
+
* Tags removed entirely (markup dropped; any text between an open/close pair
|
|
279
|
+
* stays as inert text) — e.g. app marker tags, or belt-and-suspenders
|
|
280
|
+
* `["script","style"]`. Setting this (even to `[]`) also engages the safe
|
|
281
|
+
* raw-HTML sanitizer (see {@link htmlAllowlist}). Case-insensitive.
|
|
282
|
+
*/
|
|
283
|
+
dropHtmlTags?: string[];
|
|
264
284
|
/**
|
|
265
285
|
* Opt-in structured table data. When on, a `Table` block's `kind.data` is
|
|
266
286
|
* populated with `{ headers, rows, aligns }` (each cell `{ text, html }`) so a
|
|
package/src/wasm/flux_md_core.d.ts
CHANGED
|
@@ -71,6 +71,15 @@ export class FluxParser {
|
|
|
71
71
|
* `<div class="math math-display">` for a KaTeX pass on the JS side.
|
|
72
72
|
*/
|
|
73
73
|
setGfmMath(on: boolean): void;
|
|
74
|
+
/**
|
|
75
|
+
* Engage the safe raw-HTML sanitizer. When `on`, inline raw HTML renders
|
|
76
|
+
* sanitized without full unsafe HTML: `allow` empty = allow all tags except
|
|
77
|
+
* a built-in dangerous set (`script`, `style`, `iframe`, …); `allow`
|
|
78
|
+
* non-empty = only those render (others escaped); `drop` tags are removed
|
|
79
|
+
* entirely; HTML comments are dropped; every rendered tag's attributes are
|
|
80
|
+
* sanitized. Off by default (raw-HTML handling unchanged).
|
|
81
|
+
*/
|
|
82
|
+
setHtmlSanitize(on: boolean, allow: string[], drop: string[]): void;
|
|
74
83
|
/**
|
|
75
84
|
* Set the opt-in INLINE component-tag allowlist (e.g. `["tik", "cite"]`).
|
|
76
85
|
* An allowlisted inline `<tik>…</tik>` (or self-closing `<tik/>`) renders as
|
|
@@ -105,6 +114,7 @@ export interface InitOutput {
|
|
|
105
114
|
readonly fluxparser_setGfmAutolinks: (a: number, b: number) => void;
|
|
106
115
|
readonly fluxparser_setGfmFootnotes: (a: number, b: number) => void;
|
|
107
116
|
readonly fluxparser_setGfmMath: (a: number, b: number) => void;
|
|
117
|
+
readonly fluxparser_setHtmlSanitize: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
|
|
108
118
|
readonly fluxparser_setInlineComponentTags: (a: number, b: number, c: number) => void;
|
|
109
119
|
readonly fluxparser_setUnsafeHtml: (a: number, b: number) => void;
|
|
110
120
|
readonly __wbindgen_export: (a: number, b: number) => number;
|
package/src/wasm/flux_md_core.js
CHANGED
|
@@ -171,6 +171,24 @@ export class FluxParser {
|
|
|
171
171
|
setGfmMath(on) {
|
|
172
172
|
wasm.fluxparser_setGfmMath(this.__wbg_ptr, on);
|
|
173
173
|
}
|
|
174
|
+
/**
|
|
175
|
+
* Engage the safe raw-HTML sanitizer. When `on`, inline raw HTML renders
|
|
176
|
+
* sanitized without full unsafe HTML: `allow` empty = allow all tags except
|
|
177
|
+
* a built-in dangerous set (`script`, `style`, `iframe`, …); `allow`
|
|
178
|
+
* non-empty = only those render (others escaped); `drop` tags are removed
|
|
179
|
+
* entirely; HTML comments are dropped; every rendered tag's attributes are
|
|
180
|
+
* sanitized. Off by default (raw-HTML handling unchanged).
|
|
181
|
+
* @param {boolean} on
|
|
182
|
+
* @param {string[]} allow
|
|
183
|
+
* @param {string[]} drop
|
|
184
|
+
*/
|
|
185
|
+
setHtmlSanitize(on, allow, drop) {
|
|
186
|
+
const ptr0 = passArrayJsValueToWasm0(allow, wasm.__wbindgen_export);
|
|
187
|
+
const len0 = WASM_VECTOR_LEN;
|
|
188
|
+
const ptr1 = passArrayJsValueToWasm0(drop, wasm.__wbindgen_export);
|
|
189
|
+
const len1 = WASM_VECTOR_LEN;
|
|
190
|
+
wasm.fluxparser_setHtmlSanitize(this.__wbg_ptr, on, ptr0, len0, ptr1, len1);
|
|
191
|
+
}
|
|
174
192
|
/**
|
|
175
193
|
* Set the opt-in INLINE component-tag allowlist (e.g. `["tik", "cite"]`).
|
|
176
194
|
* An allowlisted inline `<tik>…</tik>` (or self-closing `<tik/>`) renders as
|
|
package/src/wasm/flux_md_core_bg.wasm
CHANGED
Binary file
|
|
package/src/wasm/flux_md_core_bg.wasm.d.ts
CHANGED
|
@@ -16,6 +16,7 @@ export const fluxparser_setGfmAlerts: (a: number, b: number) => void;
|
|
|
16
16
|
export const fluxparser_setGfmAutolinks: (a: number, b: number) => void;
|
|
17
17
|
export const fluxparser_setGfmFootnotes: (a: number, b: number) => void;
|
|
18
18
|
export const fluxparser_setGfmMath: (a: number, b: number) => void;
|
|
19
|
+
export const fluxparser_setHtmlSanitize: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
|
|
19
20
|
export const fluxparser_setInlineComponentTags: (a: number, b: number, c: number) => void;
|
|
20
21
|
export const fluxparser_setUnsafeHtml: (a: number, b: number) => void;
|
|
21
22
|
export const __wbindgen_export: (a: number, b: number) => number;
|
package/src/wasm/package.json
CHANGED
package/src/worker.ts
CHANGED
|
@@ -31,6 +31,12 @@ const core = new WorkerCore({
|
|
|
31
31
|
p.setUnsafeHtml(c?.unsafeHtml ?? false);
|
|
32
32
|
p.setComponentTags(c?.componentTags ?? []);
|
|
33
33
|
p.setInlineComponentTags(c?.inlineComponentTags ?? []);
|
|
34
|
+
// Engage the safe raw-HTML sanitizer when either list is provided (even []).
|
|
35
|
+
p.setHtmlSanitize(
|
|
36
|
+
c?.htmlAllowlist !== undefined || c?.dropHtmlTags !== undefined,
|
|
37
|
+
c?.htmlAllowlist ?? [],
|
|
38
|
+
c?.dropHtmlTags ?? [],
|
|
39
|
+
);
|
|
34
40
|
p.setBlockData(c?.blockData ?? false);
|
|
35
41
|
return p;
|
|
36
42
|
},
|