ironmark 1.11.4 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,32 +2,46 @@
2
2
 
3
3
  [![CI](https://github.com/ph1p/ironmark/actions/workflows/ci.yml/badge.svg)](https://github.com/ph1p/ironmark/actions/workflows/ci.yml) [![npm](https://img.shields.io/npm/v/ironmark)](https://www.npmjs.com/package/ironmark) [![crates.io](https://img.shields.io/crates/v/ironmark)](https://crates.io/crates/ironmark)
4
4
 
5
- Fast Markdown to HTML/AST parser written in Rust with **zero third-party** parsing dependencies. Fully compliant with [CommonMark 0.31.2](https://spec.commonmark.org/0.31.2/) (652/652 spec tests pass). Available as a Rust crate and as an npm package via WebAssembly, with both HTML and AST output APIs.
6
-
7
- ## Options
5
+ Fast Markdown parser written in Rust with **zero third-party** parsing dependencies. Outputs HTML, AST, ANSI terminal, or Markdown. Fully compliant with [CommonMark 0.31.2](https://spec.commonmark.org/0.31.2/) (652/652 spec tests pass). Available as a Rust crate and as an npm package via WebAssembly.
6
+
7
+ ## Table of Contents
8
+
9
+ - [Configuration](#configuration)
10
+ - [Extensions](#extensions-default-true)
11
+ - [Security](#security)
12
+ - [Other Options](#other-options)
13
+ - [JavaScript / TypeScript](#javascript--typescript)
14
+ - [Node.js](#nodejs)
15
+ - [AST Output](#ast-output)
16
+ - [HTML to Markdown](#html-to-markdown)
17
+ - [AST to Markdown](#ast-to-markdown)
18
+ - [ANSI Terminal Output](#ansi-terminal-output)
19
+ - [Browser / Bundler](#browser--bundler)
20
+ - [CLI](#cli)
21
+ - [Rust](#rust)
22
+ - [C / C++](#c--c)
23
+ - [Benchmarks](#benchmarks)
24
+ - [Development](#development)
25
+
26
+ ## Configuration
8
27
 
9
28
  ### Extensions (default `true`)
10
29
 
11
- | Option | JS (`camelCase`) | Rust (`snake_case`) | Description |
12
- | ------------- | -------------------------- | ----------------------------- | ------------------------------ |
13
- | Hard breaks | `hardBreaks` | `hard_breaks` | Every newline becomes `<br />` |
14
- | Highlight | `enableHighlight` | `enable_highlight` | `==text==` → `<mark>` |
15
- | Strikethrough | `enableStrikethrough` | `enable_strikethrough` | `~~text~~` → `<del>` |
16
- | Underline | `enableUnderline` | `enable_underline` | `++text++` → `<u>` |
17
- | Tables | `enableTables` | `enable_tables` | Pipe table syntax |
18
- | Autolink | `enableAutolink` | `enable_autolink` | Bare URLs & emails → `<a>` |
19
- | Task lists | `enableTaskLists` | `enable_task_lists` | `- [ ]` / `- [x]` checkboxes |
20
- | Indented code | `enableIndentedCodeBlocks` | `enable_indented_code_blocks` | 4-space indent → `<pre><code>` |
21
-
22
- ### Extensions (default `false`)
23
-
24
- | Option | JS (`camelCase`) | Rust (`snake_case`) | Description |
25
- | ------------------- | ---------------------- | ------------------------ | ------------------------------------------------------ |
26
- | Wiki links | `enableWikiLinks` | `enable_wiki_links` | `[[page]]` → `<a href="page">` |
27
- | LaTeX math | `enableLatexMath` | `enable_latex_math` | `$inline$` and `$$display$$` → `<span class="math-…">` |
28
- | Heading IDs | `enableHeadingIds` | `enable_heading_ids` | Auto `id=` on headings from slugified text |
29
- | Heading anchors | `enableHeadingAnchors` | `enable_heading_anchors` | `<a class="anchor">` inside each heading (implies IDs) |
30
- | Permissive headings | `permissiveAtxHeaders` | `permissive_atx_headers` | Allow `#Heading` without space after `#` |
30
+ | Option | JS (`camelCase`) | Rust (`snake_case`) | Description |
31
+ | ------------------- | -------------------------- | ----------------------------- | ------------------------------------------------------ |
32
+ | Hard breaks | `hardBreaks` | `hard_breaks` | Every newline becomes `<br />` |
33
+ | Highlight | `enableHighlight` | `enable_highlight` | `==text==` → `<mark>` |
34
+ | Strikethrough | `enableStrikethrough` | `enable_strikethrough` | `~~text~~` → `<del>` |
35
+ | Underline | `enableUnderline` | `enable_underline` | `++text++` → `<u>` |
36
+ | Tables | `enableTables` | `enable_tables` | Pipe table syntax |
37
+ | Autolink | `enableAutolink` | `enable_autolink` | Bare URLs & emails → `<a>` |
38
+ | Task lists | `enableTaskLists` | `enable_task_lists` | `- [ ]` / `- [x]` checkboxes |
39
+ | Indented code | `enableIndentedCodeBlocks` | `enable_indented_code_blocks` | 4-space indent → `<pre><code>` |
40
+ | Wiki links | `enableWikiLinks` | `enable_wiki_links` | `[[page]]` → `<a href="page">` |
41
+ | LaTeX math | `enableLatexMath` | `enable_latex_math` | `$inline$` and `$$display$$` → `<span class="math-…">` |
42
+ | Heading IDs | `enableHeadingIds` | `enable_heading_ids` | Auto `id=` on headings from slugified text |
43
+ | Heading anchors | `enableHeadingAnchors` | `enable_heading_anchors` | `<a class="anchor">` inside each heading (implies IDs) |
44
+ | Permissive headings | `permissiveAtxHeaders` | `permissive_atx_headers` | Allow `#Heading` without space after `#` |
31
45
 
32
46
  ### Security
33
47
 
@@ -44,7 +58,7 @@ Fast Markdown to HTML/AST parser written in Rust with **zero third-party** parsi
44
58
 
45
59
  Dangerous URI schemes (`javascript:`, `vbscript:`, `data:` except `data:image/…`) are **always** stripped from link and image destinations, regardless of options.
46
60
 
47
- ### Other options
61
+ ### Other Options
48
62
 
49
63
  | Option | JS (`camelCase`) | Rust (`snake_case`) | Default | Description |
50
64
  | ------------------- | -------------------- | --------------------- | ------- | ------------------------------------------------- |
@@ -84,6 +98,42 @@ const ast = JSON.parse(astJson);
84
98
 
85
99
  `parseToAst()` returns a JSON string for portability across JS runtimes and WASM boundaries.
86
100
 
101
+ ### HTML to Markdown
102
+
103
+ Convert HTML back to Markdown syntax using `htmlToMarkdown()`. Useful for importing content from HTML sources or round-trip conversion.
104
+
105
+ ```ts
106
+ import { htmlToMarkdown } from "ironmark";
107
+
108
+ const md = htmlToMarkdown("<h1>Hello</h1><p><strong>Bold</strong> text</p>");
109
+ // Returns: "# Hello\n\n**Bold** text"
110
+
111
+ // Preserve unknown HTML tags (e.g., <sup>, <sub>) as raw HTML in output
112
+ const mdWithHtml = htmlToMarkdown("<p>H<sub>2</sub>O</p>", true);
113
+ // Returns: "H<sub>2</sub>O"
114
+ ```
115
+
116
+ For AST access, use `parseHtmlToAst()`:
117
+
118
+ ```ts
119
+ import { parseHtmlToAst } from "ironmark";
120
+
121
+ const astJson = parseHtmlToAst("<h1>Hello</h1><p>World</p>");
122
+ const ast = JSON.parse(astJson);
123
+ ```
124
+
125
+ ### AST to Markdown
126
+
127
+ Render an AST back to Markdown syntax using `renderMarkdown()`. Combined with `parseToAst()` or `parseHtmlToAst()`, this enables round-trip conversion.
128
+
129
+ ```ts
130
+ import { parseToAst, renderMarkdown } from "ironmark";
131
+
132
+ const ast = parseToAst("# Hello\n\n**World**");
133
+ const md = renderMarkdown(ast);
134
+ // Returns: "# Hello\n\n**World**"
135
+ ```
136
+
87
137
  ### ANSI Terminal Output
88
138
 
89
139
  Use `renderAnsi()` to render Markdown as coloured terminal output (ANSI 256-colour escape codes). Useful for CLI tools, terminal UIs, or any environment with a TTY.
@@ -264,6 +314,69 @@ Exported AST types:
264
314
  - `TableData`
265
315
  - `TableAlignment`
266
316
 
317
+ ### HTML to Markdown
318
+
319
+ Convert HTML back to Markdown syntax:
320
+
321
+ ```rust
322
+ use ironmark::{html_to_markdown, HtmlParseOptions};
323
+
324
+ fn main() {
325
+ let md = html_to_markdown(
326
+ "<h1>Hello</h1><p><strong>Bold</strong> text</p>",
327
+ &HtmlParseOptions::default(),
328
+ );
329
+ // Returns: "# Hello\n\n**Bold** text"
330
+ }
331
+ ```
332
+
333
+ For AST access, use `parse_html_to_ast()`:
334
+
335
+ ```rust
336
+ use ironmark::{parse_html_to_ast, HtmlParseOptions, UnknownInlineHandling};
337
+
338
+ fn main() {
339
+ // Default: strip unknown tags, keep text content
340
+ let ast = parse_html_to_ast("<p>H<sub>2</sub>O</p>", &HtmlParseOptions::default());
341
+
342
+ // Preserve unknown tags as raw HTML
343
+ let ast = parse_html_to_ast(
344
+ "<p>H<sub>2</sub>O</p>",
345
+ &HtmlParseOptions {
346
+ unknown_inline_handling: UnknownInlineHandling::PreserveAsHtml,
347
+ ..Default::default()
348
+ },
349
+ );
350
+ }
351
+ ```
352
+
353
+ `HtmlParseOptions` fields:
354
+
355
+ | Field | Type | Default | Description |
356
+ | ------------------------- | ----------------------- | ----------- | ------------------------------------- |
357
+ | `max_nesting_depth` | `usize` | `128` | Limit nesting depth (DoS prevention) |
358
+ | `unknown_inline_handling` | `UnknownInlineHandling` | `StripTags` | How to handle unknown HTML tags |
359
+ | `max_input_size` | `usize` | `0` | Truncate input beyond this byte count |
360
+
361
+ `UnknownInlineHandling` variants:
362
+
363
+ - `StripTags` — Remove unknown tags, keep text content (default)
364
+ - `PreserveAsHtml` — Keep unknown tags as raw HTML in output
365
+
366
+ ### AST to Markdown
367
+
368
+ Render an AST back to Markdown syntax:
369
+
370
+ ```rust
371
+ use ironmark::{parse_to_ast, render_markdown, ParseOptions};
372
+
373
+ fn main() {
374
+ let ast = parse_to_ast("# Hello\n\n**World**", &ParseOptions::default());
375
+ let md = render_markdown(&ast);
376
+ // Returns: "# Hello\n\n**World**"
377
+ }
378
+ ```
379
+
267
380
  ### ANSI Terminal Output
268
381
 
269
382
  `render_ansi()` renders Markdown as ANSI-coloured terminal output. Pass `Some(&AnsiOptions { .. })` to control width, colour, and line numbers, or `None` for defaults.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ironmark",
3
- "version": "1.11.4",
3
+ "version": "1.12.1",
4
4
  "description": "Very fast markdown parser in Rust, consumable from JavaScript/TypeScript via WebAssembly",
5
5
  "keywords": [
6
6
  "markdown",
package/wasm/index.d.ts CHANGED
@@ -118,3 +118,69 @@ export declare function renderAnsi(
118
118
  options?: ParseOptions,
119
119
  ansiOptions?: AnsiOptions,
120
120
  ): string;
121
+
122
+ /**
123
+ * Options for HTML-to-Markdown parsing.
124
+ */
125
+ export interface HtmlParseOptions {
126
+ /**
127
+ * If true, unknown HTML tags (like `<sup>`, `<sub>`, `<abbr>`) are preserved
128
+ * as raw HTML in the Markdown output. If false (default), unknown tags are
129
+ * stripped but their text content is kept.
130
+ */
131
+ preserveUnknownAsHtml?: boolean;
132
+ }
133
+
134
+ /**
135
+ * Parse an HTML string and return the AST as a JSON string.
136
+ *
137
+ * This converts HTML back into the same AST structure used by the Markdown parser,
138
+ * enabling HTML-to-Markdown conversion.
139
+ *
140
+ * @param html - HTML source string.
141
+ * @param preserveUnknownAsHtml - If true, unknown HTML tags are preserved as raw HTML.
142
+ * @returns JSON string representing the AST.
143
+ *
144
+ * @example
145
+ * ```ts
146
+ * import { parseHtmlToAst } from "ironmark";
147
+ * const ast = parseHtmlToAst("<h1>Hello</h1><p>World</p>");
148
+ * console.log(JSON.parse(ast));
149
+ * ```
150
+ */
151
+ export declare function parseHtmlToAst(html: string, preserveUnknownAsHtml?: boolean): string;
152
+
153
+ /**
154
+ * Convert HTML to Markdown.
155
+ *
156
+ * This parses HTML and renders it as Markdown syntax.
157
+ *
158
+ * @param html - HTML source string.
159
+ * @param preserveUnknownAsHtml - If true, unknown HTML tags are preserved as raw HTML in output.
160
+ * @returns Markdown string.
161
+ *
162
+ * @example
163
+ * ```ts
164
+ * import { htmlToMarkdown } from "ironmark";
165
+ * const md = htmlToMarkdown("<p><strong>Bold</strong> text</p>");
166
+ * // Returns: "**Bold** text"
167
+ * ```
168
+ */
169
+ export declare function htmlToMarkdown(html: string, preserveUnknownAsHtml?: boolean): string;
170
+
171
+ /**
172
+ * Render an AST (as JSON) to Markdown.
173
+ *
174
+ * This takes a JSON string representing an ironmark AST and renders it as Markdown.
175
+ *
176
+ * @param astJson - JSON string representing the AST (as returned by `parseToAst` or `parseHtmlToAst`).
177
+ * @returns Markdown string.
178
+ *
179
+ * @example
180
+ * ```ts
181
+ * import { parseToAst, renderMarkdown } from "ironmark";
182
+ * const ast = parseToAst("# Hello\n\n**World**");
183
+ * const md = renderMarkdown(ast);
184
+ * ```
185
+ */
186
+ export declare function renderMarkdown(astJson: string): string;