html-to-markdown-wasm 2.8.1 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # html-to-markdown
2
2
 
3
- High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
3
+ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
4
4
 
5
5
  [![Crates.io](https://img.shields.io/crates/v/html-to-markdown.svg)](https://crates.io/crates/html-to-markdown)
6
6
  [![npm (node)](https://badge.fury.io/js/html-to-markdown-node.svg)](https://www.npmjs.com/package/html-to-markdown-node)
@@ -8,6 +8,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
8
8
  [![PyPI](https://badge.fury.io/py/html-to-markdown.svg)](https://pypi.org/project/html-to-markdown/)
9
9
  [![Packagist](https://img.shields.io/packagist/v/goldziher/html-to-markdown.svg)](https://packagist.org/packages/goldziher/html-to-markdown)
10
10
  [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
11
+ [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
11
12
  [![NuGet](https://img.shields.io/nuget/v/HtmlToMarkdown.svg)](https://www.nuget.org/packages/HtmlToMarkdown/)
12
13
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
13
14
  [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
@@ -43,6 +44,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
43
44
  - PHP wrapper package – [PHP README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php/README.md)
44
45
  - PHP extension (PIE) – [Extension README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php-ext/README.md)
45
46
  - **Ruby guide** – [Ruby README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md)
47
+ - **Elixir guide** – [Elixir README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/elixir/README.md)
46
48
  - **Rust guide** – [Rust README](https://github.com/Goldziher/html-to-markdown/blob/main/crates/html-to-markdown/README.md)
47
49
  - **Contributing** – [CONTRIBUTING.md](https://github.com/Goldziher/html-to-markdown/blob/main/CONTRIBUTING.md) ⭐ Start here!
48
50
  - **Changelog** – [CHANGELOG.md](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md)
@@ -57,6 +59,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
57
59
  | **Python** (bindings + CLI) | `pip install html-to-markdown` |
58
60
  | **PHP** (extension + helpers) | `pie install goldziher/html-to-markdown`<br>`composer require html-to-markdown/extension` |
59
61
  | **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
62
+ | **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
60
63
  | **Rust** crate | `cargo add html-to-markdown-rs` |
61
64
  | Rust CLI | `cargo install html-to-markdown-cli` |
62
65
  | Homebrew CLI | `brew tap goldziher/tap`<br>`brew install html-to-markdown` |
@@ -126,6 +129,15 @@ markdown, inline_images, warnings = convert_with_inline_images(
126
129
  )
127
130
  ```
128
131
 
132
+ ### Elixir
133
+
134
+ ```elixir
135
+ {:ok, markdown} = HtmlToMarkdown.convert("<h1>Hello</h1>")
136
+
137
+ # Keyword options are supported (internally mapped to the Rust ConversionOptions struct)
138
+ HtmlToMarkdown.convert!("<p>Wrap me</p>", wrap: true, wrap_width: 32, preprocessing: %{enabled: true})
139
+ ```
140
+
129
141
  ### Rust
130
142
 
131
143
  ```rust
@@ -178,6 +190,7 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
178
190
  - **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
179
191
  - **Node.js trails by only a few percent** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
180
192
  - **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
193
+ - **Elixir matches the Rust core** because the Rustler NIF executes the same `ConversionOptions` pipeline—benchmarks land between 170–1,460 ops/sec on the Wikipedia fixtures and >20 k ops/sec on micro HOCR payloads.
181
194
  - **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
182
195
  - **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
183
196
 
@@ -185,20 +198,20 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
185
198
 
186
199
  Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
187
200
 
188
- | Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
189
- | ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
190
- | Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
191
- | Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
192
- | Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
193
- | Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
194
- | Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
195
- | HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
196
- | HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
197
- | HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
201
+ | Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Elixir ops/sec | Rust ops/sec |
202
+ | ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | -------------- | ------------ |
203
+ | Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | 1,463 | **1,700** |
204
+ | Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | 357 | **416** |
205
+ | Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | 171 | **190** |
206
+ | Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | 174 | **220** |
207
+ | Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | 247 | **258** |
208
+ | HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 3,113 | 2,760 |
209
+ | HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | 20,424 | **31,345** |
210
+ | HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,366 | 3,080 |
198
211
 
199
212
  The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
200
213
 
201
- Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
214
+ Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures (for example, `task bench:bindings -- --language elixir`), pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
202
215
 
203
216
  Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
204
217
 
@@ -1,10 +1,5 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
- /**
4
- * Initialize panic hook for better error messages in the browser
5
- */
6
- export function init(): void;
7
- export function convertBytes(html: Uint8Array, options: any): string;
8
3
  /**
9
4
  * Convert HTML to Markdown
10
5
  *
@@ -24,11 +19,16 @@ export function convertBytes(html: Uint8Array, options: any): string;
24
19
  * ```
25
20
  */
26
21
  export function convert(html: string, options: any): string;
27
- export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
28
22
  export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
29
- export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
23
+ export function convertBytes(html: Uint8Array, options: any): string;
30
24
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
31
25
  export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
26
+ /**
27
+ * Initialize panic hook for better error messages in the browser
28
+ */
29
+ export function init(): void;
30
+ export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
31
+ export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
32
32
  export class WasmConversionOptionsHandle {
33
33
  free(): void;
34
34
  [Symbol.dispose](): void;
@@ -231,43 +231,6 @@ function getArrayJsValueFromWasm0(ptr, len) {
231
231
  }
232
232
  return result;
233
233
  }
234
- /**
235
- * Initialize panic hook for better error messages in the browser
236
- */
237
- export function init() {
238
- wasm.init();
239
- }
240
-
241
- /**
242
- * @param {Uint8Array} html
243
- * @param {any} options
244
- * @returns {string}
245
- */
246
- export function convertBytes(html, options) {
247
- let deferred2_0;
248
- let deferred2_1;
249
- try {
250
- const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
251
- wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
252
- var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
253
- var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
254
- var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
255
- var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
256
- var ptr1 = r0;
257
- var len1 = r1;
258
- if (r3) {
259
- ptr1 = 0; len1 = 0;
260
- throw takeObject(r2);
261
- }
262
- deferred2_0 = ptr1;
263
- deferred2_1 = len1;
264
- return getStringFromWasm0(ptr1, len1);
265
- } finally {
266
- wasm.__wbindgen_add_to_stack_pointer(16);
267
- wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
268
- }
269
- }
270
-
271
234
  /**
272
235
  * Convert HTML to Markdown
273
236
  *
@@ -316,42 +279,6 @@ export function convert(html, options) {
316
279
  }
317
280
  }
318
281
 
319
- function _assertClass(instance, klass) {
320
- if (!(instance instanceof klass)) {
321
- throw new Error(`expected instance of ${klass.name}`);
322
- }
323
- }
324
- /**
325
- * @param {Uint8Array} html
326
- * @param {WasmConversionOptionsHandle} handle
327
- * @returns {string}
328
- */
329
- export function convertBytesWithOptionsHandle(html, handle) {
330
- let deferred2_0;
331
- let deferred2_1;
332
- try {
333
- const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
334
- _assertClass(handle, WasmConversionOptionsHandle);
335
- wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
336
- var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
337
- var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
338
- var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
339
- var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
340
- var ptr1 = r0;
341
- var len1 = r1;
342
- if (r3) {
343
- ptr1 = 0; len1 = 0;
344
- throw takeObject(r2);
345
- }
346
- deferred2_0 = ptr1;
347
- deferred2_1 = len1;
348
- return getStringFromWasm0(ptr1, len1);
349
- } finally {
350
- wasm.__wbindgen_add_to_stack_pointer(16);
351
- wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
352
- }
353
- }
354
-
355
282
  /**
356
283
  * @param {any} options
357
284
  * @returns {WasmConversionOptionsHandle}
@@ -373,34 +300,40 @@ export function createConversionOptionsHandle(options) {
373
300
  }
374
301
 
375
302
  /**
376
- * @param {string} html
303
+ * @param {Uint8Array} html
377
304
  * @param {any} options
378
- * @param {WasmInlineImageConfig | null} [image_config]
379
- * @returns {WasmHtmlExtraction}
305
+ * @returns {string}
380
306
  */
381
- export function convertWithInlineImages(html, options, image_config) {
307
+ export function convertBytes(html, options) {
308
+ let deferred2_0;
309
+ let deferred2_1;
382
310
  try {
383
311
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
384
- const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
385
- const len0 = WASM_VECTOR_LEN;
386
- let ptr1 = 0;
387
- if (!isLikeNone(image_config)) {
388
- _assertClass(image_config, WasmInlineImageConfig);
389
- ptr1 = image_config.__destroy_into_raw();
390
- }
391
- wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
312
+ wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
392
313
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
393
314
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
394
315
  var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
395
- if (r2) {
396
- throw takeObject(r1);
316
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
317
+ var ptr1 = r0;
318
+ var len1 = r1;
319
+ if (r3) {
320
+ ptr1 = 0; len1 = 0;
321
+ throw takeObject(r2);
397
322
  }
398
- return WasmHtmlExtraction.__wrap(r0);
323
+ deferred2_0 = ptr1;
324
+ deferred2_1 = len1;
325
+ return getStringFromWasm0(ptr1, len1);
399
326
  } finally {
400
327
  wasm.__wbindgen_add_to_stack_pointer(16);
328
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
401
329
  }
402
330
  }
403
331
 
332
+ function _assertClass(instance, klass) {
333
+ if (!(instance instanceof klass)) {
334
+ throw new Error(`expected instance of ${klass.name}`);
335
+ }
336
+ }
404
337
  /**
405
338
  * @param {string} html
406
339
  * @param {WasmConversionOptionsHandle} handle
@@ -461,6 +394,73 @@ export function convertBytesWithInlineImages(html, options, image_config) {
461
394
  }
462
395
  }
463
396
 
397
+ /**
398
+ * Initialize panic hook for better error messages in the browser
399
+ */
400
+ export function init() {
401
+ wasm.init();
402
+ }
403
+
404
+ /**
405
+ * @param {string} html
406
+ * @param {any} options
407
+ * @param {WasmInlineImageConfig | null} [image_config]
408
+ * @returns {WasmHtmlExtraction}
409
+ */
410
+ export function convertWithInlineImages(html, options, image_config) {
411
+ try {
412
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
413
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
414
+ const len0 = WASM_VECTOR_LEN;
415
+ let ptr1 = 0;
416
+ if (!isLikeNone(image_config)) {
417
+ _assertClass(image_config, WasmInlineImageConfig);
418
+ ptr1 = image_config.__destroy_into_raw();
419
+ }
420
+ wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
421
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
422
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
423
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
424
+ if (r2) {
425
+ throw takeObject(r1);
426
+ }
427
+ return WasmHtmlExtraction.__wrap(r0);
428
+ } finally {
429
+ wasm.__wbindgen_add_to_stack_pointer(16);
430
+ }
431
+ }
432
+
433
+ /**
434
+ * @param {Uint8Array} html
435
+ * @param {WasmConversionOptionsHandle} handle
436
+ * @returns {string}
437
+ */
438
+ export function convertBytesWithOptionsHandle(html, handle) {
439
+ let deferred2_0;
440
+ let deferred2_1;
441
+ try {
442
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
443
+ _assertClass(handle, WasmConversionOptionsHandle);
444
+ wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
445
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
446
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
447
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
448
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
449
+ var ptr1 = r0;
450
+ var len1 = r1;
451
+ if (r3) {
452
+ ptr1 = 0; len1 = 0;
453
+ throw takeObject(r2);
454
+ }
455
+ deferred2_0 = ptr1;
456
+ deferred2_1 = len1;
457
+ return getStringFromWasm0(ptr1, len1);
458
+ } finally {
459
+ wasm.__wbindgen_add_to_stack_pointer(16);
460
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
461
+ }
462
+ }
463
+
464
464
  const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
465
465
  ? { register: () => {}, unregister: () => {} }
466
466
  : new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
Binary file
package/dist/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "collaborators": [
5
5
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
6
6
  ],
7
- "version": "2.8.1",
7
+ "version": "2.8.3",
8
8
  "license": "MIT",
9
9
  "repository": {
10
10
  "type": "git",
@@ -1,6 +1,6 @@
1
1
  # html-to-markdown
2
2
 
3
- High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
3
+ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
4
4
 
5
5
  [![Crates.io](https://img.shields.io/crates/v/html-to-markdown.svg)](https://crates.io/crates/html-to-markdown)
6
6
  [![npm (node)](https://badge.fury.io/js/html-to-markdown-node.svg)](https://www.npmjs.com/package/html-to-markdown-node)
@@ -8,6 +8,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
8
8
  [![PyPI](https://badge.fury.io/py/html-to-markdown.svg)](https://pypi.org/project/html-to-markdown/)
9
9
  [![Packagist](https://img.shields.io/packagist/v/goldziher/html-to-markdown.svg)](https://packagist.org/packages/goldziher/html-to-markdown)
10
10
  [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
11
+ [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
11
12
  [![NuGet](https://img.shields.io/nuget/v/HtmlToMarkdown.svg)](https://www.nuget.org/packages/HtmlToMarkdown/)
12
13
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
13
14
  [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
@@ -43,6 +44,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
43
44
  - PHP wrapper package – [PHP README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php/README.md)
44
45
  - PHP extension (PIE) – [Extension README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php-ext/README.md)
45
46
  - **Ruby guide** – [Ruby README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md)
47
+ - **Elixir guide** – [Elixir README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/elixir/README.md)
46
48
  - **Rust guide** – [Rust README](https://github.com/Goldziher/html-to-markdown/blob/main/crates/html-to-markdown/README.md)
47
49
  - **Contributing** – [CONTRIBUTING.md](https://github.com/Goldziher/html-to-markdown/blob/main/CONTRIBUTING.md) ⭐ Start here!
48
50
  - **Changelog** – [CHANGELOG.md](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md)
@@ -57,6 +59,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
57
59
  | **Python** (bindings + CLI) | `pip install html-to-markdown` |
58
60
  | **PHP** (extension + helpers) | `pie install goldziher/html-to-markdown`<br>`composer require html-to-markdown/extension` |
59
61
  | **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
62
+ | **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
60
63
  | **Rust** crate | `cargo add html-to-markdown-rs` |
61
64
  | Rust CLI | `cargo install html-to-markdown-cli` |
62
65
  | Homebrew CLI | `brew tap goldziher/tap`<br>`brew install html-to-markdown` |
@@ -126,6 +129,15 @@ markdown, inline_images, warnings = convert_with_inline_images(
126
129
  )
127
130
  ```
128
131
 
132
+ ### Elixir
133
+
134
+ ```elixir
135
+ {:ok, markdown} = HtmlToMarkdown.convert("<h1>Hello</h1>")
136
+
137
+ # Keyword options are supported (internally mapped to the Rust ConversionOptions struct)
138
+ HtmlToMarkdown.convert!("<p>Wrap me</p>", wrap: true, wrap_width: 32, preprocessing: %{enabled: true})
139
+ ```
140
+
129
141
  ### Rust
130
142
 
131
143
  ```rust
@@ -178,6 +190,7 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
178
190
  - **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
179
191
  - **Node.js trails by only a few percent** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
180
192
  - **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
193
+ - **Elixir matches the Rust core** because the Rustler NIF executes the same `ConversionOptions` pipeline—benchmarks land between 170–1,460 ops/sec on the Wikipedia fixtures and >20 k ops/sec on micro HOCR payloads.
181
194
  - **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
182
195
  - **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
183
196
 
@@ -185,20 +198,20 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
185
198
 
186
199
  Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
187
200
 
188
- | Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
189
- | ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
190
- | Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
191
- | Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
192
- | Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
193
- | Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
194
- | Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
195
- | HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
196
- | HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
197
- | HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
201
+ | Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Elixir ops/sec | Rust ops/sec |
202
+ | ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | -------------- | ------------ |
203
+ | Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | 1,463 | **1,700** |
204
+ | Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | 357 | **416** |
205
+ | Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | 171 | **190** |
206
+ | Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | 174 | **220** |
207
+ | Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | 247 | **258** |
208
+ | HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 3,113 | 2,760 |
209
+ | HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | 20,424 | **31,345** |
210
+ | HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,366 | 3,080 |
198
211
 
199
212
  The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
200
213
 
201
- Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
214
+ Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures (for example, `task bench:bindings -- --language elixir`), pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
202
215
 
203
216
  Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
204
217
 
@@ -1,10 +1,5 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
- /**
4
- * Initialize panic hook for better error messages in the browser
5
- */
6
- export function init(): void;
7
- export function convertBytes(html: Uint8Array, options: any): string;
8
3
  /**
9
4
  * Convert HTML to Markdown
10
5
  *
@@ -24,11 +19,16 @@ export function convertBytes(html: Uint8Array, options: any): string;
24
19
  * ```
25
20
  */
26
21
  export function convert(html: string, options: any): string;
27
- export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
28
22
  export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
29
- export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
23
+ export function convertBytes(html: Uint8Array, options: any): string;
30
24
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
31
25
  export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
26
+ /**
27
+ * Initialize panic hook for better error messages in the browser
28
+ */
29
+ export function init(): void;
30
+ export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
31
+ export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
32
32
  export class WasmConversionOptionsHandle {
33
33
  free(): void;
34
34
  [Symbol.dispose](): void;
@@ -221,43 +221,6 @@ function getArrayJsValueFromWasm0(ptr, len) {
221
221
  }
222
222
  return result;
223
223
  }
224
- /**
225
- * Initialize panic hook for better error messages in the browser
226
- */
227
- exports.init = function() {
228
- wasm.init();
229
- };
230
-
231
- /**
232
- * @param {Uint8Array} html
233
- * @param {any} options
234
- * @returns {string}
235
- */
236
- exports.convertBytes = function(html, options) {
237
- let deferred2_0;
238
- let deferred2_1;
239
- try {
240
- const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
241
- wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
242
- var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
243
- var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
244
- var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
245
- var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
246
- var ptr1 = r0;
247
- var len1 = r1;
248
- if (r3) {
249
- ptr1 = 0; len1 = 0;
250
- throw takeObject(r2);
251
- }
252
- deferred2_0 = ptr1;
253
- deferred2_1 = len1;
254
- return getStringFromWasm0(ptr1, len1);
255
- } finally {
256
- wasm.__wbindgen_add_to_stack_pointer(16);
257
- wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
258
- }
259
- };
260
-
261
224
  /**
262
225
  * Convert HTML to Markdown
263
226
  *
@@ -306,42 +269,6 @@ exports.convert = function(html, options) {
306
269
  }
307
270
  };
308
271
 
309
- function _assertClass(instance, klass) {
310
- if (!(instance instanceof klass)) {
311
- throw new Error(`expected instance of ${klass.name}`);
312
- }
313
- }
314
- /**
315
- * @param {Uint8Array} html
316
- * @param {WasmConversionOptionsHandle} handle
317
- * @returns {string}
318
- */
319
- exports.convertBytesWithOptionsHandle = function(html, handle) {
320
- let deferred2_0;
321
- let deferred2_1;
322
- try {
323
- const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
324
- _assertClass(handle, WasmConversionOptionsHandle);
325
- wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
326
- var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
327
- var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
328
- var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
329
- var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
330
- var ptr1 = r0;
331
- var len1 = r1;
332
- if (r3) {
333
- ptr1 = 0; len1 = 0;
334
- throw takeObject(r2);
335
- }
336
- deferred2_0 = ptr1;
337
- deferred2_1 = len1;
338
- return getStringFromWasm0(ptr1, len1);
339
- } finally {
340
- wasm.__wbindgen_add_to_stack_pointer(16);
341
- wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
342
- }
343
- };
344
-
345
272
  /**
346
273
  * @param {any} options
347
274
  * @returns {WasmConversionOptionsHandle}
@@ -363,34 +290,40 @@ exports.createConversionOptionsHandle = function(options) {
363
290
  };
364
291
 
365
292
  /**
366
- * @param {string} html
293
+ * @param {Uint8Array} html
367
294
  * @param {any} options
368
- * @param {WasmInlineImageConfig | null} [image_config]
369
- * @returns {WasmHtmlExtraction}
295
+ * @returns {string}
370
296
  */
371
- exports.convertWithInlineImages = function(html, options, image_config) {
297
+ exports.convertBytes = function(html, options) {
298
+ let deferred2_0;
299
+ let deferred2_1;
372
300
  try {
373
301
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
374
- const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
375
- const len0 = WASM_VECTOR_LEN;
376
- let ptr1 = 0;
377
- if (!isLikeNone(image_config)) {
378
- _assertClass(image_config, WasmInlineImageConfig);
379
- ptr1 = image_config.__destroy_into_raw();
380
- }
381
- wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
302
+ wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
382
303
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
383
304
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
384
305
  var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
385
- if (r2) {
386
- throw takeObject(r1);
306
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
307
+ var ptr1 = r0;
308
+ var len1 = r1;
309
+ if (r3) {
310
+ ptr1 = 0; len1 = 0;
311
+ throw takeObject(r2);
387
312
  }
388
- return WasmHtmlExtraction.__wrap(r0);
313
+ deferred2_0 = ptr1;
314
+ deferred2_1 = len1;
315
+ return getStringFromWasm0(ptr1, len1);
389
316
  } finally {
390
317
  wasm.__wbindgen_add_to_stack_pointer(16);
318
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
391
319
  }
392
320
  };
393
321
 
322
+ function _assertClass(instance, klass) {
323
+ if (!(instance instanceof klass)) {
324
+ throw new Error(`expected instance of ${klass.name}`);
325
+ }
326
+ }
394
327
  /**
395
328
  * @param {string} html
396
329
  * @param {WasmConversionOptionsHandle} handle
@@ -451,6 +384,73 @@ exports.convertBytesWithInlineImages = function(html, options, image_config) {
451
384
  }
452
385
  };
453
386
 
387
+ /**
388
+ * Initialize panic hook for better error messages in the browser
389
+ */
390
+ exports.init = function() {
391
+ wasm.init();
392
+ };
393
+
394
+ /**
395
+ * @param {string} html
396
+ * @param {any} options
397
+ * @param {WasmInlineImageConfig | null} [image_config]
398
+ * @returns {WasmHtmlExtraction}
399
+ */
400
+ exports.convertWithInlineImages = function(html, options, image_config) {
401
+ try {
402
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
403
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
404
+ const len0 = WASM_VECTOR_LEN;
405
+ let ptr1 = 0;
406
+ if (!isLikeNone(image_config)) {
407
+ _assertClass(image_config, WasmInlineImageConfig);
408
+ ptr1 = image_config.__destroy_into_raw();
409
+ }
410
+ wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
411
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
412
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
413
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
414
+ if (r2) {
415
+ throw takeObject(r1);
416
+ }
417
+ return WasmHtmlExtraction.__wrap(r0);
418
+ } finally {
419
+ wasm.__wbindgen_add_to_stack_pointer(16);
420
+ }
421
+ };
422
+
423
+ /**
424
+ * @param {Uint8Array} html
425
+ * @param {WasmConversionOptionsHandle} handle
426
+ * @returns {string}
427
+ */
428
+ exports.convertBytesWithOptionsHandle = function(html, handle) {
429
+ let deferred2_0;
430
+ let deferred2_1;
431
+ try {
432
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
433
+ _assertClass(handle, WasmConversionOptionsHandle);
434
+ wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
435
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
436
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
437
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
438
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
439
+ var ptr1 = r0;
440
+ var len1 = r1;
441
+ if (r3) {
442
+ ptr1 = 0; len1 = 0;
443
+ throw takeObject(r2);
444
+ }
445
+ deferred2_0 = ptr1;
446
+ deferred2_1 = len1;
447
+ return getStringFromWasm0(ptr1, len1);
448
+ } finally {
449
+ wasm.__wbindgen_add_to_stack_pointer(16);
450
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
451
+ }
452
+ };
453
+
454
454
  const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
455
455
  ? { register: () => {}, unregister: () => {} }
456
456
  : new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
@@ -3,7 +3,7 @@
3
3
  "collaborators": [
4
4
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
5
5
  ],
6
- "version": "2.8.1",
6
+ "version": "2.8.3",
7
7
  "license": "MIT",
8
8
  "repository": {
9
9
  "type": "git",
@@ -1,6 +1,6 @@
1
1
  # html-to-markdown
2
2
 
3
- High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
3
+ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
4
4
 
5
5
  [![Crates.io](https://img.shields.io/crates/v/html-to-markdown.svg)](https://crates.io/crates/html-to-markdown)
6
6
  [![npm (node)](https://badge.fury.io/js/html-to-markdown-node.svg)](https://www.npmjs.com/package/html-to-markdown-node)
@@ -8,6 +8,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
8
8
  [![PyPI](https://badge.fury.io/py/html-to-markdown.svg)](https://pypi.org/project/html-to-markdown/)
9
9
  [![Packagist](https://img.shields.io/packagist/v/goldziher/html-to-markdown.svg)](https://packagist.org/packages/goldziher/html-to-markdown)
10
10
  [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
11
+ [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
11
12
  [![NuGet](https://img.shields.io/nuget/v/HtmlToMarkdown.svg)](https://www.nuget.org/packages/HtmlToMarkdown/)
12
13
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
13
14
  [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
@@ -43,6 +44,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
43
44
  - PHP wrapper package – [PHP README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php/README.md)
44
45
  - PHP extension (PIE) – [Extension README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php-ext/README.md)
45
46
  - **Ruby guide** – [Ruby README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md)
47
+ - **Elixir guide** – [Elixir README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/elixir/README.md)
46
48
  - **Rust guide** – [Rust README](https://github.com/Goldziher/html-to-markdown/blob/main/crates/html-to-markdown/README.md)
47
49
  - **Contributing** – [CONTRIBUTING.md](https://github.com/Goldziher/html-to-markdown/blob/main/CONTRIBUTING.md) ⭐ Start here!
48
50
  - **Changelog** – [CHANGELOG.md](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md)
@@ -57,6 +59,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
57
59
  | **Python** (bindings + CLI) | `pip install html-to-markdown` |
58
60
  | **PHP** (extension + helpers) | `pie install goldziher/html-to-markdown`<br>`composer require html-to-markdown/extension` |
59
61
  | **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
62
+ | **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
60
63
  | **Rust** crate | `cargo add html-to-markdown-rs` |
61
64
  | Rust CLI | `cargo install html-to-markdown-cli` |
62
65
  | Homebrew CLI | `brew tap goldziher/tap`<br>`brew install html-to-markdown` |
@@ -126,6 +129,15 @@ markdown, inline_images, warnings = convert_with_inline_images(
126
129
  )
127
130
  ```
128
131
 
132
+ ### Elixir
133
+
134
+ ```elixir
135
+ {:ok, markdown} = HtmlToMarkdown.convert("<h1>Hello</h1>")
136
+
137
+ # Keyword options are supported (internally mapped to the Rust ConversionOptions struct)
138
+ HtmlToMarkdown.convert!("<p>Wrap me</p>", wrap: true, wrap_width: 32, preprocessing: %{enabled: true})
139
+ ```
140
+
129
141
  ### Rust
130
142
 
131
143
  ```rust
@@ -178,6 +190,7 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
178
190
  - **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
179
191
  - **Node.js trails by only a few percent** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
180
192
  - **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
193
+ - **Elixir matches the Rust core** because the Rustler NIF executes the same `ConversionOptions` pipeline—benchmarks land between 170–1,460 ops/sec on the Wikipedia fixtures and >20 k ops/sec on micro HOCR payloads.
181
194
  - **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
182
195
  - **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
183
196
 
@@ -185,20 +198,20 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
185
198
 
186
199
  Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
187
200
 
188
- | Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
189
- | ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
190
- | Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
191
- | Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
192
- | Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
193
- | Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
194
- | Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
195
- | HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
196
- | HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
197
- | HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
201
+ | Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Elixir ops/sec | Rust ops/sec |
202
+ | ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | -------------- | ------------ |
203
+ | Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | 1,463 | **1,700** |
204
+ | Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | 357 | **416** |
205
+ | Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | 171 | **190** |
206
+ | Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | 174 | **220** |
207
+ | Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | 247 | **258** |
208
+ | HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 3,113 | 2,760 |
209
+ | HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | 20,424 | **31,345** |
210
+ | HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,366 | 3,080 |
198
211
 
199
212
  The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
200
213
 
201
- Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
214
+ Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures (for example, `task bench:bindings -- --language elixir`), pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
202
215
 
203
216
  Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
204
217
 
@@ -1,10 +1,5 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
- /**
4
- * Initialize panic hook for better error messages in the browser
5
- */
6
- export function init(): void;
7
- export function convertBytes(html: Uint8Array, options: any): string;
8
3
  /**
9
4
  * Convert HTML to Markdown
10
5
  *
@@ -24,11 +19,16 @@ export function convertBytes(html: Uint8Array, options: any): string;
24
19
  * ```
25
20
  */
26
21
  export function convert(html: string, options: any): string;
27
- export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
28
22
  export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
29
- export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
23
+ export function convertBytes(html: Uint8Array, options: any): string;
30
24
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
31
25
  export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
26
+ /**
27
+ * Initialize panic hook for better error messages in the browser
28
+ */
29
+ export function init(): void;
30
+ export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
31
+ export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
32
32
  export class WasmConversionOptionsHandle {
33
33
  free(): void;
34
34
  [Symbol.dispose](): void;
@@ -227,43 +227,6 @@ function getArrayJsValueFromWasm0(ptr, len) {
227
227
  }
228
228
  return result;
229
229
  }
230
- /**
231
- * Initialize panic hook for better error messages in the browser
232
- */
233
- export function init() {
234
- wasm.init();
235
- }
236
-
237
- /**
238
- * @param {Uint8Array} html
239
- * @param {any} options
240
- * @returns {string}
241
- */
242
- export function convertBytes(html, options) {
243
- let deferred2_0;
244
- let deferred2_1;
245
- try {
246
- const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
247
- wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
248
- var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
249
- var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
250
- var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
251
- var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
252
- var ptr1 = r0;
253
- var len1 = r1;
254
- if (r3) {
255
- ptr1 = 0; len1 = 0;
256
- throw takeObject(r2);
257
- }
258
- deferred2_0 = ptr1;
259
- deferred2_1 = len1;
260
- return getStringFromWasm0(ptr1, len1);
261
- } finally {
262
- wasm.__wbindgen_add_to_stack_pointer(16);
263
- wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
264
- }
265
- }
266
-
267
230
  /**
268
231
  * Convert HTML to Markdown
269
232
  *
@@ -312,42 +275,6 @@ export function convert(html, options) {
312
275
  }
313
276
  }
314
277
 
315
- function _assertClass(instance, klass) {
316
- if (!(instance instanceof klass)) {
317
- throw new Error(`expected instance of ${klass.name}`);
318
- }
319
- }
320
- /**
321
- * @param {Uint8Array} html
322
- * @param {WasmConversionOptionsHandle} handle
323
- * @returns {string}
324
- */
325
- export function convertBytesWithOptionsHandle(html, handle) {
326
- let deferred2_0;
327
- let deferred2_1;
328
- try {
329
- const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
330
- _assertClass(handle, WasmConversionOptionsHandle);
331
- wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
332
- var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
333
- var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
334
- var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
335
- var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
336
- var ptr1 = r0;
337
- var len1 = r1;
338
- if (r3) {
339
- ptr1 = 0; len1 = 0;
340
- throw takeObject(r2);
341
- }
342
- deferred2_0 = ptr1;
343
- deferred2_1 = len1;
344
- return getStringFromWasm0(ptr1, len1);
345
- } finally {
346
- wasm.__wbindgen_add_to_stack_pointer(16);
347
- wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
348
- }
349
- }
350
-
351
278
  /**
352
279
  * @param {any} options
353
280
  * @returns {WasmConversionOptionsHandle}
@@ -369,34 +296,40 @@ export function createConversionOptionsHandle(options) {
369
296
  }
370
297
 
371
298
  /**
372
- * @param {string} html
299
+ * @param {Uint8Array} html
373
300
  * @param {any} options
374
- * @param {WasmInlineImageConfig | null} [image_config]
375
- * @returns {WasmHtmlExtraction}
301
+ * @returns {string}
376
302
  */
377
- export function convertWithInlineImages(html, options, image_config) {
303
+ export function convertBytes(html, options) {
304
+ let deferred2_0;
305
+ let deferred2_1;
378
306
  try {
379
307
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
380
- const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
381
- const len0 = WASM_VECTOR_LEN;
382
- let ptr1 = 0;
383
- if (!isLikeNone(image_config)) {
384
- _assertClass(image_config, WasmInlineImageConfig);
385
- ptr1 = image_config.__destroy_into_raw();
386
- }
387
- wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
308
+ wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
388
309
  var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
389
310
  var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
390
311
  var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
391
- if (r2) {
392
- throw takeObject(r1);
312
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
313
+ var ptr1 = r0;
314
+ var len1 = r1;
315
+ if (r3) {
316
+ ptr1 = 0; len1 = 0;
317
+ throw takeObject(r2);
393
318
  }
394
- return WasmHtmlExtraction.__wrap(r0);
319
+ deferred2_0 = ptr1;
320
+ deferred2_1 = len1;
321
+ return getStringFromWasm0(ptr1, len1);
395
322
  } finally {
396
323
  wasm.__wbindgen_add_to_stack_pointer(16);
324
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
397
325
  }
398
326
  }
399
327
 
328
+ function _assertClass(instance, klass) {
329
+ if (!(instance instanceof klass)) {
330
+ throw new Error(`expected instance of ${klass.name}`);
331
+ }
332
+ }
400
333
  /**
401
334
  * @param {string} html
402
335
  * @param {WasmConversionOptionsHandle} handle
@@ -457,6 +390,73 @@ export function convertBytesWithInlineImages(html, options, image_config) {
457
390
  }
458
391
  }
459
392
 
393
+ /**
394
+ * Initialize panic hook for better error messages in the browser
395
+ */
396
+ export function init() {
397
+ wasm.init();
398
+ }
399
+
400
+ /**
401
+ * @param {string} html
402
+ * @param {any} options
403
+ * @param {WasmInlineImageConfig | null} [image_config]
404
+ * @returns {WasmHtmlExtraction}
405
+ */
406
+ export function convertWithInlineImages(html, options, image_config) {
407
+ try {
408
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
409
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
410
+ const len0 = WASM_VECTOR_LEN;
411
+ let ptr1 = 0;
412
+ if (!isLikeNone(image_config)) {
413
+ _assertClass(image_config, WasmInlineImageConfig);
414
+ ptr1 = image_config.__destroy_into_raw();
415
+ }
416
+ wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
417
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
418
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
419
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
420
+ if (r2) {
421
+ throw takeObject(r1);
422
+ }
423
+ return WasmHtmlExtraction.__wrap(r0);
424
+ } finally {
425
+ wasm.__wbindgen_add_to_stack_pointer(16);
426
+ }
427
+ }
428
+
429
+ /**
430
+ * @param {Uint8Array} html
431
+ * @param {WasmConversionOptionsHandle} handle
432
+ * @returns {string}
433
+ */
434
+ export function convertBytesWithOptionsHandle(html, handle) {
435
+ let deferred2_0;
436
+ let deferred2_1;
437
+ try {
438
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
439
+ _assertClass(handle, WasmConversionOptionsHandle);
440
+ wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
441
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
442
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
443
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
444
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
445
+ var ptr1 = r0;
446
+ var len1 = r1;
447
+ if (r3) {
448
+ ptr1 = 0; len1 = 0;
449
+ throw takeObject(r2);
450
+ }
451
+ deferred2_0 = ptr1;
452
+ deferred2_1 = len1;
453
+ return getStringFromWasm0(ptr1, len1);
454
+ } finally {
455
+ wasm.__wbindgen_add_to_stack_pointer(16);
456
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
457
+ }
458
+ }
459
+
460
460
  const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
461
461
  ? { register: () => {}, unregister: () => {} }
462
462
  : new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
@@ -4,7 +4,7 @@
4
4
  "collaborators": [
5
5
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
6
6
  ],
7
- "version": "2.8.1",
7
+ "version": "2.8.3",
8
8
  "license": "MIT",
9
9
  "repository": {
10
10
  "type": "git",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "html-to-markdown-wasm",
3
- "version": "2.8.1",
3
+ "version": "2.8.3",
4
4
  "description": "High-performance HTML to Markdown converter - WebAssembly bindings",
5
5
  "main": "dist/html_to_markdown_wasm.js",
6
6
  "types": "dist/html_to_markdown_wasm.d.ts",