html-to-markdown-wasm 2.8.0 → 2.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/README.md +25 -12
- package/dist/html_to_markdown_wasm.d.ts +5 -5
- package/dist/html_to_markdown_wasm_bg.js +108 -108
- package/dist/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist/package.json +1 -1
- package/dist-node/README.md +25 -12
- package/dist-node/html_to_markdown_wasm.d.ts +5 -5
- package/dist-node/html_to_markdown_wasm.js +108 -108
- package/dist-node/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-node/package.json +1 -1
- package/dist-web/README.md +25 -12
- package/dist-web/html_to_markdown_wasm.d.ts +5 -5
- package/dist-web/html_to_markdown_wasm.js +108 -108
- package/dist-web/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-web/package.json +1 -1
- package/package.json +1 -1
|
@@ -222,88 +222,90 @@ function getArrayJsValueFromWasm0(ptr, len) {
|
|
|
222
222
|
return result;
|
|
223
223
|
}
|
|
224
224
|
/**
|
|
225
|
-
* @param {Uint8Array} html
|
|
226
225
|
* @param {any} options
|
|
227
|
-
* @returns {
|
|
226
|
+
* @returns {WasmConversionOptionsHandle}
|
|
228
227
|
*/
|
|
229
|
-
exports.
|
|
230
|
-
let deferred2_0;
|
|
231
|
-
let deferred2_1;
|
|
228
|
+
exports.createConversionOptionsHandle = function(options) {
|
|
232
229
|
try {
|
|
233
230
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
234
|
-
wasm.
|
|
231
|
+
wasm.createConversionOptionsHandle(retptr, addHeapObject(options));
|
|
235
232
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
236
233
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
237
234
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
var len1 = r1;
|
|
241
|
-
if (r3) {
|
|
242
|
-
ptr1 = 0; len1 = 0;
|
|
243
|
-
throw takeObject(r2);
|
|
235
|
+
if (r2) {
|
|
236
|
+
throw takeObject(r1);
|
|
244
237
|
}
|
|
245
|
-
|
|
246
|
-
deferred2_1 = len1;
|
|
247
|
-
return getStringFromWasm0(ptr1, len1);
|
|
238
|
+
return WasmConversionOptionsHandle.__wrap(r0);
|
|
248
239
|
} finally {
|
|
249
240
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
250
|
-
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
251
241
|
}
|
|
252
242
|
};
|
|
253
243
|
|
|
244
|
+
function _assertClass(instance, klass) {
|
|
245
|
+
if (!(instance instanceof klass)) {
|
|
246
|
+
throw new Error(`expected instance of ${klass.name}`);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
254
249
|
/**
|
|
255
|
-
* Convert HTML to Markdown
|
|
256
|
-
*
|
|
257
|
-
* # Arguments
|
|
258
|
-
*
|
|
259
|
-
* * `html` - The HTML string to convert
|
|
260
|
-
* * `options` - Optional conversion options (as a JavaScript object)
|
|
261
|
-
*
|
|
262
|
-
* # Example
|
|
263
|
-
*
|
|
264
|
-
* ```javascript
|
|
265
|
-
* import { convert } from 'html-to-markdown-wasm';
|
|
266
|
-
*
|
|
267
|
-
* const html = '<h1>Hello World</h1>';
|
|
268
|
-
* const markdown = convert(html);
|
|
269
|
-
* console.log(markdown); // # Hello World
|
|
270
|
-
* ```
|
|
271
250
|
* @param {string} html
|
|
272
251
|
* @param {any} options
|
|
273
|
-
* @
|
|
252
|
+
* @param {WasmInlineImageConfig | null} [image_config]
|
|
253
|
+
* @returns {WasmHtmlExtraction}
|
|
274
254
|
*/
|
|
275
|
-
exports.
|
|
276
|
-
let deferred3_0;
|
|
277
|
-
let deferred3_1;
|
|
255
|
+
exports.convertWithInlineImages = function(html, options, image_config) {
|
|
278
256
|
try {
|
|
279
257
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
280
258
|
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
281
259
|
const len0 = WASM_VECTOR_LEN;
|
|
282
|
-
|
|
260
|
+
let ptr1 = 0;
|
|
261
|
+
if (!isLikeNone(image_config)) {
|
|
262
|
+
_assertClass(image_config, WasmInlineImageConfig);
|
|
263
|
+
ptr1 = image_config.__destroy_into_raw();
|
|
264
|
+
}
|
|
265
|
+
wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
266
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
267
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
268
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
269
|
+
if (r2) {
|
|
270
|
+
throw takeObject(r1);
|
|
271
|
+
}
|
|
272
|
+
return WasmHtmlExtraction.__wrap(r0);
|
|
273
|
+
} finally {
|
|
274
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
275
|
+
}
|
|
276
|
+
};
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* @param {Uint8Array} html
|
|
280
|
+
* @param {WasmConversionOptionsHandle} handle
|
|
281
|
+
* @returns {string}
|
|
282
|
+
*/
|
|
283
|
+
exports.convertBytesWithOptionsHandle = function(html, handle) {
|
|
284
|
+
let deferred2_0;
|
|
285
|
+
let deferred2_1;
|
|
286
|
+
try {
|
|
287
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
288
|
+
_assertClass(handle, WasmConversionOptionsHandle);
|
|
289
|
+
wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
|
|
283
290
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
284
291
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
285
292
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
286
293
|
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
287
|
-
var
|
|
288
|
-
var
|
|
294
|
+
var ptr1 = r0;
|
|
295
|
+
var len1 = r1;
|
|
289
296
|
if (r3) {
|
|
290
|
-
|
|
297
|
+
ptr1 = 0; len1 = 0;
|
|
291
298
|
throw takeObject(r2);
|
|
292
299
|
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
return getStringFromWasm0(
|
|
300
|
+
deferred2_0 = ptr1;
|
|
301
|
+
deferred2_1 = len1;
|
|
302
|
+
return getStringFromWasm0(ptr1, len1);
|
|
296
303
|
} finally {
|
|
297
304
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
298
|
-
wasm.__wbindgen_export4(
|
|
305
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
299
306
|
}
|
|
300
307
|
};
|
|
301
308
|
|
|
302
|
-
function _assertClass(instance, klass) {
|
|
303
|
-
if (!(instance instanceof klass)) {
|
|
304
|
-
throw new Error(`expected instance of ${klass.name}`);
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
309
|
/**
|
|
308
310
|
* @param {string} html
|
|
309
311
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -337,108 +339,99 @@ exports.convertWithOptionsHandle = function(html, handle) {
|
|
|
337
339
|
}
|
|
338
340
|
};
|
|
339
341
|
|
|
340
|
-
/**
|
|
341
|
-
* Initialize panic hook for better error messages in the browser
|
|
342
|
-
*/
|
|
343
|
-
exports.init = function() {
|
|
344
|
-
wasm.init();
|
|
345
|
-
};
|
|
346
|
-
|
|
347
342
|
/**
|
|
348
343
|
* @param {Uint8Array} html
|
|
349
344
|
* @param {any} options
|
|
350
|
-
* @
|
|
351
|
-
* @returns {WasmHtmlExtraction}
|
|
345
|
+
* @returns {string}
|
|
352
346
|
*/
|
|
353
|
-
exports.
|
|
347
|
+
exports.convertBytes = function(html, options) {
|
|
348
|
+
let deferred2_0;
|
|
349
|
+
let deferred2_1;
|
|
354
350
|
try {
|
|
355
351
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
356
|
-
|
|
357
|
-
if (!isLikeNone(image_config)) {
|
|
358
|
-
_assertClass(image_config, WasmInlineImageConfig);
|
|
359
|
-
ptr0 = image_config.__destroy_into_raw();
|
|
360
|
-
}
|
|
361
|
-
wasm.convertBytesWithInlineImages(retptr, addHeapObject(html), addHeapObject(options), ptr0);
|
|
352
|
+
wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
|
|
362
353
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
363
354
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
364
355
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
365
|
-
|
|
366
|
-
|
|
356
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
357
|
+
var ptr1 = r0;
|
|
358
|
+
var len1 = r1;
|
|
359
|
+
if (r3) {
|
|
360
|
+
ptr1 = 0; len1 = 0;
|
|
361
|
+
throw takeObject(r2);
|
|
367
362
|
}
|
|
368
|
-
|
|
363
|
+
deferred2_0 = ptr1;
|
|
364
|
+
deferred2_1 = len1;
|
|
365
|
+
return getStringFromWasm0(ptr1, len1);
|
|
369
366
|
} finally {
|
|
370
367
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
368
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
371
369
|
}
|
|
372
370
|
};
|
|
373
371
|
|
|
374
372
|
/**
|
|
373
|
+
* Convert HTML to Markdown
|
|
374
|
+
*
|
|
375
|
+
* # Arguments
|
|
376
|
+
*
|
|
377
|
+
* * `html` - The HTML string to convert
|
|
378
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
379
|
+
*
|
|
380
|
+
* # Example
|
|
381
|
+
*
|
|
382
|
+
* ```javascript
|
|
383
|
+
* import { convert } from 'html-to-markdown-wasm';
|
|
384
|
+
*
|
|
385
|
+
* const html = '<h1>Hello World</h1>';
|
|
386
|
+
* const markdown = convert(html);
|
|
387
|
+
* console.log(markdown); // # Hello World
|
|
388
|
+
* ```
|
|
389
|
+
* @param {string} html
|
|
375
390
|
* @param {any} options
|
|
376
|
-
* @returns {WasmConversionOptionsHandle}
|
|
377
|
-
*/
|
|
378
|
-
exports.createConversionOptionsHandle = function(options) {
|
|
379
|
-
try {
|
|
380
|
-
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
381
|
-
wasm.createConversionOptionsHandle(retptr, addHeapObject(options));
|
|
382
|
-
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
383
|
-
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
384
|
-
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
385
|
-
if (r2) {
|
|
386
|
-
throw takeObject(r1);
|
|
387
|
-
}
|
|
388
|
-
return WasmConversionOptionsHandle.__wrap(r0);
|
|
389
|
-
} finally {
|
|
390
|
-
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
391
|
-
}
|
|
392
|
-
};
|
|
393
|
-
|
|
394
|
-
/**
|
|
395
|
-
* @param {Uint8Array} html
|
|
396
|
-
* @param {WasmConversionOptionsHandle} handle
|
|
397
391
|
* @returns {string}
|
|
398
392
|
*/
|
|
399
|
-
exports.
|
|
400
|
-
let
|
|
401
|
-
let
|
|
393
|
+
exports.convert = function(html, options) {
|
|
394
|
+
let deferred3_0;
|
|
395
|
+
let deferred3_1;
|
|
402
396
|
try {
|
|
403
397
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
404
|
-
|
|
405
|
-
|
|
398
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
399
|
+
const len0 = WASM_VECTOR_LEN;
|
|
400
|
+
wasm.convert(retptr, ptr0, len0, addHeapObject(options));
|
|
406
401
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
407
402
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
408
403
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
409
404
|
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
410
|
-
var
|
|
411
|
-
var
|
|
405
|
+
var ptr2 = r0;
|
|
406
|
+
var len2 = r1;
|
|
412
407
|
if (r3) {
|
|
413
|
-
|
|
408
|
+
ptr2 = 0; len2 = 0;
|
|
414
409
|
throw takeObject(r2);
|
|
415
410
|
}
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
return getStringFromWasm0(
|
|
411
|
+
deferred3_0 = ptr2;
|
|
412
|
+
deferred3_1 = len2;
|
|
413
|
+
return getStringFromWasm0(ptr2, len2);
|
|
419
414
|
} finally {
|
|
420
415
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
421
|
-
wasm.__wbindgen_export4(
|
|
416
|
+
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
422
417
|
}
|
|
423
418
|
};
|
|
424
419
|
|
|
425
420
|
/**
|
|
426
|
-
* @param {
|
|
421
|
+
* @param {Uint8Array} html
|
|
427
422
|
* @param {any} options
|
|
428
423
|
* @param {WasmInlineImageConfig | null} [image_config]
|
|
429
424
|
* @returns {WasmHtmlExtraction}
|
|
430
425
|
*/
|
|
431
|
-
exports.
|
|
426
|
+
exports.convertBytesWithInlineImages = function(html, options, image_config) {
|
|
432
427
|
try {
|
|
433
428
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
434
|
-
|
|
435
|
-
const len0 = WASM_VECTOR_LEN;
|
|
436
|
-
let ptr1 = 0;
|
|
429
|
+
let ptr0 = 0;
|
|
437
430
|
if (!isLikeNone(image_config)) {
|
|
438
431
|
_assertClass(image_config, WasmInlineImageConfig);
|
|
439
|
-
|
|
432
|
+
ptr0 = image_config.__destroy_into_raw();
|
|
440
433
|
}
|
|
441
|
-
wasm.
|
|
434
|
+
wasm.convertBytesWithInlineImages(retptr, addHeapObject(html), addHeapObject(options), ptr0);
|
|
442
435
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
443
436
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
444
437
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
@@ -451,6 +444,13 @@ exports.convertWithInlineImages = function(html, options, image_config) {
|
|
|
451
444
|
}
|
|
452
445
|
};
|
|
453
446
|
|
|
447
|
+
/**
|
|
448
|
+
* Initialize panic hook for better error messages in the browser
|
|
449
|
+
*/
|
|
450
|
+
exports.init = function() {
|
|
451
|
+
wasm.init();
|
|
452
|
+
};
|
|
453
|
+
|
|
454
454
|
const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
455
455
|
? { register: () => {}, unregister: () => {} }
|
|
456
456
|
: new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
|
|
Binary file
|
package/dist-node/package.json
CHANGED
package/dist-web/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# html-to-markdown
|
|
2
2
|
|
|
3
|
-
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
|
|
3
|
+
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
|
|
4
4
|
|
|
5
5
|
[crates.io](https://crates.io/crates/html-to-markdown)
|
|
6
6
|
[npm](https://www.npmjs.com/package/html-to-markdown-node)
|
|
@@ -8,6 +8,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
|
|
|
8
8
|
[PyPI](https://pypi.org/project/html-to-markdown/)
|
|
9
9
|
[Packagist](https://packagist.org/packages/goldziher/html-to-markdown)
|
|
10
10
|
[RubyGems](https://rubygems.org/gems/html-to-markdown)
|
|
11
|
+
[Hex.pm](https://hex.pm/packages/html_to_markdown)
|
|
11
12
|
[NuGet](https://www.nuget.org/packages/HtmlToMarkdown/)
|
|
12
13
|
[Maven Central](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
13
14
|
[Go Reference](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
@@ -43,6 +44,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
|
|
|
43
44
|
- PHP wrapper package – [PHP README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php/README.md)
|
|
44
45
|
- PHP extension (PIE) – [Extension README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php-ext/README.md)
|
|
45
46
|
- **Ruby guide** – [Ruby README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md)
|
|
47
|
+
- **Elixir guide** – [Elixir README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/elixir/README.md)
|
|
46
48
|
- **Rust guide** – [Rust README](https://github.com/Goldziher/html-to-markdown/blob/main/crates/html-to-markdown/README.md)
|
|
47
49
|
- **Contributing** – [CONTRIBUTING.md](https://github.com/Goldziher/html-to-markdown/blob/main/CONTRIBUTING.md) ⭐ Start here!
|
|
48
50
|
- **Changelog** – [CHANGELOG.md](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md)
|
|
@@ -57,6 +59,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
|
|
|
57
59
|
| **Python** (bindings + CLI) | `pip install html-to-markdown` |
|
|
58
60
|
| **PHP** (extension + helpers) | `pie install goldziher/html-to-markdown`<br>`composer require html-to-markdown/extension` |
|
|
59
61
|
| **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
|
|
62
|
+
| **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
|
|
60
63
|
| **Rust** crate | `cargo add html-to-markdown-rs` |
|
|
61
64
|
| Rust CLI | `cargo install html-to-markdown-cli` |
|
|
62
65
|
| Homebrew CLI | `brew tap goldziher/tap`<br>`brew install html-to-markdown` |
|
|
@@ -126,6 +129,15 @@ markdown, inline_images, warnings = convert_with_inline_images(
|
|
|
126
129
|
)
|
|
127
130
|
```
|
|
128
131
|
|
|
132
|
+
### Elixir
|
|
133
|
+
|
|
134
|
+
```elixir
|
|
135
|
+
{:ok, markdown} = HtmlToMarkdown.convert("<h1>Hello</h1>")
|
|
136
|
+
|
|
137
|
+
# Keyword options are supported (internally mapped to the Rust ConversionOptions struct)
|
|
138
|
+
HtmlToMarkdown.convert!("<p>Wrap me</p>", wrap: true, wrap_width: 32, preprocessing: %{enabled: true})
|
|
139
|
+
```
|
|
140
|
+
|
|
129
141
|
### Rust
|
|
130
142
|
|
|
131
143
|
```rust
|
|
@@ -178,6 +190,7 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
|
|
|
178
190
|
- **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
|
|
179
191
|
- **Node.js trails by only a few percent** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
|
|
180
192
|
- **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
|
|
193
|
+
- **Elixir matches the Rust core** because the Rustler NIF executes the same `ConversionOptions` pipeline—benchmarks land between 170–1,460 ops/sec on the Wikipedia fixtures and >20 k ops/sec on micro HOCR payloads.
|
|
181
194
|
- **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
|
|
182
195
|
- **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
|
|
183
196
|
|
|
@@ -185,20 +198,20 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
|
|
|
185
198
|
|
|
186
199
|
Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
|
|
187
200
|
|
|
188
|
-
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
|
|
189
|
-
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
|
|
190
|
-
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
|
|
191
|
-
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
|
|
192
|
-
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
|
|
193
|
-
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
|
|
194
|
-
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
|
|
195
|
-
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
|
|
196
|
-
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
|
|
197
|
-
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
|
|
201
|
+
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Elixir ops/sec | Rust ops/sec |
|
|
202
|
+
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | -------------- | ------------ |
|
|
203
|
+
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | 1,463 | **1,700** |
|
|
204
|
+
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | 357 | **416** |
|
|
205
|
+
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | 171 | **190** |
|
|
206
|
+
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | 174 | **220** |
|
|
207
|
+
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | 247 | **258** |
|
|
208
|
+
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 3,113 | 2,760 |
|
|
209
|
+
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | 20,424 | **31,345** |
|
|
210
|
+
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,366 | 3,080 |
|
|
198
211
|
|
|
199
212
|
The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
|
|
200
213
|
|
|
201
|
-
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
214
|
+
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures (for example, `task bench:bindings -- --language elixir`), pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
202
215
|
|
|
203
216
|
Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
|
|
204
217
|
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
|
+
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
4
|
+
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
5
|
+
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
6
|
+
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
3
7
|
export function convertBytes(html: Uint8Array, options: any): string;
|
|
4
8
|
/**
|
|
5
9
|
* Convert HTML to Markdown
|
|
@@ -20,15 +24,11 @@ export function convertBytes(html: Uint8Array, options: any): string;
|
|
|
20
24
|
* ```
|
|
21
25
|
*/
|
|
22
26
|
export function convert(html: string, options: any): string;
|
|
23
|
-
export function
|
|
27
|
+
export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
24
28
|
/**
|
|
25
29
|
* Initialize panic hook for better error messages in the browser
|
|
26
30
|
*/
|
|
27
31
|
export function init(): void;
|
|
28
|
-
export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
29
|
-
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
30
|
-
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
31
|
-
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
32
32
|
export class WasmConversionOptionsHandle {
|
|
33
33
|
free(): void;
|
|
34
34
|
[Symbol.dispose](): void;
|