html-to-markdown-wasm 2.6.6 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
+ export function convertBytes(html: Uint8Array, options: any): string;
3
4
  /**
4
5
  * Convert HTML to Markdown
5
6
  *
@@ -19,34 +20,20 @@
19
20
  * ```
20
21
  */
21
22
  export function convert(html: string, options: any): string;
22
- /**
23
- * Convert HTML to Markdown while collecting inline images
24
- *
25
- * # Arguments
26
- *
27
- * * `html` - The HTML string to convert
28
- * * `options` - Optional conversion options (as a JavaScript object)
29
- * * `image_config` - Configuration for inline image extraction
30
- *
31
- * # Example
32
- *
33
- * ```javascript
34
- * import { convertWithInlineImages, WasmInlineImageConfig } from '@html-to-markdown/wasm';
35
- *
36
- * const html = '<img src="data:image/png;base64,..." alt="test">';
37
- * const config = new WasmInlineImageConfig(1024 * 1024);
38
- * config.inferDimensions = true;
39
- *
40
- * const result = convertWithInlineImages(html, null, config);
41
- * console.log(result.markdown);
42
- * console.log(result.inlineImages.length);
43
- * ```
44
- */
23
+ export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
24
+ export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
45
25
  export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
26
+ export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
46
27
  /**
47
28
  * Initialize panic hook for better error messages in the browser
48
29
  */
49
30
  export function init(): void;
31
+ export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
32
+ export class WasmConversionOptionsHandle {
33
+ free(): void;
34
+ [Symbol.dispose](): void;
35
+ constructor(options: any);
36
+ }
50
37
  /**
51
38
  * Result of HTML extraction with inline images
52
39
  */
@@ -221,6 +221,36 @@ function getArrayJsValueFromWasm0(ptr, len) {
221
221
  }
222
222
  return result;
223
223
  }
224
+ /**
225
+ * @param {Uint8Array} html
226
+ * @param {any} options
227
+ * @returns {string}
228
+ */
229
+ exports.convertBytes = function(html, options) {
230
+ let deferred2_0;
231
+ let deferred2_1;
232
+ try {
233
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
234
+ wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
235
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
236
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
237
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
238
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
239
+ var ptr1 = r0;
240
+ var len1 = r1;
241
+ if (r3) {
242
+ ptr1 = 0; len1 = 0;
243
+ throw takeObject(r2);
244
+ }
245
+ deferred2_0 = ptr1;
246
+ deferred2_1 = len1;
247
+ return getStringFromWasm0(ptr1, len1);
248
+ } finally {
249
+ wasm.__wbindgen_add_to_stack_pointer(16);
250
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
251
+ }
252
+ };
253
+
224
254
  /**
225
255
  * Convert HTML to Markdown
226
256
  *
@@ -275,27 +305,59 @@ function _assertClass(instance, klass) {
275
305
  }
276
306
  }
277
307
  /**
278
- * Convert HTML to Markdown while collecting inline images
279
- *
280
- * # Arguments
281
- *
282
- * * `html` - The HTML string to convert
283
- * * `options` - Optional conversion options (as a JavaScript object)
284
- * * `image_config` - Configuration for inline image extraction
285
- *
286
- * # Example
287
- *
288
- * ```javascript
289
- * import { convertWithInlineImages, WasmInlineImageConfig } from '@html-to-markdown/wasm';
290
- *
291
- * const html = '<img src="data:image/png;base64,..." alt="test">';
292
- * const config = new WasmInlineImageConfig(1024 * 1024);
293
- * config.inferDimensions = true;
294
- *
295
- * const result = convertWithInlineImages(html, null, config);
296
- * console.log(result.markdown);
297
- * console.log(result.inlineImages.length);
298
- * ```
308
+ * @param {string} html
309
+ * @param {WasmConversionOptionsHandle} handle
310
+ * @returns {string}
311
+ */
312
+ exports.convertWithOptionsHandle = function(html, handle) {
313
+ let deferred3_0;
314
+ let deferred3_1;
315
+ try {
316
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
317
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
318
+ const len0 = WASM_VECTOR_LEN;
319
+ _assertClass(handle, WasmConversionOptionsHandle);
320
+ wasm.convertWithOptionsHandle(retptr, ptr0, len0, handle.__wbg_ptr);
321
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
322
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
323
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
324
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
325
+ var ptr2 = r0;
326
+ var len2 = r1;
327
+ if (r3) {
328
+ ptr2 = 0; len2 = 0;
329
+ throw takeObject(r2);
330
+ }
331
+ deferred3_0 = ptr2;
332
+ deferred3_1 = len2;
333
+ return getStringFromWasm0(ptr2, len2);
334
+ } finally {
335
+ wasm.__wbindgen_add_to_stack_pointer(16);
336
+ wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
337
+ }
338
+ };
339
+
340
+ /**
341
+ * @param {any} options
342
+ * @returns {WasmConversionOptionsHandle}
343
+ */
344
+ exports.createConversionOptionsHandle = function(options) {
345
+ try {
346
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
347
+ wasm.createConversionOptionsHandle(retptr, addHeapObject(options));
348
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
349
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
350
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
351
+ if (r2) {
352
+ throw takeObject(r1);
353
+ }
354
+ return WasmConversionOptionsHandle.__wrap(r0);
355
+ } finally {
356
+ wasm.__wbindgen_add_to_stack_pointer(16);
357
+ }
358
+ };
359
+
360
+ /**
299
361
  * @param {string} html
300
362
  * @param {any} options
301
363
  * @param {WasmInlineImageConfig | null} [image_config]
@@ -324,6 +386,33 @@ exports.convertWithInlineImages = function(html, options, image_config) {
324
386
  }
325
387
  };
326
388
 
389
+ /**
390
+ * @param {Uint8Array} html
391
+ * @param {any} options
392
+ * @param {WasmInlineImageConfig | null} [image_config]
393
+ * @returns {WasmHtmlExtraction}
394
+ */
395
+ exports.convertBytesWithInlineImages = function(html, options, image_config) {
396
+ try {
397
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
398
+ let ptr0 = 0;
399
+ if (!isLikeNone(image_config)) {
400
+ _assertClass(image_config, WasmInlineImageConfig);
401
+ ptr0 = image_config.__destroy_into_raw();
402
+ }
403
+ wasm.convertBytesWithInlineImages(retptr, addHeapObject(html), addHeapObject(options), ptr0);
404
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
405
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
406
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
407
+ if (r2) {
408
+ throw takeObject(r1);
409
+ }
410
+ return WasmHtmlExtraction.__wrap(r0);
411
+ } finally {
412
+ wasm.__wbindgen_add_to_stack_pointer(16);
413
+ }
414
+ };
415
+
327
416
  /**
328
417
  * Initialize panic hook for better error messages in the browser
329
418
  */
@@ -331,6 +420,87 @@ exports.init = function() {
331
420
  wasm.init();
332
421
  };
333
422
 
423
+ /**
424
+ * @param {Uint8Array} html
425
+ * @param {WasmConversionOptionsHandle} handle
426
+ * @returns {string}
427
+ */
428
+ exports.convertBytesWithOptionsHandle = function(html, handle) {
429
+ let deferred2_0;
430
+ let deferred2_1;
431
+ try {
432
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
433
+ _assertClass(handle, WasmConversionOptionsHandle);
434
+ wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
435
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
436
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
437
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
438
+ var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
439
+ var ptr1 = r0;
440
+ var len1 = r1;
441
+ if (r3) {
442
+ ptr1 = 0; len1 = 0;
443
+ throw takeObject(r2);
444
+ }
445
+ deferred2_0 = ptr1;
446
+ deferred2_1 = len1;
447
+ return getStringFromWasm0(ptr1, len1);
448
+ } finally {
449
+ wasm.__wbindgen_add_to_stack_pointer(16);
450
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
451
+ }
452
+ };
453
+
454
+ const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
455
+ ? { register: () => {}, unregister: () => {} }
456
+ : new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
457
+
458
+ class WasmConversionOptionsHandle {
459
+
460
+ static __wrap(ptr) {
461
+ ptr = ptr >>> 0;
462
+ const obj = Object.create(WasmConversionOptionsHandle.prototype);
463
+ obj.__wbg_ptr = ptr;
464
+ WasmConversionOptionsHandleFinalization.register(obj, obj.__wbg_ptr, obj);
465
+ return obj;
466
+ }
467
+
468
+ __destroy_into_raw() {
469
+ const ptr = this.__wbg_ptr;
470
+ this.__wbg_ptr = 0;
471
+ WasmConversionOptionsHandleFinalization.unregister(this);
472
+ return ptr;
473
+ }
474
+
475
+ free() {
476
+ const ptr = this.__destroy_into_raw();
477
+ wasm.__wbg_wasmconversionoptionshandle_free(ptr, 0);
478
+ }
479
+ /**
480
+ * @param {any} options
481
+ */
482
+ constructor(options) {
483
+ try {
484
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
485
+ wasm.wasmconversionoptionshandle_new(retptr, addHeapObject(options));
486
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
487
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
488
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
489
+ if (r2) {
490
+ throw takeObject(r1);
491
+ }
492
+ this.__wbg_ptr = r0 >>> 0;
493
+ WasmConversionOptionsHandleFinalization.register(this, this.__wbg_ptr, this);
494
+ return this;
495
+ } finally {
496
+ wasm.__wbindgen_add_to_stack_pointer(16);
497
+ }
498
+ }
499
+ }
500
+ if (Symbol.dispose) WasmConversionOptionsHandle.prototype[Symbol.dispose] = WasmConversionOptionsHandle.prototype.free;
501
+
502
+ exports.WasmConversionOptionsHandle = WasmConversionOptionsHandle;
503
+
334
504
  const WasmHtmlExtractionFinalization = (typeof FinalizationRegistry === 'undefined')
335
505
  ? { register: () => {}, unregister: () => {} }
336
506
  : new FinalizationRegistry(ptr => wasm.__wbg_wasmhtmlextraction_free(ptr >>> 0, 1));
@@ -829,6 +999,17 @@ exports.__wbg_instanceof_ArrayBuffer_70beb1189ca63b38 = function(arg0) {
829
999
  return ret;
830
1000
  };
831
1001
 
1002
+ exports.__wbg_instanceof_Object_10bb762262230c68 = function(arg0) {
1003
+ let result;
1004
+ try {
1005
+ result = getObject(arg0) instanceof Object;
1006
+ } catch (_) {
1007
+ result = false;
1008
+ }
1009
+ const ret = result;
1010
+ return ret;
1011
+ };
1012
+
832
1013
  exports.__wbg_instanceof_Uint8Array_20c8e73002f7af98 = function(arg0) {
833
1014
  let result;
834
1015
  try {
@@ -855,6 +1036,11 @@ exports.__wbg_iterator_e5822695327a3c39 = function() {
855
1036
  return addHeapObject(ret);
856
1037
  };
857
1038
 
1039
+ exports.__wbg_keys_b4d27b02ad14f4be = function(arg0) {
1040
+ const ret = Object.keys(getObject(arg0));
1041
+ return addHeapObject(ret);
1042
+ };
1043
+
858
1044
  exports.__wbg_length_69bca3cb64fc8748 = function(arg0) {
859
1045
  const ret = getObject(arg0).length;
860
1046
  return ret;
@@ -1,12 +1,19 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
  export const memory: WebAssembly.Memory;
4
+ export const __wbg_wasmconversionoptionshandle_free: (a: number, b: number) => void;
4
5
  export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
5
6
  export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
6
7
  export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
7
8
  export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
8
9
  export const convert: (a: number, b: number, c: number, d: number) => void;
10
+ export const convertBytes: (a: number, b: number, c: number) => void;
11
+ export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
12
+ export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
9
13
  export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
14
+ export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
15
+ export const createConversionOptionsHandle: (a: number, b: number) => void;
16
+ export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
10
17
  export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
11
18
  export const wasmhtmlextraction_markdown: (a: number, b: number) => void;
12
19
  export const wasmhtmlextraction_warnings: (a: number, b: number) => void;
@@ -3,7 +3,7 @@
3
3
  "collaborators": [
4
4
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
5
5
  ],
6
- "version": "2.6.6",
6
+ "version": "2.7.0",
7
7
  "license": "MIT",
8
8
  "repository": {
9
9
  "type": "git",
@@ -89,7 +89,7 @@ const markdown = convert(html, {
89
89
  });
90
90
  ```
91
91
 
92
- **Performance:** Native bindings average ~19k ops/sec, WASM averages ~16k ops/sec (benchmarked on complex real-world documents).
92
+ **Performance:** The shared fixture harness (`task bench:bindings`) now clocks Node, Python, and the Rust CLI at ~1.3–1.4k ops/sec (≈150 MB/s) on the 129 KB Wikipedia “Lists” page thanks to the new Buffer/Uint8Array fast paths and release-mode harness. Ruby stays close at ~1.2k ops/sec, PHP lands around 0.3k ops/sec (≈35 MB/s), and WASM hits ~0.85k ops/sec—plenty for browsers, Deno, and edge runtimes.
93
93
 
94
94
  See the JavaScript guides for full API documentation:
95
95
 
@@ -146,38 +146,65 @@ Benchmarked on Apple M4 with complex real-world documents (Wikipedia articles, t
146
146
 
147
147
  ### Operations per Second (higher is better)
148
148
 
149
- | Document Type | Node.js (NAPI) | WASM | Python (PyO3) | Speedup (Node vs Python) |
150
- | -------------------------- | -------------- | ------ | ------------- | ------------------------ |
151
- | **Small (5 paragraphs)** | 86,233 | 70,300 | 8,443 | **10.2×** |
152
- | **Medium (25 paragraphs)** | 18,979 | 15,282 | 1,846 | **10.3×** |
153
- | **Large (100 paragraphs)** | 4,907 | 3,836 | 438 | **11.2×** |
154
- | **Tables (complex)** | 5,003 | 3,748 | 4,829 | 1.0× |
155
- | **Lists (nested)** | 1,819 | 1,391 | 1,165 | **1.6×** |
156
- | **Wikipedia (129KB)** | 1,125 | 1,022 | - | - |
157
- | **Wikipedia (653KB)** | 156 | 147 | - | - |
149
+ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, shared fixtures):
150
+
151
+ | Fixture | Node.js (NAPI) | WASM | Python (PyO3) | Speedup (Node vs Python) |
152
+ | ---------------------- | -------------- | ---- | ------------- | ------------------------ |
153
+ | **Lists (Timeline)** | 1,308 | 882 | 1,405 | **0.9×** |
154
+ | **Tables (Countries)** | 331 | 242 | 352 | **0.9×** |
155
+ | **Medium (Python)** | 150 | 121 | 158 | **1.0×** |
156
+ | **Large (Rust)** | 163 | 124 | 183 | **0.9×** |
157
+ | **Small (Intro)** | 208 | 163 | 223 | **0.9×** |
158
+ | **HOCR German PDF** | 2,944 | 1,637| 2,991 | **1.0×** |
159
+ | **HOCR Invoice** | 27,326 | 7,775| 23,500 | **1.2×** |
160
+ | **HOCR Tables** | 3,475 | 1,667| 3,464 | **1.0×** |
158
161
 
159
162
  ### Average Performance Summary
160
163
 
161
- | Implementation | Avg ops/sec | vs WASM | vs Python | Best For |
162
- | --------------------- | ---------------- | ------------ | --------------- | --------------------------------- |
163
- | **Node.js (NAPI-RS)** | **18,162** | 1.17× faster | **7.4× faster** | Maximum throughput in Node.js/Bun |
164
- | **WebAssembly** | **15,536** | baseline | **6.3× faster** | Universal (Deno, browsers, edge) |
165
- | **Python (PyO3)** | **2,465** | 6.3× slower | baseline | Python ecosystem integration |
166
- | **Rust CLI/Binary** | **150-210 MB/s** | - | - | Standalone processing |
164
+ | Implementation | Avg ops/sec (fixtures) | vs Python | Notes |
165
+ | --------------------- | ---------------------- | --------- | ----- |
166
+ | **Rust CLI/Binary** | **4,996** | **1.2× faster** | Preprocessing now stays in one pass + reuses `parse_owned`, so the CLI leads every fixture |
167
+ | **Node.js (NAPI-RS)** | **4,488** | 1.1× | Buffer/handle combo keeps Node within ~10 % of the Rust core while serving JS runtimes |
168
+ | **Ruby (magnus)** | **4,278** | 1.1× | Still extremely fast; ~25 k ops/sec on HOCR invoices without extra work |
169
+ | **Python (PyO3)** | **4,034** | baseline | Release-mode harness plus handle reuse keep it competitive, but it now trails Node/Rust |
170
+ | **WebAssembly** | **1,576** | 0.4× | Portable option for Deno/browsers/edge using the new byte APIs |
171
+ | **PHP (ext)** | **1,480** | 0.4× | Composer extension holds steady at 35–70 MB/s once the PIE build is installed |
167
172
 
168
173
  ### Key Insights
169
174
 
170
- - **JavaScript bindings are fastest**: Native Node.js bindings achieve ~18k ops/sec average, with WASM close behind at ~16k ops/sec
171
- - **Python is 6-10× slower**: Despite using the same Rust core, PyO3 FFI overhead significantly impacts Python performance
172
- - **Small documents**: Both JS implementations reach 70-90k ops/sec on simple HTML
173
- - **Large documents**: Performance gap widens with complexity
175
+ - **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
176
+ - **Node.js trails by only a few percent** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
177
+ - **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
178
+ - **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
179
+ - **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
180
+
181
+ ### Runtime Benchmarks (PHP / Ruby / Python / Node / WASM)
182
+
183
+ Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
184
+
185
+ | Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
186
+ | ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
187
+ | Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
188
+ | Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
189
+ | Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
190
+ | Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
191
+ | Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
192
+ | HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
193
+ | HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
194
+ | HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
195
+
196
+ The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
197
+
198
+ Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
199
+
200
+ Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
174
201
 
175
202
  **Note on Python performance**: The current Python bindings have optimization opportunities. The v2 API with direct `convert()` calls performs best; avoid the v1 compatibility layer for performance-critical applications.
176
203
 
177
204
  ## Compatibility (v1 → v2)
178
205
 
179
206
  - V2’s Rust core sustains **150–210 MB/s** throughput; V1 averaged **≈ 2.5 MB/s** in its Python/BeautifulSoup implementation (60–80× faster).
180
- - The Python package offers a compatibility shim in `html_to_markdown.v1_compat` (`convert_to_markdown`, `convert_to_markdown_stream`, `markdownify`). Details and keyword mappings live in [Python README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/python/README.md#v1-compatibility).
207
+ - The Python package offers a compatibility shim in `html_to_markdown.v1_compat` (`convert_to_markdown`, `convert_to_markdown_stream`, `markdownify`). The shim is deprecated, emits `DeprecationWarning` on every call, and will be removed in v3.0—plan migrations now. Details and keyword mappings live in [Python README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/python/README.md#v1-compatibility).
181
208
  - CLI flag changes, option renames, and other breaking updates are summarised in [CHANGELOG](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md#breaking-changes).
182
209
 
183
210
  ## Community
@@ -1,5 +1,6 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
+ export function convertBytes(html: Uint8Array, options: any): string;
3
4
  /**
4
5
  * Convert HTML to Markdown
5
6
  *
@@ -19,34 +20,20 @@
19
20
  * ```
20
21
  */
21
22
  export function convert(html: string, options: any): string;
22
- /**
23
- * Convert HTML to Markdown while collecting inline images
24
- *
25
- * # Arguments
26
- *
27
- * * `html` - The HTML string to convert
28
- * * `options` - Optional conversion options (as a JavaScript object)
29
- * * `image_config` - Configuration for inline image extraction
30
- *
31
- * # Example
32
- *
33
- * ```javascript
34
- * import { convertWithInlineImages, WasmInlineImageConfig } from '@html-to-markdown/wasm';
35
- *
36
- * const html = '<img src="data:image/png;base64,..." alt="test">';
37
- * const config = new WasmInlineImageConfig(1024 * 1024);
38
- * config.inferDimensions = true;
39
- *
40
- * const result = convertWithInlineImages(html, null, config);
41
- * console.log(result.markdown);
42
- * console.log(result.inlineImages.length);
43
- * ```
44
- */
23
+ export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
24
+ export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
45
25
  export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
26
+ export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
46
27
  /**
47
28
  * Initialize panic hook for better error messages in the browser
48
29
  */
49
30
  export function init(): void;
31
+ export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
32
+ export class WasmConversionOptionsHandle {
33
+ free(): void;
34
+ [Symbol.dispose](): void;
35
+ constructor(options: any);
36
+ }
50
37
  /**
51
38
  * Result of HTML extraction with inline images
52
39
  */
@@ -99,12 +86,19 @@ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembl
99
86
 
100
87
  export interface InitOutput {
101
88
  readonly memory: WebAssembly.Memory;
89
+ readonly __wbg_wasmconversionoptionshandle_free: (a: number, b: number) => void;
102
90
  readonly __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
103
91
  readonly __wbg_wasminlineimage_free: (a: number, b: number) => void;
104
92
  readonly __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
105
93
  readonly __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
106
94
  readonly convert: (a: number, b: number, c: number, d: number) => void;
95
+ readonly convertBytes: (a: number, b: number, c: number) => void;
96
+ readonly convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
97
+ readonly convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
107
98
  readonly convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
99
+ readonly convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
100
+ readonly createConversionOptionsHandle: (a: number, b: number) => void;
101
+ readonly wasmconversionoptionshandle_new: (a: number, b: number) => void;
108
102
  readonly wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
109
103
  readonly wasmhtmlextraction_markdown: (a: number, b: number) => void;
110
104
  readonly wasmhtmlextraction_warnings: (a: number, b: number) => void;