html-to-markdown-wasm 2.8.1 → 2.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/README.md +25 -12
- package/dist/html_to_markdown_wasm.d.ts +8 -8
- package/dist/html_to_markdown_wasm_bg.js +85 -85
- package/dist/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist/package.json +1 -1
- package/dist-node/README.md +25 -12
- package/dist-node/html_to_markdown_wasm.d.ts +8 -8
- package/dist-node/html_to_markdown_wasm.js +85 -85
- package/dist-node/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-node/package.json +1 -1
- package/dist-web/README.md +25 -12
- package/dist-web/html_to_markdown_wasm.d.ts +8 -8
- package/dist-web/html_to_markdown_wasm.js +85 -85
- package/dist-web/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-web/package.json +1 -1
- package/package.json +1 -1
package/dist/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# html-to-markdown
|
|
2
2
|
|
|
3
|
-
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
|
|
3
|
+
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
|
|
4
4
|
|
|
5
5
|
[](https://crates.io/crates/html-to-markdown)
|
|
6
6
|
[](https://www.npmjs.com/package/html-to-markdown-node)
|
|
@@ -8,6 +8,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
|
|
|
8
8
|
[](https://pypi.org/project/html-to-markdown/)
|
|
9
9
|
[](https://packagist.org/packages/goldziher/html-to-markdown)
|
|
10
10
|
[](https://rubygems.org/gems/html-to-markdown)
|
|
11
|
+
[](https://hex.pm/packages/html_to_markdown)
|
|
11
12
|
[](https://www.nuget.org/packages/HtmlToMarkdown/)
|
|
12
13
|
[](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
13
14
|
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
@@ -43,6 +44,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
|
|
|
43
44
|
- PHP wrapper package – [PHP README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php/README.md)
|
|
44
45
|
- PHP extension (PIE) – [Extension README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php-ext/README.md)
|
|
45
46
|
- **Ruby guide** – [Ruby README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md)
|
|
47
|
+
- **Elixir guide** – [Elixir README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/elixir/README.md)
|
|
46
48
|
- **Rust guide** – [Rust README](https://github.com/Goldziher/html-to-markdown/blob/main/crates/html-to-markdown/README.md)
|
|
47
49
|
- **Contributing** – [CONTRIBUTING.md](https://github.com/Goldziher/html-to-markdown/blob/main/CONTRIBUTING.md) ⭐ Start here!
|
|
48
50
|
- **Changelog** – [CHANGELOG.md](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md)
|
|
@@ -57,6 +59,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
|
|
|
57
59
|
| **Python** (bindings + CLI) | `pip install html-to-markdown` |
|
|
58
60
|
| **PHP** (extension + helpers) | `pie install goldziher/html-to-markdown`<br>`composer require html-to-markdown/extension` |
|
|
59
61
|
| **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
|
|
62
|
+
| **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
|
|
60
63
|
| **Rust** crate | `cargo add html-to-markdown-rs` |
|
|
61
64
|
| Rust CLI | `cargo install html-to-markdown-cli` |
|
|
62
65
|
| Homebrew CLI | `brew tap goldziher/tap`<br>`brew install html-to-markdown` |
|
|
@@ -126,6 +129,15 @@ markdown, inline_images, warnings = convert_with_inline_images(
|
|
|
126
129
|
)
|
|
127
130
|
```
|
|
128
131
|
|
|
132
|
+
### Elixir
|
|
133
|
+
|
|
134
|
+
```elixir
|
|
135
|
+
{:ok, markdown} = HtmlToMarkdown.convert("<h1>Hello</h1>")
|
|
136
|
+
|
|
137
|
+
# Keyword options are supported (internally mapped to the Rust ConversionOptions struct)
|
|
138
|
+
HtmlToMarkdown.convert!("<p>Wrap me</p>", wrap: true, wrap_width: 32, preprocessing: %{enabled: true})
|
|
139
|
+
```
|
|
140
|
+
|
|
129
141
|
### Rust
|
|
130
142
|
|
|
131
143
|
```rust
|
|
@@ -178,6 +190,7 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
|
|
|
178
190
|
- **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
|
|
179
191
|
- **Node.js trails by only a few percent** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
|
|
180
192
|
- **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
|
|
193
|
+
- **Elixir matches the Rust core** because the Rustler NIF executes the same `ConversionOptions` pipeline—benchmarks land between 170–1,460 ops/sec on the Wikipedia fixtures and >20 k ops/sec on micro HOCR payloads.
|
|
181
194
|
- **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
|
|
182
195
|
- **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
|
|
183
196
|
|
|
@@ -185,20 +198,20 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
|
|
|
185
198
|
|
|
186
199
|
Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
|
|
187
200
|
|
|
188
|
-
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
|
|
189
|
-
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
|
|
190
|
-
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
|
|
191
|
-
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
|
|
192
|
-
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
|
|
193
|
-
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
|
|
194
|
-
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
|
|
195
|
-
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
|
|
196
|
-
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
|
|
197
|
-
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
|
|
201
|
+
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Elixir ops/sec | Rust ops/sec |
|
|
202
|
+
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | -------------- | ------------ |
|
|
203
|
+
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | 1,463 | **1,700** |
|
|
204
|
+
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | 357 | **416** |
|
|
205
|
+
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | 171 | **190** |
|
|
206
|
+
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | 174 | **220** |
|
|
207
|
+
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | 247 | **258** |
|
|
208
|
+
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 3,113 | 2,760 |
|
|
209
|
+
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | 20,424 | **31,345** |
|
|
210
|
+
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,366 | 3,080 |
|
|
198
211
|
|
|
199
212
|
The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
|
|
200
213
|
|
|
201
|
-
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
214
|
+
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures (for example, `task bench:bindings -- --language elixir`), pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
202
215
|
|
|
203
216
|
Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
|
|
204
217
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
export function
|
|
3
|
+
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
4
|
+
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
5
|
+
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
6
|
+
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
7
7
|
export function convertBytes(html: Uint8Array, options: any): string;
|
|
8
8
|
/**
|
|
9
9
|
* Convert HTML to Markdown
|
|
@@ -24,11 +24,11 @@ export function convertBytes(html: Uint8Array, options: any): string;
|
|
|
24
24
|
* ```
|
|
25
25
|
*/
|
|
26
26
|
export function convert(html: string, options: any): string;
|
|
27
|
-
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
28
|
-
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
29
|
-
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
30
|
-
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
31
27
|
export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
28
|
+
/**
|
|
29
|
+
* Initialize panic hook for better error messages in the browser
|
|
30
|
+
*/
|
|
31
|
+
export function init(): void;
|
|
32
32
|
export class WasmConversionOptionsHandle {
|
|
33
33
|
free(): void;
|
|
34
34
|
[Symbol.dispose](): void;
|
|
@@ -232,95 +232,59 @@ function getArrayJsValueFromWasm0(ptr, len) {
|
|
|
232
232
|
return result;
|
|
233
233
|
}
|
|
234
234
|
/**
|
|
235
|
-
* Initialize panic hook for better error messages in the browser
|
|
236
|
-
*/
|
|
237
|
-
export function init() {
|
|
238
|
-
wasm.init();
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
/**
|
|
242
|
-
* @param {Uint8Array} html
|
|
243
235
|
* @param {any} options
|
|
244
|
-
* @returns {
|
|
236
|
+
* @returns {WasmConversionOptionsHandle}
|
|
245
237
|
*/
|
|
246
|
-
export function
|
|
247
|
-
let deferred2_0;
|
|
248
|
-
let deferred2_1;
|
|
238
|
+
export function createConversionOptionsHandle(options) {
|
|
249
239
|
try {
|
|
250
240
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
251
|
-
wasm.
|
|
241
|
+
wasm.createConversionOptionsHandle(retptr, addHeapObject(options));
|
|
252
242
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
253
243
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
254
244
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
var len1 = r1;
|
|
258
|
-
if (r3) {
|
|
259
|
-
ptr1 = 0; len1 = 0;
|
|
260
|
-
throw takeObject(r2);
|
|
245
|
+
if (r2) {
|
|
246
|
+
throw takeObject(r1);
|
|
261
247
|
}
|
|
262
|
-
|
|
263
|
-
deferred2_1 = len1;
|
|
264
|
-
return getStringFromWasm0(ptr1, len1);
|
|
248
|
+
return WasmConversionOptionsHandle.__wrap(r0);
|
|
265
249
|
} finally {
|
|
266
250
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
267
|
-
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
268
251
|
}
|
|
269
252
|
}
|
|
270
253
|
|
|
254
|
+
function _assertClass(instance, klass) {
|
|
255
|
+
if (!(instance instanceof klass)) {
|
|
256
|
+
throw new Error(`expected instance of ${klass.name}`);
|
|
257
|
+
}
|
|
258
|
+
}
|
|
271
259
|
/**
|
|
272
|
-
* Convert HTML to Markdown
|
|
273
|
-
*
|
|
274
|
-
* # Arguments
|
|
275
|
-
*
|
|
276
|
-
* * `html` - The HTML string to convert
|
|
277
|
-
* * `options` - Optional conversion options (as a JavaScript object)
|
|
278
|
-
*
|
|
279
|
-
* # Example
|
|
280
|
-
*
|
|
281
|
-
* ```javascript
|
|
282
|
-
* import { convert } from 'html-to-markdown-wasm';
|
|
283
|
-
*
|
|
284
|
-
* const html = '<h1>Hello World</h1>';
|
|
285
|
-
* const markdown = convert(html);
|
|
286
|
-
* console.log(markdown); // # Hello World
|
|
287
|
-
* ```
|
|
288
260
|
* @param {string} html
|
|
289
261
|
* @param {any} options
|
|
290
|
-
* @
|
|
262
|
+
* @param {WasmInlineImageConfig | null} [image_config]
|
|
263
|
+
* @returns {WasmHtmlExtraction}
|
|
291
264
|
*/
|
|
292
|
-
export function
|
|
293
|
-
let deferred3_0;
|
|
294
|
-
let deferred3_1;
|
|
265
|
+
export function convertWithInlineImages(html, options, image_config) {
|
|
295
266
|
try {
|
|
296
267
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
297
268
|
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
298
269
|
const len0 = WASM_VECTOR_LEN;
|
|
299
|
-
|
|
270
|
+
let ptr1 = 0;
|
|
271
|
+
if (!isLikeNone(image_config)) {
|
|
272
|
+
_assertClass(image_config, WasmInlineImageConfig);
|
|
273
|
+
ptr1 = image_config.__destroy_into_raw();
|
|
274
|
+
}
|
|
275
|
+
wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
300
276
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
301
277
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
302
278
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
var len2 = r1;
|
|
306
|
-
if (r3) {
|
|
307
|
-
ptr2 = 0; len2 = 0;
|
|
308
|
-
throw takeObject(r2);
|
|
279
|
+
if (r2) {
|
|
280
|
+
throw takeObject(r1);
|
|
309
281
|
}
|
|
310
|
-
|
|
311
|
-
deferred3_1 = len2;
|
|
312
|
-
return getStringFromWasm0(ptr2, len2);
|
|
282
|
+
return WasmHtmlExtraction.__wrap(r0);
|
|
313
283
|
} finally {
|
|
314
284
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
315
|
-
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
316
285
|
}
|
|
317
286
|
}
|
|
318
287
|
|
|
319
|
-
function _assertClass(instance, klass) {
|
|
320
|
-
if (!(instance instanceof klass)) {
|
|
321
|
-
throw new Error(`expected instance of ${klass.name}`);
|
|
322
|
-
}
|
|
323
|
-
}
|
|
324
288
|
/**
|
|
325
289
|
* @param {Uint8Array} html
|
|
326
290
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -353,68 +317,97 @@ export function convertBytesWithOptionsHandle(html, handle) {
|
|
|
353
317
|
}
|
|
354
318
|
|
|
355
319
|
/**
|
|
356
|
-
* @param {
|
|
357
|
-
* @
|
|
320
|
+
* @param {string} html
|
|
321
|
+
* @param {WasmConversionOptionsHandle} handle
|
|
322
|
+
* @returns {string}
|
|
358
323
|
*/
|
|
359
|
-
export function
|
|
324
|
+
export function convertWithOptionsHandle(html, handle) {
|
|
325
|
+
let deferred3_0;
|
|
326
|
+
let deferred3_1;
|
|
360
327
|
try {
|
|
361
328
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
362
|
-
wasm.
|
|
329
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
330
|
+
const len0 = WASM_VECTOR_LEN;
|
|
331
|
+
_assertClass(handle, WasmConversionOptionsHandle);
|
|
332
|
+
wasm.convertWithOptionsHandle(retptr, ptr0, len0, handle.__wbg_ptr);
|
|
363
333
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
364
334
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
365
335
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
366
|
-
|
|
367
|
-
|
|
336
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
337
|
+
var ptr2 = r0;
|
|
338
|
+
var len2 = r1;
|
|
339
|
+
if (r3) {
|
|
340
|
+
ptr2 = 0; len2 = 0;
|
|
341
|
+
throw takeObject(r2);
|
|
368
342
|
}
|
|
369
|
-
|
|
343
|
+
deferred3_0 = ptr2;
|
|
344
|
+
deferred3_1 = len2;
|
|
345
|
+
return getStringFromWasm0(ptr2, len2);
|
|
370
346
|
} finally {
|
|
371
347
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
348
|
+
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
372
349
|
}
|
|
373
350
|
}
|
|
374
351
|
|
|
375
352
|
/**
|
|
376
|
-
* @param {
|
|
353
|
+
* @param {Uint8Array} html
|
|
377
354
|
* @param {any} options
|
|
378
|
-
* @
|
|
379
|
-
* @returns {WasmHtmlExtraction}
|
|
355
|
+
* @returns {string}
|
|
380
356
|
*/
|
|
381
|
-
export function
|
|
357
|
+
export function convertBytes(html, options) {
|
|
358
|
+
let deferred2_0;
|
|
359
|
+
let deferred2_1;
|
|
382
360
|
try {
|
|
383
361
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
384
|
-
|
|
385
|
-
const len0 = WASM_VECTOR_LEN;
|
|
386
|
-
let ptr1 = 0;
|
|
387
|
-
if (!isLikeNone(image_config)) {
|
|
388
|
-
_assertClass(image_config, WasmInlineImageConfig);
|
|
389
|
-
ptr1 = image_config.__destroy_into_raw();
|
|
390
|
-
}
|
|
391
|
-
wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
362
|
+
wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
|
|
392
363
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
393
364
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
394
365
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
395
|
-
|
|
396
|
-
|
|
366
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
367
|
+
var ptr1 = r0;
|
|
368
|
+
var len1 = r1;
|
|
369
|
+
if (r3) {
|
|
370
|
+
ptr1 = 0; len1 = 0;
|
|
371
|
+
throw takeObject(r2);
|
|
397
372
|
}
|
|
398
|
-
|
|
373
|
+
deferred2_0 = ptr1;
|
|
374
|
+
deferred2_1 = len1;
|
|
375
|
+
return getStringFromWasm0(ptr1, len1);
|
|
399
376
|
} finally {
|
|
400
377
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
378
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
401
379
|
}
|
|
402
380
|
}
|
|
403
381
|
|
|
404
382
|
/**
|
|
383
|
+
* Convert HTML to Markdown
|
|
384
|
+
*
|
|
385
|
+
* # Arguments
|
|
386
|
+
*
|
|
387
|
+
* * `html` - The HTML string to convert
|
|
388
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
389
|
+
*
|
|
390
|
+
* # Example
|
|
391
|
+
*
|
|
392
|
+
* ```javascript
|
|
393
|
+
* import { convert } from 'html-to-markdown-wasm';
|
|
394
|
+
*
|
|
395
|
+
* const html = '<h1>Hello World</h1>';
|
|
396
|
+
* const markdown = convert(html);
|
|
397
|
+
* console.log(markdown); // # Hello World
|
|
398
|
+
* ```
|
|
405
399
|
* @param {string} html
|
|
406
|
-
* @param {
|
|
400
|
+
* @param {any} options
|
|
407
401
|
* @returns {string}
|
|
408
402
|
*/
|
|
409
|
-
export function
|
|
403
|
+
export function convert(html, options) {
|
|
410
404
|
let deferred3_0;
|
|
411
405
|
let deferred3_1;
|
|
412
406
|
try {
|
|
413
407
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
414
408
|
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
415
409
|
const len0 = WASM_VECTOR_LEN;
|
|
416
|
-
|
|
417
|
-
wasm.convertWithOptionsHandle(retptr, ptr0, len0, handle.__wbg_ptr);
|
|
410
|
+
wasm.convert(retptr, ptr0, len0, addHeapObject(options));
|
|
418
411
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
419
412
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
420
413
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
@@ -461,6 +454,13 @@ export function convertBytesWithInlineImages(html, options, image_config) {
|
|
|
461
454
|
}
|
|
462
455
|
}
|
|
463
456
|
|
|
457
|
+
/**
|
|
458
|
+
* Initialize panic hook for better error messages in the browser
|
|
459
|
+
*/
|
|
460
|
+
export function init() {
|
|
461
|
+
wasm.init();
|
|
462
|
+
}
|
|
463
|
+
|
|
464
464
|
const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
465
465
|
? { register: () => {}, unregister: () => {} }
|
|
466
466
|
: new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
|
|
Binary file
|
package/dist/package.json
CHANGED
package/dist-node/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# html-to-markdown
|
|
2
2
|
|
|
3
|
-
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
|
|
3
|
+
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
|
|
4
4
|
|
|
5
5
|
[](https://crates.io/crates/html-to-markdown)
|
|
6
6
|
[](https://www.npmjs.com/package/html-to-markdown-node)
|
|
@@ -8,6 +8,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
|
|
|
8
8
|
[](https://pypi.org/project/html-to-markdown/)
|
|
9
9
|
[](https://packagist.org/packages/goldziher/html-to-markdown)
|
|
10
10
|
[](https://rubygems.org/gems/html-to-markdown)
|
|
11
|
+
[](https://hex.pm/packages/html_to_markdown)
|
|
11
12
|
[](https://www.nuget.org/packages/HtmlToMarkdown/)
|
|
12
13
|
[](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
13
14
|
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
@@ -43,6 +44,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
|
|
|
43
44
|
- PHP wrapper package – [PHP README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php/README.md)
|
|
44
45
|
- PHP extension (PIE) – [Extension README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php-ext/README.md)
|
|
45
46
|
- **Ruby guide** – [Ruby README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md)
|
|
47
|
+
- **Elixir guide** – [Elixir README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/elixir/README.md)
|
|
46
48
|
- **Rust guide** – [Rust README](https://github.com/Goldziher/html-to-markdown/blob/main/crates/html-to-markdown/README.md)
|
|
47
49
|
- **Contributing** – [CONTRIBUTING.md](https://github.com/Goldziher/html-to-markdown/blob/main/CONTRIBUTING.md) ⭐ Start here!
|
|
48
50
|
- **Changelog** – [CHANGELOG.md](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md)
|
|
@@ -57,6 +59,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
|
|
|
57
59
|
| **Python** (bindings + CLI) | `pip install html-to-markdown` |
|
|
58
60
|
| **PHP** (extension + helpers) | `pie install goldziher/html-to-markdown`<br>`composer require html-to-markdown/extension` |
|
|
59
61
|
| **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
|
|
62
|
+
| **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
|
|
60
63
|
| **Rust** crate | `cargo add html-to-markdown-rs` |
|
|
61
64
|
| Rust CLI | `cargo install html-to-markdown-cli` |
|
|
62
65
|
| Homebrew CLI | `brew tap goldziher/tap`<br>`brew install html-to-markdown` |
|
|
@@ -126,6 +129,15 @@ markdown, inline_images, warnings = convert_with_inline_images(
|
|
|
126
129
|
)
|
|
127
130
|
```
|
|
128
131
|
|
|
132
|
+
### Elixir
|
|
133
|
+
|
|
134
|
+
```elixir
|
|
135
|
+
{:ok, markdown} = HtmlToMarkdown.convert("<h1>Hello</h1>")
|
|
136
|
+
|
|
137
|
+
# Keyword options are supported (internally mapped to the Rust ConversionOptions struct)
|
|
138
|
+
HtmlToMarkdown.convert!("<p>Wrap me</p>", wrap: true, wrap_width: 32, preprocessing: %{enabled: true})
|
|
139
|
+
```
|
|
140
|
+
|
|
129
141
|
### Rust
|
|
130
142
|
|
|
131
143
|
```rust
|
|
@@ -178,6 +190,7 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
|
|
|
178
190
|
- **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
|
|
179
191
|
- **Node.js trails by only a few percent** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
|
|
180
192
|
- **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
|
|
193
|
+
- **Elixir matches the Rust core** because the Rustler NIF executes the same `ConversionOptions` pipeline—benchmarks land between 170–1,460 ops/sec on the Wikipedia fixtures and >20 k ops/sec on micro HOCR payloads.
|
|
181
194
|
- **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
|
|
182
195
|
- **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
|
|
183
196
|
|
|
@@ -185,20 +198,20 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
|
|
|
185
198
|
|
|
186
199
|
Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
|
|
187
200
|
|
|
188
|
-
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
|
|
189
|
-
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
|
|
190
|
-
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
|
|
191
|
-
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
|
|
192
|
-
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
|
|
193
|
-
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
|
|
194
|
-
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
|
|
195
|
-
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
|
|
196
|
-
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
|
|
197
|
-
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
|
|
201
|
+
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Elixir ops/sec | Rust ops/sec |
|
|
202
|
+
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | -------------- | ------------ |
|
|
203
|
+
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | 1,463 | **1,700** |
|
|
204
|
+
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | 357 | **416** |
|
|
205
|
+
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | 171 | **190** |
|
|
206
|
+
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | 174 | **220** |
|
|
207
|
+
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | 247 | **258** |
|
|
208
|
+
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 3,113 | 2,760 |
|
|
209
|
+
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | 20,424 | **31,345** |
|
|
210
|
+
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,366 | 3,080 |
|
|
198
211
|
|
|
199
212
|
The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
|
|
200
213
|
|
|
201
|
-
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
214
|
+
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures (for example, `task bench:bindings -- --language elixir`), pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
202
215
|
|
|
203
216
|
Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
|
|
204
217
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
export function
|
|
3
|
+
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
4
|
+
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
5
|
+
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
6
|
+
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
7
7
|
export function convertBytes(html: Uint8Array, options: any): string;
|
|
8
8
|
/**
|
|
9
9
|
* Convert HTML to Markdown
|
|
@@ -24,11 +24,11 @@ export function convertBytes(html: Uint8Array, options: any): string;
|
|
|
24
24
|
* ```
|
|
25
25
|
*/
|
|
26
26
|
export function convert(html: string, options: any): string;
|
|
27
|
-
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
28
|
-
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
29
|
-
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
30
|
-
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
31
27
|
export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
28
|
+
/**
|
|
29
|
+
* Initialize panic hook for better error messages in the browser
|
|
30
|
+
*/
|
|
31
|
+
export function init(): void;
|
|
32
32
|
export class WasmConversionOptionsHandle {
|
|
33
33
|
free(): void;
|
|
34
34
|
[Symbol.dispose](): void;
|
|
@@ -222,95 +222,59 @@ function getArrayJsValueFromWasm0(ptr, len) {
|
|
|
222
222
|
return result;
|
|
223
223
|
}
|
|
224
224
|
/**
|
|
225
|
-
* Initialize panic hook for better error messages in the browser
|
|
226
|
-
*/
|
|
227
|
-
exports.init = function() {
|
|
228
|
-
wasm.init();
|
|
229
|
-
};
|
|
230
|
-
|
|
231
|
-
/**
|
|
232
|
-
* @param {Uint8Array} html
|
|
233
225
|
* @param {any} options
|
|
234
|
-
* @returns {
|
|
226
|
+
* @returns {WasmConversionOptionsHandle}
|
|
235
227
|
*/
|
|
236
|
-
exports.
|
|
237
|
-
let deferred2_0;
|
|
238
|
-
let deferred2_1;
|
|
228
|
+
exports.createConversionOptionsHandle = function(options) {
|
|
239
229
|
try {
|
|
240
230
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
241
|
-
wasm.
|
|
231
|
+
wasm.createConversionOptionsHandle(retptr, addHeapObject(options));
|
|
242
232
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
243
233
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
244
234
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
var len1 = r1;
|
|
248
|
-
if (r3) {
|
|
249
|
-
ptr1 = 0; len1 = 0;
|
|
250
|
-
throw takeObject(r2);
|
|
235
|
+
if (r2) {
|
|
236
|
+
throw takeObject(r1);
|
|
251
237
|
}
|
|
252
|
-
|
|
253
|
-
deferred2_1 = len1;
|
|
254
|
-
return getStringFromWasm0(ptr1, len1);
|
|
238
|
+
return WasmConversionOptionsHandle.__wrap(r0);
|
|
255
239
|
} finally {
|
|
256
240
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
257
|
-
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
258
241
|
}
|
|
259
242
|
};
|
|
260
243
|
|
|
244
|
+
function _assertClass(instance, klass) {
|
|
245
|
+
if (!(instance instanceof klass)) {
|
|
246
|
+
throw new Error(`expected instance of ${klass.name}`);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
261
249
|
/**
|
|
262
|
-
* Convert HTML to Markdown
|
|
263
|
-
*
|
|
264
|
-
* # Arguments
|
|
265
|
-
*
|
|
266
|
-
* * `html` - The HTML string to convert
|
|
267
|
-
* * `options` - Optional conversion options (as a JavaScript object)
|
|
268
|
-
*
|
|
269
|
-
* # Example
|
|
270
|
-
*
|
|
271
|
-
* ```javascript
|
|
272
|
-
* import { convert } from 'html-to-markdown-wasm';
|
|
273
|
-
*
|
|
274
|
-
* const html = '<h1>Hello World</h1>';
|
|
275
|
-
* const markdown = convert(html);
|
|
276
|
-
* console.log(markdown); // # Hello World
|
|
277
|
-
* ```
|
|
278
250
|
* @param {string} html
|
|
279
251
|
* @param {any} options
|
|
280
|
-
* @
|
|
252
|
+
* @param {WasmInlineImageConfig | null} [image_config]
|
|
253
|
+
* @returns {WasmHtmlExtraction}
|
|
281
254
|
*/
|
|
282
|
-
exports.
|
|
283
|
-
let deferred3_0;
|
|
284
|
-
let deferred3_1;
|
|
255
|
+
exports.convertWithInlineImages = function(html, options, image_config) {
|
|
285
256
|
try {
|
|
286
257
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
287
258
|
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
288
259
|
const len0 = WASM_VECTOR_LEN;
|
|
289
|
-
|
|
260
|
+
let ptr1 = 0;
|
|
261
|
+
if (!isLikeNone(image_config)) {
|
|
262
|
+
_assertClass(image_config, WasmInlineImageConfig);
|
|
263
|
+
ptr1 = image_config.__destroy_into_raw();
|
|
264
|
+
}
|
|
265
|
+
wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
290
266
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
291
267
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
292
268
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
var len2 = r1;
|
|
296
|
-
if (r3) {
|
|
297
|
-
ptr2 = 0; len2 = 0;
|
|
298
|
-
throw takeObject(r2);
|
|
269
|
+
if (r2) {
|
|
270
|
+
throw takeObject(r1);
|
|
299
271
|
}
|
|
300
|
-
|
|
301
|
-
deferred3_1 = len2;
|
|
302
|
-
return getStringFromWasm0(ptr2, len2);
|
|
272
|
+
return WasmHtmlExtraction.__wrap(r0);
|
|
303
273
|
} finally {
|
|
304
274
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
305
|
-
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
306
275
|
}
|
|
307
276
|
};
|
|
308
277
|
|
|
309
|
-
function _assertClass(instance, klass) {
|
|
310
|
-
if (!(instance instanceof klass)) {
|
|
311
|
-
throw new Error(`expected instance of ${klass.name}`);
|
|
312
|
-
}
|
|
313
|
-
}
|
|
314
278
|
/**
|
|
315
279
|
* @param {Uint8Array} html
|
|
316
280
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -343,68 +307,97 @@ exports.convertBytesWithOptionsHandle = function(html, handle) {
|
|
|
343
307
|
};
|
|
344
308
|
|
|
345
309
|
/**
|
|
346
|
-
* @param {
|
|
347
|
-
* @
|
|
310
|
+
* @param {string} html
|
|
311
|
+
* @param {WasmConversionOptionsHandle} handle
|
|
312
|
+
* @returns {string}
|
|
348
313
|
*/
|
|
349
|
-
exports.
|
|
314
|
+
exports.convertWithOptionsHandle = function(html, handle) {
|
|
315
|
+
let deferred3_0;
|
|
316
|
+
let deferred3_1;
|
|
350
317
|
try {
|
|
351
318
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
352
|
-
wasm.
|
|
319
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
320
|
+
const len0 = WASM_VECTOR_LEN;
|
|
321
|
+
_assertClass(handle, WasmConversionOptionsHandle);
|
|
322
|
+
wasm.convertWithOptionsHandle(retptr, ptr0, len0, handle.__wbg_ptr);
|
|
353
323
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
354
324
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
355
325
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
356
|
-
|
|
357
|
-
|
|
326
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
327
|
+
var ptr2 = r0;
|
|
328
|
+
var len2 = r1;
|
|
329
|
+
if (r3) {
|
|
330
|
+
ptr2 = 0; len2 = 0;
|
|
331
|
+
throw takeObject(r2);
|
|
358
332
|
}
|
|
359
|
-
|
|
333
|
+
deferred3_0 = ptr2;
|
|
334
|
+
deferred3_1 = len2;
|
|
335
|
+
return getStringFromWasm0(ptr2, len2);
|
|
360
336
|
} finally {
|
|
361
337
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
338
|
+
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
362
339
|
}
|
|
363
340
|
};
|
|
364
341
|
|
|
365
342
|
/**
|
|
366
|
-
* @param {
|
|
343
|
+
* @param {Uint8Array} html
|
|
367
344
|
* @param {any} options
|
|
368
|
-
* @
|
|
369
|
-
* @returns {WasmHtmlExtraction}
|
|
345
|
+
* @returns {string}
|
|
370
346
|
*/
|
|
371
|
-
exports.
|
|
347
|
+
exports.convertBytes = function(html, options) {
|
|
348
|
+
let deferred2_0;
|
|
349
|
+
let deferred2_1;
|
|
372
350
|
try {
|
|
373
351
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
374
|
-
|
|
375
|
-
const len0 = WASM_VECTOR_LEN;
|
|
376
|
-
let ptr1 = 0;
|
|
377
|
-
if (!isLikeNone(image_config)) {
|
|
378
|
-
_assertClass(image_config, WasmInlineImageConfig);
|
|
379
|
-
ptr1 = image_config.__destroy_into_raw();
|
|
380
|
-
}
|
|
381
|
-
wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
352
|
+
wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
|
|
382
353
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
383
354
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
384
355
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
385
|
-
|
|
386
|
-
|
|
356
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
357
|
+
var ptr1 = r0;
|
|
358
|
+
var len1 = r1;
|
|
359
|
+
if (r3) {
|
|
360
|
+
ptr1 = 0; len1 = 0;
|
|
361
|
+
throw takeObject(r2);
|
|
387
362
|
}
|
|
388
|
-
|
|
363
|
+
deferred2_0 = ptr1;
|
|
364
|
+
deferred2_1 = len1;
|
|
365
|
+
return getStringFromWasm0(ptr1, len1);
|
|
389
366
|
} finally {
|
|
390
367
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
368
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
391
369
|
}
|
|
392
370
|
};
|
|
393
371
|
|
|
394
372
|
/**
|
|
373
|
+
* Convert HTML to Markdown
|
|
374
|
+
*
|
|
375
|
+
* # Arguments
|
|
376
|
+
*
|
|
377
|
+
* * `html` - The HTML string to convert
|
|
378
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
379
|
+
*
|
|
380
|
+
* # Example
|
|
381
|
+
*
|
|
382
|
+
* ```javascript
|
|
383
|
+
* import { convert } from 'html-to-markdown-wasm';
|
|
384
|
+
*
|
|
385
|
+
* const html = '<h1>Hello World</h1>';
|
|
386
|
+
* const markdown = convert(html);
|
|
387
|
+
* console.log(markdown); // # Hello World
|
|
388
|
+
* ```
|
|
395
389
|
* @param {string} html
|
|
396
|
-
* @param {
|
|
390
|
+
* @param {any} options
|
|
397
391
|
* @returns {string}
|
|
398
392
|
*/
|
|
399
|
-
exports.
|
|
393
|
+
exports.convert = function(html, options) {
|
|
400
394
|
let deferred3_0;
|
|
401
395
|
let deferred3_1;
|
|
402
396
|
try {
|
|
403
397
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
404
398
|
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
405
399
|
const len0 = WASM_VECTOR_LEN;
|
|
406
|
-
|
|
407
|
-
wasm.convertWithOptionsHandle(retptr, ptr0, len0, handle.__wbg_ptr);
|
|
400
|
+
wasm.convert(retptr, ptr0, len0, addHeapObject(options));
|
|
408
401
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
409
402
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
410
403
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
@@ -451,6 +444,13 @@ exports.convertBytesWithInlineImages = function(html, options, image_config) {
|
|
|
451
444
|
}
|
|
452
445
|
};
|
|
453
446
|
|
|
447
|
+
/**
|
|
448
|
+
* Initialize panic hook for better error messages in the browser
|
|
449
|
+
*/
|
|
450
|
+
exports.init = function() {
|
|
451
|
+
wasm.init();
|
|
452
|
+
};
|
|
453
|
+
|
|
454
454
|
const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
455
455
|
? { register: () => {}, unregister: () => {} }
|
|
456
456
|
: new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
|
|
Binary file
|
package/dist-node/package.json
CHANGED
package/dist-web/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# html-to-markdown
|
|
2
2
|
|
|
3
|
-
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
|
|
3
|
+
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
|
|
4
4
|
|
|
5
5
|
[](https://crates.io/crates/html-to-markdown)
|
|
6
6
|
[](https://www.npmjs.com/package/html-to-markdown-node)
|
|
@@ -8,6 +8,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
|
|
|
8
8
|
[](https://pypi.org/project/html-to-markdown/)
|
|
9
9
|
[](https://packagist.org/packages/goldziher/html-to-markdown)
|
|
10
10
|
[](https://rubygems.org/gems/html-to-markdown)
|
|
11
|
+
[](https://hex.pm/packages/html_to_markdown)
|
|
11
12
|
[](https://www.nuget.org/packages/HtmlToMarkdown/)
|
|
12
13
|
[](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
13
14
|
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
@@ -43,6 +44,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
|
|
|
43
44
|
- PHP wrapper package – [PHP README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php/README.md)
|
|
44
45
|
- PHP extension (PIE) – [Extension README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/php-ext/README.md)
|
|
45
46
|
- **Ruby guide** – [Ruby README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md)
|
|
47
|
+
- **Elixir guide** – [Elixir README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/elixir/README.md)
|
|
46
48
|
- **Rust guide** – [Rust README](https://github.com/Goldziher/html-to-markdown/blob/main/crates/html-to-markdown/README.md)
|
|
47
49
|
- **Contributing** – [CONTRIBUTING.md](https://github.com/Goldziher/html-to-markdown/blob/main/CONTRIBUTING.md) ⭐ Start here!
|
|
48
50
|
- **Changelog** – [CHANGELOG.md](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md)
|
|
@@ -57,6 +59,7 @@ Experience WebAssembly-powered HTML to Markdown conversion instantly in your bro
|
|
|
57
59
|
| **Python** (bindings + CLI) | `pip install html-to-markdown` |
|
|
58
60
|
| **PHP** (extension + helpers) | `pie install goldziher/html-to-markdown`<br>`composer require html-to-markdown/extension` |
|
|
59
61
|
| **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
|
|
62
|
+
| **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
|
|
60
63
|
| **Rust** crate | `cargo add html-to-markdown-rs` |
|
|
61
64
|
| Rust CLI | `cargo install html-to-markdown-cli` |
|
|
62
65
|
| Homebrew CLI | `brew tap goldziher/tap`<br>`brew install html-to-markdown` |
|
|
@@ -126,6 +129,15 @@ markdown, inline_images, warnings = convert_with_inline_images(
|
|
|
126
129
|
)
|
|
127
130
|
```
|
|
128
131
|
|
|
132
|
+
### Elixir
|
|
133
|
+
|
|
134
|
+
```elixir
|
|
135
|
+
{:ok, markdown} = HtmlToMarkdown.convert("<h1>Hello</h1>")
|
|
136
|
+
|
|
137
|
+
# Keyword options are supported (internally mapped to the Rust ConversionOptions struct)
|
|
138
|
+
HtmlToMarkdown.convert!("<p>Wrap me</p>", wrap: true, wrap_width: 32, preprocessing: %{enabled: true})
|
|
139
|
+
```
|
|
140
|
+
|
|
129
141
|
### Rust
|
|
130
142
|
|
|
131
143
|
```rust
|
|
@@ -178,6 +190,7 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
|
|
|
178
190
|
- **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
|
|
179
191
|
- **Node.js trails by only a few percent** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
|
|
180
192
|
- **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
|
|
193
|
+
- **Elixir matches the Rust core** because the Rustler NIF executes the same `ConversionOptions` pipeline—benchmarks land between 170–1,460 ops/sec on the Wikipedia fixtures and >20 k ops/sec on micro HOCR payloads.
|
|
181
194
|
- **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
|
|
182
195
|
- **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
|
|
183
196
|
|
|
@@ -185,20 +198,20 @@ Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, share
|
|
|
185
198
|
|
|
186
199
|
Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
|
|
187
200
|
|
|
188
|
-
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
|
|
189
|
-
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
|
|
190
|
-
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
|
|
191
|
-
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
|
|
192
|
-
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
|
|
193
|
-
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
|
|
194
|
-
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
|
|
195
|
-
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
|
|
196
|
-
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
|
|
197
|
-
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
|
|
201
|
+
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Elixir ops/sec | Rust ops/sec |
|
|
202
|
+
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | -------------- | ------------ |
|
|
203
|
+
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | 1,463 | **1,700** |
|
|
204
|
+
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | 357 | **416** |
|
|
205
|
+
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | 171 | **190** |
|
|
206
|
+
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | 174 | **220** |
|
|
207
|
+
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | 247 | **258** |
|
|
208
|
+
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 3,113 | 2,760 |
|
|
209
|
+
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | 20,424 | **31,345** |
|
|
210
|
+
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,366 | 3,080 |
|
|
198
211
|
|
|
199
212
|
The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
|
|
200
213
|
|
|
201
|
-
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
214
|
+
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures (for example, `task bench:bindings -- --language elixir`), pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
202
215
|
|
|
203
216
|
Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
|
|
204
217
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
export function
|
|
3
|
+
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
4
|
+
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
5
|
+
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
6
|
+
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
7
7
|
export function convertBytes(html: Uint8Array, options: any): string;
|
|
8
8
|
/**
|
|
9
9
|
* Convert HTML to Markdown
|
|
@@ -24,11 +24,11 @@ export function convertBytes(html: Uint8Array, options: any): string;
|
|
|
24
24
|
* ```
|
|
25
25
|
*/
|
|
26
26
|
export function convert(html: string, options: any): string;
|
|
27
|
-
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
28
|
-
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
29
|
-
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
30
|
-
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
31
27
|
export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
28
|
+
/**
|
|
29
|
+
* Initialize panic hook for better error messages in the browser
|
|
30
|
+
*/
|
|
31
|
+
export function init(): void;
|
|
32
32
|
export class WasmConversionOptionsHandle {
|
|
33
33
|
free(): void;
|
|
34
34
|
[Symbol.dispose](): void;
|
|
@@ -228,95 +228,59 @@ function getArrayJsValueFromWasm0(ptr, len) {
|
|
|
228
228
|
return result;
|
|
229
229
|
}
|
|
230
230
|
/**
|
|
231
|
-
* Initialize panic hook for better error messages in the browser
|
|
232
|
-
*/
|
|
233
|
-
export function init() {
|
|
234
|
-
wasm.init();
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
/**
|
|
238
|
-
* @param {Uint8Array} html
|
|
239
231
|
* @param {any} options
|
|
240
|
-
* @returns {
|
|
232
|
+
* @returns {WasmConversionOptionsHandle}
|
|
241
233
|
*/
|
|
242
|
-
export function
|
|
243
|
-
let deferred2_0;
|
|
244
|
-
let deferred2_1;
|
|
234
|
+
export function createConversionOptionsHandle(options) {
|
|
245
235
|
try {
|
|
246
236
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
247
|
-
wasm.
|
|
237
|
+
wasm.createConversionOptionsHandle(retptr, addHeapObject(options));
|
|
248
238
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
249
239
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
250
240
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
var len1 = r1;
|
|
254
|
-
if (r3) {
|
|
255
|
-
ptr1 = 0; len1 = 0;
|
|
256
|
-
throw takeObject(r2);
|
|
241
|
+
if (r2) {
|
|
242
|
+
throw takeObject(r1);
|
|
257
243
|
}
|
|
258
|
-
|
|
259
|
-
deferred2_1 = len1;
|
|
260
|
-
return getStringFromWasm0(ptr1, len1);
|
|
244
|
+
return WasmConversionOptionsHandle.__wrap(r0);
|
|
261
245
|
} finally {
|
|
262
246
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
263
|
-
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
264
247
|
}
|
|
265
248
|
}
|
|
266
249
|
|
|
250
|
+
function _assertClass(instance, klass) {
|
|
251
|
+
if (!(instance instanceof klass)) {
|
|
252
|
+
throw new Error(`expected instance of ${klass.name}`);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
267
255
|
/**
|
|
268
|
-
* Convert HTML to Markdown
|
|
269
|
-
*
|
|
270
|
-
* # Arguments
|
|
271
|
-
*
|
|
272
|
-
* * `html` - The HTML string to convert
|
|
273
|
-
* * `options` - Optional conversion options (as a JavaScript object)
|
|
274
|
-
*
|
|
275
|
-
* # Example
|
|
276
|
-
*
|
|
277
|
-
* ```javascript
|
|
278
|
-
* import { convert } from 'html-to-markdown-wasm';
|
|
279
|
-
*
|
|
280
|
-
* const html = '<h1>Hello World</h1>';
|
|
281
|
-
* const markdown = convert(html);
|
|
282
|
-
* console.log(markdown); // # Hello World
|
|
283
|
-
* ```
|
|
284
256
|
* @param {string} html
|
|
285
257
|
* @param {any} options
|
|
286
|
-
* @
|
|
258
|
+
* @param {WasmInlineImageConfig | null} [image_config]
|
|
259
|
+
* @returns {WasmHtmlExtraction}
|
|
287
260
|
*/
|
|
288
|
-
export function
|
|
289
|
-
let deferred3_0;
|
|
290
|
-
let deferred3_1;
|
|
261
|
+
export function convertWithInlineImages(html, options, image_config) {
|
|
291
262
|
try {
|
|
292
263
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
293
264
|
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
294
265
|
const len0 = WASM_VECTOR_LEN;
|
|
295
|
-
|
|
266
|
+
let ptr1 = 0;
|
|
267
|
+
if (!isLikeNone(image_config)) {
|
|
268
|
+
_assertClass(image_config, WasmInlineImageConfig);
|
|
269
|
+
ptr1 = image_config.__destroy_into_raw();
|
|
270
|
+
}
|
|
271
|
+
wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
296
272
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
297
273
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
298
274
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
var len2 = r1;
|
|
302
|
-
if (r3) {
|
|
303
|
-
ptr2 = 0; len2 = 0;
|
|
304
|
-
throw takeObject(r2);
|
|
275
|
+
if (r2) {
|
|
276
|
+
throw takeObject(r1);
|
|
305
277
|
}
|
|
306
|
-
|
|
307
|
-
deferred3_1 = len2;
|
|
308
|
-
return getStringFromWasm0(ptr2, len2);
|
|
278
|
+
return WasmHtmlExtraction.__wrap(r0);
|
|
309
279
|
} finally {
|
|
310
280
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
311
|
-
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
312
281
|
}
|
|
313
282
|
}
|
|
314
283
|
|
|
315
|
-
function _assertClass(instance, klass) {
|
|
316
|
-
if (!(instance instanceof klass)) {
|
|
317
|
-
throw new Error(`expected instance of ${klass.name}`);
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
284
|
/**
|
|
321
285
|
* @param {Uint8Array} html
|
|
322
286
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -349,68 +313,97 @@ export function convertBytesWithOptionsHandle(html, handle) {
|
|
|
349
313
|
}
|
|
350
314
|
|
|
351
315
|
/**
|
|
352
|
-
* @param {
|
|
353
|
-
* @
|
|
316
|
+
* @param {string} html
|
|
317
|
+
* @param {WasmConversionOptionsHandle} handle
|
|
318
|
+
* @returns {string}
|
|
354
319
|
*/
|
|
355
|
-
export function
|
|
320
|
+
export function convertWithOptionsHandle(html, handle) {
|
|
321
|
+
let deferred3_0;
|
|
322
|
+
let deferred3_1;
|
|
356
323
|
try {
|
|
357
324
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
358
|
-
wasm.
|
|
325
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
326
|
+
const len0 = WASM_VECTOR_LEN;
|
|
327
|
+
_assertClass(handle, WasmConversionOptionsHandle);
|
|
328
|
+
wasm.convertWithOptionsHandle(retptr, ptr0, len0, handle.__wbg_ptr);
|
|
359
329
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
360
330
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
361
331
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
362
|
-
|
|
363
|
-
|
|
332
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
333
|
+
var ptr2 = r0;
|
|
334
|
+
var len2 = r1;
|
|
335
|
+
if (r3) {
|
|
336
|
+
ptr2 = 0; len2 = 0;
|
|
337
|
+
throw takeObject(r2);
|
|
364
338
|
}
|
|
365
|
-
|
|
339
|
+
deferred3_0 = ptr2;
|
|
340
|
+
deferred3_1 = len2;
|
|
341
|
+
return getStringFromWasm0(ptr2, len2);
|
|
366
342
|
} finally {
|
|
367
343
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
344
|
+
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
368
345
|
}
|
|
369
346
|
}
|
|
370
347
|
|
|
371
348
|
/**
|
|
372
|
-
* @param {
|
|
349
|
+
* @param {Uint8Array} html
|
|
373
350
|
* @param {any} options
|
|
374
|
-
* @
|
|
375
|
-
* @returns {WasmHtmlExtraction}
|
|
351
|
+
* @returns {string}
|
|
376
352
|
*/
|
|
377
|
-
export function
|
|
353
|
+
export function convertBytes(html, options) {
|
|
354
|
+
let deferred2_0;
|
|
355
|
+
let deferred2_1;
|
|
378
356
|
try {
|
|
379
357
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
380
|
-
|
|
381
|
-
const len0 = WASM_VECTOR_LEN;
|
|
382
|
-
let ptr1 = 0;
|
|
383
|
-
if (!isLikeNone(image_config)) {
|
|
384
|
-
_assertClass(image_config, WasmInlineImageConfig);
|
|
385
|
-
ptr1 = image_config.__destroy_into_raw();
|
|
386
|
-
}
|
|
387
|
-
wasm.convertWithInlineImages(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
358
|
+
wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
|
|
388
359
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
389
360
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
390
361
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
391
|
-
|
|
392
|
-
|
|
362
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
363
|
+
var ptr1 = r0;
|
|
364
|
+
var len1 = r1;
|
|
365
|
+
if (r3) {
|
|
366
|
+
ptr1 = 0; len1 = 0;
|
|
367
|
+
throw takeObject(r2);
|
|
393
368
|
}
|
|
394
|
-
|
|
369
|
+
deferred2_0 = ptr1;
|
|
370
|
+
deferred2_1 = len1;
|
|
371
|
+
return getStringFromWasm0(ptr1, len1);
|
|
395
372
|
} finally {
|
|
396
373
|
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
374
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
397
375
|
}
|
|
398
376
|
}
|
|
399
377
|
|
|
400
378
|
/**
|
|
379
|
+
* Convert HTML to Markdown
|
|
380
|
+
*
|
|
381
|
+
* # Arguments
|
|
382
|
+
*
|
|
383
|
+
* * `html` - The HTML string to convert
|
|
384
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
385
|
+
*
|
|
386
|
+
* # Example
|
|
387
|
+
*
|
|
388
|
+
* ```javascript
|
|
389
|
+
* import { convert } from 'html-to-markdown-wasm';
|
|
390
|
+
*
|
|
391
|
+
* const html = '<h1>Hello World</h1>';
|
|
392
|
+
* const markdown = convert(html);
|
|
393
|
+
* console.log(markdown); // # Hello World
|
|
394
|
+
* ```
|
|
401
395
|
* @param {string} html
|
|
402
|
-
* @param {
|
|
396
|
+
* @param {any} options
|
|
403
397
|
* @returns {string}
|
|
404
398
|
*/
|
|
405
|
-
export function
|
|
399
|
+
export function convert(html, options) {
|
|
406
400
|
let deferred3_0;
|
|
407
401
|
let deferred3_1;
|
|
408
402
|
try {
|
|
409
403
|
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
410
404
|
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
411
405
|
const len0 = WASM_VECTOR_LEN;
|
|
412
|
-
|
|
413
|
-
wasm.convertWithOptionsHandle(retptr, ptr0, len0, handle.__wbg_ptr);
|
|
406
|
+
wasm.convert(retptr, ptr0, len0, addHeapObject(options));
|
|
414
407
|
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
415
408
|
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
416
409
|
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
@@ -457,6 +450,13 @@ export function convertBytesWithInlineImages(html, options, image_config) {
|
|
|
457
450
|
}
|
|
458
451
|
}
|
|
459
452
|
|
|
453
|
+
/**
|
|
454
|
+
* Initialize panic hook for better error messages in the browser
|
|
455
|
+
*/
|
|
456
|
+
export function init() {
|
|
457
|
+
wasm.init();
|
|
458
|
+
}
|
|
459
|
+
|
|
460
460
|
const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
461
461
|
? { register: () => {}, unregister: () => {} }
|
|
462
462
|
: new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
|
|
Binary file
|
package/dist-web/package.json
CHANGED
package/package.json
CHANGED