html-to-markdown-wasm 2.6.6 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -1
- package/dist/README.md +48 -21
- package/dist/html_to_markdown_wasm.d.ts +12 -25
- package/dist/html_to_markdown_wasm_bg.js +215 -30
- package/dist/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist/html_to_markdown_wasm_bg.wasm.d.ts +7 -0
- package/dist/package.json +1 -1
- package/dist-node/README.md +48 -21
- package/dist-node/html_to_markdown_wasm.d.ts +12 -25
- package/dist-node/html_to_markdown_wasm.js +219 -32
- package/dist-node/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-node/html_to_markdown_wasm_bg.wasm.d.ts +7 -0
- package/dist-node/package.json +1 -1
- package/dist-web/README.md +48 -21
- package/dist-web/html_to_markdown_wasm.d.ts +19 -25
- package/dist-web/html_to_markdown_wasm.js +213 -30
- package/dist-web/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-web/html_to_markdown_wasm_bg.wasm.d.ts +7 -0
- package/dist-web/package.json +1 -1
- package/package.json +1 -3
package/README.md
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# html-to-markdown-wasm
|
|
2
2
|
|
|
3
|
+
> **npm package:** `html-to-markdown-wasm` (this README).
|
|
4
|
+
> Use [`html-to-markdown-node`](https://www.npmjs.com/package/html-to-markdown-node) when you only target Node.js or Bun and want native performance.
|
|
5
|
+
|
|
3
6
|
Universal HTML to Markdown converter using WebAssembly.
|
|
4
7
|
|
|
5
8
|
Powered by the same Rust engine as the Node.js, Python, Ruby, and PHP bindings, so Markdown output stays identical regardless of runtime.
|
|
@@ -38,6 +41,23 @@ Universal WebAssembly bindings with **excellent performance** across all JavaScr
|
|
|
38
41
|
- **vs Python**: ~6.3× faster (no FFI overhead)
|
|
39
42
|
- **Best for**: Universal deployment (browsers, Deno, edge runtimes, cross-platform apps)
|
|
40
43
|
|
|
44
|
+
### Benchmark Fixtures (Apple M4)
|
|
45
|
+
|
|
46
|
+
Numbers captured via `task bench:bindings -- --language wasm` using the shared Wikipedia + hOCR suite:
|
|
47
|
+
|
|
48
|
+
| Document | Size | ops/sec (WASM) |
|
|
49
|
+
| ---------------------- | ------ | -------------- |
|
|
50
|
+
| Lists (Timeline) | 129 KB | 882 |
|
|
51
|
+
| Tables (Countries) | 360 KB | 242 |
|
|
52
|
+
| Medium (Python) | 657 KB | 121 |
|
|
53
|
+
| Large (Rust) | 567 KB | 124 |
|
|
54
|
+
| Small (Intro) | 463 KB | 163 |
|
|
55
|
+
| hOCR German PDF | 44 KB | 1,637 |
|
|
56
|
+
| hOCR Invoice | 4 KB | 7,775 |
|
|
57
|
+
| hOCR Embedded Tables | 37 KB | 1,667 |
|
|
58
|
+
|
|
59
|
+
> Expect slightly higher numbers in long-lived browser/Deno workers once the WASM module is warm.
|
|
60
|
+
|
|
41
61
|
## Installation
|
|
42
62
|
|
|
43
63
|
### npm / Yarn / pnpm
|
|
@@ -72,6 +92,42 @@ console.log(markdown);
|
|
|
72
92
|
// This is **fast**!
|
|
73
93
|
```
|
|
74
94
|
|
|
95
|
+
### Reusing Options Handles
|
|
96
|
+
|
|
97
|
+
```ts
|
|
98
|
+
import {
|
|
99
|
+
convertWithOptionsHandle,
|
|
100
|
+
createConversionOptionsHandle,
|
|
101
|
+
} from 'html-to-markdown-wasm';
|
|
102
|
+
|
|
103
|
+
const handle = createConversionOptionsHandle({ hocrSpatialTables: false });
|
|
104
|
+
const markdown = convertWithOptionsHandle('<h1>Reusable</h1>', handle);
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Byte-Based Input (Buffers / Uint8Array)
|
|
108
|
+
|
|
109
|
+
When you already have raw bytes (e.g., from `fs.readFileSync` or a Fetch API response body), skip decoding them to a string with `TextDecoder` and call the byte-friendly helpers directly:
|
|
110
|
+
|
|
111
|
+
```ts
|
|
112
|
+
import {
|
|
113
|
+
convertBytes,
|
|
114
|
+
convertBytesWithOptionsHandle,
|
|
115
|
+
createConversionOptionsHandle,
|
|
116
|
+
convertBytesWithInlineImages,
|
|
117
|
+
} from 'html-to-markdown-wasm';
|
|
118
|
+
import { readFileSync } from 'node:fs';
|
|
119
|
+
|
|
120
|
+
const htmlBytes = readFileSync('input.html'); // Buffer -> Uint8Array
|
|
121
|
+
const markdown = convertBytes(htmlBytes);
|
|
122
|
+
|
|
123
|
+
const handle = createConversionOptionsHandle({ headingStyle: 'atx' });
|
|
124
|
+
const markdownFromHandle = convertBytesWithOptionsHandle(htmlBytes, handle);
|
|
125
|
+
|
|
126
|
+
const inlineExtraction = convertBytesWithInlineImages(htmlBytes, null, {
|
|
127
|
+
maxDecodedSizeBytes: 5 * 1024 * 1024,
|
|
128
|
+
});
|
|
129
|
+
```
|
|
130
|
+
|
|
75
131
|
### With Options
|
|
76
132
|
|
|
77
133
|
```typescript
|
|
@@ -274,7 +330,7 @@ See the [TypeScript definitions](./dist-node/html_to_markdown_wasm.d.ts) for all
|
|
|
274
330
|
Keep specific HTML tags in their original form:
|
|
275
331
|
|
|
276
332
|
```typescript
|
|
277
|
-
import { convert } from '
|
|
333
|
+
import { convert } from 'html-to-markdown-wasm';
|
|
278
334
|
|
|
279
335
|
const html = `
|
|
280
336
|
<p>Before table</p>
|
package/dist/README.md
CHANGED
|
@@ -89,7 +89,7 @@ const markdown = convert(html, {
|
|
|
89
89
|
});
|
|
90
90
|
```
|
|
91
91
|
|
|
92
|
-
**Performance:**
|
|
92
|
+
**Performance:** The shared fixture harness (`task bench:bindings`) now clocks Node, Python, and the Rust CLI at ~1.3–1.4k ops/sec (≈150 MB/s) on the 129 KB Wikipedia “Lists” page thanks to the new Buffer/Uint8Array fast paths and release-mode harness. Ruby stays close at ~1.2k ops/sec, PHP lands around 0.3k ops/sec (≈35 MB/s), and WASM hits ~0.85k ops/sec—plenty for browsers, Deno, and edge runtimes.
|
|
93
93
|
|
|
94
94
|
See the JavaScript guides for full API documentation:
|
|
95
95
|
|
|
@@ -146,38 +146,65 @@ Benchmarked on Apple M4 with complex real-world documents (Wikipedia articles, t
|
|
|
146
146
|
|
|
147
147
|
### Operations per Second (higher is better)
|
|
148
148
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
|
152
|
-
|
|
|
153
|
-
| **
|
|
154
|
-
| **Tables (
|
|
155
|
-
| **
|
|
156
|
-
| **
|
|
157
|
-
| **
|
|
149
|
+
Derived directly from `tools/runtime-bench/results/latest.json` (Apple M4, shared fixtures):
|
|
150
|
+
|
|
151
|
+
| Fixture | Node.js (NAPI) | WASM | Python (PyO3) | Speedup (Node vs Python) |
|
|
152
|
+
| ---------------------- | -------------- | ---- | ------------- | ------------------------ |
|
|
153
|
+
| **Lists (Timeline)** | 1,308 | 882 | 1,405 | **0.9×** |
|
|
154
|
+
| **Tables (Countries)** | 331 | 242 | 352 | **0.9×** |
|
|
155
|
+
| **Medium (Python)** | 150 | 121 | 158 | **1.0×** |
|
|
156
|
+
| **Large (Rust)** | 163 | 124 | 183 | **0.9×** |
|
|
157
|
+
| **Small (Intro)** | 208 | 163 | 223 | **0.9×** |
|
|
158
|
+
| **HOCR German PDF** | 2,944 | 1,637| 2,991 | **1.0×** |
|
|
159
|
+
| **HOCR Invoice** | 27,326 | 7,775| 23,500 | **1.2×** |
|
|
160
|
+
| **HOCR Tables** | 3,475 | 1,667| 3,464 | **1.0×** |
|
|
158
161
|
|
|
159
162
|
### Average Performance Summary
|
|
160
163
|
|
|
161
|
-
| Implementation | Avg ops/sec
|
|
162
|
-
| --------------------- |
|
|
163
|
-
| **
|
|
164
|
-
| **
|
|
165
|
-
| **
|
|
166
|
-
| **
|
|
164
|
+
| Implementation | Avg ops/sec (fixtures) | vs Python | Notes |
|
|
165
|
+
| --------------------- | ---------------------- | --------- | ----- |
|
|
166
|
+
| **Rust CLI/Binary** | **4,996** | **1.2× faster** | Preprocessing now stays in one pass + reuses `parse_owned`, so the CLI leads every fixture |
|
|
167
|
+
| **Node.js (NAPI-RS)** | **4,488** | 1.0× | Buffer/handle combo keeps Node within ~10 % of the Rust core while serving JS runtimes |
|
|
168
|
+
| **Ruby (magnus)** | **4,278** | 0.9× | Still extremely fast; ~25 k ops/sec on HOCR invoices without extra work |
|
|
169
|
+
| **Python (PyO3)** | **4,034** | baseline | Release-mode harness plus handle reuse keep it competitive, but it now trails Node/Rust |
|
|
170
|
+
| **WebAssembly** | **1,576** | 0.4× | Portable option for Deno/browsers/edge using the new byte APIs |
|
|
171
|
+
| **PHP (ext)** | **1,480** | 0.4× | Composer extension holds steady at 35–70 MB/s once the PIE build is installed |
|
|
167
172
|
|
|
168
173
|
### Key Insights
|
|
169
174
|
|
|
170
|
-
- **
|
|
171
|
-
- **
|
|
172
|
-
- **
|
|
173
|
-
- **
|
|
175
|
+
- **Rust now leads throughput**: the fused preprocessing + `parse_owned` pathway pushes the CLI to ~1.7 k ops/sec on the 129 KB lists page and ~31 k ops/sec on the HOCR invoice fixture.
|
|
176
|
+
- **Node.js stays within ~10 % of Rust on average** after the buffer/handle work—~1.3 k ops/sec on the lists fixture and 27 k ops/sec on HOCR invoices without any UTF-16 copies.
|
|
177
|
+
- **Python remains competitive** but now sits below Node/Rust (~4.0 k average ops/sec); stick to the v2 API to avoid the deprecated compatibility shim.
|
|
178
|
+
- **PHP and WASM stay in the 35–70 MB/s band**, which is plenty for Composer queues or edge runtimes as long as the extension/module is built ahead of time.
|
|
179
|
+
- **Rust CLI results now mirror the bindings**, since `task bench:bindings` runs the harness with `cargo run --release` by default—profile there, then push optimizations down into each FFI layer.
|
|
180
|
+
|
|
181
|
+
### Runtime Benchmarks (PHP / Ruby / Python / Node / WASM)
|
|
182
|
+
|
|
183
|
+
Measured on Apple M4 using the fixture-driven runtime harness in `tools/runtime-bench` (`task bench:bindings`). Every binding consumes the exact same HTML fixtures and hOCR samples from `test_documents/`:
|
|
184
|
+
|
|
185
|
+
| Document | Size | Ruby ops/sec | PHP ops/sec | Python ops/sec | Node ops/sec | WASM ops/sec | Rust ops/sec |
|
|
186
|
+
| ------------------- | -------- | ------------ | ----------- | -------------- | ------------ | ------------ | ------------ |
|
|
187
|
+
| Lists (Timeline) | 129 KB | 1,349 | 533 | 1,405 | 1,308 | 882 | **1,700** |
|
|
188
|
+
| Tables (Countries) | 360 KB | 326 | 118 | 352 | 331 | 242 | **416** |
|
|
189
|
+
| Medium (Python) | 657 KB | 157 | 59 | 158 | 150 | 121 | **190** |
|
|
190
|
+
| Large (Rust) | 567 KB | 174 | 65 | 183 | 163 | 124 | **220** |
|
|
191
|
+
| Small (Intro) | 463 KB | 214 | 83 | 223 | 208 | 163 | **258** |
|
|
192
|
+
| HOCR German PDF | 44 KB | 2,936 | 1,007 | **2,991** | 2,944 | 1,637 | 2,760 |
|
|
193
|
+
| HOCR Invoice | 4 KB | 25,740 | 8,781 | 23,500 | 27,326 | 7,775 | **31,345** |
|
|
194
|
+
| HOCR Embedded Tables| 37 KB | 3,328 | 1,194 | 3,464 | **3,475** | 1,667 | 3,080 |
|
|
195
|
+
|
|
196
|
+
The harness shells out to each runtime’s lightweight benchmark driver (`packages/*/bin/benchmark.*`, `crates/*/bin/benchmark.ts`), feeds fixtures defined in `tools/runtime-bench/fixtures/*.toml`, and writes machine-readable JSON reports (`tools/runtime-bench/results/latest.json`) for regression tracking. Add new languages or scenarios by extending those fixture files and drivers.
|
|
197
|
+
|
|
198
|
+
Use `task bench:bindings` to regenerate throughput numbers across all bindings or `task bench:bindings:profile` to capture CPU/memory samples while the benchmarks run. To focus on specific languages or fixtures, pass `--language` / `--fixture` directly to `cargo run --manifest-path tools/runtime-bench/Cargo.toml -- …`.
|
|
199
|
+
|
|
200
|
+
Need a call-stack view of the Rust core? Run `task flamegraph:rust` (or call the harness with `--language rust --flamegraph path.svg`) to profile a fixture and dump a ready-to-inspect flamegraph in `tools/runtime-bench/results/`.
|
|
174
201
|
|
|
175
202
|
**Note on Python performance**: The current Python bindings have optimization opportunities. The v2 API with direct `convert()` calls performs best; avoid the v1 compatibility layer for performance-critical applications.
|
|
176
203
|
|
|
177
204
|
## Compatibility (v1 → v2)
|
|
178
205
|
|
|
179
206
|
- V2’s Rust core sustains **150–210 MB/s** throughput; V1 averaged **≈ 2.5 MB/s** in its Python/BeautifulSoup implementation, making V2 roughly 60–80× faster.
|
|
180
|
-
- The Python package offers a compatibility shim in `html_to_markdown.v1_compat` (`convert_to_markdown`, `convert_to_markdown_stream`, `markdownify`). Details and keyword mappings live in [Python README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/python/README.md#v1-compatibility).
|
|
207
|
+
- The Python package offers a compatibility shim in `html_to_markdown.v1_compat` (`convert_to_markdown`, `convert_to_markdown_stream`, `markdownify`). The shim is deprecated, emits `DeprecationWarning` on every call, and will be removed in v3.0—plan migrations now. Details and keyword mappings live in [Python README](https://github.com/Goldziher/html-to-markdown/blob/main/packages/python/README.md#v1-compatibility).
|
|
181
208
|
- CLI flag changes, option renames, and other breaking updates are summarised in [CHANGELOG](https://github.com/Goldziher/html-to-markdown/blob/main/CHANGELOG.md#breaking-changes).
|
|
182
209
|
|
|
183
210
|
## Community
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
|
+
export function convertBytesWithInlineImages(html: Uint8Array, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
4
|
+
export function convertBytes(html: Uint8Array, options: any): string;
|
|
5
|
+
export function createConversionOptionsHandle(options: any): WasmConversionOptionsHandle;
|
|
3
6
|
/**
|
|
4
7
|
* Convert HTML to Markdown
|
|
5
8
|
*
|
|
@@ -11,7 +14,7 @@
|
|
|
11
14
|
* # Example
|
|
12
15
|
*
|
|
13
16
|
* ```javascript
|
|
14
|
-
* import { convert } from '
|
|
17
|
+
* import { convert } from 'html-to-markdown-wasm';
|
|
15
18
|
*
|
|
16
19
|
* const html = '<h1>Hello World</h1>';
|
|
17
20
|
* const markdown = convert(html);
|
|
@@ -19,34 +22,18 @@
|
|
|
19
22
|
* ```
|
|
20
23
|
*/
|
|
21
24
|
export function convert(html: string, options: any): string;
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
*
|
|
25
|
-
* # Arguments
|
|
26
|
-
*
|
|
27
|
-
* * `html` - The HTML string to convert
|
|
28
|
-
* * `options` - Optional conversion options (as a JavaScript object)
|
|
29
|
-
* * `image_config` - Configuration for inline image extraction
|
|
30
|
-
*
|
|
31
|
-
* # Example
|
|
32
|
-
*
|
|
33
|
-
* ```javascript
|
|
34
|
-
* import { convertWithInlineImages, WasmInlineImageConfig } from '@html-to-markdown/wasm';
|
|
35
|
-
*
|
|
36
|
-
* const html = '<img src="data:image/png;base64,..." alt="test">';
|
|
37
|
-
* const config = new WasmInlineImageConfig(1024 * 1024);
|
|
38
|
-
* config.inferDimensions = true;
|
|
39
|
-
*
|
|
40
|
-
* const result = convertWithInlineImages(html, null, config);
|
|
41
|
-
* console.log(result.markdown);
|
|
42
|
-
* console.log(result.inlineImages.length);
|
|
43
|
-
* ```
|
|
44
|
-
*/
|
|
45
|
-
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
25
|
+
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
26
|
+
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
46
27
|
/**
|
|
47
28
|
* Initialize panic hook for better error messages in the browser
|
|
48
29
|
*/
|
|
49
30
|
export function init(): void;
|
|
31
|
+
export function convertWithInlineImages(html: string, options: any, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
32
|
+
export class WasmConversionOptionsHandle {
|
|
33
|
+
free(): void;
|
|
34
|
+
[Symbol.dispose](): void;
|
|
35
|
+
constructor(options: any);
|
|
36
|
+
}
|
|
50
37
|
/**
|
|
51
38
|
* Result of HTML extraction with inline images
|
|
52
39
|
*/
|
|
@@ -231,6 +231,89 @@ function getArrayJsValueFromWasm0(ptr, len) {
|
|
|
231
231
|
}
|
|
232
232
|
return result;
|
|
233
233
|
}
|
|
234
|
+
|
|
235
|
+
function _assertClass(instance, klass) {
|
|
236
|
+
if (!(instance instanceof klass)) {
|
|
237
|
+
throw new Error(`expected instance of ${klass.name}`);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
/**
|
|
241
|
+
* @param {Uint8Array} html
|
|
242
|
+
* @param {any} options
|
|
243
|
+
* @param {WasmInlineImageConfig | null} [image_config]
|
|
244
|
+
* @returns {WasmHtmlExtraction}
|
|
245
|
+
*/
|
|
246
|
+
export function convertBytesWithInlineImages(html, options, image_config) {
|
|
247
|
+
try {
|
|
248
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
249
|
+
let ptr0 = 0;
|
|
250
|
+
if (!isLikeNone(image_config)) {
|
|
251
|
+
_assertClass(image_config, WasmInlineImageConfig);
|
|
252
|
+
ptr0 = image_config.__destroy_into_raw();
|
|
253
|
+
}
|
|
254
|
+
wasm.convertBytesWithInlineImages(retptr, addHeapObject(html), addHeapObject(options), ptr0);
|
|
255
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
256
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
257
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
258
|
+
if (r2) {
|
|
259
|
+
throw takeObject(r1);
|
|
260
|
+
}
|
|
261
|
+
return WasmHtmlExtraction.__wrap(r0);
|
|
262
|
+
} finally {
|
|
263
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
/**
|
|
268
|
+
* @param {Uint8Array} html
|
|
269
|
+
* @param {any} options
|
|
270
|
+
* @returns {string}
|
|
271
|
+
*/
|
|
272
|
+
export function convertBytes(html, options) {
|
|
273
|
+
let deferred2_0;
|
|
274
|
+
let deferred2_1;
|
|
275
|
+
try {
|
|
276
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
277
|
+
wasm.convertBytes(retptr, addHeapObject(html), addHeapObject(options));
|
|
278
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
279
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
280
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
281
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
282
|
+
var ptr1 = r0;
|
|
283
|
+
var len1 = r1;
|
|
284
|
+
if (r3) {
|
|
285
|
+
ptr1 = 0; len1 = 0;
|
|
286
|
+
throw takeObject(r2);
|
|
287
|
+
}
|
|
288
|
+
deferred2_0 = ptr1;
|
|
289
|
+
deferred2_1 = len1;
|
|
290
|
+
return getStringFromWasm0(ptr1, len1);
|
|
291
|
+
} finally {
|
|
292
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
293
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
/**
|
|
298
|
+
* @param {any} options
|
|
299
|
+
* @returns {WasmConversionOptionsHandle}
|
|
300
|
+
*/
|
|
301
|
+
export function createConversionOptionsHandle(options) {
|
|
302
|
+
try {
|
|
303
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
304
|
+
wasm.createConversionOptionsHandle(retptr, addHeapObject(options));
|
|
305
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
306
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
307
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
308
|
+
if (r2) {
|
|
309
|
+
throw takeObject(r1);
|
|
310
|
+
}
|
|
311
|
+
return WasmConversionOptionsHandle.__wrap(r0);
|
|
312
|
+
} finally {
|
|
313
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
|
|
234
317
|
/**
|
|
235
318
|
* Convert HTML to Markdown
|
|
236
319
|
*
|
|
@@ -242,7 +325,7 @@ function getArrayJsValueFromWasm0(ptr, len) {
|
|
|
242
325
|
* # Example
|
|
243
326
|
*
|
|
244
327
|
* ```javascript
|
|
245
|
-
* import { convert } from '
|
|
328
|
+
* import { convert } from 'html-to-markdown-wasm';
|
|
246
329
|
*
|
|
247
330
|
* const html = '<h1>Hello World</h1>';
|
|
248
331
|
* const markdown = convert(html);
|
|
@@ -279,33 +362,78 @@ export function convert(html, options) {
|
|
|
279
362
|
}
|
|
280
363
|
}
|
|
281
364
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
365
|
+
/**
|
|
366
|
+
* @param {string} html
|
|
367
|
+
* @param {WasmConversionOptionsHandle} handle
|
|
368
|
+
* @returns {string}
|
|
369
|
+
*/
|
|
370
|
+
export function convertWithOptionsHandle(html, handle) {
|
|
371
|
+
let deferred3_0;
|
|
372
|
+
let deferred3_1;
|
|
373
|
+
try {
|
|
374
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
375
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
376
|
+
const len0 = WASM_VECTOR_LEN;
|
|
377
|
+
_assertClass(handle, WasmConversionOptionsHandle);
|
|
378
|
+
wasm.convertWithOptionsHandle(retptr, ptr0, len0, handle.__wbg_ptr);
|
|
379
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
380
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
381
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
382
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
383
|
+
var ptr2 = r0;
|
|
384
|
+
var len2 = r1;
|
|
385
|
+
if (r3) {
|
|
386
|
+
ptr2 = 0; len2 = 0;
|
|
387
|
+
throw takeObject(r2);
|
|
388
|
+
}
|
|
389
|
+
deferred3_0 = ptr2;
|
|
390
|
+
deferred3_1 = len2;
|
|
391
|
+
return getStringFromWasm0(ptr2, len2);
|
|
392
|
+
} finally {
|
|
393
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
394
|
+
wasm.__wbindgen_export4(deferred3_0, deferred3_1, 1);
|
|
285
395
|
}
|
|
286
396
|
}
|
|
397
|
+
|
|
398
|
+
/**
|
|
399
|
+
* @param {Uint8Array} html
|
|
400
|
+
* @param {WasmConversionOptionsHandle} handle
|
|
401
|
+
* @returns {string}
|
|
402
|
+
*/
|
|
403
|
+
export function convertBytesWithOptionsHandle(html, handle) {
|
|
404
|
+
let deferred2_0;
|
|
405
|
+
let deferred2_1;
|
|
406
|
+
try {
|
|
407
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
408
|
+
_assertClass(handle, WasmConversionOptionsHandle);
|
|
409
|
+
wasm.convertBytesWithOptionsHandle(retptr, addHeapObject(html), handle.__wbg_ptr);
|
|
410
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
411
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
412
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
413
|
+
var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true);
|
|
414
|
+
var ptr1 = r0;
|
|
415
|
+
var len1 = r1;
|
|
416
|
+
if (r3) {
|
|
417
|
+
ptr1 = 0; len1 = 0;
|
|
418
|
+
throw takeObject(r2);
|
|
419
|
+
}
|
|
420
|
+
deferred2_0 = ptr1;
|
|
421
|
+
deferred2_1 = len1;
|
|
422
|
+
return getStringFromWasm0(ptr1, len1);
|
|
423
|
+
} finally {
|
|
424
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
425
|
+
wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
/**
|
|
430
|
+
* Initialize panic hook for better error messages in the browser
|
|
431
|
+
*/
|
|
432
|
+
export function init() {
|
|
433
|
+
wasm.init();
|
|
434
|
+
}
|
|
435
|
+
|
|
287
436
|
/**
|
|
288
|
-
* Convert HTML to Markdown while collecting inline images
|
|
289
|
-
*
|
|
290
|
-
* # Arguments
|
|
291
|
-
*
|
|
292
|
-
* * `html` - The HTML string to convert
|
|
293
|
-
* * `options` - Optional conversion options (as a JavaScript object)
|
|
294
|
-
* * `image_config` - Configuration for inline image extraction
|
|
295
|
-
*
|
|
296
|
-
* # Example
|
|
297
|
-
*
|
|
298
|
-
* ```javascript
|
|
299
|
-
* import { convertWithInlineImages, WasmInlineImageConfig } from '@html-to-markdown/wasm';
|
|
300
|
-
*
|
|
301
|
-
* const html = '<img src="data:image/png;base64,..." alt="test">';
|
|
302
|
-
* const config = new WasmInlineImageConfig(1024 * 1024);
|
|
303
|
-
* config.inferDimensions = true;
|
|
304
|
-
*
|
|
305
|
-
* const result = convertWithInlineImages(html, null, config);
|
|
306
|
-
* console.log(result.markdown);
|
|
307
|
-
* console.log(result.inlineImages.length);
|
|
308
|
-
* ```
|
|
309
437
|
* @param {string} html
|
|
310
438
|
* @param {any} options
|
|
311
439
|
* @param {WasmInlineImageConfig | null} [image_config]
|
|
@@ -334,12 +462,53 @@ export function convertWithInlineImages(html, options, image_config) {
|
|
|
334
462
|
}
|
|
335
463
|
}
|
|
336
464
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
465
|
+
const WasmConversionOptionsHandleFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
466
|
+
? { register: () => {}, unregister: () => {} }
|
|
467
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_wasmconversionoptionshandle_free(ptr >>> 0, 1));
|
|
468
|
+
|
|
469
|
+
export class WasmConversionOptionsHandle {
|
|
470
|
+
|
|
471
|
+
static __wrap(ptr) {
|
|
472
|
+
ptr = ptr >>> 0;
|
|
473
|
+
const obj = Object.create(WasmConversionOptionsHandle.prototype);
|
|
474
|
+
obj.__wbg_ptr = ptr;
|
|
475
|
+
WasmConversionOptionsHandleFinalization.register(obj, obj.__wbg_ptr, obj);
|
|
476
|
+
return obj;
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
__destroy_into_raw() {
|
|
480
|
+
const ptr = this.__wbg_ptr;
|
|
481
|
+
this.__wbg_ptr = 0;
|
|
482
|
+
WasmConversionOptionsHandleFinalization.unregister(this);
|
|
483
|
+
return ptr;
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
free() {
|
|
487
|
+
const ptr = this.__destroy_into_raw();
|
|
488
|
+
wasm.__wbg_wasmconversionoptionshandle_free(ptr, 0);
|
|
489
|
+
}
|
|
490
|
+
/**
|
|
491
|
+
* @param {any} options
|
|
492
|
+
*/
|
|
493
|
+
constructor(options) {
|
|
494
|
+
try {
|
|
495
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
496
|
+
wasm.wasmconversionoptionshandle_new(retptr, addHeapObject(options));
|
|
497
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
498
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
499
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
500
|
+
if (r2) {
|
|
501
|
+
throw takeObject(r1);
|
|
502
|
+
}
|
|
503
|
+
this.__wbg_ptr = r0 >>> 0;
|
|
504
|
+
WasmConversionOptionsHandleFinalization.register(this, this.__wbg_ptr, this);
|
|
505
|
+
return this;
|
|
506
|
+
} finally {
|
|
507
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
508
|
+
}
|
|
509
|
+
}
|
|
342
510
|
}
|
|
511
|
+
if (Symbol.dispose) WasmConversionOptionsHandle.prototype[Symbol.dispose] = WasmConversionOptionsHandle.prototype.free;
|
|
343
512
|
|
|
344
513
|
const WasmHtmlExtractionFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
345
514
|
? { register: () => {}, unregister: () => {} }
|
|
@@ -831,6 +1000,17 @@ export function __wbg_instanceof_ArrayBuffer_70beb1189ca63b38(arg0) {
|
|
|
831
1000
|
return ret;
|
|
832
1001
|
};
|
|
833
1002
|
|
|
1003
|
+
export function __wbg_instanceof_Object_10bb762262230c68(arg0) {
|
|
1004
|
+
let result;
|
|
1005
|
+
try {
|
|
1006
|
+
result = getObject(arg0) instanceof Object;
|
|
1007
|
+
} catch (_) {
|
|
1008
|
+
result = false;
|
|
1009
|
+
}
|
|
1010
|
+
const ret = result;
|
|
1011
|
+
return ret;
|
|
1012
|
+
};
|
|
1013
|
+
|
|
834
1014
|
export function __wbg_instanceof_Uint8Array_20c8e73002f7af98(arg0) {
|
|
835
1015
|
let result;
|
|
836
1016
|
try {
|
|
@@ -857,6 +1037,11 @@ export function __wbg_iterator_e5822695327a3c39() {
|
|
|
857
1037
|
return addHeapObject(ret);
|
|
858
1038
|
};
|
|
859
1039
|
|
|
1040
|
+
export function __wbg_keys_b4d27b02ad14f4be(arg0) {
|
|
1041
|
+
const ret = Object.keys(getObject(arg0));
|
|
1042
|
+
return addHeapObject(ret);
|
|
1043
|
+
};
|
|
1044
|
+
|
|
860
1045
|
export function __wbg_length_69bca3cb64fc8748(arg0) {
|
|
861
1046
|
const ret = getObject(arg0).length;
|
|
862
1047
|
return ret;
|
|
Binary file
|
|
@@ -1,12 +1,19 @@
|
|
|
1
1
|
/* tslint:disable */
|
|
2
2
|
/* eslint-disable */
|
|
3
3
|
export const memory: WebAssembly.Memory;
|
|
4
|
+
export const __wbg_wasmconversionoptionshandle_free: (a: number, b: number) => void;
|
|
4
5
|
export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
|
|
5
6
|
export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
|
|
6
7
|
export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
|
|
7
8
|
export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
|
|
8
9
|
export const convert: (a: number, b: number, c: number, d: number) => void;
|
|
10
|
+
export const convertBytes: (a: number, b: number, c: number) => void;
|
|
11
|
+
export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
|
|
12
|
+
export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
|
|
9
13
|
export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
14
|
+
export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
15
|
+
export const createConversionOptionsHandle: (a: number, b: number) => void;
|
|
16
|
+
export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
10
17
|
export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
|
|
11
18
|
export const wasmhtmlextraction_markdown: (a: number, b: number) => void;
|
|
12
19
|
export const wasmhtmlextraction_warnings: (a: number, b: number) => void;
|