html-to-markdown-wasm 2.4.2 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -24
- package/dist/README.md +19 -0
- package/dist/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist/package.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -52,56 +52,63 @@ import { convert } from "npm:html-to-markdown-wasm";
|
|
|
52
52
|
|
|
53
53
|
## Usage
|
|
54
54
|
|
|
55
|
-
###
|
|
55
|
+
### Basic Conversion
|
|
56
56
|
|
|
57
57
|
```javascript
|
|
58
|
-
|
|
59
|
-
const { convert } = require('html-to-markdown-wasm/dist-node');
|
|
58
|
+
import { convert } from 'html-to-markdown-wasm';
|
|
60
59
|
|
|
61
|
-
const
|
|
60
|
+
const html = '<h1>Hello World</h1><p>This is <strong>fast</strong>!</p>';
|
|
61
|
+
const markdown = convert(html);
|
|
62
62
|
console.log(markdown);
|
|
63
|
+
// # Hello World
|
|
64
|
+
//
|
|
65
|
+
// This is **fast**!
|
|
63
66
|
```
|
|
64
67
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
+
### With Options
|
|
69
|
+
|
|
70
|
+
```typescript
|
|
71
|
+
import { convert } from 'html-to-markdown-wasm';
|
|
68
72
|
|
|
69
|
-
const html = '<h1>Hello</h1><p>World</p>';
|
|
70
73
|
const markdown = convert(html, {
|
|
71
74
|
headingStyle: 'atx',
|
|
72
75
|
codeBlockStyle: 'backticks',
|
|
76
|
+
listIndentWidth: 2,
|
|
77
|
+
bullets: '-',
|
|
78
|
+
wrap: true,
|
|
79
|
+
wrapWidth: 80
|
|
73
80
|
});
|
|
74
81
|
```
|
|
75
82
|
|
|
76
|
-
###
|
|
83
|
+
### Preserve Complex HTML (NEW in v2.5)
|
|
77
84
|
|
|
78
85
|
```typescript
|
|
79
|
-
import { convert } from
|
|
86
|
+
import { convert } from 'html-to-markdown-wasm';
|
|
80
87
|
|
|
81
|
-
const html =
|
|
88
|
+
const html = `
|
|
89
|
+
<h1>Report</h1>
|
|
90
|
+
<table>
|
|
91
|
+
<tr><th>Name</th><th>Value</th></tr>
|
|
92
|
+
<tr><td>Foo</td><td>Bar</td></tr>
|
|
93
|
+
</table>
|
|
94
|
+
`;
|
|
82
95
|
|
|
83
96
|
const markdown = convert(html, {
|
|
84
|
-
|
|
85
|
-
listIndentWidth: 2,
|
|
86
|
-
bullets: "-"
|
|
97
|
+
preserveTags: ['table'] // Keep tables as HTML
|
|
87
98
|
});
|
|
88
|
-
|
|
89
|
-
await Deno.writeTextFile("output.md", markdown);
|
|
90
99
|
```
|
|
91
100
|
|
|
92
|
-
###
|
|
101
|
+
### Deno
|
|
93
102
|
|
|
94
103
|
```typescript
|
|
95
|
-
import { convert } from
|
|
104
|
+
import { convert } from "npm:html-to-markdown-wasm";
|
|
96
105
|
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
wrapWidth: 80
|
|
101
|
-
});
|
|
106
|
+
const html = await Deno.readTextFile("input.html");
|
|
107
|
+
const markdown = convert(html, { headingStyle: "atx" });
|
|
108
|
+
await Deno.writeTextFile("output.md", markdown);
|
|
102
109
|
```
|
|
103
110
|
|
|
104
|
-
> **
|
|
111
|
+
> **Performance Tip:** For Node.js/Bun, use [html-to-markdown-node](https://www.npmjs.com/package/html-to-markdown-node) for 1.17× better performance with native bindings.
|
|
105
112
|
|
|
106
113
|
### Browser (ESM)
|
|
107
114
|
|
|
@@ -248,12 +255,45 @@ See the [TypeScript definitions](./dist-node/html_to_markdown_wasm.d.ts) for all
|
|
|
248
255
|
- Code block styles (indented, backticks, tildes)
|
|
249
256
|
- List formatting (indent width, bullet characters)
|
|
250
257
|
- Text escaping and formatting
|
|
258
|
+
- Tag preservation (`preserveTags`) and stripping (`stripTags`)
|
|
251
259
|
- Preprocessing for web scraping
|
|
252
260
|
- hOCR table extraction
|
|
253
261
|
- And more...
|
|
254
262
|
|
|
255
263
|
## Examples
|
|
256
264
|
|
|
265
|
+
### Preserving HTML Tags
|
|
266
|
+
|
|
267
|
+
Keep specific HTML tags in their original form:
|
|
268
|
+
|
|
269
|
+
```typescript
|
|
270
|
+
import { convert } from 'html-to-markdown-wasm';
|
|
271
|
+
|
|
272
|
+
const html = `
|
|
273
|
+
<p>Before table</p>
|
|
274
|
+
<table class="data">
|
|
275
|
+
<tr><th>Name</th><th>Value</th></tr>
|
|
276
|
+
<tr><td>Item 1</td><td>100</td></tr>
|
|
277
|
+
</table>
|
|
278
|
+
<p>After table</p>
|
|
279
|
+
`;
|
|
280
|
+
|
|
281
|
+
const markdown = convert(html, {
|
|
282
|
+
preserveTags: ['table']
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
// Result includes the table as HTML
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
Combine with `stripTags`:
|
|
289
|
+
|
|
290
|
+
```typescript
|
|
291
|
+
const markdown = convert(html, {
|
|
292
|
+
preserveTags: ['table', 'form'], // Keep as HTML
|
|
293
|
+
stripTags: ['script', 'style'] // Remove entirely
|
|
294
|
+
});
|
|
295
|
+
```
|
|
296
|
+
|
|
257
297
|
### Deno Web Server
|
|
258
298
|
|
|
259
299
|
```typescript
|
package/dist/README.md
CHANGED
|
@@ -9,6 +9,24 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
|
|
|
9
9
|
[](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
|
|
10
10
|
[](https://discord.gg/pXxagNK2zN)
|
|
11
11
|
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## 🎮 **[Try the Live Demo →](https://goldziher.github.io/html-to-markdown/)**
|
|
15
|
+
|
|
16
|
+
Experience WebAssembly-powered HTML to Markdown conversion instantly in your browser. No installation needed!
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Why html-to-markdown?
|
|
21
|
+
|
|
22
|
+
- **Blazing Fast**: Rust-powered core delivers 10-80× faster conversion than pure Python alternatives
|
|
23
|
+
- **Universal**: Works everywhere - Node.js, Bun, Deno, browsers, Python, Rust, and standalone CLI
|
|
24
|
+
- **Smart Conversion**: Handles complex documents including nested tables, code blocks, task lists, and hOCR output from OCR engines
|
|
25
|
+
- **Highly Configurable**: Control heading styles, code block fences, list formatting, whitespace handling, and HTML sanitization
|
|
26
|
+
- **Tag Preservation**: Keep specific HTML tags unconverted when markdown isn't expressive enough
|
|
27
|
+
- **Secure by Default**: Built-in HTML sanitization prevents malicious content
|
|
28
|
+
- **Consistent Output**: Identical markdown rendering across all language bindings
|
|
29
|
+
|
|
12
30
|
## Documentation
|
|
13
31
|
|
|
14
32
|
- **JavaScript/TypeScript guides**:
|
|
@@ -46,6 +64,7 @@ const markdown = convert(html, {
|
|
|
46
64
|
headingStyle: 'Atx',
|
|
47
65
|
codeBlockStyle: 'Backticks',
|
|
48
66
|
wrap: true,
|
|
67
|
+
preserveTags: ['table'], // NEW in v2.5: Keep complex HTML as-is
|
|
49
68
|
});
|
|
50
69
|
```
|
|
51
70
|
|
|
Binary file
|
package/dist/package.json
CHANGED