@kreuzberg/html-to-markdown 2.19.0-rc.1 → 2.23.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +302 -206
- package/dist/cli.js +0 -0
- package/dist/index.d.ts +68 -2
- package/dist/index.js +63 -2
- package/package.json +18 -18
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright 2024-2025 Na'aman Hirschfeld
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,40 +1,103 @@
|
|
|
1
|
-
# html-to-markdown
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
1
|
+
# html-to-markdown
|
|
2
|
+
|
|
3
|
+
<div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
|
|
4
|
+
<!-- Language Bindings -->
|
|
5
|
+
<a href="https://crates.io/crates/html-to-markdown-rs">
|
|
6
|
+
<img src="https://img.shields.io/crates/v/html-to-markdown-rs?label=Rust&color=007ec6" alt="Rust">
|
|
7
|
+
</a>
|
|
8
|
+
<a href="https://pypi.org/project/html-to-markdown/">
|
|
9
|
+
<img src="https://img.shields.io/pypi/v/html-to-markdown?label=Python&color=007ec6" alt="Python">
|
|
10
|
+
</a>
|
|
11
|
+
<a href="https://www.npmjs.com/package/@kreuzberg/html-to-markdown-node">
|
|
12
|
+
<img src="https://img.shields.io/npm/v/@kreuzberg/html-to-markdown-node?label=Node.js&color=007ec6" alt="Node.js">
|
|
13
|
+
</a>
|
|
14
|
+
<a href="https://www.npmjs.com/package/@kreuzberg/html-to-markdown-wasm">
|
|
15
|
+
<img src="https://img.shields.io/npm/v/@kreuzberg/html-to-markdown-wasm?label=WASM&color=007ec6" alt="WASM">
|
|
16
|
+
</a>
|
|
17
|
+
<a href="https://central.sonatype.com/artifact/dev.kreuzberg/html-to-markdown">
|
|
18
|
+
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown?label=Java&color=007ec6" alt="Java">
|
|
19
|
+
</a>
|
|
20
|
+
<a href="https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown">
|
|
21
|
+
<img src="https://img.shields.io/badge/Go-v2.19.0-007ec6" alt="Go">
|
|
22
|
+
</a>
|
|
23
|
+
<a href="https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/">
|
|
24
|
+
<img src="https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown?label=C%23&color=007ec6" alt="C#">
|
|
25
|
+
</a>
|
|
26
|
+
<a href="https://packagist.org/packages/goldziher/html-to-markdown">
|
|
27
|
+
<img src="https://img.shields.io/packagist/v/goldziher/html-to-markdown?label=PHP&color=007ec6" alt="PHP">
|
|
28
|
+
</a>
|
|
29
|
+
<a href="https://rubygems.org/gems/html-to-markdown">
|
|
30
|
+
<img src="https://img.shields.io/gem/v/html-to-markdown?label=Ruby&color=007ec6" alt="Ruby">
|
|
31
|
+
</a>
|
|
32
|
+
<a href="https://hex.pm/packages/html_to_markdown">
|
|
33
|
+
<img src="https://img.shields.io/hexpm/v/html_to_markdown?label=Elixir&color=007ec6" alt="Elixir">
|
|
34
|
+
</a>
|
|
35
|
+
|
|
36
|
+
<!-- Project Info -->
|
|
37
|
+
<a href="https://github.com/kreuzberg-dev/html-to-markdown/blob/main/LICENSE">
|
|
38
|
+
<img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License">
|
|
39
|
+
</a>
|
|
40
|
+
</div>
|
|
41
|
+
|
|
42
|
+
<img width="1128" height="191" alt="html-to-markdown" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
|
|
43
|
+
|
|
44
|
+
<div align="center" style="margin-top: 20px;">
|
|
45
|
+
<a href="https://discord.gg/pXxagNK2zN">
|
|
46
|
+
<img height="22" src="https://img.shields.io/badge/Discord-Join%20our%20community-7289da?logo=discord&logoColor=white" alt="Discord">
|
|
47
|
+
</a>
|
|
48
|
+
</div>
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
High-performance HTML to Markdown converter for Node.js and Bun with full TypeScript support.
|
|
52
|
+
This package wraps native `@kreuzberg/html-to-markdown-node` bindings and provides a type-safe API.
|
|
53
|
+
|
|
17
54
|
|
|
18
55
|
## Installation
|
|
19
56
|
|
|
20
57
|
```bash
|
|
21
|
-
# Native bindings (Node.js/Bun) - Recommended
|
|
22
58
|
npm install @kreuzberg/html-to-markdown
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
Requires Node.js 18+ or Bun. Native bindings provide superior performance.
|
|
64
|
+
|
|
65
|
+
**npm:**
|
|
66
|
+
```bash
|
|
67
|
+
npm install @kreuzberg/html-to-markdown
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**pnpm:**
|
|
71
|
+
```bash
|
|
23
72
|
pnpm add @kreuzberg/html-to-markdown
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**yarn:**
|
|
76
|
+
```bash
|
|
24
77
|
yarn add @kreuzberg/html-to-markdown
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**bun:**
|
|
81
|
+
```bash
|
|
25
82
|
bun add @kreuzberg/html-to-markdown
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Alternatively, use the WebAssembly version for browser/edge environments:
|
|
26
86
|
|
|
27
|
-
|
|
87
|
+
```bash
|
|
28
88
|
npm install @kreuzberg/html-to-markdown-wasm
|
|
29
89
|
```
|
|
30
90
|
|
|
31
|
-
## Migration Guide (v2.18.x → v2.19.0)
|
|
32
91
|
|
|
33
|
-
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Migration Guide: TypeScript v2.18.x → v2.19.0
|
|
95
|
+
|
|
96
|
+
## Breaking Change: Scoped npm Packages
|
|
34
97
|
|
|
35
98
|
In v2.19.0, npm packages were moved to the `@kreuzberg` scope to align with the Kreuzberg.dev organization.
|
|
36
99
|
|
|
37
|
-
|
|
100
|
+
### Package Installation Update
|
|
38
101
|
|
|
39
102
|
**Before (v2.18.x):**
|
|
40
103
|
```bash
|
|
@@ -48,30 +111,30 @@ npm install @kreuzberg/html-to-markdown-node
|
|
|
48
111
|
npm install @kreuzberg/html-to-markdown-wasm
|
|
49
112
|
```
|
|
50
113
|
|
|
51
|
-
|
|
114
|
+
### Import Statement Update
|
|
52
115
|
|
|
53
116
|
**Before:**
|
|
54
117
|
```typescript
|
|
55
|
-
import { convert } from
|
|
56
|
-
import { convert } from
|
|
118
|
+
import { convert } from 'html-to-markdown-node';
|
|
119
|
+
import { convert } from 'html-to-markdown-wasm';
|
|
57
120
|
```
|
|
58
121
|
|
|
59
122
|
**After:**
|
|
60
123
|
```typescript
|
|
61
|
-
import { convert } from
|
|
62
|
-
import { convert } from
|
|
124
|
+
import { convert } from '@kreuzberg/html-to-markdown-node';
|
|
125
|
+
import { convert } from '@kreuzberg/html-to-markdown-wasm';
|
|
63
126
|
```
|
|
64
127
|
|
|
65
|
-
|
|
128
|
+
### TypeScript Declaration Update
|
|
66
129
|
|
|
67
130
|
Update your TypeScript configuration if you have imports from the old package name:
|
|
68
131
|
|
|
69
132
|
**Before (tsconfig.json or import aliases):**
|
|
70
133
|
```json
|
|
71
134
|
{
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
135
|
+
"compilerOptions": {
|
|
136
|
+
"paths": {
|
|
137
|
+
"html-to-markdown": ["node_modules/html-to-markdown-node"]
|
|
75
138
|
}
|
|
76
139
|
}
|
|
77
140
|
}
|
|
@@ -80,27 +143,27 @@ Update your TypeScript configuration if you have imports from the old package na
|
|
|
80
143
|
**After:**
|
|
81
144
|
```json
|
|
82
145
|
{
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
146
|
+
"compilerOptions": {
|
|
147
|
+
"paths": {
|
|
148
|
+
"@kreuzberg/html-to-markdown": ["node_modules/@kreuzberg/html-to-markdown-node"]
|
|
86
149
|
}
|
|
87
150
|
}
|
|
88
151
|
}
|
|
89
152
|
```
|
|
90
153
|
|
|
91
|
-
|
|
154
|
+
### Deno Update
|
|
92
155
|
|
|
93
156
|
**Before:**
|
|
94
157
|
```typescript
|
|
95
|
-
import { convert } from
|
|
158
|
+
import { convert } from "npm:html-to-markdown-wasm";
|
|
96
159
|
```
|
|
97
160
|
|
|
98
161
|
**After:**
|
|
99
162
|
```typescript
|
|
100
|
-
import { convert } from
|
|
163
|
+
import { convert } from "npm:@kreuzberg/html-to-markdown-wasm";
|
|
101
164
|
```
|
|
102
165
|
|
|
103
|
-
|
|
166
|
+
## Summary of Changes
|
|
104
167
|
|
|
105
168
|
- All npm packages now use `@kreuzberg` scope
|
|
106
169
|
- `html-to-markdown-node` → `@kreuzberg/html-to-markdown-node`
|
|
@@ -108,243 +171,276 @@ import { convert } from "npm:@kreuzberg/html-to-markdown-wasm";
|
|
|
108
171
|
- TypeScript types and APIs are identical
|
|
109
172
|
- No functional changes to the library
|
|
110
173
|
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
## Performance Snapshot
|
|
178
|
+
|
|
179
|
+
Apple M4 • Real Wikipedia documents • `convert()` (TypeScript, Node.js)
|
|
180
|
+
|
|
181
|
+
| Document | Size | Latency | Throughput |
|
|
182
|
+
| -------- | ---- | ------- | ---------- |
|
|
183
|
+
| Lists (Timeline) | 129KB | 0.58ms | 222 MB/s |
|
|
184
|
+
| Tables (Countries) | 360KB | 1.89ms | 190 MB/s |
|
|
185
|
+
| Mixed (Python wiki) | 656KB | 4.21ms | 156 MB/s |
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
See [Performance Guide](../../examples/performance/) for detailed benchmarks.
|
|
189
|
+
|
|
190
|
+
|
|
111
191
|
## Quick Start
|
|
112
192
|
|
|
113
|
-
|
|
193
|
+
Basic conversion:
|
|
194
|
+
|
|
114
195
|
```typescript
|
|
115
|
-
import { convert } from
|
|
196
|
+
import { convert } from '@kreuzberg/html-to-markdown';
|
|
116
197
|
|
|
117
|
-
const markdown: string = convert(
|
|
198
|
+
const markdown: string = convert('<h1>Hello World</h1>');
|
|
118
199
|
console.log(markdown); // # Hello World
|
|
119
200
|
```
|
|
120
201
|
|
|
121
|
-
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
With conversion options:
|
|
205
|
+
|
|
122
206
|
```typescript
|
|
123
|
-
import { convert, ConversionOptions } from
|
|
207
|
+
import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown';
|
|
124
208
|
|
|
125
209
|
const options: ConversionOptions = {
|
|
126
|
-
headingStyle:
|
|
210
|
+
headingStyle: 'atx',
|
|
127
211
|
listIndentWidth: 2,
|
|
128
212
|
wrap: true,
|
|
129
213
|
};
|
|
130
214
|
|
|
131
|
-
const markdown = convert(
|
|
215
|
+
const markdown = convert('<h1>Title</h1><p>Content</p>', options);
|
|
132
216
|
```
|
|
133
217
|
|
|
134
|
-
**TypeScript interfaces for type safety:**
|
|
135
|
-
```typescript
|
|
136
|
-
interface ConversionOptions {
|
|
137
|
-
headingStyle?: 'atx' | 'setext';
|
|
138
|
-
listIndentWidth?: number;
|
|
139
|
-
wrap?: boolean;
|
|
140
|
-
wrapWidth?: number;
|
|
141
|
-
// ... more options
|
|
142
|
-
}
|
|
143
|
-
```
|
|
144
218
|
|
|
145
|
-
**File and stream helpers:**
|
|
146
|
-
```typescript
|
|
147
|
-
import { convertFile, convertBuffer } from '@kreuzberg/html-to-markdown';
|
|
148
219
|
|
|
149
|
-
// From file
|
|
150
|
-
const markdown = await convertFile('page.html');
|
|
151
220
|
|
|
152
|
-
|
|
153
|
-
const buffer = Buffer.from('<h1>Title</h1>');
|
|
154
|
-
const markdown = convertBuffer(buffer);
|
|
155
|
-
```
|
|
221
|
+
|
|
156
222
|
|
|
157
223
|
## API Reference
|
|
158
224
|
|
|
159
225
|
### Core Functions
|
|
160
226
|
|
|
161
|
-
#### `convert(html: string, options?: ConversionOptions): string`
|
|
162
|
-
Convert HTML string to Markdown.
|
|
163
227
|
|
|
164
|
-
|
|
165
|
-
Convert HTML from Buffer/Uint8Array (avoids string allocation overhead).
|
|
228
|
+
**`convert(html: string, options?: ConversionOptions): string`**
|
|
166
229
|
|
|
167
|
-
|
|
168
|
-
Asynchronously convert an HTML file to Markdown.
|
|
230
|
+
Basic HTML-to-Markdown conversion. Fast and simple.
|
|
169
231
|
|
|
170
|
-
|
|
171
|
-
Convert HTML from a readable stream (stdin, file stream, network).
|
|
232
|
+
**`convertWithMetadata(html: string, options?: ConversionOptions, config?: MetadataConfig): { markdown: string; metadata: Metadata }`**
|
|
172
233
|
|
|
173
|
-
|
|
234
|
+
Extract Markdown plus metadata (headers, links, images, structured data) in a single pass. See [Metadata Extraction Guide](../../examples/metadata-extraction/).
|
|
174
235
|
|
|
175
|
-
|
|
236
|
+
**`convertWithVisitor(html: string, options: { visitor: Visitor } & ConversionOptions): string`**
|
|
176
237
|
|
|
177
|
-
|
|
178
|
-
Convert and extract document metadata, headers, links, images, and structured data.
|
|
238
|
+
Customize conversion with visitor callbacks for element interception. See [Visitor Pattern Guide](../../examples/visitor-pattern/).
|
|
179
239
|
|
|
180
|
-
|
|
181
|
-
Convert from Buffer with metadata extraction.
|
|
240
|
+
**`convertWithAsyncVisitor(html: string, options: { visitor: AsyncVisitor } & ConversionOptions): Promise<string>`**
|
|
182
241
|
|
|
183
|
-
|
|
184
|
-
Convert HTML file with metadata extraction.
|
|
242
|
+
Async version of visitor pattern for I/O operations.
|
|
185
243
|
|
|
186
|
-
|
|
187
|
-
Convert stream with metadata extraction.
|
|
244
|
+
**`convertWithInlineImages(html: string, config?: InlineImageConfig): { markdown: string; images: ImageData[]; warnings: string[] }`**
|
|
188
245
|
|
|
189
|
-
|
|
190
|
-
Check if metadata extraction is available at runtime.
|
|
246
|
+
Extract base64-encoded inline images with metadata.
|
|
191
247
|
|
|
192
|
-
### Visitor Pattern Functions
|
|
193
248
|
|
|
194
|
-
Custom element callbacks for fine-grained conversion control.
|
|
195
249
|
|
|
196
|
-
|
|
197
|
-
Convert with visitor callbacks for element interception.
|
|
250
|
+
### Options
|
|
198
251
|
|
|
199
|
-
|
|
200
|
-
|
|
252
|
+
**`ConversionOptions`** – Key configuration fields:
|
|
253
|
+
- `headingStyle`: Heading format (`"underlined"` | `"atx"` | `"atx_closed"`) — default: `"underlined"`
|
|
254
|
+
- `listIndentWidth`: Spaces per indent level — default: `2`
|
|
255
|
+
- `bullets`: Bullet characters cycle — default: `"*+-"`
|
|
256
|
+
- `wrap`: Enable text wrapping — default: `false`
|
|
257
|
+
- `wrapWidth`: Wrap at column — default: `80`
|
|
258
|
+
- `codeLanguage`: Default fenced code block language — default: none
|
|
259
|
+
- `extractMetadata`: Embed metadata as YAML frontmatter — default: `false`
|
|
260
|
+
- `outputFormat`: Output markup format (`"markdown"` | `"djot"`) — default: `"markdown"`
|
|
201
261
|
|
|
202
|
-
|
|
262
|
+
**`MetadataConfig`** – Selective metadata extraction:
|
|
263
|
+
- `extractHeaders`: h1-h6 elements — default: `true`
|
|
264
|
+
- `extractLinks`: Hyperlinks — default: `true`
|
|
265
|
+
- `extractImages`: Image elements — default: `true`
|
|
266
|
+
- `extractStructuredData`: JSON-LD, Microdata, RDFa — default: `true`
|
|
267
|
+
- `maxStructuredDataSize`: Size limit in bytes — default: `100KB`
|
|
203
268
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
269
|
+
|
|
270
|
+
## Djot Output Format
|
|
271
|
+
|
|
272
|
+
The library supports converting HTML to [Djot](https://djot.net/), a lightweight markup language similar to Markdown but with a different syntax for some elements. Set `output_format` to `"djot"` to use this format.
|
|
273
|
+
|
|
274
|
+
### Syntax Differences
|
|
275
|
+
|
|
276
|
+
| Element | Markdown | Djot |
|
|
277
|
+
|---------|----------|------|
|
|
278
|
+
| Strong | `**text**` | `*text*` |
|
|
279
|
+
| Emphasis | `*text*` | `_text_` |
|
|
280
|
+
| Strikethrough | `~~text~~` | `{-text-}` |
|
|
281
|
+
| Inserted/Added | N/A | `{+text+}` |
|
|
282
|
+
| Highlighted | N/A | `{=text=}` |
|
|
283
|
+
| Subscript | N/A | `~text~` |
|
|
284
|
+
| Superscript | N/A | `^text^` |
|
|
285
|
+
|
|
286
|
+
### Example Usage
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
from html_to_markdown import convert, ConversionOptions
|
|
290
|
+
|
|
291
|
+
html = "<p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
|
|
292
|
+
|
|
293
|
+
# Default Markdown output
|
|
294
|
+
markdown = convert(html)
|
|
295
|
+
# Result: "This is **bold** and *italic* text."
|
|
296
|
+
|
|
297
|
+
# Djot output
|
|
298
|
+
djot = convert(html, ConversionOptions(output_format="djot"))
|
|
299
|
+
# Result: "This is *bold* and _italic_ text."
|
|
220
300
|
```
|
|
221
301
|
|
|
222
|
-
|
|
302
|
+
Djot's extended syntax allows you to express more semantic meaning in lightweight text, making it useful for documents that require strikethrough, insertion tracking, or mathematical notation.
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
## Metadata Extraction
|
|
307
|
+
|
|
308
|
+
The metadata extraction feature enables comprehensive document analysis during conversion. Extract document properties, headers, links, images, and structured data in a single pass.
|
|
309
|
+
|
|
310
|
+
**Use Cases:**
|
|
311
|
+
- **SEO analysis** – Extract title, description, Open Graph tags, Twitter cards
|
|
312
|
+
- **Table of contents generation** – Build structured outlines from heading hierarchy
|
|
313
|
+
- **Content migration** – Document all external links and resources
|
|
314
|
+
- **Accessibility audits** – Check for images without alt text, empty links, invalid heading hierarchy
|
|
315
|
+
- **Link validation** – Classify and validate anchor, internal, external, email, and phone links
|
|
316
|
+
|
|
317
|
+
**Low Overhead:** Metadata extraction happens during the HTML parsing pass and adds negligible overhead. Disable unused metadata types in `MetadataConfig` to optimize further.
|
|
318
|
+
|
|
319
|
+
### Example: Quick Start
|
|
320
|
+
|
|
321
|
+
|
|
223
322
|
```typescript
|
|
224
|
-
|
|
225
|
-
extractHeaders?: boolean; // h1-h6 elements
|
|
226
|
-
extractLinks?: boolean; // <a> elements
|
|
227
|
-
extractImages?: boolean; // <img> and inline SVG
|
|
228
|
-
extractStructuredData?: boolean; // JSON-LD, Microdata, RDFa
|
|
229
|
-
maxStructuredDataSize?: number; // Size limit (default: 1MB)
|
|
230
|
-
}
|
|
323
|
+
import { convertWithMetadata } from '@kreuzberg/html-to-markdown';
|
|
231
324
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
headers: JsHeaderMetadata[];
|
|
235
|
-
links: JsLinkMetadata[];
|
|
236
|
-
images: JsImageMetadata[];
|
|
237
|
-
structuredData: JsStructuredData[];
|
|
238
|
-
}
|
|
325
|
+
const html = '<h1>Article</h1><img src="test.jpg" alt="test">';
|
|
326
|
+
const { markdown, metadata } = convertWithMetadata(html);
|
|
239
327
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
canonicalUrl?: string;
|
|
246
|
-
language?: string;
|
|
247
|
-
textDirection?: 'ltr' | 'rtl' | 'auto';
|
|
248
|
-
openGraph: Record<string, string>;
|
|
249
|
-
twitterCard: Record<string, string>;
|
|
250
|
-
metaTags: Record<string, string>;
|
|
251
|
-
}
|
|
328
|
+
console.log(metadata.document.title); // Document title
|
|
329
|
+
console.log(metadata.headers); // All h1-h6 elements
|
|
330
|
+
console.log(metadata.links); // All hyperlinks
|
|
331
|
+
console.log(metadata.images); // All images with alt text
|
|
332
|
+
console.log(metadata.structuredData); // JSON-LD, Microdata, RDFa
|
|
252
333
|
```
|
|
253
334
|
|
|
254
|
-
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
For detailed examples including SEO extraction, table-of-contents generation, link validation, and accessibility audits, see the [Metadata Extraction Guide](../../examples/metadata-extraction/).
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
## Visitor Pattern
|
|
343
|
+
|
|
344
|
+
The visitor pattern enables custom HTML→Markdown conversion logic by providing callbacks for specific HTML elements during traversal. Use visitors to transform content, filter elements, validate structure, or collect analytics.
|
|
345
|
+
|
|
346
|
+
**Use Cases:**
|
|
347
|
+
- **Custom Markdown dialects** – Convert to Obsidian, Notion, or other flavors
|
|
348
|
+
- **Content filtering** – Remove tracking pixels, ads, or unwanted elements
|
|
349
|
+
- **URL rewriting** – Rewrite CDN URLs, add query parameters, validate links
|
|
350
|
+
- **Accessibility validation** – Check alt text, heading hierarchy, link text
|
|
351
|
+
- **Analytics** – Track element usage, link destinations, image sources
|
|
352
|
+
|
|
353
|
+
**Supported Visitor Methods:** 40+ callbacks for text, inline elements, links, images, headings, lists, blocks, and tables.
|
|
354
|
+
|
|
355
|
+
### Example: Quick Start
|
|
356
|
+
|
|
357
|
+
|
|
255
358
|
```typescript
|
|
256
|
-
|
|
257
|
-
visitText?(ctx: NodeContext, text: string): VisitResult;
|
|
258
|
-
visitLink?(ctx: NodeContext, href: string, text: string, title?: string): VisitResult;
|
|
259
|
-
visitImage?(ctx: NodeContext, src: string, alt?: string, title?: string): VisitResult;
|
|
260
|
-
visitHeading?(ctx: NodeContext, level: number, text: string, id?: string): VisitResult;
|
|
261
|
-
visitCodeBlock?(ctx: NodeContext, lang?: string, code?: string): VisitResult;
|
|
262
|
-
// ... 41 total methods for fine-grained control
|
|
263
|
-
}
|
|
359
|
+
import { convertWithVisitor, type Visitor, type NodeContext, type VisitResult } from '@kreuzberg/html-to-markdown';
|
|
264
360
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
}
|
|
361
|
+
const visitor: Visitor = {
|
|
362
|
+
visitLink(ctx: NodeContext, href: string, text: string, title?: string): VisitResult {
|
|
363
|
+
// Rewrite CDN URLs
|
|
364
|
+
if (href.startsWith('https://old-cdn.com')) {
|
|
365
|
+
href = href.replace('https://old-cdn.com', 'https://new-cdn.com');
|
|
366
|
+
}
|
|
367
|
+
return { type: 'custom', output: `[${text}](${href})` };
|
|
368
|
+
},
|
|
274
369
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
370
|
+
visitImage(ctx: NodeContext, src: string, alt?: string, title?: string): VisitResult {
|
|
371
|
+
// Skip tracking pixels
|
|
372
|
+
if (src.includes('tracking')) {
|
|
373
|
+
return { type: 'skip' };
|
|
374
|
+
}
|
|
375
|
+
return { type: 'continue' };
|
|
376
|
+
},
|
|
377
|
+
};
|
|
282
378
|
|
|
283
|
-
|
|
379
|
+
const html = '<a href="https://old-cdn.com/file.pdf">Download</a>';
|
|
380
|
+
const markdown = convertWithVisitor(html, { visitor });
|
|
381
|
+
```
|
|
284
382
|
|
|
383
|
+
Async support:
|
|
285
384
|
```typescript
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
}
|
|
385
|
+
import { convertWithAsyncVisitor, type AsyncVisitor } from '@kreuzberg/html-to-markdown';
|
|
386
|
+
|
|
387
|
+
const asyncVisitor: AsyncVisitor = {
|
|
388
|
+
async visitLink(ctx, href, text, title) {
|
|
389
|
+
const isValid = await validateUrl(href);
|
|
390
|
+
return isValid ? { type: 'continue' } : { type: 'error', message: `Broken link: ${href}` };
|
|
391
|
+
},
|
|
392
|
+
};
|
|
393
|
+
|
|
394
|
+
const markdown = await convertWithAsyncVisitor(html, { visitor: asyncVisitor });
|
|
293
395
|
```
|
|
294
396
|
|
|
295
|
-
Inputs with binary data (PDF bytes coerced to strings) raise errors with message: `Invalid input`.
|
|
296
397
|
|
|
297
|
-
## Examples
|
|
298
398
|
|
|
299
|
-
|
|
399
|
+
For comprehensive examples including content filtering, link footnotes, accessibility validation, and asynchronous URL validation, see the [Visitor Pattern Guide](../../examples/visitor-pattern/).
|
|
400
|
+
|
|
300
401
|
|
|
301
|
-
- **[Visitor Pattern](../../examples/visitor-pattern/)** - Custom callbacks, filtering, transformations, analytics
|
|
302
|
-
- **[Metadata Extraction](../../examples/metadata-extraction/)** - SEO metadata, TOC generation, link validation
|
|
303
|
-
- **[Performance](../../examples/performance/)** - Benchmarks, optimization strategies
|
|
304
402
|
|
|
305
|
-
##
|
|
403
|
+
## Examples
|
|
306
404
|
|
|
307
|
-
|
|
405
|
+
- [Visitor Pattern Guide](../../examples/visitor-pattern/)
|
|
406
|
+
- [Metadata Extraction Guide](../../examples/metadata-extraction/)
|
|
407
|
+
- [Performance Guide](../../examples/performance/)
|
|
308
408
|
|
|
309
|
-
|
|
310
|
-
{
|
|
311
|
-
"compilerOptions": {
|
|
312
|
-
"strict": true,
|
|
313
|
-
"noUncheckedIndexedAccess": true,
|
|
314
|
-
"exactOptionalPropertyTypes": true,
|
|
315
|
-
"noImplicitAny": true,
|
|
316
|
-
"noImplicitThis": true,
|
|
317
|
-
"strictNullChecks": true,
|
|
318
|
-
"strictFunctionTypes": true,
|
|
319
|
-
"strictPropertyInitialization": true,
|
|
320
|
-
"noImplicitReturns": true
|
|
321
|
-
}
|
|
322
|
-
}
|
|
323
|
-
```
|
|
409
|
+
## Links
|
|
324
410
|
|
|
325
|
-
|
|
411
|
+
- **GitHub:** [github.com/kreuzberg-dev/html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown)
|
|
326
412
|
|
|
327
|
-
|
|
413
|
+
- **npm:** [npmjs.com/@kreuzberg/html-to-markdown-node](https://www.npmjs.com/package/@kreuzberg/html-to-markdown-node)
|
|
414
|
+
- **WASM:** [npmjs.com/@kreuzberg/html-to-markdown-wasm](https://www.npmjs.com/package/@kreuzberg/html-to-markdown-wasm)
|
|
328
415
|
|
|
329
|
-
|
|
416
|
+
- **Kreuzberg Ecosystem:** [kreuzberg.dev](https://kreuzberg.dev)
|
|
417
|
+
- **Discord:** [discord.gg/pXxagNK2zN](https://discord.gg/pXxagNK2zN)
|
|
330
418
|
|
|
331
|
-
|
|
332
|
-
| ------------------- | ------- | ------- |
|
|
333
|
-
| Small (Intro) | 463 KB | 627 |
|
|
334
|
-
| Medium (Python) | 657 KB | 460 |
|
|
335
|
-
| Large (Rust) | 567 KB | 554 |
|
|
336
|
-
| Lists (Timeline) | 129 KB | 3,137 |
|
|
337
|
-
| Tables (Countries) | 360 KB | 932 |
|
|
419
|
+
## Contributing
|
|
338
420
|
|
|
339
|
-
|
|
421
|
+
We welcome contributions! Please see our [Contributing Guide](https://github.com/kreuzberg-dev/html-to-markdown/blob/main/CONTRIBUTING.md) for details on:
|
|
340
422
|
|
|
341
|
-
|
|
423
|
+
- Setting up the development environment
|
|
424
|
+
- Running tests locally
|
|
425
|
+
- Submitting pull requests
|
|
426
|
+
- Reporting issues
|
|
427
|
+
|
|
428
|
+
All contributions must follow our code quality standards (enforced via pre-commit hooks):
|
|
342
429
|
|
|
343
|
-
-
|
|
344
|
-
-
|
|
345
|
-
-
|
|
346
|
-
- [Discord Community](https://discord.gg/pXxagNK2zN)
|
|
430
|
+
- Proper test coverage (Rust 95%+, language bindings 80%+)
|
|
431
|
+
- Formatting and linting checks
|
|
432
|
+
- Documentation for public APIs
|
|
347
433
|
|
|
348
434
|
## License
|
|
349
435
|
|
|
350
|
-
MIT
|
|
436
|
+
MIT License – see [LICENSE](https://github.com/kreuzberg-dev/html-to-markdown/blob/main/LICENSE).
|
|
437
|
+
|
|
438
|
+
## Support
|
|
439
|
+
|
|
440
|
+
If you find this library useful, consider [sponsoring the project](https://github.com/sponsors/kreuzberg-dev).
|
|
441
|
+
|
|
442
|
+
Have questions or run into issues? We're here to help:
|
|
443
|
+
|
|
444
|
+
- **GitHub Issues:** [github.com/kreuzberg-dev/html-to-markdown/issues](https://github.com/kreuzberg-dev/html-to-markdown/issues)
|
|
445
|
+
- **Discussions:** [github.com/kreuzberg-dev/html-to-markdown/discussions](https://github.com/kreuzberg-dev/html-to-markdown/discussions)
|
|
446
|
+
- **Discord Community:** [discord.gg/pXxagNK2zN](https://discord.gg/pXxagNK2zN)
|
package/dist/cli.js
CHANGED
|
File without changes
|
package/dist/index.d.ts
CHANGED
|
@@ -30,7 +30,7 @@ export declare function convertStreamWithInlineImages(stream: Readable | AsyncIt
|
|
|
30
30
|
* links, images, and structured data (JSON-LD, Microdata, RDFa).
|
|
31
31
|
*
|
|
32
32
|
* @param html HTML content to convert
|
|
33
|
-
* @param options Optional conversion configuration
|
|
33
|
+
* @param options Optional conversion configuration. Supports `skipImages` to skip image conversion
|
|
34
34
|
* @param metadataConfig Optional metadata extraction configuration
|
|
35
35
|
* @returns Object with converted markdown and extracted metadata
|
|
36
36
|
*
|
|
@@ -51,7 +51,7 @@ export declare function convertStreamWithInlineImages(stream: Readable | AsyncIt
|
|
|
51
51
|
* </html>
|
|
52
52
|
* `;
|
|
53
53
|
*
|
|
54
|
-
* const { markdown, metadata } =
|
|
54
|
+
* const { markdown, metadata } = convertWithMetadata(html, undefined, {
|
|
55
55
|
* extractHeaders: true,
|
|
56
56
|
* extractLinks: true,
|
|
57
57
|
* extractImages: true,
|
|
@@ -78,3 +78,69 @@ export declare function convertFileWithMetadata(filePath: string, options?: JsCo
|
|
|
78
78
|
* Convert HTML streamed from stdin or another readable stream with metadata extraction.
|
|
79
79
|
*/
|
|
80
80
|
export declare function convertStreamWithMetadata(stream: Readable | AsyncIterable<string | Buffer>, options?: JsConversionOptions | null | undefined, metadataConfig?: JsMetadataConfig | null | undefined): Promise<JsMetadataExtraction>;
|
|
81
|
+
/**
|
|
82
|
+
* Type for visitor callback that receives parsed context object
|
|
83
|
+
*/
|
|
84
|
+
type VisitorCallback<TContext = unknown, TResult = {
|
|
85
|
+
type: string;
|
|
86
|
+
output?: string;
|
|
87
|
+
}> = (context: TContext) => Promise<TResult>;
|
|
88
|
+
/**
|
|
89
|
+
* Type for wrapped visitor callback that handles JSON strings
|
|
90
|
+
*/
|
|
91
|
+
type WrappedVisitorCallback = (jsonString: string) => Promise<string>;
|
|
92
|
+
/**
|
|
93
|
+
* Wraps a single visitor callback to handle JSON serialization/deserialization automatically.
|
|
94
|
+
*
|
|
95
|
+
* The native NAPI bindings expect visitor callbacks with signature:
|
|
96
|
+
* `(jsonString: string) => Promise<string>`
|
|
97
|
+
*
|
|
98
|
+
* This wrapper allows you to write callbacks that receive parsed objects:
|
|
99
|
+
* `(context: NodeContext) => Promise<{type: string}>`
|
|
100
|
+
*
|
|
101
|
+
* @param callback - Visitor callback that receives parsed context object
|
|
102
|
+
* @returns Wrapped callback that handles JSON string conversion
|
|
103
|
+
*
|
|
104
|
+
* @example
|
|
105
|
+
* ```ts
|
|
106
|
+
* const wrappedCallback = wrapVisitorCallback(async (ctx) => {
|
|
107
|
+
* console.log('Tag name:', ctx.tagName);
|
|
108
|
+
* return { type: 'continue' };
|
|
109
|
+
* });
|
|
110
|
+
* ```
|
|
111
|
+
*/
|
|
112
|
+
export declare function wrapVisitorCallback<TContext, TResult>(callback: VisitorCallback<TContext, TResult>): WrappedVisitorCallback;
|
|
113
|
+
/**
|
|
114
|
+
* Type for visitor object with callbacks that receive parsed objects
|
|
115
|
+
*/
|
|
116
|
+
type VisitorObject = Record<string, VisitorCallback>;
|
|
117
|
+
/**
|
|
118
|
+
* Type for wrapped visitor object with JSON-handling callbacks
|
|
119
|
+
*/
|
|
120
|
+
type WrappedVisitorObject = Record<string, WrappedVisitorCallback>;
|
|
121
|
+
/**
|
|
122
|
+
* Wraps all callbacks in a visitor object to handle JSON serialization/deserialization.
|
|
123
|
+
*
|
|
124
|
+
* This is a convenience function to wrap all callbacks in a visitor object at once.
|
|
125
|
+
*
|
|
126
|
+
* @param visitor - Visitor object with callbacks that receive parsed objects
|
|
127
|
+
* @returns Wrapped visitor object with JSON-handling callbacks
|
|
128
|
+
*
|
|
129
|
+
* @example
|
|
130
|
+
* ```ts
|
|
131
|
+
* const visitor = {
|
|
132
|
+
* visitElementStart: async (ctx: NodeContext) => {
|
|
133
|
+
* console.log('Tag:', ctx.tagName);
|
|
134
|
+
* return { type: 'continue' };
|
|
135
|
+
* },
|
|
136
|
+
* visitText: async (ctx: NodeContext, text: string) => {
|
|
137
|
+
* console.log('Text:', text);
|
|
138
|
+
* return { type: 'continue' };
|
|
139
|
+
* },
|
|
140
|
+
* };
|
|
141
|
+
*
|
|
142
|
+
* const wrapped = wrapVisitorCallbacks(visitor);
|
|
143
|
+
* const result = await convertWithVisitor(html, undefined, wrapped);
|
|
144
|
+
* ```
|
|
145
|
+
*/
|
|
146
|
+
export declare function wrapVisitorCallbacks(visitor: VisitorObject): WrappedVisitorObject;
|
package/dist/index.js
CHANGED
|
@@ -62,7 +62,7 @@ export async function convertStreamWithInlineImages(stream, options, imageConfig
|
|
|
62
62
|
* links, images, and structured data (JSON-LD, Microdata, RDFa).
|
|
63
63
|
*
|
|
64
64
|
* @param html HTML content to convert
|
|
65
|
-
* @param options Optional conversion configuration
|
|
65
|
+
* @param options Optional conversion configuration. Supports `skipImages` to skip image conversion
|
|
66
66
|
* @param metadataConfig Optional metadata extraction configuration
|
|
67
67
|
* @returns Object with converted markdown and extracted metadata
|
|
68
68
|
*
|
|
@@ -83,7 +83,7 @@ export async function convertStreamWithInlineImages(stream, options, imageConfig
|
|
|
83
83
|
* </html>
|
|
84
84
|
* `;
|
|
85
85
|
*
|
|
86
|
-
* const { markdown, metadata } =
|
|
86
|
+
* const { markdown, metadata } = convertWithMetadata(html, undefined, {
|
|
87
87
|
* extractHeaders: true,
|
|
88
88
|
* extractLinks: true,
|
|
89
89
|
* extractImages: true,
|
|
@@ -124,3 +124,64 @@ export async function convertStreamWithMetadata(stream, options, metadataConfig)
|
|
|
124
124
|
}
|
|
125
125
|
return convertWithMetadata(html, options ?? undefined, metadataConfig ?? undefined);
|
|
126
126
|
}
|
|
127
|
+
/**
|
|
128
|
+
* Wraps a single visitor callback to handle JSON serialization/deserialization automatically.
|
|
129
|
+
*
|
|
130
|
+
* The native NAPI bindings expect visitor callbacks with signature:
|
|
131
|
+
* `(jsonString: string) => Promise<string>`
|
|
132
|
+
*
|
|
133
|
+
* This wrapper allows you to write callbacks that receive parsed objects:
|
|
134
|
+
* `(context: NodeContext) => Promise<{type: string}>`
|
|
135
|
+
*
|
|
136
|
+
* @param callback - Visitor callback that receives parsed context object
|
|
137
|
+
* @returns Wrapped callback that handles JSON string conversion
|
|
138
|
+
*
|
|
139
|
+
* @example
|
|
140
|
+
* ```ts
|
|
141
|
+
* const wrappedCallback = wrapVisitorCallback(async (ctx) => {
|
|
142
|
+
* console.log('Tag name:', ctx.tagName);
|
|
143
|
+
* return { type: 'continue' };
|
|
144
|
+
* });
|
|
145
|
+
* ```
|
|
146
|
+
*/
|
|
147
|
+
export function wrapVisitorCallback(callback) {
|
|
148
|
+
return async (jsonString) => {
|
|
149
|
+
const context = JSON.parse(jsonString);
|
|
150
|
+
const result = await callback(context);
|
|
151
|
+
return JSON.stringify(result);
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
/**
|
|
155
|
+
* Wraps all callbacks in a visitor object to handle JSON serialization/deserialization.
|
|
156
|
+
*
|
|
157
|
+
* This is a convenience function to wrap all callbacks in a visitor object at once.
|
|
158
|
+
*
|
|
159
|
+
* @param visitor - Visitor object with callbacks that receive parsed objects
|
|
160
|
+
* @returns Wrapped visitor object with JSON-handling callbacks
|
|
161
|
+
*
|
|
162
|
+
* @example
|
|
163
|
+
* ```ts
|
|
164
|
+
* const visitor = {
|
|
165
|
+
* visitElementStart: async (ctx: NodeContext) => {
|
|
166
|
+
* console.log('Tag:', ctx.tagName);
|
|
167
|
+
* return { type: 'continue' };
|
|
168
|
+
* },
|
|
169
|
+
* visitText: async (ctx: NodeContext, text: string) => {
|
|
170
|
+
* console.log('Text:', text);
|
|
171
|
+
* return { type: 'continue' };
|
|
172
|
+
* },
|
|
173
|
+
* };
|
|
174
|
+
*
|
|
175
|
+
* const wrapped = wrapVisitorCallbacks(visitor);
|
|
176
|
+
* const result = await convertWithVisitor(html, undefined, wrapped);
|
|
177
|
+
* ```
|
|
178
|
+
*/
|
|
179
|
+
export function wrapVisitorCallbacks(visitor) {
|
|
180
|
+
const wrapped = {};
|
|
181
|
+
for (const [methodName, callback] of Object.entries(visitor)) {
|
|
182
|
+
if (typeof callback === "function") {
|
|
183
|
+
wrapped[methodName] = wrapVisitorCallback(callback);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
return wrapped;
|
|
187
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kreuzberg/html-to-markdown",
|
|
3
|
-
"version": "2.19.0-rc.1",
|
|
3
|
+
"version": "2.23.4",
|
|
4
4
|
"description": "High-performance HTML to Markdown converter for TypeScript/Node.js with a Rust core.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"html",
|
|
@@ -39,30 +39,19 @@
|
|
|
39
39
|
"bin": {
|
|
40
40
|
"html-to-markdown": "./dist/cli.js"
|
|
41
41
|
},
|
|
42
|
-
"scripts": {
|
|
43
|
-
"build": "pnpm --filter @kreuzberg/html-to-markdown-node run build && tsc --project tsconfig.json",
|
|
44
|
-
"clean": "rm -rf dist",
|
|
45
|
-
"lint": "biome check src",
|
|
46
|
-
"lint:fix": "biome check --write src",
|
|
47
|
-
"format:fix": "biome check --write src",
|
|
48
|
-
"format:check": "biome check src",
|
|
49
|
-
"test": "pnpm --filter @kreuzberg/html-to-markdown-node run build && vitest run",
|
|
50
|
-
"test:coverage": "pnpm --filter @kreuzberg/html-to-markdown-node run build && vitest run --coverage",
|
|
51
|
-
"test:watch": "vitest"
|
|
52
|
-
},
|
|
53
42
|
"files": [
|
|
54
43
|
"dist",
|
|
55
44
|
"README.md"
|
|
56
45
|
],
|
|
57
46
|
"dependencies": {
|
|
58
|
-
"@kreuzberg/html-to-markdown-node": "2.19.0-rc.1"
|
|
47
|
+
"@kreuzberg/html-to-markdown-node": "2.23.4"
|
|
59
48
|
},
|
|
60
49
|
"devDependencies": {
|
|
61
|
-
"@biomejs/biome": "^2.3.
|
|
62
|
-
"@types/node": "^25.0.
|
|
63
|
-
"@vitest/coverage-v8": "^4.0.
|
|
50
|
+
"@biomejs/biome": "^2.3.11",
|
|
51
|
+
"@types/node": "^25.0.9",
|
|
52
|
+
"@vitest/coverage-v8": "^4.0.17",
|
|
64
53
|
"typescript": "^5.9.3",
|
|
65
|
-
"vitest": "^4.0.
|
|
54
|
+
"vitest": "^4.0.17"
|
|
66
55
|
},
|
|
67
56
|
"engines": {
|
|
68
57
|
"node": ">=18"
|
|
@@ -70,5 +59,16 @@
|
|
|
70
59
|
"publishConfig": {
|
|
71
60
|
"registry": "https://registry.npmjs.org/",
|
|
72
61
|
"access": "public"
|
|
62
|
+
},
|
|
63
|
+
"scripts": {
|
|
64
|
+
"build": "pnpm --filter @kreuzberg/html-to-markdown-node run build && tsc --project tsconfig.json",
|
|
65
|
+
"clean": "rm -rf dist",
|
|
66
|
+
"lint": "biome check src",
|
|
67
|
+
"lint:fix": "biome check --write src",
|
|
68
|
+
"format:fix": "biome check --write src",
|
|
69
|
+
"format:check": "biome check src",
|
|
70
|
+
"test": "pnpm --filter @kreuzberg/html-to-markdown-node run build && vitest run",
|
|
71
|
+
"test:coverage": "pnpm --filter @kreuzberg/html-to-markdown-node run build && vitest run --coverage",
|
|
72
|
+
"test:watch": "vitest"
|
|
73
73
|
}
|
|
74
|
-
}
|
|
74
|
+
}
|