@kreuzberg/html-to-markdown-wasm 2.19.0-rc.1 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,11 +15,65 @@ Runs anywhere: Node.js, Deno, Bun, browsers, and edge runtimes.
15
15
  [![PyPI](https://img.shields.io/pypi/v/html-to-markdown.svg?logo=pypi)](https://pypi.org/project/html-to-markdown/)
16
16
  [![Packagist](https://img.shields.io/packagist/v/goldziher/html-to-markdown.svg)](https://packagist.org/packages/goldziher/html-to-markdown)
17
17
  [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
18
- [![NuGet](https://img.shields.io/nuget/v/Goldziher.HtmlToMarkdown.svg)](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
19
- [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
18
+ [![NuGet](https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown.svg)](https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/)
19
+ [![Maven Central](https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown.svg)](https://central.sonatype.com/artifact/dev.kreuzberg/html-to-markdown)
20
20
  [![Go Reference](https://pkg.go.dev/badge/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown)
21
21
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/kreuzberg-dev/html-to-markdown/blob/main/LICENSE)
22
22
 
23
+ ## Migration Guide (v2.18.x → v2.19.0)
24
+
25
+ > **⚠️ BREAKING CHANGE: Package Namespace Update**
26
+ >
27
+ > In v2.19.0, the npm package was renamed from `html-to-markdown-wasm` to `@kreuzberg/html-to-markdown-wasm`, moving it under the `@kreuzberg` scope to reflect the new Kreuzberg.dev organization.
28
+
29
+ ### Install Updated Package
30
+
31
+ **Before (v2.18.x):**
32
+ ```bash
33
+ npm install html-to-markdown-wasm
34
+ ```
35
+
36
+ **After (v2.19.0+):**
37
+ ```bash
38
+ npm install @kreuzberg/html-to-markdown-wasm
39
+ ```
40
+
41
+ ### Update Import Statements
42
+
43
+ **Before:**
44
+ ```typescript
45
+ import { convert } from 'html-to-markdown-wasm';
46
+ // or
47
+ import { convert } from "npm:html-to-markdown-wasm"; // Deno
48
+ ```
49
+
50
+ **After:**
51
+ ```typescript
52
+ import { convert } from '@kreuzberg/html-to-markdown-wasm';
53
+ // or
54
+ import { convert } from "npm:@kreuzberg/html-to-markdown-wasm"; // Deno
55
+ ```
56
+
57
+ ### Update Browser ESM Imports
58
+
59
+ **Before:**
60
+ ```javascript
61
+ import init, { convert } from 'https://unpkg.com/html-to-markdown-wasm/dist-web/html_to_markdown_wasm.js';
62
+ ```
63
+
64
+ **After:**
65
+ ```javascript
66
+ import init, { convert } from 'https://unpkg.com/@kreuzberg/html-to-markdown-wasm/dist-web/html_to_markdown_wasm.js';
67
+ ```
68
+
69
+ ### Summary of Changes
70
+
71
+ - Package renamed from `html-to-markdown-wasm` to `@kreuzberg/html-to-markdown-wasm`
72
+ - All APIs remain identical
73
+ - No code changes are required beyond updating the package name and import paths
74
+
75
+ ---
76
+
23
77
  ## Performance
24
78
 
25
79
  Universal WebAssembly bindings with **excellent performance** across all JavaScript runtimes.
package/package.json CHANGED
@@ -1,68 +1,68 @@
1
1
  {
2
- "name": "@kreuzberg/html-to-markdown-wasm",
3
- "version": "2.19.0-rc.1",
4
- "description": "High-performance HTML to Markdown converter - WebAssembly bindings",
5
- "main": "dist/html_to_markdown_wasm.js",
6
- "types": "dist/html_to_markdown_wasm.d.ts",
7
- "exports": {
8
- ".": {
9
- "import": "./dist/html_to_markdown_wasm.js",
10
- "types": "./dist/html_to_markdown_wasm.d.ts",
11
- "default": "./dist/html_to_markdown_wasm.js"
12
- },
13
- "./dist-node": {
14
- "import": "./dist-node/html_to_markdown_wasm.js",
15
- "require": "./dist-node/html_to_markdown_wasm.js",
16
- "types": "./dist-node/html_to_markdown_wasm.d.ts"
17
- },
18
- "./dist-node/*": "./dist-node/*",
19
- "./dist-web": {
20
- "import": "./dist-web/html_to_markdown_wasm.js",
21
- "types": "./dist-web/html_to_markdown_wasm.d.ts"
22
- },
23
- "./dist-web/*": "./dist-web/*"
24
- },
25
- "repository": "https://github.com/kreuzberg-dev/html-to-markdown",
26
- "homepage": "https://github.com/kreuzberg-dev/html-to-markdown",
27
- "license": "MIT",
28
- "author": "Na'aman Hirschfeld <nhirschfeld@gmail.com>",
29
- "bugs": "https://github.com/kreuzberg-dev/html-to-markdown/issues",
30
- "keywords": [
31
- "html",
32
- "markdown",
33
- "converter",
34
- "rust",
35
- "wasm",
36
- "webassembly"
37
- ],
38
- "files": [
39
- "dist",
40
- "dist-node",
41
- "dist-web",
42
- "README.md"
43
- ],
44
- "scripts": {
45
- "build": "wasm-pack build --target bundler --out-dir dist && node ./scripts/patch-bundler-entry.js",
46
- "build:nodejs": "wasm-pack build --target nodejs --out-dir dist-node && node ./scripts/patch-bundler-entry.js dist-node --types-only",
47
- "build:web": "wasm-pack build --target web --out-dir dist-web && node ./scripts/patch-bundler-entry.js dist-web --types-only",
48
- "build:all": "pnpm run build && pnpm run build:nodejs && pnpm run build:web && pnpm run cleanup:gitignore",
49
- "cleanup:gitignore": "node ./scripts/cleanup-gitignore.js",
50
- "test": "vitest run",
51
- "test:watch": "vitest",
52
- "test:wasm-pack": "wasm-pack test --headless --chrome",
53
- "clean": "rm -rf dist dist-node dist-web node_modules pkg"
54
- },
55
- "devDependencies": {
56
- "@types/node": "^25.0.3",
57
- "tsx": "^4.21.0",
58
- "vitest": "^4.0.16",
59
- "wasm-pack": "^0.13.1"
60
- },
61
- "publishConfig": {
62
- "registry": "https://registry.npmjs.org/",
63
- "access": "public"
64
- },
65
- "dependencies": {
66
- "up": "^1.0.2"
67
- }
2
+ "name": "@kreuzberg/html-to-markdown-wasm",
3
+ "version": "2.19.0",
4
+ "description": "High-performance HTML to Markdown converter - WebAssembly bindings",
5
+ "main": "dist/html_to_markdown_wasm.js",
6
+ "types": "dist/html_to_markdown_wasm.d.ts",
7
+ "exports": {
8
+ ".": {
9
+ "import": "./dist/html_to_markdown_wasm.js",
10
+ "types": "./dist/html_to_markdown_wasm.d.ts",
11
+ "default": "./dist/html_to_markdown_wasm.js"
12
+ },
13
+ "./dist-node": {
14
+ "import": "./dist-node/html_to_markdown_wasm.js",
15
+ "require": "./dist-node/html_to_markdown_wasm.js",
16
+ "types": "./dist-node/html_to_markdown_wasm.d.ts"
17
+ },
18
+ "./dist-node/*": "./dist-node/*",
19
+ "./dist-web": {
20
+ "import": "./dist-web/html_to_markdown_wasm.js",
21
+ "types": "./dist-web/html_to_markdown_wasm.d.ts"
22
+ },
23
+ "./dist-web/*": "./dist-web/*"
24
+ },
25
+ "repository": "https://github.com/kreuzberg-dev/html-to-markdown",
26
+ "homepage": "https://github.com/kreuzberg-dev/html-to-markdown",
27
+ "license": "MIT",
28
+ "author": "Na'aman Hirschfeld <nhirschfeld@gmail.com>",
29
+ "bugs": "https://github.com/kreuzberg-dev/html-to-markdown/issues",
30
+ "keywords": [
31
+ "html",
32
+ "markdown",
33
+ "converter",
34
+ "rust",
35
+ "wasm",
36
+ "webassembly"
37
+ ],
38
+ "files": [
39
+ "dist",
40
+ "dist-node",
41
+ "dist-web",
42
+ "README.md"
43
+ ],
44
+ "scripts": {
45
+ "build": "wasm-pack build --target bundler --out-dir dist && node ./scripts/patch-bundler-entry.js",
46
+ "build:nodejs": "wasm-pack build --target nodejs --out-dir dist-node && node ./scripts/patch-bundler-entry.js dist-node --types-only",
47
+ "build:web": "wasm-pack build --target web --out-dir dist-web && node ./scripts/patch-bundler-entry.js dist-web --types-only",
48
+ "build:all": "pnpm run build && pnpm run build:nodejs && pnpm run build:web && pnpm run cleanup:gitignore",
49
+ "cleanup:gitignore": "node ./scripts/cleanup-gitignore.js",
50
+ "test": "vitest run",
51
+ "test:watch": "vitest",
52
+ "test:wasm-pack": "wasm-pack test --headless --chrome",
53
+ "clean": "rm -rf dist dist-node dist-web node_modules pkg"
54
+ },
55
+ "devDependencies": {
56
+ "@types/node": "^25.0.3",
57
+ "tsx": "^4.21.0",
58
+ "vitest": "^4.0.16",
59
+ "wasm-pack": "^0.13.1"
60
+ },
61
+ "publishConfig": {
62
+ "registry": "https://registry.npmjs.org/",
63
+ "access": "public"
64
+ },
65
+ "dependencies": {
66
+ "up": "^1.0.2"
67
+ }
68
68
  }
package/dist/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- The MIT License (MIT)
2
-
3
- Copyright 2024-2025 Na'aman Hirschfeld
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
package/dist/README.md DELETED
@@ -1,202 +0,0 @@
1
- # html-to-markdown
2
-
3
- <img width="1128" height="191" alt="Linkedin- Banner (1)" src="https://github.com/user-attachments/assets/f8e91036-20a5-40f9-9fcc-9e6c6e15f1f5" />
4
-
5
-
6
-
7
- High-performance HTML → Markdown conversion powered by Rust. It ships as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI, all with identical rendering behaviour.
8
-
9
- Part of the Kreuzberg.dev document intelligence ecosystem. Kreuzberg is a polyglot document intelligence framework with a fast Rust core. We build tools that help developers extract, process, and understand documents at scale, from PDFs to Office files, images, archives, emails, in 50+ formats. We've set out to make high-performance document intelligence faster and more ecological.
10
-
11
- [![Crates.io](https://img.shields.io/crates/v/html-to-markdown-rs.svg?logo=rust&label=crates.io)](https://crates.io/crates/html-to-markdown-rs)
12
- [![npm (node)](https://img.shields.io/npm/v/%40kreuzberg%2Fhtml-to-markdown-node.svg?logo=npm)](https://www.npmjs.com/package/@kreuzberg/html-to-markdown-node)
13
- [![npm (wasm)](https://img.shields.io/npm/v/%40kreuzberg%2Fhtml-to-markdown-wasm.svg?logo=npm)](https://www.npmjs.com/package/@kreuzberg/html-to-markdown-wasm)
14
- [![PyPI](https://img.shields.io/pypi/v/html-to-markdown.svg?logo=pypi)](https://pypi.org/project/html-to-markdown/)
15
- [![Packagist](https://img.shields.io/packagist/v/goldziher/html-to-markdown.svg)](https://packagist.org/packages/goldziher/html-to-markdown)
16
- [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
17
- [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
18
- [![NuGet](https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown.svg)](https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/)
19
- [![Maven Central](https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown.svg)](https://central.sonatype.com/artifact/dev.kreuzberg/html-to-markdown)
20
- [![Go Reference](https://pkg.go.dev/badge/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown)
21
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/kreuzberg-dev/html-to-markdown/blob/main/LICENSE)
22
- [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
23
-
24
-
25
- ---
26
-
27
- ## 🎮 **[Try the Live Demo →](https://kreuzberg-dev.github.io/html-to-markdown/)**
28
-
29
- Experience WebAssembly-powered HTML to Markdown conversion instantly in your browser. No installation needed!
30
-
31
- ---
32
-
33
- ## Why html-to-markdown?
34
-
35
- - **Blazing Fast**: Rust-powered core delivers 10-80× faster conversion than pure Python alternatives
36
- - **Universal**: Works everywhere - Node.js, Bun, Deno, browsers, Python, Rust, and standalone CLI
37
- - **Smart Conversion**: Handles complex documents including nested tables, code blocks, task lists, and hOCR OCR output
38
- - **Metadata Extraction**: Extract document metadata (title, description, headers, links, images) alongside conversion
39
- - **Highly Configurable**: Control heading styles, code block fences, list formatting, whitespace handling, and HTML sanitization
40
- - **Tag Preservation**: Keep specific HTML tags unconverted when markdown isn't expressive enough
41
- - **Secure by Default**: Built-in HTML sanitization prevents malicious content
42
- - **Consistent Output**: Identical markdown rendering across all language bindings
43
-
44
- ## Quick Start
45
-
46
- **Node.js / Bun (Native - Fastest):**
47
-
48
- ```typescript
49
- import { convert } from '@kreuzberg/html-to-markdown-node';
50
-
51
- const html = '<h1>Hello</h1><p>Rust ❤️ Markdown</p>';
52
- const markdown = convert(html, {
53
- headingStyle: 'Atx',
54
- codeBlockStyle: 'Backticks',
55
- wrap: true,
56
- preserveTags: ['table'],
57
- });
58
- ```
59
-
60
- **Python:**
61
-
62
- ```python
63
- from html_to_markdown import convert
64
-
65
- html = '<h1>Hello</h1><p>Rust ❤️ Markdown</p>'
66
- markdown = convert(html, heading_style='Atx', wrap=True)
67
- ```
68
-
69
- **Ruby:**
70
-
71
- ```ruby
72
- require 'html_to_markdown'
73
-
74
- html = '<h1>Hello</h1><p>Rust ❤️ Markdown</p>'
75
- markdown = HtmlToMarkdown.convert(html, heading_style: :atx, wrap: true)
76
- ```
77
-
78
- Full language guides: See [Language Guides](#language-guides) below.
79
-
80
- ## Installation
81
-
82
- | Target | Command(s) |
83
- | --------------------------- | ---------------------------------------------------------------------------------------------------------------- |
84
- | **Node.js/Bun** (native) | `npm install @kreuzberg/html-to-markdown-node` |
85
- | **WebAssembly** (universal) | `npm install @kreuzberg/html-to-markdown-wasm` |
86
- | **Deno** | `import { convert } from "npm:@kreuzberg/html-to-markdown-wasm"` |
87
- | **Python** (bindings + CLI) | `pip install html-to-markdown` |
88
- | **PHP** (extension + helpers) | `PHP_EXTENSION_DIR=$(php-config --extension-dir) pie install goldziher/html-to-markdown`<br>`composer require goldziher/html-to-markdown` |
89
- | **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
90
- | **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
91
- | **Rust** crate | `cargo add html-to-markdown-rs` |
92
- | **Java** (Maven) | `<groupId>dev.kreuzberg</groupId><artifactId>html-to-markdown</artifactId>` |
93
- | **C#/.NET** (NuGet) | `dotnet add package KreuzbergDev.HtmlToMarkdown` |
94
- | Rust CLI (crates.io) | `cargo install html-to-markdown-cli` |
95
- | Homebrew CLI | `brew install html-to-markdown` (core) |
96
- | Releases | [GitHub Releases](https://github.com/kreuzberg-dev/html-to-markdown/releases) |
97
-
98
- ## Performance
99
-
100
- Benchmarked on Apple M4 using the shared fixture harness in `tools/benchmark-harness`.
101
-
102
- ### Comparative Throughput (Median Across Fixtures)
103
-
104
- | Runtime | Median ops/sec | Median throughput (MB/s) | Peak memory (MB) | Successes |
105
- | ------- | -------------- | ------------------------ | ---------------- | --------- |
106
- | Rust | 1,060.3 | 116.4 | 171.3 | 56/56 |
107
- | Go | 1,496.3 | 131.1 | 22.9 | 16/16 |
108
- | Ruby | 2,155.5 | 300.4 | 280.3 | 48/48 |
109
- | PHP | 2,357.7 | 308.0 | 223.5 | 48/48 |
110
- | Elixir | 1,564.1 | 269.1 | 384.7 | 48/48 |
111
- | C# | 1,234.2 | 272.4 | 187.8 | 16/16 |
112
- | Java | 1,298.7 | 167.1 | 527.2 | 16/16 |
113
- | WASM | 1,485.8 | 157.6 | 95.3 | 48/48 |
114
- | Node.js (NAPI) | 2,054.2 | 306.5 | 95.4 | 48/48 |
115
- | Python (PyO3) | 3,120.3 | 307.5 | 83.5 | 48/48 |
116
-
117
- Use `task bench:harness` to regenerate throughput numbers. See [Performance Guide](./examples/performance/) for benchmarking strategies and optimization tips.
118
-
119
- ## Language Guides
120
-
121
- Complete documentation with examples for each language:
122
-
123
- - **Python** – [README](./packages/python/README.md) | PyO3 bindings, metadata extraction, inline images
124
- - **JavaScript/TypeScript** – [Node.js](./crates/html-to-markdown-node/README.md) | [TypeScript](./packages/typescript/README.md) | [WASM](./crates/html-to-markdown-wasm/README.md)
125
- - **Ruby** – [README](./packages/ruby/README.md) | Magnus bindings, RBS type definitions, Steep checking
126
- - **PHP** – [Package](./packages/php/README.md) | [Extension (PIE)](./packages/php-ext/README.md) | ext-php-rs extension
127
- - **Go** – [README](./packages/go/README.md) | FFI bindings with cgo
128
- - **Java** – [README](./packages/java/README.md) | Panama FFI, Maven/Gradle setup
129
- - **C#/.NET** – [README](./packages/csharp/README.md) | P/Invoke FFI, NuGet distribution
130
- - **Elixir** – [README](./packages/elixir/README.md) | Rustler NIF bindings
131
- - **Rust** – [README](./crates/html-to-markdown/README.md) | Core library, error handling, advanced features
132
-
133
- ## Feature Guides
134
-
135
- ### Visitor Pattern
136
- Customize HTML→Markdown conversion with callbacks for specific elements. Use cases: domain-specific dialects, content filtering, URL rewriting, accessibility validation.
137
-
138
- **→ [Full Guide with Examples](./examples/visitor-pattern/)** (Python, TypeScript, Ruby)
139
-
140
- ### Metadata Extraction
141
- Extract comprehensive metadata during conversion: title, description, headers, links, images, structured data. Use cases: SEO extraction, TOC generation, link validation, accessibility auditing, content migration.
142
-
143
- **→ [Full Guide with Examples](./examples/metadata-extraction/)** (Python, TypeScript, Ruby)
144
-
145
- ### Performance & Benchmarking
146
- Understand performance characteristics, run benchmarks, optimize for your use case. Includes benchmarking tools, memory profiling, streaming strategies, and optimization tips.
147
-
148
- **→ [Full Guide](./examples/performance/)**
149
-
150
- ## Examples
151
-
152
- Explore working code examples in multiple languages:
153
-
154
- | Example | Path | Languages |
155
- | ------- | ---- | --------- |
156
- | **Visitor Pattern** | [examples/visitor-pattern/](./examples/visitor-pattern/) | Python, TypeScript, Ruby |
157
- | **Metadata Extraction** | [examples/metadata-extraction/](./examples/metadata-extraction/) | Python, TypeScript, Ruby |
158
- | **Performance** | [examples/performance/](./examples/performance/) | Benchmarks, profiling, optimization |
159
-
160
- ## Testing
161
-
162
- Run the test suite locally:
163
-
164
- ```bash
165
- # All core test suites (Rust, Python, Ruby, Node, PHP, Go, C#, Elixir, Java)
166
- task test
167
-
168
- # Run the Wasmtime-backed WASM integration tests
169
- task wasm:test:wasmtime
170
- ```
171
-
172
- ## Compatibility & Migrations
173
-
174
- ### v2.19.0 Breaking Changes (Package Namespace Updates)
175
-
176
- Several language bindings were updated to use new namespaces and package owners:
177
-
178
- - **npm packages**: Scoped under `@kreuzberg` organization
179
- - Old: `html-to-markdown-node` → New: `@kreuzberg/html-to-markdown-node`
180
- - Old: `html-to-markdown-wasm` → New: `@kreuzberg/html-to-markdown-wasm`
181
- - **Java**: Package namespace changed from `io.github.goldziher` to `dev.kreuzberg`
182
- - **C#/.NET**: Package changed from `Goldziher.HtmlToMarkdown` to `KreuzbergDev.HtmlToMarkdown`
183
-
184
- See [MIGRATION.md](./MIGRATION.md) for step-by-step upgrade instructions for each language.
185
-
186
- ### v1 → v2 Compatibility
187
-
188
- - V2's Rust core sustains **150–210 MB/s** throughput, roughly 60–80× faster than V1, which averaged **≈ 2.5 MB/s**.
189
- - Python compatibility shim available in `html_to_markdown.v1_compat` (deprecated; emits warnings; plan migrations now). See [Python README](./packages/python/README.md#v1-compatibility) for keyword mappings.
190
- - CLI flag changes and other breaking updates in [CHANGELOG](./CHANGELOG.md#breaking-changes).
191
-
192
- ## Community
193
-
194
- - **Discord** – [Join our community](https://discord.gg/pXxagNK2zN)
195
- - **Ecosystem** – Explore [Kreuzberg](https://kreuzberg.dev) document-processing tools
196
- - **Contribute** – [CONTRIBUTING.md](./CONTRIBUTING.md)
197
- - **Sponsor** – [GitHub Sponsors](https://github.com/sponsors/kreuzberg-dev)
198
- - **Changelog** – [Version history](./CHANGELOG.md)
199
-
200
- ## License
201
-
202
- MIT License – see [LICENSE](./LICENSE) for details.
@@ -1,200 +0,0 @@
1
- /* tslint:disable */
2
- /* eslint-disable */
3
-
4
- export class WasmConversionOptionsHandle {
5
- free(): void;
6
- [Symbol.dispose](): void;
7
- constructor(options?: WasmConversionOptions | null);
8
- }
9
-
10
- export class WasmHtmlExtraction {
11
- private constructor();
12
- free(): void;
13
- [Symbol.dispose](): void;
14
- readonly inlineImages: WasmInlineImage[];
15
- readonly markdown: string;
16
- readonly warnings: WasmInlineImageWarning[];
17
- }
18
-
19
- export class WasmInlineImage {
20
- private constructor();
21
- free(): void;
22
- [Symbol.dispose](): void;
23
- readonly attributes: Record<string, string>;
24
- readonly dimensions: Uint32Array | undefined;
25
- readonly description: string | undefined;
26
- readonly data: Uint8Array;
27
- readonly format: string;
28
- readonly source: string;
29
- readonly filename: string | undefined;
30
- }
31
-
32
- export class WasmInlineImageConfig {
33
- free(): void;
34
- [Symbol.dispose](): void;
35
- constructor(max_decoded_size_bytes?: number | null);
36
- set captureSvg(value: boolean);
37
- set filenamePrefix(value: string | null | undefined);
38
- set inferDimensions(value: boolean);
39
- }
40
-
41
- export class WasmInlineImageWarning {
42
- private constructor();
43
- free(): void;
44
- [Symbol.dispose](): void;
45
- readonly index: number;
46
- readonly message: string;
47
- }
48
-
49
- export class WasmMetadataConfig {
50
- free(): void;
51
- [Symbol.dispose](): void;
52
- /**
53
- * Create a new metadata configuration with defaults
54
- *
55
- * All extraction types enabled by default with 1MB structured data limit
56
- */
57
- constructor();
58
- extract_links: boolean;
59
- extract_images: boolean;
60
- extract_headers: boolean;
61
- extract_document: boolean;
62
- extract_structured_data: boolean;
63
- max_structured_data_size: number;
64
- }
65
-
66
- /**
67
- * Convert HTML to Markdown
68
- *
69
- * # Arguments
70
- *
71
- * * `html` - The HTML string to convert
72
- * * `options` - Optional conversion options (as a JavaScript object)
73
- *
74
- * # Example
75
- *
76
- * ```javascript
77
- * import { convert } from '@kreuzberg/html-to-markdown-wasm';
78
- *
79
- * const html = '<h1>Hello World</h1>';
80
- * const markdown = convert(html);
81
- * console.log(markdown); // # Hello World
82
- * ```
83
- */
84
- export function convert(html: string, options?: WasmConversionOptions | null): string;
85
-
86
- export function convertBytes(html: Uint8Array, options?: WasmConversionOptions | null): string;
87
-
88
- export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
89
-
90
- /**
91
- * Convert HTML bytes to Markdown with metadata extraction
92
- *
93
- * # Arguments
94
- *
95
- * * `html` - The HTML bytes to convert
96
- * * `options` - Optional conversion options (as a JavaScript object)
97
- * * `metadata_config` - Metadata extraction configuration
98
- *
99
- * # Returns
100
- *
101
- * JavaScript object with `markdown` (string) and `metadata` (object) fields
102
- */
103
- export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
104
-
105
- export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
106
-
107
- export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
108
-
109
- /**
110
- * Convert HTML to Markdown with metadata extraction
111
- *
112
- * # Arguments
113
- *
114
- * * `html` - The HTML string to convert
115
- * * `options` - Optional conversion options (as a JavaScript object)
116
- * * `metadata_config` - Metadata extraction configuration
117
- *
118
- * # Returns
119
- *
120
- * JavaScript object with `markdown` (string) and `metadata` (object) fields
121
- *
122
- * # Example
123
- *
124
- * ```javascript
125
- * import { convertWithMetadata, WasmMetadataConfig } from '@kreuzberg/html-to-markdown-wasm';
126
- *
127
- * const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
128
- * const config = new WasmMetadataConfig();
129
- * config.extract_headers = true;
130
- * config.extract_links = true;
131
- *
132
- * const result = convertWithMetadata(html, null, config);
133
- * console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
134
- * console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
135
- * console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
136
- * ```
137
- */
138
- export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
139
-
140
- export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
141
-
142
- export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
143
-
144
- /**
145
- * Initialize panic hook for better error messages in the browser
146
- */
147
- export function init(): void;
148
-
149
- export declare function initWasm(): Promise<void>;
150
- export declare const wasmReady: Promise<void>;
151
-
152
-
153
- export type WasmHeadingStyle = "underlined" | "atx" | "atxClosed";
154
- export type WasmListIndentType = "spaces" | "tabs";
155
- export type WasmWhitespaceMode = "normalized" | "strict";
156
- export type WasmNewlineStyle = "spaces" | "backslash";
157
- export type WasmCodeBlockStyle = "indented" | "backticks" | "tildes";
158
- export type WasmHighlightStyle = "doubleEqual" | "html" | "bold" | "none";
159
- export type WasmPreprocessingPreset = "minimal" | "standard" | "aggressive";
160
-
161
- export interface WasmPreprocessingOptions {
162
- enabled?: boolean;
163
- preset?: WasmPreprocessingPreset;
164
- removeNavigation?: boolean;
165
- removeForms?: boolean;
166
- }
167
-
168
- export interface WasmConversionOptions {
169
- headingStyle?: WasmHeadingStyle;
170
- listIndentType?: WasmListIndentType;
171
- listIndentWidth?: number;
172
- bullets?: string;
173
- strongEmSymbol?: string;
174
- escapeAsterisks?: boolean;
175
- escapeUnderscores?: boolean;
176
- escapeMisc?: boolean;
177
- escapeAscii?: boolean;
178
- codeLanguage?: string;
179
- autolinks?: boolean;
180
- defaultTitle?: boolean;
181
- brInTables?: boolean;
182
- hocrSpatialTables?: boolean;
183
- highlightStyle?: WasmHighlightStyle;
184
- extractMetadata?: boolean;
185
- whitespaceMode?: WasmWhitespaceMode;
186
- stripNewlines?: boolean;
187
- wrap?: boolean;
188
- wrapWidth?: number;
189
- convertAsInline?: boolean;
190
- subSymbol?: string;
191
- supSymbol?: string;
192
- newlineStyle?: WasmNewlineStyle;
193
- codeBlockStyle?: WasmCodeBlockStyle;
194
- keepInlineImagesIn?: string[];
195
- preprocessing?: WasmPreprocessingOptions | null;
196
- encoding?: string;
197
- debug?: boolean;
198
- stripTags?: string[];
199
- preserveTags?: string[];
200
- }