@kreuzberg/html-to-markdown-wasm 2.19.0-rc.1 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,55 +0,0 @@
1
- /* tslint:disable */
2
- /* eslint-disable */
3
- export const memory: WebAssembly.Memory;
4
- export const __wbg_wasmconversionoptionshandle_free: (a: number, b: number) => void;
5
- export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
6
- export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
7
- export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
8
- export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
9
- export const __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
10
- export const convert: (a: number, b: number, c: number, d: number) => void;
11
- export const convertBytes: (a: number, b: number, c: number) => void;
12
- export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
13
- export const convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
14
- export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
15
- export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
16
- export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
17
- export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
18
- export const createConversionOptionsHandle: (a: number, b: number) => void;
19
- export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
20
- export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
21
- export const wasmhtmlextraction_markdown: (a: number, b: number) => void;
22
- export const wasmhtmlextraction_warnings: (a: number, b: number) => void;
23
- export const wasminlineimage_attributes: (a: number) => number;
24
- export const wasminlineimage_data: (a: number) => number;
25
- export const wasminlineimage_description: (a: number, b: number) => void;
26
- export const wasminlineimage_dimensions: (a: number, b: number) => void;
27
- export const wasminlineimage_filename: (a: number, b: number) => void;
28
- export const wasminlineimage_format: (a: number, b: number) => void;
29
- export const wasminlineimage_source: (a: number, b: number) => void;
30
- export const wasminlineimageconfig_new: (a: number, b: number) => number;
31
- export const wasminlineimageconfig_set_captureSvg: (a: number, b: number) => void;
32
- export const wasminlineimageconfig_set_filenamePrefix: (a: number, b: number, c: number) => void;
33
- export const wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
34
- export const wasminlineimagewarning_index: (a: number) => number;
35
- export const wasminlineimagewarning_message: (a: number, b: number) => void;
36
- export const wasmmetadataconfig_extract_document: (a: number) => number;
37
- export const wasmmetadataconfig_extract_headers: (a: number) => number;
38
- export const wasmmetadataconfig_extract_images: (a: number) => number;
39
- export const wasmmetadataconfig_extract_links: (a: number) => number;
40
- export const wasmmetadataconfig_extract_structured_data: (a: number) => number;
41
- export const wasmmetadataconfig_max_structured_data_size: (a: number) => number;
42
- export const wasmmetadataconfig_new: () => number;
43
- export const wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
44
- export const wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
45
- export const wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
46
- export const wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
47
- export const wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
48
- export const wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
49
- export const init: () => void;
50
- export const __wbindgen_export: (a: number, b: number) => number;
51
- export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
52
- export const __wbindgen_export3: (a: number) => void;
53
- export const __wbindgen_export4: (a: number, b: number, c: number) => void;
54
- export const __wbindgen_add_to_stack_pointer: (a: number) => number;
55
- export const __wbindgen_start: () => void;
@@ -1,21 +0,0 @@
1
- {
2
- "name": "html-to-markdown-wasm",
3
- "collaborators": [
4
- "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
5
- ],
6
- "description": "HTML to Markdown conversion for WebAssembly targets",
7
- "version": "2.19.0-rc.1",
8
- "license": "MIT",
9
- "repository": {
10
- "type": "git",
11
- "url": "https://github.com/kreuzberg-dev/html-to-markdown"
12
- },
13
- "files": [
14
- "html_to_markdown_wasm_bg.wasm",
15
- "html_to_markdown_wasm.js",
16
- "html_to_markdown_wasm.d.ts"
17
- ],
18
- "main": "html_to_markdown_wasm.js",
19
- "homepage": "https://github.com/kreuzberg-dev/html-to-markdown",
20
- "types": "html_to_markdown_wasm.d.ts"
21
- }
package/dist-web/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- The MIT License (MIT)
2
-
3
- Copyright 2024-2025 Na'aman Hirschfeld
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
@@ -1,202 +0,0 @@
1
- # html-to-markdown
2
-
3
- <img width="1128" height="191" alt="Linkedin- Banner (1)" src="https://github.com/user-attachments/assets/f8e91036-20a5-40f9-9fcc-9e6c6e15f1f5" />
4
-
5
-
6
-
7
- High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behaviour.
8
-
9
- Part of the Kreuzberg.dev document intelligence ecosystem. Kreuzberg is a polyglot document intelligence framework with a fast Rust core. We build tools that help developers extract, process, and understand documents at scale: PDFs, Office files, images, archives, emails, and more, across 50+ formats. We've set out to make high-performance document intelligence faster and more ecological.
10
-
11
- [![Crates.io](https://img.shields.io/crates/v/html-to-markdown-rs.svg?logo=rust&label=crates.io)](https://crates.io/crates/html-to-markdown-rs)
12
- [![npm (node)](https://img.shields.io/npm/v/%40kreuzberg%2Fhtml-to-markdown-node.svg?logo=npm)](https://www.npmjs.com/package/@kreuzberg/html-to-markdown-node)
13
- [![npm (wasm)](https://img.shields.io/npm/v/%40kreuzberg%2Fhtml-to-markdown-wasm.svg?logo=npm)](https://www.npmjs.com/package/@kreuzberg/html-to-markdown-wasm)
14
- [![PyPI](https://img.shields.io/pypi/v/html-to-markdown.svg?logo=pypi)](https://pypi.org/project/html-to-markdown/)
15
- [![Packagist](https://img.shields.io/packagist/v/goldziher/html-to-markdown.svg)](https://packagist.org/packages/goldziher/html-to-markdown)
16
- [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
17
- [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
18
- [![NuGet](https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown.svg)](https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/)
19
- [![Maven Central](https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown.svg)](https://central.sonatype.com/artifact/dev.kreuzberg/html-to-markdown)
20
- [![Go Reference](https://pkg.go.dev/badge/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown)
21
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/kreuzberg-dev/html-to-markdown/blob/main/LICENSE)
22
- [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
23
-
24
-
25
- ---
26
-
27
- ## 🎮 **[Try the Live Demo →](https://kreuzberg-dev.github.io/html-to-markdown/)**
28
-
29
- Experience WebAssembly-powered HTML to Markdown conversion instantly in your browser. No installation needed!
30
-
31
- ---
32
-
33
- ## Why html-to-markdown?
34
-
35
- - **Blazing Fast**: Rust-powered core delivers 10-80× faster conversion than pure Python alternatives
36
- - **Universal**: Works everywhere - Node.js, Bun, Deno, browsers, Python, Rust, and standalone CLI
37
- - **Smart Conversion**: Handles complex documents including nested tables, code blocks, task lists, and hOCR OCR output
38
- - **Metadata Extraction**: Extract document metadata (title, description, headers, links, images) alongside conversion
39
- - **Highly Configurable**: Control heading styles, code block fences, list formatting, whitespace handling, and HTML sanitization
40
- - **Tag Preservation**: Keep specific HTML tags unconverted when markdown isn't expressive enough
41
- - **Secure by Default**: Built-in HTML sanitization prevents malicious content
42
- - **Consistent Output**: Identical markdown rendering across all language bindings
43
-
44
- ## Quick Start
45
-
46
- **Node.js / Bun (Native - Fastest):**
47
-
48
- ```typescript
49
- import { convert } from '@kreuzberg/html-to-markdown-node';
50
-
51
- const html = '<h1>Hello</h1><p>Rust ❤️ Markdown</p>';
52
- const markdown = convert(html, {
53
- headingStyle: 'Atx',
54
- codeBlockStyle: 'Backticks',
55
- wrap: true,
56
- preserveTags: ['table'],
57
- });
58
- ```
59
-
60
- **Python:**
61
-
62
- ```python
63
- from html_to_markdown import convert
64
-
65
- html = '<h1>Hello</h1><p>Rust ❤️ Markdown</p>'
66
- markdown = convert(html, heading_style='Atx', wrap=True)
67
- ```
68
-
69
- **Ruby:**
70
-
71
- ```ruby
72
- require 'html_to_markdown'
73
-
74
- html = '<h1>Hello</h1><p>Rust ❤️ Markdown</p>'
75
- markdown = HtmlToMarkdown.convert(html, heading_style: :atx, wrap: true)
76
- ```
77
-
78
- Full language guides: See [Language Guides](#language-guides) below.
79
-
80
- ## Installation
81
-
82
- | Target | Command(s) |
83
- | --------------------------- | ---------------------------------------------------------------------------------------------------------------- |
84
- | **Node.js/Bun** (native) | `npm install @kreuzberg/html-to-markdown-node` |
85
- | **WebAssembly** (universal) | `npm install @kreuzberg/html-to-markdown-wasm` |
86
- | **Deno** | `import { convert } from "npm:@kreuzberg/html-to-markdown-wasm"` |
87
- | **Python** (bindings + CLI) | `pip install html-to-markdown` |
88
- | **PHP** (extension + helpers) | `PHP_EXTENSION_DIR=$(php-config --extension-dir) pie install goldziher/html-to-markdown`<br>`composer require goldziher/html-to-markdown` |
89
- | **Ruby** gem | `bundle add html-to-markdown` or `gem install html-to-markdown` |
90
- | **Elixir** (Rustler NIF) | `{:html_to_markdown, "~> 2.8"}` |
91
- | **Rust** crate | `cargo add html-to-markdown-rs` |
92
- | **Java** (Maven) | `<groupId>dev.kreuzberg</groupId><artifactId>html-to-markdown</artifactId>` |
93
- | **C#/.NET** (NuGet) | `dotnet add package KreuzbergDev.HtmlToMarkdown` |
94
- | Rust CLI (crates.io) | `cargo install html-to-markdown-cli` |
95
- | Homebrew CLI | `brew install html-to-markdown` (core) |
96
- | Releases | [GitHub Releases](https://github.com/kreuzberg-dev/html-to-markdown/releases) |
97
-
98
- ## Performance
99
-
100
- Benchmarked on Apple M4 using the shared fixture harness in `tools/benchmark-harness`.
101
-
102
- ### Comparative Throughput (Median Across Fixtures)
103
-
104
- | Runtime | Median ops/sec | Median throughput (MB/s) | Peak memory (MB) | Successes |
105
- | ------- | -------------- | ------------------------ | ---------------- | --------- |
106
- | Rust | 1,060.3 | 116.4 | 171.3 | 56/56 |
107
- | Go | 1,496.3 | 131.1 | 22.9 | 16/16 |
108
- | Ruby | 2,155.5 | 300.4 | 280.3 | 48/48 |
109
- | PHP | 2,357.7 | 308.0 | 223.5 | 48/48 |
110
- | Elixir | 1,564.1 | 269.1 | 384.7 | 48/48 |
111
- | C# | 1,234.2 | 272.4 | 187.8 | 16/16 |
112
- | Java | 1,298.7 | 167.1 | 527.2 | 16/16 |
113
- | WASM | 1,485.8 | 157.6 | 95.3 | 48/48 |
114
- | Node.js (NAPI) | 2,054.2 | 306.5 | 95.4 | 48/48 |
115
- | Python (PyO3) | 3,120.3 | 307.5 | 83.5 | 48/48 |
116
-
117
- Use `task bench:harness` to regenerate throughput numbers. See [Performance Guide](./examples/performance/) for benchmarking strategies and optimization tips.
118
-
119
- ## Language Guides
120
-
121
- Complete documentation with examples for each language:
122
-
123
- - **Python** – [README](./packages/python/README.md) | PyO3 bindings, metadata extraction, inline images
124
- - **JavaScript/TypeScript** – [Node.js](./crates/html-to-markdown-node/README.md) | [TypeScript](./packages/typescript/README.md) | [WASM](./crates/html-to-markdown-wasm/README.md)
125
- - **Ruby** – [README](./packages/ruby/README.md) | Magnus bindings, RBS type definitions, Steep checking
126
- - **PHP** – [Package](./packages/php/README.md) | [Extension (PIE)](./packages/php-ext/README.md) | ext-php-rs extension
127
- - **Go** – [README](./packages/go/README.md) | FFI bindings with cgo
128
- - **Java** – [README](./packages/java/README.md) | Panama FFI, Maven/Gradle setup
129
- - **C#/.NET** – [README](./packages/csharp/README.md) | P/Invoke FFI, NuGet distribution
130
- - **Elixir** – [README](./packages/elixir/README.md) | Rustler NIF bindings
131
- - **Rust** – [README](./crates/html-to-markdown/README.md) | Core library, error handling, advanced features
132
-
133
- ## Feature Guides
134
-
135
- ### Visitor Pattern
136
- Customize HTML→Markdown conversion with callbacks for specific elements. Use cases: domain-specific dialects, content filtering, URL rewriting, accessibility validation.
137
-
138
- **→ [Full Guide with Examples](./examples/visitor-pattern/)** (Python, TypeScript, Ruby)
139
-
140
- ### Metadata Extraction
141
- Extract comprehensive metadata during conversion: title, description, headers, links, images, structured data. Use cases: SEO extraction, TOC generation, link validation, accessibility auditing, content migration.
142
-
143
- **→ [Full Guide with Examples](./examples/metadata-extraction/)** (Python, TypeScript, Ruby)
144
-
145
- ### Performance & Benchmarking
146
- Understand performance characteristics, run benchmarks, optimize for your use case. Includes benchmarking tools, memory profiling, streaming strategies, and optimization tips.
147
-
148
- **→ [Full Guide](./examples/performance/)**
149
-
150
- ## Examples
151
-
152
- Explore working code examples in multiple languages:
153
-
154
- | Example | Path | Languages |
155
- | ------- | ---- | --------- |
156
- | **Visitor Pattern** | [examples/visitor-pattern/](./examples/visitor-pattern/) | Python, TypeScript, Ruby |
157
- | **Metadata Extraction** | [examples/metadata-extraction/](./examples/metadata-extraction/) | Python, TypeScript, Ruby |
158
- | **Performance** | [examples/performance/](./examples/performance/) | Benchmarks, profiling, optimization |
159
-
160
- ## Testing
161
-
162
- Run the test suite locally:
163
-
164
- ```bash
165
- # All core test suites (Rust, Python, Ruby, Node, PHP, Go, C#, Elixir, Java)
166
- task test
167
-
168
- # Run the Wasmtime-backed WASM integration tests
169
- task wasm:test:wasmtime
170
- ```
171
-
172
- ## Compatibility & Migrations
173
-
174
- ### v2.19.0 Breaking Changes (Package Namespace Updates)
175
-
176
- Several language bindings were updated to use new namespaces and package owners:
177
-
178
- - **npm packages**: Scoped under `@kreuzberg` organization
179
- - Old: `html-to-markdown-node` → New: `@kreuzberg/html-to-markdown-node`
180
- - Old: `html-to-markdown-wasm` → New: `@kreuzberg/html-to-markdown-wasm`
181
- - **Java**: Package namespace changed from `io.github.goldziher` to `dev.kreuzberg`
182
- - **C#/.NET**: Package changed from `Goldziher.HtmlToMarkdown` to `KreuzbergDev.HtmlToMarkdown`
183
-
184
- See [MIGRATION.md](./MIGRATION.md) for step-by-step upgrade instructions for each language.
185
-
186
- ### v1 → v2 Compatibility
187
-
188
- - V2's Rust core sustains **150–210 MB/s** throughput; V1 averaged **≈ 2.5 MB/s** (60–80× faster).
189
- - Python compatibility shim available in `html_to_markdown.v1_compat` (deprecated; emits warnings; plan migrations now). See [Python README](./packages/python/README.md#v1-compatibility) for keyword mappings.
190
- - CLI flag changes and other breaking updates in [CHANGELOG](./CHANGELOG.md#breaking-changes).
191
-
192
- ## Community
193
-
194
- - **Discord** – [Join our community](https://discord.gg/pXxagNK2zN)
195
- - **Ecosystem** – Explore [Kreuzberg](https://kreuzberg.dev) document-processing tools
196
- - **Contribute** – [CONTRIBUTING.md](./CONTRIBUTING.md)
197
- - **Sponsor** – [GitHub Sponsors](https://github.com/sponsors/kreuzberg-dev)
198
- - **Changelog** – [Version history](./CHANGELOG.md)
199
-
200
- ## License
201
-
202
- MIT License – see [LICENSE](./LICENSE) for details.
@@ -1,277 +0,0 @@
1
- /* tslint:disable */
2
- /* eslint-disable */
3
-
4
- export class WasmConversionOptionsHandle {
5
- free(): void;
6
- [Symbol.dispose](): void;
7
- constructor(options?: WasmConversionOptions | null);
8
- }
9
-
10
- export class WasmHtmlExtraction {
11
- private constructor();
12
- free(): void;
13
- [Symbol.dispose](): void;
14
- readonly inlineImages: WasmInlineImage[];
15
- readonly markdown: string;
16
- readonly warnings: WasmInlineImageWarning[];
17
- }
18
-
19
- export class WasmInlineImage {
20
- private constructor();
21
- free(): void;
22
- [Symbol.dispose](): void;
23
- readonly attributes: Record<string, string>;
24
- readonly dimensions: Uint32Array | undefined;
25
- readonly description: string | undefined;
26
- readonly data: Uint8Array;
27
- readonly format: string;
28
- readonly source: string;
29
- readonly filename: string | undefined;
30
- }
31
-
32
- export class WasmInlineImageConfig {
33
- free(): void;
34
- [Symbol.dispose](): void;
35
- constructor(max_decoded_size_bytes?: number | null);
36
- set captureSvg(value: boolean);
37
- set filenamePrefix(value: string | null | undefined);
38
- set inferDimensions(value: boolean);
39
- }
40
-
41
- export class WasmInlineImageWarning {
42
- private constructor();
43
- free(): void;
44
- [Symbol.dispose](): void;
45
- readonly index: number;
46
- readonly message: string;
47
- }
48
-
49
- export class WasmMetadataConfig {
50
- free(): void;
51
- [Symbol.dispose](): void;
52
- /**
53
- * Create a new metadata configuration with defaults
54
- *
55
- * All extraction types enabled by default with 1MB structured data limit
56
- */
57
- constructor();
58
- extract_links: boolean;
59
- extract_images: boolean;
60
- extract_headers: boolean;
61
- extract_document: boolean;
62
- extract_structured_data: boolean;
63
- max_structured_data_size: number;
64
- }
65
-
66
- /**
67
- * Convert HTML to Markdown
68
- *
69
- * # Arguments
70
- *
71
- * * `html` - The HTML string to convert
72
- * * `options` - Optional conversion options (as a JavaScript object)
73
- *
74
- * # Example
75
- *
76
- * ```javascript
77
- * import { convert } from '@kreuzberg/html-to-markdown-wasm';
78
- *
79
- * const html = '<h1>Hello World</h1>';
80
- * const markdown = convert(html);
81
- * console.log(markdown); // # Hello World
82
- * ```
83
- */
84
- export function convert(html: string, options?: WasmConversionOptions | null): string;
85
-
86
- export function convertBytes(html: Uint8Array, options?: WasmConversionOptions | null): string;
87
-
88
- export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
89
-
90
- /**
91
- * Convert HTML bytes to Markdown with metadata extraction
92
- *
93
- * # Arguments
94
- *
95
- * * `html` - The HTML bytes to convert
96
- * * `options` - Optional conversion options (as a JavaScript object)
97
- * * `metadata_config` - Metadata extraction configuration
98
- *
99
- * # Returns
100
- *
101
- * JavaScript object with `markdown` (string) and `metadata` (object) fields
102
- */
103
- export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
104
-
105
- export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
106
-
107
- export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
108
-
109
- /**
110
- * Convert HTML to Markdown with metadata extraction
111
- *
112
- * # Arguments
113
- *
114
- * * `html` - The HTML string to convert
115
- * * `options` - Optional conversion options (as a JavaScript object)
116
- * * `metadata_config` - Metadata extraction configuration
117
- *
118
- * # Returns
119
- *
120
- * JavaScript object with `markdown` (string) and `metadata` (object) fields
121
- *
122
- * # Example
123
- *
124
- * ```javascript
125
- * import { convertWithMetadata, WasmMetadataConfig } from '@kreuzberg/html-to-markdown-wasm';
126
- *
127
- * const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
128
- * const config = new WasmMetadataConfig();
129
- * config.extract_headers = true;
130
- * config.extract_links = true;
131
- *
132
- * const result = convertWithMetadata(html, null, config);
133
- * console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
134
- * console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
135
- * console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
136
- * ```
137
- */
138
- export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
139
-
140
- export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
141
-
142
- export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
143
-
144
- /**
145
- * Initialize panic hook for better error messages in the browser
146
- */
147
- export function init(): void;
148
-
149
- export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
150
-
151
- export interface InitOutput {
152
- readonly memory: WebAssembly.Memory;
153
- readonly __wbg_wasmconversionoptionshandle_free: (a: number, b: number) => void;
154
- readonly __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
155
- readonly __wbg_wasminlineimage_free: (a: number, b: number) => void;
156
- readonly __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
157
- readonly __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
158
- readonly __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
159
- readonly convert: (a: number, b: number, c: number, d: number) => void;
160
- readonly convertBytes: (a: number, b: number, c: number) => void;
161
- readonly convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
162
- readonly convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
163
- readonly convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
164
- readonly convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
165
- readonly convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
166
- readonly convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
167
- readonly createConversionOptionsHandle: (a: number, b: number) => void;
168
- readonly wasmconversionoptionshandle_new: (a: number, b: number) => void;
169
- readonly wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
170
- readonly wasmhtmlextraction_markdown: (a: number, b: number) => void;
171
- readonly wasmhtmlextraction_warnings: (a: number, b: number) => void;
172
- readonly wasminlineimage_attributes: (a: number) => number;
173
- readonly wasminlineimage_data: (a: number) => number;
174
- readonly wasminlineimage_description: (a: number, b: number) => void;
175
- readonly wasminlineimage_dimensions: (a: number, b: number) => void;
176
- readonly wasminlineimage_filename: (a: number, b: number) => void;
177
- readonly wasminlineimage_format: (a: number, b: number) => void;
178
- readonly wasminlineimage_source: (a: number, b: number) => void;
179
- readonly wasminlineimageconfig_new: (a: number, b: number) => number;
180
- readonly wasminlineimageconfig_set_captureSvg: (a: number, b: number) => void;
181
- readonly wasminlineimageconfig_set_filenamePrefix: (a: number, b: number, c: number) => void;
182
- readonly wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
183
- readonly wasminlineimagewarning_index: (a: number) => number;
184
- readonly wasminlineimagewarning_message: (a: number, b: number) => void;
185
- readonly wasmmetadataconfig_extract_document: (a: number) => number;
186
- readonly wasmmetadataconfig_extract_headers: (a: number) => number;
187
- readonly wasmmetadataconfig_extract_images: (a: number) => number;
188
- readonly wasmmetadataconfig_extract_links: (a: number) => number;
189
- readonly wasmmetadataconfig_extract_structured_data: (a: number) => number;
190
- readonly wasmmetadataconfig_max_structured_data_size: (a: number) => number;
191
- readonly wasmmetadataconfig_new: () => number;
192
- readonly wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
193
- readonly wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
194
- readonly wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
195
- readonly wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
196
- readonly wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
197
- readonly wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
198
- readonly init: () => void;
199
- readonly __wbindgen_export: (a: number, b: number) => number;
200
- readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
201
- readonly __wbindgen_export3: (a: number) => void;
202
- readonly __wbindgen_export4: (a: number, b: number, c: number) => void;
203
- readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
204
- readonly __wbindgen_start: () => void;
205
- }
206
-
207
- export type SyncInitInput = BufferSource | WebAssembly.Module;
208
-
209
- /**
210
- * Instantiates the given `module`, which can either be bytes or
211
- * a precompiled `WebAssembly.Module`.
212
- *
213
- * @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
214
- *
215
- * @returns {InitOutput}
216
- */
217
- export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
218
-
219
- /**
220
- * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
221
- * for everything else, calls `WebAssembly.instantiate` directly.
222
- *
223
- * @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
224
- *
225
- * @returns {Promise<InitOutput>}
226
- */
227
- export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;
228
-
229
-
230
- export type WasmHeadingStyle = "underlined" | "atx" | "atxClosed";
231
- export type WasmListIndentType = "spaces" | "tabs";
232
- export type WasmWhitespaceMode = "normalized" | "strict";
233
- export type WasmNewlineStyle = "spaces" | "backslash";
234
- export type WasmCodeBlockStyle = "indented" | "backticks" | "tildes";
235
- export type WasmHighlightStyle = "doubleEqual" | "html" | "bold" | "none";
236
- export type WasmPreprocessingPreset = "minimal" | "standard" | "aggressive";
237
-
238
- export interface WasmPreprocessingOptions {
239
- enabled?: boolean;
240
- preset?: WasmPreprocessingPreset;
241
- removeNavigation?: boolean;
242
- removeForms?: boolean;
243
- }
244
-
245
- export interface WasmConversionOptions {
246
- headingStyle?: WasmHeadingStyle;
247
- listIndentType?: WasmListIndentType;
248
- listIndentWidth?: number;
249
- bullets?: string;
250
- strongEmSymbol?: string;
251
- escapeAsterisks?: boolean;
252
- escapeUnderscores?: boolean;
253
- escapeMisc?: boolean;
254
- escapeAscii?: boolean;
255
- codeLanguage?: string;
256
- autolinks?: boolean;
257
- defaultTitle?: boolean;
258
- brInTables?: boolean;
259
- hocrSpatialTables?: boolean;
260
- highlightStyle?: WasmHighlightStyle;
261
- extractMetadata?: boolean;
262
- whitespaceMode?: WasmWhitespaceMode;
263
- stripNewlines?: boolean;
264
- wrap?: boolean;
265
- wrapWidth?: number;
266
- convertAsInline?: boolean;
267
- subSymbol?: string;
268
- supSymbol?: string;
269
- newlineStyle?: WasmNewlineStyle;
270
- codeBlockStyle?: WasmCodeBlockStyle;
271
- keepInlineImagesIn?: string[];
272
- preprocessing?: WasmPreprocessingOptions | null;
273
- encoding?: string;
274
- debug?: boolean;
275
- stripTags?: string[];
276
- preserveTags?: string[];
277
- }