@kreuzberg/html-to-markdown-wasm 2.19.4 → 2.19.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/LICENSE +21 -0
- package/dist/README.md +148 -0
- package/dist/html_to_markdown_wasm.d.ts +200 -0
- package/dist/html_to_markdown_wasm.js +116 -0
- package/dist/html_to_markdown_wasm_bg.js +1355 -0
- package/dist/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist/html_to_markdown_wasm_bg.wasm.d.ts +55 -0
- package/dist/package.json +26 -0
- package/dist-node/LICENSE +21 -0
- package/dist-node/README.md +148 -0
- package/dist-node/html_to_markdown_wasm.d.ts +197 -0
- package/dist-node/html_to_markdown_wasm.js +1369 -0
- package/dist-node/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-node/html_to_markdown_wasm_bg.wasm.d.ts +55 -0
- package/dist-node/package.json +21 -0
- package/dist-web/LICENSE +21 -0
- package/dist-web/README.md +148 -0
- package/dist-web/html_to_markdown_wasm.d.ts +277 -0
- package/dist-web/html_to_markdown_wasm.js +1395 -0
- package/dist-web/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-web/html_to_markdown_wasm_bg.wasm.d.ts +55 -0
- package/dist-web/package.json +25 -0
- package/package.json +1 -1
package/dist/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright 2024-2025 Na'aman Hirschfeld
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/README.md
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# html-to-markdown
|
|
2
|
+
|
|
3
|
+
<div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
|
|
4
|
+
<!-- Language Bindings -->
|
|
5
|
+
<a href="https://crates.io/crates/html-to-markdown-rs">
|
|
6
|
+
<img src="https://img.shields.io/crates/v/html-to-markdown-rs?label=Rust&color=007ec6" alt="Rust">
|
|
7
|
+
</a>
|
|
8
|
+
<a href="https://pypi.org/project/html-to-markdown/">
|
|
9
|
+
<img src="https://img.shields.io/pypi/v/html-to-markdown?label=Python&color=007ec6" alt="Python">
|
|
10
|
+
</a>
|
|
11
|
+
<a href="https://www.npmjs.com/package/@kreuzberg/html-to-markdown-node">
|
|
12
|
+
<img src="https://img.shields.io/npm/v/@kreuzberg/html-to-markdown-node?label=Node.js&color=007ec6" alt="Node.js">
|
|
13
|
+
</a>
|
|
14
|
+
<a href="https://www.npmjs.com/package/@kreuzberg/html-to-markdown-wasm">
|
|
15
|
+
<img src="https://img.shields.io/npm/v/@kreuzberg/html-to-markdown-wasm?label=WASM&color=007ec6" alt="WASM">
|
|
16
|
+
</a>
|
|
17
|
+
<a href="https://central.sonatype.com/artifact/dev.kreuzberg/html-to-markdown">
|
|
18
|
+
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown?label=Java&color=007ec6" alt="Java">
|
|
19
|
+
</a>
|
|
20
|
+
<a href="https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown">
|
|
21
|
+
<img src="https://img.shields.io/badge/Go-v2.19.0-007ec6" alt="Go">
|
|
22
|
+
</a>
|
|
23
|
+
<a href="https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/">
|
|
24
|
+
<img src="https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown?label=C%23&color=007ec6" alt="C#">
|
|
25
|
+
</a>
|
|
26
|
+
<a href="https://packagist.org/packages/goldziher/html-to-markdown">
|
|
27
|
+
<img src="https://img.shields.io/packagist/v/goldziher/html-to-markdown?label=PHP&color=007ec6" alt="PHP">
|
|
28
|
+
</a>
|
|
29
|
+
<a href="https://rubygems.org/gems/html-to-markdown">
|
|
30
|
+
<img src="https://img.shields.io/gem/v/html-to-markdown?label=Ruby&color=007ec6" alt="Ruby">
|
|
31
|
+
</a>
|
|
32
|
+
<a href="https://hex.pm/packages/html_to_markdown">
|
|
33
|
+
<img src="https://img.shields.io/hexpm/v/html_to_markdown?label=Elixir&color=007ec6" alt="Elixir">
|
|
34
|
+
</a>
|
|
35
|
+
|
|
36
|
+
<!-- Project Info -->
|
|
37
|
+
<a href="https://github.com/kreuzberg-dev/html-to-markdown/blob/main/LICENSE">
|
|
38
|
+
<img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License">
|
|
39
|
+
</a>
|
|
40
|
+
</div>
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
<img width="3384" height="573" alt="Linkedin- Banner" src="https://github.com/user-attachments/assets/1bd52e37-c45d-4f5c-8408-ee12997f6cfd" />
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
<div align="center" style="margin-top: 20px;">
|
|
47
|
+
<a href="https://discord.gg/pXxagNK2zN">
|
|
48
|
+
<img height="22" src="https://img.shields.io/badge/Discord-Join%20our%20community-7289da?logo=discord&logoColor=white" alt="Discord">
|
|
49
|
+
</a>
|
|
50
|
+
</div>
|
|
51
|
+
|
|
52
|
+
High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rust crate, Python package, PHP extension, Ruby gem, Elixir Rustler NIF, Node.js bindings, WebAssembly, and standalone CLI with identical rendering behavior across all runtimes.
|
|
53
|
+
|
|
54
|
+
## Key Features
|
|
55
|
+
|
|
56
|
+
- **Blazing Fast** – Rust-powered core delivers 10-80× faster conversion than pure Python alternatives (150–280 MB/s)
|
|
57
|
+
- **Polyglot** – Native bindings for Rust, Python, TypeScript/Node.js, Ruby, PHP, Go, Java, C#, and Elixir
|
|
58
|
+
- **Smart Conversion** – Handles complex documents including nested tables, code blocks, task lists, and hOCR OCR output
|
|
59
|
+
- **Metadata Extraction** – Extract document metadata (title, description, headers, links, images, structured data) alongside conversion
|
|
60
|
+
- **Visitor Pattern** – Custom callbacks for domain-specific dialects, content filtering, URL rewriting, accessibility validation
|
|
61
|
+
- **Highly Configurable** – Control heading styles, code block fences, list formatting, whitespace handling, and HTML sanitization
|
|
62
|
+
- **Tag Preservation** – Keep specific HTML tags unconverted when markdown isn't expressive enough
|
|
63
|
+
- **Secure by Default** – Built-in HTML sanitization prevents malicious content
|
|
64
|
+
- **Consistent Output** – Identical markdown rendering across all language bindings
|
|
65
|
+
|
|
66
|
+
**[Try the Live Demo →](https://kreuzberg-dev.github.io/html-to-markdown/)**
|
|
67
|
+
|
|
68
|
+
## Installation
|
|
69
|
+
|
|
70
|
+
Each language binding provides comprehensive documentation with installation instructions, examples, and best practices. Choose your platform to get started:
|
|
71
|
+
|
|
72
|
+
**Scripting Languages:**
|
|
73
|
+
- **[Python](./packages/python/README.md)** – PyPI package, metadata extraction, visitor pattern, CLI included
|
|
74
|
+
- **[Ruby](./packages/ruby/README.md)** – RubyGems package, RBS type definitions, Steep checking
|
|
75
|
+
- **[PHP](./packages/php/README.md)** – Composer package + PIE extension, PHP 8.2+, PHPStan level 9
|
|
76
|
+
- **[Elixir](./packages/elixir/README.md)** – Hex package, Rustler NIF bindings, Elixir 1.19+
|
|
77
|
+
|
|
78
|
+
**JavaScript/TypeScript:**
|
|
79
|
+
- **[Node.js / TypeScript](./packages/typescript/README.md)** – Native NAPI-RS bindings for Node.js/Bun, fastest performance, WebAssembly for browsers/Deno
|
|
80
|
+
|
|
81
|
+
**Compiled Languages:**
|
|
82
|
+
- **[Go](./packages/go/v2/README.md)** – Go module with FFI bindings, automatic library download
|
|
83
|
+
- **[Java](./packages/java/README.md)** – Maven Central, Panama Foreign Function & Memory API, Java 24+
|
|
84
|
+
- **[C#](./packages/csharp/README.md)** – NuGet package, .NET 8.0+, P/Invoke FFI bindings
|
|
85
|
+
|
|
86
|
+
**Native:**
|
|
87
|
+
- **[Rust](./crates/html-to-markdown/README.md)** – Core library, flexible feature flags, zero-copy APIs
|
|
88
|
+
|
|
89
|
+
**Command-Line:**
|
|
90
|
+
- **[CLI](https://crates.io/crates/html-to-markdown-cli)** – Cross-platform binary via `cargo install html-to-markdown-cli` or [Homebrew](https://formulae.brew.sh/formula/html-to-markdown)
|
|
91
|
+
|
|
92
|
+
<details>
|
|
93
|
+
<summary><strong>Metadata Extraction</strong></summary>
|
|
94
|
+
|
|
95
|
+
Extract comprehensive metadata during conversion: title, description, headers, links, images, structured data (JSON-LD, Microdata, RDFa). Use cases: SEO extraction, table-of-contents generation, link validation, accessibility auditing, content migration.
|
|
96
|
+
|
|
97
|
+
**[Metadata Extraction Guide →](./examples/metadata-extraction/)**
|
|
98
|
+
|
|
99
|
+
</details>
|
|
100
|
+
|
|
101
|
+
<details>
|
|
102
|
+
<summary><strong>Visitor Pattern</strong></summary>
|
|
103
|
+
|
|
104
|
+
Customize HTML→Markdown conversion with callbacks for specific elements. Intercept links, images, headings, lists, and more. Use cases: domain-specific Markdown dialects (Obsidian, Notion), content filtering, URL rewriting, accessibility validation, analytics.
|
|
105
|
+
|
|
106
|
+
**[Visitor Pattern Guide →](./examples/visitor-pattern/)**
|
|
107
|
+
|
|
108
|
+
</details>
|
|
109
|
+
|
|
110
|
+
<details>
|
|
111
|
+
<summary><strong>Performance & Benchmarking</strong></summary>
|
|
112
|
+
|
|
113
|
+
Rust-powered core delivers 150–280 MB/s throughput (10-80× faster than pure Python alternatives). Includes benchmarking tools, memory profiling, streaming strategies, and optimization tips.
|
|
114
|
+
|
|
115
|
+
**[Performance Guide →](./examples/performance/)**
|
|
116
|
+
|
|
117
|
+
</details>
|
|
118
|
+
|
|
119
|
+
<details>
|
|
120
|
+
<summary><strong>Tag Preservation</strong></summary>
|
|
121
|
+
|
|
122
|
+
Keep specific HTML tags unconverted when Markdown isn't expressive enough. Useful for tables, SVG, custom elements, or when you need mixed HTML/Markdown output.
|
|
123
|
+
|
|
124
|
+
See language-specific documentation for `preserveTags` configuration.
|
|
125
|
+
|
|
126
|
+
</details>
|
|
127
|
+
|
|
128
|
+
<details>
|
|
129
|
+
<summary><strong>Secure by Default</strong></summary>
|
|
130
|
+
|
|
131
|
+
Built-in HTML sanitization prevents XSS attacks and malicious content. Powered by ammonia with safe defaults. Configurable via `sanitize` options.
|
|
132
|
+
|
|
133
|
+
</details>
|
|
134
|
+
|
|
135
|
+
## Contributing
|
|
136
|
+
|
|
137
|
+
Contributions are welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on:
|
|
138
|
+
|
|
139
|
+
- Setting up the development environment
|
|
140
|
+
- Running tests locally (Rust 95%+ coverage, language bindings 80%+)
|
|
141
|
+
- Submitting pull requests
|
|
142
|
+
- Reporting issues
|
|
143
|
+
|
|
144
|
+
All contributions must follow code quality standards enforced via pre-commit hooks (prek).
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
MIT License – see [LICENSE](LICENSE) for details. You can use html-to-markdown freely in both commercial and closed-source products, with no viral effects and no licensing restrictions beyond keeping the copyright and permission notice in copies of the Software.
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
export class WasmConversionOptionsHandle {
|
|
5
|
+
free(): void;
|
|
6
|
+
[Symbol.dispose](): void;
|
|
7
|
+
constructor(options?: WasmConversionOptions | null);
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export class WasmHtmlExtraction {
|
|
11
|
+
private constructor();
|
|
12
|
+
free(): void;
|
|
13
|
+
[Symbol.dispose](): void;
|
|
14
|
+
readonly inlineImages: WasmInlineImage[];
|
|
15
|
+
readonly markdown: string;
|
|
16
|
+
readonly warnings: WasmInlineImageWarning[];
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export class WasmInlineImage {
|
|
20
|
+
private constructor();
|
|
21
|
+
free(): void;
|
|
22
|
+
[Symbol.dispose](): void;
|
|
23
|
+
readonly attributes: Record<string, string>;
|
|
24
|
+
readonly dimensions: Uint32Array | undefined;
|
|
25
|
+
readonly description: string | undefined;
|
|
26
|
+
readonly data: Uint8Array;
|
|
27
|
+
readonly format: string;
|
|
28
|
+
readonly source: string;
|
|
29
|
+
readonly filename: string | undefined;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export class WasmInlineImageConfig {
|
|
33
|
+
free(): void;
|
|
34
|
+
[Symbol.dispose](): void;
|
|
35
|
+
constructor(max_decoded_size_bytes?: number | null);
|
|
36
|
+
set captureSvg(value: boolean);
|
|
37
|
+
set filenamePrefix(value: string | null | undefined);
|
|
38
|
+
set inferDimensions(value: boolean);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export class WasmInlineImageWarning {
|
|
42
|
+
private constructor();
|
|
43
|
+
free(): void;
|
|
44
|
+
[Symbol.dispose](): void;
|
|
45
|
+
readonly index: number;
|
|
46
|
+
readonly message: string;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export class WasmMetadataConfig {
|
|
50
|
+
free(): void;
|
|
51
|
+
[Symbol.dispose](): void;
|
|
52
|
+
/**
|
|
53
|
+
* Create a new metadata configuration with defaults
|
|
54
|
+
*
|
|
55
|
+
* All extraction types enabled by default with 1MB structured data limit
|
|
56
|
+
*/
|
|
57
|
+
constructor();
|
|
58
|
+
extract_links: boolean;
|
|
59
|
+
extract_images: boolean;
|
|
60
|
+
extract_headers: boolean;
|
|
61
|
+
extract_document: boolean;
|
|
62
|
+
extract_structured_data: boolean;
|
|
63
|
+
max_structured_data_size: number;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Convert HTML to Markdown
|
|
68
|
+
*
|
|
69
|
+
* # Arguments
|
|
70
|
+
*
|
|
71
|
+
* * `html` - The HTML string to convert
|
|
72
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
73
|
+
*
|
|
74
|
+
* # Example
|
|
75
|
+
*
|
|
76
|
+
* ```javascript
|
|
77
|
+
* import { convert } from '@kreuzberg/html-to-markdown-wasm';
|
|
78
|
+
*
|
|
79
|
+
* const html = '<h1>Hello World</h1>';
|
|
80
|
+
* const markdown = convert(html);
|
|
81
|
+
* console.log(markdown); // # Hello World
|
|
82
|
+
* ```
|
|
83
|
+
*/
|
|
84
|
+
export function convert(html: string, options?: WasmConversionOptions | null): string;
|
|
85
|
+
|
|
86
|
+
export function convertBytes(html: Uint8Array, options?: WasmConversionOptions | null): string;
|
|
87
|
+
|
|
88
|
+
export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Convert HTML bytes to Markdown with metadata extraction
|
|
92
|
+
*
|
|
93
|
+
* # Arguments
|
|
94
|
+
*
|
|
95
|
+
* * `html` - The HTML bytes to convert
|
|
96
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
97
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
98
|
+
*
|
|
99
|
+
* # Returns
|
|
100
|
+
*
|
|
101
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
102
|
+
*/
|
|
103
|
+
export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
104
|
+
|
|
105
|
+
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
106
|
+
|
|
107
|
+
export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Convert HTML to Markdown with metadata extraction
|
|
111
|
+
*
|
|
112
|
+
* # Arguments
|
|
113
|
+
*
|
|
114
|
+
* * `html` - The HTML string to convert
|
|
115
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
116
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
117
|
+
*
|
|
118
|
+
* # Returns
|
|
119
|
+
*
|
|
120
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
121
|
+
*
|
|
122
|
+
* # Example
|
|
123
|
+
*
|
|
124
|
+
* ```javascript
|
|
125
|
+
* import { convertWithMetadata, WasmMetadataConfig } from '@kreuzberg/html-to-markdown-wasm';
|
|
126
|
+
*
|
|
127
|
+
* const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
|
|
128
|
+
* const config = new WasmMetadataConfig();
|
|
129
|
+
* config.extract_headers = true;
|
|
130
|
+
* config.extract_links = true;
|
|
131
|
+
*
|
|
132
|
+
* const result = convertWithMetadata(html, null, config);
|
|
133
|
+
* console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
|
|
134
|
+
* console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
|
|
135
|
+
* console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
|
|
136
|
+
* ```
|
|
137
|
+
*/
|
|
138
|
+
export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
139
|
+
|
|
140
|
+
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
141
|
+
|
|
142
|
+
export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Initialize panic hook for better error messages in the browser
|
|
146
|
+
*/
|
|
147
|
+
export function init(): void;
|
|
148
|
+
|
|
149
|
+
export declare function initWasm(): Promise<void>;
|
|
150
|
+
export declare const wasmReady: Promise<void>;
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
export type WasmHeadingStyle = "underlined" | "atx" | "atxClosed";
|
|
154
|
+
export type WasmListIndentType = "spaces" | "tabs";
|
|
155
|
+
export type WasmWhitespaceMode = "normalized" | "strict";
|
|
156
|
+
export type WasmNewlineStyle = "spaces" | "backslash";
|
|
157
|
+
export type WasmCodeBlockStyle = "indented" | "backticks" | "tildes";
|
|
158
|
+
export type WasmHighlightStyle = "doubleEqual" | "html" | "bold" | "none";
|
|
159
|
+
export type WasmPreprocessingPreset = "minimal" | "standard" | "aggressive";
|
|
160
|
+
|
|
161
|
+
export interface WasmPreprocessingOptions {
|
|
162
|
+
enabled?: boolean;
|
|
163
|
+
preset?: WasmPreprocessingPreset;
|
|
164
|
+
removeNavigation?: boolean;
|
|
165
|
+
removeForms?: boolean;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
export interface WasmConversionOptions {
|
|
169
|
+
headingStyle?: WasmHeadingStyle;
|
|
170
|
+
listIndentType?: WasmListIndentType;
|
|
171
|
+
listIndentWidth?: number;
|
|
172
|
+
bullets?: string;
|
|
173
|
+
strongEmSymbol?: string;
|
|
174
|
+
escapeAsterisks?: boolean;
|
|
175
|
+
escapeUnderscores?: boolean;
|
|
176
|
+
escapeMisc?: boolean;
|
|
177
|
+
escapeAscii?: boolean;
|
|
178
|
+
codeLanguage?: string;
|
|
179
|
+
autolinks?: boolean;
|
|
180
|
+
defaultTitle?: boolean;
|
|
181
|
+
brInTables?: boolean;
|
|
182
|
+
hocrSpatialTables?: boolean;
|
|
183
|
+
highlightStyle?: WasmHighlightStyle;
|
|
184
|
+
extractMetadata?: boolean;
|
|
185
|
+
whitespaceMode?: WasmWhitespaceMode;
|
|
186
|
+
stripNewlines?: boolean;
|
|
187
|
+
wrap?: boolean;
|
|
188
|
+
wrapWidth?: number;
|
|
189
|
+
convertAsInline?: boolean;
|
|
190
|
+
subSymbol?: string;
|
|
191
|
+
supSymbol?: string;
|
|
192
|
+
newlineStyle?: WasmNewlineStyle;
|
|
193
|
+
codeBlockStyle?: WasmCodeBlockStyle;
|
|
194
|
+
keepInlineImagesIn?: string[];
|
|
195
|
+
preprocessing?: WasmPreprocessingOptions | null;
|
|
196
|
+
encoding?: string;
|
|
197
|
+
debug?: boolean;
|
|
198
|
+
stripTags?: string[];
|
|
199
|
+
preserveTags?: string[];
|
|
200
|
+
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import * as wasmModule from "./html_to_markdown_wasm_bg.wasm";
|
|
2
|
+
export * from "./html_to_markdown_wasm_bg.js";
|
|
3
|
+
import * as imports_mod from "./html_to_markdown_wasm_bg.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Create the error reported when a binding is used before the WebAssembly
 * exports have been installed.
 *
 * @returns {Error} A new Error carrying the "still initializing" guidance.
 */
function notReadyError() {
  const message =
    "html-to-markdown-wasm: WebAssembly bundle is still initializing. Await initWasm() before calling convert() in runtimes that load WASM asynchronously (e.g., Cloudflare Workers).";
  return new Error(message);
}
|
|
7
|
+
|
|
8
|
+
/**
 * Placeholder installed in place of the WebAssembly exports until they are
 * available. Reading any property throws the "not ready" error, except
 * `__esModule`, which reports `true`.
 */
const notReadyProxy = new Proxy({}, {
  get(_target, prop) {
    if (prop !== "__esModule") {
      throw notReadyError();
    }
    return true;
  },
});
|
|
16
|
+
|
|
17
|
+
// Exports object recorded by finalize(); undefined until then.
let wasmExports;
// Set to true by finalize() once the exports are installed and started.
let initialized = false;
// Memoized promise for the asynchronous initialization path.
let initPromise;

// Install the throwing placeholder so that bindings used before
// initialization fail with a descriptive error.
imports_mod.__wbg_set_wasm(notReadyProxy);
|
|
22
|
+
|
|
23
|
+
/**
 * Try to locate a WebAssembly exports object inside whatever shape the
 * bundler produced for the `.wasm` import.
 *
 * Recognized shapes, probed in this order:
 *   1. a value that itself has a `__wbindgen_start` function,
 *   2. a `WebAssembly.Instance`,
 *   3. an object with an `instance` that is a `WebAssembly.Instance`,
 *   4. an object whose `default` is a `WebAssembly.Instance`,
 *   5. an object whose `default.instance` is a `WebAssembly.Instance`.
 *
 * @param {*} value - Candidate module value.
 * @returns {object|null} The exports object, or `null` when none is found.
 */
function asExports(value) {
  if (!value) {
    return null;
  }
  if (typeof value.__wbindgen_start === "function") {
    return value;
  }

  const isInstance = (candidate) => candidate instanceof WebAssembly.Instance;

  if (isInstance(value)) {
    return value.exports;
  }
  if (typeof value !== "object") {
    return null;
  }

  if (isInstance(value.instance)) {
    return value.instance.exports;
  }

  const wrapped = value.default;
  if (isInstance(wrapped)) {
    return wrapped.exports;
  }
  if (wrapped && isInstance(wrapped.instance)) {
    return wrapped.instance.exports;
  }
  return null;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Install resolved WebAssembly exports: record them, hand them to the
 * bindings module, call `__wbindgen_start` when the exports provide it, and
 * mark the module as initialized.
 *
 * @param {object} exports - The WebAssembly exports object.
 * @returns {object} The same `exports` object.
 */
function finalize(exports) {
  wasmExports = exports;
  imports_mod.__wbg_set_wasm(exports);

  const hasStart = typeof exports.__wbindgen_start === "function";
  if (hasStart) {
    exports.__wbindgen_start();
  }

  // Only flagged after the start hook returned without throwing.
  initialized = true;
  return exports;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Best-effort synchronous initialization: if the imported `.wasm` value
 * already exposes an exports object, install it right away.
 */
function trySyncInit() {
  try {
    const detected = asExports(wasmModule);
    if (!detected) {
      return;
    }
    finalize(detected);
  } catch {
    // ignore and fall back to async init
  }
}

trySyncInit();
|
|
69
|
+
|
|
70
|
+
/**
 * Build the import object the WebAssembly binary links against: every
 * function exported by the bindings module whose name starts with `__wbg_`
 * or `__wbindgen_`, except the `__wbg_set_wasm` setter.
 *
 * @returns {Record<string, Record<string, Function>>} Import object keyed by
 *   the bindings module specifier.
 */
function buildWasmImports() {
  const glue = {};
  for (const [key, value] of Object.entries(imports_mod)) {
    const isGlueName = key.startsWith("__wbg_") || key.startsWith("__wbindgen_");
    if (isGlueName && key !== "__wbg_set_wasm" && typeof value === "function") {
      glue[key] = value;
    }
  }
  return { "./html_to_markdown_wasm_bg.js": glue };
}

/**
 * Return a promise for the initialized WebAssembly exports, starting the
 * asynchronous initialization at most once.
 *
 * Handles, in order: a promise-wrapped module, a function loader exposed as
 * `default`, a compiled `WebAssembly.Module` (bare or under `default`), and
 * finally any shape `asExports` recognizes.
 *
 * @returns {Promise<object>} Resolves with the WebAssembly exports object.
 * @throws {Error} (as a rejection) when no supported module shape is found.
 */
async function ensureInitPromise() {
  if (initialized) {
    return wasmExports;
  }
  if (!initPromise) {
    initPromise = (async () => {
      let source = wasmModule;

      // Handle promise-wrapped modules
      if (source && typeof source.then === "function") {
        source = await source;
      }

      // Handle function loaders (like @rollup/plugin-wasm). The loader takes
      // the WebAssembly import object; the original passed the module
      // namespace itself, which cannot satisfy the binary's imports.
      if (source && typeof source.default === "function") {
        source = await source.default(buildWasmImports());
      }

      // Handle WebAssembly.Module (Wrangler/esbuild), either nested under
      // `default` or returned bare by a loader.
      let compiled = null;
      if (source instanceof WebAssembly.Module) {
        compiled = source;
      } else if (source && source.default instanceof WebAssembly.Module) {
        compiled = source.default;
      }
      if (compiled) {
        const instance = await WebAssembly.instantiate(compiled, buildWasmImports());
        return finalize(instance.exports);
      }

      // Try standard export detection
      const exports = asExports(source);
      if (!exports) {
        throw new Error("html-to-markdown-wasm: failed to initialize WebAssembly bundle. Call initWasm() with a supported bundler configuration.");
      }
      return finalize(exports);
    })();
  }
  return initPromise;
}
|
|
111
|
+
|
|
112
|
+
export const wasmReady = ensureInitPromise();
|
|
113
|
+
|
|
114
|
+
/**
 * Public entry point for explicit initialization; delegates to
 * `ensureInitPromise()`.
 *
 * @returns {Promise<object>} Promise settled with the outcome of
 *   `ensureInitPromise()`.
 */
export async function initWasm() {
  const ready = await ensureInitPromise();
  return ready;
}
|