@kreuzberg/html-to-markdown-wasm 2.27.3 → 2.28.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/README.md +1 -0
- package/dist/html_to_markdown_wasm.d.ts +26 -0
- package/dist/html_to_markdown_wasm_bg.js +51 -0
- package/dist/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist/html_to_markdown_wasm_bg.wasm.d.ts +1 -0
- package/dist/package.json +1 -1
- package/dist-node/README.md +1 -0
- package/dist-node/html_to_markdown_wasm.d.ts +26 -0
- package/dist-node/html_to_markdown_wasm.js +52 -0
- package/dist-node/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-node/html_to_markdown_wasm_bg.wasm.d.ts +1 -0
- package/dist-node/package.json +1 -1
- package/dist-web/README.md +1 -0
- package/dist-web/html_to_markdown_wasm.d.ts +27 -0
- package/dist-web/html_to_markdown_wasm.js +51 -0
- package/dist-web/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-web/html_to_markdown_wasm_bg.wasm.d.ts +1 -0
- package/dist-web/package.json +1 -1
- package/package.json +1 -1
package/dist/README.md
CHANGED
|
@@ -64,6 +64,7 @@ High-performance HTML to Markdown conversion powered by Rust. Ships as native bi
|
|
|
64
64
|
- **12 language bindings** with consistent output across all runtimes
|
|
65
65
|
- **Metadata extraction** — title, headers, links, images, structured data (JSON-LD, Microdata, RDFa)
|
|
66
66
|
- **Visitor pattern** — custom callbacks for content filtering, URL rewriting, domain-specific dialects
|
|
67
|
+
- **Table extraction** — extract structured table data (cells, headers, rendered markdown) during conversion
|
|
67
68
|
- **Secure by default** — built-in HTML sanitization via ammonia
|
|
68
69
|
|
|
69
70
|
## Quick Start
|
|
@@ -151,6 +151,32 @@ export function convertWithMetadata(html: string, options?: WasmConversionOption
|
|
|
151
151
|
|
|
152
152
|
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
153
153
|
|
|
154
|
+
/**
|
|
155
|
+
* Convert HTML to Markdown with structured table extraction
|
|
156
|
+
*
|
|
157
|
+
* # Arguments
|
|
158
|
+
*
|
|
159
|
+
* * `html` - The HTML string to convert
|
|
160
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
161
|
+
* * `metadata_config` - Optional metadata extraction configuration
|
|
162
|
+
*
|
|
163
|
+
* # Returns
|
|
164
|
+
*
|
|
165
|
+
* JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
|
|
166
|
+
*
|
|
167
|
+
* # Example
|
|
168
|
+
*
|
|
169
|
+
* ```javascript
|
|
170
|
+
* import { convertWithTables } from 'html-to-markdown-wasm';
|
|
171
|
+
*
|
|
172
|
+
* const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
|
|
173
|
+
* const result = convertWithTables(html, null, null);
|
|
174
|
+
* console.log(result.content);
|
|
175
|
+
* console.log(result.tables[0].cells);
|
|
176
|
+
* ```
|
|
177
|
+
*/
|
|
178
|
+
export function convertWithTables(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
179
|
+
|
|
154
180
|
export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
|
|
155
181
|
|
|
156
182
|
/**
|
|
@@ -738,6 +738,57 @@ export function convertWithOptionsHandle(html, handle) {
|
|
|
738
738
|
}
|
|
739
739
|
}
|
|
740
740
|
|
|
741
|
+
/**
|
|
742
|
+
* Convert HTML to Markdown with structured table extraction
|
|
743
|
+
*
|
|
744
|
+
* # Arguments
|
|
745
|
+
*
|
|
746
|
+
* * `html` - The HTML string to convert
|
|
747
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
748
|
+
* * `metadata_config` - Optional metadata extraction configuration
|
|
749
|
+
*
|
|
750
|
+
* # Returns
|
|
751
|
+
*
|
|
752
|
+
* JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
|
|
753
|
+
*
|
|
754
|
+
* # Example
|
|
755
|
+
*
|
|
756
|
+
* ```javascript
|
|
757
|
+
* import { convertWithTables } from 'html-to-markdown-wasm';
|
|
758
|
+
*
|
|
759
|
+
* const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
|
|
760
|
+
* const result = convertWithTables(html, null, null);
|
|
761
|
+
* console.log(result.content);
|
|
762
|
+
* console.log(result.tables[0].cells);
|
|
763
|
+
* ```
|
|
764
|
+
* @param {string} html
|
|
765
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
766
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
767
|
+
* @returns {Record<string, string>}
|
|
768
|
+
*/
|
|
769
|
+
export function convertWithTables(html, options, metadata_config) {
|
|
770
|
+
try {
|
|
771
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
772
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
773
|
+
const len0 = WASM_VECTOR_LEN;
|
|
774
|
+
let ptr1 = 0;
|
|
775
|
+
if (!isLikeNone(metadata_config)) {
|
|
776
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
777
|
+
ptr1 = metadata_config.__destroy_into_raw();
|
|
778
|
+
}
|
|
779
|
+
wasm.convertWithTables(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
780
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
781
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
782
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
783
|
+
if (r2) {
|
|
784
|
+
throw takeObject(r1);
|
|
785
|
+
}
|
|
786
|
+
return takeObject(r0);
|
|
787
|
+
} finally {
|
|
788
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
|
|
741
792
|
/**
|
|
742
793
|
* @param {WasmConversionOptions | null | undefined} [options]
|
|
743
794
|
* @returns {WasmConversionOptionsHandle}
|
|
Binary file
|
|
@@ -15,6 +15,7 @@ export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) =>
|
|
|
15
15
|
export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
16
16
|
export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
17
17
|
export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
18
|
+
export const convertWithTables: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
18
19
|
export const createConversionOptionsHandle: (a: number, b: number) => void;
|
|
19
20
|
export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
20
21
|
export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
|
package/dist/package.json
CHANGED
package/dist-node/README.md
CHANGED
|
@@ -64,6 +64,7 @@ High-performance HTML to Markdown conversion powered by Rust. Ships as native bi
|
|
|
64
64
|
- **12 language bindings** with consistent output across all runtimes
|
|
65
65
|
- **Metadata extraction** — title, headers, links, images, structured data (JSON-LD, Microdata, RDFa)
|
|
66
66
|
- **Visitor pattern** — custom callbacks for content filtering, URL rewriting, domain-specific dialects
|
|
67
|
+
- **Table extraction** — extract structured table data (cells, headers, rendered markdown) during conversion
|
|
67
68
|
- **Secure by default** — built-in HTML sanitization via ammonia
|
|
68
69
|
|
|
69
70
|
## Quick Start
|
|
@@ -151,6 +151,32 @@ export function convertWithMetadata(html: string, options?: WasmConversionOption
|
|
|
151
151
|
|
|
152
152
|
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
153
153
|
|
|
154
|
+
/**
|
|
155
|
+
* Convert HTML to Markdown with structured table extraction
|
|
156
|
+
*
|
|
157
|
+
* # Arguments
|
|
158
|
+
*
|
|
159
|
+
* * `html` - The HTML string to convert
|
|
160
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
161
|
+
* * `metadata_config` - Optional metadata extraction configuration
|
|
162
|
+
*
|
|
163
|
+
* # Returns
|
|
164
|
+
*
|
|
165
|
+
* JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
|
|
166
|
+
*
|
|
167
|
+
* # Example
|
|
168
|
+
*
|
|
169
|
+
* ```javascript
|
|
170
|
+
* import { convertWithTables } from 'html-to-markdown-wasm';
|
|
171
|
+
*
|
|
172
|
+
* const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
|
|
173
|
+
* const result = convertWithTables(html, null, null);
|
|
174
|
+
* console.log(result.content);
|
|
175
|
+
* console.log(result.tables[0].cells);
|
|
176
|
+
* ```
|
|
177
|
+
*/
|
|
178
|
+
export function convertWithTables(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
179
|
+
|
|
154
180
|
export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
|
|
155
181
|
|
|
156
182
|
/**
|
|
@@ -758,6 +758,58 @@ function convertWithOptionsHandle(html, handle) {
|
|
|
758
758
|
}
|
|
759
759
|
exports.convertWithOptionsHandle = convertWithOptionsHandle;
|
|
760
760
|
|
|
761
|
+
/**
|
|
762
|
+
* Convert HTML to Markdown with structured table extraction
|
|
763
|
+
*
|
|
764
|
+
* # Arguments
|
|
765
|
+
*
|
|
766
|
+
* * `html` - The HTML string to convert
|
|
767
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
768
|
+
* * `metadata_config` - Optional metadata extraction configuration
|
|
769
|
+
*
|
|
770
|
+
* # Returns
|
|
771
|
+
*
|
|
772
|
+
* JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
|
|
773
|
+
*
|
|
774
|
+
* # Example
|
|
775
|
+
*
|
|
776
|
+
* ```javascript
|
|
777
|
+
* import { convertWithTables } from 'html-to-markdown-wasm';
|
|
778
|
+
*
|
|
779
|
+
* const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
|
|
780
|
+
* const result = convertWithTables(html, null, null);
|
|
781
|
+
* console.log(result.content);
|
|
782
|
+
* console.log(result.tables[0].cells);
|
|
783
|
+
* ```
|
|
784
|
+
* @param {string} html
|
|
785
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
786
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
787
|
+
* @returns {Record<string, string>}
|
|
788
|
+
*/
|
|
789
|
+
function convertWithTables(html, options, metadata_config) {
|
|
790
|
+
try {
|
|
791
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
792
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
793
|
+
const len0 = WASM_VECTOR_LEN;
|
|
794
|
+
let ptr1 = 0;
|
|
795
|
+
if (!isLikeNone(metadata_config)) {
|
|
796
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
797
|
+
ptr1 = metadata_config.__destroy_into_raw();
|
|
798
|
+
}
|
|
799
|
+
wasm.convertWithTables(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
800
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
801
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
802
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
803
|
+
if (r2) {
|
|
804
|
+
throw takeObject(r1);
|
|
805
|
+
}
|
|
806
|
+
return takeObject(r0);
|
|
807
|
+
} finally {
|
|
808
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
exports.convertWithTables = convertWithTables;
|
|
812
|
+
|
|
761
813
|
/**
|
|
762
814
|
* @param {WasmConversionOptions | null | undefined} [options]
|
|
763
815
|
* @returns {WasmConversionOptionsHandle}
|
|
Binary file
|
|
@@ -15,6 +15,7 @@ export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) =>
|
|
|
15
15
|
export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
16
16
|
export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
17
17
|
export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
18
|
+
export const convertWithTables: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
18
19
|
export const createConversionOptionsHandle: (a: number, b: number) => void;
|
|
19
20
|
export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
20
21
|
export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
|
package/dist-node/package.json
CHANGED
package/dist-web/README.md
CHANGED
|
@@ -64,6 +64,7 @@ High-performance HTML to Markdown conversion powered by Rust. Ships as native bi
|
|
|
64
64
|
- **12 language bindings** with consistent output across all runtimes
|
|
65
65
|
- **Metadata extraction** — title, headers, links, images, structured data (JSON-LD, Microdata, RDFa)
|
|
66
66
|
- **Visitor pattern** — custom callbacks for content filtering, URL rewriting, domain-specific dialects
|
|
67
|
+
- **Table extraction** — extract structured table data (cells, headers, rendered markdown) during conversion
|
|
67
68
|
- **Secure by default** — built-in HTML sanitization via ammonia
|
|
68
69
|
|
|
69
70
|
## Quick Start
|
|
@@ -151,6 +151,32 @@ export function convertWithMetadata(html: string, options?: WasmConversionOption
|
|
|
151
151
|
|
|
152
152
|
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
153
153
|
|
|
154
|
+
/**
|
|
155
|
+
* Convert HTML to Markdown with structured table extraction
|
|
156
|
+
*
|
|
157
|
+
* # Arguments
|
|
158
|
+
*
|
|
159
|
+
* * `html` - The HTML string to convert
|
|
160
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
161
|
+
* * `metadata_config` - Optional metadata extraction configuration
|
|
162
|
+
*
|
|
163
|
+
* # Returns
|
|
164
|
+
*
|
|
165
|
+
* JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
|
|
166
|
+
*
|
|
167
|
+
* # Example
|
|
168
|
+
*
|
|
169
|
+
* ```javascript
|
|
170
|
+
* import { convertWithTables } from 'html-to-markdown-wasm';
|
|
171
|
+
*
|
|
172
|
+
* const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
|
|
173
|
+
* const result = convertWithTables(html, null, null);
|
|
174
|
+
* console.log(result.content);
|
|
175
|
+
* console.log(result.tables[0].cells);
|
|
176
|
+
* ```
|
|
177
|
+
*/
|
|
178
|
+
export function convertWithTables(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
179
|
+
|
|
154
180
|
export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
|
|
155
181
|
|
|
156
182
|
/**
|
|
@@ -176,6 +202,7 @@ export interface InitOutput {
|
|
|
176
202
|
readonly convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
177
203
|
readonly convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
178
204
|
readonly convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
205
|
+
readonly convertWithTables: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
179
206
|
readonly createConversionOptionsHandle: (a: number, b: number) => void;
|
|
180
207
|
readonly wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
181
208
|
readonly wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
|
|
@@ -744,6 +744,57 @@ export function convertWithOptionsHandle(html, handle) {
|
|
|
744
744
|
}
|
|
745
745
|
}
|
|
746
746
|
|
|
747
|
+
/**
|
|
748
|
+
* Convert HTML to Markdown with structured table extraction
|
|
749
|
+
*
|
|
750
|
+
* # Arguments
|
|
751
|
+
*
|
|
752
|
+
* * `html` - The HTML string to convert
|
|
753
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
754
|
+
* * `metadata_config` - Optional metadata extraction configuration
|
|
755
|
+
*
|
|
756
|
+
* # Returns
|
|
757
|
+
*
|
|
758
|
+
* JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
|
|
759
|
+
*
|
|
760
|
+
* # Example
|
|
761
|
+
*
|
|
762
|
+
* ```javascript
|
|
763
|
+
* import { convertWithTables } from 'html-to-markdown-wasm';
|
|
764
|
+
*
|
|
765
|
+
* const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
|
|
766
|
+
* const result = convertWithTables(html, null, null);
|
|
767
|
+
* console.log(result.content);
|
|
768
|
+
* console.log(result.tables[0].cells);
|
|
769
|
+
* ```
|
|
770
|
+
* @param {string} html
|
|
771
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
772
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
773
|
+
* @returns {Record<string, string>}
|
|
774
|
+
*/
|
|
775
|
+
export function convertWithTables(html, options, metadata_config) {
|
|
776
|
+
try {
|
|
777
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
778
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
779
|
+
const len0 = WASM_VECTOR_LEN;
|
|
780
|
+
let ptr1 = 0;
|
|
781
|
+
if (!isLikeNone(metadata_config)) {
|
|
782
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
783
|
+
ptr1 = metadata_config.__destroy_into_raw();
|
|
784
|
+
}
|
|
785
|
+
wasm.convertWithTables(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
786
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
787
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
788
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
789
|
+
if (r2) {
|
|
790
|
+
throw takeObject(r1);
|
|
791
|
+
}
|
|
792
|
+
return takeObject(r0);
|
|
793
|
+
} finally {
|
|
794
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
795
|
+
}
|
|
796
|
+
}
|
|
797
|
+
|
|
747
798
|
/**
|
|
748
799
|
* @param {WasmConversionOptions | null | undefined} [options]
|
|
749
800
|
* @returns {WasmConversionOptionsHandle}
|
|
Binary file
|
|
@@ -15,6 +15,7 @@ export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) =>
|
|
|
15
15
|
export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
16
16
|
export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
17
17
|
export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
18
|
+
export const convertWithTables: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
18
19
|
export const createConversionOptionsHandle: (a: number, b: number) => void;
|
|
19
20
|
export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
20
21
|
export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
|
package/dist-web/package.json
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kreuzberg/html-to-markdown-wasm",
|
|
3
|
-
"version": "2.27.3",
|
|
3
|
+
"version": "2.28.1",
|
|
4
4
|
"description": "High-performance HTML to Markdown converter - WebAssembly bindings",
|
|
5
5
|
"main": "dist/html_to_markdown_wasm.js",
|
|
6
6
|
"types": "dist/html_to_markdown_wasm.d.ts",
|