@kreuzberg/html-to-markdown-wasm 2.27.3 → 2.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/README.md CHANGED
@@ -64,6 +64,7 @@ High-performance HTML to Markdown conversion powered by Rust. Ships as native bi
64
64
  - **12 language bindings** with consistent output across all runtimes
65
65
  - **Metadata extraction** — title, headers, links, images, structured data (JSON-LD, Microdata, RDFa)
66
66
  - **Visitor pattern** — custom callbacks for content filtering, URL rewriting, domain-specific dialects
67
+ - **Table extraction** — extract structured table data (cells, headers, rendered markdown) during conversion
67
68
  - **Secure by default** — built-in HTML sanitization via ammonia
68
69
 
69
70
  ## Quick Start
@@ -151,6 +151,32 @@ export function convertWithMetadata(html: string, options?: WasmConversionOption
151
151
 
152
152
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
153
153
 
154
+ /**
155
+ * Convert HTML to Markdown with structured table extraction
156
+ *
157
+ * # Arguments
158
+ *
159
+ * * `html` - The HTML string to convert
160
+ * * `options` - Optional conversion options (as a JavaScript object)
161
+ * * `metadata_config` - Optional metadata extraction configuration
162
+ *
163
+ * # Returns
164
+ *
165
+ * JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
166
+ *
167
+ * # Example
168
+ *
169
+ * ```javascript
170
+ * import { convertWithTables } from '@kreuzberg/html-to-markdown-wasm';
171
+ *
172
+ * const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
173
+ * const result = convertWithTables(html, null, null);
174
+ * console.log(result.content);
175
+ * console.log(result.tables[0].cells);
176
+ * ```
177
+ */
178
+ export function convertWithTables(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
179
+
154
180
  export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
155
181
 
156
182
  /**
@@ -738,6 +738,57 @@ export function convertWithOptionsHandle(html, handle) {
738
738
  }
739
739
  }
740
740
 
741
+ /**
742
+ * Convert HTML to Markdown with structured table extraction
743
+ *
744
+ * # Arguments
745
+ *
746
+ * * `html` - The HTML string to convert
747
+ * * `options` - Optional conversion options (as a JavaScript object)
748
+ * * `metadata_config` - Optional metadata extraction configuration
749
+ *
750
+ * # Returns
751
+ *
752
+ * JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
753
+ *
754
+ * # Example
755
+ *
756
+ * ```javascript
757
+ * import { convertWithTables } from '@kreuzberg/html-to-markdown-wasm';
758
+ *
759
+ * const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
760
+ * const result = convertWithTables(html, null, null);
761
+ * console.log(result.content);
762
+ * console.log(result.tables[0].cells);
763
+ * ```
764
+ * @param {string} html
765
+ * @param {WasmConversionOptions | null | undefined} [options]
766
+ * @param {WasmMetadataConfig | null} [metadata_config]
767
+ * @returns {{content: string, tables: Array<any>, metadata: (object|null)}}
768
+ */
769
+ export function convertWithTables(html, options, metadata_config) {
770
+ try {
771
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
772
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
773
+ const len0 = WASM_VECTOR_LEN;
774
+ let ptr1 = 0;
775
+ if (!isLikeNone(metadata_config)) {
776
+ _assertClass(metadata_config, WasmMetadataConfig);
777
+ ptr1 = metadata_config.__destroy_into_raw();
778
+ }
779
+ wasm.convertWithTables(retptr, ptr0, len0, addHeapObject(options), ptr1);
780
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
781
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
782
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
783
+ if (r2) {
784
+ throw takeObject(r1);
785
+ }
786
+ return takeObject(r0);
787
+ } finally {
788
+ wasm.__wbindgen_add_to_stack_pointer(16);
789
+ }
790
+ }
791
+
741
792
  /**
742
793
  * @param {WasmConversionOptions | null | undefined} [options]
743
794
  * @returns {WasmConversionOptionsHandle}
Binary file
@@ -15,6 +15,7 @@ export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) =>
15
15
  export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
16
16
  export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
17
17
  export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
18
+ export const convertWithTables: (a: number, b: number, c: number, d: number, e: number) => void;
18
19
  export const createConversionOptionsHandle: (a: number, b: number) => void;
19
20
  export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
20
21
  export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
package/dist/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
6
6
  ],
7
7
  "description": "HTML to Markdown conversion for WebAssembly targets",
8
- "version": "2.27.3",
8
+ "version": "2.28.0",
9
9
  "license": "MIT",
10
10
  "repository": {
11
11
  "type": "git",
@@ -64,6 +64,7 @@ High-performance HTML to Markdown conversion powered by Rust. Ships as native bi
64
64
  - **12 language bindings** with consistent output across all runtimes
65
65
  - **Metadata extraction** — title, headers, links, images, structured data (JSON-LD, Microdata, RDFa)
66
66
  - **Visitor pattern** — custom callbacks for content filtering, URL rewriting, domain-specific dialects
67
+ - **Table extraction** — extract structured table data (cells, headers, rendered markdown) during conversion
67
68
  - **Secure by default** — built-in HTML sanitization via ammonia
68
69
 
69
70
  ## Quick Start
@@ -151,6 +151,32 @@ export function convertWithMetadata(html: string, options?: WasmConversionOption
151
151
 
152
152
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
153
153
 
154
+ /**
155
+ * Convert HTML to Markdown with structured table extraction
156
+ *
157
+ * # Arguments
158
+ *
159
+ * * `html` - The HTML string to convert
160
+ * * `options` - Optional conversion options (as a JavaScript object)
161
+ * * `metadata_config` - Optional metadata extraction configuration
162
+ *
163
+ * # Returns
164
+ *
165
+ * JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
166
+ *
167
+ * # Example
168
+ *
169
+ * ```javascript
170
+ * import { convertWithTables } from '@kreuzberg/html-to-markdown-wasm';
171
+ *
172
+ * const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
173
+ * const result = convertWithTables(html, null, null);
174
+ * console.log(result.content);
175
+ * console.log(result.tables[0].cells);
176
+ * ```
177
+ */
178
+ export function convertWithTables(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
179
+
154
180
  export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
155
181
 
156
182
  /**
@@ -758,6 +758,58 @@ function convertWithOptionsHandle(html, handle) {
758
758
  }
759
759
  exports.convertWithOptionsHandle = convertWithOptionsHandle;
760
760
 
761
+ /**
762
+ * Convert HTML to Markdown with structured table extraction
763
+ *
764
+ * # Arguments
765
+ *
766
+ * * `html` - The HTML string to convert
767
+ * * `options` - Optional conversion options (as a JavaScript object)
768
+ * * `metadata_config` - Optional metadata extraction configuration
769
+ *
770
+ * # Returns
771
+ *
772
+ * JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
773
+ *
774
+ * # Example
775
+ *
776
+ * ```javascript
777
+ * import { convertWithTables } from '@kreuzberg/html-to-markdown-wasm';
778
+ *
779
+ * const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
780
+ * const result = convertWithTables(html, null, null);
781
+ * console.log(result.content);
782
+ * console.log(result.tables[0].cells);
783
+ * ```
784
+ * @param {string} html
785
+ * @param {WasmConversionOptions | null | undefined} [options]
786
+ * @param {WasmMetadataConfig | null} [metadata_config]
787
+ * @returns {{content: string, tables: Array<any>, metadata: (object|null)}}
788
+ */
789
+ function convertWithTables(html, options, metadata_config) {
790
+ try {
791
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
792
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
793
+ const len0 = WASM_VECTOR_LEN;
794
+ let ptr1 = 0;
795
+ if (!isLikeNone(metadata_config)) {
796
+ _assertClass(metadata_config, WasmMetadataConfig);
797
+ ptr1 = metadata_config.__destroy_into_raw();
798
+ }
799
+ wasm.convertWithTables(retptr, ptr0, len0, addHeapObject(options), ptr1);
800
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
801
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
802
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
803
+ if (r2) {
804
+ throw takeObject(r1);
805
+ }
806
+ return takeObject(r0);
807
+ } finally {
808
+ wasm.__wbindgen_add_to_stack_pointer(16);
809
+ }
810
+ }
811
+ exports.convertWithTables = convertWithTables;
812
+
761
813
  /**
762
814
  * @param {WasmConversionOptions | null | undefined} [options]
763
815
  * @returns {WasmConversionOptionsHandle}
@@ -15,6 +15,7 @@ export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) =>
15
15
  export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
16
16
  export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
17
17
  export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
18
+ export const convertWithTables: (a: number, b: number, c: number, d: number, e: number) => void;
18
19
  export const createConversionOptionsHandle: (a: number, b: number) => void;
19
20
  export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
20
21
  export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
@@ -4,7 +4,7 @@
4
4
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
5
5
  ],
6
6
  "description": "HTML to Markdown conversion for WebAssembly targets",
7
- "version": "2.27.3",
7
+ "version": "2.28.0",
8
8
  "license": "MIT",
9
9
  "repository": {
10
10
  "type": "git",
@@ -64,6 +64,7 @@ High-performance HTML to Markdown conversion powered by Rust. Ships as native bi
64
64
  - **12 language bindings** with consistent output across all runtimes
65
65
  - **Metadata extraction** — title, headers, links, images, structured data (JSON-LD, Microdata, RDFa)
66
66
  - **Visitor pattern** — custom callbacks for content filtering, URL rewriting, domain-specific dialects
67
+ - **Table extraction** — extract structured table data (cells, headers, rendered markdown) during conversion
67
68
  - **Secure by default** — built-in HTML sanitization via ammonia
68
69
 
69
70
  ## Quick Start
@@ -151,6 +151,32 @@ export function convertWithMetadata(html: string, options?: WasmConversionOption
151
151
 
152
152
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
153
153
 
154
+ /**
155
+ * Convert HTML to Markdown with structured table extraction
156
+ *
157
+ * # Arguments
158
+ *
159
+ * * `html` - The HTML string to convert
160
+ * * `options` - Optional conversion options (as a JavaScript object)
161
+ * * `metadata_config` - Optional metadata extraction configuration
162
+ *
163
+ * # Returns
164
+ *
165
+ * JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
166
+ *
167
+ * # Example
168
+ *
169
+ * ```javascript
170
+ * import { convertWithTables } from '@kreuzberg/html-to-markdown-wasm';
171
+ *
172
+ * const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
173
+ * const result = convertWithTables(html, null, null);
174
+ * console.log(result.content);
175
+ * console.log(result.tables[0].cells);
176
+ * ```
177
+ */
178
+ export function convertWithTables(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
179
+
154
180
  export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
155
181
 
156
182
  /**
@@ -176,6 +202,7 @@ export interface InitOutput {
176
202
  readonly convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
177
203
  readonly convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
178
204
  readonly convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
205
+ readonly convertWithTables: (a: number, b: number, c: number, d: number, e: number) => void;
179
206
  readonly createConversionOptionsHandle: (a: number, b: number) => void;
180
207
  readonly wasmconversionoptionshandle_new: (a: number, b: number) => void;
181
208
  readonly wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
@@ -744,6 +744,57 @@ export function convertWithOptionsHandle(html, handle) {
744
744
  }
745
745
  }
746
746
 
747
+ /**
748
+ * Convert HTML to Markdown with structured table extraction
749
+ *
750
+ * # Arguments
751
+ *
752
+ * * `html` - The HTML string to convert
753
+ * * `options` - Optional conversion options (as a JavaScript object)
754
+ * * `metadata_config` - Optional metadata extraction configuration
755
+ *
756
+ * # Returns
757
+ *
758
+ * JavaScript object with `content` (string), `tables` (array), and `metadata` (object|null) fields
759
+ *
760
+ * # Example
761
+ *
762
+ * ```javascript
763
+ * import { convertWithTables } from '@kreuzberg/html-to-markdown-wasm';
764
+ *
765
+ * const html = '<table><tr><th>Name</th></tr><tr><td>Alice</td></tr></table>';
766
+ * const result = convertWithTables(html, null, null);
767
+ * console.log(result.content);
768
+ * console.log(result.tables[0].cells);
769
+ * ```
770
+ * @param {string} html
771
+ * @param {WasmConversionOptions | null | undefined} [options]
772
+ * @param {WasmMetadataConfig | null} [metadata_config]
773
+ * @returns {{content: string, tables: Array<any>, metadata: (object|null)}}
774
+ */
775
+ export function convertWithTables(html, options, metadata_config) {
776
+ try {
777
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
778
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
779
+ const len0 = WASM_VECTOR_LEN;
780
+ let ptr1 = 0;
781
+ if (!isLikeNone(metadata_config)) {
782
+ _assertClass(metadata_config, WasmMetadataConfig);
783
+ ptr1 = metadata_config.__destroy_into_raw();
784
+ }
785
+ wasm.convertWithTables(retptr, ptr0, len0, addHeapObject(options), ptr1);
786
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
787
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
788
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
789
+ if (r2) {
790
+ throw takeObject(r1);
791
+ }
792
+ return takeObject(r0);
793
+ } finally {
794
+ wasm.__wbindgen_add_to_stack_pointer(16);
795
+ }
796
+ }
797
+
747
798
  /**
748
799
  * @param {WasmConversionOptions | null | undefined} [options]
749
800
  * @returns {WasmConversionOptionsHandle}
@@ -15,6 +15,7 @@ export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) =>
15
15
  export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
16
16
  export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
17
17
  export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
18
+ export const convertWithTables: (a: number, b: number, c: number, d: number, e: number) => void;
18
19
  export const createConversionOptionsHandle: (a: number, b: number) => void;
19
20
  export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
20
21
  export const wasmhtmlextraction_inlineImages: (a: number, b: number) => void;
@@ -5,7 +5,7 @@
5
5
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
6
6
  ],
7
7
  "description": "HTML to Markdown conversion for WebAssembly targets",
8
- "version": "2.27.3",
8
+ "version": "2.28.0",
9
9
  "license": "MIT",
10
10
  "repository": {
11
11
  "type": "git",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kreuzberg/html-to-markdown-wasm",
3
- "version": "2.27.3",
3
+ "version": "2.28.0",
4
4
  "description": "High-performance HTML to Markdown converter - WebAssembly bindings",
5
5
  "main": "dist/html_to_markdown_wasm.js",
6
6
  "types": "dist/html_to_markdown_wasm.d.ts",