html-to-markdown-wasm 2.14.2 → 2.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/README.md +2 -2
- package/dist/html_to_markdown_wasm.d.ts +63 -0
- package/dist/html_to_markdown_wasm_bg.js +224 -0
- package/dist/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist/html_to_markdown_wasm_bg.wasm.d.ts +16 -0
- package/dist/package.json +1 -1
- package/dist-node/README.md +2 -2
- package/dist-node/html_to_markdown_wasm.d.ts +63 -0
- package/dist-node/html_to_markdown_wasm.js +227 -0
- package/dist-node/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-node/html_to_markdown_wasm_bg.wasm.d.ts +16 -0
- package/dist-node/package.json +1 -1
- package/dist-web/README.md +2 -2
- package/dist-web/html_to_markdown_wasm.d.ts +79 -0
- package/dist-web/html_to_markdown_wasm.js +220 -0
- package/dist-web/html_to_markdown_wasm_bg.wasm +0 -0
- package/dist-web/html_to_markdown_wasm_bg.wasm.d.ts +16 -0
- package/dist-web/package.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -17,7 +17,7 @@ Runs anywhere: Node.js, Deno, Bun, browsers, and edge runtimes.
|
|
|
17
17
|
[](https://rubygems.org/gems/html-to-markdown)
|
|
18
18
|
[](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
|
|
19
19
|
[](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
20
|
-
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
20
|
+
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
|
|
21
21
|
[](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
|
|
22
22
|
|
|
23
23
|
## Performance
|
package/dist/README.md
CHANGED
|
@@ -11,7 +11,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
|
|
|
11
11
|
[](https://hex.pm/packages/html_to_markdown)
|
|
12
12
|
[](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
|
|
13
13
|
[](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
14
|
-
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
14
|
+
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
|
|
15
15
|
[](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
|
|
16
16
|
[](https://discord.gg/pXxagNK2zN)
|
|
17
17
|
|
|
@@ -377,7 +377,7 @@ import (
|
|
|
377
377
|
"fmt"
|
|
378
378
|
"log"
|
|
379
379
|
|
|
380
|
-
"github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown"
|
|
380
|
+
"github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown"
|
|
381
381
|
)
|
|
382
382
|
|
|
383
383
|
func main() {
|
|
@@ -46,6 +46,23 @@ export class WasmInlineImageWarning {
|
|
|
46
46
|
readonly message: string;
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
+
export class WasmMetadataConfig {
|
|
50
|
+
free(): void;
|
|
51
|
+
[Symbol.dispose](): void;
|
|
52
|
+
/**
|
|
53
|
+
* Create a new metadata configuration with defaults
|
|
54
|
+
*
|
|
55
|
+
* All extraction types enabled by default with 1MB structured data limit
|
|
56
|
+
*/
|
|
57
|
+
constructor();
|
|
58
|
+
extract_links: boolean;
|
|
59
|
+
extract_images: boolean;
|
|
60
|
+
extract_headers: boolean;
|
|
61
|
+
extract_document: boolean;
|
|
62
|
+
extract_structured_data: boolean;
|
|
63
|
+
max_structured_data_size: number;
|
|
64
|
+
}
|
|
65
|
+
|
|
49
66
|
/**
|
|
50
67
|
* Convert HTML to Markdown
|
|
51
68
|
*
|
|
@@ -70,10 +87,56 @@ export function convertBytes(html: Uint8Array, options?: WasmConversionOptions |
|
|
|
70
87
|
|
|
71
88
|
export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
72
89
|
|
|
90
|
+
/**
|
|
91
|
+
* Convert HTML bytes to Markdown with metadata extraction
|
|
92
|
+
*
|
|
93
|
+
* # Arguments
|
|
94
|
+
*
|
|
95
|
+
* * `html` - The HTML bytes to convert
|
|
96
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
97
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
98
|
+
*
|
|
99
|
+
* # Returns
|
|
100
|
+
*
|
|
101
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
102
|
+
*/
|
|
103
|
+
export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
104
|
+
|
|
73
105
|
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
74
106
|
|
|
75
107
|
export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
76
108
|
|
|
109
|
+
/**
|
|
110
|
+
* Convert HTML to Markdown with metadata extraction
|
|
111
|
+
*
|
|
112
|
+
* # Arguments
|
|
113
|
+
*
|
|
114
|
+
* * `html` - The HTML string to convert
|
|
115
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
116
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
117
|
+
*
|
|
118
|
+
* # Returns
|
|
119
|
+
*
|
|
120
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
121
|
+
*
|
|
122
|
+
* # Example
|
|
123
|
+
*
|
|
124
|
+
* ```javascript
|
|
125
|
+
* import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
|
|
126
|
+
*
|
|
127
|
+
* const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
|
|
128
|
+
* const config = new WasmMetadataConfig();
|
|
129
|
+
* config.extractHeaders = true;
|
|
130
|
+
* config.extractLinks = true;
|
|
131
|
+
*
|
|
132
|
+
* const result = convertWithMetadata(html, null, config);
|
|
133
|
+
* console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
|
|
134
|
+
* console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
|
|
135
|
+
* console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
|
|
136
|
+
* ```
|
|
137
|
+
*/
|
|
138
|
+
export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
139
|
+
|
|
77
140
|
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
78
141
|
|
|
79
142
|
export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
|
|
@@ -253,6 +253,10 @@ const WasmInlineImageWarningFinalization = (typeof FinalizationRegistry === 'und
|
|
|
253
253
|
? { register: () => {}, unregister: () => {} }
|
|
254
254
|
: new FinalizationRegistry(ptr => wasm.__wbg_wasminlineimagewarning_free(ptr >>> 0, 1));
|
|
255
255
|
|
|
256
|
+
const WasmMetadataConfigFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
257
|
+
? { register: () => {}, unregister: () => {} }
|
|
258
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_wasmmetadataconfig_free(ptr >>> 0, 1));
|
|
259
|
+
|
|
256
260
|
export class WasmConversionOptionsHandle {
|
|
257
261
|
static __wrap(ptr) {
|
|
258
262
|
ptr = ptr >>> 0;
|
|
@@ -598,6 +602,112 @@ export class WasmInlineImageWarning {
|
|
|
598
602
|
}
|
|
599
603
|
if (Symbol.dispose) WasmInlineImageWarning.prototype[Symbol.dispose] = WasmInlineImageWarning.prototype.free;
|
|
600
604
|
|
|
605
|
+
/**
|
|
606
|
+
* Metadata extraction configuration
|
|
607
|
+
*/
|
|
608
|
+
export class WasmMetadataConfig {
|
|
609
|
+
__destroy_into_raw() {
|
|
610
|
+
const ptr = this.__wbg_ptr;
|
|
611
|
+
this.__wbg_ptr = 0;
|
|
612
|
+
WasmMetadataConfigFinalization.unregister(this);
|
|
613
|
+
return ptr;
|
|
614
|
+
}
|
|
615
|
+
free() {
|
|
616
|
+
const ptr = this.__destroy_into_raw();
|
|
617
|
+
wasm.__wbg_wasmmetadataconfig_free(ptr, 0);
|
|
618
|
+
}
|
|
619
|
+
/**
|
|
620
|
+
* @returns {boolean}
|
|
621
|
+
*/
|
|
622
|
+
get extract_links() {
|
|
623
|
+
const ret = wasm.wasmmetadataconfig_extract_links(this.__wbg_ptr);
|
|
624
|
+
return ret !== 0;
|
|
625
|
+
}
|
|
626
|
+
/**
|
|
627
|
+
* @returns {boolean}
|
|
628
|
+
*/
|
|
629
|
+
get extract_images() {
|
|
630
|
+
const ret = wasm.wasmmetadataconfig_extract_images(this.__wbg_ptr);
|
|
631
|
+
return ret !== 0;
|
|
632
|
+
}
|
|
633
|
+
/**
|
|
634
|
+
* @returns {boolean}
|
|
635
|
+
*/
|
|
636
|
+
get extract_headers() {
|
|
637
|
+
const ret = wasm.wasmmetadataconfig_extract_headers(this.__wbg_ptr);
|
|
638
|
+
return ret !== 0;
|
|
639
|
+
}
|
|
640
|
+
/**
|
|
641
|
+
* @returns {boolean}
|
|
642
|
+
*/
|
|
643
|
+
get extract_document() {
|
|
644
|
+
const ret = wasm.wasmmetadataconfig_extract_document(this.__wbg_ptr);
|
|
645
|
+
return ret !== 0;
|
|
646
|
+
}
|
|
647
|
+
/**
|
|
648
|
+
* @param {boolean} value
|
|
649
|
+
*/
|
|
650
|
+
set extract_links(value) {
|
|
651
|
+
wasm.wasmmetadataconfig_set_extract_links(this.__wbg_ptr, value);
|
|
652
|
+
}
|
|
653
|
+
/**
|
|
654
|
+
* @param {boolean} value
|
|
655
|
+
*/
|
|
656
|
+
set extract_images(value) {
|
|
657
|
+
wasm.wasmmetadataconfig_set_extract_images(this.__wbg_ptr, value);
|
|
658
|
+
}
|
|
659
|
+
/**
|
|
660
|
+
* @param {boolean} value
|
|
661
|
+
*/
|
|
662
|
+
set extract_headers(value) {
|
|
663
|
+
wasm.wasmmetadataconfig_set_extract_headers(this.__wbg_ptr, value);
|
|
664
|
+
}
|
|
665
|
+
/**
|
|
666
|
+
* @param {boolean} value
|
|
667
|
+
*/
|
|
668
|
+
set extract_document(value) {
|
|
669
|
+
wasm.wasmmetadataconfig_set_extract_document(this.__wbg_ptr, value);
|
|
670
|
+
}
|
|
671
|
+
/**
|
|
672
|
+
* @returns {boolean}
|
|
673
|
+
*/
|
|
674
|
+
get extract_structured_data() {
|
|
675
|
+
const ret = wasm.wasmmetadataconfig_extract_structured_data(this.__wbg_ptr);
|
|
676
|
+
return ret !== 0;
|
|
677
|
+
}
|
|
678
|
+
/**
|
|
679
|
+
* @returns {number}
|
|
680
|
+
*/
|
|
681
|
+
get max_structured_data_size() {
|
|
682
|
+
const ret = wasm.wasmmetadataconfig_max_structured_data_size(this.__wbg_ptr);
|
|
683
|
+
return ret >>> 0;
|
|
684
|
+
}
|
|
685
|
+
/**
|
|
686
|
+
* @param {boolean} value
|
|
687
|
+
*/
|
|
688
|
+
set extract_structured_data(value) {
|
|
689
|
+
wasm.wasmmetadataconfig_set_extract_structured_data(this.__wbg_ptr, value);
|
|
690
|
+
}
|
|
691
|
+
/**
|
|
692
|
+
* @param {number} value
|
|
693
|
+
*/
|
|
694
|
+
set max_structured_data_size(value) {
|
|
695
|
+
wasm.wasmmetadataconfig_set_max_structured_data_size(this.__wbg_ptr, value);
|
|
696
|
+
}
|
|
697
|
+
/**
|
|
698
|
+
* Create a new metadata configuration with defaults
|
|
699
|
+
*
|
|
700
|
+
* All extraction types enabled by default with 1MB structured data limit
|
|
701
|
+
*/
|
|
702
|
+
constructor() {
|
|
703
|
+
const ret = wasm.wasmmetadataconfig_new();
|
|
704
|
+
this.__wbg_ptr = ret >>> 0;
|
|
705
|
+
WasmMetadataConfigFinalization.register(this, this.__wbg_ptr, this);
|
|
706
|
+
return this;
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
if (Symbol.dispose) WasmMetadataConfig.prototype[Symbol.dispose] = WasmMetadataConfig.prototype.free;
|
|
710
|
+
|
|
601
711
|
/**
|
|
602
712
|
* Convert HTML to Markdown
|
|
603
713
|
*
|
|
@@ -703,6 +813,44 @@ export function convertBytesWithInlineImages(html, options, image_config) {
|
|
|
703
813
|
}
|
|
704
814
|
}
|
|
705
815
|
|
|
816
|
+
/**
|
|
817
|
+
* Convert HTML bytes to Markdown with metadata extraction
|
|
818
|
+
*
|
|
819
|
+
* # Arguments
|
|
820
|
+
*
|
|
821
|
+
* * `html` - The HTML bytes to convert
|
|
822
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
823
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
824
|
+
*
|
|
825
|
+
* # Returns
|
|
826
|
+
*
|
|
827
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
828
|
+
* @param {Uint8Array} html
|
|
829
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
830
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
831
|
+
* @returns {Record<string, string>}
|
|
832
|
+
*/
|
|
833
|
+
export function convertBytesWithMetadata(html, options, metadata_config) {
|
|
834
|
+
try {
|
|
835
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
836
|
+
let ptr0 = 0;
|
|
837
|
+
if (!isLikeNone(metadata_config)) {
|
|
838
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
839
|
+
ptr0 = metadata_config.__destroy_into_raw();
|
|
840
|
+
}
|
|
841
|
+
wasm.convertBytesWithMetadata(retptr, addHeapObject(html), addHeapObject(options), ptr0);
|
|
842
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
843
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
844
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
845
|
+
if (r2) {
|
|
846
|
+
throw takeObject(r1);
|
|
847
|
+
}
|
|
848
|
+
return takeObject(r0);
|
|
849
|
+
} finally {
|
|
850
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
|
|
706
854
|
/**
|
|
707
855
|
* @param {Uint8Array} html
|
|
708
856
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -763,6 +911,62 @@ export function convertWithInlineImages(html, options, image_config) {
|
|
|
763
911
|
}
|
|
764
912
|
}
|
|
765
913
|
|
|
914
|
+
/**
|
|
915
|
+
* Convert HTML to Markdown with metadata extraction
|
|
916
|
+
*
|
|
917
|
+
* # Arguments
|
|
918
|
+
*
|
|
919
|
+
* * `html` - The HTML string to convert
|
|
920
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
921
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
922
|
+
*
|
|
923
|
+
* # Returns
|
|
924
|
+
*
|
|
925
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
926
|
+
*
|
|
927
|
+
* # Example
|
|
928
|
+
*
|
|
929
|
+
* ```javascript
|
|
930
|
+
* import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
|
|
931
|
+
*
|
|
932
|
+
* const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
|
|
933
|
+
* const config = new WasmMetadataConfig();
|
|
934
|
+
* config.extractHeaders = true;
|
|
935
|
+
* config.extractLinks = true;
|
|
936
|
+
*
|
|
937
|
+
* const result = convertWithMetadata(html, null, config);
|
|
938
|
+
* console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
|
|
939
|
+
* console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
|
|
940
|
+
* console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
|
|
941
|
+
* ```
|
|
942
|
+
* @param {string} html
|
|
943
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
944
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
945
|
+
* @returns {Record<string, string>}
|
|
946
|
+
*/
|
|
947
|
+
export function convertWithMetadata(html, options, metadata_config) {
|
|
948
|
+
try {
|
|
949
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
950
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
951
|
+
const len0 = WASM_VECTOR_LEN;
|
|
952
|
+
let ptr1 = 0;
|
|
953
|
+
if (!isLikeNone(metadata_config)) {
|
|
954
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
955
|
+
ptr1 = metadata_config.__destroy_into_raw();
|
|
956
|
+
}
|
|
957
|
+
wasm.convertWithMetadata(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
958
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
959
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
960
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
961
|
+
if (r2) {
|
|
962
|
+
throw takeObject(r1);
|
|
963
|
+
}
|
|
964
|
+
return takeObject(r0);
|
|
965
|
+
} finally {
|
|
966
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
967
|
+
}
|
|
968
|
+
}
|
|
969
|
+
|
|
766
970
|
/**
|
|
767
971
|
* @param {string} html
|
|
768
972
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -1048,6 +1252,11 @@ export function __wbg_new_1ba21ce319a06297() {
|
|
|
1048
1252
|
return addHeapObject(ret);
|
|
1049
1253
|
};
|
|
1050
1254
|
|
|
1255
|
+
export function __wbg_new_25f239778d6112b9() {
|
|
1256
|
+
const ret = new Array();
|
|
1257
|
+
return addHeapObject(ret);
|
|
1258
|
+
};
|
|
1259
|
+
|
|
1051
1260
|
export function __wbg_new_6421f6084cc5bc5a(arg0) {
|
|
1052
1261
|
const ret = new Uint8Array(getObject(arg0));
|
|
1053
1262
|
return addHeapObject(ret);
|
|
@@ -1086,6 +1295,15 @@ export function __wbg_set_3f1d0b984ed272ed(arg0, arg1, arg2) {
|
|
|
1086
1295
|
getObject(arg0)[takeObject(arg1)] = takeObject(arg2);
|
|
1087
1296
|
};
|
|
1088
1297
|
|
|
1298
|
+
export function __wbg_set_781438a03c0c3c81() { return handleError(function (arg0, arg1, arg2) {
|
|
1299
|
+
const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
|
|
1300
|
+
return ret;
|
|
1301
|
+
}, arguments) };
|
|
1302
|
+
|
|
1303
|
+
export function __wbg_set_7df433eea03a5c14(arg0, arg1, arg2) {
|
|
1304
|
+
getObject(arg0)[arg1 >>> 0] = takeObject(arg2);
|
|
1305
|
+
};
|
|
1306
|
+
|
|
1089
1307
|
export function __wbg_set_efaaf145b9377369(arg0, arg1, arg2) {
|
|
1090
1308
|
const ret = getObject(arg0).set(getObject(arg1), getObject(arg2));
|
|
1091
1309
|
return addHeapObject(ret);
|
|
@@ -1126,6 +1344,12 @@ export function __wbindgen_cast_4625c577ab2ec9ee(arg0) {
|
|
|
1126
1344
|
return addHeapObject(ret);
|
|
1127
1345
|
};
|
|
1128
1346
|
|
|
1347
|
+
export function __wbindgen_cast_d6cd19b81560fd6e(arg0) {
|
|
1348
|
+
// Cast intrinsic for `F64 -> Externref`.
|
|
1349
|
+
const ret = arg0;
|
|
1350
|
+
return addHeapObject(ret);
|
|
1351
|
+
};
|
|
1352
|
+
|
|
1129
1353
|
export function __wbindgen_object_clone_ref(arg0) {
|
|
1130
1354
|
const ret = getObject(arg0);
|
|
1131
1355
|
return addHeapObject(ret);
|
|
Binary file
|
|
@@ -6,11 +6,14 @@ export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
|
|
|
6
6
|
export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
|
|
7
7
|
export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
|
|
8
8
|
export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
|
|
9
|
+
export const __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
|
|
9
10
|
export const convert: (a: number, b: number, c: number, d: number) => void;
|
|
10
11
|
export const convertBytes: (a: number, b: number, c: number) => void;
|
|
11
12
|
export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
|
|
13
|
+
export const convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
|
|
12
14
|
export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
|
|
13
15
|
export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
16
|
+
export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
14
17
|
export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
15
18
|
export const createConversionOptionsHandle: (a: number, b: number) => void;
|
|
16
19
|
export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
@@ -30,6 +33,19 @@ export const wasminlineimageconfig_set_filenamePrefix: (a: number, b: number, c:
|
|
|
30
33
|
export const wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
|
|
31
34
|
export const wasminlineimagewarning_index: (a: number) => number;
|
|
32
35
|
export const wasminlineimagewarning_message: (a: number, b: number) => void;
|
|
36
|
+
export const wasmmetadataconfig_extract_document: (a: number) => number;
|
|
37
|
+
export const wasmmetadataconfig_extract_headers: (a: number) => number;
|
|
38
|
+
export const wasmmetadataconfig_extract_images: (a: number) => number;
|
|
39
|
+
export const wasmmetadataconfig_extract_links: (a: number) => number;
|
|
40
|
+
export const wasmmetadataconfig_extract_structured_data: (a: number) => number;
|
|
41
|
+
export const wasmmetadataconfig_max_structured_data_size: (a: number) => number;
|
|
42
|
+
export const wasmmetadataconfig_new: () => number;
|
|
43
|
+
export const wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
|
|
44
|
+
export const wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
|
|
45
|
+
export const wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
|
|
46
|
+
export const wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
|
|
47
|
+
export const wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
|
|
48
|
+
export const wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
|
|
33
49
|
export const init: () => void;
|
|
34
50
|
export const __wbindgen_export: (a: number, b: number) => number;
|
|
35
51
|
export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
|
package/dist/package.json
CHANGED
package/dist-node/README.md
CHANGED
|
@@ -11,7 +11,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
|
|
|
11
11
|
[](https://hex.pm/packages/html_to_markdown)
|
|
12
12
|
[](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
|
|
13
13
|
[](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
14
|
-
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
14
|
+
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
|
|
15
15
|
[](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
|
|
16
16
|
[](https://discord.gg/pXxagNK2zN)
|
|
17
17
|
|
|
@@ -377,7 +377,7 @@ import (
|
|
|
377
377
|
"fmt"
|
|
378
378
|
"log"
|
|
379
379
|
|
|
380
|
-
"github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown"
|
|
380
|
+
"github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown"
|
|
381
381
|
)
|
|
382
382
|
|
|
383
383
|
func main() {
|
|
@@ -46,6 +46,23 @@ export class WasmInlineImageWarning {
|
|
|
46
46
|
readonly message: string;
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
+
export class WasmMetadataConfig {
|
|
50
|
+
free(): void;
|
|
51
|
+
[Symbol.dispose](): void;
|
|
52
|
+
/**
|
|
53
|
+
* Create a new metadata configuration with defaults
|
|
54
|
+
*
|
|
55
|
+
* All extraction types enabled by default with 1MB structured data limit
|
|
56
|
+
*/
|
|
57
|
+
constructor();
|
|
58
|
+
extract_links: boolean;
|
|
59
|
+
extract_images: boolean;
|
|
60
|
+
extract_headers: boolean;
|
|
61
|
+
extract_document: boolean;
|
|
62
|
+
extract_structured_data: boolean;
|
|
63
|
+
max_structured_data_size: number;
|
|
64
|
+
}
|
|
65
|
+
|
|
49
66
|
/**
|
|
50
67
|
* Convert HTML to Markdown
|
|
51
68
|
*
|
|
@@ -70,10 +87,56 @@ export function convertBytes(html: Uint8Array, options?: WasmConversionOptions |
|
|
|
70
87
|
|
|
71
88
|
export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
72
89
|
|
|
90
|
+
/**
|
|
91
|
+
* Convert HTML bytes to Markdown with metadata extraction
|
|
92
|
+
*
|
|
93
|
+
* # Arguments
|
|
94
|
+
*
|
|
95
|
+
* * `html` - The HTML bytes to convert
|
|
96
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
97
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
98
|
+
*
|
|
99
|
+
* # Returns
|
|
100
|
+
*
|
|
101
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
102
|
+
*/
|
|
103
|
+
export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
104
|
+
|
|
73
105
|
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
74
106
|
|
|
75
107
|
export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
76
108
|
|
|
109
|
+
/**
|
|
110
|
+
* Convert HTML to Markdown with metadata extraction
|
|
111
|
+
*
|
|
112
|
+
* # Arguments
|
|
113
|
+
*
|
|
114
|
+
* * `html` - The HTML string to convert
|
|
115
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
116
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
117
|
+
*
|
|
118
|
+
* # Returns
|
|
119
|
+
*
|
|
120
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
121
|
+
*
|
|
122
|
+
* # Example
|
|
123
|
+
*
|
|
124
|
+
* ```javascript
|
|
125
|
+
* import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
|
|
126
|
+
*
|
|
127
|
+
* const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
|
|
128
|
+
* const config = new WasmMetadataConfig();
|
|
129
|
+
* config.extractHeaders = true;
|
|
130
|
+
* config.extractLinks = true;
|
|
131
|
+
*
|
|
132
|
+
* const result = convertWithMetadata(html, null, config);
|
|
133
|
+
* console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
|
|
134
|
+
* console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
|
|
135
|
+
* console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
|
|
136
|
+
* ```
|
|
137
|
+
*/
|
|
138
|
+
export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
139
|
+
|
|
77
140
|
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
78
141
|
|
|
79
142
|
export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
|
|
@@ -244,6 +244,10 @@ const WasmInlineImageWarningFinalization = (typeof FinalizationRegistry === 'und
|
|
|
244
244
|
? { register: () => {}, unregister: () => {} }
|
|
245
245
|
: new FinalizationRegistry(ptr => wasm.__wbg_wasminlineimagewarning_free(ptr >>> 0, 1));
|
|
246
246
|
|
|
247
|
+
const WasmMetadataConfigFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
248
|
+
? { register: () => {}, unregister: () => {} }
|
|
249
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_wasmmetadataconfig_free(ptr >>> 0, 1));
|
|
250
|
+
|
|
247
251
|
class WasmConversionOptionsHandle {
|
|
248
252
|
static __wrap(ptr) {
|
|
249
253
|
ptr = ptr >>> 0;
|
|
@@ -594,6 +598,113 @@ class WasmInlineImageWarning {
|
|
|
594
598
|
if (Symbol.dispose) WasmInlineImageWarning.prototype[Symbol.dispose] = WasmInlineImageWarning.prototype.free;
|
|
595
599
|
exports.WasmInlineImageWarning = WasmInlineImageWarning;
|
|
596
600
|
|
|
601
|
+
/**
|
|
602
|
+
* Metadata extraction configuration
|
|
603
|
+
*/
|
|
604
|
+
class WasmMetadataConfig {
|
|
605
|
+
__destroy_into_raw() {
|
|
606
|
+
const ptr = this.__wbg_ptr;
|
|
607
|
+
this.__wbg_ptr = 0;
|
|
608
|
+
WasmMetadataConfigFinalization.unregister(this);
|
|
609
|
+
return ptr;
|
|
610
|
+
}
|
|
611
|
+
free() {
|
|
612
|
+
const ptr = this.__destroy_into_raw();
|
|
613
|
+
wasm.__wbg_wasmmetadataconfig_free(ptr, 0);
|
|
614
|
+
}
|
|
615
|
+
/**
|
|
616
|
+
* @returns {boolean}
|
|
617
|
+
*/
|
|
618
|
+
get extract_links() {
|
|
619
|
+
const ret = wasm.wasmmetadataconfig_extract_links(this.__wbg_ptr);
|
|
620
|
+
return ret !== 0;
|
|
621
|
+
}
|
|
622
|
+
/**
|
|
623
|
+
* @returns {boolean}
|
|
624
|
+
*/
|
|
625
|
+
get extract_images() {
|
|
626
|
+
const ret = wasm.wasmmetadataconfig_extract_images(this.__wbg_ptr);
|
|
627
|
+
return ret !== 0;
|
|
628
|
+
}
|
|
629
|
+
/**
|
|
630
|
+
* @returns {boolean}
|
|
631
|
+
*/
|
|
632
|
+
get extract_headers() {
|
|
633
|
+
const ret = wasm.wasmmetadataconfig_extract_headers(this.__wbg_ptr);
|
|
634
|
+
return ret !== 0;
|
|
635
|
+
}
|
|
636
|
+
/**
|
|
637
|
+
* @returns {boolean}
|
|
638
|
+
*/
|
|
639
|
+
get extract_document() {
|
|
640
|
+
const ret = wasm.wasmmetadataconfig_extract_document(this.__wbg_ptr);
|
|
641
|
+
return ret !== 0;
|
|
642
|
+
}
|
|
643
|
+
/**
|
|
644
|
+
* @param {boolean} value
|
|
645
|
+
*/
|
|
646
|
+
set extract_links(value) {
|
|
647
|
+
wasm.wasmmetadataconfig_set_extract_links(this.__wbg_ptr, value);
|
|
648
|
+
}
|
|
649
|
+
/**
|
|
650
|
+
* @param {boolean} value
|
|
651
|
+
*/
|
|
652
|
+
set extract_images(value) {
|
|
653
|
+
wasm.wasmmetadataconfig_set_extract_images(this.__wbg_ptr, value);
|
|
654
|
+
}
|
|
655
|
+
/**
|
|
656
|
+
* @param {boolean} value
|
|
657
|
+
*/
|
|
658
|
+
set extract_headers(value) {
|
|
659
|
+
wasm.wasmmetadataconfig_set_extract_headers(this.__wbg_ptr, value);
|
|
660
|
+
}
|
|
661
|
+
/**
|
|
662
|
+
* @param {boolean} value
|
|
663
|
+
*/
|
|
664
|
+
set extract_document(value) {
|
|
665
|
+
wasm.wasmmetadataconfig_set_extract_document(this.__wbg_ptr, value);
|
|
666
|
+
}
|
|
667
|
+
/**
|
|
668
|
+
* @returns {boolean}
|
|
669
|
+
*/
|
|
670
|
+
get extract_structured_data() {
|
|
671
|
+
const ret = wasm.wasmmetadataconfig_extract_structured_data(this.__wbg_ptr);
|
|
672
|
+
return ret !== 0;
|
|
673
|
+
}
|
|
674
|
+
/**
|
|
675
|
+
* @returns {number}
|
|
676
|
+
*/
|
|
677
|
+
get max_structured_data_size() {
|
|
678
|
+
const ret = wasm.wasmmetadataconfig_max_structured_data_size(this.__wbg_ptr);
|
|
679
|
+
return ret >>> 0;
|
|
680
|
+
}
|
|
681
|
+
/**
|
|
682
|
+
* @param {boolean} value
|
|
683
|
+
*/
|
|
684
|
+
set extract_structured_data(value) {
|
|
685
|
+
wasm.wasmmetadataconfig_set_extract_structured_data(this.__wbg_ptr, value);
|
|
686
|
+
}
|
|
687
|
+
/**
|
|
688
|
+
* @param {number} value
|
|
689
|
+
*/
|
|
690
|
+
set max_structured_data_size(value) {
|
|
691
|
+
wasm.wasmmetadataconfig_set_max_structured_data_size(this.__wbg_ptr, value);
|
|
692
|
+
}
|
|
693
|
+
/**
|
|
694
|
+
* Create a new metadata configuration with defaults
|
|
695
|
+
*
|
|
696
|
+
* All extraction types enabled by default with 1MB structured data limit
|
|
697
|
+
*/
|
|
698
|
+
constructor() {
|
|
699
|
+
const ret = wasm.wasmmetadataconfig_new();
|
|
700
|
+
this.__wbg_ptr = ret >>> 0;
|
|
701
|
+
WasmMetadataConfigFinalization.register(this, this.__wbg_ptr, this);
|
|
702
|
+
return this;
|
|
703
|
+
}
|
|
704
|
+
}
|
|
705
|
+
if (Symbol.dispose) WasmMetadataConfig.prototype[Symbol.dispose] = WasmMetadataConfig.prototype.free;
|
|
706
|
+
exports.WasmMetadataConfig = WasmMetadataConfig;
|
|
707
|
+
|
|
597
708
|
/**
|
|
598
709
|
* Convert HTML to Markdown
|
|
599
710
|
*
|
|
@@ -702,6 +813,45 @@ function convertBytesWithInlineImages(html, options, image_config) {
|
|
|
702
813
|
}
|
|
703
814
|
exports.convertBytesWithInlineImages = convertBytesWithInlineImages;
|
|
704
815
|
|
|
816
|
+
/**
|
|
817
|
+
* Convert HTML bytes to Markdown with metadata extraction
|
|
818
|
+
*
|
|
819
|
+
* # Arguments
|
|
820
|
+
*
|
|
821
|
+
* * `html` - The HTML bytes to convert
|
|
822
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
823
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
824
|
+
*
|
|
825
|
+
* # Returns
|
|
826
|
+
*
|
|
827
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
828
|
+
* @param {Uint8Array} html
|
|
829
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
830
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
831
|
+
* @returns {Record<string, string>}
|
|
832
|
+
*/
|
|
833
|
+
function convertBytesWithMetadata(html, options, metadata_config) {
|
|
834
|
+
try {
|
|
835
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
836
|
+
let ptr0 = 0;
|
|
837
|
+
if (!isLikeNone(metadata_config)) {
|
|
838
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
839
|
+
ptr0 = metadata_config.__destroy_into_raw();
|
|
840
|
+
}
|
|
841
|
+
wasm.convertBytesWithMetadata(retptr, addHeapObject(html), addHeapObject(options), ptr0);
|
|
842
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
843
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
844
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
845
|
+
if (r2) {
|
|
846
|
+
throw takeObject(r1);
|
|
847
|
+
}
|
|
848
|
+
return takeObject(r0);
|
|
849
|
+
} finally {
|
|
850
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
exports.convertBytesWithMetadata = convertBytesWithMetadata;
|
|
854
|
+
|
|
705
855
|
/**
|
|
706
856
|
* @param {Uint8Array} html
|
|
707
857
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -764,6 +914,63 @@ function convertWithInlineImages(html, options, image_config) {
|
|
|
764
914
|
}
|
|
765
915
|
exports.convertWithInlineImages = convertWithInlineImages;
|
|
766
916
|
|
|
917
|
+
/**
|
|
918
|
+
* Convert HTML to Markdown with metadata extraction
|
|
919
|
+
*
|
|
920
|
+
* # Arguments
|
|
921
|
+
*
|
|
922
|
+
* * `html` - The HTML string to convert
|
|
923
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
924
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
925
|
+
*
|
|
926
|
+
* # Returns
|
|
927
|
+
*
|
|
928
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
929
|
+
*
|
|
930
|
+
* # Example
|
|
931
|
+
*
|
|
932
|
+
* ```javascript
|
|
933
|
+
* import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
|
|
934
|
+
*
|
|
935
|
+
* const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
|
|
936
|
+
* const config = new WasmMetadataConfig();
|
|
937
|
+
* config.extract_headers = true;
|
|
938
|
+
* config.extract_links = true;
|
|
939
|
+
*
|
|
940
|
+
* const result = convertWithMetadata(html, null, config);
|
|
941
|
+
* console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
|
|
942
|
+
* console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
|
|
943
|
+
* console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
|
|
944
|
+
* ```
|
|
945
|
+
* @param {string} html
|
|
946
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
947
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
948
|
+
* @returns {{ markdown: string, metadata: object }}
|
|
949
|
+
*/
|
|
950
|
+
function convertWithMetadata(html, options, metadata_config) {
|
|
951
|
+
try {
|
|
952
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
953
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
954
|
+
const len0 = WASM_VECTOR_LEN;
|
|
955
|
+
let ptr1 = 0;
|
|
956
|
+
if (!isLikeNone(metadata_config)) {
|
|
957
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
958
|
+
ptr1 = metadata_config.__destroy_into_raw();
|
|
959
|
+
}
|
|
960
|
+
wasm.convertWithMetadata(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
961
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
962
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
963
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
964
|
+
if (r2) {
|
|
965
|
+
throw takeObject(r1);
|
|
966
|
+
}
|
|
967
|
+
return takeObject(r0);
|
|
968
|
+
} finally {
|
|
969
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
exports.convertWithMetadata = convertWithMetadata;
|
|
973
|
+
|
|
767
974
|
/**
|
|
768
975
|
* @param {string} html
|
|
769
976
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -1052,6 +1259,11 @@ exports.__wbg_new_1ba21ce319a06297 = function() {
|
|
|
1052
1259
|
return addHeapObject(ret);
|
|
1053
1260
|
};
|
|
1054
1261
|
|
|
1262
|
+
exports.__wbg_new_25f239778d6112b9 = function() {
|
|
1263
|
+
const ret = new Array();
|
|
1264
|
+
return addHeapObject(ret);
|
|
1265
|
+
};
|
|
1266
|
+
|
|
1055
1267
|
exports.__wbg_new_6421f6084cc5bc5a = function(arg0) {
|
|
1056
1268
|
const ret = new Uint8Array(getObject(arg0));
|
|
1057
1269
|
return addHeapObject(ret);
|
|
@@ -1090,6 +1302,15 @@ exports.__wbg_set_3f1d0b984ed272ed = function(arg0, arg1, arg2) {
|
|
|
1090
1302
|
getObject(arg0)[takeObject(arg1)] = takeObject(arg2);
|
|
1091
1303
|
};
|
|
1092
1304
|
|
|
1305
|
+
exports.__wbg_set_781438a03c0c3c81 = function() { return handleError(function (arg0, arg1, arg2) {
|
|
1306
|
+
const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
|
|
1307
|
+
return ret;
|
|
1308
|
+
}, arguments) };
|
|
1309
|
+
|
|
1310
|
+
exports.__wbg_set_7df433eea03a5c14 = function(arg0, arg1, arg2) {
|
|
1311
|
+
getObject(arg0)[arg1 >>> 0] = takeObject(arg2);
|
|
1312
|
+
};
|
|
1313
|
+
|
|
1093
1314
|
exports.__wbg_set_efaaf145b9377369 = function(arg0, arg1, arg2) {
|
|
1094
1315
|
const ret = getObject(arg0).set(getObject(arg1), getObject(arg2));
|
|
1095
1316
|
return addHeapObject(ret);
|
|
@@ -1130,6 +1351,12 @@ exports.__wbindgen_cast_4625c577ab2ec9ee = function(arg0) {
|
|
|
1130
1351
|
return addHeapObject(ret);
|
|
1131
1352
|
};
|
|
1132
1353
|
|
|
1354
|
+
exports.__wbindgen_cast_d6cd19b81560fd6e = function(arg0) {
|
|
1355
|
+
// Cast intrinsic for `F64 -> Externref`.
|
|
1356
|
+
const ret = arg0;
|
|
1357
|
+
return addHeapObject(ret);
|
|
1358
|
+
};
|
|
1359
|
+
|
|
1133
1360
|
exports.__wbindgen_object_clone_ref = function(arg0) {
|
|
1134
1361
|
const ret = getObject(arg0);
|
|
1135
1362
|
return addHeapObject(ret);
|
|
Binary file
|
|
@@ -6,11 +6,14 @@ export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
|
|
|
6
6
|
export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
|
|
7
7
|
export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
|
|
8
8
|
export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
|
|
9
|
+
export const __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
|
|
9
10
|
export const convert: (a: number, b: number, c: number, d: number) => void;
|
|
10
11
|
export const convertBytes: (a: number, b: number, c: number) => void;
|
|
11
12
|
export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
|
|
13
|
+
export const convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
|
|
12
14
|
export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
|
|
13
15
|
export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
16
|
+
export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
14
17
|
export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
15
18
|
export const createConversionOptionsHandle: (a: number, b: number) => void;
|
|
16
19
|
export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
@@ -30,6 +33,19 @@ export const wasminlineimageconfig_set_filenamePrefix: (a: number, b: number, c:
|
|
|
30
33
|
export const wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
|
|
31
34
|
export const wasminlineimagewarning_index: (a: number) => number;
|
|
32
35
|
export const wasminlineimagewarning_message: (a: number, b: number) => void;
|
|
36
|
+
export const wasmmetadataconfig_extract_document: (a: number) => number;
|
|
37
|
+
export const wasmmetadataconfig_extract_headers: (a: number) => number;
|
|
38
|
+
export const wasmmetadataconfig_extract_images: (a: number) => number;
|
|
39
|
+
export const wasmmetadataconfig_extract_links: (a: number) => number;
|
|
40
|
+
export const wasmmetadataconfig_extract_structured_data: (a: number) => number;
|
|
41
|
+
export const wasmmetadataconfig_max_structured_data_size: (a: number) => number;
|
|
42
|
+
export const wasmmetadataconfig_new: () => number;
|
|
43
|
+
export const wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
|
|
44
|
+
export const wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
|
|
45
|
+
export const wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
|
|
46
|
+
export const wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
|
|
47
|
+
export const wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
|
|
48
|
+
export const wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
|
|
33
49
|
export const init: () => void;
|
|
34
50
|
export const __wbindgen_export: (a: number, b: number) => number;
|
|
35
51
|
export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
|
package/dist-node/package.json
CHANGED
package/dist-web/README.md
CHANGED
|
@@ -11,7 +11,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
|
|
|
11
11
|
[](https://hex.pm/packages/html_to_markdown)
|
|
12
12
|
[](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
|
|
13
13
|
[](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
|
|
14
|
-
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
|
|
14
|
+
[](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
|
|
15
15
|
[](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
|
|
16
16
|
[](https://discord.gg/pXxagNK2zN)
|
|
17
17
|
|
|
@@ -377,7 +377,7 @@ import (
|
|
|
377
377
|
"fmt"
|
|
378
378
|
"log"
|
|
379
379
|
|
|
380
|
-
"github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown"
|
|
380
|
+
"github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown"
|
|
381
381
|
)
|
|
382
382
|
|
|
383
383
|
func main() {
|
|
@@ -46,6 +46,23 @@ export class WasmInlineImageWarning {
|
|
|
46
46
|
readonly message: string;
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
+
export class WasmMetadataConfig {
|
|
50
|
+
free(): void;
|
|
51
|
+
[Symbol.dispose](): void;
|
|
52
|
+
/**
|
|
53
|
+
* Create a new metadata configuration with defaults
|
|
54
|
+
*
|
|
55
|
+
* All extraction types enabled by default with 1MB structured data limit
|
|
56
|
+
*/
|
|
57
|
+
constructor();
|
|
58
|
+
extract_links: boolean;
|
|
59
|
+
extract_images: boolean;
|
|
60
|
+
extract_headers: boolean;
|
|
61
|
+
extract_document: boolean;
|
|
62
|
+
extract_structured_data: boolean;
|
|
63
|
+
max_structured_data_size: number;
|
|
64
|
+
}
|
|
65
|
+
|
|
49
66
|
/**
|
|
50
67
|
* Convert HTML to Markdown
|
|
51
68
|
*
|
|
@@ -70,10 +87,56 @@ export function convertBytes(html: Uint8Array, options?: WasmConversionOptions |
|
|
|
70
87
|
|
|
71
88
|
export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
72
89
|
|
|
90
|
+
/**
|
|
91
|
+
* Convert HTML bytes to Markdown with metadata extraction
|
|
92
|
+
*
|
|
93
|
+
* # Arguments
|
|
94
|
+
*
|
|
95
|
+
* * `html` - The HTML bytes to convert
|
|
96
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
97
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
98
|
+
*
|
|
99
|
+
* # Returns
|
|
100
|
+
*
|
|
101
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
102
|
+
*/
|
|
103
|
+
export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
104
|
+
|
|
73
105
|
export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
|
|
74
106
|
|
|
75
107
|
export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
|
|
76
108
|
|
|
109
|
+
/**
|
|
110
|
+
* Convert HTML to Markdown with metadata extraction
|
|
111
|
+
*
|
|
112
|
+
* # Arguments
|
|
113
|
+
*
|
|
114
|
+
* * `html` - The HTML string to convert
|
|
115
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
116
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
117
|
+
*
|
|
118
|
+
* # Returns
|
|
119
|
+
*
|
|
120
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
121
|
+
*
|
|
122
|
+
* # Example
|
|
123
|
+
*
|
|
124
|
+
* ```javascript
|
|
125
|
+
* import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
|
|
126
|
+
*
|
|
127
|
+
* const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
|
|
128
|
+
* const config = new WasmMetadataConfig();
|
|
129
|
+
* config.extract_headers = true;
|
|
130
|
+
* config.extract_links = true;
|
|
131
|
+
*
|
|
132
|
+
* const result = convertWithMetadata(html, null, config);
|
|
133
|
+
* console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
|
|
134
|
+
* console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
|
|
135
|
+
* console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
|
|
136
|
+
* ```
|
|
137
|
+
*/
|
|
138
|
+
export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
|
|
139
|
+
|
|
77
140
|
export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
|
|
78
141
|
|
|
79
142
|
export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
|
|
@@ -92,11 +155,14 @@ export interface InitOutput {
|
|
|
92
155
|
readonly __wbg_wasminlineimage_free: (a: number, b: number) => void;
|
|
93
156
|
readonly __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
|
|
94
157
|
readonly __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
|
|
158
|
+
readonly __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
|
|
95
159
|
readonly convert: (a: number, b: number, c: number, d: number) => void;
|
|
96
160
|
readonly convertBytes: (a: number, b: number, c: number) => void;
|
|
97
161
|
readonly convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
|
|
162
|
+
readonly convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
|
|
98
163
|
readonly convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
|
|
99
164
|
readonly convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
165
|
+
readonly convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
100
166
|
readonly convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
101
167
|
readonly createConversionOptionsHandle: (a: number, b: number) => void;
|
|
102
168
|
readonly wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
@@ -116,6 +182,19 @@ export interface InitOutput {
|
|
|
116
182
|
readonly wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
|
|
117
183
|
readonly wasminlineimagewarning_index: (a: number) => number;
|
|
118
184
|
readonly wasminlineimagewarning_message: (a: number, b: number) => void;
|
|
185
|
+
readonly wasmmetadataconfig_extract_document: (a: number) => number;
|
|
186
|
+
readonly wasmmetadataconfig_extract_headers: (a: number) => number;
|
|
187
|
+
readonly wasmmetadataconfig_extract_images: (a: number) => number;
|
|
188
|
+
readonly wasmmetadataconfig_extract_links: (a: number) => number;
|
|
189
|
+
readonly wasmmetadataconfig_extract_structured_data: (a: number) => number;
|
|
190
|
+
readonly wasmmetadataconfig_max_structured_data_size: (a: number) => number;
|
|
191
|
+
readonly wasmmetadataconfig_new: () => number;
|
|
192
|
+
readonly wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
|
|
193
|
+
readonly wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
|
|
194
|
+
readonly wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
|
|
195
|
+
readonly wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
|
|
196
|
+
readonly wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
|
|
197
|
+
readonly wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
|
|
119
198
|
readonly init: () => void;
|
|
120
199
|
readonly __wbindgen_export: (a: number, b: number) => number;
|
|
121
200
|
readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
|
|
@@ -250,6 +250,10 @@ const WasmInlineImageWarningFinalization = (typeof FinalizationRegistry === 'und
|
|
|
250
250
|
? { register: () => {}, unregister: () => {} }
|
|
251
251
|
: new FinalizationRegistry(ptr => wasm.__wbg_wasminlineimagewarning_free(ptr >>> 0, 1));
|
|
252
252
|
|
|
253
|
+
const WasmMetadataConfigFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
254
|
+
? { register: () => {}, unregister: () => {} }
|
|
255
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_wasmmetadataconfig_free(ptr >>> 0, 1));
|
|
256
|
+
|
|
253
257
|
export class WasmConversionOptionsHandle {
|
|
254
258
|
static __wrap(ptr) {
|
|
255
259
|
ptr = ptr >>> 0;
|
|
@@ -595,6 +599,112 @@ export class WasmInlineImageWarning {
|
|
|
595
599
|
}
|
|
596
600
|
if (Symbol.dispose) WasmInlineImageWarning.prototype[Symbol.dispose] = WasmInlineImageWarning.prototype.free;
|
|
597
601
|
|
|
602
|
+
/**
|
|
603
|
+
* Metadata extraction configuration
|
|
604
|
+
*/
|
|
605
|
+
export class WasmMetadataConfig {
|
|
606
|
+
__destroy_into_raw() {
|
|
607
|
+
const ptr = this.__wbg_ptr;
|
|
608
|
+
this.__wbg_ptr = 0;
|
|
609
|
+
WasmMetadataConfigFinalization.unregister(this);
|
|
610
|
+
return ptr;
|
|
611
|
+
}
|
|
612
|
+
free() {
|
|
613
|
+
const ptr = this.__destroy_into_raw();
|
|
614
|
+
wasm.__wbg_wasmmetadataconfig_free(ptr, 0);
|
|
615
|
+
}
|
|
616
|
+
/**
|
|
617
|
+
* @returns {boolean}
|
|
618
|
+
*/
|
|
619
|
+
get extract_links() {
|
|
620
|
+
const ret = wasm.wasmmetadataconfig_extract_links(this.__wbg_ptr);
|
|
621
|
+
return ret !== 0;
|
|
622
|
+
}
|
|
623
|
+
/**
|
|
624
|
+
* @returns {boolean}
|
|
625
|
+
*/
|
|
626
|
+
get extract_images() {
|
|
627
|
+
const ret = wasm.wasmmetadataconfig_extract_images(this.__wbg_ptr);
|
|
628
|
+
return ret !== 0;
|
|
629
|
+
}
|
|
630
|
+
/**
|
|
631
|
+
* @returns {boolean}
|
|
632
|
+
*/
|
|
633
|
+
get extract_headers() {
|
|
634
|
+
const ret = wasm.wasmmetadataconfig_extract_headers(this.__wbg_ptr);
|
|
635
|
+
return ret !== 0;
|
|
636
|
+
}
|
|
637
|
+
/**
|
|
638
|
+
* @returns {boolean}
|
|
639
|
+
*/
|
|
640
|
+
get extract_document() {
|
|
641
|
+
const ret = wasm.wasmmetadataconfig_extract_document(this.__wbg_ptr);
|
|
642
|
+
return ret !== 0;
|
|
643
|
+
}
|
|
644
|
+
/**
|
|
645
|
+
* @param {boolean} value
|
|
646
|
+
*/
|
|
647
|
+
set extract_links(value) {
|
|
648
|
+
wasm.wasmmetadataconfig_set_extract_links(this.__wbg_ptr, value);
|
|
649
|
+
}
|
|
650
|
+
/**
|
|
651
|
+
* @param {boolean} value
|
|
652
|
+
*/
|
|
653
|
+
set extract_images(value) {
|
|
654
|
+
wasm.wasmmetadataconfig_set_extract_images(this.__wbg_ptr, value);
|
|
655
|
+
}
|
|
656
|
+
/**
|
|
657
|
+
* @param {boolean} value
|
|
658
|
+
*/
|
|
659
|
+
set extract_headers(value) {
|
|
660
|
+
wasm.wasmmetadataconfig_set_extract_headers(this.__wbg_ptr, value);
|
|
661
|
+
}
|
|
662
|
+
/**
|
|
663
|
+
* @param {boolean} value
|
|
664
|
+
*/
|
|
665
|
+
set extract_document(value) {
|
|
666
|
+
wasm.wasmmetadataconfig_set_extract_document(this.__wbg_ptr, value);
|
|
667
|
+
}
|
|
668
|
+
/**
|
|
669
|
+
* @returns {boolean}
|
|
670
|
+
*/
|
|
671
|
+
get extract_structured_data() {
|
|
672
|
+
const ret = wasm.wasmmetadataconfig_extract_structured_data(this.__wbg_ptr);
|
|
673
|
+
return ret !== 0;
|
|
674
|
+
}
|
|
675
|
+
/**
|
|
676
|
+
* @returns {number}
|
|
677
|
+
*/
|
|
678
|
+
get max_structured_data_size() {
|
|
679
|
+
const ret = wasm.wasmmetadataconfig_max_structured_data_size(this.__wbg_ptr);
|
|
680
|
+
return ret >>> 0;
|
|
681
|
+
}
|
|
682
|
+
/**
|
|
683
|
+
* @param {boolean} value
|
|
684
|
+
*/
|
|
685
|
+
set extract_structured_data(value) {
|
|
686
|
+
wasm.wasmmetadataconfig_set_extract_structured_data(this.__wbg_ptr, value);
|
|
687
|
+
}
|
|
688
|
+
/**
|
|
689
|
+
* @param {number} value
|
|
690
|
+
*/
|
|
691
|
+
set max_structured_data_size(value) {
|
|
692
|
+
wasm.wasmmetadataconfig_set_max_structured_data_size(this.__wbg_ptr, value);
|
|
693
|
+
}
|
|
694
|
+
/**
|
|
695
|
+
* Create a new metadata configuration with defaults
|
|
696
|
+
*
|
|
697
|
+
* All extraction types enabled by default with 1MB structured data limit
|
|
698
|
+
*/
|
|
699
|
+
constructor() {
|
|
700
|
+
const ret = wasm.wasmmetadataconfig_new();
|
|
701
|
+
this.__wbg_ptr = ret >>> 0;
|
|
702
|
+
WasmMetadataConfigFinalization.register(this, this.__wbg_ptr, this);
|
|
703
|
+
return this;
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
if (Symbol.dispose) WasmMetadataConfig.prototype[Symbol.dispose] = WasmMetadataConfig.prototype.free;
|
|
707
|
+
|
|
598
708
|
/**
|
|
599
709
|
* Convert HTML to Markdown
|
|
600
710
|
*
|
|
@@ -700,6 +810,44 @@ export function convertBytesWithInlineImages(html, options, image_config) {
|
|
|
700
810
|
}
|
|
701
811
|
}
|
|
702
812
|
|
|
813
|
+
/**
|
|
814
|
+
* Convert HTML bytes to Markdown with metadata extraction
|
|
815
|
+
*
|
|
816
|
+
* # Arguments
|
|
817
|
+
*
|
|
818
|
+
* * `html` - The HTML bytes to convert
|
|
819
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
820
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
821
|
+
*
|
|
822
|
+
* # Returns
|
|
823
|
+
*
|
|
824
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
825
|
+
* @param {Uint8Array} html
|
|
826
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
827
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
828
|
+
* @returns {{ markdown: string, metadata: object }}
|
|
829
|
+
*/
|
|
830
|
+
export function convertBytesWithMetadata(html, options, metadata_config) {
|
|
831
|
+
try {
|
|
832
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
833
|
+
let ptr0 = 0;
|
|
834
|
+
if (!isLikeNone(metadata_config)) {
|
|
835
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
836
|
+
ptr0 = metadata_config.__destroy_into_raw();
|
|
837
|
+
}
|
|
838
|
+
wasm.convertBytesWithMetadata(retptr, addHeapObject(html), addHeapObject(options), ptr0);
|
|
839
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
840
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
841
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
842
|
+
if (r2) {
|
|
843
|
+
throw takeObject(r1);
|
|
844
|
+
}
|
|
845
|
+
return takeObject(r0);
|
|
846
|
+
} finally {
|
|
847
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
|
|
703
851
|
/**
|
|
704
852
|
* @param {Uint8Array} html
|
|
705
853
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -760,6 +908,62 @@ export function convertWithInlineImages(html, options, image_config) {
|
|
|
760
908
|
}
|
|
761
909
|
}
|
|
762
910
|
|
|
911
|
+
/**
|
|
912
|
+
* Convert HTML to Markdown with metadata extraction
|
|
913
|
+
*
|
|
914
|
+
* # Arguments
|
|
915
|
+
*
|
|
916
|
+
* * `html` - The HTML string to convert
|
|
917
|
+
* * `options` - Optional conversion options (as a JavaScript object)
|
|
918
|
+
* * `metadata_config` - Metadata extraction configuration
|
|
919
|
+
*
|
|
920
|
+
* # Returns
|
|
921
|
+
*
|
|
922
|
+
* JavaScript object with `markdown` (string) and `metadata` (object) fields
|
|
923
|
+
*
|
|
924
|
+
* # Example
|
|
925
|
+
*
|
|
926
|
+
* ```javascript
|
|
927
|
+
* import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
|
|
928
|
+
*
|
|
929
|
+
* const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
|
|
930
|
+
* const config = new WasmMetadataConfig();
|
|
931
|
+
* config.extract_headers = true;
|
|
932
|
+
* config.extract_links = true;
|
|
933
|
+
*
|
|
934
|
+
* const result = convertWithMetadata(html, null, config);
|
|
935
|
+
* console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
|
|
936
|
+
* console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
|
|
937
|
+
* console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
|
|
938
|
+
* ```
|
|
939
|
+
* @param {string} html
|
|
940
|
+
* @param {WasmConversionOptions | null | undefined} [options]
|
|
941
|
+
* @param {WasmMetadataConfig | null} [metadata_config]
|
|
942
|
+
* @returns {{ markdown: string, metadata: object }}
|
|
943
|
+
*/
|
|
944
|
+
export function convertWithMetadata(html, options, metadata_config) {
|
|
945
|
+
try {
|
|
946
|
+
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
|
|
947
|
+
const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
948
|
+
const len0 = WASM_VECTOR_LEN;
|
|
949
|
+
let ptr1 = 0;
|
|
950
|
+
if (!isLikeNone(metadata_config)) {
|
|
951
|
+
_assertClass(metadata_config, WasmMetadataConfig);
|
|
952
|
+
ptr1 = metadata_config.__destroy_into_raw();
|
|
953
|
+
}
|
|
954
|
+
wasm.convertWithMetadata(retptr, ptr0, len0, addHeapObject(options), ptr1);
|
|
955
|
+
var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
|
|
956
|
+
var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
|
|
957
|
+
var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
|
|
958
|
+
if (r2) {
|
|
959
|
+
throw takeObject(r1);
|
|
960
|
+
}
|
|
961
|
+
return takeObject(r0);
|
|
962
|
+
} finally {
|
|
963
|
+
wasm.__wbindgen_add_to_stack_pointer(16);
|
|
964
|
+
}
|
|
965
|
+
}
|
|
966
|
+
|
|
763
967
|
/**
|
|
764
968
|
* @param {string} html
|
|
765
969
|
* @param {WasmConversionOptionsHandle} handle
|
|
@@ -1043,6 +1247,10 @@ function __wbg_get_imports() {
|
|
|
1043
1247
|
const ret = new Object();
|
|
1044
1248
|
return addHeapObject(ret);
|
|
1045
1249
|
};
|
|
1250
|
+
imports.wbg.__wbg_new_25f239778d6112b9 = function() {
|
|
1251
|
+
const ret = new Array();
|
|
1252
|
+
return addHeapObject(ret);
|
|
1253
|
+
};
|
|
1046
1254
|
imports.wbg.__wbg_new_6421f6084cc5bc5a = function(arg0) {
|
|
1047
1255
|
const ret = new Uint8Array(getObject(arg0));
|
|
1048
1256
|
return addHeapObject(ret);
|
|
@@ -1073,6 +1281,13 @@ function __wbg_get_imports() {
|
|
|
1073
1281
|
imports.wbg.__wbg_set_3f1d0b984ed272ed = function(arg0, arg1, arg2) {
|
|
1074
1282
|
getObject(arg0)[takeObject(arg1)] = takeObject(arg2);
|
|
1075
1283
|
};
|
|
1284
|
+
imports.wbg.__wbg_set_781438a03c0c3c81 = function() { return handleError(function (arg0, arg1, arg2) {
|
|
1285
|
+
const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
|
|
1286
|
+
return ret;
|
|
1287
|
+
}, arguments) };
|
|
1288
|
+
imports.wbg.__wbg_set_7df433eea03a5c14 = function(arg0, arg1, arg2) {
|
|
1289
|
+
getObject(arg0)[arg1 >>> 0] = takeObject(arg2);
|
|
1290
|
+
};
|
|
1076
1291
|
imports.wbg.__wbg_set_efaaf145b9377369 = function(arg0, arg1, arg2) {
|
|
1077
1292
|
const ret = getObject(arg0).set(getObject(arg1), getObject(arg2));
|
|
1078
1293
|
return addHeapObject(ret);
|
|
@@ -1106,6 +1321,11 @@ function __wbg_get_imports() {
|
|
|
1106
1321
|
const ret = BigInt.asUintN(64, arg0);
|
|
1107
1322
|
return addHeapObject(ret);
|
|
1108
1323
|
};
|
|
1324
|
+
imports.wbg.__wbindgen_cast_d6cd19b81560fd6e = function(arg0) {
|
|
1325
|
+
// Cast intrinsic for `F64 -> Externref`.
|
|
1326
|
+
const ret = arg0;
|
|
1327
|
+
return addHeapObject(ret);
|
|
1328
|
+
};
|
|
1109
1329
|
imports.wbg.__wbindgen_object_clone_ref = function(arg0) {
|
|
1110
1330
|
const ret = getObject(arg0);
|
|
1111
1331
|
return addHeapObject(ret);
|
|
Binary file
|
|
@@ -6,11 +6,14 @@ export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
|
|
|
6
6
|
export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
|
|
7
7
|
export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
|
|
8
8
|
export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
|
|
9
|
+
export const __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
|
|
9
10
|
export const convert: (a: number, b: number, c: number, d: number) => void;
|
|
10
11
|
export const convertBytes: (a: number, b: number, c: number) => void;
|
|
11
12
|
export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
|
|
13
|
+
export const convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
|
|
12
14
|
export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
|
|
13
15
|
export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
16
|
+
export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
|
|
14
17
|
export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
|
|
15
18
|
export const createConversionOptionsHandle: (a: number, b: number) => void;
|
|
16
19
|
export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
|
|
@@ -30,6 +33,19 @@ export const wasminlineimageconfig_set_filenamePrefix: (a: number, b: number, c:
|
|
|
30
33
|
export const wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
|
|
31
34
|
export const wasminlineimagewarning_index: (a: number) => number;
|
|
32
35
|
export const wasminlineimagewarning_message: (a: number, b: number) => void;
|
|
36
|
+
export const wasmmetadataconfig_extract_document: (a: number) => number;
|
|
37
|
+
export const wasmmetadataconfig_extract_headers: (a: number) => number;
|
|
38
|
+
export const wasmmetadataconfig_extract_images: (a: number) => number;
|
|
39
|
+
export const wasmmetadataconfig_extract_links: (a: number) => number;
|
|
40
|
+
export const wasmmetadataconfig_extract_structured_data: (a: number) => number;
|
|
41
|
+
export const wasmmetadataconfig_max_structured_data_size: (a: number) => number;
|
|
42
|
+
export const wasmmetadataconfig_new: () => number;
|
|
43
|
+
export const wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
|
|
44
|
+
export const wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
|
|
45
|
+
export const wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
|
|
46
|
+
export const wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
|
|
47
|
+
export const wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
|
|
48
|
+
export const wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
|
|
33
49
|
export const init: () => void;
|
|
34
50
|
export const __wbindgen_export: (a: number, b: number) => number;
|
|
35
51
|
export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
|
package/dist-web/package.json
CHANGED
package/package.json
CHANGED