html-to-markdown-wasm 2.14.1 → 2.14.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,7 +17,7 @@ Runs anywhere: Node.js, Deno, Bun, browsers, and edge runtimes.
17
17
  [![RubyGems](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)
18
18
  [![NuGet](https://img.shields.io/nuget/v/Goldziher.HtmlToMarkdown.svg)](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
19
19
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
20
- [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
20
+ [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
21
21
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
22
22
 
23
23
  ## Performance
package/dist/README.md CHANGED
@@ -11,7 +11,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
11
11
  [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
12
12
  [![NuGet](https://img.shields.io/nuget/v/Goldziher.HtmlToMarkdown.svg)](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
13
13
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
14
- [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
14
+ [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
15
15
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
16
16
  [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
17
17
 
@@ -377,7 +377,7 @@ import (
377
377
  "fmt"
378
378
  "log"
379
379
 
380
- "github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown"
380
+ "github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown"
381
381
  )
382
382
 
383
383
  func main() {
@@ -46,6 +46,23 @@ export class WasmInlineImageWarning {
46
46
  readonly message: string;
47
47
  }
48
48
 
49
+ export class WasmMetadataConfig {
50
+ free(): void;
51
+ [Symbol.dispose](): void;
52
+ /**
53
+ * Create a new metadata configuration with defaults
54
+ *
55
+ * All extraction types enabled by default with 1MB structured data limit
56
+ */
57
+ constructor();
58
+ extract_links: boolean;
59
+ extract_images: boolean;
60
+ extract_headers: boolean;
61
+ extract_document: boolean;
62
+ extract_structured_data: boolean;
63
+ max_structured_data_size: number;
64
+ }
65
+
49
66
  /**
50
67
  * Convert HTML to Markdown
51
68
  *
@@ -70,10 +87,56 @@ export function convertBytes(html: Uint8Array, options?: WasmConversionOptions |
70
87
 
71
88
  export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
72
89
 
90
+ /**
91
+ * Convert HTML bytes to Markdown with metadata extraction
92
+ *
93
+ * # Arguments
94
+ *
95
+ * * `html` - The HTML bytes to convert
96
+ * * `options` - Optional conversion options (as a JavaScript object)
97
+ * * `metadata_config` - Metadata extraction configuration
98
+ *
99
+ * # Returns
100
+ *
101
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
102
+ */
103
+ export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
104
+
73
105
  export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
74
106
 
75
107
  export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
76
108
 
109
+ /**
110
+ * Convert HTML to Markdown with metadata extraction
111
+ *
112
+ * # Arguments
113
+ *
114
+ * * `html` - The HTML string to convert
115
+ * * `options` - Optional conversion options (as a JavaScript object)
116
+ * * `metadata_config` - Metadata extraction configuration
117
+ *
118
+ * # Returns
119
+ *
120
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
121
+ *
122
+ * # Example
123
+ *
124
+ * ```javascript
125
+ * import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
126
+ *
127
+ * const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
128
+ * const config = new WasmMetadataConfig();
129
+ * config.extractHeaders = true;
130
+ * config.extractLinks = true;
131
+ *
132
+ * const result = convertWithMetadata(html, null, config);
133
+ * console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
134
+ * console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
135
+ * console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
136
+ * ```
137
+ */
138
+ export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
139
+
77
140
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
78
141
 
79
142
  export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
@@ -253,6 +253,10 @@ const WasmInlineImageWarningFinalization = (typeof FinalizationRegistry === 'und
253
253
  ? { register: () => {}, unregister: () => {} }
254
254
  : new FinalizationRegistry(ptr => wasm.__wbg_wasminlineimagewarning_free(ptr >>> 0, 1));
255
255
 
256
+ const WasmMetadataConfigFinalization = (typeof FinalizationRegistry === 'undefined')
257
+ ? { register: () => {}, unregister: () => {} }
258
+ : new FinalizationRegistry(ptr => wasm.__wbg_wasmmetadataconfig_free(ptr >>> 0, 1));
259
+
256
260
  export class WasmConversionOptionsHandle {
257
261
  static __wrap(ptr) {
258
262
  ptr = ptr >>> 0;
@@ -598,6 +602,112 @@ export class WasmInlineImageWarning {
598
602
  }
599
603
  if (Symbol.dispose) WasmInlineImageWarning.prototype[Symbol.dispose] = WasmInlineImageWarning.prototype.free;
600
604
 
605
+ /**
606
+ * Metadata extraction configuration
607
+ */
608
+ export class WasmMetadataConfig {
609
+ __destroy_into_raw() {
610
+ const ptr = this.__wbg_ptr;
611
+ this.__wbg_ptr = 0;
612
+ WasmMetadataConfigFinalization.unregister(this);
613
+ return ptr;
614
+ }
615
+ free() {
616
+ const ptr = this.__destroy_into_raw();
617
+ wasm.__wbg_wasmmetadataconfig_free(ptr, 0);
618
+ }
619
+ /**
620
+ * @returns {boolean}
621
+ */
622
+ get extract_links() {
623
+ const ret = wasm.wasmmetadataconfig_extract_links(this.__wbg_ptr);
624
+ return ret !== 0;
625
+ }
626
+ /**
627
+ * @returns {boolean}
628
+ */
629
+ get extract_images() {
630
+ const ret = wasm.wasmmetadataconfig_extract_images(this.__wbg_ptr);
631
+ return ret !== 0;
632
+ }
633
+ /**
634
+ * @returns {boolean}
635
+ */
636
+ get extract_headers() {
637
+ const ret = wasm.wasmmetadataconfig_extract_headers(this.__wbg_ptr);
638
+ return ret !== 0;
639
+ }
640
+ /**
641
+ * @returns {boolean}
642
+ */
643
+ get extract_document() {
644
+ const ret = wasm.wasmmetadataconfig_extract_document(this.__wbg_ptr);
645
+ return ret !== 0;
646
+ }
647
+ /**
648
+ * @param {boolean} value
649
+ */
650
+ set extract_links(value) {
651
+ wasm.wasmmetadataconfig_set_extract_links(this.__wbg_ptr, value);
652
+ }
653
+ /**
654
+ * @param {boolean} value
655
+ */
656
+ set extract_images(value) {
657
+ wasm.wasmmetadataconfig_set_extract_images(this.__wbg_ptr, value);
658
+ }
659
+ /**
660
+ * @param {boolean} value
661
+ */
662
+ set extract_headers(value) {
663
+ wasm.wasmmetadataconfig_set_extract_headers(this.__wbg_ptr, value);
664
+ }
665
+ /**
666
+ * @param {boolean} value
667
+ */
668
+ set extract_document(value) {
669
+ wasm.wasmmetadataconfig_set_extract_document(this.__wbg_ptr, value);
670
+ }
671
+ /**
672
+ * @returns {boolean}
673
+ */
674
+ get extract_structured_data() {
675
+ const ret = wasm.wasmmetadataconfig_extract_structured_data(this.__wbg_ptr);
676
+ return ret !== 0;
677
+ }
678
+ /**
679
+ * @returns {number}
680
+ */
681
+ get max_structured_data_size() {
682
+ const ret = wasm.wasmmetadataconfig_max_structured_data_size(this.__wbg_ptr);
683
+ return ret >>> 0;
684
+ }
685
+ /**
686
+ * @param {boolean} value
687
+ */
688
+ set extract_structured_data(value) {
689
+ wasm.wasmmetadataconfig_set_extract_structured_data(this.__wbg_ptr, value);
690
+ }
691
+ /**
692
+ * @param {number} value
693
+ */
694
+ set max_structured_data_size(value) {
695
+ wasm.wasmmetadataconfig_set_max_structured_data_size(this.__wbg_ptr, value);
696
+ }
697
+ /**
698
+ * Create a new metadata configuration with defaults
699
+ *
700
+ * All extraction types enabled by default with 1MB structured data limit
701
+ */
702
+ constructor() {
703
+ const ret = wasm.wasmmetadataconfig_new();
704
+ this.__wbg_ptr = ret >>> 0;
705
+ WasmMetadataConfigFinalization.register(this, this.__wbg_ptr, this);
706
+ return this;
707
+ }
708
+ }
709
+ if (Symbol.dispose) WasmMetadataConfig.prototype[Symbol.dispose] = WasmMetadataConfig.prototype.free;
710
+
601
711
  /**
602
712
  * Convert HTML to Markdown
603
713
  *
@@ -703,6 +813,44 @@ export function convertBytesWithInlineImages(html, options, image_config) {
703
813
  }
704
814
  }
705
815
 
816
+ /**
817
+ * Convert HTML bytes to Markdown with metadata extraction
818
+ *
819
+ * # Arguments
820
+ *
821
+ * * `html` - The HTML bytes to convert
822
+ * * `options` - Optional conversion options (as a JavaScript object)
823
+ * * `metadata_config` - Metadata extraction configuration
824
+ *
825
+ * # Returns
826
+ *
827
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
828
+ * @param {Uint8Array} html
829
+ * @param {WasmConversionOptions | null | undefined} [options]
830
+ * @param {WasmMetadataConfig | null} [metadata_config]
831
+ * @returns {{ markdown: string, metadata: object }}
832
+ */
833
+ export function convertBytesWithMetadata(html, options, metadata_config) {
834
+ try {
835
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
836
+ let ptr0 = 0;
837
+ if (!isLikeNone(metadata_config)) {
838
+ _assertClass(metadata_config, WasmMetadataConfig);
839
+ ptr0 = metadata_config.__destroy_into_raw();
840
+ }
841
+ wasm.convertBytesWithMetadata(retptr, addHeapObject(html), addHeapObject(options), ptr0);
842
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
843
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
844
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
845
+ if (r2) {
846
+ throw takeObject(r1);
847
+ }
848
+ return takeObject(r0);
849
+ } finally {
850
+ wasm.__wbindgen_add_to_stack_pointer(16);
851
+ }
852
+ }
853
+
706
854
  /**
707
855
  * @param {Uint8Array} html
708
856
  * @param {WasmConversionOptionsHandle} handle
@@ -763,6 +911,62 @@ export function convertWithInlineImages(html, options, image_config) {
763
911
  }
764
912
  }
765
913
 
914
+ /**
915
+ * Convert HTML to Markdown with metadata extraction
916
+ *
917
+ * # Arguments
918
+ *
919
+ * * `html` - The HTML string to convert
920
+ * * `options` - Optional conversion options (as a JavaScript object)
921
+ * * `metadata_config` - Metadata extraction configuration
922
+ *
923
+ * # Returns
924
+ *
925
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
926
+ *
927
+ * # Example
928
+ *
929
+ * ```javascript
930
+ * import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
931
+ *
932
+ * const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
933
+ * const config = new WasmMetadataConfig();
934
+ * config.extract_headers = true;
935
+ * config.extract_links = true;
936
+ *
937
+ * const result = convertWithMetadata(html, null, config);
938
+ * console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
939
+ * console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
940
+ * console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
941
+ * ```
942
+ * @param {string} html
943
+ * @param {WasmConversionOptions | null | undefined} [options]
944
+ * @param {WasmMetadataConfig | null} [metadata_config]
945
+ * @returns {{ markdown: string, metadata: object }}
946
+ */
947
+ export function convertWithMetadata(html, options, metadata_config) {
948
+ try {
949
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
950
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
951
+ const len0 = WASM_VECTOR_LEN;
952
+ let ptr1 = 0;
953
+ if (!isLikeNone(metadata_config)) {
954
+ _assertClass(metadata_config, WasmMetadataConfig);
955
+ ptr1 = metadata_config.__destroy_into_raw();
956
+ }
957
+ wasm.convertWithMetadata(retptr, ptr0, len0, addHeapObject(options), ptr1);
958
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
959
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
960
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
961
+ if (r2) {
962
+ throw takeObject(r1);
963
+ }
964
+ return takeObject(r0);
965
+ } finally {
966
+ wasm.__wbindgen_add_to_stack_pointer(16);
967
+ }
968
+ }
969
+
766
970
  /**
767
971
  * @param {string} html
768
972
  * @param {WasmConversionOptionsHandle} handle
@@ -1048,6 +1252,11 @@ export function __wbg_new_1ba21ce319a06297() {
1048
1252
  return addHeapObject(ret);
1049
1253
  };
1050
1254
 
1255
+ export function __wbg_new_25f239778d6112b9() {
1256
+ const ret = new Array();
1257
+ return addHeapObject(ret);
1258
+ };
1259
+
1051
1260
  export function __wbg_new_6421f6084cc5bc5a(arg0) {
1052
1261
  const ret = new Uint8Array(getObject(arg0));
1053
1262
  return addHeapObject(ret);
@@ -1086,6 +1295,15 @@ export function __wbg_set_3f1d0b984ed272ed(arg0, arg1, arg2) {
1086
1295
  getObject(arg0)[takeObject(arg1)] = takeObject(arg2);
1087
1296
  };
1088
1297
 
1298
+ export function __wbg_set_781438a03c0c3c81() { return handleError(function (arg0, arg1, arg2) {
1299
+ const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
1300
+ return ret;
1301
+ }, arguments) };
1302
+
1303
+ export function __wbg_set_7df433eea03a5c14(arg0, arg1, arg2) {
1304
+ getObject(arg0)[arg1 >>> 0] = takeObject(arg2);
1305
+ };
1306
+
1089
1307
  export function __wbg_set_efaaf145b9377369(arg0, arg1, arg2) {
1090
1308
  const ret = getObject(arg0).set(getObject(arg1), getObject(arg2));
1091
1309
  return addHeapObject(ret);
@@ -1126,6 +1344,12 @@ export function __wbindgen_cast_4625c577ab2ec9ee(arg0) {
1126
1344
  return addHeapObject(ret);
1127
1345
  };
1128
1346
 
1347
+ export function __wbindgen_cast_d6cd19b81560fd6e(arg0) {
1348
+ // Cast intrinsic for `F64 -> Externref`.
1349
+ const ret = arg0;
1350
+ return addHeapObject(ret);
1351
+ };
1352
+
1129
1353
  export function __wbindgen_object_clone_ref(arg0) {
1130
1354
  const ret = getObject(arg0);
1131
1355
  return addHeapObject(ret);
Binary file
@@ -6,11 +6,14 @@ export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
6
6
  export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
7
7
  export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
8
8
  export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
9
+ export const __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
9
10
  export const convert: (a: number, b: number, c: number, d: number) => void;
10
11
  export const convertBytes: (a: number, b: number, c: number) => void;
11
12
  export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
13
+ export const convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
12
14
  export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
13
15
  export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
16
+ export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
14
17
  export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
15
18
  export const createConversionOptionsHandle: (a: number, b: number) => void;
16
19
  export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
@@ -30,6 +33,19 @@ export const wasminlineimageconfig_set_filenamePrefix: (a: number, b: number, c:
30
33
  export const wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
31
34
  export const wasminlineimagewarning_index: (a: number) => number;
32
35
  export const wasminlineimagewarning_message: (a: number, b: number) => void;
36
+ export const wasmmetadataconfig_extract_document: (a: number) => number;
37
+ export const wasmmetadataconfig_extract_headers: (a: number) => number;
38
+ export const wasmmetadataconfig_extract_images: (a: number) => number;
39
+ export const wasmmetadataconfig_extract_links: (a: number) => number;
40
+ export const wasmmetadataconfig_extract_structured_data: (a: number) => number;
41
+ export const wasmmetadataconfig_max_structured_data_size: (a: number) => number;
42
+ export const wasmmetadataconfig_new: () => number;
43
+ export const wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
44
+ export const wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
45
+ export const wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
46
+ export const wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
47
+ export const wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
48
+ export const wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
33
49
  export const init: () => void;
34
50
  export const __wbindgen_export: (a: number, b: number) => number;
35
51
  export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
package/dist/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "collaborators": [
5
5
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
6
6
  ],
7
- "version": "2.14.1",
7
+ "version": "2.14.3",
8
8
  "license": "MIT",
9
9
  "repository": {
10
10
  "type": "git",
@@ -11,7 +11,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
11
11
  [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
12
12
  [![NuGet](https://img.shields.io/nuget/v/Goldziher.HtmlToMarkdown.svg)](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
13
13
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
14
- [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
14
+ [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
15
15
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
16
16
  [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
17
17
 
@@ -377,7 +377,7 @@ import (
377
377
  "fmt"
378
378
  "log"
379
379
 
380
- "github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown"
380
+ "github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown"
381
381
  )
382
382
 
383
383
  func main() {
@@ -46,6 +46,23 @@ export class WasmInlineImageWarning {
46
46
  readonly message: string;
47
47
  }
48
48
 
49
+ export class WasmMetadataConfig {
50
+ free(): void;
51
+ [Symbol.dispose](): void;
52
+ /**
53
+ * Create a new metadata configuration with defaults
54
+ *
55
+ * All extraction types enabled by default with 1MB structured data limit
56
+ */
57
+ constructor();
58
+ extract_links: boolean;
59
+ extract_images: boolean;
60
+ extract_headers: boolean;
61
+ extract_document: boolean;
62
+ extract_structured_data: boolean;
63
+ max_structured_data_size: number;
64
+ }
65
+
49
66
  /**
50
67
  * Convert HTML to Markdown
51
68
  *
@@ -70,10 +87,56 @@ export function convertBytes(html: Uint8Array, options?: WasmConversionOptions |
70
87
 
71
88
  export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
72
89
 
90
+ /**
91
+ * Convert HTML bytes to Markdown with metadata extraction
92
+ *
93
+ * # Arguments
94
+ *
95
+ * * `html` - The HTML bytes to convert
96
+ * * `options` - Optional conversion options (as a JavaScript object)
97
+ * * `metadata_config` - Metadata extraction configuration
98
+ *
99
+ * # Returns
100
+ *
101
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
102
+ */
103
+ export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
104
+
73
105
  export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
74
106
 
75
107
  export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
76
108
 
109
+ /**
110
+ * Convert HTML to Markdown with metadata extraction
111
+ *
112
+ * # Arguments
113
+ *
114
+ * * `html` - The HTML string to convert
115
+ * * `options` - Optional conversion options (as a JavaScript object)
116
+ * * `metadata_config` - Metadata extraction configuration
117
+ *
118
+ * # Returns
119
+ *
120
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
121
+ *
122
+ * # Example
123
+ *
124
+ * ```javascript
125
+ * import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
126
+ *
127
+ * const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
128
+ * const config = new WasmMetadataConfig();
129
+ * config.extractHeaders = true;
130
+ * config.extractLinks = true;
131
+ *
132
+ * const result = convertWithMetadata(html, null, config);
133
+ * console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
134
+ * console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
135
+ * console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
136
+ * ```
137
+ */
138
+ export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
139
+
77
140
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
78
141
 
79
142
  export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
@@ -244,6 +244,10 @@ const WasmInlineImageWarningFinalization = (typeof FinalizationRegistry === 'und
244
244
  ? { register: () => {}, unregister: () => {} }
245
245
  : new FinalizationRegistry(ptr => wasm.__wbg_wasminlineimagewarning_free(ptr >>> 0, 1));
246
246
 
247
+ const WasmMetadataConfigFinalization = (typeof FinalizationRegistry === 'undefined')
248
+ ? { register: () => {}, unregister: () => {} }
249
+ : new FinalizationRegistry(ptr => wasm.__wbg_wasmmetadataconfig_free(ptr >>> 0, 1));
250
+
247
251
  class WasmConversionOptionsHandle {
248
252
  static __wrap(ptr) {
249
253
  ptr = ptr >>> 0;
@@ -594,6 +598,113 @@ class WasmInlineImageWarning {
594
598
  if (Symbol.dispose) WasmInlineImageWarning.prototype[Symbol.dispose] = WasmInlineImageWarning.prototype.free;
595
599
  exports.WasmInlineImageWarning = WasmInlineImageWarning;
596
600
 
601
+ /**
602
+ * Metadata extraction configuration
603
+ */
604
+ class WasmMetadataConfig {
605
+ __destroy_into_raw() {
606
+ const ptr = this.__wbg_ptr;
607
+ this.__wbg_ptr = 0;
608
+ WasmMetadataConfigFinalization.unregister(this);
609
+ return ptr;
610
+ }
611
+ free() {
612
+ const ptr = this.__destroy_into_raw();
613
+ wasm.__wbg_wasmmetadataconfig_free(ptr, 0);
614
+ }
615
+ /**
616
+ * @returns {boolean}
617
+ */
618
+ get extract_links() {
619
+ const ret = wasm.wasmmetadataconfig_extract_links(this.__wbg_ptr);
620
+ return ret !== 0;
621
+ }
622
+ /**
623
+ * @returns {boolean}
624
+ */
625
+ get extract_images() {
626
+ const ret = wasm.wasmmetadataconfig_extract_images(this.__wbg_ptr);
627
+ return ret !== 0;
628
+ }
629
+ /**
630
+ * @returns {boolean}
631
+ */
632
+ get extract_headers() {
633
+ const ret = wasm.wasmmetadataconfig_extract_headers(this.__wbg_ptr);
634
+ return ret !== 0;
635
+ }
636
+ /**
637
+ * @returns {boolean}
638
+ */
639
+ get extract_document() {
640
+ const ret = wasm.wasmmetadataconfig_extract_document(this.__wbg_ptr);
641
+ return ret !== 0;
642
+ }
643
+ /**
644
+ * @param {boolean} value
645
+ */
646
+ set extract_links(value) {
647
+ wasm.wasmmetadataconfig_set_extract_links(this.__wbg_ptr, value);
648
+ }
649
+ /**
650
+ * @param {boolean} value
651
+ */
652
+ set extract_images(value) {
653
+ wasm.wasmmetadataconfig_set_extract_images(this.__wbg_ptr, value);
654
+ }
655
+ /**
656
+ * @param {boolean} value
657
+ */
658
+ set extract_headers(value) {
659
+ wasm.wasmmetadataconfig_set_extract_headers(this.__wbg_ptr, value);
660
+ }
661
+ /**
662
+ * @param {boolean} value
663
+ */
664
+ set extract_document(value) {
665
+ wasm.wasmmetadataconfig_set_extract_document(this.__wbg_ptr, value);
666
+ }
667
+ /**
668
+ * @returns {boolean}
669
+ */
670
+ get extract_structured_data() {
671
+ const ret = wasm.wasmmetadataconfig_extract_structured_data(this.__wbg_ptr);
672
+ return ret !== 0;
673
+ }
674
+ /**
675
+ * @returns {number}
676
+ */
677
+ get max_structured_data_size() {
678
+ const ret = wasm.wasmmetadataconfig_max_structured_data_size(this.__wbg_ptr);
679
+ return ret >>> 0;
680
+ }
681
+ /**
682
+ * @param {boolean} value
683
+ */
684
+ set extract_structured_data(value) {
685
+ wasm.wasmmetadataconfig_set_extract_structured_data(this.__wbg_ptr, value);
686
+ }
687
+ /**
688
+ * @param {number} value
689
+ */
690
+ set max_structured_data_size(value) {
691
+ wasm.wasmmetadataconfig_set_max_structured_data_size(this.__wbg_ptr, value);
692
+ }
693
+ /**
694
+ * Create a new metadata configuration with defaults
695
+ *
696
+ * All extraction types enabled by default with 1MB structured data limit
697
+ */
698
+ constructor() {
699
+ const ret = wasm.wasmmetadataconfig_new();
700
+ this.__wbg_ptr = ret >>> 0;
701
+ WasmMetadataConfigFinalization.register(this, this.__wbg_ptr, this);
702
+ return this;
703
+ }
704
+ }
705
+ if (Symbol.dispose) WasmMetadataConfig.prototype[Symbol.dispose] = WasmMetadataConfig.prototype.free;
706
+ exports.WasmMetadataConfig = WasmMetadataConfig;
707
+
597
708
  /**
598
709
  * Convert HTML to Markdown
599
710
  *
@@ -702,6 +813,45 @@ function convertBytesWithInlineImages(html, options, image_config) {
702
813
  }
703
814
  exports.convertBytesWithInlineImages = convertBytesWithInlineImages;
704
815
 
816
+ /**
817
+ * Convert HTML bytes to Markdown with metadata extraction
818
+ *
819
+ * # Arguments
820
+ *
821
+ * * `html` - The HTML bytes to convert
822
+ * * `options` - Optional conversion options (as a JavaScript object)
823
+ * * `metadata_config` - Metadata extraction configuration
824
+ *
825
+ * # Returns
826
+ *
827
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
828
+ * @param {Uint8Array} html
829
+ * @param {WasmConversionOptions | null | undefined} [options]
830
+ * @param {WasmMetadataConfig | null} [metadata_config]
831
+ * @returns {{ markdown: string, metadata: object }}
832
+ */
833
+ function convertBytesWithMetadata(html, options, metadata_config) {
834
+ try {
835
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
836
+ let ptr0 = 0;
837
+ if (!isLikeNone(metadata_config)) {
838
+ _assertClass(metadata_config, WasmMetadataConfig);
839
+ ptr0 = metadata_config.__destroy_into_raw();
840
+ }
841
+ wasm.convertBytesWithMetadata(retptr, addHeapObject(html), addHeapObject(options), ptr0);
842
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
843
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
844
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
845
+ if (r2) {
846
+ throw takeObject(r1);
847
+ }
848
+ return takeObject(r0);
849
+ } finally {
850
+ wasm.__wbindgen_add_to_stack_pointer(16);
851
+ }
852
+ }
853
+ exports.convertBytesWithMetadata = convertBytesWithMetadata;
854
+
705
855
  /**
706
856
  * @param {Uint8Array} html
707
857
  * @param {WasmConversionOptionsHandle} handle
@@ -764,6 +914,63 @@ function convertWithInlineImages(html, options, image_config) {
764
914
  }
765
915
  exports.convertWithInlineImages = convertWithInlineImages;
766
916
 
917
+ /**
918
+ * Convert HTML to Markdown with metadata extraction
919
+ *
920
+ * # Arguments
921
+ *
922
+ * * `html` - The HTML string to convert
923
+ * * `options` - Optional conversion options (as a JavaScript object)
924
+ * * `metadata_config` - Metadata extraction configuration
925
+ *
926
+ * # Returns
927
+ *
928
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
929
+ *
930
+ * # Example
931
+ *
932
+ * ```javascript
933
+ * import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
934
+ *
935
+ * const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
936
+ * const config = new WasmMetadataConfig();
937
+ * config.extractHeaders = true;
938
+ * config.extractLinks = true;
939
+ *
940
+ * const result = convertWithMetadata(html, null, config);
941
+ * console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
942
+ * console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
943
+ * console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
944
+ * ```
945
+ * @param {string} html
946
+ * @param {WasmConversionOptions | null | undefined} [options]
947
+ * @param {WasmMetadataConfig | null} [metadata_config]
948
+ * @returns {{ markdown: string, metadata: object }}
949
+ */
950
+ function convertWithMetadata(html, options, metadata_config) {
951
+ try {
952
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
953
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
954
+ const len0 = WASM_VECTOR_LEN;
955
+ let ptr1 = 0;
956
+ if (!isLikeNone(metadata_config)) {
957
+ _assertClass(metadata_config, WasmMetadataConfig);
958
+ ptr1 = metadata_config.__destroy_into_raw();
959
+ }
960
+ wasm.convertWithMetadata(retptr, ptr0, len0, addHeapObject(options), ptr1);
961
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
962
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
963
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
964
+ if (r2) {
965
+ throw takeObject(r1);
966
+ }
967
+ return takeObject(r0);
968
+ } finally {
969
+ wasm.__wbindgen_add_to_stack_pointer(16);
970
+ }
971
+ }
972
+ exports.convertWithMetadata = convertWithMetadata;
973
+
767
974
  /**
768
975
  * @param {string} html
769
976
  * @param {WasmConversionOptionsHandle} handle
@@ -1052,6 +1259,11 @@ exports.__wbg_new_1ba21ce319a06297 = function() {
1052
1259
  return addHeapObject(ret);
1053
1260
  };
1054
1261
 
1262
+ exports.__wbg_new_25f239778d6112b9 = function() {
1263
+ const ret = new Array();
1264
+ return addHeapObject(ret);
1265
+ };
1266
+
1055
1267
  exports.__wbg_new_6421f6084cc5bc5a = function(arg0) {
1056
1268
  const ret = new Uint8Array(getObject(arg0));
1057
1269
  return addHeapObject(ret);
@@ -1090,6 +1302,15 @@ exports.__wbg_set_3f1d0b984ed272ed = function(arg0, arg1, arg2) {
1090
1302
  getObject(arg0)[takeObject(arg1)] = takeObject(arg2);
1091
1303
  };
1092
1304
 
1305
+ exports.__wbg_set_781438a03c0c3c81 = function() { return handleError(function (arg0, arg1, arg2) {
1306
+ const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
1307
+ return ret;
1308
+ }, arguments) };
1309
+
1310
+ exports.__wbg_set_7df433eea03a5c14 = function(arg0, arg1, arg2) {
1311
+ getObject(arg0)[arg1 >>> 0] = takeObject(arg2);
1312
+ };
1313
+
1093
1314
  exports.__wbg_set_efaaf145b9377369 = function(arg0, arg1, arg2) {
1094
1315
  const ret = getObject(arg0).set(getObject(arg1), getObject(arg2));
1095
1316
  return addHeapObject(ret);
@@ -1130,6 +1351,12 @@ exports.__wbindgen_cast_4625c577ab2ec9ee = function(arg0) {
1130
1351
  return addHeapObject(ret);
1131
1352
  };
1132
1353
 
1354
+ exports.__wbindgen_cast_d6cd19b81560fd6e = function(arg0) {
1355
+ // Cast intrinsic for `F64 -> Externref`.
1356
+ const ret = arg0;
1357
+ return addHeapObject(ret);
1358
+ };
1359
+
1133
1360
  exports.__wbindgen_object_clone_ref = function(arg0) {
1134
1361
  const ret = getObject(arg0);
1135
1362
  return addHeapObject(ret);
@@ -6,11 +6,14 @@ export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
6
6
  export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
7
7
  export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
8
8
  export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
9
+ export const __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
9
10
  export const convert: (a: number, b: number, c: number, d: number) => void;
10
11
  export const convertBytes: (a: number, b: number, c: number) => void;
11
12
  export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
13
+ export const convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
12
14
  export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
13
15
  export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
16
+ export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
14
17
  export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
15
18
  export const createConversionOptionsHandle: (a: number, b: number) => void;
16
19
  export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
@@ -30,6 +33,19 @@ export const wasminlineimageconfig_set_filenamePrefix: (a: number, b: number, c:
30
33
  export const wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
31
34
  export const wasminlineimagewarning_index: (a: number) => number;
32
35
  export const wasminlineimagewarning_message: (a: number, b: number) => void;
36
+ export const wasmmetadataconfig_extract_document: (a: number) => number;
37
+ export const wasmmetadataconfig_extract_headers: (a: number) => number;
38
+ export const wasmmetadataconfig_extract_images: (a: number) => number;
39
+ export const wasmmetadataconfig_extract_links: (a: number) => number;
40
+ export const wasmmetadataconfig_extract_structured_data: (a: number) => number;
41
+ export const wasmmetadataconfig_max_structured_data_size: (a: number) => number;
42
+ export const wasmmetadataconfig_new: () => number;
43
+ export const wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
44
+ export const wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
45
+ export const wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
46
+ export const wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
47
+ export const wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
48
+ export const wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
33
49
  export const init: () => void;
34
50
  export const __wbindgen_export: (a: number, b: number) => number;
35
51
  export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
@@ -3,7 +3,7 @@
3
3
  "collaborators": [
4
4
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
5
5
  ],
6
- "version": "2.14.1",
6
+ "version": "2.14.3",
7
7
  "license": "MIT",
8
8
  "repository": {
9
9
  "type": "git",
@@ -11,7 +11,7 @@ High-performance HTML → Markdown conversion powered by Rust. Shipping as a Rus
11
11
  [![Hex.pm](https://img.shields.io/hexpm/v/html_to_markdown.svg)](https://hex.pm/packages/html_to_markdown)
12
12
  [![NuGet](https://img.shields.io/nuget/v/Goldziher.HtmlToMarkdown.svg)](https://www.nuget.org/packages/Goldziher.HtmlToMarkdown/)
13
13
  [![Maven Central](https://img.shields.io/maven-central/v/io.github.goldziher/html-to-markdown.svg)](https://central.sonatype.com/artifact/io.github.goldziher/html-to-markdown)
14
- [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown)
14
+ [![Go Reference](https://pkg.go.dev/badge/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown.svg)](https://pkg.go.dev/github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown)
15
15
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
16
16
  [![Discord](https://img.shields.io/badge/Discord-Join%20our%20community-7289da)](https://discord.gg/pXxagNK2zN)
17
17
 
@@ -377,7 +377,7 @@ import (
377
377
  "fmt"
378
378
  "log"
379
379
 
380
- "github.com/Goldziher/html-to-markdown/packages/go/htmltomarkdown"
380
+ "github.com/Goldziher/html-to-markdown/packages/go/v2/htmltomarkdown"
381
381
  )
382
382
 
383
383
  func main() {
@@ -46,6 +46,23 @@ export class WasmInlineImageWarning {
46
46
  readonly message: string;
47
47
  }
48
48
 
49
+ export class WasmMetadataConfig {
50
+ free(): void;
51
+ [Symbol.dispose](): void;
52
+ /**
53
+ * Create a new metadata configuration with defaults
54
+ *
55
+ * All extraction types enabled by default with 1MB structured data limit
56
+ */
57
+ constructor();
58
+ extract_links: boolean;
59
+ extract_images: boolean;
60
+ extract_headers: boolean;
61
+ extract_document: boolean;
62
+ extract_structured_data: boolean;
63
+ max_structured_data_size: number;
64
+ }
65
+
49
66
  /**
50
67
  * Convert HTML to Markdown
51
68
  *
@@ -70,10 +87,56 @@ export function convertBytes(html: Uint8Array, options?: WasmConversionOptions |
70
87
 
71
88
  export function convertBytesWithInlineImages(html: Uint8Array, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
72
89
 
90
+ /**
91
+ * Convert HTML bytes to Markdown with metadata extraction
92
+ *
93
+ * # Arguments
94
+ *
95
+ * * `html` - The HTML bytes to convert
96
+ * * `options` - Optional conversion options (as a JavaScript object)
97
+ * * `metadata_config` - Metadata extraction configuration
98
+ *
99
+ * # Returns
100
+ *
101
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
102
+ */
103
+ export function convertBytesWithMetadata(html: Uint8Array, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
104
+
73
105
  export function convertBytesWithOptionsHandle(html: Uint8Array, handle: WasmConversionOptionsHandle): string;
74
106
 
75
107
  export function convertWithInlineImages(html: string, options?: WasmConversionOptions | null, image_config?: WasmInlineImageConfig | null): WasmHtmlExtraction;
76
108
 
109
+ /**
110
+ * Convert HTML to Markdown with metadata extraction
111
+ *
112
+ * # Arguments
113
+ *
114
+ * * `html` - The HTML string to convert
115
+ * * `options` - Optional conversion options (as a JavaScript object)
116
+ * * `metadata_config` - Metadata extraction configuration
117
+ *
118
+ * # Returns
119
+ *
120
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
121
+ *
122
+ * # Example
123
+ *
124
+ * ```javascript
125
+ * import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
126
+ *
127
+ * const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
128
+ * const config = new WasmMetadataConfig();
129
+ * config.extractHeaders = true;
130
+ * config.extractLinks = true;
131
+ *
132
+ * const result = convertWithMetadata(html, null, config);
133
+ * console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
134
+ * console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
135
+ * console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
136
+ * ```
137
+ */
138
+ export function convertWithMetadata(html: string, options?: WasmConversionOptions | null, metadata_config?: WasmMetadataConfig | null): any;
139
+
77
140
  export function convertWithOptionsHandle(html: string, handle: WasmConversionOptionsHandle): string;
78
141
 
79
142
  export function createConversionOptionsHandle(options?: WasmConversionOptions | null): WasmConversionOptionsHandle;
@@ -92,11 +155,14 @@ export interface InitOutput {
92
155
  readonly __wbg_wasminlineimage_free: (a: number, b: number) => void;
93
156
  readonly __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
94
157
  readonly __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
158
+ readonly __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
95
159
  readonly convert: (a: number, b: number, c: number, d: number) => void;
96
160
  readonly convertBytes: (a: number, b: number, c: number) => void;
97
161
  readonly convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
162
+ readonly convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
98
163
  readonly convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
99
164
  readonly convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
165
+ readonly convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
100
166
  readonly convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
101
167
  readonly createConversionOptionsHandle: (a: number, b: number) => void;
102
168
  readonly wasmconversionoptionshandle_new: (a: number, b: number) => void;
@@ -116,6 +182,19 @@ export interface InitOutput {
116
182
  readonly wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
117
183
  readonly wasminlineimagewarning_index: (a: number) => number;
118
184
  readonly wasminlineimagewarning_message: (a: number, b: number) => void;
185
+ readonly wasmmetadataconfig_extract_document: (a: number) => number;
186
+ readonly wasmmetadataconfig_extract_headers: (a: number) => number;
187
+ readonly wasmmetadataconfig_extract_images: (a: number) => number;
188
+ readonly wasmmetadataconfig_extract_links: (a: number) => number;
189
+ readonly wasmmetadataconfig_extract_structured_data: (a: number) => number;
190
+ readonly wasmmetadataconfig_max_structured_data_size: (a: number) => number;
191
+ readonly wasmmetadataconfig_new: () => number;
192
+ readonly wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
193
+ readonly wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
194
+ readonly wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
195
+ readonly wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
196
+ readonly wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
197
+ readonly wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
119
198
  readonly init: () => void;
120
199
  readonly __wbindgen_export: (a: number, b: number) => number;
121
200
  readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
@@ -250,6 +250,10 @@ const WasmInlineImageWarningFinalization = (typeof FinalizationRegistry === 'und
250
250
  ? { register: () => {}, unregister: () => {} }
251
251
  : new FinalizationRegistry(ptr => wasm.__wbg_wasminlineimagewarning_free(ptr >>> 0, 1));
252
252
 
253
+ const WasmMetadataConfigFinalization = (typeof FinalizationRegistry === 'undefined')
254
+ ? { register: () => {}, unregister: () => {} }
255
+ : new FinalizationRegistry(ptr => wasm.__wbg_wasmmetadataconfig_free(ptr >>> 0, 1));
256
+
253
257
  export class WasmConversionOptionsHandle {
254
258
  static __wrap(ptr) {
255
259
  ptr = ptr >>> 0;
@@ -595,6 +599,112 @@ export class WasmInlineImageWarning {
595
599
  }
596
600
  if (Symbol.dispose) WasmInlineImageWarning.prototype[Symbol.dispose] = WasmInlineImageWarning.prototype.free;
597
601
 
602
+ /**
603
+ * Metadata extraction configuration
604
+ */
605
+ export class WasmMetadataConfig {
606
+ __destroy_into_raw() {
607
+ const ptr = this.__wbg_ptr;
608
+ this.__wbg_ptr = 0;
609
+ WasmMetadataConfigFinalization.unregister(this);
610
+ return ptr;
611
+ }
612
+ free() {
613
+ const ptr = this.__destroy_into_raw();
614
+ wasm.__wbg_wasmmetadataconfig_free(ptr, 0);
615
+ }
616
+ /**
617
+ * @returns {boolean}
618
+ */
619
+ get extract_links() {
620
+ const ret = wasm.wasmmetadataconfig_extract_links(this.__wbg_ptr);
621
+ return ret !== 0;
622
+ }
623
+ /**
624
+ * @returns {boolean}
625
+ */
626
+ get extract_images() {
627
+ const ret = wasm.wasmmetadataconfig_extract_images(this.__wbg_ptr);
628
+ return ret !== 0;
629
+ }
630
+ /**
631
+ * @returns {boolean}
632
+ */
633
+ get extract_headers() {
634
+ const ret = wasm.wasmmetadataconfig_extract_headers(this.__wbg_ptr);
635
+ return ret !== 0;
636
+ }
637
+ /**
638
+ * @returns {boolean}
639
+ */
640
+ get extract_document() {
641
+ const ret = wasm.wasmmetadataconfig_extract_document(this.__wbg_ptr);
642
+ return ret !== 0;
643
+ }
644
+ /**
645
+ * @param {boolean} value
646
+ */
647
+ set extract_links(value) {
648
+ wasm.wasmmetadataconfig_set_extract_links(this.__wbg_ptr, value);
649
+ }
650
+ /**
651
+ * @param {boolean} value
652
+ */
653
+ set extract_images(value) {
654
+ wasm.wasmmetadataconfig_set_extract_images(this.__wbg_ptr, value);
655
+ }
656
+ /**
657
+ * @param {boolean} value
658
+ */
659
+ set extract_headers(value) {
660
+ wasm.wasmmetadataconfig_set_extract_headers(this.__wbg_ptr, value);
661
+ }
662
+ /**
663
+ * @param {boolean} value
664
+ */
665
+ set extract_document(value) {
666
+ wasm.wasmmetadataconfig_set_extract_document(this.__wbg_ptr, value);
667
+ }
668
+ /**
669
+ * @returns {boolean}
670
+ */
671
+ get extract_structured_data() {
672
+ const ret = wasm.wasmmetadataconfig_extract_structured_data(this.__wbg_ptr);
673
+ return ret !== 0;
674
+ }
675
+ /**
676
+ * @returns {number}
677
+ */
678
+ get max_structured_data_size() {
679
+ const ret = wasm.wasmmetadataconfig_max_structured_data_size(this.__wbg_ptr);
680
+ return ret >>> 0;
681
+ }
682
+ /**
683
+ * @param {boolean} value
684
+ */
685
+ set extract_structured_data(value) {
686
+ wasm.wasmmetadataconfig_set_extract_structured_data(this.__wbg_ptr, value);
687
+ }
688
+ /**
689
+ * @param {number} value
690
+ */
691
+ set max_structured_data_size(value) {
692
+ wasm.wasmmetadataconfig_set_max_structured_data_size(this.__wbg_ptr, value);
693
+ }
694
+ /**
695
+ * Create a new metadata configuration with defaults
696
+ *
697
+ * All extraction types enabled by default with 1MB structured data limit
698
+ */
699
+ constructor() {
700
+ const ret = wasm.wasmmetadataconfig_new();
701
+ this.__wbg_ptr = ret >>> 0;
702
+ WasmMetadataConfigFinalization.register(this, this.__wbg_ptr, this);
703
+ return this;
704
+ }
705
+ }
706
+ if (Symbol.dispose) WasmMetadataConfig.prototype[Symbol.dispose] = WasmMetadataConfig.prototype.free;
707
+
598
708
  /**
599
709
  * Convert HTML to Markdown
600
710
  *
@@ -700,6 +810,44 @@ export function convertBytesWithInlineImages(html, options, image_config) {
700
810
  }
701
811
  }
702
812
 
813
+ /**
814
+ * Convert HTML bytes to Markdown with metadata extraction
815
+ *
816
+ * # Arguments
817
+ *
818
+ * * `html` - The HTML bytes to convert
819
+ * * `options` - Optional conversion options (as a JavaScript object)
820
+ * * `metadata_config` - Metadata extraction configuration
821
+ *
822
+ * # Returns
823
+ *
824
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
825
+ * @param {Uint8Array} html
826
+ * @param {WasmConversionOptions | null | undefined} [options]
827
+ * @param {WasmMetadataConfig | null} [metadata_config]
828
+ * @returns {{ markdown: string, metadata: Record<string, unknown> }}
829
+ */
830
+ export function convertBytesWithMetadata(html, options, metadata_config) {
831
+ try {
832
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
833
+ let ptr0 = 0;
834
+ if (!isLikeNone(metadata_config)) {
835
+ _assertClass(metadata_config, WasmMetadataConfig);
836
+ ptr0 = metadata_config.__destroy_into_raw();
837
+ }
838
+ wasm.convertBytesWithMetadata(retptr, addHeapObject(html), addHeapObject(options), ptr0);
839
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
840
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
841
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
842
+ if (r2) {
843
+ throw takeObject(r1);
844
+ }
845
+ return takeObject(r0);
846
+ } finally {
847
+ wasm.__wbindgen_add_to_stack_pointer(16);
848
+ }
849
+ }
850
+
703
851
  /**
704
852
  * @param {Uint8Array} html
705
853
  * @param {WasmConversionOptionsHandle} handle
@@ -760,6 +908,62 @@ export function convertWithInlineImages(html, options, image_config) {
760
908
  }
761
909
  }
762
910
 
911
+ /**
912
+ * Convert HTML to Markdown with metadata extraction
913
+ *
914
+ * # Arguments
915
+ *
916
+ * * `html` - The HTML string to convert
917
+ * * `options` - Optional conversion options (as a JavaScript object)
918
+ * * `metadata_config` - Metadata extraction configuration
919
+ *
920
+ * # Returns
921
+ *
922
+ * JavaScript object with `markdown` (string) and `metadata` (object) fields
923
+ *
924
+ * # Example
925
+ *
926
+ * ```javascript
927
+ * import { convertWithMetadata, WasmMetadataConfig } from 'html-to-markdown-wasm';
928
+ *
929
+ * const html = '<h1>Hello World</h1><a href="https://example.com">Link</a>';
930
+ * const config = new WasmMetadataConfig();
931
+ * config.extract_headers = true;
932
+ * config.extract_links = true;
933
+ *
934
+ * const result = convertWithMetadata(html, null, config);
935
+ * console.log(result.markdown); // # Hello World\n\n[Link](https://example.com)
936
+ * console.log(result.metadata.headers); // [{ level: 1, text: "Hello World", ... }]
937
+ * console.log(result.metadata.links); // [{ href: "https://example.com", text: "Link", ... }]
938
+ * ```
939
+ * @param {string} html
940
+ * @param {WasmConversionOptions | null | undefined} [options]
941
+ * @param {WasmMetadataConfig | null} [metadata_config]
942
+ * @returns {{ markdown: string, metadata: Record<string, unknown> }}
943
+ */
944
+ export function convertWithMetadata(html, options, metadata_config) {
945
+ try {
946
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
947
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
948
+ const len0 = WASM_VECTOR_LEN;
949
+ let ptr1 = 0;
950
+ if (!isLikeNone(metadata_config)) {
951
+ _assertClass(metadata_config, WasmMetadataConfig);
952
+ ptr1 = metadata_config.__destroy_into_raw();
953
+ }
954
+ wasm.convertWithMetadata(retptr, ptr0, len0, addHeapObject(options), ptr1);
955
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
956
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
957
+ var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true);
958
+ if (r2) {
959
+ throw takeObject(r1);
960
+ }
961
+ return takeObject(r0);
962
+ } finally {
963
+ wasm.__wbindgen_add_to_stack_pointer(16);
964
+ }
965
+ }
966
+
763
967
  /**
764
968
  * @param {string} html
765
969
  * @param {WasmConversionOptionsHandle} handle
@@ -1043,6 +1247,10 @@ function __wbg_get_imports() {
1043
1247
  const ret = new Object();
1044
1248
  return addHeapObject(ret);
1045
1249
  };
1250
+ imports.wbg.__wbg_new_25f239778d6112b9 = function() {
1251
+ const ret = new Array();
1252
+ return addHeapObject(ret);
1253
+ };
1046
1254
  imports.wbg.__wbg_new_6421f6084cc5bc5a = function(arg0) {
1047
1255
  const ret = new Uint8Array(getObject(arg0));
1048
1256
  return addHeapObject(ret);
@@ -1073,6 +1281,13 @@ function __wbg_get_imports() {
1073
1281
  imports.wbg.__wbg_set_3f1d0b984ed272ed = function(arg0, arg1, arg2) {
1074
1282
  getObject(arg0)[takeObject(arg1)] = takeObject(arg2);
1075
1283
  };
1284
+ imports.wbg.__wbg_set_781438a03c0c3c81 = function() { return handleError(function (arg0, arg1, arg2) {
1285
+ const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
1286
+ return ret;
1287
+ }, arguments) };
1288
+ imports.wbg.__wbg_set_7df433eea03a5c14 = function(arg0, arg1, arg2) {
1289
+ getObject(arg0)[arg1 >>> 0] = takeObject(arg2);
1290
+ };
1076
1291
  imports.wbg.__wbg_set_efaaf145b9377369 = function(arg0, arg1, arg2) {
1077
1292
  const ret = getObject(arg0).set(getObject(arg1), getObject(arg2));
1078
1293
  return addHeapObject(ret);
@@ -1106,6 +1321,11 @@ function __wbg_get_imports() {
1106
1321
  const ret = BigInt.asUintN(64, arg0);
1107
1322
  return addHeapObject(ret);
1108
1323
  };
1324
+ imports.wbg.__wbindgen_cast_d6cd19b81560fd6e = function(arg0) {
1325
+ // Cast intrinsic for `F64 -> Externref`.
1326
+ const ret = arg0;
1327
+ return addHeapObject(ret);
1328
+ };
1109
1329
  imports.wbg.__wbindgen_object_clone_ref = function(arg0) {
1110
1330
  const ret = getObject(arg0);
1111
1331
  return addHeapObject(ret);
@@ -6,11 +6,14 @@ export const __wbg_wasmhtmlextraction_free: (a: number, b: number) => void;
6
6
  export const __wbg_wasminlineimage_free: (a: number, b: number) => void;
7
7
  export const __wbg_wasminlineimageconfig_free: (a: number, b: number) => void;
8
8
  export const __wbg_wasminlineimagewarning_free: (a: number, b: number) => void;
9
+ export const __wbg_wasmmetadataconfig_free: (a: number, b: number) => void;
9
10
  export const convert: (a: number, b: number, c: number, d: number) => void;
10
11
  export const convertBytes: (a: number, b: number, c: number) => void;
11
12
  export const convertBytesWithInlineImages: (a: number, b: number, c: number, d: number) => void;
13
+ export const convertBytesWithMetadata: (a: number, b: number, c: number, d: number) => void;
12
14
  export const convertBytesWithOptionsHandle: (a: number, b: number, c: number) => void;
13
15
  export const convertWithInlineImages: (a: number, b: number, c: number, d: number, e: number) => void;
16
+ export const convertWithMetadata: (a: number, b: number, c: number, d: number, e: number) => void;
14
17
  export const convertWithOptionsHandle: (a: number, b: number, c: number, d: number) => void;
15
18
  export const createConversionOptionsHandle: (a: number, b: number) => void;
16
19
  export const wasmconversionoptionshandle_new: (a: number, b: number) => void;
@@ -30,6 +33,19 @@ export const wasminlineimageconfig_set_filenamePrefix: (a: number, b: number, c:
30
33
  export const wasminlineimageconfig_set_inferDimensions: (a: number, b: number) => void;
31
34
  export const wasminlineimagewarning_index: (a: number) => number;
32
35
  export const wasminlineimagewarning_message: (a: number, b: number) => void;
36
+ export const wasmmetadataconfig_extract_document: (a: number) => number;
37
+ export const wasmmetadataconfig_extract_headers: (a: number) => number;
38
+ export const wasmmetadataconfig_extract_images: (a: number) => number;
39
+ export const wasmmetadataconfig_extract_links: (a: number) => number;
40
+ export const wasmmetadataconfig_extract_structured_data: (a: number) => number;
41
+ export const wasmmetadataconfig_max_structured_data_size: (a: number) => number;
42
+ export const wasmmetadataconfig_new: () => number;
43
+ export const wasmmetadataconfig_set_extract_document: (a: number, b: number) => void;
44
+ export const wasmmetadataconfig_set_extract_headers: (a: number, b: number) => void;
45
+ export const wasmmetadataconfig_set_extract_images: (a: number, b: number) => void;
46
+ export const wasmmetadataconfig_set_extract_links: (a: number, b: number) => void;
47
+ export const wasmmetadataconfig_set_extract_structured_data: (a: number, b: number) => void;
48
+ export const wasmmetadataconfig_set_max_structured_data_size: (a: number, b: number) => void;
33
49
  export const init: () => void;
34
50
  export const __wbindgen_export: (a: number, b: number) => number;
35
51
  export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
@@ -4,7 +4,7 @@
4
4
  "collaborators": [
5
5
  "Na'aman Hirschfeld <nhirschfeld@gmail.com>"
6
6
  ],
7
- "version": "2.14.1",
7
+ "version": "2.14.3",
8
8
  "license": "MIT",
9
9
  "repository": {
10
10
  "type": "git",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "html-to-markdown-wasm",
3
- "version": "2.14.1",
3
+ "version": "2.14.3",
4
4
  "description": "High-performance HTML to Markdown converter - WebAssembly bindings",
5
5
  "main": "dist/html_to_markdown_wasm.js",
6
6
  "types": "dist/html_to_markdown_wasm.d.ts",