@kreuzberg/html-to-markdown-wasm 3.4.0-rc.8 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +33 -63
- package/{dist-node → pkg/bundler}/html_to_markdown_wasm.d.ts +45 -136
- package/pkg/bundler/html_to_markdown_wasm.js +9 -0
- package/{dist → pkg/bundler}/html_to_markdown_wasm_bg.js +149 -52
- package/{dist-web → pkg/bundler}/html_to_markdown_wasm_bg.wasm +0 -0
- package/{dist-web → pkg/bundler}/html_to_markdown_wasm_bg.wasm.d.ts +10 -3
- package/{dist → pkg/bundler}/package.json +6 -5
- package/{dist → pkg/deno}/html_to_markdown_wasm.d.ts +45 -139
- package/pkg/deno/html_to_markdown_wasm.js +5054 -0
- package/{dist → pkg/deno}/html_to_markdown_wasm_bg.wasm +0 -0
- package/{dist → pkg/deno}/html_to_markdown_wasm_bg.wasm.d.ts +10 -3
- package/pkg/nodejs/html_to_markdown_wasm.d.ts +1020 -0
- package/{dist-node → pkg/nodejs}/html_to_markdown_wasm.js +150 -52
- package/{dist-node → pkg/nodejs}/html_to_markdown_wasm_bg.wasm +0 -0
- package/{dist-node → pkg/nodejs}/html_to_markdown_wasm_bg.wasm.d.ts +10 -3
- package/{dist-node → pkg/nodejs}/package.json +5 -5
- package/{dist-web → pkg/web}/html_to_markdown_wasm.d.ts +55 -139
- package/{dist-web → pkg/web}/html_to_markdown_wasm.js +149 -52
- package/pkg/web/html_to_markdown_wasm_bg.wasm +0 -0
- package/pkg/web/html_to_markdown_wasm_bg.wasm.d.ts +394 -0
- package/{dist-web → pkg/web}/package.json +5 -5
- package/dist/html_to_markdown_wasm.js +0 -116
package/package.json
CHANGED
|
@@ -1,65 +1,35 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
"wasm",
|
|
36
|
-
"webassembly"
|
|
37
|
-
],
|
|
38
|
-
"files": [
|
|
39
|
-
"dist",
|
|
40
|
-
"dist-node",
|
|
41
|
-
"dist-web",
|
|
42
|
-
"README.md"
|
|
43
|
-
],
|
|
44
|
-
"scripts": {
|
|
45
|
-
"build": "wasm-pack build --target bundler --out-dir dist && node ./scripts/patch-bundler-entry.js",
|
|
46
|
-
"build:nodejs": "wasm-pack build --target nodejs --out-dir dist-node && node ./scripts/patch-bundler-entry.js dist-node --types-only",
|
|
47
|
-
"build:web": "wasm-pack build --target web --out-dir dist-web && node ./scripts/patch-bundler-entry.js dist-web --types-only",
|
|
48
|
-
"build:all": "pnpm run build && pnpm run build:nodejs && pnpm run build:web && pnpm run cleanup:gitignore",
|
|
49
|
-
"cleanup:gitignore": "node ./scripts/cleanup-gitignore.js",
|
|
50
|
-
"test": "vitest run",
|
|
51
|
-
"test:watch": "vitest",
|
|
52
|
-
"test:wasm-pack": "wasm-pack test --headless --chrome",
|
|
53
|
-
"clean": "rm -rf dist dist-node dist-web node_modules pkg"
|
|
54
|
-
},
|
|
55
|
-
"devDependencies": {
|
|
56
|
-
"@types/node": "^25.6.0",
|
|
57
|
-
"tsx": "^4.21.0",
|
|
58
|
-
"vitest": "^4.1.5",
|
|
59
|
-
"wasm-pack": "^0.14.0"
|
|
60
|
-
},
|
|
61
|
-
"publishConfig": {
|
|
62
|
-
"registry": "https://registry.npmjs.org/",
|
|
63
|
-
"access": "public"
|
|
64
|
-
}
|
|
2
|
+
"name": "@kreuzberg/html-to-markdown-wasm",
|
|
3
|
+
"version": "3.4.0",
|
|
4
|
+
"private": false,
|
|
5
|
+
"description": "High-performance HTML to Markdown converter",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/kreuzberg-dev/html-to-markdown",
|
|
10
|
+
"directory": "crates/html-to-markdown-wasm"
|
|
11
|
+
},
|
|
12
|
+
"files": [
|
|
13
|
+
"pkg",
|
|
14
|
+
"*.wasm",
|
|
15
|
+
"*.d.ts",
|
|
16
|
+
"README.md"
|
|
17
|
+
],
|
|
18
|
+
"type": "module",
|
|
19
|
+
"main": "pkg/nodejs/html_to_markdown_wasm.js",
|
|
20
|
+
"module": "pkg/web/html_to_markdown_wasm.js",
|
|
21
|
+
"types": "pkg/nodejs/html_to_markdown_wasm.d.ts",
|
|
22
|
+
"scripts": {
|
|
23
|
+
"build": "wasm-pack build --target nodejs --out-dir pkg/nodejs",
|
|
24
|
+
"build:ci": "wasm-pack build --release --target nodejs --out-dir pkg/nodejs",
|
|
25
|
+
"build:wasm:web": "wasm-pack build --release --target web --out-dir pkg/web",
|
|
26
|
+
"build:wasm:bundler": "wasm-pack build --release --target bundler --out-dir pkg/bundler",
|
|
27
|
+
"build:wasm:nodejs": "wasm-pack build --release --target nodejs --out-dir pkg/nodejs",
|
|
28
|
+
"build:wasm:deno": "wasm-pack build --release --target deno --out-dir pkg/deno",
|
|
29
|
+
"build:all": "npm run build:wasm:web && npm run build:wasm:bundler && npm run build:wasm:nodejs && npm run build:wasm:deno && find pkg -name .gitignore -delete",
|
|
30
|
+
"test": "vitest run",
|
|
31
|
+
"test:watch": "vitest watch",
|
|
32
|
+
"test:coverage": "vitest run --coverage",
|
|
33
|
+
"clean": "rm -rf pkg dist"
|
|
34
|
+
}
|
|
65
35
|
}
|
|
@@ -63,7 +63,7 @@ export class WasmConversionOptions {
|
|
|
63
63
|
* Create from a partial update, applying to defaults.
|
|
64
64
|
*/
|
|
65
65
|
static fromUpdate(update: WasmConversionOptionsUpdate): WasmConversionOptions;
|
|
66
|
-
constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, exclude_selectors?: string[] | null, max_depth?: number | null);
|
|
66
|
+
constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptions | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, exclude_selectors?: string[] | null, max_depth?: number | null, visitor?: WasmVisitorHandle | null);
|
|
67
67
|
autolinks: boolean;
|
|
68
68
|
brInTables: boolean;
|
|
69
69
|
bullets: string;
|
|
@@ -102,6 +102,8 @@ export class WasmConversionOptions {
|
|
|
102
102
|
strongEmSymbol: string;
|
|
103
103
|
subSymbol: string;
|
|
104
104
|
supSymbol: string;
|
|
105
|
+
get visitor(): WasmVisitorHandle | undefined;
|
|
106
|
+
set visitor(value: WasmVisitorHandle | null | undefined);
|
|
105
107
|
whitespaceMode: WasmWhitespaceMode;
|
|
106
108
|
wrap: boolean;
|
|
107
109
|
wrapWidth: number;
|
|
@@ -140,6 +142,10 @@ export class WasmConversionOptionsBuilder {
|
|
|
140
142
|
* Set the list of HTML tag names whose content is stripped from output.
|
|
141
143
|
*/
|
|
142
144
|
stripTags(tags: string[]): WasmConversionOptionsBuilder;
|
|
145
|
+
/**
|
|
146
|
+
* Set the visitor used during conversion.
|
|
147
|
+
*/
|
|
148
|
+
visitor(visitor?: WasmVisitorHandle | null): WasmConversionOptionsBuilder;
|
|
143
149
|
}
|
|
144
150
|
|
|
145
151
|
/**
|
|
@@ -151,7 +157,7 @@ export class WasmConversionOptionsBuilder {
|
|
|
151
157
|
export class WasmConversionOptionsUpdate {
|
|
152
158
|
free(): void;
|
|
153
159
|
[Symbol.dispose](): void;
|
|
154
|
-
constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, max_depth?: number | null, exclude_selectors?: string[] | null);
|
|
160
|
+
constructor(heading_style?: WasmHeadingStyle | null, list_indent_type?: WasmListIndentType | null, list_indent_width?: number | null, bullets?: string | null, strong_em_symbol?: string | null, escape_asterisks?: boolean | null, escape_underscores?: boolean | null, escape_misc?: boolean | null, escape_ascii?: boolean | null, code_language?: string | null, autolinks?: boolean | null, default_title?: boolean | null, br_in_tables?: boolean | null, highlight_style?: WasmHighlightStyle | null, extract_metadata?: boolean | null, whitespace_mode?: WasmWhitespaceMode | null, strip_newlines?: boolean | null, wrap?: boolean | null, wrap_width?: number | null, convert_as_inline?: boolean | null, sub_symbol?: string | null, sup_symbol?: string | null, newline_style?: WasmNewlineStyle | null, code_block_style?: WasmCodeBlockStyle | null, keep_inline_images_in?: string[] | null, preprocessing?: WasmPreprocessingOptionsUpdate | null, encoding?: string | null, debug?: boolean | null, strip_tags?: string[] | null, preserve_tags?: string[] | null, skip_images?: boolean | null, link_style?: WasmLinkStyle | null, output_format?: WasmOutputFormat | null, include_document_structure?: boolean | null, extract_images?: boolean | null, max_image_size?: bigint | null, capture_svg?: boolean | null, infer_dimensions?: boolean | null, max_depth?: number | null, exclude_selectors?: string[] | null, visitor?: WasmVisitorHandle | null);
|
|
155
161
|
get autolinks(): boolean | undefined;
|
|
156
162
|
set autolinks(value: boolean | null | undefined);
|
|
157
163
|
get brInTables(): boolean | undefined;
|
|
@@ -226,6 +232,8 @@ export class WasmConversionOptionsUpdate {
|
|
|
226
232
|
set subSymbol(value: string | null | undefined);
|
|
227
233
|
get supSymbol(): string | undefined;
|
|
228
234
|
set supSymbol(value: string | null | undefined);
|
|
235
|
+
get visitor(): WasmVisitorHandle | undefined;
|
|
236
|
+
set visitor(value: WasmVisitorHandle | null | undefined);
|
|
229
237
|
get whitespaceMode(): WasmWhitespaceMode | undefined;
|
|
230
238
|
set whitespaceMode(value: WasmWhitespaceMode | null | undefined);
|
|
231
239
|
get wrap(): boolean | undefined;
|
|
@@ -273,7 +281,6 @@ export class WasmConversionResult {
|
|
|
273
281
|
* # Examples
|
|
274
282
|
*
|
|
275
283
|
* ```
|
|
276
|
-
* # use html_to_markdown_rs::metadata::DocumentMetadata;
|
|
277
284
|
* let doc = DocumentMetadata {
|
|
278
285
|
* title: Some("My Article".to_string()),
|
|
279
286
|
* description: Some("A great article about Rust".to_string()),
|
|
@@ -363,7 +370,6 @@ export class WasmGridCell {
|
|
|
363
370
|
* # Examples
|
|
364
371
|
*
|
|
365
372
|
* ```
|
|
366
|
-
* # use html_to_markdown_rs::metadata::HeaderMetadata;
|
|
367
373
|
* let header = HeaderMetadata {
|
|
368
374
|
* level: 1,
|
|
369
375
|
* text: "Main Title".to_string(),
|
|
@@ -389,7 +395,6 @@ export class WasmHeaderMetadata {
|
|
|
389
395
|
* # Examples
|
|
390
396
|
*
|
|
391
397
|
* ```
|
|
392
|
-
* # use html_to_markdown_rs::metadata::HeaderMetadata;
|
|
393
398
|
* let valid = HeaderMetadata {
|
|
394
399
|
* level: 3,
|
|
395
400
|
* text: "Title".to_string(),
|
|
@@ -451,7 +456,6 @@ export enum WasmHighlightStyle {
|
|
|
451
456
|
* # Examples
|
|
452
457
|
*
|
|
453
458
|
* ```
|
|
454
|
-
* # use html_to_markdown_rs::metadata::HtmlMetadata;
|
|
455
459
|
* let metadata = HtmlMetadata {
|
|
456
460
|
* document: Default::default(),
|
|
457
461
|
* headers: Vec::new(),
|
|
@@ -483,7 +487,6 @@ export class WasmHtmlMetadata {
|
|
|
483
487
|
* # Examples
|
|
484
488
|
*
|
|
485
489
|
* ```
|
|
486
|
-
* # use html_to_markdown_rs::metadata::{ImageMetadata, ImageType};
|
|
487
490
|
* let img = ImageMetadata {
|
|
488
491
|
* src: "https://example.com/image.jpg".to_string(),
|
|
489
492
|
* alt: Some("An example image".to_string()),
|
|
@@ -531,7 +534,6 @@ export enum WasmImageType {
|
|
|
531
534
|
* # Examples
|
|
532
535
|
*
|
|
533
536
|
* ```
|
|
534
|
-
* # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
|
|
535
537
|
* let link = LinkMetadata {
|
|
536
538
|
* href: "https://example.com".to_string(),
|
|
537
539
|
* text: "Example".to_string(),
|
|
@@ -562,7 +564,6 @@ export class WasmLinkMetadata {
|
|
|
562
564
|
* # Examples
|
|
563
565
|
*
|
|
564
566
|
* ```
|
|
565
|
-
* # use html_to_markdown_rs::metadata::{LinkMetadata, LinkType};
|
|
566
567
|
* assert_eq!(LinkMetadata::classify_link("#section"), LinkType::Anchor);
|
|
567
568
|
* assert_eq!(LinkMetadata::classify_link("mailto:test@example.com"), LinkType::Email);
|
|
568
569
|
* assert_eq!(LinkMetadata::classify_link("tel:+1234567890"), LinkType::Phone);
|
|
@@ -867,7 +868,6 @@ export class WasmProcessingWarning {
|
|
|
867
868
|
* # Examples
|
|
868
869
|
*
|
|
869
870
|
* ```
|
|
870
|
-
* # use html_to_markdown_rs::metadata::{StructuredData, StructuredDataType};
|
|
871
871
|
* let schema = StructuredData {
|
|
872
872
|
* data_type: StructuredDataType::JsonLd,
|
|
873
873
|
* raw_json: r#"{"@context":"https://schema.org","@type":"Article"}"#.to_string(),
|
|
@@ -961,6 +961,17 @@ export enum WasmVisitResult {
|
|
|
961
961
|
Error = 4,
|
|
962
962
|
}
|
|
963
963
|
|
|
964
|
+
/**
|
|
965
|
+
* Handle for a visitor (Rc-wrapped `RefCell` for interior mutability).
|
|
966
|
+
*
|
|
967
|
+
* This allows visitors to be passed around and shared while still being mutable.
|
|
968
|
+
*/
|
|
969
|
+
export class WasmVisitorHandle {
|
|
970
|
+
free(): void;
|
|
971
|
+
[Symbol.dispose](): void;
|
|
972
|
+
constructor(visitor: any);
|
|
973
|
+
}
|
|
974
|
+
|
|
964
975
|
/**
|
|
965
976
|
* Categories of processing warnings.
|
|
966
977
|
*/
|
|
@@ -983,129 +994,27 @@ export enum WasmWhitespaceMode {
|
|
|
983
994
|
Strict = 1,
|
|
984
995
|
}
|
|
985
996
|
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
strongEmSymbol?: string;
|
|
1011
|
-
escapeAsterisks?: boolean;
|
|
1012
|
-
escapeUnderscores?: boolean;
|
|
1013
|
-
escapeMisc?: boolean;
|
|
1014
|
-
escapeAscii?: boolean;
|
|
1015
|
-
codeLanguage?: string;
|
|
1016
|
-
autolinks?: boolean;
|
|
1017
|
-
defaultTitle?: boolean;
|
|
1018
|
-
brInTables?: boolean;
|
|
1019
|
-
hocrSpatialTables?: boolean;
|
|
1020
|
-
highlightStyle?: WasmHighlightStyle;
|
|
1021
|
-
extractMetadata?: boolean;
|
|
1022
|
-
whitespaceMode?: WasmWhitespaceMode;
|
|
1023
|
-
stripNewlines?: boolean;
|
|
1024
|
-
wrap?: boolean;
|
|
1025
|
-
wrapWidth?: number;
|
|
1026
|
-
convertAsInline?: boolean;
|
|
1027
|
-
subSymbol?: string;
|
|
1028
|
-
supSymbol?: string;
|
|
1029
|
-
newlineStyle?: WasmNewlineStyle;
|
|
1030
|
-
codeBlockStyle?: WasmCodeBlockStyle;
|
|
1031
|
-
keepInlineImagesIn?: string[];
|
|
1032
|
-
preprocessing?: WasmPreprocessingOptions | null;
|
|
1033
|
-
encoding?: string;
|
|
1034
|
-
debug?: boolean;
|
|
1035
|
-
stripTags?: string[];
|
|
1036
|
-
preserveTags?: string[];
|
|
1037
|
-
skipImages?: boolean;
|
|
1038
|
-
outputFormat?: WasmOutputFormat;
|
|
1039
|
-
includeDocumentStructure?: boolean;
|
|
1040
|
-
extractImages?: boolean;
|
|
1041
|
-
maxImageSize?: number;
|
|
1042
|
-
captureSvg?: boolean;
|
|
1043
|
-
inferDimensions?: boolean;
|
|
1044
|
-
}
|
|
1045
|
-
|
|
1046
|
-
/** A single cell in a structured table grid. */
|
|
1047
|
-
export interface WasmGridCell {
|
|
1048
|
-
content: string;
|
|
1049
|
-
row: number;
|
|
1050
|
-
col: number;
|
|
1051
|
-
rowSpan: number;
|
|
1052
|
-
colSpan: number;
|
|
1053
|
-
isHeader: boolean;
|
|
1054
|
-
}
|
|
1055
|
-
|
|
1056
|
-
/** Structured table grid with cell-level data. */
|
|
1057
|
-
export interface WasmTableGrid {
|
|
1058
|
-
rows: number;
|
|
1059
|
-
cols: number;
|
|
1060
|
-
cells: WasmGridCell[];
|
|
1061
|
-
}
|
|
1062
|
-
|
|
1063
|
-
/** A table extracted during conversion. */
|
|
1064
|
-
export interface WasmConversionTable {
|
|
1065
|
-
grid: WasmTableGrid;
|
|
1066
|
-
markdown: string;
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
/** Non-fatal warning emitted during conversion. */
|
|
1070
|
-
export interface WasmConversionWarning {
|
|
1071
|
-
/** Human-readable warning message. */
|
|
1072
|
-
message: string;
|
|
1073
|
-
/** Warning kind identifier. */
|
|
1074
|
-
kind: string;
|
|
1075
|
-
}
|
|
1076
|
-
|
|
1077
|
-
/** An extracted inline image from the HTML document. */
|
|
1078
|
-
export interface WasmInlineImage {
|
|
1079
|
-
/** Raw image data as a Uint8Array. */
|
|
1080
|
-
data: Uint8Array;
|
|
1081
|
-
/** Image format (png, jpeg, gif, svg, etc.). */
|
|
1082
|
-
format: string;
|
|
1083
|
-
/** Generated or provided filename, or null. */
|
|
1084
|
-
filename: string | null;
|
|
1085
|
-
/** Alt text or description, or null. */
|
|
1086
|
-
description: string | null;
|
|
1087
|
-
/** Image width in pixels, or null if not available. */
|
|
1088
|
-
width: number | null;
|
|
1089
|
-
/** Image height in pixels, or null if not available. */
|
|
1090
|
-
height: number | null;
|
|
1091
|
-
/** Source type ("img_data_uri" or "svg_element"). */
|
|
1092
|
-
source: string;
|
|
1093
|
-
/** HTML attributes from the source element. */
|
|
1094
|
-
attributes: Record<string, string>;
|
|
1095
|
-
}
|
|
1096
|
-
|
|
1097
|
-
/** Result of the convert() API. */
|
|
1098
|
-
export interface WasmConversionResult {
|
|
1099
|
-
/** Converted text output (markdown, djot, or plain text), or null. */
|
|
1100
|
-
content: string | null;
|
|
1101
|
-
/** Structured document tree serialized as a JSON value, or null. */
|
|
1102
|
-
document: unknown | null;
|
|
1103
|
-
/** Extracted HTML metadata serialized as a JSON value, or null. */
|
|
1104
|
-
metadata: unknown | null;
|
|
1105
|
-
/** All tables found in the HTML, in document order. */
|
|
1106
|
-
tables: WasmConversionTable[];
|
|
1107
|
-
/** Extracted inline images (data URIs and SVGs). */
|
|
1108
|
-
images: WasmInlineImage[];
|
|
1109
|
-
/** Non-fatal processing warnings. */
|
|
1110
|
-
warnings: WasmConversionWarning[];
|
|
1111
|
-
}
|
|
997
|
+
/**
|
|
998
|
+
* Convert HTML to Markdown, returning a [`ConversionResult`] with content, metadata, images,
|
|
999
|
+
* and warnings.
|
|
1000
|
+
*
|
|
1001
|
+
* # Arguments
|
|
1002
|
+
*
|
|
1003
|
+
* * `html` — the HTML string to convert.
|
|
1004
|
+
* * `options` — optional conversion options. Defaults to [`ConversionOptions::default`].
|
|
1005
|
+
*
|
|
1006
|
+
* # Example
|
|
1007
|
+
*
|
|
1008
|
+
* ```
|
|
1009
|
+
* use html_to_markdown_rs::convert;
|
|
1010
|
+
*
|
|
1011
|
+
* let html = "<h1>Hello World</h1>";
|
|
1012
|
+
* let result = convert(html, None).unwrap();
|
|
1013
|
+
* assert!(result.content.as_deref().unwrap_or("").contains("Hello World"));
|
|
1014
|
+
* ```
|
|
1015
|
+
*
|
|
1016
|
+
* # Errors
|
|
1017
|
+
*
|
|
1018
|
+
* Returns an error if HTML parsing fails or if the input contains invalid UTF-8.
|
|
1019
|
+
*/
|
|
1020
|
+
export function convert(html: string, options?: WasmConversionOptions | null): WasmConversionResult;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/* @ts-self-types="./html_to_markdown_wasm.d.ts" */
|
|
2
|
+
import * as wasm from "./html_to_markdown_wasm_bg.wasm";
|
|
3
|
+
import { __wbg_set_wasm } from "./html_to_markdown_wasm_bg.js";
|
|
4
|
+
|
|
5
|
+
__wbg_set_wasm(wasm);
|
|
6
|
+
|
|
7
|
+
export {
|
|
8
|
+
WasmAnnotationKind, WasmCodeBlockStyle, WasmConversionOptions, WasmConversionOptionsBuilder, WasmConversionOptionsUpdate, WasmConversionResult, WasmDocumentMetadata, WasmDocumentNode, WasmDocumentStructure, WasmGridCell, WasmHeaderMetadata, WasmHeadingStyle, WasmHighlightStyle, WasmHtmlMetadata, WasmImageMetadata, WasmImageType, WasmLinkMetadata, WasmLinkStyle, WasmLinkType, WasmListIndentType, WasmNewlineStyle, WasmNodeContent, WasmNodeContext, WasmNodeType, WasmOutputFormat, WasmPreprocessingOptions, WasmPreprocessingOptionsUpdate, WasmPreprocessingPreset, WasmProcessingWarning, WasmStructuredData, WasmStructuredDataType, WasmTableData, WasmTableGrid, WasmTextAnnotation, WasmTextDirection, WasmVisitResult, WasmVisitorHandle, WasmWarningKind, WasmWhitespaceMode, convert
|
|
9
|
+
} from "./html_to_markdown_wasm_bg.js";
|