html-to-markdown-node 2.6.4 → 2.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/index.d.ts +231 -0
  2. package/index.js +255 -0
  3. package/package.json +10 -10
package/index.d.ts ADDED
@@ -0,0 +1,231 @@
1
+ /* auto-generated by NAPI-RS */
2
+ /* eslint-disable */
3
+ /**
4
+ * Convert HTML to Markdown and return the Markdown as a string
5
+ *
6
+ * # Arguments
7
+ *
8
+ * * `html` - The HTML string to convert
9
+ * * `options` - Optional conversion options (`JsConversionOptions`); may be omitted, `undefined`, or `null`
10
+ *
11
+ * # Example
12
+ *
13
+ * ```javascript
14
+ * const { convert } = require('html-to-markdown-node');
15
+ *
16
+ * const html = '<h1>Hello World</h1>';
17
+ * const markdown = convert(html);
18
+ * console.log(markdown); // # Hello World
19
+ * ```
20
+ */
21
+ export declare function convert(html: string, options?: JsConversionOptions | undefined | null): string
22
+
23
+ /**
24
+ * Convert HTML to Markdown while collecting inline images; returns a `JsHtmlExtraction`
25
+ *
26
+ * # Arguments
27
+ *
28
+ * * `html` - The HTML string to convert
29
+ * * `options` - Optional conversion options; pass `null` or `undefined` to use the defaults
30
+ * * `imageConfig` - Optional configuration for inline image extraction (`JsInlineImageConfig`)
31
+ *
32
+ * # Example
33
+ *
34
+ * ```javascript
35
+ * const { convertWithInlineImages } = require('html-to-markdown-node');
36
+ *
37
+ * const html = '<img src="data:image/png;base64,..." alt="test">';
38
+ * const result = convertWithInlineImages(html, null, {
39
+ * maxDecodedSizeBytes: BigInt(1024 * 1024), // typed as bigint in JsInlineImageConfig
40
+ * inferDimensions: true
41
+ * });
42
+ * console.log(result.markdown);
43
+ * console.log(result.inlineImages.length);
44
+ * ```
45
+ */
46
+ export declare function convertWithInlineImages(html: string, options?: JsConversionOptions | undefined | null, imageConfig?: JsInlineImageConfig | undefined | null): JsHtmlExtraction
47
+
48
+ /** Code block style, selected through `JsConversionOptions.codeBlockStyle` */
49
+ export declare const enum JsCodeBlockStyle {
50
+ /** Indented code blocks (4 spaces) - CommonMark default */
51
+ Indented = 'Indented',
52
+ /** Fenced code blocks with backticks (```) */
53
+ Backticks = 'Backticks',
54
+ /** Fenced code blocks with tildes (~~~) */
55
+ Tildes = 'Tildes'
56
+ }
57
+
58
+ /** Main conversion options accepted by `convert` and `convertWithInlineImages`; every field is optional */
59
+ export interface JsConversionOptions {
60
+ /** Heading style (see `JsHeadingStyle`) */
61
+ headingStyle?: JsHeadingStyle
62
+ /** List indentation type (see `JsListIndentType`) */
63
+ listIndentType?: JsListIndentType
64
+ /** List indentation width (spaces) */
65
+ listIndentWidth?: number
66
+ /** Bullet characters for unordered lists */
67
+ bullets?: string
68
+ /** Symbol for strong/emphasis (* or _) */
69
+ strongEmSymbol?: string
70
+ /** Escape asterisks in text */
71
+ escapeAsterisks?: boolean
72
+ /** Escape underscores in text */
73
+ escapeUnderscores?: boolean
74
+ /** Escape misc markdown characters */
75
+ escapeMisc?: boolean
76
+ /** Escape all ASCII punctuation */
77
+ escapeAscii?: boolean
78
+ /** Default code language */
79
+ codeLanguage?: string
80
+ /** Use autolinks for bare URLs */
81
+ autolinks?: boolean
82
+ /** Add default title if none exists */
83
+ defaultTitle?: boolean
84
+ /** Use <br> in tables instead of spaces */
85
+ brInTables?: boolean
86
+ /** Enable spatial table reconstruction in hOCR documents */
87
+ hocrSpatialTables?: boolean
88
+ /** Highlight style for <mark> elements (see `JsHighlightStyle`) */
89
+ highlightStyle?: JsHighlightStyle
90
+ /** Extract metadata from HTML */
91
+ extractMetadata?: boolean
92
+ /** Whitespace handling mode (see `JsWhitespaceMode`) */
93
+ whitespaceMode?: JsWhitespaceMode
94
+ /** Strip newlines from HTML before processing */
95
+ stripNewlines?: boolean
96
+ /** Enable text wrapping */
97
+ wrap?: boolean
98
+ /** Text wrap width */
99
+ wrapWidth?: number
100
+ /** Treat block elements as inline */
101
+ convertAsInline?: boolean
102
+ /** Subscript symbol */
103
+ subSymbol?: string
104
+ /** Superscript symbol */
105
+ supSymbol?: string
106
+ /** Newline style (see `JsNewlineStyle`) */
107
+ newlineStyle?: JsNewlineStyle
108
+ /** Code block style (see `JsCodeBlockStyle`) */
109
+ codeBlockStyle?: JsCodeBlockStyle
110
+ /** Elements where images should remain as markdown */
111
+ keepInlineImagesIn?: Array<string>
112
+ /** Preprocessing options (see `JsPreprocessingOptions`) */
113
+ preprocessing?: JsPreprocessingOptions
114
+ /** Source encoding (informational) */
115
+ encoding?: string
116
+ /** Enable debug mode with diagnostic warnings */
117
+ debug?: boolean
118
+ /** List of HTML tags to strip */
119
+ stripTags?: Array<string>
120
+ /** List of HTML tags to preserve as-is in the output */
121
+ preserveTags?: Array<string>
122
+ }
123
+
124
+ /** Heading style options for `JsConversionOptions.headingStyle` */
125
+ export declare const enum JsHeadingStyle {
126
+ /** Underlined style (=== for h1, --- for h2) */
127
+ Underlined = 'Underlined',
128
+ /** ATX style (# for h1, ## for h2, etc.) */
129
+ Atx = 'Atx',
130
+ /** ATX closed style (# title #) */
131
+ AtxClosed = 'AtxClosed'
132
+ }
133
+
134
+ /** Highlight style for `<mark>` elements, selected through `JsConversionOptions.highlightStyle` */
135
+ export declare const enum JsHighlightStyle {
136
+ /** ==text== */
137
+ DoubleEqual = 'DoubleEqual',
138
+ /** <mark>text</mark> */
139
+ Html = 'Html',
140
+ /** **text** */
141
+ Bold = 'Bold',
142
+ /** Plain text (no formatting) */
143
+ None = 'None'
144
+ }
145
+
146
+ /** Result of HTML extraction with inline images, as returned by `convertWithInlineImages` */
147
+ export interface JsHtmlExtraction {
148
+ /** Converted markdown */
149
+ markdown: string
150
+ /** Extracted inline images */
151
+ inlineImages: Array<JsInlineImage>
152
+ /** Warnings encountered during extraction */
153
+ warnings: Array<JsInlineImageWarning>
154
+ }
155
+
156
+ /** Inline image data; one entry of `JsHtmlExtraction.inlineImages` */
157
+ export interface JsInlineImage {
158
+ /** Raw image data as a `Buffer` */
159
+ data: Buffer
160
+ /** Image format (png, jpeg, gif, etc.) */
161
+ format: string
162
+ /** Generated or provided filename */
163
+ filename?: string
164
+ /** Alt text / description */
165
+ description?: string
166
+ /** Image dimensions (width, height) if available */
167
+ dimensions?: Array<number>
168
+ /** Source type (img_data_uri or svg_element) */
169
+ source: string
170
+ /** HTML attributes from the source element */
171
+ attributes: Record<string, string>
172
+ }
173
+
174
+ /** Inline image configuration (the `imageConfig` argument of `convertWithInlineImages`) */
175
+ export interface JsInlineImageConfig {
176
+ /** Maximum decoded size in bytes (default: 5MB); typed as `bigint`, so pass a BigInt rather than a number */
177
+ maxDecodedSizeBytes?: bigint
178
+ /** Filename prefix for generated filenames */
179
+ filenamePrefix?: string
180
+ /** Capture inline SVG elements (default: true) */
181
+ captureSvg?: boolean
182
+ /** Infer image dimensions (default: false) */
183
+ inferDimensions?: boolean
184
+ }
185
+
186
+ /** Warning about inline image processing; one entry of `JsHtmlExtraction.warnings` */
187
+ export interface JsInlineImageWarning {
188
+ /** Index of the image that caused the warning */
189
+ index: number
190
+ /** Warning message */
191
+ message: string
192
+ }
193
+
194
+ /** List indentation type for `JsConversionOptions.listIndentType` */
195
+ export declare const enum JsListIndentType {
196
+ Spaces = 'Spaces',
197
+ Tabs = 'Tabs'
198
+ }
199
+
200
+ /** Newline style for `JsConversionOptions.newlineStyle` */
201
+ export declare const enum JsNewlineStyle {
202
+ /** Two spaces at end of line */
203
+ Spaces = 'Spaces',
204
+ /** Backslash at end of line */
205
+ Backslash = 'Backslash'
206
+ }
207
+
208
+ /** HTML preprocessing options, supplied through `JsConversionOptions.preprocessing` */
209
+ export interface JsPreprocessingOptions {
210
+ /** Enable preprocessing */
211
+ enabled?: boolean
212
+ /** Preprocessing preset (see `JsPreprocessingPreset`) */
213
+ preset?: JsPreprocessingPreset
214
+ /** Remove navigation elements */
215
+ removeNavigation?: boolean
216
+ /** Remove form elements */
217
+ removeForms?: boolean
218
+ }
219
+
220
+ /** Preprocessing preset levels for `JsPreprocessingOptions.preset` */
221
+ export declare const enum JsPreprocessingPreset {
222
+ Minimal = 'Minimal',
223
+ Standard = 'Standard',
224
+ Aggressive = 'Aggressive'
225
+ }
226
+
227
+ /** Whitespace handling mode for `JsConversionOptions.whitespaceMode` */
228
+ export declare const enum JsWhitespaceMode {
229
+ Normalized = 'Normalized',
230
+ Strict = 'Strict'
231
+ }
package/index.js ADDED
@@ -0,0 +1,255 @@
1
+ const { existsSync, readFileSync } = require('fs')
2
+ const { join } = require('path')
3
+
4
+ const { platform, arch } = process // OS and CPU architecture, used below to choose the native binding
5
+
6
+ let nativeBinding = null // the loaded addon; stays null if every require attempt fails
7
+ let localFileExisted = false // whether a prebuilt .node file was found next to this script
8
+ let loadError = null // error captured from a failed require; rethrown at the end if nothing loaded
9
+
10
+ function isMusl() { // returns true when the C library looks like musl rather than glibc; only called from the 'linux' branch
11
+ // For Node 10: fallback used when process.report.getReport is not available
12
+ if (!process.report || typeof process.report.getReport !== 'function') {
13
+ try {
14
+ const lddPath = require('child_process').execSync('which ldd').toString().trim() // path of `ldd` as resolved by the shell
15
+ return readFileSync(lddPath, 'utf8').includes('musl') // plain substring check on the file's text
16
+ } catch (e) {
17
+ return true // any failure here (command or file read) is reported as musl
18
+ }
19
+ } else {
20
+ const { glibcVersionRuntime } = process.report.getReport().header
21
+ return !glibcVersionRuntime // no glibc runtime version in the report → treated as musl
22
+ }
23
+ }
24
+
25
+ switch (platform) {
26
+ case 'android':
27
+ switch (arch) {
28
+ case 'arm64':
29
+ localFileExisted = existsSync(join(__dirname, 'html-to-markdown-node.android-arm64.node'))
30
+ try {
31
+ if (localFileExisted) {
32
+ nativeBinding = require('./html-to-markdown-node.android-arm64.node')
33
+ } else {
34
+ nativeBinding = require('html-to-markdown-node-android-arm64')
35
+ }
36
+ } catch (e) {
37
+ loadError = e
38
+ }
39
+ break
40
+ case 'arm':
41
+ localFileExisted = existsSync(join(__dirname, 'html-to-markdown-node.android-arm-eabi.node'))
42
+ try {
43
+ if (localFileExisted) {
44
+ nativeBinding = require('./html-to-markdown-node.android-arm-eabi.node')
45
+ } else {
46
+ nativeBinding = require('html-to-markdown-node-android-arm-eabi')
47
+ }
48
+ } catch (e) {
49
+ loadError = e
50
+ }
51
+ break
52
+ default:
53
+ throw new Error(`Unsupported architecture on Android ${arch}`)
54
+ }
55
+ break
56
+ case 'win32':
57
+ switch (arch) {
58
+ case 'x64':
59
+ localFileExisted = existsSync(
60
+ join(__dirname, 'html-to-markdown-node.win32-x64-msvc.node')
61
+ )
62
+ try {
63
+ if (localFileExisted) {
64
+ nativeBinding = require('./html-to-markdown-node.win32-x64-msvc.node')
65
+ } else {
66
+ nativeBinding = require('html-to-markdown-node-win32-x64-msvc')
67
+ }
68
+ } catch (e) {
69
+ loadError = e
70
+ }
71
+ break
72
+ case 'ia32':
73
+ localFileExisted = existsSync(
74
+ join(__dirname, 'html-to-markdown-node.win32-ia32-msvc.node')
75
+ )
76
+ try {
77
+ if (localFileExisted) {
78
+ nativeBinding = require('./html-to-markdown-node.win32-ia32-msvc.node')
79
+ } else {
80
+ nativeBinding = require('html-to-markdown-node-win32-ia32-msvc')
81
+ }
82
+ } catch (e) {
83
+ loadError = e
84
+ }
85
+ break
86
+ case 'arm64':
87
+ localFileExisted = existsSync(
88
+ join(__dirname, 'html-to-markdown-node.win32-arm64-msvc.node')
89
+ )
90
+ try {
91
+ if (localFileExisted) {
92
+ nativeBinding = require('./html-to-markdown-node.win32-arm64-msvc.node')
93
+ } else {
94
+ nativeBinding = require('html-to-markdown-node-win32-arm64-msvc')
95
+ }
96
+ } catch (e) {
97
+ loadError = e
98
+ }
99
+ break
100
+ default:
101
+ throw new Error(`Unsupported architecture on Windows: ${arch}`)
102
+ }
103
+ break
104
+ case 'darwin':
105
+ localFileExisted = existsSync(join(__dirname, 'html-to-markdown-node.darwin-universal.node'))
106
+ try {
107
+ if (localFileExisted) {
108
+ nativeBinding = require('./html-to-markdown-node.darwin-universal.node')
109
+ } else {
110
+ try {
111
+ nativeBinding = require('html-to-markdown-node-darwin-universal')
112
+ } catch {
113
+ switch (arch) {
114
+ case 'x64':
115
+ localFileExisted = existsSync(join(__dirname, 'html-to-markdown-node.darwin-x64.node'))
116
+ try {
117
+ if (localFileExisted) {
118
+ nativeBinding = require('./html-to-markdown-node.darwin-x64.node')
119
+ } else {
120
+ nativeBinding = require('html-to-markdown-node-darwin-x64')
121
+ }
122
+ } catch (e) {
123
+ loadError = e
124
+ }
125
+ break
126
+ case 'arm64':
127
+ localFileExisted = existsSync(
128
+ join(__dirname, 'html-to-markdown-node.darwin-arm64.node')
129
+ )
130
+ try {
131
+ if (localFileExisted) {
132
+ nativeBinding = require('./html-to-markdown-node.darwin-arm64.node')
133
+ } else {
134
+ nativeBinding = require('html-to-markdown-node-darwin-arm64')
135
+ }
136
+ } catch (e) {
137
+ loadError = e
138
+ }
139
+ break
140
+ default:
141
+ throw new Error(`Unsupported architecture on macOS: ${arch}`)
142
+ }
143
+ }
144
+ }
145
+ } catch (e) {
146
+ loadError = e
147
+ }
148
+ break
149
+ case 'freebsd':
150
+ if (arch !== 'x64') {
151
+ throw new Error(`Unsupported architecture on FreeBSD: ${arch}`)
152
+ }
153
+ localFileExisted = existsSync(join(__dirname, 'html-to-markdown-node.freebsd-x64.node'))
154
+ try {
155
+ if (localFileExisted) {
156
+ nativeBinding = require('./html-to-markdown-node.freebsd-x64.node')
157
+ } else {
158
+ nativeBinding = require('html-to-markdown-node-freebsd-x64')
159
+ }
160
+ } catch (e) {
161
+ loadError = e
162
+ }
163
+ break
164
+ case 'linux':
165
+ switch (arch) {
166
+ case 'x64':
167
+ if (isMusl()) {
168
+ localFileExisted = existsSync(
169
+ join(__dirname, 'html-to-markdown-node.linux-x64-musl.node')
170
+ )
171
+ try {
172
+ if (localFileExisted) {
173
+ nativeBinding = require('./html-to-markdown-node.linux-x64-musl.node')
174
+ } else {
175
+ nativeBinding = require('html-to-markdown-node-linux-x64-musl')
176
+ }
177
+ } catch (e) {
178
+ loadError = e
179
+ }
180
+ } else {
181
+ localFileExisted = existsSync(
182
+ join(__dirname, 'html-to-markdown-node.linux-x64-gnu.node')
183
+ )
184
+ try {
185
+ if (localFileExisted) {
186
+ nativeBinding = require('./html-to-markdown-node.linux-x64-gnu.node')
187
+ } else {
188
+ nativeBinding = require('html-to-markdown-node-linux-x64-gnu')
189
+ }
190
+ } catch (e) {
191
+ loadError = e
192
+ }
193
+ }
194
+ break
195
+ case 'arm64':
196
+ if (isMusl()) {
197
+ localFileExisted = existsSync(
198
+ join(__dirname, 'html-to-markdown-node.linux-arm64-musl.node')
199
+ )
200
+ try {
201
+ if (localFileExisted) {
202
+ nativeBinding = require('./html-to-markdown-node.linux-arm64-musl.node')
203
+ } else {
204
+ nativeBinding = require('html-to-markdown-node-linux-arm64-musl')
205
+ }
206
+ } catch (e) {
207
+ loadError = e
208
+ }
209
+ } else {
210
+ localFileExisted = existsSync(
211
+ join(__dirname, 'html-to-markdown-node.linux-arm64-gnu.node')
212
+ )
213
+ try {
214
+ if (localFileExisted) {
215
+ nativeBinding = require('./html-to-markdown-node.linux-arm64-gnu.node')
216
+ } else {
217
+ nativeBinding = require('html-to-markdown-node-linux-arm64-gnu')
218
+ }
219
+ } catch (e) {
220
+ loadError = e
221
+ }
222
+ }
223
+ break
224
+ case 'arm':
225
+ localFileExisted = existsSync(
226
+ join(__dirname, 'html-to-markdown-node.linux-arm-gnueabihf.node')
227
+ )
228
+ try {
229
+ if (localFileExisted) {
230
+ nativeBinding = require('./html-to-markdown-node.linux-arm-gnueabihf.node')
231
+ } else {
232
+ nativeBinding = require('html-to-markdown-node-linux-arm-gnueabihf')
233
+ }
234
+ } catch (e) {
235
+ loadError = e
236
+ }
237
+ break
238
+ default:
239
+ throw new Error(`Unsupported architecture on Linux: ${arch}`)
240
+ }
241
+ break
242
+ default:
243
+ throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`)
244
+ }
245
+
246
+ if (!nativeBinding) {
247
+ if (loadError) {
248
+ throw loadError
249
+ }
250
+ throw new Error(`Failed to load native binding`)
251
+ }
252
+
253
+ const { convert } = nativeBinding
254
+
255
+ module.exports.convert = convert
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "html-to-markdown-node",
3
- "version": "2.6.4",
3
+ "version": "2.6.5",
4
4
  "description": "High-performance HTML to Markdown converter - Node.js native bindings",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -59,15 +59,15 @@
59
59
  "up": "^1.0.2"
60
60
  },
61
61
  "optionalDependencies": {
62
- "html-to-markdown-node-darwin-x64": "2.6.4",
63
- "html-to-markdown-node-darwin-arm64": "2.6.4",
64
- "html-to-markdown-node-win32-x64-msvc": "2.6.4",
65
- "html-to-markdown-node-win32-arm64-msvc": "2.6.4",
66
- "html-to-markdown-node-linux-x64-gnu": "2.6.4",
67
- "html-to-markdown-node-linux-x64-musl": "2.6.4",
68
- "html-to-markdown-node-linux-arm64-gnu": "2.6.4",
69
- "html-to-markdown-node-linux-arm64-musl": "2.6.4",
70
- "html-to-markdown-node-linux-arm-gnueabihf": "2.6.4"
62
+ "html-to-markdown-node-darwin-x64": "2.6.5",
63
+ "html-to-markdown-node-darwin-arm64": "2.6.5",
64
+ "html-to-markdown-node-win32-x64-msvc": "2.6.5",
65
+ "html-to-markdown-node-win32-arm64-msvc": "2.6.5",
66
+ "html-to-markdown-node-linux-x64-gnu": "2.6.5",
67
+ "html-to-markdown-node-linux-x64-musl": "2.6.5",
68
+ "html-to-markdown-node-linux-arm64-gnu": "2.6.5",
69
+ "html-to-markdown-node-linux-arm64-musl": "2.6.5",
70
+ "html-to-markdown-node-linux-arm-gnueabihf": "2.6.5"
71
71
  },
72
72
  "scripts": {
73
73
  "artifacts": "napi artifacts",