mdream 1.2.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/bin/mdream.mjs +6 -2
- package/dist/iife.js +25 -25
- package/dist/index.d.mts +6 -0
- package/dist/index.mjs +2 -1
- package/dist/worker.d.mts +6 -0
- package/napi/index.d.mts +1 -0
- package/napi/index.d.ts +1 -0
- package/napi/index.mjs +14 -4
- package/package.json +15 -15
- package/wasm/mdream_edge.js +24 -24
- package/wasm/mdream_edge_bg.wasm +0 -0
- package/wasm/package.json +1 -1
- package/wasm-bundler/mdream_edge_bg.js +22 -23
- package/wasm-bundler/mdream_edge_bg.wasm +0 -0
- package/wasm-bundler/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -69,6 +69,12 @@ interface MdreamOptions {
|
|
|
69
69
|
extraction?: Record<string, (element: ExtractedElement) => void>;
|
|
70
70
|
/** Tag overrides. String values act as aliases */
|
|
71
71
|
tagOverrides?: Record<string, TagOverride | string>;
|
|
72
|
+
/**
|
|
73
|
+
* Hard-wrap prose at this many characters, breaking on word boundaries.
|
|
74
|
+
* Applied inline during conversion (zero-cost when unset). Code blocks,
|
|
75
|
+
* tables, and headings are never wrapped. `0` disables wrapping.
|
|
76
|
+
*/
|
|
77
|
+
wrapWidth?: number;
|
|
72
78
|
}
|
|
73
79
|
declare function htmlToMarkdown(html: string, options?: Partial<MdreamOptions>): string;
|
|
74
80
|
declare function streamHtmlToMarkdown(htmlStream: ReadableStream<Uint8Array | string> | null, options?: Partial<MdreamOptions>): AsyncIterable<string>;
|
package/dist/index.mjs
CHANGED
package/dist/worker.d.mts
CHANGED
|
@@ -69,6 +69,12 @@ interface MdreamOptions {
|
|
|
69
69
|
extraction?: Record<string, (element: ExtractedElement) => void>;
|
|
70
70
|
/** Tag overrides. String values act as aliases */
|
|
71
71
|
tagOverrides?: Record<string, TagOverride | string>;
|
|
72
|
+
/**
|
|
73
|
+
* Hard-wrap prose at this many characters, breaking on word boundaries.
|
|
74
|
+
* Applied inline during conversion (zero-cost when unset). Code blocks,
|
|
75
|
+
* tables, and headings are never wrapped. `0` disables wrapping.
|
|
76
|
+
*/
|
|
77
|
+
wrapWidth?: number;
|
|
72
78
|
}
|
|
73
79
|
/**
|
|
74
80
|
* Initialize the mdream web worker. Must be called before htmlToMarkdown.
|
package/napi/index.d.mts
CHANGED
package/napi/index.d.ts
CHANGED
package/napi/index.mjs
CHANGED
|
@@ -529,23 +529,33 @@ function requireNative() {
|
|
|
529
529
|
|
|
530
530
|
nativeBinding = requireNative()
|
|
531
531
|
|
|
532
|
-
|
|
532
|
+
// NAPI_RS_FORCE_WASI is a tri-state flag:
|
|
533
|
+
// unset / any other value → native binding preferred, WASI is only a fallback
|
|
534
|
+
// 'true' → force WASI fallback even if native loaded
|
|
535
|
+
// 'error' → force WASI and throw if no WASI binding is found
|
|
536
|
+
// Treating any non-empty string as truthy (the historical behavior) meant
|
|
537
|
+
// NAPI_RS_FORCE_WASI=false, NAPI_RS_FORCE_WASI=0, etc. inadvertently triggered
|
|
538
|
+
// the WASI path, causing ENOENT for packages shipped without a .wasi.cjs file.
|
|
539
|
+
const forceWasi =
|
|
540
|
+
process.env.NAPI_RS_FORCE_WASI === 'true' || process.env.NAPI_RS_FORCE_WASI === 'error'
|
|
541
|
+
|
|
542
|
+
if (!nativeBinding || forceWasi) {
|
|
533
543
|
let wasiBinding = null
|
|
534
544
|
let wasiBindingError = null
|
|
535
545
|
try {
|
|
536
546
|
wasiBinding = require('./rust.wasi.cjs')
|
|
537
547
|
nativeBinding = wasiBinding
|
|
538
548
|
} catch (err) {
|
|
539
|
-
if (
|
|
549
|
+
if (forceWasi) {
|
|
540
550
|
wasiBindingError = err
|
|
541
551
|
}
|
|
542
552
|
}
|
|
543
|
-
if (!nativeBinding ||
|
|
553
|
+
if (!nativeBinding || forceWasi) {
|
|
544
554
|
try {
|
|
545
555
|
wasiBinding = require('@mdream/rust-wasm32-wasi')
|
|
546
556
|
nativeBinding = wasiBinding
|
|
547
557
|
} catch (err) {
|
|
548
|
-
if (
|
|
558
|
+
if (forceWasi) {
|
|
549
559
|
if (!wasiBindingError) {
|
|
550
560
|
wasiBindingError = err
|
|
551
561
|
} else {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdream",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "1.2.2",
|
|
4
|
+
"version": "1.4.0",
|
|
5
5
|
"description": "Ultra-performant HTML to Markdown Convertor Optimized for LLMs and llm.txt artifacts.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -52,22 +52,22 @@
|
|
|
52
52
|
"browser": "./dist/browser.mjs",
|
|
53
53
|
"dependencies": {},
|
|
54
54
|
"optionalDependencies": {
|
|
55
|
-
"@mdream/rust-android-arm-eabi": "1.
|
|
56
|
-
"@mdream/rust-android-arm64": "1.
|
|
57
|
-
"@mdream/rust-darwin-arm64": "1.
|
|
58
|
-
"@mdream/rust-darwin-x64": "1.
|
|
59
|
-
"@mdream/rust-
|
|
60
|
-
"@mdream/rust-
|
|
61
|
-
"@mdream/rust-linux-arm64-
|
|
62
|
-
"@mdream/rust-linux-
|
|
63
|
-
"@mdream/rust-linux-x64-
|
|
64
|
-
"@mdream/rust-
|
|
65
|
-
"@mdream/rust-win32-
|
|
66
|
-
"@mdream/rust-
|
|
67
|
-
"@mdream/rust-
|
|
55
|
+
"@mdream/rust-android-arm-eabi": "1.4.0",
|
|
56
|
+
"@mdream/rust-android-arm64": "1.4.0",
|
|
57
|
+
"@mdream/rust-darwin-arm64": "1.4.0",
|
|
58
|
+
"@mdream/rust-darwin-x64": "1.4.0",
|
|
59
|
+
"@mdream/rust-freebsd-x64": "1.4.0",
|
|
60
|
+
"@mdream/rust-linux-arm-gnueabihf": "1.4.0",
|
|
61
|
+
"@mdream/rust-linux-arm64-gnu": "1.4.0",
|
|
62
|
+
"@mdream/rust-linux-arm64-musl": "1.4.0",
|
|
63
|
+
"@mdream/rust-linux-x64-musl": "1.4.0",
|
|
64
|
+
"@mdream/rust-wasm32-wasi": "1.4.0",
|
|
65
|
+
"@mdream/rust-win32-arm64-msvc": "1.4.0",
|
|
66
|
+
"@mdream/rust-win32-x64-msvc": "1.4.0",
|
|
67
|
+
"@mdream/rust-linux-x64-gnu": "1.4.0"
|
|
68
68
|
},
|
|
69
69
|
"devDependencies": {
|
|
70
|
-
"@mdream/js": "1.
|
|
70
|
+
"@mdream/js": "1.4.0"
|
|
71
71
|
},
|
|
72
72
|
"scripts": {
|
|
73
73
|
"flame": "pnpm build && unbuild bench/bundle && clinic flame -- node bench/bundle/dist/string.mjs 10",
|
package/wasm/mdream_edge.js
CHANGED
|
@@ -35,7 +35,7 @@ export class MarkdownStream {
|
|
|
35
35
|
*/
|
|
36
36
|
constructor(options) {
|
|
37
37
|
const ret = wasm.markdownstream_new(addHeapObject(options));
|
|
38
|
-
this.__wbg_ptr = ret
|
|
38
|
+
this.__wbg_ptr = ret;
|
|
39
39
|
MarkdownStreamFinalization.register(this, this.__wbg_ptr, this);
|
|
40
40
|
return this;
|
|
41
41
|
}
|
|
@@ -102,26 +102,26 @@ export function htmlToMarkdownResult(html, options) {
|
|
|
102
102
|
function __wbg_get_imports() {
|
|
103
103
|
const import0 = {
|
|
104
104
|
__proto__: null,
|
|
105
|
-
|
|
105
|
+
__wbg___wbindgen_boolean_get_b131b2f36d6b2f55: function(arg0) {
|
|
106
106
|
const v = getObject(arg0);
|
|
107
107
|
const ret = typeof(v) === 'boolean' ? v : undefined;
|
|
108
108
|
return isLikeNone(ret) ? 0xFFFFFF : ret ? 1 : 0;
|
|
109
109
|
},
|
|
110
|
-
|
|
110
|
+
__wbg___wbindgen_is_null_ced4761460071341: function(arg0) {
|
|
111
111
|
const ret = getObject(arg0) === null;
|
|
112
112
|
return ret;
|
|
113
113
|
},
|
|
114
|
-
|
|
114
|
+
__wbg___wbindgen_is_undefined_4410e3c20a99fa97: function(arg0) {
|
|
115
115
|
const ret = getObject(arg0) === undefined;
|
|
116
116
|
return ret;
|
|
117
117
|
},
|
|
118
|
-
|
|
118
|
+
__wbg___wbindgen_number_get_588ed6b97f0d7e14: function(arg0, arg1) {
|
|
119
119
|
const obj = getObject(arg1);
|
|
120
120
|
const ret = typeof(obj) === 'number' ? obj : undefined;
|
|
121
121
|
getDataViewMemory0().setFloat64(arg0 + 8 * 1, isLikeNone(ret) ? 0 : ret, true);
|
|
122
122
|
getDataViewMemory0().setInt32(arg0 + 4 * 0, !isLikeNone(ret), true);
|
|
123
123
|
},
|
|
124
|
-
|
|
124
|
+
__wbg___wbindgen_string_get_fa2687d531ed17a5: function(arg0, arg1) {
|
|
125
125
|
const obj = getObject(arg1);
|
|
126
126
|
const ret = typeof(obj) === 'string' ? obj : undefined;
|
|
127
127
|
var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
@@ -129,46 +129,46 @@ function __wbg_get_imports() {
|
|
|
129
129
|
getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
|
|
130
130
|
getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
|
|
131
131
|
},
|
|
132
|
-
|
|
132
|
+
__wbg___wbindgen_throw_bbadd78c1bac3a77: function(arg0, arg1) {
|
|
133
133
|
throw new Error(getStringFromWasm0(arg0, arg1));
|
|
134
134
|
},
|
|
135
|
-
|
|
135
|
+
__wbg_entries_5a6a7e7e0df09fe5: function(arg0) {
|
|
136
136
|
const ret = Object.entries(getObject(arg0));
|
|
137
137
|
return addHeapObject(ret);
|
|
138
138
|
},
|
|
139
|
-
|
|
139
|
+
__wbg_from_8a57180716c586ee: function(arg0) {
|
|
140
140
|
const ret = Array.from(getObject(arg0));
|
|
141
141
|
return addHeapObject(ret);
|
|
142
142
|
},
|
|
143
|
-
|
|
144
|
-
const ret = Reflect.get(getObject(arg0), getObject(arg1));
|
|
145
|
-
return addHeapObject(ret);
|
|
146
|
-
}, arguments); },
|
|
147
|
-
__wbg_get_8360291721e2339f: function(arg0, arg1) {
|
|
143
|
+
__wbg_get_4b90d6d8c5deb5d5: function(arg0, arg1) {
|
|
148
144
|
const ret = getObject(arg0)[arg1 >>> 0];
|
|
149
145
|
return addHeapObject(ret);
|
|
150
146
|
},
|
|
151
|
-
|
|
147
|
+
__wbg_get_52a8a619f7b88df6: function() { return handleError(function (arg0, arg1) {
|
|
148
|
+
const ret = Reflect.get(getObject(arg0), getObject(arg1));
|
|
149
|
+
return addHeapObject(ret);
|
|
150
|
+
}, arguments); },
|
|
151
|
+
__wbg_isArray_139f48e3c057ede8: function(arg0) {
|
|
152
152
|
const ret = Array.isArray(getObject(arg0));
|
|
153
153
|
return ret;
|
|
154
154
|
},
|
|
155
|
-
|
|
155
|
+
__wbg_length_fb04d16d7bdf6d4c: function(arg0) {
|
|
156
156
|
const ret = getObject(arg0).length;
|
|
157
157
|
return ret;
|
|
158
158
|
},
|
|
159
|
-
|
|
159
|
+
__wbg_new_0b303268aa395a38: function() {
|
|
160
160
|
const ret = new Array();
|
|
161
161
|
return addHeapObject(ret);
|
|
162
162
|
},
|
|
163
|
-
|
|
163
|
+
__wbg_new_20b778a4c5c691c3: function() {
|
|
164
164
|
const ret = new Object();
|
|
165
165
|
return addHeapObject(ret);
|
|
166
166
|
},
|
|
167
|
-
|
|
167
|
+
__wbg_push_ceb8ef046afb2041: function(arg0, arg1) {
|
|
168
168
|
const ret = getObject(arg0).push(getObject(arg1));
|
|
169
169
|
return ret;
|
|
170
170
|
},
|
|
171
|
-
|
|
171
|
+
__wbg_set_a6ba3ac0e634b822: function() { return handleError(function (arg0, arg1, arg2) {
|
|
172
172
|
const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
|
|
173
173
|
return ret;
|
|
174
174
|
}, arguments); },
|
|
@@ -193,7 +193,7 @@ function __wbg_get_imports() {
|
|
|
193
193
|
|
|
194
194
|
const MarkdownStreamFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
195
195
|
? { register: () => {}, unregister: () => {} }
|
|
196
|
-
: new FinalizationRegistry(ptr => wasm.__wbg_markdownstream_free(ptr
|
|
196
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_markdownstream_free(ptr, 1));
|
|
197
197
|
|
|
198
198
|
function addHeapObject(obj) {
|
|
199
199
|
if (heap_next === heap.length) heap.push(heap.length + 1);
|
|
@@ -219,8 +219,7 @@ function getDataViewMemory0() {
|
|
|
219
219
|
}
|
|
220
220
|
|
|
221
221
|
function getStringFromWasm0(ptr, len) {
|
|
222
|
-
|
|
223
|
-
return decodeText(ptr, len);
|
|
222
|
+
return decodeText(ptr >>> 0, len);
|
|
224
223
|
}
|
|
225
224
|
|
|
226
225
|
let cachedUint8ArrayMemory0 = null;
|
|
@@ -322,8 +321,9 @@ if (!('encodeInto' in cachedTextEncoder)) {
|
|
|
322
321
|
|
|
323
322
|
let WASM_VECTOR_LEN = 0;
|
|
324
323
|
|
|
325
|
-
let wasmModule, wasm;
|
|
324
|
+
let wasmModule, wasmInstance, wasm;
|
|
326
325
|
function __wbg_finalize_init(instance, module) {
|
|
326
|
+
wasmInstance = instance;
|
|
327
327
|
wasm = instance.exports;
|
|
328
328
|
wasmModule = module;
|
|
329
329
|
cachedDataViewMemory0 = null;
|
package/wasm/mdream_edge_bg.wasm
CHANGED
|
Binary file
|
package/wasm/package.json
CHANGED
|
@@ -33,7 +33,7 @@ export class MarkdownStream {
|
|
|
33
33
|
*/
|
|
34
34
|
constructor(options) {
|
|
35
35
|
const ret = wasm.markdownstream_new(addHeapObject(options));
|
|
36
|
-
this.__wbg_ptr = ret
|
|
36
|
+
this.__wbg_ptr = ret;
|
|
37
37
|
MarkdownStreamFinalization.register(this, this.__wbg_ptr, this);
|
|
38
38
|
return this;
|
|
39
39
|
}
|
|
@@ -97,26 +97,26 @@ export function htmlToMarkdownResult(html, options) {
|
|
|
97
97
|
const ret = wasm.htmlToMarkdownResult(ptr0, len0, addHeapObject(options));
|
|
98
98
|
return takeObject(ret);
|
|
99
99
|
}
|
|
100
|
-
export function
|
|
100
|
+
export function __wbg___wbindgen_boolean_get_b131b2f36d6b2f55(arg0) {
|
|
101
101
|
const v = getObject(arg0);
|
|
102
102
|
const ret = typeof(v) === 'boolean' ? v : undefined;
|
|
103
103
|
return isLikeNone(ret) ? 0xFFFFFF : ret ? 1 : 0;
|
|
104
104
|
}
|
|
105
|
-
export function
|
|
105
|
+
export function __wbg___wbindgen_is_null_ced4761460071341(arg0) {
|
|
106
106
|
const ret = getObject(arg0) === null;
|
|
107
107
|
return ret;
|
|
108
108
|
}
|
|
109
|
-
export function
|
|
109
|
+
export function __wbg___wbindgen_is_undefined_4410e3c20a99fa97(arg0) {
|
|
110
110
|
const ret = getObject(arg0) === undefined;
|
|
111
111
|
return ret;
|
|
112
112
|
}
|
|
113
|
-
export function
|
|
113
|
+
export function __wbg___wbindgen_number_get_588ed6b97f0d7e14(arg0, arg1) {
|
|
114
114
|
const obj = getObject(arg1);
|
|
115
115
|
const ret = typeof(obj) === 'number' ? obj : undefined;
|
|
116
116
|
getDataViewMemory0().setFloat64(arg0 + 8 * 1, isLikeNone(ret) ? 0 : ret, true);
|
|
117
117
|
getDataViewMemory0().setInt32(arg0 + 4 * 0, !isLikeNone(ret), true);
|
|
118
118
|
}
|
|
119
|
-
export function
|
|
119
|
+
export function __wbg___wbindgen_string_get_fa2687d531ed17a5(arg0, arg1) {
|
|
120
120
|
const obj = getObject(arg1);
|
|
121
121
|
const ret = typeof(obj) === 'string' ? obj : undefined;
|
|
122
122
|
var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_export, wasm.__wbindgen_export2);
|
|
@@ -124,46 +124,46 @@ export function __wbg___wbindgen_string_get_7ed5322991caaec5(arg0, arg1) {
|
|
|
124
124
|
getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
|
|
125
125
|
getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
|
|
126
126
|
}
|
|
127
|
-
export function
|
|
127
|
+
export function __wbg___wbindgen_throw_bbadd78c1bac3a77(arg0, arg1) {
|
|
128
128
|
throw new Error(getStringFromWasm0(arg0, arg1));
|
|
129
129
|
}
|
|
130
|
-
export function
|
|
130
|
+
export function __wbg_entries_5a6a7e7e0df09fe5(arg0) {
|
|
131
131
|
const ret = Object.entries(getObject(arg0));
|
|
132
132
|
return addHeapObject(ret);
|
|
133
133
|
}
|
|
134
|
-
export function
|
|
134
|
+
export function __wbg_from_8a57180716c586ee(arg0) {
|
|
135
135
|
const ret = Array.from(getObject(arg0));
|
|
136
136
|
return addHeapObject(ret);
|
|
137
137
|
}
|
|
138
|
-
export function
|
|
139
|
-
const ret = Reflect.get(getObject(arg0), getObject(arg1));
|
|
140
|
-
return addHeapObject(ret);
|
|
141
|
-
}, arguments); }
|
|
142
|
-
export function __wbg_get_8360291721e2339f(arg0, arg1) {
|
|
138
|
+
export function __wbg_get_4b90d6d8c5deb5d5(arg0, arg1) {
|
|
143
139
|
const ret = getObject(arg0)[arg1 >>> 0];
|
|
144
140
|
return addHeapObject(ret);
|
|
145
141
|
}
|
|
146
|
-
export function
|
|
142
|
+
export function __wbg_get_52a8a619f7b88df6() { return handleError(function (arg0, arg1) {
|
|
143
|
+
const ret = Reflect.get(getObject(arg0), getObject(arg1));
|
|
144
|
+
return addHeapObject(ret);
|
|
145
|
+
}, arguments); }
|
|
146
|
+
export function __wbg_isArray_139f48e3c057ede8(arg0) {
|
|
147
147
|
const ret = Array.isArray(getObject(arg0));
|
|
148
148
|
return ret;
|
|
149
149
|
}
|
|
150
|
-
export function
|
|
150
|
+
export function __wbg_length_fb04d16d7bdf6d4c(arg0) {
|
|
151
151
|
const ret = getObject(arg0).length;
|
|
152
152
|
return ret;
|
|
153
153
|
}
|
|
154
|
-
export function
|
|
154
|
+
export function __wbg_new_0b303268aa395a38() {
|
|
155
155
|
const ret = new Array();
|
|
156
156
|
return addHeapObject(ret);
|
|
157
157
|
}
|
|
158
|
-
export function
|
|
158
|
+
export function __wbg_new_20b778a4c5c691c3() {
|
|
159
159
|
const ret = new Object();
|
|
160
160
|
return addHeapObject(ret);
|
|
161
161
|
}
|
|
162
|
-
export function
|
|
162
|
+
export function __wbg_push_ceb8ef046afb2041(arg0, arg1) {
|
|
163
163
|
const ret = getObject(arg0).push(getObject(arg1));
|
|
164
164
|
return ret;
|
|
165
165
|
}
|
|
166
|
-
export function
|
|
166
|
+
export function __wbg_set_a6ba3ac0e634b822() { return handleError(function (arg0, arg1, arg2) {
|
|
167
167
|
const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
|
|
168
168
|
return ret;
|
|
169
169
|
}, arguments); }
|
|
@@ -181,7 +181,7 @@ export function __wbindgen_object_drop_ref(arg0) {
|
|
|
181
181
|
}
|
|
182
182
|
const MarkdownStreamFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
183
183
|
? { register: () => {}, unregister: () => {} }
|
|
184
|
-
: new FinalizationRegistry(ptr => wasm.__wbg_markdownstream_free(ptr
|
|
184
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_markdownstream_free(ptr, 1));
|
|
185
185
|
|
|
186
186
|
function addHeapObject(obj) {
|
|
187
187
|
if (heap_next === heap.length) heap.push(heap.length + 1);
|
|
@@ -207,8 +207,7 @@ function getDataViewMemory0() {
|
|
|
207
207
|
}
|
|
208
208
|
|
|
209
209
|
function getStringFromWasm0(ptr, len) {
|
|
210
|
-
|
|
211
|
-
return decodeText(ptr, len);
|
|
210
|
+
return decodeText(ptr >>> 0, len);
|
|
212
211
|
}
|
|
213
212
|
|
|
214
213
|
let cachedUint8ArrayMemory0 = null;
|
|
Binary file
|