mdream 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "mdream",
3
3
  "type": "module",
4
- "version": "1.1.0",
4
+ "version": "1.1.2",
5
5
  "description": "Ultra-performant HTML to Markdown Convertor Optimized for LLMs and llm.txt artifacts.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -46,27 +46,28 @@
46
46
  "bin",
47
47
  "dist",
48
48
  "napi/",
49
+ "wasm-bundler/",
49
50
  "wasm/"
50
51
  ],
51
52
  "browser": "./dist/browser.mjs",
52
53
  "dependencies": {},
53
54
  "optionalDependencies": {
54
- "@mdream/rust-android-arm64": "1.1.0",
55
- "@mdream/rust-darwin-arm64": "1.1.0",
56
- "@mdream/rust-darwin-x64": "1.1.0",
57
- "@mdream/rust-linux-arm-gnueabihf": "1.1.0",
58
- "@mdream/rust-android-arm-eabi": "1.1.0",
59
- "@mdream/rust-linux-arm64-gnu": "1.1.0",
60
- "@mdream/rust-freebsd-x64": "1.1.0",
61
- "@mdream/rust-linux-arm64-musl": "1.1.0",
62
- "@mdream/rust-linux-x64-gnu": "1.1.0",
63
- "@mdream/rust-linux-x64-musl": "1.1.0",
64
- "@mdream/rust-wasm32-wasi": "1.1.0",
65
- "@mdream/rust-win32-x64-msvc": "1.1.0",
66
- "@mdream/rust-win32-arm64-msvc": "1.1.0"
55
+ "@mdream/rust-android-arm-eabi": "1.1.2",
56
+ "@mdream/rust-darwin-arm64": "1.1.2",
57
+ "@mdream/rust-darwin-x64": "1.1.2",
58
+ "@mdream/rust-freebsd-x64": "1.1.2",
59
+ "@mdream/rust-android-arm64": "1.1.2",
60
+ "@mdream/rust-linux-arm64-gnu": "1.1.2",
61
+ "@mdream/rust-linux-arm-gnueabihf": "1.1.2",
62
+ "@mdream/rust-linux-arm64-musl": "1.1.2",
63
+ "@mdream/rust-linux-x64-musl": "1.1.2",
64
+ "@mdream/rust-linux-x64-gnu": "1.1.2",
65
+ "@mdream/rust-wasm32-wasi": "1.1.2",
66
+ "@mdream/rust-win32-arm64-msvc": "1.1.2",
67
+ "@mdream/rust-win32-x64-msvc": "1.1.2"
67
68
  },
68
69
  "devDependencies": {
69
- "@mdream/js": "1.1.0"
70
+ "@mdream/js": "1.1.2"
70
71
  },
71
72
  "scripts": {
72
73
  "flame": "pnpm build && unbuild bench/bundle && clinic flame -- node bench/bundle/dist/string.mjs 10",
@@ -83,7 +84,7 @@
83
84
  "test:wiki-small:file": "cat test/fixtures/wikipedia-small.html | node ./bin/mdream.mjs --origin https://en.wikipedia.org | tee test/wiki-markdown.md",
84
85
  "build": "obuild",
85
86
  "build:native": "cd ../../crates/node && napi build --platform --release",
86
- "build:edge": "cd ../../crates/edge && wasm-pack build --target web --out-dir ../../packages/mdream/wasm --out-name mdream_edge && wasm-opt -Oz --enable-bulk-memory --enable-nontrapping-float-to-int --strip-producers ../../packages/mdream/wasm/mdream_edge_bg.wasm -o ../../packages/mdream/wasm/mdream_edge_bg.wasm",
87
+ "build:edge": "cd ../../crates/edge && wasm-pack build --target web --out-dir ../../packages/mdream/wasm --out-name mdream_edge && wasm-opt -Oz --enable-bulk-memory --enable-nontrapping-float-to-int --strip-producers ../../packages/mdream/wasm/mdream_edge_bg.wasm -o ../../packages/mdream/wasm/mdream_edge_bg.wasm && wasm-pack build --target bundler --out-dir ../../packages/mdream/wasm-bundler --out-name mdream_edge && wasm-opt -Oz --enable-bulk-memory --enable-nontrapping-float-to-int --strip-producers ../../packages/mdream/wasm-bundler/mdream_edge_bg.wasm -o ../../packages/mdream/wasm-bundler/mdream_edge_bg.wasm",
87
88
  "typecheck": "tsc --noEmit",
88
89
  "dev:prepare": "obuild --stub",
89
90
  "test": "vitest test",
Binary file
package/wasm/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "mdream-edge",
3
3
  "type": "module",
4
4
  "description": "WebAssembly edge runtime bindings for mdream HTML-to-Markdown converter",
5
- "version": "1.1.0",
5
+ "version": "1.1.2",
6
6
  "license": "MIT",
7
7
  "files": [
8
8
  "mdream_edge_bg.wasm",
@@ -14,4 +14,4 @@
14
14
  "sideEffects": [
15
15
  "./snippets/*"
16
16
  ]
17
- }
17
+ }
@@ -0,0 +1,14 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
+ export class MarkdownStream {
5
+ free(): void;
6
+ [Symbol.dispose](): void;
7
+ finish(): string;
8
+ constructor(options: any);
9
+ processChunk(chunk: string): string;
10
+ }
11
+
12
+ export function htmlToMarkdown(html: string, options: any): string;
13
+
14
+ export function htmlToMarkdownResult(html: string, options: any): any;
@@ -0,0 +1,9 @@
1
+ /* @ts-self-types="./mdream_edge.d.ts" */
2
+ import * as wasm from "./mdream_edge_bg.wasm";
3
+ import { __wbg_set_wasm } from "./mdream_edge_bg.js";
4
+
5
+ __wbg_set_wasm(wasm);
6
+
7
+ export {
8
+ MarkdownStream, htmlToMarkdown, htmlToMarkdownResult
9
+ } from "./mdream_edge_bg.js";
@@ -0,0 +1,317 @@
1
+ export class MarkdownStream {
2
+ __destroy_into_raw() {
3
+ const ptr = this.__wbg_ptr;
4
+ this.__wbg_ptr = 0;
5
+ MarkdownStreamFinalization.unregister(this);
6
+ return ptr;
7
+ }
8
+ free() {
9
+ const ptr = this.__destroy_into_raw();
10
+ wasm.__wbg_markdownstream_free(ptr, 0);
11
+ }
12
+ /**
13
+ * @returns {string}
14
+ */
15
+ finish() {
16
+ let deferred1_0;
17
+ let deferred1_1;
18
+ try {
19
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
20
+ wasm.markdownstream_finish(retptr, this.__wbg_ptr);
21
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
22
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
23
+ deferred1_0 = r0;
24
+ deferred1_1 = r1;
25
+ return getStringFromWasm0(r0, r1);
26
+ } finally {
27
+ wasm.__wbindgen_add_to_stack_pointer(16);
28
+ wasm.__wbindgen_export4(deferred1_0, deferred1_1, 1);
29
+ }
30
+ }
31
+ /**
32
+ * @param {any} options
33
+ */
34
+ constructor(options) {
35
+ const ret = wasm.markdownstream_new(addHeapObject(options));
36
+ this.__wbg_ptr = ret >>> 0;
37
+ MarkdownStreamFinalization.register(this, this.__wbg_ptr, this);
38
+ return this;
39
+ }
40
+ /**
41
+ * @param {string} chunk
42
+ * @returns {string}
43
+ */
44
+ processChunk(chunk) {
45
+ let deferred2_0;
46
+ let deferred2_1;
47
+ try {
48
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
49
+ const ptr0 = passStringToWasm0(chunk, wasm.__wbindgen_export, wasm.__wbindgen_export2);
50
+ const len0 = WASM_VECTOR_LEN;
51
+ wasm.markdownstream_processChunk(retptr, this.__wbg_ptr, ptr0, len0);
52
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
53
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
54
+ deferred2_0 = r0;
55
+ deferred2_1 = r1;
56
+ return getStringFromWasm0(r0, r1);
57
+ } finally {
58
+ wasm.__wbindgen_add_to_stack_pointer(16);
59
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
60
+ }
61
+ }
62
+ }
63
+ if (Symbol.dispose) MarkdownStream.prototype[Symbol.dispose] = MarkdownStream.prototype.free;
64
+
65
+ /**
66
+ * @param {string} html
67
+ * @param {any} options
68
+ * @returns {string}
69
+ */
70
+ export function htmlToMarkdown(html, options) {
71
+ let deferred2_0;
72
+ let deferred2_1;
73
+ try {
74
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
75
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
76
+ const len0 = WASM_VECTOR_LEN;
77
+ wasm.htmlToMarkdown(retptr, ptr0, len0, addHeapObject(options));
78
+ var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true);
79
+ var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true);
80
+ deferred2_0 = r0;
81
+ deferred2_1 = r1;
82
+ return getStringFromWasm0(r0, r1);
83
+ } finally {
84
+ wasm.__wbindgen_add_to_stack_pointer(16);
85
+ wasm.__wbindgen_export4(deferred2_0, deferred2_1, 1);
86
+ }
87
+ }
88
+
89
+ /**
90
+ * @param {string} html
91
+ * @param {any} options
92
+ * @returns {any}
93
+ */
94
+ export function htmlToMarkdownResult(html, options) {
95
+ const ptr0 = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
96
+ const len0 = WASM_VECTOR_LEN;
97
+ const ret = wasm.htmlToMarkdownResult(ptr0, len0, addHeapObject(options));
98
+ return takeObject(ret);
99
+ }
100
+ export function __wbg___wbindgen_boolean_get_6ea149f0a8dcc5ff(arg0) {
101
+ const v = getObject(arg0);
102
+ const ret = typeof(v) === 'boolean' ? v : undefined;
103
+ return isLikeNone(ret) ? 0xFFFFFF : ret ? 1 : 0;
104
+ }
105
+ export function __wbg___wbindgen_is_null_52ff4ec04186736f(arg0) {
106
+ const ret = getObject(arg0) === null;
107
+ return ret;
108
+ }
109
+ export function __wbg___wbindgen_is_undefined_29a43b4d42920abd(arg0) {
110
+ const ret = getObject(arg0) === undefined;
111
+ return ret;
112
+ }
113
+ export function __wbg___wbindgen_number_get_c7f42aed0525c451(arg0, arg1) {
114
+ const obj = getObject(arg1);
115
+ const ret = typeof(obj) === 'number' ? obj : undefined;
116
+ getDataViewMemory0().setFloat64(arg0 + 8 * 1, isLikeNone(ret) ? 0 : ret, true);
117
+ getDataViewMemory0().setInt32(arg0 + 4 * 0, !isLikeNone(ret), true);
118
+ }
119
+ export function __wbg___wbindgen_string_get_7ed5322991caaec5(arg0, arg1) {
120
+ const obj = getObject(arg1);
121
+ const ret = typeof(obj) === 'string' ? obj : undefined;
122
+ var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_export, wasm.__wbindgen_export2);
123
+ var len1 = WASM_VECTOR_LEN;
124
+ getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
125
+ getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
126
+ }
127
+ export function __wbg___wbindgen_throw_6b64449b9b9ed33c(arg0, arg1) {
128
+ throw new Error(getStringFromWasm0(arg0, arg1));
129
+ }
130
+ export function __wbg_entries_e0b73aa8571ddb56(arg0) {
131
+ const ret = Object.entries(getObject(arg0));
132
+ return addHeapObject(ret);
133
+ }
134
+ export function __wbg_from_0dbf29f09e7fb200(arg0) {
135
+ const ret = Array.from(getObject(arg0));
136
+ return addHeapObject(ret);
137
+ }
138
+ export function __wbg_get_6011fa3a58f61074() { return handleError(function (arg0, arg1) {
139
+ const ret = Reflect.get(getObject(arg0), getObject(arg1));
140
+ return addHeapObject(ret);
141
+ }, arguments); }
142
+ export function __wbg_get_8360291721e2339f(arg0, arg1) {
143
+ const ret = getObject(arg0)[arg1 >>> 0];
144
+ return addHeapObject(ret);
145
+ }
146
+ export function __wbg_isArray_c3109d14ffc06469(arg0) {
147
+ const ret = Array.isArray(getObject(arg0));
148
+ return ret;
149
+ }
150
+ export function __wbg_length_3d4ecd04bd8d22f1(arg0) {
151
+ const ret = getObject(arg0).length;
152
+ return ret;
153
+ }
154
+ export function __wbg_new_682678e2f47e32bc() {
155
+ const ret = new Array();
156
+ return addHeapObject(ret);
157
+ }
158
+ export function __wbg_new_aa8d0fa9762c29bd() {
159
+ const ret = new Object();
160
+ return addHeapObject(ret);
161
+ }
162
+ export function __wbg_push_471a5b068a5295f6(arg0, arg1) {
163
+ const ret = getObject(arg0).push(getObject(arg1));
164
+ return ret;
165
+ }
166
+ export function __wbg_set_022bee52d0b05b19() { return handleError(function (arg0, arg1, arg2) {
167
+ const ret = Reflect.set(getObject(arg0), getObject(arg1), getObject(arg2));
168
+ return ret;
169
+ }, arguments); }
170
+ export function __wbindgen_cast_0000000000000001(arg0, arg1) {
171
+ // Cast intrinsic for `Ref(String) -> Externref`.
172
+ const ret = getStringFromWasm0(arg0, arg1);
173
+ return addHeapObject(ret);
174
+ }
175
+ export function __wbindgen_object_clone_ref(arg0) {
176
+ const ret = getObject(arg0);
177
+ return addHeapObject(ret);
178
+ }
179
+ export function __wbindgen_object_drop_ref(arg0) {
180
+ takeObject(arg0);
181
+ }
182
+ const MarkdownStreamFinalization = (typeof FinalizationRegistry === 'undefined')
183
+ ? { register: () => {}, unregister: () => {} }
184
+ : new FinalizationRegistry(ptr => wasm.__wbg_markdownstream_free(ptr >>> 0, 1));
185
+
186
+ function addHeapObject(obj) {
187
+ if (heap_next === heap.length) heap.push(heap.length + 1);
188
+ const idx = heap_next;
189
+ heap_next = heap[idx];
190
+
191
+ heap[idx] = obj;
192
+ return idx;
193
+ }
194
+
195
+ function dropObject(idx) {
196
+ if (idx < 1028) return;
197
+ heap[idx] = heap_next;
198
+ heap_next = idx;
199
+ }
200
+
201
+ let cachedDataViewMemory0 = null;
202
+ function getDataViewMemory0() {
203
+ if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) {
204
+ cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
205
+ }
206
+ return cachedDataViewMemory0;
207
+ }
208
+
209
+ function getStringFromWasm0(ptr, len) {
210
+ ptr = ptr >>> 0;
211
+ return decodeText(ptr, len);
212
+ }
213
+
214
+ let cachedUint8ArrayMemory0 = null;
215
+ function getUint8ArrayMemory0() {
216
+ if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
217
+ cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
218
+ }
219
+ return cachedUint8ArrayMemory0;
220
+ }
221
+
222
+ function getObject(idx) { return heap[idx]; }
223
+
224
+ function handleError(f, args) {
225
+ try {
226
+ return f.apply(this, args);
227
+ } catch (e) {
228
+ wasm.__wbindgen_export3(addHeapObject(e));
229
+ }
230
+ }
231
+
232
+ let heap = new Array(1024).fill(undefined);
233
+ heap.push(undefined, null, true, false);
234
+
235
+ let heap_next = heap.length;
236
+
237
+ function isLikeNone(x) {
238
+ return x === undefined || x === null;
239
+ }
240
+
241
+ function passStringToWasm0(arg, malloc, realloc) {
242
+ if (realloc === undefined) {
243
+ const buf = cachedTextEncoder.encode(arg);
244
+ const ptr = malloc(buf.length, 1) >>> 0;
245
+ getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
246
+ WASM_VECTOR_LEN = buf.length;
247
+ return ptr;
248
+ }
249
+
250
+ let len = arg.length;
251
+ let ptr = malloc(len, 1) >>> 0;
252
+
253
+ const mem = getUint8ArrayMemory0();
254
+
255
+ let offset = 0;
256
+
257
+ for (; offset < len; offset++) {
258
+ const code = arg.charCodeAt(offset);
259
+ if (code > 0x7F) break;
260
+ mem[ptr + offset] = code;
261
+ }
262
+ if (offset !== len) {
263
+ if (offset !== 0) {
264
+ arg = arg.slice(offset);
265
+ }
266
+ ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
267
+ const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
268
+ const ret = cachedTextEncoder.encodeInto(arg, view);
269
+
270
+ offset += ret.written;
271
+ ptr = realloc(ptr, len, offset, 1) >>> 0;
272
+ }
273
+
274
+ WASM_VECTOR_LEN = offset;
275
+ return ptr;
276
+ }
277
+
278
+ function takeObject(idx) {
279
+ const ret = getObject(idx);
280
+ dropObject(idx);
281
+ return ret;
282
+ }
283
+
284
+ let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
285
+ cachedTextDecoder.decode();
286
+ const MAX_SAFARI_DECODE_BYTES = 2146435072;
287
+ let numBytesDecoded = 0;
288
+ function decodeText(ptr, len) {
289
+ numBytesDecoded += len;
290
+ if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
291
+ cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
292
+ cachedTextDecoder.decode();
293
+ numBytesDecoded = len;
294
+ }
295
+ return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
296
+ }
297
+
298
+ const cachedTextEncoder = new TextEncoder();
299
+
300
+ if (!('encodeInto' in cachedTextEncoder)) {
301
+ cachedTextEncoder.encodeInto = function (arg, view) {
302
+ const buf = cachedTextEncoder.encode(arg);
303
+ view.set(buf);
304
+ return {
305
+ read: arg.length,
306
+ written: buf.length
307
+ };
308
+ };
309
+ }
310
+
311
+ let WASM_VECTOR_LEN = 0;
312
+
313
+
314
+ let wasm;
315
+ export function __wbg_set_wasm(val) {
316
+ wasm = val;
317
+ }
Binary file
@@ -0,0 +1,14 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ export const memory: WebAssembly.Memory;
4
+ export const __wbg_markdownstream_free: (a: number, b: number) => void;
5
+ export const htmlToMarkdown: (a: number, b: number, c: number, d: number) => void;
6
+ export const htmlToMarkdownResult: (a: number, b: number, c: number) => number;
7
+ export const markdownstream_finish: (a: number, b: number) => void;
8
+ export const markdownstream_new: (a: number) => number;
9
+ export const markdownstream_processChunk: (a: number, b: number, c: number, d: number) => void;
10
+ export const __wbindgen_export: (a: number, b: number) => number;
11
+ export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
12
+ export const __wbindgen_export3: (a: number) => void;
13
+ export const __wbindgen_add_to_stack_pointer: (a: number) => number;
14
+ export const __wbindgen_export4: (a: number, b: number, c: number) => void;
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "mdream-edge",
3
+ "type": "module",
4
+ "description": "WebAssembly edge runtime bindings for mdream HTML-to-Markdown converter",
5
+ "version": "1.1.2",
6
+ "license": "MIT",
7
+ "files": [
8
+ "mdream_edge_bg.wasm",
9
+ "mdream_edge.js",
10
+ "mdream_edge_bg.js",
11
+ "mdream_edge.d.ts"
12
+ ],
13
+ "main": "mdream_edge.js",
14
+ "types": "mdream_edge.d.ts",
15
+ "sideEffects": [
16
+ "./mdream_edge.js",
17
+ "./snippets/*"
18
+ ]
19
+ }