@oh-my-pi/pi-natives 8.12.10 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-natives",
3
- "version": "8.12.10",
3
+ "version": "9.0.0",
4
4
  "description": "Native Rust functionality compiled to WebAssembly via wasm-bindgen",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -30,7 +30,7 @@
30
30
  "directory": "packages/natives"
31
31
  },
32
32
  "dependencies": {
33
- "@oh-my-pi/pi-utils": "8.12.10"
33
+ "@oh-my-pi/pi-utils": "9.0.0"
34
34
  },
35
35
  "devDependencies": {
36
36
  "@types/node": "^25.0.10"
@@ -0,0 +1,47 @@
1
+ /**
2
+ * HTML to Markdown conversion powered by WASM.
3
+ *
4
+ * Conversion happens in a worker thread to avoid blocking the main thread.
5
+ */
6
+
7
+ import { type RequestOptions, WorkerPool } from "../pool";
8
+ import type { HtmlRequest, HtmlResponse, HtmlToMarkdownOptions } from "./types";
9
+
10
+ export type { HtmlToMarkdownOptions } from "./types";
11
+
12
/** Absolute URL of the worker entry script, resolved relative to this module. */
const workerUrl = new URL("./worker.ts", import.meta.url).href;

/**
 * Worker pool that services HTML conversion requests for this module.
 * Configured with `maxWorkers: 2` and `idleTimeoutMs: 30_000`.
 */
const pool = new WorkerPool<HtmlRequest, HtmlResponse>({
	idleTimeoutMs: 30_000,
	maxWorkers: 2,
	workerUrl,
});
17
+
18
/**
 * Convert an HTML string into Markdown text.
 *
 * The input is sent to the module's worker pool as a `convert` request and the
 * returned promise resolves with the `markdown` field of the pool's response.
 *
 * @param html - HTML content to convert
 * @param options - Conversion options, forwarded unchanged in the request message
 * @param req - Per-request pool options, passed straight through to `pool.request`
 * @returns Markdown text
 */
export async function htmlToMarkdown(
	html: string,
	options?: HtmlToMarkdownOptions,
	req?: RequestOptions,
): Promise<string> {
	type Converted = Extract<HtmlResponse, { type: "converted" }>;

	const { markdown } = await pool.request<Converted>({ type: "convert", html, options }, req);
	return markdown;
}
40
+
41
/**
 * Shut down the worker pool used for HTML conversion.
 *
 * Intended to be called while the application is shutting down so the pool's
 * resources are cleaned up.
 */
export function terminate(): void {
	pool.terminate();
}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Types for HTML to Markdown worker communication.
3
+ */
4
+
5
/**
 * Caller-facing switches for an HTML to Markdown conversion.
 * Both fields are optional.
 */
export interface HtmlToMarkdownOptions {
	/** Skip images during conversion. */
	skipImages?: boolean;
	/** Remove navigation elements, forms, headers and footers. */
	cleanContent?: boolean;
}
11
+
12
/**
 * Messages accepted by the HTML worker, discriminated by `type`.
 *
 * - `convert`: carries the HTML to convert, optional conversion options and a request id.
 * - `init`: carries a request id only.
 * - `destroy`: carries no payload and no id.
 */
export type HtmlRequest =
	| { type: "convert"; id: number; html: string; options?: HtmlToMarkdownOptions }
	| { type: "init"; id: number }
	| { type: "destroy" };
21
+
22
/**
 * Messages emitted by the HTML worker, discriminated by `type`.
 * Every variant carries the `id` of the request it belongs to.
 */
export type HtmlResponse =
	/** Conversion result. */
	| { type: "converted"; id: number; markdown: string }
	/** Failure report with a textual description. */
	| { type: "error"; id: number; error: string }
	/** Reply to an `init` request. */
	| { type: "ready"; id: number };
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Worker for HTML to Markdown conversion.
3
+ * Uses WASM for actual conversion.
4
+ */
5
+
6
+ import { html_to_markdown } from "../../wasm/pi_natives";
7
+ import type { HtmlRequest, HtmlResponse } from "./types";
8
+
9
// Inside the worker script, `self` is the worker scope; typed as Worker for postMessage/addEventListener.
declare const self: Worker;

/**
 * Post a response message from this worker via `self.postMessage`.
 *
 * @param response - Message to deliver
 */
function respond(response: HtmlResponse): void {
	self.postMessage(response);
}
14
+
15
/**
 * Dispatch incoming requests:
 * - `init` is acknowledged with a `ready` response carrying the same id;
 * - `destroy` requires no action;
 * - `convert` runs the WASM conversion and answers with `converted`, or with
 *   `error` if anything inside the attempt throws.
 */
self.addEventListener("message", (event: MessageEvent<HtmlRequest>) => {
	const request = event.data;

	if (request.type === "init") {
		respond({ type: "ready", id: request.id });
		return;
	}

	// "destroy" (and anything that is not a conversion request) needs no handling here.
	if (request.type !== "convert") return;

	const { id, html, options } = request;
	try {
		const markdown = html_to_markdown(html, options);
		// Posting stays inside the try so a failure while responding is reported as an error too.
		respond({ type: "converted", id, markdown });
	} catch (err) {
		const error = err instanceof Error ? err.message : String(err);
		respond({ type: "error", id, error });
	}
});
@@ -49,9 +49,12 @@ export class PhotonImage {
49
49
  * The bytes are transferred to the worker (zero-copy).
50
50
  */
51
51
  static async new_from_byteslice(bytes: Uint8Array): Promise<PhotonImage> {
52
- const response = await pool.request<Extract<ImageResponse, { type: "loaded" }>>({ type: "load", bytes }, [
53
- bytes.buffer,
54
- ]);
52
+ const response = await pool.request<Extract<ImageResponse, { type: "loaded" }>>(
53
+ { type: "load", bytes },
54
+ {
55
+ transfer: [bytes.buffer],
56
+ },
57
+ );
55
58
  return new PhotonImage(response.handle, response.width, response.height);
56
59
  }
57
60
 
package/src/index.ts CHANGED
@@ -157,8 +157,18 @@ export {
157
157
  supportsLanguage,
158
158
  } from "./highlight/index";
159
159
 
160
+ // =============================================================================
161
+ // HTML to Markdown
162
+ // =============================================================================
163
+
164
+ export {
165
+ type HtmlToMarkdownOptions,
166
+ htmlToMarkdown,
167
+ terminate as terminateHtmlWorker,
168
+ } from "./html/index";
169
+
160
170
  // =============================================================================
161
171
  // Worker Pool (shared infrastructure)
162
172
  // =============================================================================
163
173
 
164
- export { type BaseRequest, type BaseResponse, WorkerPool, type WorkerPoolOptions } from "./pool";
174
+ export { type BaseRequest, type BaseResponse, type RequestOptions, WorkerPool, type WorkerPoolOptions } from "./pool";
package/src/pool.ts CHANGED
@@ -27,6 +27,17 @@ export interface WorkerPoolOptions {
27
27
  idleTimeoutMs?: number;
28
28
  /** Timeout for worker initialization in ms (default: 10000). */
29
29
  initTimeoutMs?: number;
30
+ /** Grace period after request timeout before force-terminating stuck workers (default: 5000). */
31
+ stuckGracePeriodMs?: number;
32
+ }
33
+
34
/** Per-request settings accepted by `WorkerPool.request`. All fields are optional. */
export interface RequestOptions {
	/** Transfer list handed to `postMessage` together with the request. */
	transfer?: ArrayBufferLike[];
	/** Abort signal for this request. */
	signal?: AbortSignal;
	/**
	 * Timeout for this request in milliseconds. Once it elapses the promise rejects,
	 * but the worker still gets a grace period.
	 */
	timeoutMs?: number;
}
31
42
 
32
43
  interface PooledWorker {
@@ -39,7 +50,8 @@ interface PooledWorker {
39
50
  interface PendingRequest<T> {
40
51
  resolve: (result: T) => void;
41
52
  reject: (error: Error) => void;
42
- timeout?: ReturnType<typeof setTimeout>;
53
+ worker?: PooledWorker;
54
+ dispose?: () => void;
43
55
  }
44
56
 
45
57
  /**
@@ -62,26 +74,51 @@ export class WorkerPool<TReq extends BaseRequest, TRes extends BaseResponse> {
62
74
  maxWorkers: options.maxWorkers ?? 4,
63
75
  idleTimeoutMs: options.idleTimeoutMs ?? 30_000,
64
76
  initTimeoutMs: options.initTimeoutMs ?? 10_000,
77
+ stuckGracePeriodMs: options.stuckGracePeriodMs ?? 5_000,
65
78
  };
66
79
  }
67
80
 
68
81
  /**
69
82
  * Send a request to a worker and wait for the response.
70
83
  * Workers are acquired from the pool (or created if under limit).
84
+ *
85
+ * @param msg - Request message
86
+ * @param options - Request options (timeout, transfer)
71
87
  */
72
88
  async request<T extends TRes = TRes>(
73
89
  msg: TReq | (Omit<TReq, "id"> & { id?: number }),
74
- transfer?: ArrayBufferLike[],
90
+ options?: RequestOptions,
75
91
  ): Promise<T> {
92
+ const { timeoutMs, signal, transfer } = options ?? {};
93
+ signal?.throwIfAborted();
94
+
76
95
  const worker = await this.#acquireWorker();
77
96
  const id = msg.id ?? this.#nextRequestId++;
78
97
  const fullMsg = { ...msg, id } as TReq;
79
98
 
80
99
  const { promise, resolve, reject } = Promise.withResolvers<T>();
81
- this.#pending.set(id, {
100
+ const pending: PendingRequest<T> = {
82
101
  resolve: resolve as (result: TRes) => void,
83
102
  reject,
84
- });
103
+ worker,
104
+ };
105
+ this.#pending.set(id, pending as PendingRequest<TRes>);
106
+
107
+ const onAbort = () => {
108
+ this.#handleRequestAbort(id, worker);
109
+ };
110
+
111
+ if (timeoutMs && timeoutMs > 0 && signal) {
112
+ const combined = AbortSignal.any([signal, AbortSignal.timeout(timeoutMs)]);
113
+ combined.addEventListener("abort", onAbort, { once: true });
114
+ pending.dispose = () => combined.removeEventListener("abort", onAbort);
115
+ } else if (timeoutMs && timeoutMs > 0) {
116
+ const timer = setTimeout(onAbort, timeoutMs);
117
+ pending.dispose = () => clearTimeout(timer);
118
+ } else if (signal) {
119
+ signal.addEventListener("abort", onAbort, { once: true });
120
+ pending.dispose = () => signal.removeEventListener("abort", onAbort);
121
+ }
85
122
 
86
123
  worker.currentRequestId = id;
87
124
  if (transfer) {
@@ -109,7 +146,7 @@ export class WorkerPool<TReq extends BaseRequest, TRes extends BaseResponse> {
109
146
 
110
147
  for (const pending of this.#pending.values()) {
111
148
  pending.reject(new Error("Worker pool terminated"));
112
- if (pending.timeout) clearTimeout(pending.timeout);
149
+ void pending.dispose?.();
113
150
  }
114
151
  this.#pending.clear();
115
152
  }
@@ -144,7 +181,7 @@ export class WorkerPool<TReq extends BaseRequest, TRes extends BaseResponse> {
144
181
  if (!pending) return;
145
182
 
146
183
  this.#pending.delete(msg.id);
147
- if (pending.timeout) clearTimeout(pending.timeout);
184
+ void pending.dispose?.();
148
185
 
149
186
  if (msg.type === "error" && "error" in msg) {
150
187
  pending.reject(new Error(msg.error ?? "Unknown error"));
@@ -163,11 +200,58 @@ export class WorkerPool<TReq extends BaseRequest, TRes extends BaseResponse> {
163
200
  const pending = this.#pending.get(id);
164
201
  if (pending) {
165
202
  this.#pending.delete(id);
166
- if (pending.timeout) clearTimeout(pending.timeout);
203
+ void pending.dispose?.();
167
204
  pending.reject(error);
168
205
  }
169
206
  }
170
207
 
208
/**
 * Reject a pending request after its timeout elapsed or its abort signal fired.
 *
 * The entry is not removed from `#pending` here. When `stuckGracePeriodMs` is
 * positive, a timer is armed that calls `#terminateStuckWorker` once the grace
 * period is over; the entry's `dispose` hook is replaced so that timer can be
 * cancelled by whoever settles the entry first.
 *
 * @param id - Id of the request being aborted
 * @param worker - Worker the request was dispatched to
 */
#handleRequestAbort(id: number, worker: PooledWorker): void {
	const entry = this.#pending.get(id);
	if (!entry) return;

	// Drop the previous cleanup hook without invoking it, then fail the caller's promise.
	entry.dispose = undefined;
	entry.reject(new Error("Request timeout"));

	const graceMs = this.#options.stuckGracePeriodMs;
	if (!(graceMs > 0)) return;

	const graceTimer = setTimeout(() => {
		this.#terminateStuckWorker(id, worker);
	}, graceMs);
	entry.dispose = () => {
		clearTimeout(graceTimer);
	};
}
225
+
226
/**
 * Clean up after a request and, if its worker is still occupied by it, remove that worker.
 *
 * Any remaining `#pending` entry for `id` is deleted and disposed. The worker is
 * removed only when it is still assigned to this request and still part of the
 * pool. If that leaves the pool empty while callers are waiting, a replacement
 * worker is started.
 *
 * @param id - Id of the request that was aborted
 * @param worker - Worker that was handling the request
 */
#terminateStuckWorker(id: number, worker: PooledWorker): void {
	const entry = this.#pending.get(id);
	if (entry) {
		this.#pending.delete(id);
		void entry.dispose?.();
	}

	// The worker has moved on to another request, or is no longer pooled: leave it alone.
	const stillOnRequest = worker.currentRequestId === id;
	if (!stillOnRequest || !this.#pool.includes(worker)) return;

	this.#removeWorker(worker);

	const poolIsEmpty = this.#pool.length === 0;
	if (poolIsEmpty && this.#waiters.length > 0) {
		// Fire and forget: #replenishPool handles its own failures.
		void this.#replenishPool();
	}
}
242
+
243
/**
 * Create, register and initialize one new worker, then release it.
 *
 * The worker is flagged busy and added to the pool before initialization starts.
 * If initialization or release throws, the worker is removed again and the error
 * is swallowed.
 */
async #replenishPool(): Promise<void> {
	const replacement = this.#createWorker();
	replacement.busy = true;
	this.#pool.push(replacement);

	try {
		await this.#initializeWorker(replacement);
		this.#releaseWorker(replacement);
	} catch {
		// Best effort: drop the replacement that failed to come up.
		this.#removeWorker(replacement);
	}
}
254
+
171
255
  #removeWorker(pooledWorker: PooledWorker): void {
172
256
  const idx = this.#pool.indexOf(pooledWorker);
173
257
  if (idx !== -1) {
@@ -227,7 +311,7 @@ export class WorkerPool<TReq extends BaseRequest, TRes extends BaseResponse> {
227
311
  this.#pending.set(id, {
228
312
  resolve: () => resolve(),
229
313
  reject,
230
- timeout,
314
+ dispose: () => clearTimeout(timeout),
231
315
  } as PendingRequest<TRes>);
232
316
 
233
317
  pooledWorker.worker.postMessage({ type: "init", id } satisfies BaseRequest);
@@ -101,6 +101,11 @@ export function has_match(content: string, pattern: string, ignore_case: boolean
101
101
  */
102
102
  export function highlight_code(code: string, lang: string | null | undefined, colors: any): string;
103
103
 
104
+ /**
105
+ * Convert HTML to Markdown.
106
+ */
107
+ export function html_to_markdown(html: string, options: any): string;
108
+
104
109
  /**
105
110
  * Resize an image to the specified dimensions.
106
111
  */
@@ -309,6 +309,39 @@ export function highlight_code(code, lang, colors) {
309
309
  }
310
310
  }
311
311
 
312
/**
 * Convert HTML to Markdown.
 *
 * Glue around the `html_to_markdown` WASM export: the HTML string is copied into
 * WASM memory, `options` is passed as a heap object, and four 32-bit slots are
 * read back from a 16-byte return area reserved on the WASM stack. A non-zero
 * fourth slot signals an error, in which case the object referenced by the third
 * slot is thrown.
 *
 * @param {string} html
 * @param {any} options
 * @returns {string}
 */
export function html_to_markdown(html, options) {
    // Stay undefined on the error path; the finally block passes them on regardless.
    let resultPtr;
    let resultLen;
    try {
        const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
        const htmlPtr = passStringToWasm0(html, wasm.__wbindgen_export, wasm.__wbindgen_export2);
        const htmlLen = WASM_VECTOR_LEN;
        wasm.html_to_markdown(retptr, htmlPtr, htmlLen, addHeapObject(options));

        // Read the i32 at the given slot of the return area (little-endian).
        const readSlot = (slot) => getDataViewMemory0().getInt32(retptr + 4 * slot, true);
        const outPtr = readSlot(0);
        const outLen = readSlot(1);
        const errorHandle = readSlot(2);
        const isError = readSlot(3);

        if (isError) {
            throw takeObject(errorHandle);
        }

        resultPtr = outPtr;
        resultLen = outLen;
        return getStringFromWasm0(outPtr, outLen);
    } finally {
        // Restore the stack pointer first, then hand the result pointer and length to `__wbindgen_export3`.
        wasm.__wbindgen_add_to_stack_pointer(16);
        wasm.__wbindgen_export3(resultPtr, resultLen, 1);
    }
}
344
+
312
345
  /**
313
346
  * Resize an image to the specified dimensions.
314
347
  * @param {PhotonImage} image
@@ -451,6 +484,10 @@ function __wbg_get_imports() {
451
484
  const ret = typeof(getObject(arg0)) === 'bigint';
452
485
  return ret;
453
486
  },
487
+ __wbg___wbindgen_is_null_ac34f5003991759a: function(arg0) {
488
+ const ret = getObject(arg0) === null;
489
+ return ret;
490
+ },
454
491
  __wbg___wbindgen_is_object_5ae8e5880f2c1fbd: function(arg0) {
455
492
  const val = getObject(arg0);
456
493
  const ret = typeof(val) === 'object' && val !== null;
Binary file
@@ -10,6 +10,7 @@ export const extract_segments: (a: number, b: number, c: number, d: number, e: n
10
10
  export const get_supported_languages: (a: number) => void;
11
11
  export const has_match: (a: number, b: number, c: number, d: number, e: number, f: number, g: number) => void;
12
12
  export const highlight_code: (a: number, b: number, c: number, d: number, e: number, f: number) => void;
13
+ export const html_to_markdown: (a: number, b: number, c: number, d: number) => void;
13
14
  export const photonimage_get_bytes: (a: number, b: number) => void;
14
15
  export const photonimage_get_bytes_jpeg: (a: number, b: number, c: number) => void;
15
16
  export const photonimage_get_height: (a: number) => number;