@p0u4a/inference-worker 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +152 -0
- package/dist/client.d.ts +23 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +167 -0
- package/dist/types.d.ts +100 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/utils.d.ts +4 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/worker.d.ts +7 -0
- package/dist/worker.d.ts.map +1 -0
- package/dist/worker.js +222 -0
- package/package.json +37 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 P0u4a
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# Inference Worker
|
|
2
|
+
|
|
3
|
+
A generic, extensible Web Worker for running HuggingFace Transformers inference in the browser. Works with any pretrained model, supports WebGPU acceleration, model load progress reporting, cancellation, and optional tool call parsing.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @p0u4a/inference-worker
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quickstart
|
|
12
|
+
|
|
13
|
+
```ts
|
|
14
|
+
import { InferenceClient } from "@p0u4a/inference-worker";
|
|
15
|
+
|
|
16
|
+
const client = new InferenceClient({
|
|
17
|
+
worker: new URL("@p0u4a/inference-worker/worker", import.meta.url),
|
|
18
|
+
config: {
|
|
19
|
+
modelId: "Qwen/Qwen2.5-0.5B-Instruct",
|
|
20
|
+
dtype: "q4",
|
|
21
|
+
device: "webgpu",
|
|
22
|
+
},
|
|
23
|
+
onProgress: (p) => {
|
|
24
|
+
if (p.status === "progress") {
|
|
25
|
+
console.log(`Loading ${p.file}: ${Math.round(p.progress)}%`);
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
await client.init();
|
|
31
|
+
|
|
32
|
+
const result = await client.execute({
|
|
33
|
+
prompt: "Explain quantum computing in one sentence.",
|
|
34
|
+
generationParams: { maxNewTokens: 64 },
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
console.log(result.rawOutput);
|
|
38
|
+
|
|
39
|
+
client.dispose();
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## With tool call parsing
|
|
43
|
+
|
|
44
|
+
The `toolCallParser` runs on the main thread after the worker returns raw output. Define any parsing logic you need:
|
|
45
|
+
|
|
46
|
+
```ts
|
|
47
|
+
interface ToolCall {
|
|
48
|
+
name: string;
|
|
49
|
+
arguments: Record<string, unknown>;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const client = new InferenceClient<ToolCall[]>({
|
|
53
|
+
worker: new URL("@p0u4a/inference-worker/worker", import.meta.url),
|
|
54
|
+
config: {
|
|
55
|
+
modelId: "Qwen/Qwen2.5-0.5B-Instruct",
|
|
56
|
+
dtype: "q4",
|
|
57
|
+
device: "webgpu",
|
|
58
|
+
},
|
|
59
|
+
toolCallParser: (raw) => {
|
|
60
|
+
// Implement your own parsing logic based on the model's output format
|
|
61
|
+
const match = raw.match(/\{.*\}/s);
|
|
62
|
+
return match ? [JSON.parse(match[0])] : [];
|
|
63
|
+
},
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
await client.init();
|
|
67
|
+
|
|
68
|
+
const result = await client.execute({
|
|
69
|
+
prompt: "What is the weather in London?",
|
|
70
|
+
systemPrompt: "You can call functions to get information.",
|
|
71
|
+
tools: [{ type: "function", function: { name: "get_weather", parameters: { location: { type: "string" } } } }],
|
|
72
|
+
generationParams: { maxNewTokens: 128 },
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
// result.toolCalls is typed as ToolCall[]
|
|
76
|
+
console.log(result.toolCalls);
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
## Cancellation
|
|
81
|
+
|
|
82
|
+
```ts
|
|
83
|
+
const resultPromise = client.execute({ prompt: "Write a long essay..." });
|
|
84
|
+
|
|
85
|
+
// Cancel mid-inference
|
|
86
|
+
client.cancel();
|
|
87
|
+
|
|
88
|
+
try {
|
|
89
|
+
await resultPromise;
|
|
90
|
+
} catch (e) {
|
|
91
|
+
if (e instanceof DOMException && e.name === "AbortError") {
|
|
92
|
+
console.log("Cancelled");
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Resource cleanup
|
|
98
|
+
|
|
99
|
+
`InferenceClient` implements `Disposable`, so you can use `using` for automatic cleanup:
|
|
100
|
+
|
|
101
|
+
```ts
|
|
102
|
+
using client = new InferenceClient({ ... });
|
|
103
|
+
await client.init();
|
|
104
|
+
const result = await client.execute({ prompt: "Hello" });
|
|
105
|
+
// Worker is terminated when `client` goes out of scope
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Or call `dispose()` manually when done.
|
|
109
|
+
|
|
110
|
+
## API
|
|
111
|
+
|
|
112
|
+
### `InferenceClient<TToolCalls>`
|
|
113
|
+
|
|
114
|
+
| Method | Description |
|
|
115
|
+
| --- | --- |
|
|
116
|
+
| `constructor(options)` | Create a client. Accepts a `Worker`, `URL`, or string path for the worker. |
|
|
117
|
+
| `init()` | Load the model. Resolves when ready, rejects on error. |
|
|
118
|
+
| `execute(options)` | Run inference. Returns `{ rawOutput, toolCalls }`. |
|
|
119
|
+
| `cancel()` | Abort the current inference. |
|
|
120
|
+
| `dispose()` | Terminate the worker and release resources. |
|
|
121
|
+
| `status` | Current worker status: `"idle"` \| `"loading"` \| `"ready"` \| `"error"`. |
|
|
122
|
+
|
|
123
|
+
### `WorkerInitConfig`
|
|
124
|
+
|
|
125
|
+
| Field | Type | Default | Description |
|
|
126
|
+
| --- | --- | --- | --- |
|
|
127
|
+
| `modelId` | `string` | required | HuggingFace model ID or path |
|
|
128
|
+
| `modelClass` | `string` | `"AutoModelForCausalLM"` | Auto model class name |
|
|
129
|
+
| `tokenizerClass` | `string` | `"AutoTokenizer"` | Tokenizer class name |
|
|
130
|
+
| `dtype` | `string` | - | Quantization type (`"q4"`, `"fp16"`, `"q8"`) |
|
|
131
|
+
| `device` | `string` | - | Compute device (`"webgpu"`, `"wasm"`, `"cpu"`) |
|
|
132
|
+
| `maxRetryAttempts` | `number` | `3` | Retry count for network errors |
|
|
133
|
+
| `baseRetryDelayMs` | `number` | `1000` | Base delay for exponential backoff |
|
|
134
|
+
|
|
135
|
+
### `ExecuteOptions`
|
|
136
|
+
|
|
137
|
+
| Field | Type | Description |
|
|
138
|
+
| --- | --- | --- |
|
|
139
|
+
| `prompt` | `string` | The user prompt (appended as the final message) |
|
|
140
|
+
| `messages` | `ChatMessage[]` | Conversation history |
|
|
141
|
+
| `systemPrompt` | `string` | System instruction |
|
|
142
|
+
| `tools` | `unknown[]` | Tool schemas for the chat template |
|
|
143
|
+
| `generationParams` | `GenerationParams` | Generation config (`maxNewTokens`, `temperature`, etc.) |
|
|
144
|
+
|
|
145
|
+
### Callbacks
|
|
146
|
+
|
|
147
|
+
Pass these in `InferenceClientOptions`:
|
|
148
|
+
|
|
149
|
+
- `onStatus(status, error?)` - Status changes during init/execution
|
|
150
|
+
- `onProgress(progress)` - Model download progress (file name, bytes loaded/total)
|
|
151
|
+
- `onError(error)` - Error messages from the worker
|
|
152
|
+
- `onRetry(attempt, maxAttempts)` - Retry attempts on network failure
|
package/dist/client.d.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { ExecuteOptions, InferenceClientOptions, InferenceResult, WorkerStatus } from "./types.ts";
|
|
2
|
+
/**
 * Main-thread client wrapping the inference Web Worker.
 *
 * @typeParam TToolCalls - type produced by the configured `toolCallParser`;
 * stays `undefined` when no parser is supplied.
 */
export declare class InferenceClient<TToolCalls = undefined> implements Disposable {
    private readonly worker;
    private readonly ownsWorker;
    private readonly toolCallParser?;
    private readonly onStatusCb?;
    private readonly onProgressCb?;
    private readonly onErrorCb?;
    private readonly onRetryCb?;
    private readonly workerConfig;
    private currentStatus;
    private disposed;
    constructor(options: InferenceClientOptions<TToolCalls>);
    /** Latest worker status observed by this client. */
    get status(): WorkerStatus;
    /** Loads the model in the worker; resolves on "ready", rejects on error. */
    init(): Promise<void>;
    /** Runs one inference; rejects with an AbortError DOMException when cancelled. */
    execute(options: ExecuteOptions): Promise<InferenceResult<TToolCalls>>;
    /** Requests cancellation of the in-flight inference. */
    cancel(): void;
    /** Idempotent teardown; terminates the worker only if this client created it. */
    dispose(): void;
    [Symbol.dispose](): void;
    private postWorkerMessage;
    private assertNotDisposed;
}
|
|
23
|
+
//# sourceMappingURL=client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,cAAc,EACd,sBAAsB,EACtB,eAAe,EAGf,YAAY,EACb,MAAM,YAAY,CAAC;AAEpB,qBAAa,eAAe,CAAC,UAAU,GAAG,SAAS,CAAE,YAAW,UAAU;IACxE,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAU;IACrC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAoC;IACpE,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAiD;IAC7E,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAmD;IACjF,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAgD;IAC3E,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAgD;IAC3E,OAAO,CAAC,QAAQ,CAAC,YAAY,CAA+C;IAE5E,OAAO,CAAC,aAAa,CAAwB;IAC7C,OAAO,CAAC,QAAQ,CAAS;gBAEb,OAAO,EAAE,sBAAsB,CAAC,UAAU,CAAC;IAqBvD,IAAI,MAAM,IAAI,YAAY,CAEzB;IAEK,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAwCrB,OAAO,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IA+D5E,MAAM,IAAI,IAAI;IAKd,OAAO,IAAI,IAAI;IAQf,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI;IAIxB,OAAO,CAAC,iBAAiB;IAIzB,OAAO,CAAC,iBAAiB;CAK1B"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export type { MessageRole, ChatMessage, WorkerInitConfig, GenerationParams, WorkerMessageIn, WorkerMessageOut, WorkerStatus, InferenceResult, StatusCallback, ProgressCallback, ErrorCallback, RetryCallback, InferenceClientOptions, ExecuteOptions, ProgressInfo, } from "./types.ts";
|
|
2
|
+
export { InferenceClient } from "./client.ts";
|
|
3
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EACV,WAAW,EACX,WAAW,EACX,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,gBAAgB,EAChB,YAAY,EACZ,eAAe,EACf,cAAc,EACd,gBAAgB,EAChB,aAAa,EACb,aAAa,EACb,sBAAsB,EACtB,cAAc,EACd,YAAY,GACb,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __toESM = (mod, isNodeMode, target) => {
|
|
7
|
+
target = mod != null ? __create(__getProtoOf(mod)) : {};
|
|
8
|
+
const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
|
|
9
|
+
for (let key of __getOwnPropNames(mod))
|
|
10
|
+
if (!__hasOwnProp.call(to, key))
|
|
11
|
+
__defProp(to, key, {
|
|
12
|
+
get: () => mod[key],
|
|
13
|
+
enumerable: true
|
|
14
|
+
});
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
18
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
19
|
+
}) : x)(function(x) {
|
|
20
|
+
if (typeof require !== "undefined")
|
|
21
|
+
return require.apply(this, arguments);
|
|
22
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
// src/client.ts
|
|
26
|
+
class InferenceClient {
|
|
27
|
+
worker;
|
|
28
|
+
ownsWorker;
|
|
29
|
+
toolCallParser;
|
|
30
|
+
onStatusCb;
|
|
31
|
+
onProgressCb;
|
|
32
|
+
onErrorCb;
|
|
33
|
+
onRetryCb;
|
|
34
|
+
workerConfig;
|
|
35
|
+
currentStatus = "idle";
|
|
36
|
+
disposed = false;
|
|
37
|
+
constructor(options) {
|
|
38
|
+
this.workerConfig = options.config;
|
|
39
|
+
this.toolCallParser = options.toolCallParser;
|
|
40
|
+
this.onStatusCb = options.onStatus;
|
|
41
|
+
this.onProgressCb = options.onProgress;
|
|
42
|
+
this.onErrorCb = options.onError;
|
|
43
|
+
this.onRetryCb = options.onRetry;
|
|
44
|
+
if (options.worker instanceof Worker) {
|
|
45
|
+
this.worker = options.worker;
|
|
46
|
+
this.ownsWorker = false;
|
|
47
|
+
} else {
|
|
48
|
+
const url = typeof options.worker === "string" ? new URL(options.worker) : options.worker;
|
|
49
|
+
this.worker = new Worker(url, { type: "module" });
|
|
50
|
+
this.ownsWorker = true;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
get status() {
|
|
54
|
+
return this.currentStatus;
|
|
55
|
+
}
|
|
56
|
+
async init() {
|
|
57
|
+
this.assertNotDisposed();
|
|
58
|
+
const { promise, resolve, reject } = Promise.withResolvers();
|
|
59
|
+
const handler = (event) => {
|
|
60
|
+
const msg = event.data;
|
|
61
|
+
switch (msg.type) {
|
|
62
|
+
case "status":
|
|
63
|
+
this.currentStatus = msg.status;
|
|
64
|
+
this.onStatusCb?.(msg.status, msg.error);
|
|
65
|
+
if (msg.status === "ready") {
|
|
66
|
+
this.worker.removeEventListener("message", handler);
|
|
67
|
+
resolve();
|
|
68
|
+
} else if (msg.status === "error") {
|
|
69
|
+
this.worker.removeEventListener("message", handler);
|
|
70
|
+
reject(new Error(msg.error ?? "Model loading failed"));
|
|
71
|
+
}
|
|
72
|
+
break;
|
|
73
|
+
case "progress":
|
|
74
|
+
this.onProgressCb?.(msg.progress);
|
|
75
|
+
break;
|
|
76
|
+
case "retry":
|
|
77
|
+
this.onRetryCb?.(msg.attempt, msg.maxAttempts);
|
|
78
|
+
break;
|
|
79
|
+
case "error":
|
|
80
|
+
this.onErrorCb?.(msg.error);
|
|
81
|
+
this.worker.removeEventListener("message", handler);
|
|
82
|
+
reject(new Error(msg.error));
|
|
83
|
+
break;
|
|
84
|
+
}
|
|
85
|
+
};
|
|
86
|
+
this.worker.addEventListener("message", handler);
|
|
87
|
+
this.postWorkerMessage({ type: "init", config: this.workerConfig });
|
|
88
|
+
return promise;
|
|
89
|
+
}
|
|
90
|
+
async execute(options) {
|
|
91
|
+
this.assertNotDisposed();
|
|
92
|
+
const { promise, resolve, reject } = Promise.withResolvers();
|
|
93
|
+
const handler = (event) => {
|
|
94
|
+
const msg = event.data;
|
|
95
|
+
switch (msg.type) {
|
|
96
|
+
case "result": {
|
|
97
|
+
this.worker.removeEventListener("message", handler);
|
|
98
|
+
try {
|
|
99
|
+
const toolCalls = this.toolCallParser ? this.toolCallParser(msg.rawOutput) : undefined;
|
|
100
|
+
resolve({
|
|
101
|
+
rawOutput: msg.rawOutput,
|
|
102
|
+
toolCalls
|
|
103
|
+
});
|
|
104
|
+
} catch (e) {
|
|
105
|
+
reject(e);
|
|
106
|
+
}
|
|
107
|
+
break;
|
|
108
|
+
}
|
|
109
|
+
case "error":
|
|
110
|
+
this.onErrorCb?.(msg.error);
|
|
111
|
+
this.worker.removeEventListener("message", handler);
|
|
112
|
+
reject(new Error(msg.error));
|
|
113
|
+
break;
|
|
114
|
+
case "cancelled":
|
|
115
|
+
this.worker.removeEventListener("message", handler);
|
|
116
|
+
reject(new DOMException("Inference cancelled", "AbortError"));
|
|
117
|
+
break;
|
|
118
|
+
case "status":
|
|
119
|
+
this.currentStatus = msg.status;
|
|
120
|
+
this.onStatusCb?.(msg.status, msg.error);
|
|
121
|
+
break;
|
|
122
|
+
case "progress":
|
|
123
|
+
this.onProgressCb?.(msg.progress);
|
|
124
|
+
break;
|
|
125
|
+
case "retry":
|
|
126
|
+
this.onRetryCb?.(msg.attempt, msg.maxAttempts);
|
|
127
|
+
break;
|
|
128
|
+
}
|
|
129
|
+
};
|
|
130
|
+
this.worker.addEventListener("message", handler);
|
|
131
|
+
this.postWorkerMessage({
|
|
132
|
+
type: "execute",
|
|
133
|
+
prompt: options.prompt,
|
|
134
|
+
messages: options.messages,
|
|
135
|
+
systemPrompt: options.systemPrompt,
|
|
136
|
+
tools: options.tools,
|
|
137
|
+
generationParams: options.generationParams
|
|
138
|
+
});
|
|
139
|
+
return promise;
|
|
140
|
+
}
|
|
141
|
+
cancel() {
|
|
142
|
+
this.assertNotDisposed();
|
|
143
|
+
this.postWorkerMessage({ type: "cancel" });
|
|
144
|
+
}
|
|
145
|
+
dispose() {
|
|
146
|
+
if (this.disposed)
|
|
147
|
+
return;
|
|
148
|
+
this.disposed = true;
|
|
149
|
+
if (this.ownsWorker) {
|
|
150
|
+
this.worker.terminate();
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
[Symbol.dispose]() {
|
|
154
|
+
this.dispose();
|
|
155
|
+
}
|
|
156
|
+
postWorkerMessage(msg) {
|
|
157
|
+
this.worker.postMessage(msg);
|
|
158
|
+
}
|
|
159
|
+
assertNotDisposed() {
|
|
160
|
+
if (this.disposed) {
|
|
161
|
+
throw new Error("InferenceClient has been disposed");
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
export {
|
|
166
|
+
InferenceClient
|
|
167
|
+
};
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import type { ProgressInfo } from "@huggingface/transformers";
|
|
2
|
+
/** Roles accepted by the chat template. */
export type MessageRole = "system" | "assistant" | "user";
/** One turn of conversation history. */
export interface ChatMessage {
    role: MessageRole;
    content: string;
}
export interface WorkerInitConfig {
    /** HuggingFace model ID or local path */
    modelId: string;
    /**
     * Auto* model class name from `@huggingface/transformers`.
     * @default "AutoModelForCausalLM"
     */
    modelClass?: string;
    /**
     * Tokenizer class name from `@huggingface/transformers`.
     * @default "AutoTokenizer"
     */
    tokenizerClass?: string;
    /** Model quantization dtype (e.g. "q4", "fp16", "q8"). */
    dtype?: string;
    /** Device to run on (e.g. "webgpu", "wasm", "cpu"). */
    device?: string;
    /** Max retry attempts for network errors during model load. @default 3 */
    maxRetryAttempts?: number;
    /** Base delay in ms for exponential backoff. @default 1000 */
    baseRetryDelayMs?: number;
}
/**
 * Generation options, camelCase; the worker converts keys to snake_case
 * before passing them to transformers.js. Extra keys are forwarded as-is.
 */
export interface GenerationParams {
    maxNewTokens?: number;
    doSample?: boolean;
    temperature?: number;
    topK?: number;
    topP?: number;
    repetitionPenalty?: number;
    [key: string]: unknown;
}
/** Messages sent from the main thread into the worker. */
export type WorkerMessageIn = {
    type: "init";
    config: WorkerInitConfig;
} | {
    type: "execute";
    prompt: string;
    messages?: ChatMessage[];
    systemPrompt?: string;
    tools?: unknown[];
    generationParams?: GenerationParams;
} | {
    type: "cancel";
};
/** Lifecycle states reported by the worker. */
export type WorkerStatus = "idle" | "loading" | "ready" | "error";
/** Messages posted by the worker back to the main thread. */
export type WorkerMessageOut = {
    type: "status";
    status: WorkerStatus;
    error?: string;
} | {
    type: "progress";
    progress: ProgressInfo;
} | {
    type: "result";
    rawOutput: string;
} | {
    type: "error";
    error: string;
} | {
    type: "cancelled";
} | {
    type: "retry";
    attempt: number;
    maxAttempts: number;
};
/** Resolved value of `InferenceClient.execute()`. */
export interface InferenceResult<TToolCalls = undefined> {
    rawOutput: string;
    /** `undefined` unless a `toolCallParser` was configured on the client. */
    toolCalls: TToolCalls;
}
export type StatusCallback = (status: WorkerStatus, error?: string) => void;
export type ProgressCallback = (progress: ProgressInfo) => void;
export type ErrorCallback = (error: string) => void;
export type RetryCallback = (attempt: number, maxAttempts: number) => void;
export interface InferenceClientOptions<TToolCalls = undefined> {
    /** A Worker instance, URL to worker script, or string path */
    worker: Worker | URL | string;
    /** Model & worker initialization config */
    config: WorkerInitConfig;
    /** Optional parser applied on main thread to extract tool calls from raw output */
    toolCallParser?: (rawOutput: string) => TToolCalls;
    onStatus?: StatusCallback;
    onProgress?: ProgressCallback;
    onError?: ErrorCallback;
    onRetry?: RetryCallback;
}
/** Per-call options for `InferenceClient.execute()`. */
export interface ExecuteOptions {
    prompt: string;
    messages?: ChatMessage[];
    systemPrompt?: string;
    tools?: unknown[];
    generationParams?: GenerationParams;
}
export { type ProgressInfo } from "@huggingface/transformers";
|
|
100
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AAE9D,MAAM,MAAM,WAAW,GAAG,QAAQ,GAAG,WAAW,GAAG,MAAM,CAAC;AAE1D,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,WAAW,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,yCAAyC;IACzC,OAAO,EAAE,MAAM,CAAC;IAEhB;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB,0DAA0D;IAC1D,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf,uDAAuD;IACvD,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,0EAA0E;IAC1E,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B,8DAA8D;IAC9D,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,gBAAgB;IAC/B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,MAAM,eAAe,GACvB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,gBAAgB,CAAA;CAAE,GAC1C;IACE,IAAI,EAAE,SAAS,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC;IAClB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC,GACD;IAAE,IAAI,EAAE,QAAQ,CAAA;CAAE,CAAC;AAEvB,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,SAAS,GAAG,OAAO,GAAG,OAAO,CAAC;AAElE,MAAM,MAAM,gBAAgB,GACxB;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,YAAY,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GACxD;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,YAAY,CAAA;CAAE,GAC5C;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAChC;IAAE,IAAI,EAAE,WAAW,CAAA;CAAE,GACrB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAA;CAAE,CAAC;AAE5D,MAAM,WAAW,eAAe,CAAC,UAAU,GAAG,SAAS;IACrD,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,UAAU,CAAC;CACvB;AAED,MAAM,MAAM,cAAc,GAAG,CAAC,MAAM,EAAE,YAAY,EAAE,KAAK,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;AAC5E,MAAM,MAAM,gBAAgB,GAAG,CAAC,QAAQ,EAAE,YAAY,KAAK,IAAI,CAAC;AAChE,MAAM,MAAM,aAAa,GAAG,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;AACpD,MA
AM,MAAM,aAAa,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,KAAK,IAAI,CAAC;AAE3E,MAAM,WAAW,sBAAsB,CAAC,UAAU,GAAG,SAAS;IAC5D,8DAA8D;IAC9D,MAAM,EAAE,MAAM,GAAG,GAAG,GAAG,MAAM,CAAC;IAE9B,2CAA2C;IAC3C,MAAM,EAAE,gBAAgB,CAAC;IAEzB,mFAAmF;IACnF,cAAc,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,UAAU,CAAC;IAEnD,QAAQ,CAAC,EAAE,cAAc,CAAC;IAC1B,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,WAAW,EAAE,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC;IAClB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED,OAAO,EAAE,KAAK,YAAY,EAAE,MAAM,2BAA2B,CAAC"}
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,wBAAgB,cAAc,CAAC,CAAC,EAAE,OAAO,GAAG,OAAO,CAiBlD;AAED,wBAAgB,KAAK,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAE/C;AAED,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE/C"}
|
package/dist/worker.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ChatMessage, GenerationParams } from "./types.ts";
|
|
2
|
+
/** Converts camelCase generation params to the snake_case keys transformers.js expects. */
export declare function convertGenerationParams(params: GenerationParams | undefined): Record<string, unknown>;
/**
 * Builds the chat-template conversation: optional system message first,
 * then prior history, then `prompt` appended as the final user message.
 */
export declare function buildConversation(prompt: string, messages?: ChatMessage[], systemPrompt?: string): Array<{
    role: string;
    content: string;
}>;
|
|
7
|
+
//# sourceMappingURL=worker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"worker.d.ts","sourceRoot":"","sources":["../src/worker.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EACV,WAAW,EACX,gBAAgB,EAIjB,MAAM,YAAY,CAAC;AASpB,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,gBAAgB,GAAG,SAAS,GACnC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CASzB;AAED,wBAAgB,iBAAiB,CAC/B,MAAM,EAAE,MAAM,EACd,QAAQ,CAAC,EAAE,WAAW,EAAE,EACxB,YAAY,CAAC,EAAE,MAAM,GACpB,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAgB1C"}
|
package/dist/worker.js
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __toESM = (mod, isNodeMode, target) => {
|
|
7
|
+
target = mod != null ? __create(__getProtoOf(mod)) : {};
|
|
8
|
+
const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
|
|
9
|
+
for (let key of __getOwnPropNames(mod))
|
|
10
|
+
if (!__hasOwnProp.call(to, key))
|
|
11
|
+
__defProp(to, key, {
|
|
12
|
+
get: () => mod[key],
|
|
13
|
+
enumerable: true
|
|
14
|
+
});
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
18
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
19
|
+
}) : x)(function(x) {
|
|
20
|
+
if (typeof require !== "undefined")
|
|
21
|
+
return require.apply(this, arguments);
|
|
22
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
// src/utils.ts
|
|
26
|
+
// Heuristic: is this error worth retrying as a transient network failure?
// Fix: Node's undici fetch rejects with TypeError("fetch failed"), which the
// old early-return TypeError branch (matching only "failed to fetch" or
// "network") misclassified as non-network, defeating retry on Node. Since
// TypeError extends Error, a single merged branch covers browsers and Node.
function isNetworkError(e) {
  // A deliberate user cancellation is never a network failure.
  if (e instanceof DOMException && e.name === "AbortError")
    return false;
  if (e instanceof Error) {
    const msg = e.message.toLowerCase();
    // "fetch" covers both browser "Failed to fetch" and Node "fetch failed".
    return (
      msg.includes("network") ||
      msg.includes("fetch") ||
      msg.includes("econnrefused") ||
      msg.includes("enotfound") ||
      msg.includes("timeout")
    );
  }
  // Non-Error throwables (strings, objects) are not retried.
  return false;
}
|
|
39
|
+
// Promise-wrapped setTimeout; resolves with undefined after `ms` milliseconds.
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
42
|
+
// "maxNewTokens" -> "max_new_tokens": prefix each uppercase letter with "_"
// and lowercase it.
function toSnakeCase(str) {
  return str.replace(/[A-Z]/g, (ch) => "_" + ch.toLowerCase());
}
|
|
45
|
+
|
|
46
|
+
// src/worker.ts
|
|
47
|
+
// Lazily-initialized singletons shared for the worker's lifetime.
var tokenizer = null;
var model = null;
// Guards against concurrent loads triggered by overlapping messages.
var isLoading = false;
// Non-null only while an execute() is in flight; "cancel" aborts it.
var currentAbortController = null;
// Set by the "init" message; required before any model load.
var config = null;
|
|
52
|
+
// Maps camelCase generation params to the snake_case names transformers.js
// expects, dropping undefined entries so they don't override library defaults.
function convertGenerationParams(params) {
  if (!params)
    return {};
  const converted = {};
  for (const [name, value] of Object.entries(params)) {
    if (value === undefined)
      continue;
    converted[toSnakeCase(name)] = value;
  }
  return converted;
}
|
|
63
|
+
// Assembles the chat-template message list: optional system message first,
// then prior history, then the new prompt appended as the final user turn.
function buildConversation(prompt, messages, systemPrompt) {
  const conversation = [];
  if (systemPrompt) {
    conversation.push({ role: "system", content: systemPrompt });
  }
  for (const { role, content } of messages ?? []) {
    conversation.push({ role, content });
  }
  conversation.push({ role: "user", content: prompt });
  return conversation;
}
|
|
76
|
+
// Single funnel for all worker -> main-thread messages.
function postMsg(msg) {
  self.postMessage(msg);
}
|
|
79
|
+
// Looks up an exported class (e.g. "AutoModelForCausalLM") by name on the
// lazily imported @huggingface/transformers module namespace.
async function resolveClass(className) {
  const transformers = await import("@huggingface/transformers");
  const cls = transformers[className];
  if (!cls) {
    throw new Error(`Class "${className}" not found in @huggingface/transformers`);
  }
  return cls;
}
|
|
87
|
+
// Loads tokenizer + model per `config`, retrying network failures with
// exponential backoff. Posts "loading"/"ready"/"error" status messages and
// rethrows the final error after exhausting retries.
async function loadModel() {
  // Already loaded: nothing to do.
  if (model && tokenizer)
    return;
  // NOTE(review): returns immediately while another load is still in flight;
  // callers then observe tokenizer/model as null — confirm this race is intended.
  if (isLoading)
    return;
  if (!config) {
    postMsg({
      type: "error",
      error: "Worker not initialized: call init first"
    });
    return;
  }
  isLoading = true;
  postMsg({ type: "status", status: "loading" });
  const maxAttempts = config.maxRetryAttempts ?? 3;
  const baseDelay = config.baseRetryDelayMs ?? 1000;
  let lastError;
  for (let attempt = 1;attempt <= maxAttempts; attempt++) {
    try {
      // Forward per-file download progress to the main thread.
      const progressCallback = (progress) => {
        postMsg({ type: "progress", progress });
      };
      const modelClassName = config.modelClass ?? "AutoModelForCausalLM";
      const tokenizerClassName = config.tokenizerClass ?? "AutoTokenizer";
      // Resolve the Auto* classes by name from @huggingface/transformers.
      const [TokenizerClass, ModelClass] = await Promise.all([
        resolveClass(tokenizerClassName),
        resolveClass(modelClassName)
      ]);
      const modelOptions = {
        progress_callback: progressCallback
      };
      // Only pass dtype/device when configured so library defaults apply otherwise.
      if (config.dtype !== undefined)
        modelOptions.dtype = config.dtype;
      if (config.device !== undefined)
        modelOptions.device = config.device;
      // Tokenizer and model downloads run in parallel.
      const [loadedTokenizer, loadedModel] = await Promise.all([
        TokenizerClass.from_pretrained(config.modelId, {
          progress_callback: progressCallback
        }),
        ModelClass.from_pretrained(config.modelId, modelOptions)
      ]);
      tokenizer = loadedTokenizer;
      model = loadedModel;
      isLoading = false;
      postMsg({ type: "status", status: "ready" });
      return;
    } catch (e) {
      lastError = e;
      // Retry only on network-looking failures, backing off 2^(attempt-1) * base.
      if (isNetworkError(e) && attempt < maxAttempts) {
        const delay = baseDelay * Math.pow(2, attempt - 1);
        postMsg({ type: "retry", attempt, maxAttempts });
        await sleep(delay);
        continue;
      }
      break;
    }
  }
  // All attempts exhausted (or a non-retryable error): report and rethrow.
  isLoading = false;
  const errorMessage = lastError instanceof Error ? lastError.message : String(lastError);
  postMsg({ type: "status", status: "error", error: errorMessage });
  // NOTE(review): this rethrow reaches awaiting callers (the onmessage
  // handler); if awaited unguarded it becomes an unhandled rejection.
  throw lastError;
}
|
|
149
|
+
// Runs a single chat-templated generation and posts the decoded output.
// Posts "cancelled" when the abort flag is observed at a checkpoint and
// "error" on any failure. Note: cancellation is only checked between steps;
// the model.generate() call itself is not interrupted mid-flight.
async function execute(prompt, messages, systemPrompt, tools, generationParams) {
  if (!tokenizer || !model) {
    try {
      await loadModel();
    } catch {
      // Fix: loadModel rethrows after reporting a status "error". Previously
      // that rejection escaped execute(), so the main thread's pending
      // execute() promise never settled (the client only rejects on "error"/
      // "cancelled" messages) and the worker got an unhandled rejection.
      // Swallow it here and fall through to the explicit error message below.
    }
  }
  if (!tokenizer || !model) {
    postMsg({ type: "error", error: "Model failed to initialize" });
    return;
  }
  currentAbortController = new AbortController();
  try {
    const conversation = buildConversation(prompt, messages, systemPrompt);
    const templateOptions = {
      tokenize: true,
      add_generation_prompt: true,
      return_dict: true
    };
    // Only include tools when present so templates without tool support still work.
    if (tools !== undefined && tools.length > 0) {
      templateOptions.tools = tools;
    }
    const inputs = tokenizer.apply_chat_template(conversation, templateOptions);
    // Cancellation checkpoint before the expensive generate step.
    if (currentAbortController.signal.aborted) {
      postMsg({ type: "cancelled" });
      return;
    }
    const genParams = convertGenerationParams(generationParams);
    const output = await model.generate({
      ...inputs,
      ...genParams
    });
    if (currentAbortController.signal.aborted) {
      postMsg({ type: "cancelled" });
      return;
    }
    // Strip the prompt tokens; keep only the newly generated ones.
    const inputLength = inputs.input_ids.dims[1];
    const outputTokens = output.slice(0, [inputLength, null]);
    // Special tokens are kept so a toolCallParser can see template markers.
    const rawOutput = tokenizer.decode(outputTokens, {
      skip_special_tokens: false
    });
    postMsg({ type: "result", rawOutput });
  } catch (e) {
    // A throw caused by cancellation is reported as "cancelled", not "error".
    if (currentAbortController?.signal.aborted) {
      postMsg({ type: "cancelled" });
      return;
    }
    const errorMessage = e instanceof Error ? e.message : String(e);
    postMsg({ type: "error", error: errorMessage });
  } finally {
    currentAbortController = null;
  }
}
|
|
199
|
+
// Aborts the in-flight inference, if any; harmless no-op otherwise.
function cancelExecution() {
  currentAbortController?.abort();
}
|
|
204
|
+
// Message router for the worker. Dispatches on the discriminated `type` tag
// of WorkerMessageIn.
globalThis.onmessage = async (event) => {
  const message = event.data;
  switch (message.type) {
    case "init":
      config = message.config;
      try {
        await loadModel();
      } catch {
        // Fix: loadModel rethrows its final error after posting a status
        // "error" message; awaiting it unguarded here turned every load
        // failure into an unhandled promise rejection inside the worker.
        // The failure has already been reported to the main thread.
      }
      break;
    case "execute":
      await execute(message.prompt, message.messages, message.systemPrompt, message.tools, message.generationParams);
      break;
    case "cancel":
      cancelExecution();
      break;
  }
};
|
|
219
|
+
export {
|
|
220
|
+
convertGenerationParams,
|
|
221
|
+
buildConversation
|
|
222
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@p0u4a/inference-worker",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"repository": {
|
|
5
|
+
"url": "https://github.com/P0u4a/inference-worker"
|
|
6
|
+
},
|
|
7
|
+
"module": "dist/index.js",
|
|
8
|
+
"types": "dist/index.d.ts",
|
|
9
|
+
"type": "module",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"types": "./dist/index.d.ts",
|
|
13
|
+
"import": "./dist/index.js"
|
|
14
|
+
},
|
|
15
|
+
"./worker": {
|
|
16
|
+
"types": "./dist/worker.d.ts",
|
|
17
|
+
"import": "./dist/worker.js"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"dist"
|
|
22
|
+
],
|
|
23
|
+
"scripts": {
|
|
24
|
+
"build": "bun run build:js && bun run build:types",
|
|
25
|
+
"build:js": "bun build src/index.ts src/worker.ts --outdir dist --target browser --external @huggingface/transformers",
|
|
26
|
+
"build:types": "tsc -p tsconfig.build.json"
|
|
27
|
+
},
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"@types/bun": "latest",
|
|
30
|
+
"@vitest/web-worker": "^4.0.18",
|
|
31
|
+
"vitest": "^4.0.18"
|
|
32
|
+
},
|
|
33
|
+
"peerDependencies": {
|
|
34
|
+
"@huggingface/transformers": "^3.8.1",
|
|
35
|
+
"typescript": "^5"
|
|
36
|
+
}
|
|
37
|
+
}
|