localm-web 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,87 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.5.0] - 2026-05-10
11
+
12
+ ### Added
13
+
14
+ - **ORT-Web fallback path (v0.5)** — `TransformersTextEngine` in
15
+ `src/core/transformers-engine.ts` implements the runtime-agnostic
16
+ `Engine` contract on top of
17
+ [`@huggingface/transformers`](https://github.com/huggingface/transformers.js).
18
+ Lazy-imports the optional peer dep so the WebLLM hot path stays slim.
19
+ Runs ONNX models on WebGPU when available and on WASM-SIMD otherwise,
20
+ with a `TextStreamer` → async-iterable bridge for `stream()` /
21
+ `streamCompletion()` parity with `WebLLMEngine`.
22
+ - **Backend selector + auto-routing** — new `BackendChoice` type
23
+ (`"auto" | "webllm" | "transformers"`) on `LMTaskCreateOptions.backend`.
24
+ `"auto"` (default) picks WebLLM when WebGPU is available and falls
25
+ back to the transformers.js engine otherwise. `resolveBackend(choice,
26
+ preset, webGPUAvailable)` exported from the package root for unit
27
+ tests and custom routing logic. `BackendNotAvailableError` is raised
28
+ when no backend can satisfy the request (e.g. `"transformers"` forced
29
+ on a preset without `transformersId`).
30
+ - `ModelPreset.transformersId?: string` — HuggingFace Hub repo id used
31
+ by the transformers.js fallback. Replaces the unused `ortUrl` field.
32
+ - 4 presets now carry `transformersId` mappings: `phi-3.5-mini-int4`,
33
+ `llama-3.2-1b-int4`, `qwen2.5-1.5b-int4`, and the new
34
+ `smollm2-360m-int8` (the smallest viable chat model, intended as the
35
+ default for low-end devices on the fallback path).
36
+ - Public exports: `TransformersTextEngine`, `WebLLMEngine`,
37
+ `resolveBackend`, `BackendChoice`.
38
+ - 6 unit tests in `test/resolve-backend.test.ts` covering each
39
+ combination of `BackendChoice` × WebGPU availability × preset
40
+ capability, including the two `BackendNotAvailableError` paths.
41
+
42
+ ### Changed
43
+
44
+ - **CI / dev runtime moved to Node 22 + 24.**
45
+ - `engines.node` bumped from `>=20.19.0` to `>=22.0.0`. Node 20
46
+ reached end-of-life on 2026-04-30 per the Node release schedule
47
+ and the `Release to npm` workflow started warning about
48
+ `actions/checkout@v4` / `actions/setup-node@v4` running on Node 20.
49
+ - CI matrix in `.github/workflows/ci.yml` flipped from `["20", "22"]`
50
+ to `["22", "24"]`.
51
+ - Release workflow (`.github/workflows/release-npm.yml`) now sets up
52
+ Node 22 (was 20).
53
+ - `actions/checkout@v4` → `@v5` and `actions/setup-node@v4` → `@v5`
54
+ in both workflows. Eliminates the Node 20 deprecation notice that
55
+ appeared on the v0.4.0 publish run.
56
+ - `docs/getting-started.md` prerequisite row updated to reflect the
57
+ new Node 22+ requirement.
58
+
59
+ ## [0.4.0] - 2026-05-10
60
+
61
+ ### Added
62
+
63
+ - **Structured output (v0.4)** — JSON mode and JSON Schema constrained
64
+ decoding via WebLLM's `response_format` / xgrammar.
65
+ - `GenerationOptions.json: boolean` — when `true`, the engine is forced
66
+ to emit a string parseable as JSON (free-form shape).
67
+ `GenerationOptions.jsonSchema?: object` — when set, takes priority
68
+ over `json` and constrains decoding so the output matches the schema.
69
+ - `ChatReply.json<T>()` and `CompletionResult.json<T>()` parse the
70
+ generated text and return it cast to `T`. No runtime validation of
71
+ the schema is performed; pair with Ajv / Zod on the call site if you
72
+ need it.
73
+ - `StructuredOutputError` (extends `LocalmWebError`) wraps the
74
+ underlying `SyntaxError` from `JSON.parse`, so consumers can
75
+ distinguish SDK-issued failures from unrelated runtime exceptions.
76
+ - `src/structured/json-schema.ts` exposes `assertJsonSchema`,
77
+ `serializeJsonSchema`, and `parseStructuredOutput<T>` re-exported
78
+ from `localm-web`.
79
+ - `WebLLMEngine.generate` / `stream` / `complete` / `streamCompletion`
80
+ forward `response_format` to WebLLM. Worker engine inherits the
81
+ behavior without changes (the worker protocol already passes
82
+ `GenerationOptions` through `postMessage`; only `signal` is stripped).
83
+ - 15 unit tests in `test/structured-output.test.ts` covering schema
84
+ assertion (accept / reject paths), schema serialization, JSON parsing
85
+ of objects / arrays / primitives / invalid input, error chaining via
86
+ `cause`, and the `.json()` helpers on `ChatReply` and
87
+ `CompletionResult`.
88
+
89
+ ## [0.3.0] - 2026-05-10
90
+
10
91
  ### Changed
11
92
 
12
93
  - **`LMTaskCreateOptions.inWorker` default flipped from `false` to `true`.**
package/README.md CHANGED
@@ -134,10 +134,22 @@ const vectors = await emb.embed(["hello world", "another sentence"]);
134
134
  const rerank = await Reranker.create("bge-reranker-base");
135
135
  const scores = await rerank.score("query", ["doc1", "doc2", "doc3"]);
136
136
 
137
- // Structured output (JSON Schema → constrained decoding)
138
- const json = await chat.send("Extract user info from: ...", {
139
- jsonSchema: { type: "object", properties: { name: { type: "string" } } },
137
+ // Structured output — free-form JSON
138
+ const jsonReply = await chat.send("List three pros and cons of WebGPU as JSON.", { json: true });
139
+ const data = jsonReply.json<{ pros: string[]; cons: string[] }>();
140
+
141
+ // Structured output — JSON Schema constrained decoding (xgrammar via WebLLM)
142
+ const userReply = await chat.send("Extract user info from: 'Ada, 36, …'", {
143
+ jsonSchema: {
144
+ type: "object",
145
+ required: ["name", "age"],
146
+ properties: {
147
+ name: { type: "string" },
148
+ age: { type: "integer", minimum: 0 },
149
+ },
150
+ },
140
151
  });
152
+ const user = userReply.json<{ name: string; age: number }>();
141
153
  ```
142
154
 
143
155
  The shape mirrors `ort-vision-sdk-web`: `await Class.create(model)` then `predict()` / `send()` / `embed()` / `score()`.
@@ -24,6 +24,33 @@ class ModelNotLoadedError extends LocalmWebError {
24
24
  }
25
25
  class GenerationAbortedError extends LocalmWebError {
26
26
  }
27
+ class StructuredOutputError extends LocalmWebError {
28
+ }
29
+ function assertJsonSchema(schema) {
30
+ if (schema === null || typeof schema !== "object" || Array.isArray(schema)) {
31
+ throw new StructuredOutputError("jsonSchema must be a plain object describing a JSON Schema.");
32
+ }
33
+ const keys = Object.keys(schema);
34
+ const recognized = [
35
+ "type",
36
+ "$ref",
37
+ "oneOf",
38
+ "anyOf",
39
+ "allOf",
40
+ "enum",
41
+ "const",
42
+ "properties"
43
+ ];
44
+ if (!keys.some((key) => recognized.includes(key))) {
45
+ throw new StructuredOutputError(
46
+ "jsonSchema does not look like a JSON Schema (missing type/$ref/oneOf/anyOf/allOf/enum/const/properties)."
47
+ );
48
+ }
49
+ }
50
+ function serializeJsonSchema(schema) {
51
+ assertJsonSchema(schema);
52
+ return JSON.stringify(schema);
53
+ }
27
54
  let webllmModulePromise = null;
28
55
  async function loadWebLLM() {
29
56
  if (!webllmModulePromise) {
@@ -41,6 +68,15 @@ function buildSamplingParams(options) {
41
68
  if (options.topP !== void 0) params.top_p = options.topP;
42
69
  return params;
43
70
  }
71
+ function buildResponseFormat(options) {
72
+ if (options.jsonSchema !== void 0) {
73
+ return { type: "json_object", schema: serializeJsonSchema(options.jsonSchema) };
74
+ }
75
+ if (options.json) {
76
+ return { type: "json_object" };
77
+ }
78
+ return void 0;
79
+ }
44
80
  function toChatMessages(messages) {
45
81
  return messages.map((m) => {
46
82
  switch (m.role) {
@@ -95,10 +131,12 @@ class WebLLMEngine {
95
131
  if (options.signal?.aborted) {
96
132
  throw new GenerationAbortedError("Generation aborted before start.");
97
133
  }
134
+ const responseFormat = buildResponseFormat(options);
98
135
  const completion = await engine2.chat.completions.create({
99
136
  ...buildSamplingParams(options),
100
137
  messages: toChatMessages(messages),
101
- stream: false
138
+ stream: false,
139
+ ...responseFormat ? { response_format: responseFormat } : {}
102
140
  });
103
141
  return completion.choices[0]?.message?.content ?? "";
104
142
  }
@@ -107,10 +145,12 @@ class WebLLMEngine {
107
145
  if (options.signal?.aborted) {
108
146
  throw new GenerationAbortedError("Generation aborted before start.");
109
147
  }
148
+ const responseFormat = buildResponseFormat(options);
110
149
  const completion = await engine2.chat.completions.create({
111
150
  ...buildSamplingParams(options),
112
151
  messages: toChatMessages(messages),
113
- stream: true
152
+ stream: true,
153
+ ...responseFormat ? { response_format: responseFormat } : {}
114
154
  });
115
155
  let index = 0;
116
156
  let finished = false;
@@ -144,10 +184,12 @@ class WebLLMEngine {
144
184
  if (options.signal?.aborted) {
145
185
  throw new GenerationAbortedError("Generation aborted before start.");
146
186
  }
187
+ const responseFormat = buildResponseFormat(options);
147
188
  const completion = await engine2.completions.create({
148
189
  ...buildSamplingParams(options),
149
190
  prompt,
150
- stream: false
191
+ stream: false,
192
+ ...responseFormat ? { response_format: responseFormat } : {}
151
193
  });
152
194
  return completion.choices[0]?.text ?? "";
153
195
  }
@@ -156,10 +198,12 @@ class WebLLMEngine {
156
198
  if (options.signal?.aborted) {
157
199
  throw new GenerationAbortedError("Generation aborted before start.");
158
200
  }
201
+ const responseFormat = buildResponseFormat(options);
159
202
  const completion = await engine2.completions.create({
160
203
  ...buildSamplingParams(options),
161
204
  prompt,
162
- stream: true
205
+ stream: true,
206
+ ...responseFormat ? { response_format: responseFormat } : {}
163
207
  });
164
208
  let index = 0;
165
209
  let finished = false;
@@ -327,4 +371,4 @@ self.addEventListener("message", (event) => {
327
371
  return;
328
372
  }
329
373
  });
330
- //# sourceMappingURL=inference.worker-CwvQtobb.js.map
374
+ //# sourceMappingURL=inference.worker-DZbXKJZY.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"inference.worker-DZbXKJZY.js","sources":["../src/core/load-phase.ts","../src/core/exceptions.ts","../src/structured/json-schema.ts","../src/core/webllm-engine.ts","../src/worker/inference.worker.ts"],"sourcesContent":["import type { ModelLoadPhase } from \"../types\";\n\nconst DOWNLOAD_PATTERN: RegExp = /\\b(fetch|download|loading from cache|cache hit|param)/i;\nconst COMPILE_PATTERN: RegExp = /\\b(compil|shader|kernel|tensor|init|allocat|warm)/i;\n\n/**\n * Classify a runtime status text into a {@link ModelLoadPhase}.\n *\n * Heuristic: match download-related verbs first (network or cache hits are\n * treated as `downloading`), then compile-related verbs. Anything else falls\n * back to the generic `loading` bucket. The `ready` phase is never returned\n * here — callers emit it explicitly when the load resolves.\n *\n * @param text - The raw status string from the runtime.\n * @returns The classified phase.\n */\nexport function classifyLoadPhase(text: string): ModelLoadPhase {\n if (DOWNLOAD_PATTERN.test(text)) return \"downloading\";\n if (COMPILE_PATTERN.test(text)) return \"compiling\";\n return \"loading\";\n}\n","/**\n * Error hierarchy for localm-web.\n *\n * All errors thrown by the SDK extend `LocalmWebError` so consumers can\n * distinguish SDK errors from unrelated runtime errors with a single\n * `instanceof` check.\n */\n\n/** Base class for every error raised by localm-web. */\nexport class LocalmWebError extends Error {\n /**\n * @param message - Human-readable description of the error.\n * @param cause - Underlying error, if any.\n */\n constructor(\n message: string,\n public readonly cause?: unknown\n ) {\n super(message);\n this.name = new.target.name;\n }\n}\n\n/** Thrown when WebGPU is required but not available in the host browser. */\nexport class WebGPUUnavailableError extends LocalmWebError {}\n\n/** Thrown when a model fails to load (network, parsing, runtime init). 
*/\nexport class ModelLoadError extends LocalmWebError {}\n\n/** Thrown when an inference call is made before a model has loaded. */\nexport class ModelNotLoadedError extends LocalmWebError {}\n\n/** Thrown when a model id is not present in the curated registry. */\nexport class UnknownModelError extends LocalmWebError {}\n\n/** Thrown when generation is aborted via an `AbortSignal`. */\nexport class GenerationAbortedError extends LocalmWebError {}\n\n/** Thrown when the browser denies storage quota for the model cache. */\nexport class QuotaExceededError extends LocalmWebError {}\n\n/** Thrown when no usable backend is available on the current platform. */\nexport class BackendNotAvailableError extends LocalmWebError {}\n\n/**\n * Thrown when structured output (JSON mode or JSON Schema constrained\n * decoding) fails to parse as valid JSON.\n *\n * Wraps the underlying `SyntaxError` from `JSON.parse` so consumers can\n * distinguish SDK-issued failures from unrelated runtime exceptions.\n */\nexport class StructuredOutputError extends LocalmWebError {}\n","/**\n * JSON Schema helpers for structured output.\n *\n * The SDK delegates the actual constrained decoding to the underlying\n * runtime (xgrammar inside WebLLM today, ORT-Web equivalent later). These\n * helpers normalize user input — turning a JS object schema into the\n * JSON-string shape that WebLLM's `response_format.schema` expects — and\n * parse the runtime's textual output back into typed JSON.\n */\n\nimport { StructuredOutputError } from \"../core/exceptions\";\n\n/**\n * Minimal structural sanity check for a JSON Schema.\n *\n * Does not validate the schema against the JSON Schema meta-schema. 
The goal\n * is to fail fast on obvious mistakes (passing a string, an array, `null`)\n * before handing the value off to the runtime, where errors surface much\n * later and with much worse messages.\n *\n * @param schema - Candidate JSON Schema object.\n * @throws StructuredOutputError when `schema` is not a plain object or has\n * no recognizable schema shape (`type`, `$ref`, `oneOf`, `anyOf`, `allOf`,\n * `enum`).\n */\nexport function assertJsonSchema(schema: unknown): asserts schema is object {\n if (schema === null || typeof schema !== \"object\" || Array.isArray(schema)) {\n throw new StructuredOutputError(\"jsonSchema must be a plain object describing a JSON Schema.\");\n }\n const keys: string[] = Object.keys(schema);\n const recognized: readonly string[] = [\n \"type\",\n \"$ref\",\n \"oneOf\",\n \"anyOf\",\n \"allOf\",\n \"enum\",\n \"const\",\n \"properties\",\n ];\n if (!keys.some((key) => recognized.includes(key))) {\n throw new StructuredOutputError(\n \"jsonSchema does not look like a JSON Schema (missing type/$ref/oneOf/anyOf/allOf/enum/const/properties).\"\n );\n }\n}\n\n/**\n * Serialize a JSON Schema object for the WebLLM `response_format.schema`\n * field.\n *\n * WebLLM expects the schema as a JSON-encoded string (xgrammar parses it\n * server-side). Validates the shape via {@link assertJsonSchema} first.\n *\n * @param schema - JSON Schema object.\n * @returns The schema serialized as a JSON string.\n * @throws StructuredOutputError when `schema` is not a recognizable JSON\n * Schema shape.\n */\nexport function serializeJsonSchema(schema: unknown): string {\n assertJsonSchema(schema);\n return JSON.stringify(schema);\n}\n\n/**\n * Parse the textual output of a structured-decoding generation as JSON.\n *\n * @typeParam T - The expected parsed shape. 
The function does not validate\n * the parsed value against `T`; that is the caller's responsibility.\n * @param text - Raw text returned by the engine.\n * @returns The parsed JSON value cast to `T`.\n * @throws StructuredOutputError when the text is not valid JSON.\n */\nexport function parseStructuredOutput<T = unknown>(text: string): T {\n try {\n return JSON.parse(text) as T;\n } catch (err) {\n throw new StructuredOutputError(\n \"Engine output is not valid JSON. The model may have ignored the constrained decoding directive.\",\n err\n );\n }\n}\n","import type { Engine } from \"./engine\";\nimport { classifyLoadPhase } from \"./load-phase\";\nimport type { GenerationOptions, Message, ProgressCallback, TokenChunk } from \"../types\";\nimport {\n GenerationAbortedError,\n ModelLoadError,\n ModelNotLoadedError,\n WebGPUUnavailableError,\n} from \"./exceptions\";\nimport { serializeJsonSchema } from \"../structured/json-schema\";\n\ntype WebLLMModule = typeof import(\"@mlc-ai/web-llm\");\ntype MLCEngine = import(\"@mlc-ai/web-llm\").MLCEngineInterface;\ntype ChatCompletionMessageParam = import(\"@mlc-ai/web-llm\").ChatCompletionMessageParam;\ntype ResponseFormat = import(\"@mlc-ai/web-llm\").ResponseFormat;\n\nlet webllmModulePromise: Promise<WebLLMModule> | null = null;\n\nasync function loadWebLLM(): Promise<WebLLMModule> {\n if (!webllmModulePromise) {\n webllmModulePromise = import(\"@mlc-ai/web-llm\");\n }\n return webllmModulePromise;\n}\n\nfunction isWebGPUAvailable(): boolean {\n return typeof navigator !== \"undefined\" && \"gpu\" in navigator;\n}\n\ninterface SamplingParams {\n max_tokens?: number;\n temperature?: number;\n top_p?: number;\n}\n\nfunction buildSamplingParams(options: GenerationOptions): SamplingParams {\n const params: SamplingParams = {};\n if (options.maxTokens !== undefined) params.max_tokens = options.maxTokens;\n if (options.temperature !== undefined) params.temperature = options.temperature;\n if (options.topP !== undefined) 
params.top_p = options.topP;\n return params;\n}\n\n/**\n * Build the WebLLM `response_format` payload from generation options.\n *\n * Returns `undefined` when the caller has not requested structured output —\n * letting WebLLM use its default free-text decoding path. When `jsonSchema`\n * is set it takes priority and is serialized into the `schema` field\n * (xgrammar parses it server-side). When only `json` is set the payload\n * carries `{ type: \"json_object\" }` for unconstrained-but-valid JSON.\n */\nfunction buildResponseFormat(options: GenerationOptions): ResponseFormat | undefined {\n if (options.jsonSchema !== undefined) {\n return { type: \"json_object\", schema: serializeJsonSchema(options.jsonSchema) };\n }\n if (options.json) {\n return { type: \"json_object\" };\n }\n return undefined;\n}\n\nfunction toChatMessages(messages: Message[]): ChatCompletionMessageParam[] {\n return messages.map((m): ChatCompletionMessageParam => {\n switch (m.role) {\n case \"system\":\n return { role: \"system\", content: m.content };\n case \"user\":\n return { role: \"user\", content: m.content };\n case \"assistant\":\n return { role: \"assistant\", content: m.content };\n case \"tool\":\n return { role: \"tool\", content: m.content, tool_call_id: m.name ?? \"\" };\n }\n });\n}\n\n/**\n * Inference engine backed by [WebLLM (MLC)](https://github.com/mlc-ai/web-llm).\n *\n * Requires WebGPU. The fallback path planned for v0.5 will route to ORT-Web\n * when WebGPU is missing.\n */\nexport class WebLLMEngine implements Engine {\n private engine: MLCEngine | null = null;\n\n isLoaded(): boolean {\n return this.engine !== null;\n }\n\n async load(modelId: string, onProgress?: ProgressCallback): Promise<void> {\n if (!isWebGPUAvailable()) {\n throw new WebGPUUnavailableError(\n \"WebGPU is not available in this browser. 
The ORT-Web fallback is planned for v0.5.\"\n );\n }\n const webllm = await loadWebLLM();\n try {\n this.engine = await webllm.CreateMLCEngine(modelId, {\n initProgressCallback: (report): void => {\n onProgress?.({\n progress: report.progress,\n text: report.text,\n loaded: 0,\n total: 0,\n phase: classifyLoadPhase(report.text),\n });\n },\n });\n onProgress?.({\n progress: 1,\n text: \"Model ready.\",\n loaded: 0,\n total: 0,\n phase: \"ready\",\n });\n } catch (err) {\n throw new ModelLoadError(`Failed to load model \"${modelId}\".`, err);\n }\n }\n\n async generate(messages: Message[], options: GenerationOptions = {}): Promise<string> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const responseFormat = buildResponseFormat(options);\n const completion = await engine.chat.completions.create({\n ...buildSamplingParams(options),\n messages: toChatMessages(messages),\n stream: false,\n ...(responseFormat ? { response_format: responseFormat } : {}),\n });\n return completion.choices[0]?.message?.content ?? \"\";\n }\n\n async *stream(messages: Message[], options: GenerationOptions = {}): AsyncIterable<TokenChunk> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const responseFormat = buildResponseFormat(options);\n const completion = await engine.chat.completions.create({\n ...buildSamplingParams(options),\n messages: toChatMessages(messages),\n stream: true,\n ...(responseFormat ? { response_format: responseFormat } : {}),\n });\n let index: number = 0;\n let finished: boolean = false;\n try {\n for await (const chunk of completion) {\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted by signal.\");\n }\n const choice = chunk.choices[0];\n const delta = choice?.delta?.content ?? 
\"\";\n if (delta) {\n yield { text: delta, index, done: false };\n index += 1;\n }\n if (choice?.finish_reason) {\n finished = true;\n yield { text: \"\", index, done: true };\n index += 1;\n }\n }\n if (!finished) {\n yield { text: \"\", index, done: true };\n }\n } catch (err) {\n if (err instanceof GenerationAbortedError) throw err;\n throw new ModelLoadError(\"Streaming generation failed.\", err);\n }\n }\n\n async complete(prompt: string, options: GenerationOptions = {}): Promise<string> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const responseFormat = buildResponseFormat(options);\n const completion = await engine.completions.create({\n ...buildSamplingParams(options),\n prompt,\n stream: false,\n ...(responseFormat ? { response_format: responseFormat } : {}),\n });\n return completion.choices[0]?.text ?? \"\";\n }\n\n async *streamCompletion(\n prompt: string,\n options: GenerationOptions = {}\n ): AsyncIterable<TokenChunk> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const responseFormat = buildResponseFormat(options);\n const completion = await engine.completions.create({\n ...buildSamplingParams(options),\n prompt,\n stream: true,\n ...(responseFormat ? { response_format: responseFormat } : {}),\n });\n let index: number = 0;\n let finished: boolean = false;\n try {\n for await (const chunk of completion) {\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted by signal.\");\n }\n const choice = chunk.choices[0];\n const delta = choice?.text ?? 
\"\";\n if (delta) {\n yield { text: delta, index, done: false };\n index += 1;\n }\n if (choice?.finish_reason) {\n finished = true;\n yield { text: \"\", index, done: true };\n index += 1;\n }\n }\n if (!finished) {\n yield { text: \"\", index, done: true };\n }\n } catch (err) {\n if (err instanceof GenerationAbortedError) throw err;\n throw new ModelLoadError(\"Streaming completion failed.\", err);\n }\n }\n\n async unload(): Promise<void> {\n if (this.engine) {\n await this.engine.unload();\n this.engine = null;\n }\n }\n\n private requireEngine(): MLCEngine {\n if (!this.engine) {\n throw new ModelNotLoadedError(\"Engine not loaded. Call load() before generation.\");\n }\n return this.engine;\n }\n}\n","/// <reference lib=\"webworker\" />\n\nimport { WebLLMEngine } from \"../core/webllm-engine\";\nimport type { WorkerRequest, WorkerResponse } from \"./protocol\";\n\ndeclare const self: DedicatedWorkerGlobalScope;\n\nconst engine: WebLLMEngine = new WebLLMEngine();\nconst aborts: Map<number, AbortController> = new Map();\n\nfunction reply(message: WorkerResponse): void {\n self.postMessage(message);\n}\n\nfunction fail(id: number, err: unknown): void {\n const error = err instanceof Error ? 
err : new Error(String(err));\n reply({ op: \"error\", id, name: error.name, message: error.message });\n}\n\nasync function handleLoad(req: Extract<WorkerRequest, { op: \"load\" }>): Promise<void> {\n try {\n await engine.load(req.modelId, (payload) => {\n reply({ op: \"progress\", id: req.id, payload });\n });\n reply({ op: \"loaded\", id: req.id });\n } catch (err) {\n fail(req.id, err);\n }\n}\n\nasync function handleGenerate(req: Extract<WorkerRequest, { op: \"generate\" }>): Promise<void> {\n const controller: AbortController = new AbortController();\n aborts.set(req.id, controller);\n try {\n const text: string = await engine.generate(req.messages, {\n ...req.options,\n signal: controller.signal,\n });\n reply({ op: \"generated\", id: req.id, text });\n } catch (err) {\n fail(req.id, err);\n } finally {\n aborts.delete(req.id);\n }\n}\n\nasync function handleComplete(req: Extract<WorkerRequest, { op: \"complete\" }>): Promise<void> {\n const controller: AbortController = new AbortController();\n aborts.set(req.id, controller);\n try {\n const text: string = await engine.complete(req.prompt, {\n ...req.options,\n signal: controller.signal,\n });\n reply({ op: \"generated\", id: req.id, text });\n } catch (err) {\n fail(req.id, err);\n } finally {\n aborts.delete(req.id);\n }\n}\n\nasync function handleStreamCompletion(\n req: Extract<WorkerRequest, { op: \"stream-completion\" }>\n): Promise<void> {\n const controller: AbortController = new AbortController();\n aborts.set(req.id, controller);\n try {\n for await (const chunk of engine.streamCompletion(req.prompt, {\n ...req.options,\n signal: controller.signal,\n })) {\n reply({ op: \"token\", id: req.id, chunk });\n }\n reply({ op: \"stream-end\", id: req.id });\n } catch (err) {\n fail(req.id, err);\n } finally {\n aborts.delete(req.id);\n }\n}\n\nasync function handleStream(req: Extract<WorkerRequest, { op: \"stream\" }>): Promise<void> {\n const controller: AbortController = new AbortController();\n 
aborts.set(req.id, controller);\n try {\n for await (const chunk of engine.stream(req.messages, {\n ...req.options,\n signal: controller.signal,\n })) {\n reply({ op: \"token\", id: req.id, chunk });\n }\n reply({ op: \"stream-end\", id: req.id });\n } catch (err) {\n fail(req.id, err);\n } finally {\n aborts.delete(req.id);\n }\n}\n\nasync function handleUnload(req: Extract<WorkerRequest, { op: \"unload\" }>): Promise<void> {\n try {\n await engine.unload();\n reply({ op: \"unloaded\", id: req.id });\n } catch (err) {\n fail(req.id, err);\n }\n}\n\nfunction handleIsLoaded(req: Extract<WorkerRequest, { op: \"isLoaded\" }>): void {\n reply({ op: \"is-loaded\", id: req.id, value: engine.isLoaded() });\n}\n\nfunction handleAbort(req: Extract<WorkerRequest, { op: \"abort\" }>): void {\n aborts.get(req.id)?.abort();\n}\n\nself.addEventListener(\"message\", (event: MessageEvent<WorkerRequest>): void => {\n const req = event.data;\n switch (req.op) {\n case \"load\":\n void handleLoad(req);\n return;\n case \"generate\":\n void handleGenerate(req);\n return;\n case \"stream\":\n void handleStream(req);\n return;\n case \"complete\":\n void handleComplete(req);\n return;\n case \"stream-completion\":\n void handleStreamCompletion(req);\n return;\n case \"unload\":\n void handleUnload(req);\n return;\n case \"isLoaded\":\n handleIsLoaded(req);\n return;\n case \"abort\":\n handleAbort(req);\n return;\n 
}\n});\n"],"names":["engine"],"mappings":"AAEA,MAAM,mBAA2B;AACjC,MAAM,kBAA0B;AAazB,SAAS,kBAAkB,MAA8B;AAC9D,MAAI,iBAAiB,KAAK,IAAI,EAAG,QAAO;AACxC,MAAI,gBAAgB,KAAK,IAAI,EAAG,QAAO;AACvC,SAAO;AACT;ACXO,MAAM,uBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKxC,YACE,SACgB,OAChB;AACA,UAAM,OAAO;AAFG,SAAA,QAAA;AAGhB,SAAK,OAAO,WAAW;AAAA,EACzB;AACF;AAGO,MAAM,+BAA+B,eAAe;AAAC;AAGrD,MAAM,uBAAuB,eAAe;AAAC;AAG7C,MAAM,4BAA4B,eAAe;AAAC;AAMlD,MAAM,+BAA+B,eAAe;AAAC;AAerD,MAAM,8BAA8B,eAAe;AAAC;AC1BpD,SAAS,iBAAiB,QAA2C;AAC1E,MAAI,WAAW,QAAQ,OAAO,WAAW,YAAY,MAAM,QAAQ,MAAM,GAAG;AAC1E,UAAM,IAAI,sBAAsB,6DAA6D;AAAA,EAC/F;AACA,QAAM,OAAiB,OAAO,KAAK,MAAM;AACzC,QAAM,aAAgC;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EAAA;AAEF,MAAI,CAAC,KAAK,KAAK,CAAC,QAAQ,WAAW,SAAS,GAAG,CAAC,GAAG;AACjD,UAAM,IAAI;AAAA,MACR;AAAA,IAAA;AAAA,EAEJ;AACF;AAcO,SAAS,oBAAoB,QAAyB;AAC3D,mBAAiB,MAAM;AACvB,SAAO,KAAK,UAAU,MAAM;AAC9B;AC9CA,IAAI,sBAAoD;AAExD,eAAe,aAAoC;AACjD,MAAI,CAAC,qBAAqB;AACxB,0BAAsB,OAAO,qBAAiB;AAAA,EAChD;AACA,SAAO;AACT;AAEA,SAAS,oBAA6B;AACpC,SAAO,OAAO,cAAc,eAAe,SAAS;AACtD;AAQA,SAAS,oBAAoB,SAA4C;AACvE,QAAM,SAAyB,CAAA;AAC/B,MAAI,QAAQ,cAAc,OAAW,QAAO,aAAa,QAAQ;AACjE,MAAI,QAAQ,gBAAgB,OAAW,QAAO,cAAc,QAAQ;AACpE,MAAI,QAAQ,SAAS,OAAW,QAAO,QAAQ,QAAQ;AACvD,SAAO;AACT;AAWA,SAAS,oBAAoB,SAAwD;AACnF,MAAI,QAAQ,eAAe,QAAW;AACpC,WAAO,EAAE,MAAM,eAAe,QAAQ,oBAAoB,QAAQ,UAAU,EAAA;AAAA,EAC9E;AACA,MAAI,QAAQ,MAAM;AAChB,WAAO,EAAE,MAAM,cAAA;AAAA,EACjB;AACA,SAAO;AACT;AAEA,SAAS,eAAe,UAAmD;AACzE,SAAO,SAAS,IAAI,CAAC,MAAkC;AACrD,YAAQ,EAAE,MAAA;AAAA,MACR,KAAK;AACH,eAAO,EAAE,MAAM,UAAU,SAAS,EAAE,QAAA;AAAA,MACtC,KAAK;AACH,eAAO,EAAE,MAAM,QAAQ,SAAS,EAAE,QAAA;AAAA,MACpC,KAAK;AACH,eAAO,EAAE,MAAM,aAAa,SAAS,EAAE,QAAA;AAAA,MACzC,KAAK;AACH,eAAO,EAAE,MAAM,QAAQ,SAAS,EAAE,SAAS,cAAc,EAAE,QAAQ,GAAA;AAAA,IAAG;AAAA,EAE5E,CAAC;AACH;AAQO,MAAM,aAA+B;AAAA,EAClC,SAA2B;AAAA,EAEnC,WAAoB;AAClB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,MAAM,KAAK,SAAiB,YAA8C;AACxE,QAAI,CAAC,qBAAqB;AACxB,YAAM,IAAI;AAAA,QACR;AAAA,MAAA;AAAA,IAEJ;AACA,UAAM,SAAS,MAAM,W
AAA;AACrB,QAAI;AACF,WAAK,SAAS,MAAM,OAAO,gBAAgB,SAAS;AAAA,QAClD,sBAAsB,CAAC,WAAiB;AACtC,uBAAa;AAAA,YACX,UAAU,OAAO;AAAA,YACjB,MAAM,OAAO;AAAA,YACb,QAAQ;AAAA,YACR,OAAO;AAAA,YACP,OAAO,kBAAkB,OAAO,IAAI;AAAA,UAAA,CACrC;AAAA,QACH;AAAA,MAAA,CACD;AACD,mBAAa;AAAA,QACX,UAAU;AAAA,QACV,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,OAAO;AAAA,QACP,OAAO;AAAA,MAAA,CACR;AAAA,IACH,SAAS,KAAK;AACZ,YAAM,IAAI,eAAe,yBAAyB,OAAO,MAAM,GAAG;AAAA,IACpE;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,UAAqB,UAA6B,IAAqB;AACpF,UAAMA,UAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,iBAAiB,oBAAoB,OAAO;AAClD,UAAM,aAAa,MAAMA,QAAO,KAAK,YAAY,OAAO;AAAA,MACtD,GAAG,oBAAoB,OAAO;AAAA,MAC9B,UAAU,eAAe,QAAQ;AAAA,MACjC,QAAQ;AAAA,MACR,GAAI,iBAAiB,EAAE,iBAAiB,mBAAmB,CAAA;AAAA,IAAC,CAC7D;AACD,WAAO,WAAW,QAAQ,CAAC,GAAG,SAAS,WAAW;AAAA,EACpD;AAAA,EAEA,OAAO,OAAO,UAAqB,UAA6B,IAA+B;AAC7F,UAAMA,UAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,iBAAiB,oBAAoB,OAAO;AAClD,UAAM,aAAa,MAAMA,QAAO,KAAK,YAAY,OAAO;AAAA,MACtD,GAAG,oBAAoB,OAAO;AAAA,MAC9B,UAAU,eAAe,QAAQ;AAAA,MACjC,QAAQ;AAAA,MACR,GAAI,iBAAiB,EAAE,iBAAiB,mBAAmB,CAAA;AAAA,IAAC,CAC7D;AACD,QAAI,QAAgB;AACpB,QAAI,WAAoB;AACxB,QAAI;AACF,uBAAiB,SAAS,YAAY;AACpC,YAAI,QAAQ,QAAQ,SAAS;AAC3B,gBAAM,IAAI,uBAAuB,+BAA+B;AAAA,QAClE;AACA,cAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,cAAM,QAAQ,QAAQ,OAAO,WAAW;AACxC,YAAI,OAAO;AACT,gBAAM,EAAE,MAAM,OAAO,OAAO,MAAM,MAAA;AAClC,mBAAS;AAAA,QACX;AACA,YAAI,QAAQ,eAAe;AACzB,qBAAW;AACX,gBAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAC/B,mBAAS;AAAA,QACX;AAAA,MACF;AACA,UAAI,CAAC,UAAU;AACb,cAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAAA,MACjC;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,eAAe,uBAAwB,OAAM;AACjD,YAAM,IAAI,eAAe,gCAAgC,GAAG;AAAA,IAC9D;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,QAAgB,UAA6B,IAAqB;AAC/E,UAAMA,UAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,iBAAiB,oBAAoB,OAAO;AAClD,UAAM,aAAa,MAAMA,QAAO,YAAY,OAAO;AAAA,MACjD,GAAG,oBAAoB,OAAO;AAAA,MAC9B;AAAA,MACA,QAAQ;AAAA,MACR,GAAI,iBAAiB,EAAE,iBAAiB,mBAAmB,CAAA;AAAA,IAAC
,CAC7D;AACD,WAAO,WAAW,QAAQ,CAAC,GAAG,QAAQ;AAAA,EACxC;AAAA,EAEA,OAAO,iBACL,QACA,UAA6B,IACF;AAC3B,UAAMA,UAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,iBAAiB,oBAAoB,OAAO;AAClD,UAAM,aAAa,MAAMA,QAAO,YAAY,OAAO;AAAA,MACjD,GAAG,oBAAoB,OAAO;AAAA,MAC9B;AAAA,MACA,QAAQ;AAAA,MACR,GAAI,iBAAiB,EAAE,iBAAiB,mBAAmB,CAAA;AAAA,IAAC,CAC7D;AACD,QAAI,QAAgB;AACpB,QAAI,WAAoB;AACxB,QAAI;AACF,uBAAiB,SAAS,YAAY;AACpC,YAAI,QAAQ,QAAQ,SAAS;AAC3B,gBAAM,IAAI,uBAAuB,+BAA+B;AAAA,QAClE;AACA,cAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,cAAM,QAAQ,QAAQ,QAAQ;AAC9B,YAAI,OAAO;AACT,gBAAM,EAAE,MAAM,OAAO,OAAO,MAAM,MAAA;AAClC,mBAAS;AAAA,QACX;AACA,YAAI,QAAQ,eAAe;AACzB,qBAAW;AACX,gBAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAC/B,mBAAS;AAAA,QACX;AAAA,MACF;AACA,UAAI,CAAC,UAAU;AACb,cAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAAA,MACjC;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,eAAe,uBAAwB,OAAM;AACjD,YAAM,IAAI,eAAe,gCAAgC,GAAG;AAAA,IAC9D;AAAA,EACF;AAAA,EAEA,MAAM,SAAwB;AAC5B,QAAI,KAAK,QAAQ;AACf,YAAM,KAAK,OAAO,OAAA;AAClB,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEQ,gBAA2B;AACjC,QAAI,CAAC,KAAK,QAAQ;AAChB,YAAM,IAAI,oBAAoB,mDAAmD;AAAA,IACnF;AACA,WAAO,KAAK;AAAA,EACd;AACF;AChPA,MAAM,SAAuB,IAAI,aAAA;AACjC,MAAM,6BAA2C,IAAA;AAEjD,SAAS,MAAM,SAA+B;AAC5C,OAAK,YAAY,OAAO;AAC1B;AAEA,SAAS,KAAK,IAAY,KAAoB;AAC5C,QAAM,QAAQ,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC;AAChE,QAAM,EAAE,IAAI,SAAS,IAAI,MAAM,MAAM,MAAM,SAAS,MAAM,QAAA,CAAS;AACrE;AAEA,eAAe,WAAW,KAA4D;AACpF,MAAI;AACF,UAAM,OAAO,KAAK,IAAI,SAAS,CAAC,YAAY;AAC1C,YAAM,EAAE,IAAI,YAAY,IAAI,IAAI,IAAI,SAAS;AAAA,IAC/C,CAAC;AACD,UAAM,EAAE,IAAI,UAAU,IAAI,IAAI,IAAI;AAAA,EACpC,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB;AACF;AAEA,eAAe,eAAe,KAAgE;AAC5F,QAAM,aAA8B,IAAI,gBAAA;AACxC,SAAO,IAAI,IAAI,IAAI,UAAU;AAC7B,MAAI;AACF,UAAM,OAAe,MAAM,OAAO,SAAS,IAAI,UAAU;AAAA,MACvD,GAAG,IAAI;AAAA,MACP,QAAQ,WAAW;AAAA,IAAA,CACpB;AACD,UAAM,EAAE,IAAI,aAAa,IAAI,IAAI,IAAI,MAAM;AAAA,EAC7C,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB,UAAA;AACE,WAAO,OAAO,IAAI,EAAE;AAAA,EACtB;AACF;AAEA,eAAe,eAAe,KAAgE;AAC5F,QAAM,aAA8B,IAAI,gB
AAA;AACxC,SAAO,IAAI,IAAI,IAAI,UAAU;AAC7B,MAAI;AACF,UAAM,OAAe,MAAM,OAAO,SAAS,IAAI,QAAQ;AAAA,MACrD,GAAG,IAAI;AAAA,MACP,QAAQ,WAAW;AAAA,IAAA,CACpB;AACD,UAAM,EAAE,IAAI,aAAa,IAAI,IAAI,IAAI,MAAM;AAAA,EAC7C,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB,UAAA;AACE,WAAO,OAAO,IAAI,EAAE;AAAA,EACtB;AACF;AAEA,eAAe,uBACb,KACe;AACf,QAAM,aAA8B,IAAI,gBAAA;AACxC,SAAO,IAAI,IAAI,IAAI,UAAU;AAC7B,MAAI;AACF,qBAAiB,SAAS,OAAO,iBAAiB,IAAI,QAAQ;AAAA,MAC5D,GAAG,IAAI;AAAA,MACP,QAAQ,WAAW;AAAA,IAAA,CACpB,GAAG;AACF,YAAM,EAAE,IAAI,SAAS,IAAI,IAAI,IAAI,OAAO;AAAA,IAC1C;AACA,UAAM,EAAE,IAAI,cAAc,IAAI,IAAI,IAAI;AAAA,EACxC,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB,UAAA;AACE,WAAO,OAAO,IAAI,EAAE;AAAA,EACtB;AACF;AAEA,eAAe,aAAa,KAA8D;AACxF,QAAM,aAA8B,IAAI,gBAAA;AACxC,SAAO,IAAI,IAAI,IAAI,UAAU;AAC7B,MAAI;AACF,qBAAiB,SAAS,OAAO,OAAO,IAAI,UAAU;AAAA,MACpD,GAAG,IAAI;AAAA,MACP,QAAQ,WAAW;AAAA,IAAA,CACpB,GAAG;AACF,YAAM,EAAE,IAAI,SAAS,IAAI,IAAI,IAAI,OAAO;AAAA,IAC1C;AACA,UAAM,EAAE,IAAI,cAAc,IAAI,IAAI,IAAI;AAAA,EACxC,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB,UAAA;AACE,WAAO,OAAO,IAAI,EAAE;AAAA,EACtB;AACF;AAEA,eAAe,aAAa,KAA8D;AACxF,MAAI;AACF,UAAM,OAAO,OAAA;AACb,UAAM,EAAE,IAAI,YAAY,IAAI,IAAI,IAAI;AAAA,EACtC,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB;AACF;AAEA,SAAS,eAAe,KAAuD;AAC7E,QAAM,EAAE,IAAI,aAAa,IAAI,IAAI,IAAI,OAAO,OAAO,SAAA,GAAY;AACjE;AAEA,SAAS,YAAY,KAAoD;AACvE,SAAO,IAAI,IAAI,EAAE,GAAG,MAAA;AACtB;AAEA,KAAK,iBAAiB,WAAW,CAAC,UAA6C;AAC7E,QAAM,MAAM,MAAM;AAClB,UAAQ,IAAI,IAAA;AAAA,IACV,KAAK;AACH,WAAK,WAAW,GAAG;AACnB;AAAA,IACF,KAAK;AACH,WAAK,eAAe,GAAG;AACvB;AAAA,IACF,KAAK;AACH,WAAK,aAAa,GAAG;AACrB;AAAA,IACF,KAAK;AACH,WAAK,eAAe,GAAG;AACvB;AAAA,IACF,KAAK;AACH,WAAK,uBAAuB,GAAG;AAC/B;AAAA,IACF,KAAK;AACH,WAAK,aAAa,GAAG;AACrB;AAAA,IACF,KAAK;AACH,qBAAe,GAAG;AAClB;AAAA,IACF,KAAK;AACH,kBAAY,GAAG;AACf;AAAA,EAAA;AAEN,CAAC;"}
package/dist/index.d.ts CHANGED
@@ -6,6 +6,43 @@
6
6
  * @packageDocumentation
7
7
  */
8
8
 
9
+ /**
10
+ * JSON Schema helpers for structured output.
11
+ *
12
+ * The SDK delegates the actual constrained decoding to the underlying
13
+ * runtime (xgrammar inside WebLLM today, ORT-Web equivalent later). These
14
+ * helpers normalize user input — turning a JS object schema into the
15
+ * JSON-string shape that WebLLM's `response_format.schema` expects — and
16
+ * parse the runtime's textual output back into typed JSON.
17
+ */
18
+ /**
19
+ * Minimal structural sanity check for a JSON Schema.
20
+ *
21
+ * Does not validate the schema against the JSON Schema meta-schema. The goal
22
+ * is to fail fast on obvious mistakes (passing a string, an array, `null`)
23
+ * before handing the value off to the runtime, where errors surface much
24
+ * later and with much worse messages.
25
+ *
26
+ * @param schema - Candidate JSON Schema object.
27
+ * @throws StructuredOutputError when `schema` is not a plain object or has
28
+ * no recognizable schema shape (`type`, `$ref`, `oneOf`, `anyOf`, `allOf`,
29
+ * `enum`).
30
+ */
31
+ export declare function assertJsonSchema(schema: unknown): asserts schema is object;
32
+
33
+ /**
34
+ * Inference backend selector.
35
+ *
36
+ * - `"auto"` (default): pick WebLLM when WebGPU is available, fall back to
37
+ * the transformers.js engine otherwise.
38
+ * - `"webllm"`: force WebLLM. Throws `WebGPUUnavailableError` on browsers
39
+ * without WebGPU.
40
+ * - `"transformers"`: force the transformers.js engine. Loads from the
41
+ * preset's `transformersId`; throws `BackendNotAvailableError` when the
42
+ * preset has no `transformersId`.
43
+ */
44
+ export declare type BackendChoice = "auto" | "webllm" | "transformers";
45
+
9
46
  /** Thrown when no usable backend is available on the current platform. */
10
47
  export declare class BackendNotAvailableError extends LocalmWebError {
11
48
  }
@@ -118,6 +155,18 @@ export declare class ChatReply {
118
155
  tokensGenerated: number,
119
156
  /** Why the generation loop stopped. */
120
157
  finishReason: FinishReason);
158
+ /**
159
+ * Parse {@link ChatReply.text} as JSON.
160
+ *
161
+ * Intended for replies generated with `json: true` or `jsonSchema`.
162
+ * The result is cast to `T` without runtime validation; pair with Zod /
163
+ * Ajv on the call site if you need to verify the schema.
164
+ *
165
+ * @typeParam T - Expected parsed shape.
166
+ * @returns The parsed JSON value.
167
+ * @throws StructuredOutputError if the text is not valid JSON.
168
+ */
169
+ json<T = unknown>(): T;
121
170
  }
122
171
 
123
172
  /**
@@ -208,6 +257,17 @@ export declare class CompletionResult {
208
257
  tokensGenerated: number,
209
258
  /** Why the generation loop stopped. */
210
259
  finishReason: FinishReason);
260
+ /**
261
+ * Parse {@link CompletionResult.text} as JSON.
262
+ *
263
+ * Intended for completions generated with `json: true` or `jsonSchema`.
264
+ * The result is cast to `T` without runtime validation.
265
+ *
266
+ * @typeParam T - Expected parsed shape.
267
+ * @returns The parsed JSON value.
268
+ * @throws StructuredOutputError if the text is not valid JSON.
269
+ */
270
+ json<T = unknown>(): T;
211
271
  }
212
272
 
213
273
  /**
@@ -423,8 +483,23 @@ export declare interface GenerationOptions {
423
483
  /** Cancellation signal. When triggered, the engine stops generation. */
424
484
  signal?: AbortSignal;
425
485
  /**
426
- * JSON Schema for structured output. The engine constrains decoding to
427
- * produce a string parseable as JSON matching the schema. Planned for v0.4.
486
+ * Force the engine to emit a string parseable as JSON.
487
+ *
488
+ * When `true` (and `jsonSchema` is not also set), the engine maps to
489
+ * WebLLM's `response_format: { type: "json_object" }` — the model is free
490
+ * to choose any JSON shape, but the output is guaranteed to parse.
491
+ *
492
+ * Ignored when {@link GenerationOptions.jsonSchema} is set.
493
+ */
494
+ json?: boolean;
495
+ /**
496
+ * JSON Schema for structured output. When set, the engine constrains
497
+ * decoding (xgrammar inside WebLLM) so the output parses as JSON matching
498
+ * the schema. Takes priority over {@link GenerationOptions.json}.
499
+ *
500
+ * The schema is passed verbatim to the runtime — the SDK does not validate
501
+ * the parsed value against it. Use Ajv/Zod on the consumer side if you
502
+ * need runtime validation in addition to constrained decoding.
428
503
  */
429
504
  jsonSchema?: object;
430
505
  }
@@ -468,7 +543,7 @@ export declare abstract class LMTask {
468
543
  * @param options - Task creation options.
469
544
  */
470
545
  protected static createEngine(modelId: string, options?: LMTaskCreateOptions): Promise<ResolvedEngine>;
471
- private static defaultEngine;
546
+ private static instantiateEngine;
472
547
  /** Release engine resources. Safe to call multiple times. */
473
548
  unload(): Promise<void>;
474
549
  /** Whether the underlying engine has a loaded model. */
@@ -492,8 +567,19 @@ export declare interface LMTaskCreateOptions {
492
567
  * `Worker` support or when debugging the runtime directly).
493
568
  *
494
569
  * Ignored when {@link engine} is provided.
570
+ *
571
+ * **Note (v0.5):** the bundled worker entry only supports the WebLLM
572
+ * backend. When `backend` resolves to `"transformers"` the worker option
573
+ * is forced to `false` and inference runs on the main thread. A worker
574
+ * variant for the transformers.js path is on the v0.6 roadmap.
495
575
  */
496
576
  inWorker?: boolean;
577
+ /**
578
+ * Inference backend selector (v0.5+). Defaults to `"auto"` which picks
579
+ * WebLLM when WebGPU is available and the transformers.js fallback when
580
+ * it is not. See {@link BackendChoice}.
581
+ */
582
+ backend?: BackendChoice;
497
583
  }
498
584
 
499
585
  /**
@@ -669,14 +755,30 @@ export declare interface ModelPreset {
669
755
  quantization: string;
670
756
  /** Identifier expected by the WebLLM runtime. */
671
757
  webllmId: string;
672
- /** Optional ONNX URL used by the future ORT-Web fallback (v0.5+). */
673
- ortUrl?: string;
758
+ /**
759
+ * Optional HuggingFace Hub repo id used by the transformers.js fallback
760
+ * (v0.5+). Models without a `transformersId` cannot run on the fallback
761
+ * path — loading them in a browser without WebGPU raises
762
+ * `BackendNotAvailableError`.
763
+ */
764
+ transformersId?: string;
674
765
  /** Maximum context window in tokens. */
675
766
  contextWindow: number;
676
767
  /** Short human description. */
677
768
  description: string;
678
769
  }
679
770
 
771
+ /**
772
+ * Parse the textual output of a structured-decoding generation as JSON.
773
+ *
774
+ * @typeParam T - The expected parsed shape. The function does not validate
775
+ * the parsed value against `T`; that is the caller's responsibility.
776
+ * @param text - Raw text returned by the engine.
777
+ * @returns The parsed JSON value cast to `T`.
778
+ * @throws StructuredOutputError when the text is not valid JSON.
779
+ */
780
+ export declare function parseStructuredOutput<T = unknown>(text: string): T;
781
+
680
782
  /** Callback signature for model load progress. */
681
783
  export declare type ProgressCallback = (progress: ModelLoadProgress) => void;
682
784
 
@@ -810,6 +912,19 @@ export declare interface RerankPipeline {
810
912
  unload?(): Promise<void>;
811
913
  }
812
914
 
915
+ /**
916
+ * Pure backend resolver, exported for unit tests.
917
+ *
918
+ * @param choice - Caller's preference (`"auto"`, `"webllm"`, `"transformers"`).
919
+ * @param preset - Resolved model preset.
920
+ * @param webGPUAvailable - Whether WebGPU is available in the host environment.
921
+ * @returns The concrete backend to instantiate.
922
+ * @throws BackendNotAvailableError when the choice cannot be satisfied (e.g.
923
+ * `"transformers"` requested but the preset has no `transformersId`, or
924
+ * `"auto"` with no WebGPU and no `transformersId`).
925
+ */
926
+ export declare function resolveBackend(choice: BackendChoice, preset: ModelPreset, webGPUAvailable: boolean): "webllm" | "transformers";
927
+
813
928
  /** Internal payload returned by {@link LMTask.createEngine}. */
814
929
  declare interface ResolvedEngine {
815
930
  engine: Engine;
@@ -856,6 +971,30 @@ export declare type Role = "system" | "user" | "assistant" | "tool";
856
971
  */
857
972
  declare type SerializableGenerationOptions = Omit<GenerationOptions, "signal">;
858
973
 
974
+ /**
975
+ * Serialize a JSON Schema object for the WebLLM `response_format.schema`
976
+ * field.
977
+ *
978
+ * WebLLM expects the schema as a JSON-encoded string (xgrammar parses it
979
+ * inside the runtime). Validates the shape via {@link assertJsonSchema} first.
980
+ *
981
+ * @param schema - JSON Schema object.
982
+ * @returns The schema serialized as a JSON string.
983
+ * @throws StructuredOutputError when `schema` is not a recognizable JSON
984
+ * Schema shape.
985
+ */
986
+ export declare function serializeJsonSchema(schema: unknown): string;
987
+
988
+ /**
989
+ * Thrown when structured output (JSON mode or JSON Schema constrained
990
+ * decoding) fails to parse as valid JSON.
991
+ *
992
+ * Wraps the underlying `SyntaxError` from `JSON.parse` so consumers can
993
+ * distinguish SDK-issued failures from unrelated runtime exceptions.
994
+ */
995
+ export declare class StructuredOutputError extends LocalmWebError {
996
+ }
997
+
859
998
  /**
860
999
  * Wrap an async iterable so that each `TokenChunk` is also passed to a
861
1000
  * caller-supplied side-effect callback before being yielded downstream.
@@ -878,6 +1017,33 @@ export declare interface TokenChunk {
878
1017
  done: boolean;
879
1018
  }
880
1019
 
1020
+ /**
1021
+ * Inference engine backed by
1022
+ * [`@huggingface/transformers`](https://github.com/huggingface/transformers.js)
1023
+ * (transformers.js).
1024
+ *
1025
+ * Used by the SDK as the **fallback path** for browsers without WebGPU and as
1026
+ * an explicit alternative backend selectable via `LMTaskCreateOptions.backend`.
1027
+ * It runs ONNX models on WebGPU when available and on WASM-SIMD otherwise, so
1028
+ * a wider range of browsers can run language models with a graceful — if
1029
+ * slower — degrade.
1030
+ *
1031
+ * The package is an optional peer dependency; import it on the consumer side
1032
+ * before instantiating tasks that resolve to this backend.
1033
+ */
1034
+ export declare class TransformersTextEngine implements Engine {
1035
+ private generator;
1036
+ private currentAbortController;
1037
+ isLoaded(): boolean;
1038
+ load(modelId: string, onProgress?: ProgressCallback): Promise<void>;
1039
+ generate(messages: Message[], options?: GenerationOptions): Promise<string>;
1040
+ stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
1041
+ complete(prompt: string, options?: GenerationOptions): Promise<string>;
1042
+ streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
1043
+ unload(): Promise<void>;
1044
+ private requireGenerator;
1045
+ }
1046
+
881
1047
  /** Thrown when a model id is not present in the curated registry. */
882
1048
  export declare class UnknownModelError extends LocalmWebError {
883
1049
  }
@@ -889,6 +1055,24 @@ export declare const VERSION: string;
889
1055
  export declare class WebGPUUnavailableError extends LocalmWebError {
890
1056
  }
891
1057
 
1058
+ /**
1059
+ * Inference engine backed by [WebLLM (MLC)](https://github.com/mlc-ai/web-llm).
1060
+ *
1061
+ * Requires WebGPU. When WebGPU is missing, the SDK routes to the
1062
+ * transformers.js fallback engine ({@link TransformersTextEngine}) instead.
1063
+ */
1064
+ export declare class WebLLMEngine implements Engine {
1065
+ private engine;
1066
+ isLoaded(): boolean;
1067
+ load(modelId: string, onProgress?: ProgressCallback): Promise<void>;
1068
+ generate(messages: Message[], options?: GenerationOptions): Promise<string>;
1069
+ stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
1070
+ complete(prompt: string, options?: GenerationOptions): Promise<string>;
1071
+ streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
1072
+ unload(): Promise<void>;
1073
+ private requireEngine;
1074
+ }
1075
+
892
1076
  /**
893
1077
  * Engine implementation that proxies all calls to a Web Worker.
894
1078
  *