localm-web 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,87 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.5.0] - 2026-05-10
11
+
12
+ ### Added
13
+
14
+ - **ORT-Web fallback path (v0.5)** — `TransformersTextEngine` in
15
+ `src/core/transformers-engine.ts` implements the runtime-agnostic
16
+ `Engine` contract on top of
17
+ [`@huggingface/transformers`](https://github.com/huggingface/transformers.js).
18
+ Lazy-imports the optional peer dep so the WebLLM hot path stays slim.
19
+ Runs ONNX models on WebGPU when available and on WASM-SIMD otherwise,
20
+ with a `TextStreamer` → async-iterable bridge for `stream()` /
21
+ `streamCompletion()` parity with `WebLLMEngine`.
22
+ - **Backend selector + auto-routing** — new `BackendChoice` type
23
+ (`"auto" | "webllm" | "transformers"`) on `LMTaskCreateOptions.backend`.
24
+ `"auto"` (default) picks WebLLM when WebGPU is available and falls
25
+ back to the transformers.js engine otherwise. `resolveBackend(choice,
26
+ preset, webGPUAvailable)` exported from the package root for unit
27
+ tests and custom routing logic. `BackendNotAvailableError` is raised
28
+ when no backend can satisfy the request (e.g. `"transformers"` forced
29
+ on a preset without `transformersId`).
30
+ - `ModelPreset.transformersId?: string` — HuggingFace Hub repo id used
31
+ by the transformers.js fallback. Replaces the unused `ortUrl` field.
32
+ - 4 presets now carry `transformersId` mappings: `phi-3.5-mini-int4`,
33
+ `llama-3.2-1b-int4`, `qwen2.5-1.5b-int4`, and the new
34
+ `smollm2-360m-int8` (the smallest viable chat model, intended as the
35
+ default for low-end devices on the fallback path).
36
+ - Public exports: `TransformersTextEngine`, `WebLLMEngine`,
37
+ `resolveBackend`, `BackendChoice`.
38
+ - 6 unit tests in `test/resolve-backend.test.ts` covering each
39
+ combination of `BackendChoice` × WebGPU availability × preset
40
+ capability, including the two `BackendNotAvailableError` paths.
41
+
42
+ ### Changed
43
+
44
+ - **CI / dev runtime moved to Node 22 + 24.**
45
+ - `engines.node` bumped from `>=20.19.0` to `>=22.0.0`. Node 20
46
+ reached end-of-life on 2026-04-30 per the Node release schedule
47
+ and the `Release to npm` workflow started warning about
48
+ `actions/checkout@v4` / `actions/setup-node@v4` running on Node 20.
49
+ - CI matrix in `.github/workflows/ci.yml` flipped from `["20", "22"]`
50
+ to `["22", "24"]`.
51
+ - Release workflow (`.github/workflows/release-npm.yml`) now sets up
52
+ Node 22 (was 20).
53
+ - `actions/checkout@v4` → `@v5` and `actions/setup-node@v4` → `@v5`
54
+ in both workflows. Eliminates the Node 20 deprecation notice that
55
+ appeared on the v0.4.0 publish run.
56
+ - `docs/getting-started.md` prerequisite row updated to reflect the
57
+ new Node 22+ requirement.
58
+
59
+ ## [0.4.0] - 2026-05-10
60
+
61
+ ### Added
62
+
63
+ - **Structured output (v0.4)** — JSON mode and JSON Schema constrained
64
+ decoding via WebLLM's `response_format` / xgrammar.
65
+ - `GenerationOptions.json: boolean` — when `true`, the engine is forced
66
+ to emit a string parseable as JSON (free-form shape).
67
+ `GenerationOptions.jsonSchema?: object` — when set, takes priority
68
+ over `json` and constrains decoding so the output matches the schema.
69
+ - `ChatReply.json<T>()` and `CompletionResult.json<T>()` parse the
70
+ generated text and return it cast to `T`. No runtime validation of
71
+ the schema is performed; pair with Ajv / Zod on the call site if you
72
+ need it.
73
+ - `StructuredOutputError` (extends `LocalmWebError`) wraps the
74
+ underlying `SyntaxError` from `JSON.parse`, so consumers can
75
+ distinguish SDK-issued failures from unrelated runtime exceptions.
76
+ - `src/structured/json-schema.ts` exposes `assertJsonSchema`,
77
+ `serializeJsonSchema`, and `parseStructuredOutput<T>` re-exported
78
+ from `localm-web`.
79
+ - `WebLLMEngine.generate` / `stream` / `complete` / `streamCompletion`
80
+ forward `response_format` to WebLLM. Worker engine inherits the
81
+ behavior without changes (the worker protocol already passes
82
+ `GenerationOptions` through `postMessage`; only `signal` is stripped).
83
+ - 15 unit tests in `test/structured-output.test.ts` covering schema
84
+ assertion (accept / reject paths), schema serialization, JSON parsing
85
+ of objects / arrays / primitives / invalid input, error chaining via
86
+ `cause`, and the `.json()` helpers on `ChatReply` and
87
+ `CompletionResult`.
88
+
89
+ ## [0.3.0] - 2026-05-10
90
+
10
91
  ### Changed
11
92
 
12
93
  - **`LMTaskCreateOptions.inWorker` default flipped from `false` to `true`.**
package/README.md CHANGED
@@ -134,10 +134,22 @@ const vectors = await emb.embed(["hello world", "another sentence"]);
134
134
  const rerank = await Reranker.create("bge-reranker-base");
135
135
  const scores = await rerank.score("query", ["doc1", "doc2", "doc3"]);
136
136
 
137
- // Structured output (JSON Schema → constrained decoding)
138
- const json = await chat.send("Extract user info from: ...", {
139
- jsonSchema: { type: "object", properties: { name: { type: "string" } } },
137
+ // Structured output — free-form JSON
138
+ const jsonReply = await chat.send("List three pros and cons of WebGPU as JSON.", { json: true });
139
+ const data = jsonReply.json<{ pros: string[]; cons: string[] }>();
140
+
141
+ // Structured output — JSON Schema constrained decoding (xgrammar via WebLLM)
142
+ const userReply = await chat.send("Extract user info from: 'Ada, 36, …'", {
143
+ jsonSchema: {
144
+ type: "object",
145
+ required: ["name", "age"],
146
+ properties: {
147
+ name: { type: "string" },
148
+ age: { type: "integer", minimum: 0 },
149
+ },
150
+ },
140
151
  });
152
+ const user = userReply.json<{ name: string; age: number }>();
141
153
  ```
142
154
 
143
155
  The shape mirrors `ort-vision-sdk-web`: `await Class.create(model)` then `predict()` / `send()` / `embed()` / `score()`.
@@ -24,6 +24,33 @@ class ModelNotLoadedError extends LocalmWebError {
24
24
  }
25
25
  class GenerationAbortedError extends LocalmWebError {
26
26
  }
27
+ class StructuredOutputError extends LocalmWebError {
28
+ }
29
+ function assertJsonSchema(schema) {
30
+ if (schema === null || typeof schema !== "object" || Array.isArray(schema)) {
31
+ throw new StructuredOutputError("jsonSchema must be a plain object describing a JSON Schema.");
32
+ }
33
+ const keys = Object.keys(schema);
34
+ const recognized = [
35
+ "type",
36
+ "$ref",
37
+ "oneOf",
38
+ "anyOf",
39
+ "allOf",
40
+ "enum",
41
+ "const",
42
+ "properties"
43
+ ];
44
+ if (!keys.some((key) => recognized.includes(key))) {
45
+ throw new StructuredOutputError(
46
+ "jsonSchema does not look like a JSON Schema (missing type/$ref/oneOf/anyOf/allOf/enum/const/properties)."
47
+ );
48
+ }
49
+ }
50
+ function serializeJsonSchema(schema) {
51
+ assertJsonSchema(schema);
52
+ return JSON.stringify(schema);
53
+ }
27
54
  let webllmModulePromise = null;
28
55
  async function loadWebLLM() {
29
56
  if (!webllmModulePromise) {
@@ -41,6 +68,15 @@ function buildSamplingParams(options) {
41
68
  if (options.topP !== void 0) params.top_p = options.topP;
42
69
  return params;
43
70
  }
71
+ function buildResponseFormat(options) {
72
+ if (options.jsonSchema !== void 0) {
73
+ return { type: "json_object", schema: serializeJsonSchema(options.jsonSchema) };
74
+ }
75
+ if (options.json) {
76
+ return { type: "json_object" };
77
+ }
78
+ return void 0;
79
+ }
44
80
  function toChatMessages(messages) {
45
81
  return messages.map((m) => {
46
82
  switch (m.role) {
@@ -95,10 +131,12 @@ class WebLLMEngine {
95
131
  if (options.signal?.aborted) {
96
132
  throw new GenerationAbortedError("Generation aborted before start.");
97
133
  }
134
+ const responseFormat = buildResponseFormat(options);
98
135
  const completion = await engine2.chat.completions.create({
99
136
  ...buildSamplingParams(options),
100
137
  messages: toChatMessages(messages),
101
- stream: false
138
+ stream: false,
139
+ ...responseFormat ? { response_format: responseFormat } : {}
102
140
  });
103
141
  return completion.choices[0]?.message?.content ?? "";
104
142
  }
@@ -107,10 +145,12 @@ class WebLLMEngine {
107
145
  if (options.signal?.aborted) {
108
146
  throw new GenerationAbortedError("Generation aborted before start.");
109
147
  }
148
+ const responseFormat = buildResponseFormat(options);
110
149
  const completion = await engine2.chat.completions.create({
111
150
  ...buildSamplingParams(options),
112
151
  messages: toChatMessages(messages),
113
- stream: true
152
+ stream: true,
153
+ ...responseFormat ? { response_format: responseFormat } : {}
114
154
  });
115
155
  let index = 0;
116
156
  let finished = false;
@@ -144,10 +184,12 @@ class WebLLMEngine {
144
184
  if (options.signal?.aborted) {
145
185
  throw new GenerationAbortedError("Generation aborted before start.");
146
186
  }
187
+ const responseFormat = buildResponseFormat(options);
147
188
  const completion = await engine2.completions.create({
148
189
  ...buildSamplingParams(options),
149
190
  prompt,
150
- stream: false
191
+ stream: false,
192
+ ...responseFormat ? { response_format: responseFormat } : {}
151
193
  });
152
194
  return completion.choices[0]?.text ?? "";
153
195
  }
@@ -156,10 +198,12 @@ class WebLLMEngine {
156
198
  if (options.signal?.aborted) {
157
199
  throw new GenerationAbortedError("Generation aborted before start.");
158
200
  }
201
+ const responseFormat = buildResponseFormat(options);
159
202
  const completion = await engine2.completions.create({
160
203
  ...buildSamplingParams(options),
161
204
  prompt,
162
- stream: true
205
+ stream: true,
206
+ ...responseFormat ? { response_format: responseFormat } : {}
163
207
  });
164
208
  let index = 0;
165
209
  let finished = false;
@@ -327,4 +371,4 @@ self.addEventListener("message", (event) => {
327
371
  return;
328
372
  }
329
373
  });
330
- //# sourceMappingURL=inference.worker-CwvQtobb.js.map
374
+ //# sourceMappingURL=inference.worker-DZbXKJZY.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"inference.worker-DZbXKJZY.js","sources":["../src/core/load-phase.ts","../src/core/exceptions.ts","../src/structured/json-schema.ts","../src/core/webllm-engine.ts","../src/worker/inference.worker.ts"],"sourcesContent":["import type { ModelLoadPhase } from \"../types\";\n\nconst DOWNLOAD_PATTERN: RegExp = /\\b(fetch|download|loading from cache|cache hit|param)/i;\nconst COMPILE_PATTERN: RegExp = /\\b(compil|shader|kernel|tensor|init|allocat|warm)/i;\n\n/**\n * Classify a runtime status text into a {@link ModelLoadPhase}.\n *\n * Heuristic: match download-related verbs first (network or cache hits are\n * treated as `downloading`), then compile-related verbs. Anything else falls\n * back to the generic `loading` bucket. The `ready` phase is never returned\n * here — callers emit it explicitly when the load resolves.\n *\n * @param text - The raw status string from the runtime.\n * @returns The classified phase.\n */\nexport function classifyLoadPhase(text: string): ModelLoadPhase {\n if (DOWNLOAD_PATTERN.test(text)) return \"downloading\";\n if (COMPILE_PATTERN.test(text)) return \"compiling\";\n return \"loading\";\n}\n","/**\n * Error hierarchy for localm-web.\n *\n * All errors thrown by the SDK extend `LocalmWebError` so consumers can\n * distinguish SDK errors from unrelated runtime errors with a single\n * `instanceof` check.\n */\n\n/** Base class for every error raised by localm-web. */\nexport class LocalmWebError extends Error {\n /**\n * @param message - Human-readable description of the error.\n * @param cause - Underlying error, if any.\n */\n constructor(\n message: string,\n public readonly cause?: unknown\n ) {\n super(message);\n this.name = new.target.name;\n }\n}\n\n/** Thrown when WebGPU is required but not available in the host browser. */\nexport class WebGPUUnavailableError extends LocalmWebError {}\n\n/** Thrown when a model fails to load (network, parsing, runtime init). 
*/\nexport class ModelLoadError extends LocalmWebError {}\n\n/** Thrown when an inference call is made before a model has loaded. */\nexport class ModelNotLoadedError extends LocalmWebError {}\n\n/** Thrown when a model id is not present in the curated registry. */\nexport class UnknownModelError extends LocalmWebError {}\n\n/** Thrown when generation is aborted via an `AbortSignal`. */\nexport class GenerationAbortedError extends LocalmWebError {}\n\n/** Thrown when the browser denies storage quota for the model cache. */\nexport class QuotaExceededError extends LocalmWebError {}\n\n/** Thrown when no usable backend is available on the current platform. */\nexport class BackendNotAvailableError extends LocalmWebError {}\n\n/**\n * Thrown when structured output (JSON mode or JSON Schema constrained\n * decoding) fails to parse as valid JSON.\n *\n * Wraps the underlying `SyntaxError` from `JSON.parse` so consumers can\n * distinguish SDK-issued failures from unrelated runtime exceptions.\n */\nexport class StructuredOutputError extends LocalmWebError {}\n","/**\n * JSON Schema helpers for structured output.\n *\n * The SDK delegates the actual constrained decoding to the underlying\n * runtime (xgrammar inside WebLLM today, ORT-Web equivalent later). These\n * helpers normalize user input — turning a JS object schema into the\n * JSON-string shape that WebLLM's `response_format.schema` expects — and\n * parse the runtime's textual output back into typed JSON.\n */\n\nimport { StructuredOutputError } from \"../core/exceptions\";\n\n/**\n * Minimal structural sanity check for a JSON Schema.\n *\n * Does not validate the schema against the JSON Schema meta-schema. 
The goal\n * is to fail fast on obvious mistakes (passing a string, an array, `null`)\n * before handing the value off to the runtime, where errors surface much\n * later and with much worse messages.\n *\n * @param schema - Candidate JSON Schema object.\n * @throws StructuredOutputError when `schema` is not a plain object or has\n * no recognizable schema shape (`type`, `$ref`, `oneOf`, `anyOf`, `allOf`,\n * `enum`).\n */\nexport function assertJsonSchema(schema: unknown): asserts schema is object {\n if (schema === null || typeof schema !== \"object\" || Array.isArray(schema)) {\n throw new StructuredOutputError(\"jsonSchema must be a plain object describing a JSON Schema.\");\n }\n const keys: string[] = Object.keys(schema);\n const recognized: readonly string[] = [\n \"type\",\n \"$ref\",\n \"oneOf\",\n \"anyOf\",\n \"allOf\",\n \"enum\",\n \"const\",\n \"properties\",\n ];\n if (!keys.some((key) => recognized.includes(key))) {\n throw new StructuredOutputError(\n \"jsonSchema does not look like a JSON Schema (missing type/$ref/oneOf/anyOf/allOf/enum/const/properties).\"\n );\n }\n}\n\n/**\n * Serialize a JSON Schema object for the WebLLM `response_format.schema`\n * field.\n *\n * WebLLM expects the schema as a JSON-encoded string (xgrammar parses it\n * server-side). Validates the shape via {@link assertJsonSchema} first.\n *\n * @param schema - JSON Schema object.\n * @returns The schema serialized as a JSON string.\n * @throws StructuredOutputError when `schema` is not a recognizable JSON\n * Schema shape.\n */\nexport function serializeJsonSchema(schema: unknown): string {\n assertJsonSchema(schema);\n return JSON.stringify(schema);\n}\n\n/**\n * Parse the textual output of a structured-decoding generation as JSON.\n *\n * @typeParam T - The expected parsed shape. 
The function does not validate\n * the parsed value against `T`; that is the caller's responsibility.\n * @param text - Raw text returned by the engine.\n * @returns The parsed JSON value cast to `T`.\n * @throws StructuredOutputError when the text is not valid JSON.\n */\nexport function parseStructuredOutput<T = unknown>(text: string): T {\n try {\n return JSON.parse(text) as T;\n } catch (err) {\n throw new StructuredOutputError(\n \"Engine output is not valid JSON. The model may have ignored the constrained decoding directive.\",\n err\n );\n }\n}\n","import type { Engine } from \"./engine\";\nimport { classifyLoadPhase } from \"./load-phase\";\nimport type { GenerationOptions, Message, ProgressCallback, TokenChunk } from \"../types\";\nimport {\n GenerationAbortedError,\n ModelLoadError,\n ModelNotLoadedError,\n WebGPUUnavailableError,\n} from \"./exceptions\";\nimport { serializeJsonSchema } from \"../structured/json-schema\";\n\ntype WebLLMModule = typeof import(\"@mlc-ai/web-llm\");\ntype MLCEngine = import(\"@mlc-ai/web-llm\").MLCEngineInterface;\ntype ChatCompletionMessageParam = import(\"@mlc-ai/web-llm\").ChatCompletionMessageParam;\ntype ResponseFormat = import(\"@mlc-ai/web-llm\").ResponseFormat;\n\nlet webllmModulePromise: Promise<WebLLMModule> | null = null;\n\nasync function loadWebLLM(): Promise<WebLLMModule> {\n if (!webllmModulePromise) {\n webllmModulePromise = import(\"@mlc-ai/web-llm\");\n }\n return webllmModulePromise;\n}\n\nfunction isWebGPUAvailable(): boolean {\n return typeof navigator !== \"undefined\" && \"gpu\" in navigator;\n}\n\ninterface SamplingParams {\n max_tokens?: number;\n temperature?: number;\n top_p?: number;\n}\n\nfunction buildSamplingParams(options: GenerationOptions): SamplingParams {\n const params: SamplingParams = {};\n if (options.maxTokens !== undefined) params.max_tokens = options.maxTokens;\n if (options.temperature !== undefined) params.temperature = options.temperature;\n if (options.topP !== undefined) 
params.top_p = options.topP;\n return params;\n}\n\n/**\n * Build the WebLLM `response_format` payload from generation options.\n *\n * Returns `undefined` when the caller has not requested structured output —\n * letting WebLLM use its default free-text decoding path. When `jsonSchema`\n * is set it takes priority and is serialized into the `schema` field\n * (xgrammar parses it server-side). When only `json` is set the payload\n * carries `{ type: \"json_object\" }` for unconstrained-but-valid JSON.\n */\nfunction buildResponseFormat(options: GenerationOptions): ResponseFormat | undefined {\n if (options.jsonSchema !== undefined) {\n return { type: \"json_object\", schema: serializeJsonSchema(options.jsonSchema) };\n }\n if (options.json) {\n return { type: \"json_object\" };\n }\n return undefined;\n}\n\nfunction toChatMessages(messages: Message[]): ChatCompletionMessageParam[] {\n return messages.map((m): ChatCompletionMessageParam => {\n switch (m.role) {\n case \"system\":\n return { role: \"system\", content: m.content };\n case \"user\":\n return { role: \"user\", content: m.content };\n case \"assistant\":\n return { role: \"assistant\", content: m.content };\n case \"tool\":\n return { role: \"tool\", content: m.content, tool_call_id: m.name ?? \"\" };\n }\n });\n}\n\n/**\n * Inference engine backed by [WebLLM (MLC)](https://github.com/mlc-ai/web-llm).\n *\n * Requires WebGPU. The fallback path planned for v0.5 will route to ORT-Web\n * when WebGPU is missing.\n */\nexport class WebLLMEngine implements Engine {\n private engine: MLCEngine | null = null;\n\n isLoaded(): boolean {\n return this.engine !== null;\n }\n\n async load(modelId: string, onProgress?: ProgressCallback): Promise<void> {\n if (!isWebGPUAvailable()) {\n throw new WebGPUUnavailableError(\n \"WebGPU is not available in this browser. 
The ORT-Web fallback is planned for v0.5.\"\n );\n }\n const webllm = await loadWebLLM();\n try {\n this.engine = await webllm.CreateMLCEngine(modelId, {\n initProgressCallback: (report): void => {\n onProgress?.({\n progress: report.progress,\n text: report.text,\n loaded: 0,\n total: 0,\n phase: classifyLoadPhase(report.text),\n });\n },\n });\n onProgress?.({\n progress: 1,\n text: \"Model ready.\",\n loaded: 0,\n total: 0,\n phase: \"ready\",\n });\n } catch (err) {\n throw new ModelLoadError(`Failed to load model \"${modelId}\".`, err);\n }\n }\n\n async generate(messages: Message[], options: GenerationOptions = {}): Promise<string> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const responseFormat = buildResponseFormat(options);\n const completion = await engine.chat.completions.create({\n ...buildSamplingParams(options),\n messages: toChatMessages(messages),\n stream: false,\n ...(responseFormat ? { response_format: responseFormat } : {}),\n });\n return completion.choices[0]?.message?.content ?? \"\";\n }\n\n async *stream(messages: Message[], options: GenerationOptions = {}): AsyncIterable<TokenChunk> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const responseFormat = buildResponseFormat(options);\n const completion = await engine.chat.completions.create({\n ...buildSamplingParams(options),\n messages: toChatMessages(messages),\n stream: true,\n ...(responseFormat ? { response_format: responseFormat } : {}),\n });\n let index: number = 0;\n let finished: boolean = false;\n try {\n for await (const chunk of completion) {\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted by signal.\");\n }\n const choice = chunk.choices[0];\n const delta = choice?.delta?.content ?? 
\"\";\n if (delta) {\n yield { text: delta, index, done: false };\n index += 1;\n }\n if (choice?.finish_reason) {\n finished = true;\n yield { text: \"\", index, done: true };\n index += 1;\n }\n }\n if (!finished) {\n yield { text: \"\", index, done: true };\n }\n } catch (err) {\n if (err instanceof GenerationAbortedError) throw err;\n throw new ModelLoadError(\"Streaming generation failed.\", err);\n }\n }\n\n async complete(prompt: string, options: GenerationOptions = {}): Promise<string> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const responseFormat = buildResponseFormat(options);\n const completion = await engine.completions.create({\n ...buildSamplingParams(options),\n prompt,\n stream: false,\n ...(responseFormat ? { response_format: responseFormat } : {}),\n });\n return completion.choices[0]?.text ?? \"\";\n }\n\n async *streamCompletion(\n prompt: string,\n options: GenerationOptions = {}\n ): AsyncIterable<TokenChunk> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const responseFormat = buildResponseFormat(options);\n const completion = await engine.completions.create({\n ...buildSamplingParams(options),\n prompt,\n stream: true,\n ...(responseFormat ? { response_format: responseFormat } : {}),\n });\n let index: number = 0;\n let finished: boolean = false;\n try {\n for await (const chunk of completion) {\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted by signal.\");\n }\n const choice = chunk.choices[0];\n const delta = choice?.text ?? 
\"\";\n if (delta) {\n yield { text: delta, index, done: false };\n index += 1;\n }\n if (choice?.finish_reason) {\n finished = true;\n yield { text: \"\", index, done: true };\n index += 1;\n }\n }\n if (!finished) {\n yield { text: \"\", index, done: true };\n }\n } catch (err) {\n if (err instanceof GenerationAbortedError) throw err;\n throw new ModelLoadError(\"Streaming completion failed.\", err);\n }\n }\n\n async unload(): Promise<void> {\n if (this.engine) {\n await this.engine.unload();\n this.engine = null;\n }\n }\n\n private requireEngine(): MLCEngine {\n if (!this.engine) {\n throw new ModelNotLoadedError(\"Engine not loaded. Call load() before generation.\");\n }\n return this.engine;\n }\n}\n","/// <reference lib=\"webworker\" />\n\nimport { WebLLMEngine } from \"../core/webllm-engine\";\nimport type { WorkerRequest, WorkerResponse } from \"./protocol\";\n\ndeclare const self: DedicatedWorkerGlobalScope;\n\nconst engine: WebLLMEngine = new WebLLMEngine();\nconst aborts: Map<number, AbortController> = new Map();\n\nfunction reply(message: WorkerResponse): void {\n self.postMessage(message);\n}\n\nfunction fail(id: number, err: unknown): void {\n const error = err instanceof Error ? 
err : new Error(String(err));\n reply({ op: \"error\", id, name: error.name, message: error.message });\n}\n\nasync function handleLoad(req: Extract<WorkerRequest, { op: \"load\" }>): Promise<void> {\n try {\n await engine.load(req.modelId, (payload) => {\n reply({ op: \"progress\", id: req.id, payload });\n });\n reply({ op: \"loaded\", id: req.id });\n } catch (err) {\n fail(req.id, err);\n }\n}\n\nasync function handleGenerate(req: Extract<WorkerRequest, { op: \"generate\" }>): Promise<void> {\n const controller: AbortController = new AbortController();\n aborts.set(req.id, controller);\n try {\n const text: string = await engine.generate(req.messages, {\n ...req.options,\n signal: controller.signal,\n });\n reply({ op: \"generated\", id: req.id, text });\n } catch (err) {\n fail(req.id, err);\n } finally {\n aborts.delete(req.id);\n }\n}\n\nasync function handleComplete(req: Extract<WorkerRequest, { op: \"complete\" }>): Promise<void> {\n const controller: AbortController = new AbortController();\n aborts.set(req.id, controller);\n try {\n const text: string = await engine.complete(req.prompt, {\n ...req.options,\n signal: controller.signal,\n });\n reply({ op: \"generated\", id: req.id, text });\n } catch (err) {\n fail(req.id, err);\n } finally {\n aborts.delete(req.id);\n }\n}\n\nasync function handleStreamCompletion(\n req: Extract<WorkerRequest, { op: \"stream-completion\" }>\n): Promise<void> {\n const controller: AbortController = new AbortController();\n aborts.set(req.id, controller);\n try {\n for await (const chunk of engine.streamCompletion(req.prompt, {\n ...req.options,\n signal: controller.signal,\n })) {\n reply({ op: \"token\", id: req.id, chunk });\n }\n reply({ op: \"stream-end\", id: req.id });\n } catch (err) {\n fail(req.id, err);\n } finally {\n aborts.delete(req.id);\n }\n}\n\nasync function handleStream(req: Extract<WorkerRequest, { op: \"stream\" }>): Promise<void> {\n const controller: AbortController = new AbortController();\n 
aborts.set(req.id, controller);\n try {\n for await (const chunk of engine.stream(req.messages, {\n ...req.options,\n signal: controller.signal,\n })) {\n reply({ op: \"token\", id: req.id, chunk });\n }\n reply({ op: \"stream-end\", id: req.id });\n } catch (err) {\n fail(req.id, err);\n } finally {\n aborts.delete(req.id);\n }\n}\n\nasync function handleUnload(req: Extract<WorkerRequest, { op: \"unload\" }>): Promise<void> {\n try {\n await engine.unload();\n reply({ op: \"unloaded\", id: req.id });\n } catch (err) {\n fail(req.id, err);\n }\n}\n\nfunction handleIsLoaded(req: Extract<WorkerRequest, { op: \"isLoaded\" }>): void {\n reply({ op: \"is-loaded\", id: req.id, value: engine.isLoaded() });\n}\n\nfunction handleAbort(req: Extract<WorkerRequest, { op: \"abort\" }>): void {\n aborts.get(req.id)?.abort();\n}\n\nself.addEventListener(\"message\", (event: MessageEvent<WorkerRequest>): void => {\n const req = event.data;\n switch (req.op) {\n case \"load\":\n void handleLoad(req);\n return;\n case \"generate\":\n void handleGenerate(req);\n return;\n case \"stream\":\n void handleStream(req);\n return;\n case \"complete\":\n void handleComplete(req);\n return;\n case \"stream-completion\":\n void handleStreamCompletion(req);\n return;\n case \"unload\":\n void handleUnload(req);\n return;\n case \"isLoaded\":\n handleIsLoaded(req);\n return;\n case \"abort\":\n handleAbort(req);\n return;\n 
}\n});\n"],"names":["engine"],"mappings":"AAEA,MAAM,mBAA2B;AACjC,MAAM,kBAA0B;AAazB,SAAS,kBAAkB,MAA8B;AAC9D,MAAI,iBAAiB,KAAK,IAAI,EAAG,QAAO;AACxC,MAAI,gBAAgB,KAAK,IAAI,EAAG,QAAO;AACvC,SAAO;AACT;ACXO,MAAM,uBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKxC,YACE,SACgB,OAChB;AACA,UAAM,OAAO;AAFG,SAAA,QAAA;AAGhB,SAAK,OAAO,WAAW;AAAA,EACzB;AACF;AAGO,MAAM,+BAA+B,eAAe;AAAC;AAGrD,MAAM,uBAAuB,eAAe;AAAC;AAG7C,MAAM,4BAA4B,eAAe;AAAC;AAMlD,MAAM,+BAA+B,eAAe;AAAC;AAerD,MAAM,8BAA8B,eAAe;AAAC;AC1BpD,SAAS,iBAAiB,QAA2C;AAC1E,MAAI,WAAW,QAAQ,OAAO,WAAW,YAAY,MAAM,QAAQ,MAAM,GAAG;AAC1E,UAAM,IAAI,sBAAsB,6DAA6D;AAAA,EAC/F;AACA,QAAM,OAAiB,OAAO,KAAK,MAAM;AACzC,QAAM,aAAgC;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EAAA;AAEF,MAAI,CAAC,KAAK,KAAK,CAAC,QAAQ,WAAW,SAAS,GAAG,CAAC,GAAG;AACjD,UAAM,IAAI;AAAA,MACR;AAAA,IAAA;AAAA,EAEJ;AACF;AAcO,SAAS,oBAAoB,QAAyB;AAC3D,mBAAiB,MAAM;AACvB,SAAO,KAAK,UAAU,MAAM;AAC9B;AC9CA,IAAI,sBAAoD;AAExD,eAAe,aAAoC;AACjD,MAAI,CAAC,qBAAqB;AACxB,0BAAsB,OAAO,qBAAiB;AAAA,EAChD;AACA,SAAO;AACT;AAEA,SAAS,oBAA6B;AACpC,SAAO,OAAO,cAAc,eAAe,SAAS;AACtD;AAQA,SAAS,oBAAoB,SAA4C;AACvE,QAAM,SAAyB,CAAA;AAC/B,MAAI,QAAQ,cAAc,OAAW,QAAO,aAAa,QAAQ;AACjE,MAAI,QAAQ,gBAAgB,OAAW,QAAO,cAAc,QAAQ;AACpE,MAAI,QAAQ,SAAS,OAAW,QAAO,QAAQ,QAAQ;AACvD,SAAO;AACT;AAWA,SAAS,oBAAoB,SAAwD;AACnF,MAAI,QAAQ,eAAe,QAAW;AACpC,WAAO,EAAE,MAAM,eAAe,QAAQ,oBAAoB,QAAQ,UAAU,EAAA;AAAA,EAC9E;AACA,MAAI,QAAQ,MAAM;AAChB,WAAO,EAAE,MAAM,cAAA;AAAA,EACjB;AACA,SAAO;AACT;AAEA,SAAS,eAAe,UAAmD;AACzE,SAAO,SAAS,IAAI,CAAC,MAAkC;AACrD,YAAQ,EAAE,MAAA;AAAA,MACR,KAAK;AACH,eAAO,EAAE,MAAM,UAAU,SAAS,EAAE,QAAA;AAAA,MACtC,KAAK;AACH,eAAO,EAAE,MAAM,QAAQ,SAAS,EAAE,QAAA;AAAA,MACpC,KAAK;AACH,eAAO,EAAE,MAAM,aAAa,SAAS,EAAE,QAAA;AAAA,MACzC,KAAK;AACH,eAAO,EAAE,MAAM,QAAQ,SAAS,EAAE,SAAS,cAAc,EAAE,QAAQ,GAAA;AAAA,IAAG;AAAA,EAE5E,CAAC;AACH;AAQO,MAAM,aAA+B;AAAA,EAClC,SAA2B;AAAA,EAEnC,WAAoB;AAClB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,MAAM,KAAK,SAAiB,YAA8C;AACxE,QAAI,CAAC,qBAAqB;AACxB,YAAM,IAAI;AAAA,QACR;AAAA,MAAA;AAAA,IAEJ;AACA,UAAM,SAAS,MAAM,W
AAA;AACrB,QAAI;AACF,WAAK,SAAS,MAAM,OAAO,gBAAgB,SAAS;AAAA,QAClD,sBAAsB,CAAC,WAAiB;AACtC,uBAAa;AAAA,YACX,UAAU,OAAO;AAAA,YACjB,MAAM,OAAO;AAAA,YACb,QAAQ;AAAA,YACR,OAAO;AAAA,YACP,OAAO,kBAAkB,OAAO,IAAI;AAAA,UAAA,CACrC;AAAA,QACH;AAAA,MAAA,CACD;AACD,mBAAa;AAAA,QACX,UAAU;AAAA,QACV,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,OAAO;AAAA,QACP,OAAO;AAAA,MAAA,CACR;AAAA,IACH,SAAS,KAAK;AACZ,YAAM,IAAI,eAAe,yBAAyB,OAAO,MAAM,GAAG;AAAA,IACpE;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,UAAqB,UAA6B,IAAqB;AACpF,UAAMA,UAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,iBAAiB,oBAAoB,OAAO;AAClD,UAAM,aAAa,MAAMA,QAAO,KAAK,YAAY,OAAO;AAAA,MACtD,GAAG,oBAAoB,OAAO;AAAA,MAC9B,UAAU,eAAe,QAAQ;AAAA,MACjC,QAAQ;AAAA,MACR,GAAI,iBAAiB,EAAE,iBAAiB,mBAAmB,CAAA;AAAA,IAAC,CAC7D;AACD,WAAO,WAAW,QAAQ,CAAC,GAAG,SAAS,WAAW;AAAA,EACpD;AAAA,EAEA,OAAO,OAAO,UAAqB,UAA6B,IAA+B;AAC7F,UAAMA,UAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,iBAAiB,oBAAoB,OAAO;AAClD,UAAM,aAAa,MAAMA,QAAO,KAAK,YAAY,OAAO;AAAA,MACtD,GAAG,oBAAoB,OAAO;AAAA,MAC9B,UAAU,eAAe,QAAQ;AAAA,MACjC,QAAQ;AAAA,MACR,GAAI,iBAAiB,EAAE,iBAAiB,mBAAmB,CAAA;AAAA,IAAC,CAC7D;AACD,QAAI,QAAgB;AACpB,QAAI,WAAoB;AACxB,QAAI;AACF,uBAAiB,SAAS,YAAY;AACpC,YAAI,QAAQ,QAAQ,SAAS;AAC3B,gBAAM,IAAI,uBAAuB,+BAA+B;AAAA,QAClE;AACA,cAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,cAAM,QAAQ,QAAQ,OAAO,WAAW;AACxC,YAAI,OAAO;AACT,gBAAM,EAAE,MAAM,OAAO,OAAO,MAAM,MAAA;AAClC,mBAAS;AAAA,QACX;AACA,YAAI,QAAQ,eAAe;AACzB,qBAAW;AACX,gBAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAC/B,mBAAS;AAAA,QACX;AAAA,MACF;AACA,UAAI,CAAC,UAAU;AACb,cAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAAA,MACjC;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,eAAe,uBAAwB,OAAM;AACjD,YAAM,IAAI,eAAe,gCAAgC,GAAG;AAAA,IAC9D;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,QAAgB,UAA6B,IAAqB;AAC/E,UAAMA,UAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,iBAAiB,oBAAoB,OAAO;AAClD,UAAM,aAAa,MAAMA,QAAO,YAAY,OAAO;AAAA,MACjD,GAAG,oBAAoB,OAAO;AAAA,MAC9B;AAAA,MACA,QAAQ;AAAA,MACR,GAAI,iBAAiB,EAAE,iBAAiB,mBAAmB,CAAA;AAAA,IAAC
,CAC7D;AACD,WAAO,WAAW,QAAQ,CAAC,GAAG,QAAQ;AAAA,EACxC;AAAA,EAEA,OAAO,iBACL,QACA,UAA6B,IACF;AAC3B,UAAMA,UAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,iBAAiB,oBAAoB,OAAO;AAClD,UAAM,aAAa,MAAMA,QAAO,YAAY,OAAO;AAAA,MACjD,GAAG,oBAAoB,OAAO;AAAA,MAC9B;AAAA,MACA,QAAQ;AAAA,MACR,GAAI,iBAAiB,EAAE,iBAAiB,mBAAmB,CAAA;AAAA,IAAC,CAC7D;AACD,QAAI,QAAgB;AACpB,QAAI,WAAoB;AACxB,QAAI;AACF,uBAAiB,SAAS,YAAY;AACpC,YAAI,QAAQ,QAAQ,SAAS;AAC3B,gBAAM,IAAI,uBAAuB,+BAA+B;AAAA,QAClE;AACA,cAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,cAAM,QAAQ,QAAQ,QAAQ;AAC9B,YAAI,OAAO;AACT,gBAAM,EAAE,MAAM,OAAO,OAAO,MAAM,MAAA;AAClC,mBAAS;AAAA,QACX;AACA,YAAI,QAAQ,eAAe;AACzB,qBAAW;AACX,gBAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAC/B,mBAAS;AAAA,QACX;AAAA,MACF;AACA,UAAI,CAAC,UAAU;AACb,cAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAAA,MACjC;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,eAAe,uBAAwB,OAAM;AACjD,YAAM,IAAI,eAAe,gCAAgC,GAAG;AAAA,IAC9D;AAAA,EACF;AAAA,EAEA,MAAM,SAAwB;AAC5B,QAAI,KAAK,QAAQ;AACf,YAAM,KAAK,OAAO,OAAA;AAClB,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEQ,gBAA2B;AACjC,QAAI,CAAC,KAAK,QAAQ;AAChB,YAAM,IAAI,oBAAoB,mDAAmD;AAAA,IACnF;AACA,WAAO,KAAK;AAAA,EACd;AACF;AChPA,MAAM,SAAuB,IAAI,aAAA;AACjC,MAAM,6BAA2C,IAAA;AAEjD,SAAS,MAAM,SAA+B;AAC5C,OAAK,YAAY,OAAO;AAC1B;AAEA,SAAS,KAAK,IAAY,KAAoB;AAC5C,QAAM,QAAQ,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC;AAChE,QAAM,EAAE,IAAI,SAAS,IAAI,MAAM,MAAM,MAAM,SAAS,MAAM,QAAA,CAAS;AACrE;AAEA,eAAe,WAAW,KAA4D;AACpF,MAAI;AACF,UAAM,OAAO,KAAK,IAAI,SAAS,CAAC,YAAY;AAC1C,YAAM,EAAE,IAAI,YAAY,IAAI,IAAI,IAAI,SAAS;AAAA,IAC/C,CAAC;AACD,UAAM,EAAE,IAAI,UAAU,IAAI,IAAI,IAAI;AAAA,EACpC,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB;AACF;AAEA,eAAe,eAAe,KAAgE;AAC5F,QAAM,aAA8B,IAAI,gBAAA;AACxC,SAAO,IAAI,IAAI,IAAI,UAAU;AAC7B,MAAI;AACF,UAAM,OAAe,MAAM,OAAO,SAAS,IAAI,UAAU;AAAA,MACvD,GAAG,IAAI;AAAA,MACP,QAAQ,WAAW;AAAA,IAAA,CACpB;AACD,UAAM,EAAE,IAAI,aAAa,IAAI,IAAI,IAAI,MAAM;AAAA,EAC7C,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB,UAAA;AACE,WAAO,OAAO,IAAI,EAAE;AAAA,EACtB;AACF;AAEA,eAAe,eAAe,KAAgE;AAC5F,QAAM,aAA8B,IAAI,gB
AAA;AACxC,SAAO,IAAI,IAAI,IAAI,UAAU;AAC7B,MAAI;AACF,UAAM,OAAe,MAAM,OAAO,SAAS,IAAI,QAAQ;AAAA,MACrD,GAAG,IAAI;AAAA,MACP,QAAQ,WAAW;AAAA,IAAA,CACpB;AACD,UAAM,EAAE,IAAI,aAAa,IAAI,IAAI,IAAI,MAAM;AAAA,EAC7C,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB,UAAA;AACE,WAAO,OAAO,IAAI,EAAE;AAAA,EACtB;AACF;AAEA,eAAe,uBACb,KACe;AACf,QAAM,aAA8B,IAAI,gBAAA;AACxC,SAAO,IAAI,IAAI,IAAI,UAAU;AAC7B,MAAI;AACF,qBAAiB,SAAS,OAAO,iBAAiB,IAAI,QAAQ;AAAA,MAC5D,GAAG,IAAI;AAAA,MACP,QAAQ,WAAW;AAAA,IAAA,CACpB,GAAG;AACF,YAAM,EAAE,IAAI,SAAS,IAAI,IAAI,IAAI,OAAO;AAAA,IAC1C;AACA,UAAM,EAAE,IAAI,cAAc,IAAI,IAAI,IAAI;AAAA,EACxC,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB,UAAA;AACE,WAAO,OAAO,IAAI,EAAE;AAAA,EACtB;AACF;AAEA,eAAe,aAAa,KAA8D;AACxF,QAAM,aAA8B,IAAI,gBAAA;AACxC,SAAO,IAAI,IAAI,IAAI,UAAU;AAC7B,MAAI;AACF,qBAAiB,SAAS,OAAO,OAAO,IAAI,UAAU;AAAA,MACpD,GAAG,IAAI;AAAA,MACP,QAAQ,WAAW;AAAA,IAAA,CACpB,GAAG;AACF,YAAM,EAAE,IAAI,SAAS,IAAI,IAAI,IAAI,OAAO;AAAA,IAC1C;AACA,UAAM,EAAE,IAAI,cAAc,IAAI,IAAI,IAAI;AAAA,EACxC,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB,UAAA;AACE,WAAO,OAAO,IAAI,EAAE;AAAA,EACtB;AACF;AAEA,eAAe,aAAa,KAA8D;AACxF,MAAI;AACF,UAAM,OAAO,OAAA;AACb,UAAM,EAAE,IAAI,YAAY,IAAI,IAAI,IAAI;AAAA,EACtC,SAAS,KAAK;AACZ,SAAK,IAAI,IAAI,GAAG;AAAA,EAClB;AACF;AAEA,SAAS,eAAe,KAAuD;AAC7E,QAAM,EAAE,IAAI,aAAa,IAAI,IAAI,IAAI,OAAO,OAAO,SAAA,GAAY;AACjE;AAEA,SAAS,YAAY,KAAoD;AACvE,SAAO,IAAI,IAAI,EAAE,GAAG,MAAA;AACtB;AAEA,KAAK,iBAAiB,WAAW,CAAC,UAA6C;AAC7E,QAAM,MAAM,MAAM;AAClB,UAAQ,IAAI,IAAA;AAAA,IACV,KAAK;AACH,WAAK,WAAW,GAAG;AACnB;AAAA,IACF,KAAK;AACH,WAAK,eAAe,GAAG;AACvB;AAAA,IACF,KAAK;AACH,WAAK,aAAa,GAAG;AACrB;AAAA,IACF,KAAK;AACH,WAAK,eAAe,GAAG;AACvB;AAAA,IACF,KAAK;AACH,WAAK,uBAAuB,GAAG;AAC/B;AAAA,IACF,KAAK;AACH,WAAK,aAAa,GAAG;AACrB;AAAA,IACF,KAAK;AACH,qBAAe,GAAG;AAClB;AAAA,IACF,KAAK;AACH,kBAAY,GAAG;AACf;AAAA,EAAA;AAEN,CAAC;"}
package/dist/index.d.ts CHANGED
@@ -6,6 +6,43 @@
6
6
  * @packageDocumentation
7
7
  */
8
8
 
9
+ /**
10
+ * JSON Schema helpers for structured output.
11
+ *
12
+ * The SDK delegates the actual constrained decoding to the underlying
13
+ * runtime (xgrammar inside WebLLM today, ORT-Web equivalent later). These
14
+ * helpers normalize user input — turning a JS object schema into the
15
+ * JSON-string shape that WebLLM's `response_format.schema` expects — and
16
+ * parse the runtime's textual output back into typed JSON.
17
+ */
18
+ /**
19
+ * Minimal structural sanity check for a JSON Schema.
20
+ *
21
+ * Does not validate the schema against the JSON Schema meta-schema. The goal
22
+ * is to fail fast on obvious mistakes (passing a string, an array, `null`)
23
+ * before handing the value off to the runtime, where errors surface much
24
+ * later and with much worse messages.
25
+ *
26
+ * @param schema - Candidate JSON Schema object.
27
+ * @throws StructuredOutputError when `schema` is not a plain object or has
28
+ * no recognizable schema shape (`type`, `$ref`, `oneOf`, `anyOf`, `allOf`,
29
+ * `enum`).
30
+ */
31
+ export declare function assertJsonSchema(schema: unknown): asserts schema is object;
32
+
33
+ /**
34
+ * Inference backend selector.
35
+ *
36
+ * - `"auto"` (default): pick WebLLM when WebGPU is available, fall back to
37
+ * the transformers.js engine otherwise.
38
+ * - `"webllm"`: force WebLLM. Throws `WebGPUUnavailableError` on browsers
39
+ * without WebGPU.
40
+ * - `"transformers"`: force the transformers.js engine. Loads from the
41
+ * preset's `transformersId`; throws `BackendNotAvailableError` when the
42
+ * preset has no `transformersId`.
43
+ */
44
+ export declare type BackendChoice = "auto" | "webllm" | "transformers";
45
+
9
46
  /** Thrown when no usable backend is available on the current platform. */
10
47
  export declare class BackendNotAvailableError extends LocalmWebError {
11
48
  }
@@ -118,6 +155,18 @@ export declare class ChatReply {
118
155
  tokensGenerated: number,
119
156
  /** Why the generation loop stopped. */
120
157
  finishReason: FinishReason);
158
+ /**
159
+ * Parse {@link ChatReply.text} as JSON.
160
+ *
161
+ * Intended for replies generated with `json: true` or `jsonSchema`.
162
+ * The result is cast to `T` without runtime validation; pair with Zod /
163
+ * Ajv on the call site if you need to verify the schema.
164
+ *
165
+ * @typeParam T - Expected parsed shape.
166
+ * @returns The parsed JSON value.
167
+ * @throws StructuredOutputError if the text is not valid JSON.
168
+ */
169
+ json<T = unknown>(): T;
121
170
  }
122
171
 
123
172
  /**
@@ -208,6 +257,17 @@ export declare class CompletionResult {
208
257
  tokensGenerated: number,
209
258
  /** Why the generation loop stopped. */
210
259
  finishReason: FinishReason);
260
+ /**
261
+ * Parse {@link CompletionResult.text} as JSON.
262
+ *
263
+ * Intended for completions generated with `json: true` or `jsonSchema`.
264
+ * The result is cast to `T` without runtime validation.
265
+ *
266
+ * @typeParam T - Expected parsed shape.
267
+ * @returns The parsed JSON value.
268
+ * @throws StructuredOutputError if the text is not valid JSON.
269
+ */
270
+ json<T = unknown>(): T;
211
271
  }
212
272
 
213
273
  /**
@@ -423,8 +483,23 @@ export declare interface GenerationOptions {
423
483
  /** Cancellation signal. When triggered, the engine stops generation. */
424
484
  signal?: AbortSignal;
425
485
  /**
426
- * JSON Schema for structured output. The engine constrains decoding to
427
- * produce a string parseable as JSON matching the schema. Planned for v0.4.
486
+ * Force the engine to emit a string parseable as JSON.
487
+ *
488
+ * When `true` (and `jsonSchema` is not also set), the engine maps to
489
+ * WebLLM's `response_format: { type: "json_object" }` — the model is free
490
+ * to choose any JSON shape, but the output is guaranteed to parse.
491
+ *
492
+ * Ignored when {@link GenerationOptions.jsonSchema} is set.
493
+ */
494
+ json?: boolean;
495
+ /**
496
+ * JSON Schema for structured output. When set, the engine constrains
497
+ * decoding (xgrammar inside WebLLM) so the output parses as JSON matching
498
+ * the schema. Takes priority over {@link GenerationOptions.json}.
499
+ *
500
+ * The schema is passed verbatim to the runtime — the SDK does not validate
501
+ * the parsed value against it. Use Ajv/Zod on the consumer side if you
502
+ * need runtime validation in addition to constrained decoding.
428
503
  */
429
504
  jsonSchema?: object;
430
505
  }
@@ -468,7 +543,7 @@ export declare abstract class LMTask {
468
543
  * @param options - Task creation options.
469
544
  */
470
545
  protected static createEngine(modelId: string, options?: LMTaskCreateOptions): Promise<ResolvedEngine>;
471
- private static defaultEngine;
546
+ private static instantiateEngine;
472
547
  /** Release engine resources. Safe to call multiple times. */
473
548
  unload(): Promise<void>;
474
549
  /** Whether the underlying engine has a loaded model. */
@@ -492,8 +567,19 @@ export declare interface LMTaskCreateOptions {
492
567
  * `Worker` support or when debugging the runtime directly).
493
568
  *
494
569
  * Ignored when {@link engine} is provided.
570
+ *
571
+ * **Note (v0.5):** the bundled worker entry only supports the WebLLM
572
+ * backend. When `backend` resolves to `"transformers"` the worker option
573
+ * is forced to `false` and inference runs on the main thread. A worker
574
+ * variant for the transformers.js path is on the v0.6 roadmap.
495
575
  */
496
576
  inWorker?: boolean;
577
+ /**
578
+ * Inference backend selector (v0.5+). Defaults to `"auto"` which picks
579
+ * WebLLM when WebGPU is available and the transformers.js fallback when
580
+ * it is not. See {@link BackendChoice}.
581
+ */
582
+ backend?: BackendChoice;
497
583
  }
498
584
 
499
585
  /**
@@ -669,14 +755,30 @@ export declare interface ModelPreset {
669
755
  quantization: string;
670
756
  /** Identifier expected by the WebLLM runtime. */
671
757
  webllmId: string;
672
- /** Optional ONNX URL used by the future ORT-Web fallback (v0.5+). */
673
- ortUrl?: string;
758
+ /**
759
+ * Optional HuggingFace Hub repo id used by the transformers.js fallback
760
+ * (v0.5+). Models without a `transformersId` cannot run on the fallback
761
+ * path — loading them in a browser without WebGPU raises
762
+ * `BackendNotAvailableError`.
763
+ */
764
+ transformersId?: string;
674
765
  /** Maximum context window in tokens. */
675
766
  contextWindow: number;
676
767
  /** Short human description. */
677
768
  description: string;
678
769
  }
679
770
 
771
+ /**
772
+ * Parse the textual output of a structured-decoding generation as JSON.
773
+ *
774
+ * @typeParam T - The expected parsed shape. The function does not validate
775
+ * the parsed value against `T`; that is the caller's responsibility.
776
+ * @param text - Raw text returned by the engine.
777
+ * @returns The parsed JSON value cast to `T`.
778
+ * @throws StructuredOutputError when the text is not valid JSON.
779
+ */
780
+ export declare function parseStructuredOutput<T = unknown>(text: string): T;
781
+
680
782
  /** Callback signature for model load progress. */
681
783
  export declare type ProgressCallback = (progress: ModelLoadProgress) => void;
682
784
 
@@ -810,6 +912,19 @@ export declare interface RerankPipeline {
810
912
  unload?(): Promise<void>;
811
913
  }
812
914
 
915
+ /**
916
+ * Pure backend resolver, exported for unit tests.
917
+ *
918
+ * @param choice - Caller's preference (`"auto"`, `"webllm"`, `"transformers"`).
919
+ * @param preset - Resolved model preset.
920
+ * @param webGPUAvailable - Whether WebGPU is available in the host environment.
921
+ * @returns The concrete backend to instantiate.
922
+ * @throws BackendNotAvailableError when the choice cannot be satisfied (e.g.
923
+ * `"transformers"` requested but the preset has no `transformersId`, or
924
+ * `"auto"` with no WebGPU and no `transformersId`).
925
+ */
926
+ export declare function resolveBackend(choice: BackendChoice, preset: ModelPreset, webGPUAvailable: boolean): "webllm" | "transformers";
927
+
813
928
  /** Internal payload returned by {@link LMTask.createEngine}. */
814
929
  declare interface ResolvedEngine {
815
930
  engine: Engine;
@@ -856,6 +971,30 @@ export declare type Role = "system" | "user" | "assistant" | "tool";
856
971
  */
857
972
  declare type SerializableGenerationOptions = Omit<GenerationOptions, "signal">;
858
973
 
974
+ /**
975
+ * Serialize a JSON Schema object for the WebLLM `response_format.schema`
976
+ * field.
977
+ *
978
+ * WebLLM expects the schema as a JSON-encoded string (xgrammar parses it
979
+ * inside the runtime). Validates the shape via {@link assertJsonSchema} first.
980
+ *
981
+ * @param schema - JSON Schema object.
982
+ * @returns The schema serialized as a JSON string.
983
+ * @throws StructuredOutputError when `schema` is not a recognizable JSON
984
+ * Schema shape.
985
+ */
986
+ export declare function serializeJsonSchema(schema: unknown): string;
987
+
988
+ /**
989
+ * Thrown when structured output (JSON mode or JSON Schema constrained
990
+ * decoding) fails to parse as valid JSON.
991
+ *
992
+ * Wraps the underlying `SyntaxError` from `JSON.parse` so consumers can
993
+ * distinguish SDK-issued failures from unrelated runtime exceptions.
994
+ */
995
+ export declare class StructuredOutputError extends LocalmWebError {
996
+ }
997
+
859
998
  /**
860
999
  * Wrap an async iterable so that each `TokenChunk` is also passed to a
861
1000
  * caller-supplied side-effect callback before being yielded downstream.
@@ -878,6 +1017,33 @@ export declare interface TokenChunk {
878
1017
  done: boolean;
879
1018
  }
880
1019
 
1020
+ /**
1021
+ * Inference engine backed by
1022
+ * [`@huggingface/transformers`](https://github.com/huggingface/transformers.js)
1023
+ * (transformers.js).
1024
+ *
1025
+ * Used by the SDK as the **fallback path** for browsers without WebGPU and as
1026
+ * an explicit alternative backend selectable via `LMTaskCreateOptions.backend`.
1027
+ * It runs ONNX models on WebGPU when available and on WASM-SIMD otherwise, so
1028
+ * a wider range of browsers can run language models with a graceful — if
1029
+ * slower — degrade.
1030
+ *
1031
+ * The package is an optional peer dependency; import it on the consumer side
1032
+ * before instantiating tasks that resolve to this backend.
1033
+ */
1034
+ export declare class TransformersTextEngine implements Engine {
1035
+ private generator;
1036
+ private currentAbortController;
1037
+ isLoaded(): boolean;
1038
+ load(modelId: string, onProgress?: ProgressCallback): Promise<void>;
1039
+ generate(messages: Message[], options?: GenerationOptions): Promise<string>;
1040
+ stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
1041
+ complete(prompt: string, options?: GenerationOptions): Promise<string>;
1042
+ streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
1043
+ unload(): Promise<void>;
1044
+ private requireGenerator;
1045
+ }
1046
+
881
1047
  /** Thrown when a model id is not present in the curated registry. */
882
1048
  export declare class UnknownModelError extends LocalmWebError {
883
1049
  }
@@ -889,6 +1055,24 @@ export declare const VERSION: string;
889
1055
  export declare class WebGPUUnavailableError extends LocalmWebError {
890
1056
  }
891
1057
 
1058
+ /**
1059
+ * Inference engine backed by [WebLLM (MLC)](https://github.com/mlc-ai/web-llm).
1060
+ *
1061
+ * Requires WebGPU. When WebGPU is missing, the SDK routes to the
1062
+ * transformers.js fallback engine ({@link TransformersTextEngine}) instead.
1063
+ */
1064
+ export declare class WebLLMEngine implements Engine {
1065
+ private engine;
1066
+ isLoaded(): boolean;
1067
+ load(modelId: string, onProgress?: ProgressCallback): Promise<void>;
1068
+ generate(messages: Message[], options?: GenerationOptions): Promise<string>;
1069
+ stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
1070
+ complete(prompt: string, options?: GenerationOptions): Promise<string>;
1071
+ streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
1072
+ unload(): Promise<void>;
1073
+ private requireEngine;
1074
+ }
1075
+
892
1076
  /**
893
1077
  * Engine implementation that proxies all calls to a Web Worker.
894
1078
  *