localm-web 0.1.0 → 0.2.0

package/dist/index.js.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"index.js","sources":["../src/core/exceptions.ts","../src/core/webllm-engine.ts","../src/presets/models.ts","../src/tasks/lm-task.ts","../src/results.ts","../src/tasks/chat.ts","../src/streaming/token-stream.ts","../src/index.ts"],"sourcesContent":["/**\n * Error hierarchy for localm-web.\n *\n * All errors thrown by the SDK extend `LocalmWebError` so consumers can\n * distinguish SDK errors from unrelated runtime errors with a single\n * `instanceof` check.\n */\n\n/** Base class for every error raised by localm-web. */\nexport class LocalmWebError extends Error {\n /**\n * @param message - Human-readable description of the error.\n * @param cause - Underlying error, if any.\n */\n constructor(\n message: string,\n public readonly cause?: unknown\n ) {\n super(message);\n this.name = new.target.name;\n }\n}\n\n/** Thrown when WebGPU is required but not available in the host browser. */\nexport class WebGPUUnavailableError extends LocalmWebError {}\n\n/** Thrown when a model fails to load (network, parsing, runtime init). */\nexport class ModelLoadError extends LocalmWebError {}\n\n/** Thrown when an inference call is made before a model has loaded. */\nexport class ModelNotLoadedError extends LocalmWebError {}\n\n/** Thrown when a model id is not present in the curated registry. */\nexport class UnknownModelError extends LocalmWebError {}\n\n/** Thrown when generation is aborted via an `AbortSignal`. */\nexport class GenerationAbortedError extends LocalmWebError {}\n\n/** Thrown when the browser denies storage quota for the model cache. */\nexport class QuotaExceededError extends LocalmWebError {}\n\n/** Thrown when no usable backend is available on the current platform. */\nexport class BackendNotAvailableError extends LocalmWebError {}\n","import type { Engine } from \"./engine\";\nimport type { GenerationOptions, Message, ProgressCallback, TokenChunk } from \"../types\";\nimport {\n GenerationAbortedError,\n ModelLoadError,\n ModelNotLoadedError,\n WebGPUUnavailableError,\n} from \"./exceptions\";\n\ntype WebLLMModule = typeof import(\"@mlc-ai/web-llm\");\ntype MLCEngine = import(\"@mlc-ai/web-llm\").MLCEngineInterface;\ntype ChatCompletionMessageParam = import(\"@mlc-ai/web-llm\").ChatCompletionMessageParam;\n\nlet webllmModulePromise: Promise<WebLLMModule> | null = null;\n\nasync function loadWebLLM(): Promise<WebLLMModule> {\n if (!webllmModulePromise) {\n webllmModulePromise = import(\"@mlc-ai/web-llm\");\n }\n return webllmModulePromise;\n}\n\nfunction isWebGPUAvailable(): boolean {\n return typeof navigator !== \"undefined\" && \"gpu\" in navigator;\n}\n\ninterface SamplingParams {\n max_tokens?: number;\n temperature?: number;\n top_p?: number;\n}\n\nfunction buildSamplingParams(options: GenerationOptions): SamplingParams {\n const params: SamplingParams = {};\n if (options.maxTokens !== undefined) params.max_tokens = options.maxTokens;\n if (options.temperature !== undefined) params.temperature = options.temperature;\n if (options.topP !== undefined) params.top_p = options.topP;\n return params;\n}\n\nfunction toChatMessages(messages: Message[]): ChatCompletionMessageParam[] {\n return messages.map((m): ChatCompletionMessageParam => {\n switch (m.role) {\n case \"system\":\n return { role: \"system\", content: m.content };\n case \"user\":\n return { role: \"user\", content: m.content };\n case \"assistant\":\n return { role: \"assistant\", content: m.content };\n case \"tool\":\n return { role: \"tool\", content: m.content, tool_call_id: m.name ?? 
\"\" };\n }\n });\n}\n\n/**\n * Inference engine backed by [WebLLM (MLC)](https://github.com/mlc-ai/web-llm).\n *\n * Requires WebGPU. The fallback path planned for v0.5 will route to ORT-Web\n * when WebGPU is missing.\n */\nexport class WebLLMEngine implements Engine {\n private engine: MLCEngine | null = null;\n\n isLoaded(): boolean {\n return this.engine !== null;\n }\n\n async load(modelId: string, onProgress?: ProgressCallback): Promise<void> {\n if (!isWebGPUAvailable()) {\n throw new WebGPUUnavailableError(\n \"WebGPU is not available in this browser. The ORT-Web fallback is planned for v0.5.\"\n );\n }\n const webllm = await loadWebLLM();\n try {\n this.engine = await webllm.CreateMLCEngine(modelId, {\n initProgressCallback: (report): void => {\n onProgress?.({\n progress: report.progress,\n text: report.text,\n loaded: 0,\n total: 0,\n });\n },\n });\n } catch (err) {\n throw new ModelLoadError(`Failed to load model \"${modelId}\".`, err);\n }\n }\n\n async generate(messages: Message[], options: GenerationOptions = {}): Promise<string> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const completion = await engine.chat.completions.create({\n ...buildSamplingParams(options),\n messages: toChatMessages(messages),\n stream: false,\n });\n return completion.choices[0]?.message?.content ?? \"\";\n }\n\n async *stream(messages: Message[], options: GenerationOptions = {}): AsyncIterable<TokenChunk> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const completion = await engine.chat.completions.create({\n ...buildSamplingParams(options),\n messages: toChatMessages(messages),\n stream: true,\n });\n let index: number = 0;\n let finished: boolean = false;\n try {\n for await (const chunk of completion) {\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted by signal.\");\n }\n const choice = chunk.choices[0];\n const delta = choice?.delta?.content ?? \"\";\n if (delta) {\n yield { text: delta, index, done: false };\n index += 1;\n }\n if (choice?.finish_reason) {\n finished = true;\n yield { text: \"\", index, done: true };\n index += 1;\n }\n }\n if (!finished) {\n yield { text: \"\", index, done: true };\n }\n } catch (err) {\n if (err instanceof GenerationAbortedError) throw err;\n throw new ModelLoadError(\"Streaming generation failed.\", err);\n }\n }\n\n async unload(): Promise<void> {\n if (this.engine) {\n await this.engine.unload();\n this.engine = null;\n }\n }\n\n private requireEngine(): MLCEngine {\n if (!this.engine) {\n throw new ModelNotLoadedError(\"Engine not loaded. Call load() before generation.\");\n }\n return this.engine;\n }\n}\n","import type { ModelPreset } from \"../types\";\nimport { UnknownModelError } from \"../core/exceptions\";\n\n/**\n * Curated registry of supported models for v0.1.\n *\n * Each entry maps a friendly id (e.g. `\"phi-3.5-mini-int4\"`) to the underlying\n * runtime identifier and metadata. 
Friendly ids are stable; backend ids may\n * change as upstream MLC packages evolve.\n *\n * Only models that have been validated to load in browsers with WebGPU and\n * that fit the SLM target (≤ 4B parameters at INT4) are included.\n */\nexport const MODEL_PRESETS: Readonly<Record<string, ModelPreset>> = Object.freeze({\n \"phi-3.5-mini-int4\": {\n id: \"phi-3.5-mini-int4\",\n family: \"Phi-3.5\",\n parameters: \"3.8B\",\n quantization: \"q4f16_1\",\n webllmId: \"Phi-3.5-mini-instruct-q4f16_1-MLC\",\n contextWindow: 4096,\n description: \"Microsoft Phi-3.5 mini, INT4 quantized for browser inference.\",\n },\n \"llama-3.2-1b-int4\": {\n id: \"llama-3.2-1b-int4\",\n family: \"Llama-3.2\",\n parameters: \"1B\",\n quantization: \"q4f16_1\",\n webllmId: \"Llama-3.2-1B-Instruct-q4f16_1-MLC\",\n contextWindow: 4096,\n description: \"Meta Llama 3.2 1B Instruct, INT4 quantized.\",\n },\n \"qwen2.5-1.5b-int4\": {\n id: \"qwen2.5-1.5b-int4\",\n family: \"Qwen2.5\",\n parameters: \"1.5B\",\n quantization: \"q4f16_1\",\n webllmId: \"Qwen2.5-1.5B-Instruct-q4f16_1-MLC\",\n contextWindow: 4096,\n description: \"Alibaba Qwen 2.5 1.5B Instruct, INT4 quantized.\",\n },\n});\n\n/**\n * Resolve a friendly model id to its full preset metadata.\n *\n * @param modelId - Friendly id (e.g. `\"phi-3.5-mini-int4\"`).\n * @returns The matching preset.\n * @throws UnknownModelError if no preset matches.\n */\nexport function resolveModelPreset(modelId: string): ModelPreset {\n const preset = MODEL_PRESETS[modelId];\n if (!preset) {\n const available = Object.keys(MODEL_PRESETS).join(\", \");\n throw new UnknownModelError(`Unknown model \"${modelId}\". Available models: ${available}.`);\n }\n return preset;\n}\n\n/** Return the list of supported friendly model ids. */\nexport function listSupportedModels(): string[] {\n return Object.keys(MODEL_PRESETS);\n}\n","import type { Engine } from \"../core/engine\";\nimport { WebLLMEngine } from \"../core/webllm-engine\";\nimport { resolveModelPreset } from \"../presets/models\";\nimport type { ModelPreset, ProgressCallback } from \"../types\";\n\n/** Common options accepted by every task's `create()` factory. */\nexport interface LMTaskCreateOptions {\n /** Optional callback for model load progress updates. */\n onProgress?: ProgressCallback;\n /**\n * Override the engine used for inference. Intended for testing.\n * Production callers should let the SDK pick a backend automatically.\n */\n engine?: Engine;\n}\n\n/** Internal payload returned by {@link LMTask.createEngine}. */\nexport interface ResolvedEngine {\n engine: Engine;\n preset: ModelPreset;\n}\n\n/**\n * Base class shared by all language-model tasks (`Chat` for v0.1; `Completion`,\n * `Embeddings` and `Reranker` planned for later versions).\n *\n * The base owns:\n * - resolving a friendly model id to a {@link ModelPreset};\n * - selecting and loading an {@link Engine} (defaulting to WebLLM);\n * - exposing `unload()` for cleanup.\n *\n * Subclasses add task-specific public methods (`send`, `stream`, etc.).\n */\nexport abstract class LMTask {\n protected constructor(\n /** Engine used for inference. */\n protected readonly engine: Engine,\n /** Resolved metadata for the loaded model. 
*/\n public readonly preset: ModelPreset\n ) {}\n\n /**\n * Load a model into a backend and return the wired-up engine + preset.\n *\n * Subclasses call this from their static `create()` factories.\n *\n * @param modelId - Friendly model id from the registry.\n * @param options - Task creation options.\n */\n protected static async createEngine(\n modelId: string,\n options: LMTaskCreateOptions = {}\n ): Promise<ResolvedEngine> {\n const preset = resolveModelPreset(modelId);\n const engine = options.engine ?? new WebLLMEngine();\n if (!engine.isLoaded()) {\n await engine.load(preset.webllmId, options.onProgress);\n }\n return { engine, preset };\n }\n\n /** Release engine resources. Safe to call multiple times. */\n async unload(): Promise<void> {\n await this.engine.unload();\n }\n\n /** Whether the underlying engine has a loaded model. */\n isLoaded(): boolean {\n return this.engine.isLoaded();\n }\n}\n","import type { FinishReason, Message } from \"./types\";\n\n/**\n * Result returned by `Chat.send()`.\n *\n * Holds the assistant's textual reply, the structured assistant message\n * (already appended to the chat history), and metadata about the generation.\n */\nexport class ChatReply {\n constructor(\n /** The assistant's reply text. */\n public readonly text: string,\n /** The structured assistant message (already appended to chat history). */\n public readonly message: Message,\n /** Number of tokens generated. 0 when the engine does not report it. */\n public readonly tokensGenerated: number,\n /** Why the generation loop stopped. */\n public readonly finishReason: FinishReason\n ) {}\n}\n","import { LMTask, type LMTaskCreateOptions } from \"./lm-task\";\nimport type { Engine } from \"../core/engine\";\nimport { ChatReply } from \"../results\";\nimport type { GenerationOptions, Message, ModelPreset, TokenChunk } from \"../types\";\n\n/**\n * Multi-turn chat task.\n *\n * Maintains an in-memory conversation history and applies the chat template\n * configured for the loaded model. Use {@link Chat.create} to construct an\n * instance — the constructor is private.\n *\n * @example\n * ```ts\n * const chat = await Chat.create(\"phi-3.5-mini-int4\");\n * const reply = await chat.send(\"Explain ONNX in one sentence.\");\n * console.log(reply.text);\n * ```\n *\n * @example Streaming\n * ```ts\n * const controller = new AbortController();\n * for await (const token of chat.stream(\"Explain ONNX.\", { signal: controller.signal })) {\n * process.stdout.write(token.text);\n * }\n * ```\n */\nexport class Chat extends LMTask {\n private readonly history: Message[] = [];\n private systemPrompt: string | null = null;\n\n private constructor(engine: Engine, preset: ModelPreset) {\n super(engine, preset);\n }\n\n /**\n * Create and load a `Chat` task for the given model.\n *\n * @param modelId - Friendly model id from the registry (e.g. `\"phi-3.5-mini-int4\"`).\n * @param options - Optional creation options (progress callback, engine override).\n */\n static async create(modelId: string, options: LMTaskCreateOptions = {}): Promise<Chat> {\n const { engine, preset } = await LMTask.createEngine(modelId, options);\n return new Chat(engine, preset);\n }\n\n /** Set or replace the system prompt prepended to every conversation. */\n setSystemPrompt(prompt: string): void {\n this.systemPrompt = prompt;\n }\n\n /** Clear the system prompt. */\n clearSystemPrompt(): void {\n this.systemPrompt = null;\n }\n\n /** Reset the conversation history. The system prompt is preserved. 
*/\n resetHistory(): void {\n this.history.length = 0;\n }\n\n /** A read-only snapshot of the conversation history. */\n getHistory(): readonly Message[] {\n return this.history.slice();\n }\n\n /**\n * Send a user message and await the full assistant reply.\n *\n * The user message and the assistant reply are appended to the history.\n *\n * @param message - The user-facing message text.\n * @param options - Generation options.\n * @returns A {@link ChatReply} with the assistant's reply.\n */\n async send(message: string, options: GenerationOptions = {}): Promise<ChatReply> {\n const messages = this.buildMessages(message);\n const text = await this.engine.generate(messages, options);\n const userMsg: Message = { role: \"user\", content: message };\n const assistantMsg: Message = { role: \"assistant\", content: text };\n this.history.push(userMsg, assistantMsg);\n return new ChatReply(text, assistantMsg, 0, \"stop\");\n }\n\n /**\n * Stream the assistant reply token-by-token as an async iterable.\n *\n * The full reply is appended to the history when the stream completes\n * normally. If the stream is aborted, neither message is appended.\n *\n * @param message - The user-facing message text.\n * @param options - Generation options including an optional `signal`.\n */\n async *stream(message: string, options: GenerationOptions = {}): AsyncIterable<TokenChunk> {\n const messages = this.buildMessages(message);\n const userMsg: Message = { role: \"user\", content: message };\n let acc: string = \"\";\n for await (const chunk of this.engine.stream(messages, options)) {\n acc += chunk.text;\n yield chunk;\n }\n const assistantMsg: Message = { role: \"assistant\", content: acc };\n this.history.push(userMsg, assistantMsg);\n }\n\n private buildMessages(userMessage: string): Message[] {\n const messages: Message[] = [];\n if (this.systemPrompt) {\n messages.push({ role: \"system\", content: this.systemPrompt });\n }\n messages.push(...this.history);\n messages.push({ role: \"user\", content: userMessage });\n return messages;\n }\n}\n","import type { TokenChunk } from \"../types\";\n\n/**\n * Drain an async iterable of token chunks into a single string.\n *\n * Useful in tests, for non-streaming consumers, and as a one-line way to\n * reconstruct the final text from a `Chat.stream(...)` call.\n *\n * @param stream - The token-chunk async iterable to consume.\n * @returns The concatenation of every chunk's `text` field.\n */\nexport async function collectStream(stream: AsyncIterable<TokenChunk>): Promise<string> {\n let acc: string = \"\";\n for await (const chunk of stream) {\n acc += chunk.text;\n }\n return acc;\n}\n\n/**\n * Wrap an async iterable so that each `TokenChunk` is also passed to a\n * caller-supplied side-effect callback before being yielded downstream.\n *\n * This is intentionally a passthrough — it does not buffer.\n *\n * @param stream - The upstream token-chunk async iterable.\n * @param onChunk - Side-effect invoked for every chunk.\n * @returns A new async iterable yielding the same chunks.\n */\nexport async function* tap(\n stream: AsyncIterable<TokenChunk>,\n onChunk: (chunk: TokenChunk) => void\n): AsyncIterable<TokenChunk> {\n for await (const chunk of stream) {\n onChunk(chunk);\n yield chunk;\n }\n}\n","/**\n * localm-web — browser-only TypeScript SDK for running LLMs and SLMs locally.\n *\n * Public API surface for v0.1.\n *\n * @packageDocumentation\n */\n\nexport { Chat } from \"./tasks/chat\";\nexport { LMTask } from \"./tasks/lm-task\";\nexport type { 
LMTaskCreateOptions } from \"./tasks/lm-task\";\n\nexport { ChatReply } from \"./results\";\n\nexport { MODEL_PRESETS, resolveModelPreset, listSupportedModels } from \"./presets/models\";\n\nexport {\n LocalmWebError,\n WebGPUUnavailableError,\n ModelLoadError,\n ModelNotLoadedError,\n UnknownModelError,\n GenerationAbortedError,\n QuotaExceededError,\n BackendNotAvailableError,\n} from \"./core/exceptions\";\n\nexport type { Engine } from \"./core/engine\";\n\nexport { collectStream, tap } from \"./streaming/token-stream\";\n\nexport type {\n Role,\n FinishReason,\n Message,\n GenerationOptions,\n ModelLoadProgress,\n ProgressCallback,\n TokenChunk,\n ModelPreset,\n} from \"./types\";\n\n/** Current package version. Updated at release time. */\nexport const VERSION: string = \"0.1.0\";\n"],"names":[],"mappings":"AASO,MAAM,uBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKxC,YACE,SACgB,OAChB;AACA,UAAM,OAAO;AAFG,SAAA,QAAA;AAGhB,SAAK,OAAO,WAAW;AAAA,EACzB;AACF;AAGO,MAAM,+BAA+B,eAAe;AAAC;AAGrD,MAAM,uBAAuB,eAAe;AAAC;AAG7C,MAAM,4BAA4B,eAAe;AAAC;AAGlD,MAAM,0BAA0B,eAAe;AAAC;AAGhD,MAAM,+BAA+B,eAAe;AAAC;AAGrD,MAAM,2BAA2B,eAAe;AAAC;AAGjD,MAAM,iCAAiC,eAAe;AAAC;AC7B9D,IAAI,sBAAoD;AAExD,eAAe,aAAoC;AACjD,MAAI,CAAC,qBAAqB;AACxB,0BAAsB,OAAO,iBAAiB;AAAA,EAChD;AACA,SAAO;AACT;AAEA,SAAS,oBAA6B;AACpC,SAAO,OAAO,cAAc,eAAe,SAAS;AACtD;AAQA,SAAS,oBAAoB,SAA4C;AACvE,QAAM,SAAyB,CAAA;AAC/B,MAAI,QAAQ,cAAc,OAAW,QAAO,aAAa,QAAQ;AACjE,MAAI,QAAQ,gBAAgB,OAAW,QAAO,cAAc,QAAQ;AACpE,MAAI,QAAQ,SAAS,OAAW,QAAO,QAAQ,QAAQ;AACvD,SAAO;AACT;AAEA,SAAS,eAAe,UAAmD;AACzE,SAAO,SAAS,IAAI,CAAC,MAAkC;AACrD,YAAQ,EAAE,MAAA;AAAA,MACR,KAAK;AACH,eAAO,EAAE,MAAM,UAAU,SAAS,EAAE,QAAA;AAAA,MACtC,KAAK;AACH,eAAO,EAAE,MAAM,QAAQ,SAAS,EAAE,QAAA;AAAA,MACpC,KAAK;AACH,eAAO,EAAE,MAAM,aAAa,SAAS,EAAE,QAAA;AAAA,MACzC,KAAK;AACH,eAAO,EAAE,MAAM,QAAQ,SAAS,EAAE,SAAS,cAAc,EAAE,QAAQ,GAAA;AAAA,IAAG;AAAA,EAE5E,CAAC;AACH;AAQO,MAAM,aAA+B;AAAA,EAClC,SAA2B;AAAA,EAEnC,WAAoB;AAClB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,MAAM,KAAK,SAAiB,YAA8C;AACxE,QAAI,CAAC,qBAAqB;AACxB,YAAM,IAAI;AAAA,QACR;AAAA,MAAA;AAAA,IAEJ;AACA,UAAM,SAAS,MAAM,WAAA;AACrB,QAAI;AACF,WAAK,SAAS,MAAM,OAAO,gBAAgB,SAAS;AAAA,QAClD,sBAAsB,CAAC,WAAiB;AACtC,uBAAa;AAAA,YACX,UAAU,OAAO;AAAA,YACjB,MAAM,OAAO;AAAA,YACb,QAAQ;AAAA,YACR,OAAO;AAAA,UAAA,CACR;AAAA,QACH;AAAA,MAAA,CACD;AAAA,IACH,SAAS,KAAK;AACZ,YAAM,IAAI,eAAe,yBAAyB,OAAO,MAAM,GAAG;AAAA,IACpE;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,UAAqB,UAA6B,IAAqB;AACpF,UAAM,SAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,aAAa,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,MACtD,GAAG,oBAAoB,OAAO;AAAA,MAC9B,UAAU,eAAe,QAAQ;AAAA,MACjC,QAAQ;AAAA,IAAA,CACT;AACD,WAAO,WAAW,QAAQ,CAAC,GAAG,SAAS,WAAW;AAAA,EACpD;AAAA,EAEA,OAAO,OAAO,UAAqB,UAA6B,IAA+B;AAC7F,UAAM,SAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,aAAa,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,MACtD,GAAG,oBAAoB,OAAO;AAAA,MAC9B,UAAU,eAAe,QAAQ;AAAA,MACjC,QAAQ;AAAA,IAAA,CACT;AACD,QAAI,QAAgB;AACpB,QAAI,WAAoB;AACxB,QAAI;AACF,uBAAiB,SAAS,YAAY;AACpC,YAAI,QAAQ,QAAQ,SAAS;AAC3B,gBAAM,IAAI,uBAAuB,+BAA+B;AAAA,QAClE;AACA,cAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,cAAM,QAAQ,QAAQ,OAAO,WAAW;AACxC,YAAI,OAAO;AACT,gBAAM,EAAE,MAAM,OAAO,OAAO,MAAM,MAAA;AAClC,mBAAS;AAAA,QACX;AACA,YAAI,QAAQ,eAAe;AACzB,qBAAW;AACX,gBAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAC/B,mBAAS;AAAA,QACX;AAAA,MACF;AACA,UAAI,CAAC,UAAU;AACb,cAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAAA,MACjC;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,eAAe,uBAAwB,OAAM;AACjD,YAAM,IAAI,eAAe,gCAAgC,GAAG;AAAA,IAC9D;AAAA,EACF;AAAA,EAEA,MAAM,SAAwB;AAC5B,QAAI,KAAK,QAAQ;AACf,YAAM,KAAK,OAAO,OAAA;AAClB,WAAK,SAAS;AAAA,IAChB;A
AAA,EACF;AAAA,EAEQ,gBAA2B;AACjC,QAAI,CAAC,KAAK,QAAQ;AAChB,YAAM,IAAI,oBAAoB,mDAAmD;AAAA,IACnF;AACA,WAAO,KAAK;AAAA,EACd;AACF;AC9IO,MAAM,gBAAuD,OAAO,OAAO;AAAA,EAChF,qBAAqB;AAAA,IACnB,IAAI;AAAA,IACJ,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ,cAAc;AAAA,IACd,UAAU;AAAA,IACV,eAAe;AAAA,IACf,aAAa;AAAA,EAAA;AAAA,EAEf,qBAAqB;AAAA,IACnB,IAAI;AAAA,IACJ,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ,cAAc;AAAA,IACd,UAAU;AAAA,IACV,eAAe;AAAA,IACf,aAAa;AAAA,EAAA;AAAA,EAEf,qBAAqB;AAAA,IACnB,IAAI;AAAA,IACJ,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ,cAAc;AAAA,IACd,UAAU;AAAA,IACV,eAAe;AAAA,IACf,aAAa;AAAA,EAAA;AAEjB,CAAC;AASM,SAAS,mBAAmB,SAA8B;AAC/D,QAAM,SAAS,cAAc,OAAO;AACpC,MAAI,CAAC,QAAQ;AACX,UAAM,YAAY,OAAO,KAAK,aAAa,EAAE,KAAK,IAAI;AACtD,UAAM,IAAI,kBAAkB,kBAAkB,OAAO,wBAAwB,SAAS,GAAG;AAAA,EAC3F;AACA,SAAO;AACT;AAGO,SAAS,sBAAgC;AAC9C,SAAO,OAAO,KAAK,aAAa;AAClC;AC7BO,MAAe,OAAO;AAAA,EACjB,YAEW,QAEH,QAChB;AAHmB,SAAA,SAAA;AAEH,SAAA,SAAA;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUH,aAAuB,aACrB,SACA,UAA+B,IACN;AACzB,UAAM,SAAS,mBAAmB,OAAO;AACzC,UAAM,SAAS,QAAQ,UAAU,IAAI,aAAA;AACrC,QAAI,CAAC,OAAO,YAAY;AACtB,YAAM,OAAO,KAAK,OAAO,UAAU,QAAQ,UAAU;AAAA,IACvD;AACA,WAAO,EAAE,QAAQ,OAAA;AAAA,EACnB;AAAA;AAAA,EAGA,MAAM,SAAwB;AAC5B,UAAM,KAAK,OAAO,OAAA;AAAA,EACpB;AAAA;AAAA,EAGA,WAAoB;AAClB,WAAO,KAAK,OAAO,SAAA;AAAA,EACrB;AACF;AC9DO,MAAM,UAAU;AAAA,EACrB,YAEkB,MAEA,SAEA,iBAEA,cAChB;AAPgB,SAAA,OAAA;AAEA,SAAA,UAAA;AAEA,SAAA,kBAAA;AAEA,SAAA,eAAA;AAAA,EACf;AACL;ACQO,MAAM,aAAa,OAAO;AAAA,EACd,UAAqB,CAAA;AAAA,EAC9B,eAA8B;AAAA,EAE9B,YAAY,QAAgB,QAAqB;AACvD,UAAM,QAAQ,MAAM;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,aAAa,OAAO,SAAiB,UAA+B,IAAmB;AACrF,UAAM,EAAE,QAAQ,OAAA,IAAW,MAAM,OAAO,aAAa,SAAS,OAAO;AACrE,WAAO,IAAI,KAAK,QAAQ,MAAM;AAAA,EAChC;AAAA;AAAA,EAGA,gBAAgB,QAAsB;AACpC,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,oBAA0B;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,eAAqB;AACnB,SAAK,QAAQ,SAAS;AAAA,EACxB;AAAA;AAAA,EAGA,aAAiC;AAC/B,WAAO,KAAK,QAAQ,MAAA;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,KAAK,SAAiB,UAA6B,IAAwB;AAC/E,UAAM,WAAW,KAAK,cAAc,OAAO;AAC3C,UAAM,OAAO,MAAM,KAAK,OAAO,SAAS,UAAU,OAAO;AACzD,UAAM,UAAmB,EAAE,MAAM,QAAQ,SAAS,QAAA;AAClD,UAAM,eAAwB,EAAE,MAAM,aAAa,SAAS,KAAA;AAC5D,SAAK,QAAQ,KAAK,SAAS,YAAY;AACvC,WAAO,IAAI,UAAU,MAAM,cAAc,GAAG,MAAM;AAAA,EACpD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,OAAO,OAAO,SAAiB,UAA6B,IAA+B;AACzF,UAAM,WAAW,KAAK,cAAc,OAAO;AAC3C,UAAM,UAAmB,EAAE,MAAM,QAAQ,SAAS,QAAA;AAClD,QAAI,MAAc;AAClB,qBAAiB,SAAS,KAAK,OAAO,OAAO,UAAU,OAAO,GAAG;AAC/D,aAAO,MAAM;AACb,YAAM;AAAA,IACR;AACA,UAAM,eAAwB,EAAE,MAAM,aAAa,SAAS,IAAA;AAC5D,SAAK,QAAQ,KAAK,SAAS,YAAY;AAAA,EACzC;AAAA,EAEQ,cAAc,aAAgC;AACpD,UAAM,WAAsB,CAAA;AAC5B,QAAI,KAAK,cAAc;AACrB,eAAS,KAAK,EAAE,MAAM,UAAU,SAAS,KAAK,cAAc;AAAA,IAC9D;AACA,aAAS,KAAK,GAAG,KAAK,OAAO;AAC7B,aAAS,KAAK,EAAE,MAAM,QAAQ,SAAS,aAAa;AACpD,WAAO;AAAA,EACT;AACF;ACvGA,eAAsB,cAAc,QAAoD;AACtF,MAAI,MAAc;AAClB,mBAAiB,SAAS,QAAQ;AAChC,WAAO,MAAM;AAAA,EACf;AACA,SAAO;AACT;AAYA,gBAAuB,IACrB,QACA,SAC2B;AAC3B,mBAAiB,SAAS,QAAQ;AAChC,YAAQ,KAAK;AACb,UAAM;AAAA,EACR;AACF;ACMO,MAAM,UAAkB;"}
+ {"version":3,"file":"index.js","sources":["../src/core/load-phase.ts","../src/core/exceptions.ts","../src/core/webllm-engine.ts","../src/worker/protocol.ts","../src/core/worker-engine.ts","../src/presets/models.ts","../src/worker/create-worker.ts","../src/tasks/lm-task.ts","../src/results.ts","../src/tasks/chat.ts","../src/tasks/completion.ts","../src/cache/model-cache.ts","../src/streaming/token-stream.ts","../src/index.ts"],"sourcesContent":["import type { ModelLoadPhase } from \"../types\";\n\nconst DOWNLOAD_PATTERN: RegExp = /\\b(fetch|download|loading from cache|cache hit|param)/i;\nconst COMPILE_PATTERN: RegExp = /\\b(compil|shader|kernel|tensor|init|allocat|warm)/i;\n\n/**\n * Classify a runtime status text into a {@link ModelLoadPhase}.\n *\n * Heuristic: match download-related verbs first (network or cache hits are\n * treated as `downloading`), then compile-related verbs. Anything else falls\n * back to the generic `loading` bucket. The `ready` phase is never returned\n * here — callers emit it explicitly when the load resolves.\n *\n * @param text - The raw status string from the runtime.\n * @returns The classified phase.\n */\nexport function classifyLoadPhase(text: string): ModelLoadPhase {\n if (DOWNLOAD_PATTERN.test(text)) return \"downloading\";\n if (COMPILE_PATTERN.test(text)) return \"compiling\";\n return \"loading\";\n}\n","/**\n * Error hierarchy for localm-web.\n *\n * All errors thrown by the SDK extend `LocalmWebError` so consumers can\n * distinguish SDK errors from unrelated runtime errors with a single\n * `instanceof` check.\n */\n\n/** Base class for every error raised by localm-web. */\nexport class LocalmWebError extends Error {\n /**\n * @param message - Human-readable description of the error.\n * @param cause - Underlying error, if any.\n */\n constructor(\n message: string,\n public readonly cause?: unknown\n ) {\n super(message);\n this.name = new.target.name;\n }\n}\n\n/** Thrown when WebGPU is required but not available in the host browser. */\nexport class WebGPUUnavailableError extends LocalmWebError {}\n\n/** Thrown when a model fails to load (network, parsing, runtime init). */\nexport class ModelLoadError extends LocalmWebError {}\n\n/** Thrown when an inference call is made before a model has loaded. */\nexport class ModelNotLoadedError extends LocalmWebError {}\n\n/** Thrown when a model id is not present in the curated registry. */\nexport class UnknownModelError extends LocalmWebError {}\n\n/** Thrown when generation is aborted via an `AbortSignal`. */\nexport class GenerationAbortedError extends LocalmWebError {}\n\n/** Thrown when the browser denies storage quota for the model cache. */\nexport class QuotaExceededError extends LocalmWebError {}\n\n/** Thrown when no usable backend is available on the current platform. 
*/\nexport class BackendNotAvailableError extends LocalmWebError {}\n","import type { Engine } from \"./engine\";\nimport { classifyLoadPhase } from \"./load-phase\";\nimport type { GenerationOptions, Message, ProgressCallback, TokenChunk } from \"../types\";\nimport {\n GenerationAbortedError,\n ModelLoadError,\n ModelNotLoadedError,\n WebGPUUnavailableError,\n} from \"./exceptions\";\n\ntype WebLLMModule = typeof import(\"@mlc-ai/web-llm\");\ntype MLCEngine = import(\"@mlc-ai/web-llm\").MLCEngineInterface;\ntype ChatCompletionMessageParam = import(\"@mlc-ai/web-llm\").ChatCompletionMessageParam;\n\nlet webllmModulePromise: Promise<WebLLMModule> | null = null;\n\nasync function loadWebLLM(): Promise<WebLLMModule> {\n if (!webllmModulePromise) {\n webllmModulePromise = import(\"@mlc-ai/web-llm\");\n }\n return webllmModulePromise;\n}\n\nfunction isWebGPUAvailable(): boolean {\n return typeof navigator !== \"undefined\" && \"gpu\" in navigator;\n}\n\ninterface SamplingParams {\n max_tokens?: number;\n temperature?: number;\n top_p?: number;\n}\n\nfunction buildSamplingParams(options: GenerationOptions): SamplingParams {\n const params: SamplingParams = {};\n if (options.maxTokens !== undefined) params.max_tokens = options.maxTokens;\n if (options.temperature !== undefined) params.temperature = options.temperature;\n if (options.topP !== undefined) params.top_p = options.topP;\n return params;\n}\n\nfunction toChatMessages(messages: Message[]): ChatCompletionMessageParam[] {\n return messages.map((m): ChatCompletionMessageParam => {\n switch (m.role) {\n case \"system\":\n return { role: \"system\", content: m.content };\n case \"user\":\n return { role: \"user\", content: m.content };\n case \"assistant\":\n return { role: \"assistant\", content: m.content };\n case \"tool\":\n return { role: \"tool\", content: m.content, tool_call_id: m.name ?? \"\" };\n }\n });\n}\n\n/**\n * Inference engine backed by [WebLLM (MLC)](https://github.com/mlc-ai/web-llm).\n *\n * Requires WebGPU. The fallback path planned for v0.5 will route to ORT-Web\n * when WebGPU is missing.\n */\nexport class WebLLMEngine implements Engine {\n private engine: MLCEngine | null = null;\n\n isLoaded(): boolean {\n return this.engine !== null;\n }\n\n async load(modelId: string, onProgress?: ProgressCallback): Promise<void> {\n if (!isWebGPUAvailable()) {\n throw new WebGPUUnavailableError(\n \"WebGPU is not available in this browser. The ORT-Web fallback is planned for v0.5.\"\n );\n }\n const webllm = await loadWebLLM();\n try {\n this.engine = await webllm.CreateMLCEngine(modelId, {\n initProgressCallback: (report): void => {\n onProgress?.({\n progress: report.progress,\n text: report.text,\n loaded: 0,\n total: 0,\n phase: classifyLoadPhase(report.text),\n });\n },\n });\n onProgress?.({\n progress: 1,\n text: \"Model ready.\",\n loaded: 0,\n total: 0,\n phase: \"ready\",\n });\n } catch (err) {\n throw new ModelLoadError(`Failed to load model \"${modelId}\".`, err);\n }\n }\n\n async generate(messages: Message[], options: GenerationOptions = {}): Promise<string> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const completion = await engine.chat.completions.create({\n ...buildSamplingParams(options),\n messages: toChatMessages(messages),\n stream: false,\n });\n return completion.choices[0]?.message?.content ?? 
\"\";\n }\n\n async *stream(messages: Message[], options: GenerationOptions = {}): AsyncIterable<TokenChunk> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const completion = await engine.chat.completions.create({\n ...buildSamplingParams(options),\n messages: toChatMessages(messages),\n stream: true,\n });\n let index: number = 0;\n let finished: boolean = false;\n try {\n for await (const chunk of completion) {\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted by signal.\");\n }\n const choice = chunk.choices[0];\n const delta = choice?.delta?.content ?? \"\";\n if (delta) {\n yield { text: delta, index, done: false };\n index += 1;\n }\n if (choice?.finish_reason) {\n finished = true;\n yield { text: \"\", index, done: true };\n index += 1;\n }\n }\n if (!finished) {\n yield { text: \"\", index, done: true };\n }\n } catch (err) {\n if (err instanceof GenerationAbortedError) throw err;\n throw new ModelLoadError(\"Streaming generation failed.\", err);\n }\n }\n\n async complete(prompt: string, options: GenerationOptions = {}): Promise<string> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const completion = await engine.completions.create({\n ...buildSamplingParams(options),\n prompt,\n stream: false,\n });\n return completion.choices[0]?.text ?? \"\";\n }\n\n async *streamCompletion(\n prompt: string,\n options: GenerationOptions = {}\n ): AsyncIterable<TokenChunk> {\n const engine = this.requireEngine();\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted before start.\");\n }\n const completion = await engine.completions.create({\n ...buildSamplingParams(options),\n prompt,\n stream: true,\n });\n let index: number = 0;\n let finished: boolean = false;\n try {\n for await (const chunk of completion) {\n if (options.signal?.aborted) {\n throw new GenerationAbortedError(\"Generation aborted by signal.\");\n }\n const choice = chunk.choices[0];\n const delta = choice?.text ?? \"\";\n if (delta) {\n yield { text: delta, index, done: false };\n index += 1;\n }\n if (choice?.finish_reason) {\n finished = true;\n yield { text: \"\", index, done: true };\n index += 1;\n }\n }\n if (!finished) {\n yield { text: \"\", index, done: true };\n }\n } catch (err) {\n if (err instanceof GenerationAbortedError) throw err;\n throw new ModelLoadError(\"Streaming completion failed.\", err);\n }\n }\n\n async unload(): Promise<void> {\n if (this.engine) {\n await this.engine.unload();\n this.engine = null;\n }\n }\n\n private requireEngine(): MLCEngine {\n if (!this.engine) {\n throw new ModelNotLoadedError(\"Engine not loaded. Call load() before generation.\");\n }\n return this.engine;\n }\n}\n","import type { GenerationOptions, Message, ModelLoadProgress, TokenChunk } from \"../types\";\n\n/**\n * Subset of {@link GenerationOptions} that survives `postMessage`.\n *\n * `AbortSignal` cannot be cloned across the worker boundary, so it is replaced\n * by a separate {@link AbortRequest} message keyed on the same operation id.\n */\nexport type SerializableGenerationOptions = Omit<GenerationOptions, \"signal\">;\n\n/** Strip `signal` from a {@link GenerationOptions} before posting it. 
*/\nexport function toSerializableOptions(\n options: GenerationOptions = {}\n): SerializableGenerationOptions {\n const { signal: _signal, ...rest } = options;\n void _signal;\n return rest;\n}\n\n/** Operation request sent from the main thread to the worker. */\nexport type WorkerRequest =\n | { op: \"load\"; id: number; modelId: string }\n | {\n op: \"generate\";\n id: number;\n messages: Message[];\n options: SerializableGenerationOptions;\n }\n | {\n op: \"stream\";\n id: number;\n messages: Message[];\n options: SerializableGenerationOptions;\n }\n | {\n op: \"complete\";\n id: number;\n prompt: string;\n options: SerializableGenerationOptions;\n }\n | {\n op: \"stream-completion\";\n id: number;\n prompt: string;\n options: SerializableGenerationOptions;\n }\n | { op: \"abort\"; id: number }\n | { op: \"unload\"; id: number }\n | { op: \"isLoaded\"; id: number };\n\n/** Operation response sent from the worker back to the main thread. */\nexport type WorkerResponse =\n | { op: \"loaded\"; id: number }\n | { op: \"generated\"; id: number; text: string }\n | { op: \"progress\"; id: number; payload: ModelLoadProgress }\n | { op: \"token\"; id: number; chunk: TokenChunk }\n | { op: \"stream-end\"; id: number }\n | { op: \"error\"; id: number; name: string; message: string }\n | { op: \"unloaded\"; id: number }\n | { op: \"is-loaded\"; id: number; value: boolean };\n\n/** Subset of `Worker` we depend on. Lets tests inject a mock. */\nexport interface WorkerLike {\n postMessage(message: WorkerRequest): void;\n addEventListener(type: \"message\", listener: (event: MessageEvent<WorkerResponse>) => void): void;\n removeEventListener(\n type: \"message\",\n listener: (event: MessageEvent<WorkerResponse>) => void\n ): void;\n terminate(): void;\n}\n\n/** Internal alias used when the message direction is irrelevant (logging, debug). */\nexport type AbortRequest = Extract<WorkerRequest, { op: \"abort\" }>;\n","import { GenerationAbortedError, ModelLoadError, ModelNotLoadedError } from \"./exceptions\";\nimport type { Engine } from \"./engine\";\nimport type { GenerationOptions, Message, ProgressCallback, TokenChunk } from \"../types\";\nimport {\n toSerializableOptions,\n type WorkerLike,\n type WorkerRequest,\n type WorkerResponse,\n} from \"../worker/protocol\";\n\ninterface PendingGenerate {\n resolve: (text: string) => void;\n reject: (err: Error) => void;\n}\n\ninterface PendingStream {\n push: (chunk: TokenChunk) => void;\n end: () => void;\n fail: (err: Error) => void;\n}\n\n/**\n * Engine implementation that proxies all calls to a Web Worker.\n *\n * The worker holds the actual {@link WebLLMEngine}; this class is a thin RPC\n * shell that serializes requests, tracks pending operations by a numeric id,\n * and turns worker responses back into Promises and async iterables.\n *\n * Use {@link createInferenceWorker} to obtain a real worker. 
Tests can pass a\n * {@link WorkerLike} mock implementing the same `postMessage` /\n * `addEventListener` surface.\n */\nexport class WorkerEngine implements Engine {\n private nextId: number = 1;\n private loaded: boolean = false;\n private currentLoad: { resolve: () => void; reject: (e: Error) => void } | null = null;\n private currentLoadId: number = 0;\n private currentLoadProgress: ProgressCallback | undefined = undefined;\n private currentUnload: { resolve: () => void; reject: (e: Error) => void } | null = null;\n private currentUnloadId: number = 0;\n private pendingGenerates: Map<number, PendingGenerate> = new Map();\n private pendingStreams: Map<number, PendingStream> = new Map();\n\n private readonly listener: (event: MessageEvent<WorkerResponse>) => void;\n\n constructor(private readonly worker: WorkerLike) {\n this.listener = (event): void => this.handleMessage(event.data);\n this.worker.addEventListener(\"message\", this.listener);\n }\n\n isLoaded(): boolean {\n return this.loaded;\n }\n\n async load(modelId: string, onProgress?: ProgressCallback): Promise<void> {\n if (this.currentLoad) {\n throw new ModelLoadError(\"Another load is already in progress.\");\n }\n const id: number = this.allocateId();\n this.currentLoadId = id;\n this.currentLoadProgress = onProgress;\n return new Promise<void>((resolve, reject) => {\n this.currentLoad = { resolve, reject };\n this.send({ op: \"load\", id, modelId });\n });\n }\n\n async generate(messages: Message[], options: GenerationOptions = {}): Promise<string> {\n const id: number = this.allocateId();\n return new Promise<string>((resolve, reject) => {\n this.pendingGenerates.set(id, { resolve, reject });\n this.send({\n op: \"generate\",\n id,\n messages,\n options: toSerializableOptions(options),\n });\n options.signal?.addEventListener(\"abort\", () => this.send({ op: \"abort\", id }));\n });\n }\n\n async *stream(messages: Message[], options: GenerationOptions = {}): AsyncIterable<TokenChunk> {\n const id: number = this.allocateId();\n const queue: TokenChunk[] = [];\n let done: boolean = false;\n let error: Error | null = null;\n let notify: (() => void) | null = null;\n\n const wakeup = (): void => {\n if (notify) {\n const fn = notify;\n notify = null;\n fn();\n }\n };\n\n this.pendingStreams.set(id, {\n push: (chunk): void => {\n queue.push(chunk);\n wakeup();\n },\n end: (): void => {\n done = true;\n wakeup();\n },\n fail: (err): void => {\n error = err;\n done = true;\n wakeup();\n },\n });\n\n this.send({\n op: \"stream\",\n id,\n messages,\n options: toSerializableOptions(options),\n });\n options.signal?.addEventListener(\"abort\", () => this.send({ op: \"abort\", id }));\n\n try {\n while (true) {\n if (queue.length > 0) {\n const chunk = queue.shift();\n if (chunk) yield chunk;\n continue;\n }\n if (error) throw error;\n if (done) return;\n await new Promise<void>((r) => {\n notify = r;\n });\n }\n } finally {\n this.pendingStreams.delete(id);\n }\n }\n\n async complete(prompt: string, options: GenerationOptions = {}): Promise<string> {\n const id: number = this.allocateId();\n return new Promise<string>((resolve, reject) => {\n this.pendingGenerates.set(id, { resolve, reject });\n this.send({\n op: \"complete\",\n id,\n prompt,\n options: toSerializableOptions(options),\n });\n options.signal?.addEventListener(\"abort\", () => this.send({ op: \"abort\", id }));\n });\n }\n\n async *streamCompletion(\n prompt: string,\n options: GenerationOptions = {}\n ): AsyncIterable<TokenChunk> {\n const id: number = 
this.allocateId();\n const queue: TokenChunk[] = [];\n let done: boolean = false;\n let error: Error | null = null;\n let notify: (() => void) | null = null;\n\n const wakeup = (): void => {\n if (notify) {\n const fn = notify;\n notify = null;\n fn();\n }\n };\n\n this.pendingStreams.set(id, {\n push: (chunk): void => {\n queue.push(chunk);\n wakeup();\n },\n end: (): void => {\n done = true;\n wakeup();\n },\n fail: (err): void => {\n error = err;\n done = true;\n wakeup();\n },\n });\n\n this.send({\n op: \"stream-completion\",\n id,\n prompt,\n options: toSerializableOptions(options),\n });\n options.signal?.addEventListener(\"abort\", () => this.send({ op: \"abort\", id }));\n\n try {\n while (true) {\n if (queue.length > 0) {\n const chunk = queue.shift();\n if (chunk) yield chunk;\n continue;\n }\n if (error) throw error;\n if (done) return;\n await new Promise<void>((r) => {\n notify = r;\n });\n }\n } finally {\n this.pendingStreams.delete(id);\n }\n }\n\n async unload(): Promise<void> {\n if (!this.loaded) return;\n if (this.currentUnload) {\n throw new ModelLoadError(\"Another unload is already in progress.\");\n }\n const id: number = this.allocateId();\n this.currentUnloadId = id;\n return new Promise<void>((resolve, reject) => {\n this.currentUnload = { resolve, reject };\n this.send({ op: \"unload\", id });\n });\n }\n\n /** Tear down the underlying worker. The engine is unusable after this. */\n terminate(): void {\n this.worker.removeEventListener(\"message\", this.listener);\n this.worker.terminate();\n this.loaded = false;\n }\n\n private allocateId(): number {\n const id = this.nextId;\n this.nextId += 1;\n return id;\n }\n\n private send(req: WorkerRequest): void {\n this.worker.postMessage(req);\n }\n\n private handleMessage(msg: WorkerResponse): void {\n switch (msg.op) {\n case \"loaded\":\n if (this.currentLoad && msg.id === this.currentLoadId) {\n this.loaded = true;\n this.currentLoad.resolve();\n this.currentLoad = null;\n this.currentLoadProgress = undefined;\n }\n return;\n case \"progress\":\n if (msg.id === this.currentLoadId) {\n this.currentLoadProgress?.(msg.payload);\n }\n return;\n case \"generated\": {\n const pending = this.pendingGenerates.get(msg.id);\n if (pending) {\n pending.resolve(msg.text);\n this.pendingGenerates.delete(msg.id);\n }\n return;\n }\n case \"token\": {\n const stream = this.pendingStreams.get(msg.id);\n stream?.push(msg.chunk);\n return;\n }\n case \"stream-end\": {\n const stream = this.pendingStreams.get(msg.id);\n stream?.end();\n return;\n }\n case \"unloaded\":\n if (this.currentUnload && msg.id === this.currentUnloadId) {\n this.loaded = false;\n this.currentUnload.resolve();\n this.currentUnload = null;\n }\n return;\n case \"is-loaded\":\n return;\n case \"error\": {\n const err = mapError(msg.name, msg.message);\n if (this.currentLoad && msg.id === this.currentLoadId) {\n this.currentLoad.reject(err);\n this.currentLoad = null;\n this.currentLoadProgress = undefined;\n return;\n }\n if (this.currentUnload && msg.id === this.currentUnloadId) {\n this.currentUnload.reject(err);\n this.currentUnload = null;\n return;\n }\n const generate = this.pendingGenerates.get(msg.id);\n if (generate) {\n generate.reject(err);\n this.pendingGenerates.delete(msg.id);\n return;\n }\n const stream = this.pendingStreams.get(msg.id);\n if (stream) {\n stream.fail(err);\n return;\n }\n return;\n }\n }\n }\n}\n\nfunction mapError(name: string, message: string): Error {\n switch (name) {\n case \"ModelLoadError\":\n return new 
ModelLoadError(message);\n case \"ModelNotLoadedError\":\n return new ModelNotLoadedError(message);\n case \"GenerationAbortedError\":\n return new GenerationAbortedError(message);\n default: {\n const err = new Error(message);\n err.name = name;\n return err;\n }\n }\n}\n","import type { ModelPreset } from \"../types\";\nimport { UnknownModelError } from \"../core/exceptions\";\n\n/**\n * Curated registry of supported models for v0.1.\n *\n * Each entry maps a friendly id (e.g. `\"phi-3.5-mini-int4\"`) to the underlying\n * runtime identifier and metadata. Friendly ids are stable; backend ids may\n * change as upstream MLC packages evolve.\n *\n * Only models that have been validated to load in browsers with WebGPU and\n * that fit the SLM target (≤ 4B parameters at INT4) are included.\n */\nexport const MODEL_PRESETS: Readonly<Record<string, ModelPreset>> = Object.freeze({\n \"phi-3.5-mini-int4\": {\n id: \"phi-3.5-mini-int4\",\n family: \"Phi-3.5\",\n parameters: \"3.8B\",\n quantization: \"q4f16_1\",\n webllmId: \"Phi-3.5-mini-instruct-q4f16_1-MLC\",\n contextWindow: 4096,\n description: \"Microsoft Phi-3.5 mini, INT4 quantized for browser inference.\",\n },\n \"llama-3.2-1b-int4\": {\n id: \"llama-3.2-1b-int4\",\n family: \"Llama-3.2\",\n parameters: \"1B\",\n quantization: \"q4f16_1\",\n webllmId: \"Llama-3.2-1B-Instruct-q4f16_1-MLC\",\n contextWindow: 4096,\n description: \"Meta Llama 3.2 1B Instruct, INT4 quantized.\",\n },\n \"qwen2.5-1.5b-int4\": {\n id: \"qwen2.5-1.5b-int4\",\n family: \"Qwen2.5\",\n parameters: \"1.5B\",\n quantization: \"q4f16_1\",\n webllmId: \"Qwen2.5-1.5B-Instruct-q4f16_1-MLC\",\n contextWindow: 4096,\n description: \"Alibaba Qwen 2.5 1.5B Instruct, INT4 quantized.\",\n },\n});\n\n/**\n * Resolve a friendly model id to its full preset metadata.\n *\n * @param modelId - Friendly id (e.g. `\"phi-3.5-mini-int4\"`).\n * @returns The matching preset.\n * @throws UnknownModelError if no preset matches.\n */\nexport function resolveModelPreset(modelId: string): ModelPreset {\n const preset = MODEL_PRESETS[modelId];\n if (!preset) {\n const available = Object.keys(MODEL_PRESETS).join(\", \");\n throw new UnknownModelError(`Unknown model \"${modelId}\". Available models: ${available}.`);\n }\n return preset;\n}\n\n/** Return the list of supported friendly model ids. */\nexport function listSupportedModels(): string[] {\n return Object.keys(MODEL_PRESETS);\n}\n","import type { WorkerLike } from \"./protocol\";\n\n/**\n * Spawn a new inference Web Worker.\n *\n * Uses Vite/webpack-friendly `new Worker(new URL(...), { type: \"module\" })`\n * syntax. The bundler emits the worker as a separate ES module chunk.\n *\n * Consumers normally do not call this directly — `LMTask.create()` invokes it\n * when `inWorker: true` is set. 
It is exported for advanced scenarios (custom\n * worker management, pooling, lifecycle integration with a host app).\n *\n * @returns A {@link WorkerLike}-compatible Worker instance.\n */\nexport function createInferenceWorker(): WorkerLike {\n return new Worker(new URL(\"./inference.worker.ts\", import.meta.url), {\n type: \"module\",\n }) as unknown as WorkerLike;\n}\n","import type { Engine } from \"../core/engine\";\nimport { WebLLMEngine } from \"../core/webllm-engine\";\nimport { WorkerEngine } from \"../core/worker-engine\";\nimport { resolveModelPreset } from \"../presets/models\";\nimport { createInferenceWorker } from \"../worker/create-worker\";\nimport type { ModelPreset, ProgressCallback } from \"../types\";\n\n/** Common options accepted by every task's `create()` factory. */\nexport interface LMTaskCreateOptions {\n /** Optional callback for model load progress updates. */\n onProgress?: ProgressCallback;\n /**\n * Override the engine used for inference. Intended for testing.\n * Production callers should let the SDK pick a backend automatically.\n */\n engine?: Engine;\n /**\n * Run inference inside a Web Worker, isolating the UI thread from\n * tokenization and generation. Defaults to `false` in v0.2 (opt-in) and\n * will flip to `true` in v0.3 once the Cache API / OPFS integration\n * (also v0.2) has been validated against worker-thread storage access.\n *\n * Ignored when {@link engine} is provided.\n */\n inWorker?: boolean;\n}\n\n/** Internal payload returned by {@link LMTask.createEngine}. */\nexport interface ResolvedEngine {\n engine: Engine;\n preset: ModelPreset;\n}\n\n/**\n * Base class shared by all language-model tasks (`Chat` for v0.1; `Completion`,\n * `Embeddings` and `Reranker` planned for later versions).\n *\n * The base owns:\n * - resolving a friendly model id to a {@link ModelPreset};\n * - selecting and loading an {@link Engine} (defaulting to WebLLM);\n * - exposing `unload()` for cleanup.\n *\n * Subclasses add task-specific public methods (`send`, `stream`, etc.).\n */\nexport abstract class LMTask {\n protected constructor(\n /** Engine used for inference. */\n protected readonly engine: Engine,\n /** Resolved metadata for the loaded model. */\n public readonly preset: ModelPreset\n ) {}\n\n /**\n * Load a model into a backend and return the wired-up engine + preset.\n *\n * Subclasses call this from their static `create()` factories.\n *\n * @param modelId - Friendly model id from the registry.\n * @param options - Task creation options.\n */\n protected static async createEngine(\n modelId: string,\n options: LMTaskCreateOptions = {}\n ): Promise<ResolvedEngine> {\n const preset = resolveModelPreset(modelId);\n const engine = options.engine ?? LMTask.defaultEngine(options);\n if (!engine.isLoaded()) {\n await engine.load(preset.webllmId, options.onProgress);\n }\n return { engine, preset };\n }\n\n private static defaultEngine(options: LMTaskCreateOptions): Engine {\n if (options.inWorker) {\n return new WorkerEngine(createInferenceWorker());\n }\n return new WebLLMEngine();\n }\n\n /** Release engine resources. Safe to call multiple times. */\n async unload(): Promise<void> {\n await this.engine.unload();\n }\n\n /** Whether the underlying engine has a loaded model. 
*/\n isLoaded(): boolean {\n return this.engine.isLoaded();\n }\n}\n","import type { FinishReason, Message } from \"./types\";\n\n/**\n * Result returned by `Chat.send()`.\n *\n * Holds the assistant's textual reply, the structured assistant message\n * (already appended to the chat history), and metadata about the generation.\n */\nexport class ChatReply {\n constructor(\n /** The assistant's reply text. */\n public readonly text: string,\n /** The structured assistant message (already appended to chat history). */\n public readonly message: Message,\n /** Number of tokens generated. 0 when the engine does not report it. */\n public readonly tokensGenerated: number,\n /** Why the generation loop stopped. */\n public readonly finishReason: FinishReason\n ) {}\n}\n\n/**\n * Result returned by `Completion.predict()`.\n *\n * Holds the generated continuation text (the prompt itself is not included)\n * plus metadata about the generation loop.\n */\nexport class CompletionResult {\n constructor(\n /** The generated text (continuation only, prompt excluded). */\n public readonly text: string,\n /** The original prompt that was fed to the model. */\n public readonly prompt: string,\n /** Number of tokens generated. 0 when the engine does not report it. */\n public readonly tokensGenerated: number,\n /** Why the generation loop stopped. */\n public readonly finishReason: FinishReason\n ) {}\n}\n","import { LMTask, type LMTaskCreateOptions } from \"./lm-task\";\nimport type { Engine } from \"../core/engine\";\nimport { ChatReply } from \"../results\";\nimport type { GenerationOptions, Message, ModelPreset, TokenChunk } from \"../types\";\n\n/**\n * Multi-turn chat task.\n *\n * Maintains an in-memory conversation history and applies the chat template\n * configured for the loaded model. Use {@link Chat.create} to construct an\n * instance — the constructor is private.\n *\n * @example\n * ```ts\n * const chat = await Chat.create(\"phi-3.5-mini-int4\");\n * const reply = await chat.send(\"Explain ONNX in one sentence.\");\n * console.log(reply.text);\n * ```\n *\n * @example Streaming\n * ```ts\n * const controller = new AbortController();\n * for await (const token of chat.stream(\"Explain ONNX.\", { signal: controller.signal })) {\n * process.stdout.write(token.text);\n * }\n * ```\n */\nexport class Chat extends LMTask {\n private readonly history: Message[] = [];\n private systemPrompt: string | null = null;\n\n private constructor(engine: Engine, preset: ModelPreset) {\n super(engine, preset);\n }\n\n /**\n * Create and load a `Chat` task for the given model.\n *\n * @param modelId - Friendly model id from the registry (e.g. `\"phi-3.5-mini-int4\"`).\n * @param options - Optional creation options (progress callback, engine override).\n */\n static async create(modelId: string, options: LMTaskCreateOptions = {}): Promise<Chat> {\n const { engine, preset } = await LMTask.createEngine(modelId, options);\n return new Chat(engine, preset);\n }\n\n /** Set or replace the system prompt prepended to every conversation. */\n setSystemPrompt(prompt: string): void {\n this.systemPrompt = prompt;\n }\n\n /** Clear the system prompt. */\n clearSystemPrompt(): void {\n this.systemPrompt = null;\n }\n\n /** Reset the conversation history. The system prompt is preserved. */\n resetHistory(): void {\n this.history.length = 0;\n }\n\n /** A read-only snapshot of the conversation history. 
*/\n getHistory(): readonly Message[] {\n return this.history.slice();\n }\n\n /**\n * Send a user message and await the full assistant reply.\n *\n * The user message and the assistant reply are appended to the history.\n *\n * @param message - The user-facing message text.\n * @param options - Generation options.\n * @returns A {@link ChatReply} with the assistant's reply.\n */\n async send(message: string, options: GenerationOptions = {}): Promise<ChatReply> {\n const messages = this.buildMessages(message);\n const text = await this.engine.generate(messages, options);\n const userMsg: Message = { role: \"user\", content: message };\n const assistantMsg: Message = { role: \"assistant\", content: text };\n this.history.push(userMsg, assistantMsg);\n return new ChatReply(text, assistantMsg, 0, \"stop\");\n }\n\n /**\n * Stream the assistant reply token-by-token as an async iterable.\n *\n * The full reply is appended to the history when the stream completes\n * normally. If the stream is aborted, neither message is appended.\n *\n * @param message - The user-facing message text.\n * @param options - Generation options including an optional `signal`.\n */\n async *stream(message: string, options: GenerationOptions = {}): AsyncIterable<TokenChunk> {\n const messages = this.buildMessages(message);\n const userMsg: Message = { role: \"user\", content: message };\n let acc: string = \"\";\n for await (const chunk of this.engine.stream(messages, options)) {\n acc += chunk.text;\n yield chunk;\n }\n const assistantMsg: Message = { role: \"assistant\", content: acc };\n this.history.push(userMsg, assistantMsg);\n }\n\n private buildMessages(userMessage: string): Message[] {\n const messages: Message[] = [];\n if (this.systemPrompt) {\n messages.push({ role: \"system\", content: this.systemPrompt });\n }\n messages.push(...this.history);\n messages.push({ role: \"user\", content: userMessage });\n return messages;\n }\n}\n","import { LMTask, type LMTaskCreateOptions } from \"./lm-task\";\nimport type { Engine } from \"../core/engine\";\nimport { CompletionResult } from \"../results\";\nimport type { GenerationOptions, ModelPreset, TokenChunk } from \"../types\";\n\n/**\n * Raw text-completion task.\n *\n * Unlike {@link Chat}, `Completion` does not maintain a conversation history\n * and does not apply a chat template. The prompt is fed to the model verbatim\n * and the model continues it. Useful for \"Once upon a time…\" style generation,\n * code completion, or any scenario where chat formatting would interfere.\n *\n * Use {@link Completion.create} to construct an instance — the constructor is\n * private.\n *\n * @example\n * ```ts\n * const comp = await Completion.create(\"qwen2.5-1.5b-int4\");\n * const result = await comp.predict(\"Once upon a time\", { maxTokens: 50 });\n * console.log(result.text);\n * ```\n *\n * @example Streaming\n * ```ts\n * const controller = new AbortController();\n * for await (const token of comp.stream(\"def fibonacci(n):\", { signal: controller.signal })) {\n * process.stdout.write(token.text);\n * }\n * ```\n */\nexport class Completion extends LMTask {\n private constructor(engine: Engine, preset: ModelPreset) {\n super(engine, preset);\n }\n\n /**\n * Create and load a `Completion` task for the given model.\n *\n * @param modelId - Friendly model id from the registry (e.g. 
`\"qwen2.5-1.5b-int4\"`).\n * @param options - Optional creation options (progress callback, engine override).\n */\n static async create(modelId: string, options: LMTaskCreateOptions = {}): Promise<Completion> {\n const { engine, preset } = await LMTask.createEngine(modelId, options);\n return new Completion(engine, preset);\n }\n\n /**\n * Generate a continuation for the given prompt.\n *\n * @param prompt - Raw text fed to the model.\n * @param options - Generation options.\n * @returns A {@link CompletionResult} with the generated continuation.\n */\n async predict(prompt: string, options: GenerationOptions = {}): Promise<CompletionResult> {\n const text = await this.engine.complete(prompt, options);\n return new CompletionResult(text, prompt, 0, \"stop\");\n }\n\n /**\n * Stream a continuation for the given prompt as an async iterable of token\n * chunks.\n *\n * @param prompt - Raw text fed to the model.\n * @param options - Generation options including an optional `signal`.\n */\n async *stream(prompt: string, options: GenerationOptions = {}): AsyncIterable<TokenChunk> {\n for await (const chunk of this.engine.streamCompletion(prompt, options)) {\n yield chunk;\n }\n }\n}\n","import { MODEL_PRESETS, resolveModelPreset } from \"../presets/models\";\nimport { UnknownModelError } from \"../core/exceptions\";\n\n/** Snapshot of a single cached model's metadata. */\nexport interface CachedModelEntry {\n /** Friendly id from the registry (e.g. `\"llama-3.2-1b-int4\"`). */\n id: string;\n /** Backend-specific id (e.g. WebLLM `webllmId`). */\n backendId: string;\n /** Human-readable family name. */\n family: string;\n /** Approx parameter count, e.g. `\"1B\"`. */\n parameters: string;\n}\n\n/** Aggregate storage usage reported by the browser. */\nexport interface CacheUsage {\n /** Bytes used by the entire origin's storage (not just our cache). */\n usage: number;\n /** Bytes the browser is willing to give the origin. */\n quota: number;\n}\n\n/**\n * Hooks the {@link ModelCache} uses to talk to the underlying runtime and\n * the browser. Tests inject mocks; production code leaves them undefined,\n * letting `ModelCache` resolve the real `@mlc-ai/web-llm` helpers and\n * `navigator.storage.estimate()` lazily.\n */\nexport interface ModelCacheOptions {\n /** Override `hasModelInCache` from the runtime. */\n hasModel?: (backendId: string) => Promise<boolean>;\n /** Override `deleteModelInCache` from the runtime. */\n deleteModel?: (backendId: string) => Promise<void>;\n /** Override `navigator.storage.estimate()`. */\n estimate?: () => Promise<CacheUsage>;\n}\n\ntype WebLLMCacheModule = {\n hasModelInCache: (id: string) => Promise<boolean>;\n deleteModelInCache: (id: string) => Promise<void>;\n};\n\nlet webllmCachePromise: Promise<WebLLMCacheModule> | null = null;\n\nasync function loadWebLLMCacheHelpers(): Promise<WebLLMCacheModule> {\n if (!webllmCachePromise) {\n webllmCachePromise = import(\"@mlc-ai/web-llm\").then((m) => ({\n hasModelInCache: m.hasModelInCache,\n deleteModelInCache: m.deleteModelInCache,\n }));\n }\n return webllmCachePromise;\n}\n\nasync function defaultEstimate(): Promise<CacheUsage> {\n if (typeof navigator === \"undefined\" || !navigator.storage?.estimate) {\n return { usage: 0, quota: 0 };\n }\n const estimate = await navigator.storage.estimate();\n return {\n usage: estimate.usage ?? 0,\n quota: estimate.quota ?? 
0,\n };\n}\n\n/**\n * Inspect and manage cached model weights.\n *\n * `localm-web` does not download or cache weights itself — that work is owned\n * by `@mlc-ai/web-llm`, which writes to the browser Cache API. `ModelCache`\n * is a thin wrapper that lets a consuming app surface cache state in its UI:\n * \"this model is downloaded\", \"you have 1.4 GB cached, free up space?\",\n * \"clear all models on logout\".\n *\n * @example\n * ```ts\n * const cache = new ModelCache();\n * if (await cache.has(\"llama-3.2-1b-int4\")) {\n * console.log(\"ready offline\");\n * }\n * const cached = await cache.list();\n * await cache.delete(\"phi-3.5-mini-int4\");\n * const usage = await cache.estimateUsage();\n * console.log(`${usage.usage} / ${usage.quota} bytes`);\n * ```\n */\nexport class ModelCache {\n private readonly hasModelHook: ((id: string) => Promise<boolean>) | undefined;\n private readonly deleteModelHook: ((id: string) => Promise<void>) | undefined;\n private readonly estimateHook: () => Promise<CacheUsage>;\n\n constructor(options: ModelCacheOptions = {}) {\n this.hasModelHook = options.hasModel;\n this.deleteModelHook = options.deleteModel;\n this.estimateHook = options.estimate ?? defaultEstimate;\n }\n\n /**\n * Whether the model's weights are present in the browser cache.\n *\n * @param modelId - Friendly id from the registry.\n * @throws UnknownModelError if `modelId` is not in the registry.\n */\n async has(modelId: string): Promise<boolean> {\n const backendId: string = resolveModelPreset(modelId).webllmId;\n const fn = this.hasModelHook ?? (await loadWebLLMCacheHelpers()).hasModelInCache;\n return fn(backendId);\n }\n\n /**\n * Delete a single model's weights from the browser cache. No-op when the\n * model is not cached.\n *\n * @param modelId - Friendly id from the registry.\n * @throws UnknownModelError if `modelId` is not in the registry.\n */\n async delete(modelId: string): Promise<void> {\n const backendId: string = resolveModelPreset(modelId).webllmId;\n const fn = this.deleteModelHook ?? (await loadWebLLMCacheHelpers()).deleteModelInCache;\n await fn(backendId);\n }\n\n /**\n * List the registry models that are currently cached.\n *\n * Iterates `MODEL_PRESETS` and probes each one. Only returns models known\n * to the SDK — models cached by external WebLLM calls outside our registry\n * are not included.\n *\n * @returns Empty list when nothing is cached.\n */\n async list(): Promise<CachedModelEntry[]> {\n const fn = this.hasModelHook ?? (await loadWebLLMCacheHelpers()).hasModelInCache;\n const probes = await Promise.all(\n Object.values(MODEL_PRESETS).map(async (preset) => {\n const cached: boolean = await fn(preset.webllmId);\n if (!cached) return null;\n const entry: CachedModelEntry = {\n id: preset.id,\n backendId: preset.webllmId,\n family: preset.family,\n parameters: preset.parameters,\n };\n return entry;\n })\n );\n return probes.filter((p): p is CachedModelEntry => p !== null);\n }\n\n /**\n * Delete every registry model from the cache. Useful for logout flows or\n * \"reset\" buttons. Models cached outside the registry are not touched.\n */\n async clear(): Promise<void> {\n const fn = this.deleteModelHook ?? (await loadWebLLMCacheHelpers()).deleteModelInCache;\n await Promise.all(Object.values(MODEL_PRESETS).map((p) => fn(p.webllmId)));\n }\n\n /**\n * Aggregate storage stats from the browser. 
Returned numbers cover the\n * entire origin (Cache API + IndexedDB + Service Workers + OPFS), not\n * just our model cache — use it for \"you have X of Y available\" hints.\n */\n async estimateUsage(): Promise<CacheUsage> {\n return this.estimateHook();\n }\n\n /**\n * Throw a descriptive error if the given id is not in the registry.\n * Exposed for code paths that want to validate before calling other\n * methods (those already throw on their own).\n *\n * @throws UnknownModelError\n */\n static assertKnown(modelId: string): void {\n if (!(modelId in MODEL_PRESETS)) {\n const available = Object.keys(MODEL_PRESETS).join(\", \");\n throw new UnknownModelError(`Unknown model \"${modelId}\". Available models: ${available}.`);\n }\n }\n}\n","import type { TokenChunk } from \"../types\";\n\n/**\n * Drain an async iterable of token chunks into a single string.\n *\n * Useful in tests, for non-streaming consumers, and as a one-line way to\n * reconstruct the final text from a `Chat.stream(...)` call.\n *\n * @param stream - The token-chunk async iterable to consume.\n * @returns The concatenation of every chunk's `text` field.\n */\nexport async function collectStream(stream: AsyncIterable<TokenChunk>): Promise<string> {\n let acc: string = \"\";\n for await (const chunk of stream) {\n acc += chunk.text;\n }\n return acc;\n}\n\n/**\n * Wrap an async iterable so that each `TokenChunk` is also passed to a\n * caller-supplied side-effect callback before being yielded downstream.\n *\n * This is intentionally a passthrough — it does not buffer.\n *\n * @param stream - The upstream token-chunk async iterable.\n * @param onChunk - Side-effect invoked for every chunk.\n * @returns A new async iterable yielding the same chunks.\n */\nexport async function* tap(\n stream: AsyncIterable<TokenChunk>,\n onChunk: (chunk: TokenChunk) => void\n): AsyncIterable<TokenChunk> {\n for await (const chunk of stream) {\n onChunk(chunk);\n yield chunk;\n }\n}\n","/**\n * localm-web — browser-only TypeScript SDK for running LLMs and SLMs locally.\n *\n * Public API surface for v0.1.\n *\n * @packageDocumentation\n */\n\nexport { Chat } from \"./tasks/chat\";\nexport { Completion } from \"./tasks/completion\";\nexport { LMTask } from \"./tasks/lm-task\";\nexport type { LMTaskCreateOptions } from \"./tasks/lm-task\";\n\nexport { ChatReply, CompletionResult } from \"./results\";\n\nexport { MODEL_PRESETS, resolveModelPreset, listSupportedModels } from \"./presets/models\";\n\nexport {\n LocalmWebError,\n WebGPUUnavailableError,\n ModelLoadError,\n ModelNotLoadedError,\n UnknownModelError,\n GenerationAbortedError,\n QuotaExceededError,\n BackendNotAvailableError,\n} from \"./core/exceptions\";\n\nexport type { Engine } from \"./core/engine\";\nexport { WorkerEngine } from \"./core/worker-engine\";\nexport { createInferenceWorker } from \"./worker/create-worker\";\nexport type { WorkerLike } from \"./worker/protocol\";\n\nexport { ModelCache } from \"./cache\";\nexport type { CachedModelEntry, CacheUsage, ModelCacheOptions } from \"./cache\";\n\nexport { collectStream, tap } from \"./streaming/token-stream\";\n\nexport type {\n Role,\n FinishReason,\n Message,\n GenerationOptions,\n ModelLoadProgress,\n ModelLoadPhase,\n ProgressCallback,\n TokenChunk,\n ModelPreset,\n} from \"./types\";\n\n/** Current package version. Updated at release time. 
*/\nexport const VERSION: string = \"0.2.0\";\n"],"names":[],"mappings":"AAEA,MAAM,mBAA2B;AACjC,MAAM,kBAA0B;AAazB,SAAS,kBAAkB,MAA8B;AAC9D,MAAI,iBAAiB,KAAK,IAAI,EAAG,QAAO;AACxC,MAAI,gBAAgB,KAAK,IAAI,EAAG,QAAO;AACvC,SAAO;AACT;ACXO,MAAM,uBAAuB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA,EAKxC,YACE,SACgB,OAChB;AACA,UAAM,OAAO;AAFG,SAAA,QAAA;AAGhB,SAAK,OAAO,WAAW;AAAA,EACzB;AACF;AAGO,MAAM,+BAA+B,eAAe;AAAC;AAGrD,MAAM,uBAAuB,eAAe;AAAC;AAG7C,MAAM,4BAA4B,eAAe;AAAC;AAGlD,MAAM,0BAA0B,eAAe;AAAC;AAGhD,MAAM,+BAA+B,eAAe;AAAC;AAGrD,MAAM,2BAA2B,eAAe;AAAC;AAGjD,MAAM,iCAAiC,eAAe;AAAC;AC5B9D,IAAI,sBAAoD;AAExD,eAAe,aAAoC;AACjD,MAAI,CAAC,qBAAqB;AACxB,0BAAsB,OAAO,iBAAiB;AAAA,EAChD;AACA,SAAO;AACT;AAEA,SAAS,oBAA6B;AACpC,SAAO,OAAO,cAAc,eAAe,SAAS;AACtD;AAQA,SAAS,oBAAoB,SAA4C;AACvE,QAAM,SAAyB,CAAA;AAC/B,MAAI,QAAQ,cAAc,OAAW,QAAO,aAAa,QAAQ;AACjE,MAAI,QAAQ,gBAAgB,OAAW,QAAO,cAAc,QAAQ;AACpE,MAAI,QAAQ,SAAS,OAAW,QAAO,QAAQ,QAAQ;AACvD,SAAO;AACT;AAEA,SAAS,eAAe,UAAmD;AACzE,SAAO,SAAS,IAAI,CAAC,MAAkC;AACrD,YAAQ,EAAE,MAAA;AAAA,MACR,KAAK;AACH,eAAO,EAAE,MAAM,UAAU,SAAS,EAAE,QAAA;AAAA,MACtC,KAAK;AACH,eAAO,EAAE,MAAM,QAAQ,SAAS,EAAE,QAAA;AAAA,MACpC,KAAK;AACH,eAAO,EAAE,MAAM,aAAa,SAAS,EAAE,QAAA;AAAA,MACzC,KAAK;AACH,eAAO,EAAE,MAAM,QAAQ,SAAS,EAAE,SAAS,cAAc,EAAE,QAAQ,GAAA;AAAA,IAAG;AAAA,EAE5E,CAAC;AACH;AAQO,MAAM,aAA+B;AAAA,EAClC,SAA2B;AAAA,EAEnC,WAAoB;AAClB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,MAAM,KAAK,SAAiB,YAA8C;AACxE,QAAI,CAAC,qBAAqB;AACxB,YAAM,IAAI;AAAA,QACR;AAAA,MAAA;AAAA,IAEJ;AACA,UAAM,SAAS,MAAM,WAAA;AACrB,QAAI;AACF,WAAK,SAAS,MAAM,OAAO,gBAAgB,SAAS;AAAA,QAClD,sBAAsB,CAAC,WAAiB;AACtC,uBAAa;AAAA,YACX,UAAU,OAAO;AAAA,YACjB,MAAM,OAAO;AAAA,YACb,QAAQ;AAAA,YACR,OAAO;AAAA,YACP,OAAO,kBAAkB,OAAO,IAAI;AAAA,UAAA,CACrC;AAAA,QACH;AAAA,MAAA,CACD;AACD,mBAAa;AAAA,QACX,UAAU;AAAA,QACV,MAAM;AAAA,QACN,QAAQ;AAAA,QACR,OAAO;AAAA,QACP,OAAO;AAAA,MAAA,CACR;AAAA,IACH,SAAS,KAAK;AACZ,YAAM,IAAI,eAAe,yBAAyB,OAAO,MAAM,GAAG;AAAA,IACpE;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,UAAqB,UAA6B,IAAqB;AACpF,UAAM,SAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,aAAa,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,MACtD,GAAG,oBAAoB,OAAO;AAAA,MAC9B,UAAU,eAAe,QAAQ;AAAA,MACjC,QAAQ;AAAA,IAAA,CACT;AACD,WAAO,WAAW,QAAQ,CAAC,GAAG,SAAS,WAAW;AAAA,EACpD;AAAA,EAEA,OAAO,OAAO,UAAqB,UAA6B,IAA+B;AAC7F,UAAM,SAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,aAAa,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,MACtD,GAAG,oBAAoB,OAAO;AAAA,MAC9B,UAAU,eAAe,QAAQ;AAAA,MACjC,QAAQ;AAAA,IAAA,CACT;AACD,QAAI,QAAgB;AACpB,QAAI,WAAoB;AACxB,QAAI;AACF,uBAAiB,SAAS,YAAY;AACpC,YAAI,QAAQ,QAAQ,SAAS;AAC3B,gBAAM,IAAI,uBAAuB,+BAA+B;AAAA,QAClE;AACA,cAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,cAAM,QAAQ,QAAQ,OAAO,WAAW;AACxC,YAAI,OAAO;AACT,gBAAM,EAAE,MAAM,OAAO,OAAO,MAAM,MAAA;AAClC,mBAAS;AAAA,QACX;AACA,YAAI,QAAQ,eAAe;AACzB,qBAAW;AACX,gBAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAC/B,mBAAS;AAAA,QACX;AAAA,MACF;AACA,UAAI,CAAC,UAAU;AACb,cAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAAA,MACjC;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,eAAe,uBAAwB,OAAM;AACjD,YAAM,IAAI,eAAe,gCAAgC,GAAG;AAAA,IAC9D;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,QAAgB,UAA6B,IAAqB;AAC/E,UAAM,SAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,aAAa,MAAM,OAAO,YAAY,OAAO;AAAA,MACjD,GAAG,oBAAoB,OAAO;AAAA,MAC9B;AAAA,MACA,QAAQ;AAAA,IAAA,CACT;AACD,WAAO,WAAW,QAAQ,CAAC,GAAG,QAAQ;AAAA,EACxC;AAAA,EAEA,OAAO,iBACL,QACA,UAA6B,IACF;AAC3B,UAAM,SAAS,KAAK,cAAA;AACpB,QAAI,QAAQ,QAAQ,SAAS;AAC3B,YAAM,IAAI,uBAAuB,kCAAkC;AAAA,IACrE;AACA,UAAM,aAAa,MAAM,OAAO,YAAY,OAAO;AAAA,MACjD,GAAG,oBAAoB,OAAO;AAAA,MAC9B;AAAA,MACA,QAAQ;AAAA,IAAA,CACT;AACD,QAAI,QAAgB;AACpB,QAAI,WAAoB;AACxB,QAAI;AAC
F,uBAAiB,SAAS,YAAY;AACpC,YAAI,QAAQ,QAAQ,SAAS;AAC3B,gBAAM,IAAI,uBAAuB,+BAA+B;AAAA,QAClE;AACA,cAAM,SAAS,MAAM,QAAQ,CAAC;AAC9B,cAAM,QAAQ,QAAQ,QAAQ;AAC9B,YAAI,OAAO;AACT,gBAAM,EAAE,MAAM,OAAO,OAAO,MAAM,MAAA;AAClC,mBAAS;AAAA,QACX;AACA,YAAI,QAAQ,eAAe;AACzB,qBAAW;AACX,gBAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAC/B,mBAAS;AAAA,QACX;AAAA,MACF;AACA,UAAI,CAAC,UAAU;AACb,cAAM,EAAE,MAAM,IAAI,OAAO,MAAM,KAAA;AAAA,MACjC;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,eAAe,uBAAwB,OAAM;AACjD,YAAM,IAAI,eAAe,gCAAgC,GAAG;AAAA,IAC9D;AAAA,EACF;AAAA,EAEA,MAAM,SAAwB;AAC5B,QAAI,KAAK,QAAQ;AACf,YAAM,KAAK,OAAO,OAAA;AAClB,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEQ,gBAA2B;AACjC,QAAI,CAAC,KAAK,QAAQ;AAChB,YAAM,IAAI,oBAAoB,mDAAmD;AAAA,IACnF;AACA,WAAO,KAAK;AAAA,EACd;AACF;AC/MO,SAAS,sBACd,UAA6B,IACE;AAC/B,QAAM,EAAE,QAAQ,SAAS,GAAG,SAAS;AAErC,SAAO;AACT;ACeO,MAAM,aAA+B;AAAA,EAa1C,YAA6B,QAAoB;AAApB,SAAA,SAAA;AAC3B,SAAK,WAAW,CAAC,UAAgB,KAAK,cAAc,MAAM,IAAI;AAC9D,SAAK,OAAO,iBAAiB,WAAW,KAAK,QAAQ;AAAA,EACvD;AAAA,EAfQ,SAAiB;AAAA,EACjB,SAAkB;AAAA,EAClB,cAA0E;AAAA,EAC1E,gBAAwB;AAAA,EACxB,sBAAoD;AAAA,EACpD,gBAA4E;AAAA,EAC5E,kBAA0B;AAAA,EAC1B,uCAAqD,IAAA;AAAA,EACrD,qCAAiD,IAAA;AAAA,EAExC;AAAA,EAOjB,WAAoB;AAClB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,KAAK,SAAiB,YAA8C;AACxE,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,eAAe,sCAAsC;AAAA,IACjE;AACA,UAAM,KAAa,KAAK,WAAA;AACxB,SAAK,gBAAgB;AACrB,SAAK,sBAAsB;AAC3B,WAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAK,cAAc,EAAE,SAAS,OAAA;AAC9B,WAAK,KAAK,EAAE,IAAI,QAAQ,IAAI,SAAS;AAAA,IACvC,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,SAAS,UAAqB,UAA6B,IAAqB;AACpF,UAAM,KAAa,KAAK,WAAA;AACxB,WAAO,IAAI,QAAgB,CAAC,SAAS,WAAW;AAC9C,WAAK,iBAAiB,IAAI,IAAI,EAAE,SAAS,QAAQ;AACjD,WAAK,KAAK;AAAA,QACR,IAAI;AAAA,QACJ;AAAA,QACA;AAAA,QACA,SAAS,sBAAsB,OAAO;AAAA,MAAA,CACvC;AACD,cAAQ,QAAQ,iBAAiB,SAAS,MAAM,KAAK,KAAK,EAAE,IAAI,SAAS,GAAA,CAAI,CAAC;AAAA,IAChF,CAAC;AAAA,EACH;AAAA,EAEA,OAAO,OAAO,UAAqB,UAA6B,IAA+B;AAC7F,UAAM,KAAa,KAAK,WAAA;AACxB,UAAM,QAAsB,CAAA;AAC5B,QAAI,OAAgB;AACpB,QAAI,QAAsB;AAC1B,QAAI,SAA8B;AAElC,UAAM,SAAS,MAAY;AACzB,UAAI,QAAQ;AACV,cAAM,KAAK;AACX,iBAAS;AACT,WAAA;AAAA,MACF;AAAA,IACF;AAEA,SAAK,eAAe,IAAI,IAAI;AAAA,MAC1B,MAAM,CAAC,UAAgB;AACrB,cAAM,KAAK,KAAK;AAChB,eAAA;AAAA,MACF;AAAA,MACA,KAAK,MAAY;AACf,eAAO;AACP,eAAA;AAAA,MACF;AAAA,MACA,MAAM,CAAC,QAAc;AACnB,gBAAQ;AACR,eAAO;AACP,eAAA;AAAA,MACF;AAAA,IAAA,CACD;AAED,SAAK,KAAK;AAAA,MACR,IAAI;AAAA,MACJ;AAAA,MACA;AAAA,MACA,SAAS,sBAAsB,OAAO;AAAA,IAAA,CACvC;AACD,YAAQ,QAAQ,iBAAiB,SAAS,MAAM,KAAK,KAAK,EAAE,IAAI,SAAS,GAAA,CAAI,CAAC;AAE9E,QAAI;AACF,aAAO,MAAM;AACX,YAAI,MAAM,SAAS,GAAG;AACpB,gBAAM,QAAQ,MAAM,MAAA;AACpB,cAAI,MAAO,OAAM;AACjB;AAAA,QACF;AACA,YAAI,MAAO,OAAM;AACjB,YAAI,KAAM;AACV,cAAM,IAAI,QAAc,CAAC,MAAM;AAC7B,mBAAS;AAAA,QACX,CAAC;AAAA,MACH;AAAA,IACF,UAAA;AACE,WAAK,eAAe,OAAO,EAAE;AAAA,IAC/B;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,QAAgB,UAA6B,IAAqB;AAC/E,UAAM,KAAa,KAAK,WAAA;AACxB,WAAO,IAAI,QAAgB,CAAC,SAAS,WAAW;AAC9C,WAAK,iBAAiB,IAAI,IAAI,EAAE,SAAS,QAAQ;AACjD,WAAK,KAAK;AAAA,QACR,IAAI;AAAA,QACJ;AAAA,QACA;AAAA,QACA,SAAS,sBAAsB,OAAO;AAAA,MAAA,CACvC;AACD,cAAQ,QAAQ,iBAAiB,SAAS,MAAM,KAAK,KAAK,EAAE,IAAI,SAAS,GAAA,CAAI,CAAC;AAAA,IAChF,CAAC;AAAA,EACH;AAAA,EAEA,OAAO,iBACL,QACA,UAA6B,IACF;AAC3B,UAAM,KAAa,KAAK,WAAA;AACxB,UAAM,QAAsB,CAAA;AAC5B,QAAI,OAAgB;AACpB,QAAI,QAAsB;AAC1B,QAAI,SAA8B;AAElC,UAAM,SAAS,MAAY;AACzB,UAAI,QAAQ;AACV,cAAM,KAAK;AACX,iBAAS;AACT,WAAA;AAAA,MACF;AAAA,IACF;AAEA,SAAK,eAAe,IAAI,IAAI;AAAA,MAC1B,MAAM,CAAC,UAAgB;AACrB,cAAM,KAAK,KAAK;AAChB,eAAA;AAAA,MACF;AAAA,MACA,KAAK,MAAY;AACf,eAAO;AACP,eAAA;AAAA,MACF;AAAA,MACA,MAAM,CAAC,QAAc;AACnB,gBAAQ;AACR,eAAO;AACP,eAAA;AAAA,MACF;AAAA,IAAA,CACD;AAED,SAAK,KAAK;AAAA,MACR,IAAI;AAAA,MACJ;AAAA,MACA;AAAA,MACA,SAAS,sBAAsB
,OAAO;AAAA,IAAA,CACvC;AACD,YAAQ,QAAQ,iBAAiB,SAAS,MAAM,KAAK,KAAK,EAAE,IAAI,SAAS,GAAA,CAAI,CAAC;AAE9E,QAAI;AACF,aAAO,MAAM;AACX,YAAI,MAAM,SAAS,GAAG;AACpB,gBAAM,QAAQ,MAAM,MAAA;AACpB,cAAI,MAAO,OAAM;AACjB;AAAA,QACF;AACA,YAAI,MAAO,OAAM;AACjB,YAAI,KAAM;AACV,cAAM,IAAI,QAAc,CAAC,MAAM;AAC7B,mBAAS;AAAA,QACX,CAAC;AAAA,MACH;AAAA,IACF,UAAA;AACE,WAAK,eAAe,OAAO,EAAE;AAAA,IAC/B;AAAA,EACF;AAAA,EAEA,MAAM,SAAwB;AAC5B,QAAI,CAAC,KAAK,OAAQ;AAClB,QAAI,KAAK,eAAe;AACtB,YAAM,IAAI,eAAe,wCAAwC;AAAA,IACnE;AACA,UAAM,KAAa,KAAK,WAAA;AACxB,SAAK,kBAAkB;AACvB,WAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAK,gBAAgB,EAAE,SAAS,OAAA;AAChC,WAAK,KAAK,EAAE,IAAI,UAAU,IAAI;AAAA,IAChC,CAAC;AAAA,EACH;AAAA;AAAA,EAGA,YAAkB;AAChB,SAAK,OAAO,oBAAoB,WAAW,KAAK,QAAQ;AACxD,SAAK,OAAO,UAAA;AACZ,SAAK,SAAS;AAAA,EAChB;AAAA,EAEQ,aAAqB;AAC3B,UAAM,KAAK,KAAK;AAChB,SAAK,UAAU;AACf,WAAO;AAAA,EACT;AAAA,EAEQ,KAAK,KAA0B;AACrC,SAAK,OAAO,YAAY,GAAG;AAAA,EAC7B;AAAA,EAEQ,cAAc,KAA2B;AAC/C,YAAQ,IAAI,IAAA;AAAA,MACV,KAAK;AACH,YAAI,KAAK,eAAe,IAAI,OAAO,KAAK,eAAe;AACrD,eAAK,SAAS;AACd,eAAK,YAAY,QAAA;AACjB,eAAK,cAAc;AACnB,eAAK,sBAAsB;AAAA,QAC7B;AACA;AAAA,MACF,KAAK;AACH,YAAI,IAAI,OAAO,KAAK,eAAe;AACjC,eAAK,sBAAsB,IAAI,OAAO;AAAA,QACxC;AACA;AAAA,MACF,KAAK,aAAa;AAChB,cAAM,UAAU,KAAK,iBAAiB,IAAI,IAAI,EAAE;AAChD,YAAI,SAAS;AACX,kBAAQ,QAAQ,IAAI,IAAI;AACxB,eAAK,iBAAiB,OAAO,IAAI,EAAE;AAAA,QACrC;AACA;AAAA,MACF;AAAA,MACA,KAAK,SAAS;AACZ,cAAM,SAAS,KAAK,eAAe,IAAI,IAAI,EAAE;AAC7C,gBAAQ,KAAK,IAAI,KAAK;AACtB;AAAA,MACF;AAAA,MACA,KAAK,cAAc;AACjB,cAAM,SAAS,KAAK,eAAe,IAAI,IAAI,EAAE;AAC7C,gBAAQ,IAAA;AACR;AAAA,MACF;AAAA,MACA,KAAK;AACH,YAAI,KAAK,iBAAiB,IAAI,OAAO,KAAK,iBAAiB;AACzD,eAAK,SAAS;AACd,eAAK,cAAc,QAAA;AACnB,eAAK,gBAAgB;AAAA,QACvB;AACA;AAAA,MACF,KAAK;AACH;AAAA,MACF,KAAK,SAAS;AACZ,cAAM,MAAM,SAAS,IAAI,MAAM,IAAI,OAAO;AAC1C,YAAI,KAAK,eAAe,IAAI,OAAO,KAAK,eAAe;AACrD,eAAK,YAAY,OAAO,GAAG;AAC3B,eAAK,cAAc;AACnB,eAAK,sBAAsB;AAC3B;AAAA,QACF;AACA,YAAI,KAAK,iBAAiB,IAAI,OAAO,KAAK,iBAAiB;AACzD,eAAK,cAAc,OAAO,GAAG;AAC7B,eAAK,gBAAgB;AACrB;AAAA,QACF;AACA,cAAM,WAAW,KAAK,iBAAiB,IAAI,IAAI,EAAE;AACjD,YAAI,UAAU;AACZ,mBAAS,OAAO,GAAG;AACnB,eAAK,iBAAiB,OAAO,IAAI,EAAE;AACnC;AAAA,QACF;AACA,cAAM,SAAS,KAAK,eAAe,IAAI,IAAI,EAAE;AAC7C,YAAI,QAAQ;AACV,iBAAO,KAAK,GAAG;AACf;AAAA,QACF;AACA;AAAA,MACF;AAAA,IAAA;AAAA,EAEJ;AACF;AAEA,SAAS,SAAS,MAAc,SAAwB;AACtD,UAAQ,MAAA;AAAA,IACN,KAAK;AACH,aAAO,IAAI,eAAe,OAAO;AAAA,IACnC,KAAK;AACH,aAAO,IAAI,oBAAoB,OAAO;AAAA,IACxC,KAAK;AACH,aAAO,IAAI,uBAAuB,OAAO;AAAA,IAC3C,SAAS;AACP,YAAM,MAAM,IAAI,MAAM,OAAO;AAC7B,UAAI,OAAO;AACX,aAAO;AAAA,IACT;AAAA,EAAA;AAEJ;AC3TO,MAAM,gBAAuD,OAAO,OAAO;AAAA,EAChF,qBAAqB;AAAA,IACnB,IAAI;AAAA,IACJ,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ,cAAc;AAAA,IACd,UAAU;AAAA,IACV,eAAe;AAAA,IACf,aAAa;AAAA,EAAA;AAAA,EAEf,qBAAqB;AAAA,IACnB,IAAI;AAAA,IACJ,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ,cAAc;AAAA,IACd,UAAU;AAAA,IACV,eAAe;AAAA,IACf,aAAa;AAAA,EAAA;AAAA,EAEf,qBAAqB;AAAA,IACnB,IAAI;AAAA,IACJ,QAAQ;AAAA,IACR,YAAY;AAAA,IACZ,cAAc;AAAA,IACd,UAAU;AAAA,IACV,eAAe;AAAA,IACf,aAAa;AAAA,EAAA;AAEjB,CAAC;AASM,SAAS,mBAAmB,SAA8B;AAC/D,QAAM,SAAS,cAAc,OAAO;AACpC,MAAI,CAAC,QAAQ;AACX,UAAM,YAAY,OAAO,KAAK,aAAa,EAAE,KAAK,IAAI;AACtD,UAAM,IAAI,kBAAkB,kBAAkB,OAAO,wBAAwB,SAAS,GAAG;AAAA,EAC3F;AACA,SAAO;AACT;AAGO,SAAS,sBAAgC;AAC9C,SAAO,OAAO,KAAK,aAAa;AAClC;AChDO,SAAS,wBAAoC;AAClD,SAAO,IAAI,OAAO,IAAA;AAAA;AAAA,IAAA;AAAA,IAAA,YAAA;AAAA,EAAA,GAAmD;AAAA,IACnE,MAAM;AAAA,EAAA,CACP;AACH;AC0BO,MAAe,OAAO;AAAA,EACjB,YAEW,QAEH,QAChB;AAHmB,SAAA,SAAA;AAEH,SAAA,SAAA;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUH,aAAuB,aACrB,SACA,UAA+B,IACN;AACzB,UAAM,SAAS,mBAAmB,OAAO;AACzC,UAAM,SAAS,QAAQ,UAAU,OAAO,cAAc,OAAO;AAC7D,QAAI,CAAC,OAAO,YAAY;AACtB,YAAM,OA
AO,KAAK,OAAO,UAAU,QAAQ,UAAU;AAAA,IACvD;AACA,WAAO,EAAE,QAAQ,OAAA;AAAA,EACnB;AAAA,EAEA,OAAe,cAAc,SAAsC;AACjE,QAAI,QAAQ,UAAU;AACpB,aAAO,IAAI,aAAa,uBAAuB;AAAA,IACjD;AACA,WAAO,IAAI,aAAA;AAAA,EACb;AAAA;AAAA,EAGA,MAAM,SAAwB;AAC5B,UAAM,KAAK,OAAO,OAAA;AAAA,EACpB;AAAA;AAAA,EAGA,WAAoB;AAClB,WAAO,KAAK,OAAO,SAAA;AAAA,EACrB;AACF;AChFO,MAAM,UAAU;AAAA,EACrB,YAEkB,MAEA,SAEA,iBAEA,cAChB;AAPgB,SAAA,OAAA;AAEA,SAAA,UAAA;AAEA,SAAA,kBAAA;AAEA,SAAA,eAAA;AAAA,EACf;AACL;AAQO,MAAM,iBAAiB;AAAA,EAC5B,YAEkB,MAEA,QAEA,iBAEA,cAChB;AAPgB,SAAA,OAAA;AAEA,SAAA,SAAA;AAEA,SAAA,kBAAA;AAEA,SAAA,eAAA;AAAA,EACf;AACL;ACXO,MAAM,aAAa,OAAO;AAAA,EACd,UAAqB,CAAA;AAAA,EAC9B,eAA8B;AAAA,EAE9B,YAAY,QAAgB,QAAqB;AACvD,UAAM,QAAQ,MAAM;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,aAAa,OAAO,SAAiB,UAA+B,IAAmB;AACrF,UAAM,EAAE,QAAQ,OAAA,IAAW,MAAM,OAAO,aAAa,SAAS,OAAO;AACrE,WAAO,IAAI,KAAK,QAAQ,MAAM;AAAA,EAChC;AAAA;AAAA,EAGA,gBAAgB,QAAsB;AACpC,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,oBAA0B;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,eAAqB;AACnB,SAAK,QAAQ,SAAS;AAAA,EACxB;AAAA;AAAA,EAGA,aAAiC;AAC/B,WAAO,KAAK,QAAQ,MAAA;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,KAAK,SAAiB,UAA6B,IAAwB;AAC/E,UAAM,WAAW,KAAK,cAAc,OAAO;AAC3C,UAAM,OAAO,MAAM,KAAK,OAAO,SAAS,UAAU,OAAO;AACzD,UAAM,UAAmB,EAAE,MAAM,QAAQ,SAAS,QAAA;AAClD,UAAM,eAAwB,EAAE,MAAM,aAAa,SAAS,KAAA;AAC5D,SAAK,QAAQ,KAAK,SAAS,YAAY;AACvC,WAAO,IAAI,UAAU,MAAM,cAAc,GAAG,MAAM;AAAA,EACpD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,OAAO,OAAO,SAAiB,UAA6B,IAA+B;AACzF,UAAM,WAAW,KAAK,cAAc,OAAO;AAC3C,UAAM,UAAmB,EAAE,MAAM,QAAQ,SAAS,QAAA;AAClD,QAAI,MAAc;AAClB,qBAAiB,SAAS,KAAK,OAAO,OAAO,UAAU,OAAO,GAAG;AAC/D,aAAO,MAAM;AACb,YAAM;AAAA,IACR;AACA,UAAM,eAAwB,EAAE,MAAM,aAAa,SAAS,IAAA;AAC5D,SAAK,QAAQ,KAAK,SAAS,YAAY;AAAA,EACzC;AAAA,EAEQ,cAAc,aAAgC;AACpD,UAAM,WAAsB,CAAA;AAC5B,QAAI,KAAK,cAAc;AACrB,eAAS,KAAK,EAAE,MAAM,UAAU,SAAS,KAAK,cAAc;AAAA,IAC9D;AACA,aAAS,KAAK,GAAG,KAAK,OAAO;AAC7B,aAAS,KAAK,EAAE,MAAM,QAAQ,SAAS,aAAa;AACpD,WAAO;AAAA,EACT;AACF;ACnFO,MAAM,mBAAmB,OAAO;AAAA,EAC7B,YAAY,QAAgB,QAAqB;AACvD,UAAM,QAAQ,MAAM;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,aAAa,OAAO,SAAiB,UAA+B,IAAyB;AAC3F,UAAM,EAAE,QAAQ,OAAA,IAAW,MAAM,OAAO,aAAa,SAAS,OAAO;AACrE,WAAO,IAAI,WAAW,QAAQ,MAAM;AAAA,EACtC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,QAAQ,QAAgB,UAA6B,IAA+B;AACxF,UAAM,OAAO,MAAM,KAAK,OAAO,SAAS,QAAQ,OAAO;AACvD,WAAO,IAAI,iBAAiB,MAAM,QAAQ,GAAG,MAAM;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,OAAO,QAAgB,UAA6B,IAA+B;AACxF,qBAAiB,SAAS,KAAK,OAAO,iBAAiB,QAAQ,OAAO,GAAG;AACvE,YAAM;AAAA,IACR;AAAA,EACF;AACF;AC5BA,IAAI,qBAAwD;AAE5D,eAAe,yBAAqD;AAClE,MAAI,CAAC,oBAAoB;AACvB,yBAAqB,OAAO,iBAAiB,EAAE,KAAK,CAAC,OAAO;AAAA,MAC1D,iBAAiB,EAAE;AAAA,MACnB,oBAAoB,EAAE;AAAA,IAAA,EACtB;AAAA,EACJ;AACA,SAAO;AACT;AAEA,eAAe,kBAAuC;AACpD,MAAI,OAAO,cAAc,eAAe,CAAC,UAAU,SAAS,UAAU;AACpE,WAAO,EAAE,OAAO,GAAG,OAAO,EAAA;AAAA,EAC5B;AACA,QAAM,WAAW,MAAM,UAAU,QAAQ,SAAA;AACzC,SAAO;AAAA,IACL,OAAO,SAAS,SAAS;AAAA,IACzB,OAAO,SAAS,SAAS;AAAA,EAAA;AAE7B;AAuBO,MAAM,WAAW;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,UAA6B,IAAI;AAC3C,SAAK,eAAe,QAAQ;AAC5B,SAAK,kBAAkB,QAAQ;AAC/B,SAAK,eAAe,QAAQ,YAAY;AAAA,EAC1C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,IAAI,SAAmC;AAC3C,UAAM,YAAoB,mBAAmB,OAAO,EAAE;AACtD,UAAM,KAAK,KAAK,iBAAiB,MAAM,0BAA0B;AACjE,WAAO,GAAG,SAAS;AAAA,EACrB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,OAAO,SAAgC;AAC3C,UAAM,YAAoB,mBAAmB,OAAO,EAAE;AACtD,UAAM,KAAK,KAAK,oBAAoB,MAAM,0BAA0B;AACpE,UAAM,GAAG,SAAS;AAAA,EACpB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,OAAoC;AACxC,UAAM,KAAK,KAAK,iBAAiB,MAAM,0BAA0B;AACjE,
UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,OAAO,OAAO,aAAa,EAAE,IAAI,OAAO,WAAW;AACjD,cAAM,SAAkB,MAAM,GAAG,OAAO,QAAQ;AAChD,YAAI,CAAC,OAAQ,QAAO;AACpB,cAAM,QAA0B;AAAA,UAC9B,IAAI,OAAO;AAAA,UACX,WAAW,OAAO;AAAA,UAClB,QAAQ,OAAO;AAAA,UACf,YAAY,OAAO;AAAA,QAAA;AAErB,eAAO;AAAA,MACT,CAAC;AAAA,IAAA;AAEH,WAAO,OAAO,OAAO,CAAC,MAA6B,MAAM,IAAI;AAAA,EAC/D;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,oBAAoB,MAAM,0BAA0B;AACpE,UAAM,QAAQ,IAAI,OAAO,OAAO,aAAa,EAAE,IAAI,CAAC,MAAM,GAAG,EAAE,QAAQ,CAAC,CAAC;AAAA,EAC3E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,gBAAqC;AACzC,WAAO,KAAK,aAAA;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,OAAO,YAAY,SAAuB;AACxC,QAAI,EAAE,WAAW,gBAAgB;AAC/B,YAAM,YAAY,OAAO,KAAK,aAAa,EAAE,KAAK,IAAI;AACtD,YAAM,IAAI,kBAAkB,kBAAkB,OAAO,wBAAwB,SAAS,GAAG;AAAA,IAC3F;AAAA,EACF;AACF;AC1KA,eAAsB,cAAc,QAAoD;AACtF,MAAI,MAAc;AAClB,mBAAiB,SAAS,QAAQ;AAChC,WAAO,MAAM;AAAA,EACf;AACA,SAAO;AACT;AAYA,gBAAuB,IACrB,QACA,SAC2B;AAC3B,mBAAiB,SAAS,QAAQ;AAChC,YAAQ,KAAK;AACb,UAAM;AAAA,EACR;AACF;ACcO,MAAM,UAAkB;"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "localm-web",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "Browser-only TypeScript SDK for running LLMs and SLMs locally with WebGPU. Ultralytics-style DX, Vite-first.",
   "type": "module",
   "main": "./dist/index.js",
@@ -46,7 +46,7 @@
     "vitest": "^3.2.4"
   },
   "engines": {
-    "node": ">=18.0.0"
+    "node": ">=20.19.0"
   },
   "overrides": {
     "esbuild": "^0.25.0"