npm - localm-web - Versions diffs - 0.1.0 → 0.3.0 - Mend

localm-web 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/CHANGELOG.md +154 -0
package/README.md +3 -3
package/dist/assets/index-ChQoBCqA.js +23168 -0
package/dist/assets/index-ChQoBCqA.js.map +1 -0
package/dist/assets/inference.worker-CwvQtobb.js +330 -0
package/dist/assets/inference.worker-CwvQtobb.js.map +1 -0
package/dist/index.d.ts +634 -0
package/dist/index.js +807 -3
package/dist/index.js.map +1 -1
package/package.json +9 -2

package/dist/index.d.ts CHANGED Viewed

@@ -10,6 +10,26 @@
 export declare class BackendNotAvailableError extends LocalmWebError {
 }
+/** Snapshot of a single cached model's metadata. */
+export declare interface CachedModelEntry {
+    /** Friendly id from the registry (e.g. `"llama-3.2-1b-int4"`). */
+    id: string;
+    /** Backend-specific id (e.g. WebLLM `webllmId`). */
+    backendId: string;
+    /** Human-readable family name. */
+    family: string;
+    /** Approx parameter count, e.g. `"1B"`. */
+    parameters: string;
+}
+/** Aggregate storage usage reported by the browser. */
+export declare interface CacheUsage {
+    /** Bytes used by the entire origin's storage (not just our cache). */
+    usage: number;
+    /** Bytes the browser is willing to give the origin. */
+    quota: number;
+}
 /**
  * Multi-turn chat task.
  *
@@ -111,6 +131,209 @@ export declare class ChatReply {
  */
 export declare function collectStream(stream: AsyncIterable<TokenChunk>): Promise<string>;
+/**
+ * Raw text-completion task.
+ *
+ * Unlike {@link Chat}, `Completion` does not maintain a conversation history
+ * and does not apply a chat template. The prompt is fed to the model verbatim
+ * and the model continues it. Useful for "Once upon a time…" style generation,
+ * code completion, or any scenario where chat formatting would interfere.
+ *
+ * Use {@link Completion.create} to construct an instance — the constructor is
+ * private.
+ *
+ * @example
+ * ```ts
+ * const comp = await Completion.create("qwen2.5-1.5b-int4");
+ * const result = await comp.predict("Once upon a time", { maxTokens: 50 });
+ * console.log(result.text);
+ * ```
+ *
+ * @example Streaming
+ * ```ts
+ * const controller = new AbortController();
+ * let text = "";
+ * for await (const token of comp.stream("def fibonacci(n):", { signal: controller.signal })) {
+ *   // Accumulate (or append to the DOM); `process.stdout` only exists in Node.
+ *   text += token.text;
+ * }
+ * ```
+ */
+export declare class Completion extends LMTask {
+    private constructor();
+    /**
+     * Create and load a `Completion` task for the given model.
+     *
+     * @param modelId - Friendly model id from the registry (e.g. `"qwen2.5-1.5b-int4"`).
+     * @param options - Optional creation options (progress callback, engine
+     *   override, `inWorker` toggle).
+     * @returns A promise resolving to the `Completion` instance.
+     */
+    static create(modelId: string, options?: LMTaskCreateOptions): Promise<Completion>;
+    /**
+     * Generate a continuation for the given prompt.
+     *
+     * @param prompt - Raw text fed to the model.
+     * @param options - Generation options.
+     * @returns A {@link CompletionResult} with the generated continuation
+     *   (the prompt itself is not included in its `text`).
+     */
+    predict(prompt: string, options?: GenerationOptions): Promise<CompletionResult>;
+    /**
+     * Stream a continuation for the given prompt as an async iterable of token
+     * chunks.
+     *
+     * @param prompt - Raw text fed to the model.
+     * @param options - Generation options including an optional `signal`.
+     * @returns Async iterable of {@link TokenChunk}s; consume it with `for await`.
+     */
+    stream(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
+}
+/**
+ * Result returned by `Completion.predict()`.
+ *
+ * Holds the generated continuation text (the prompt itself is not included)
+ * plus metadata about the generation loop.
+ */
+export declare class CompletionResult {
+    /** The generated text (continuation only, prompt excluded). */
+    readonly text: string;
+    /** The original prompt that was fed to the model. */
+    readonly prompt: string;
+    /** Number of tokens generated. 0 when the engine does not report it. */
+    readonly tokensGenerated: number;
+    /** Why the generation loop stopped. */
+    readonly finishReason: FinishReason;
+    constructor(
+    /** The generated text (continuation only, prompt excluded). */
+    text: string,
+    /** The original prompt that was fed to the model. */
+    prompt: string,
+    /** Number of tokens generated. 0 when the engine does not report it. */
+    tokensGenerated: number,
+    /** Why the generation loop stopped. */
+    finishReason: FinishReason);
+}
+/**
+ * Spawn a new inference Web Worker.
+ *
+ * Uses Vite/webpack-friendly `new Worker(new URL(...), { type: "module" })`
+ * syntax. The bundler emits the worker as a separate ES module chunk.
+ *
+ * Consumers normally do not call this directly — `LMTask.create()` invokes it
+ * whenever `inWorker` is enabled, which is the default from v0.3 unless
+ * `inWorker: false` or a custom `engine` is passed. It is exported for
+ * advanced scenarios (custom worker management, pooling, lifecycle
+ * integration with a host app), e.g. to hand the worker to the
+ * {@link WorkerEngine} constructor.
+ *
+ * @returns A {@link WorkerLike}-compatible Worker instance.
+ */
+export declare function createInferenceWorker(): WorkerLike;
+/**
+ * Curated registry of supported embedding models for v0.3.
+ *
+ * Each entry maps a friendly id to the underlying transformers.js model id.
+ */
+export declare const EMBEDDING_PRESETS: Readonly<Record<string, EmbeddingPreset>>;
+/** Curated metadata for a supported embedding model. */
+export declare interface EmbeddingPreset {
+    /** Friendly identifier (e.g. `"bge-small-en-v1.5"`). */
+    id: string;
+    /** Family name (e.g. `"BGE"`). */
+    family: string;
+    /** Embedding dimension. */
+    dimension: number;
+    /** Maximum input length in tokens. */
+    maxTokens: number;
+    /** Identifier passed to `@huggingface/transformers`. */
+    transformersId: string;
+    /** Approximate quantization scheme (e.g. `"fp32"`, `"int8"`). */
+    quantization: string;
+    /** Short human description. */
+    description: string;
+}
+/**
+ * Sentence embedding task backed by `@huggingface/transformers`.
+ *
+ * Use {@link Embeddings.create} to construct an instance — the constructor is
+ * private. The default backend lazy-loads the transformers.js runtime; tests
+ * inject a {@link EmbedPipeline} mock instead.
+ *
+ * @example
+ * ```ts
+ * const emb = await Embeddings.create("bge-small-en-v1.5");
+ * const vectors = await emb.embed(["hello world", "another sentence"]);
+ * console.log(vectors[0].length); // 384
+ * ```
+ */
+export declare class Embeddings {
+    private readonly pipeline;
+    /** Resolved metadata for the loaded model. */
+    readonly preset: EmbeddingPreset;
+    private constructor();
+    /**
+     * Create and load an `Embeddings` task for the given model.
+     *
+     * @param modelId - Friendly id from the embedding registry.
+     * @param options - Optional creation options.
+     * @throws UnknownModelError if `modelId` is not in the registry.
+     * @throws ModelLoadError if the underlying pipeline fails to load.
+     */
+    static create(modelId: string, options?: EmbeddingsCreateOptions): Promise<Embeddings>;
+    /**
+     * Encode an array of strings into dense vectors.
+     *
+     * Returns one vector per input, in the same order. Empty input array
+     * returns an empty array (no error).
+     *
+     * @param texts - Input strings.
+     * @param options - Pooling + normalization. Defaults: `pooling: "mean"`, `normalize: true`.
+     */
+    embed(texts: string[], options?: EmbedOptions): Promise<number[][]>;
+    /**
+     * Convenience: encode a single string and return its vector.
+     *
+     * @param text - Input string.
+     * @param options - Forwarded to {@link Embeddings.embed}.
+     */
+    embedSingle(text: string, options?: EmbedOptions): Promise<number[]>;
+    /** Embedding dimension exposed by the loaded model. */
+    get dimension(): number;
+    /** Release pipeline resources. Safe to call multiple times. */
+    unload(): Promise<void>;
+}
+/** Options accepted by {@link Embeddings.create}. */
+export declare interface EmbeddingsCreateOptions {
+    /** Optional callback for model load progress updates. */
+    onProgress?: ProgressCallback;
+    /** Override the embedding pipeline. Intended for testing. */
+    pipeline?: EmbedPipeline;
+}
+/** Options accepted by {@link Embeddings.embed}. */
+export declare interface EmbedOptions {
+    /** L2-normalize each vector. Recommended for cosine similarity downstream. Default `true`. */
+    normalize?: boolean;
+    /**
+     * Pooling strategy. BGE-style models use `"cls"`. Most sentence-transformers use `"mean"`. Default `"mean"`.
+     *
+     * NOTE(review): as far as these typings show, the default stays `"mean"`
+     * even for BGE presets such as `"bge-small-en-v1.5"`. Pass
+     * `pooling: "cls"` explicitly for BGE-style models unless the
+     * implementation is confirmed to choose the strategy per preset.
+     */
+    pooling?: "mean" | "cls";
+}
+/**
+ * Minimal pipeline contract that {@link Embeddings} depends on.
+ *
+ * The default implementation wraps `@huggingface/transformers`. Tests inject
+ * a fake satisfying the same shape — they never load the real runtime.
+ */
+export declare interface EmbedPipeline {
+    /**
+     * Run the encoder on a batch of inputs and return raw vectors.
+     *
+     * @param texts - Input strings.
+     * @param options - Pooling + normalization passed to the underlying pipeline.
+     */
+    embed(texts: string[], options: Required<EmbedOptions>): Promise<number[][]>;
+    /** Release pipeline resources. */
+    unload?(): Promise<void>;
+}
 /**
  * Runtime-agnostic inference contract.
  *
@@ -148,6 +371,32 @@ export declare interface Engine {
      * @throws GenerationAbortedError if `options.signal` is triggered.
      */
     stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
+    /**
+     * Generate a non-streaming raw text completion.
+     *
+     * Unlike {@link Engine.generate}, this skips the chat template and feeds the
+     * prompt to the underlying model verbatim. Useful for "Once upon a time…"
+     * style continuation.
+     *
+     * @param prompt - Raw text fed to the model.
+     * @param options - Generation options.
+     * @returns The full generated text (excluding the prompt).
+     * @throws ModelNotLoadedError if called before {@link Engine.load}.
+     * @throws GenerationAbortedError if `options.signal` is triggered.
+     */
+    complete(prompt: string, options?: GenerationOptions): Promise<string>;
+    /**
+     * Stream a raw text completion as an async iterable of token chunks.
+     *
+     * Unlike {@link Engine.stream}, this skips the chat template.
+     *
+     * @param prompt - Raw text fed to the model.
+     * @param options - Generation options.
+     * @returns Async iterable yielding token chunks. The final chunk has `done: true`.
+     * @throws ModelNotLoadedError if called before {@link Engine.load}.
+     * @throws GenerationAbortedError if `options.signal` is triggered.
+     */
+    streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
     /** Release any resources held by the engine. Safe to call when not loaded. */
     unload(): Promise<void>;
     /** Whether a model is currently loaded and ready for inference. */
@@ -180,9 +429,15 @@ export declare interface GenerationOptions {
     jsonSchema?: object;
 }
+/** Return the list of supported embedding model ids. */
+export declare function listSupportedEmbeddingModels(): string[];
 /** Return the list of supported friendly model ids. */
 export declare function listSupportedModels(): string[];
+/** Return the list of supported reranker model ids. */
+export declare function listSupportedRerankerModels(): string[];
 /**
  * Base class shared by the engine-backed language-model tasks (`Chat` since
  * v0.1, `Completion` since v0.3). `Embeddings` and `Reranker` are standalone classes.
@@ -213,6 +468,7 @@ export declare abstract class LMTask {
      * @param options - Task creation options.
      */
     protected static createEngine(modelId: string, options?: LMTaskCreateOptions): Promise<ResolvedEngine>;
+    private static defaultEngine;
     /** Release engine resources. Safe to call multiple times. */
     unload(): Promise<void>;
     /** Whether the underlying engine has a loaded model. */
@@ -228,6 +484,16 @@ export declare interface LMTaskCreateOptions {
      * Production callers should let the SDK pick a backend automatically.
      */
     engine?: Engine;
+    /**
+     * Run inference inside a Web Worker, isolating the UI thread from
+     * tokenization and generation. **Default `true` from v0.3** — the
+     * `WorkerEngine` is the recommended path. Pass `false` to keep
+     * inference on the main thread (useful for environments without
+     * `Worker` support or when debugging the runtime directly).
+     *
+     * Ignored when {@link engine} is provided.
+     */
+    inWorker?: boolean;
 }
 /**
@@ -269,10 +535,110 @@ export declare interface Message {
  */
 export declare const MODEL_PRESETS: Readonly<Record<string, ModelPreset>>;
+/**
+ * Inspect and manage cached model weights.
+ *
+ * `localm-web` does not download or cache weights itself — that work is owned
+ * by `@mlc-ai/web-llm`, which writes to the browser Cache API. `ModelCache`
+ * is a thin wrapper that lets a consuming app surface cache state in its UI:
+ * "this model is downloaded", "you have 1.4 GB cached, free up space?",
+ * "clear all models on logout".
+ *
+ * @example
+ * ```ts
+ * const cache = new ModelCache();
+ * if (await cache.has("llama-3.2-1b-int4")) {
+ *   console.log("ready offline");
+ * }
+ * const cached = await cache.list();
+ * await cache.delete("phi-3.5-mini-int4");
+ * const usage = await cache.estimateUsage();
+ * console.log(`${usage.usage} / ${usage.quota} bytes`);
+ * ```
+ */
+export declare class ModelCache {
+    private readonly hasModelHook;
+    private readonly deleteModelHook;
+    private readonly estimateHook;
+    constructor(options?: ModelCacheOptions);
+    /**
+     * Whether the model's weights are present in the browser cache.
+     *
+     * @param modelId - Friendly id from the registry.
+     * @throws UnknownModelError if `modelId` is not in the registry.
+     */
+    has(modelId: string): Promise<boolean>;
+    /**
+     * Delete a single model's weights from the browser cache. No-op when the
+     * model is not cached.
+     *
+     * @param modelId - Friendly id from the registry.
+     * @throws UnknownModelError if `modelId` is not in the registry.
+     */
+    delete(modelId: string): Promise<void>;
+    /**
+     * List the registry models that are currently cached.
+     *
+     * Iterates `MODEL_PRESETS` and probes each one. Only returns models known
+     * to the SDK — models cached by external WebLLM calls outside our registry
+     * are not included.
+     *
+     * @returns Empty list when nothing is cached.
+     */
+    list(): Promise<CachedModelEntry[]>;
+    /**
+     * Delete every registry model from the cache. Useful for logout flows or
+     * "reset" buttons. Models cached outside the registry are not touched.
+     */
+    clear(): Promise<void>;
+    /**
+     * Aggregate storage stats from the browser. Returned numbers cover the
+     * entire origin (Cache API + IndexedDB + Service Workers + OPFS), not
+     * just our model cache — use it for "you have X of Y available" hints.
+     */
+    estimateUsage(): Promise<CacheUsage>;
+    /**
+     * Throw a descriptive error if the given id is not in the registry.
+     * Exposed for code paths that want to validate before calling other
+     * methods (those already throw on their own).
+     *
+     * @throws UnknownModelError
+     */
+    static assertKnown(modelId: string): void;
+}
+/**
+ * Hooks the {@link ModelCache} uses to talk to the underlying runtime and
+ * the browser. Tests inject mocks; production code leaves them undefined,
+ * letting `ModelCache` resolve the real `@mlc-ai/web-llm` helpers and
+ * `navigator.storage.estimate()` lazily.
+ */
+export declare interface ModelCacheOptions {
+    /** Override `hasModelInCache` from the runtime. */
+    hasModel?: (backendId: string) => Promise<boolean>;
+    /** Override `deleteModelInCache` from the runtime. */
+    deleteModel?: (backendId: string) => Promise<void>;
+    /** Override `navigator.storage.estimate()`. */
+    estimate?: () => Promise<CacheUsage>;
+}
 /** Thrown when a model fails to load (network, parsing, runtime init). */
 export declare class ModelLoadError extends LocalmWebError {
 }
+/**
+ * Lifecycle phase of a model load.
+ *
+ * - `downloading`: weight files are being fetched from the network or cache.
+ * - `compiling`: the runtime is preparing the model (shader compilation,
+ *   tensor allocation, KV cache setup).
+ * - `loading`: a generic "still working" phase reported by the runtime when
+ *   it has not classified the work into download or compile.
+ * - `ready`: the model is loaded and the engine is ready for inference.
+ *   Emitted exactly once, at the end of a successful load.
+ */
+export declare type ModelLoadPhase = "downloading" | "compiling" | "loading" | "ready";
 /** Progress event emitted while a model is loading. */
 export declare interface ModelLoadProgress {
     /** Fraction of total work completed, in [0, 1]. */
@@ -283,6 +649,8 @@ export declare interface ModelLoadProgress {
     loaded: number;
     /** Total bytes to load. 0 when unavailable. */
     total: number;
+    /** Lifecycle phase classified from the runtime's status text. */
+    phase: ModelLoadPhase;
 }
 /** Thrown when an inference call is made before a model has loaded. */
@@ -316,12 +684,147 @@ export declare type ProgressCallback = (progress: ModelLoadProgress) => void;
 export declare class QuotaExceededError extends LocalmWebError {
 }
+/** A document paired with its score, for {@link Reranker.rank}. */
+export declare interface RankedDocument {
+    /** The document text. */
+    text: string;
+    /** Score from the cross-encoder. */
+    score: number;
+    /** Original index of the document in the input array. */
+    index: number;
+}
+/**
+ * Cross-encoder reranking task backed by `@huggingface/transformers`.
+ *
+ * Use {@link Reranker.create} to construct an instance — the constructor is
+ * private. Useful as a second-stage step in a retrieve-then-rerank pipeline:
+ * pull top-K candidates with a fast embedding similarity, then rerank with
+ * a cross-encoder for higher precision.
+ *
+ * @example
+ * ```ts
+ * const rerank = await Reranker.create("bge-reranker-base");
+ * const scores = await rerank.score("what is webgpu?", [
+ *   "WebGPU is a modern graphics API",
+ *   "Bananas grow on trees",
+ * ]);
+ * // scores[0] >> scores[1]
+ * ```
+ *
+ * @example Ranked output sorted by score
+ * ```ts
+ * const ranked = await rerank.rank("what is webgpu?", docs);
+ * for (const r of ranked) console.log(r.score, r.text);
+ * ```
+ */
+export declare class Reranker {
+    private readonly pipeline;
+    /** Resolved metadata for the loaded model. */
+    readonly preset: RerankerPreset;
+    private constructor();
+    /**
+     * Create and load a `Reranker` task for the given model.
+     *
+     * @param modelId - Friendly id from the reranker registry.
+     * @param options - Optional creation options.
+     * @throws UnknownModelError if `modelId` is not in the registry.
+     * @throws ModelLoadError if the underlying pipeline fails to load.
+     */
+    static create(modelId: string, options?: RerankerCreateOptions): Promise<Reranker>;
+    /**
+     * Score each document against the query. Returns one score per doc, in
+     * the same order. Empty `docs` returns `[]` (no error).
+     *
+     * @param query - Query string.
+     * @param docs - Documents to score.
+     * @param options - `sigmoid: true` maps logits into `[0, 1]`.
+     */
+    score(query: string, docs: string[], options?: RerankOptions): Promise<number[]>;
+    /**
+     * Score and sort documents by score in descending order. Returns a list of
+     * {@link RankedDocument}s carrying the original index.
+     *
+     * @param query - Query string.
+     * @param docs - Documents to rank.
+     * @param options - Forwarded to {@link Reranker.score}.
+     */
+    rank(query: string, docs: string[], options?: RerankOptions): Promise<RankedDocument[]>;
+    /** Release pipeline resources. Safe to call multiple times. */
+    unload(): Promise<void>;
+}
+/**
+ * Curated registry of supported reranker models for v0.3.
+ *
+ * Each entry is a {@link RerankerPreset}; its `transformersId` is the
+ * identifier passed to `@huggingface/transformers`.
+ */
+export declare const RERANKER_PRESETS: Readonly<Record<string, RerankerPreset>>;
+/** Options accepted by {@link Reranker.create}. */
+export declare interface RerankerCreateOptions {
+    /** Optional callback for model load progress updates. */
+    onProgress?: ProgressCallback;
+    /** Override the rerank pipeline. Intended for testing. */
+    pipeline?: RerankPipeline;
+}
+/** Curated metadata for a supported reranker (cross-encoder) model. */
+export declare interface RerankerPreset {
+    /** Friendly identifier (e.g. `"bge-reranker-base"`). */
+    id: string;
+    /** Family name (e.g. `"BGE Reranker"`). */
+    family: string;
+    /** Maximum input length in tokens (combined query + document). */
+    maxTokens: number;
+    /** Identifier passed to `@huggingface/transformers`. */
+    transformersId: string;
+    /** Approximate quantization (e.g. `"fp32"`). */
+    quantization: string;
+    /** Short human description. */
+    description: string;
+}
+/** Options accepted by {@link Reranker.score}. */
+export declare interface RerankOptions {
+    /**
+     * Apply sigmoid to logits to map scores into `[0, 1]`. Recommended when the
+     * downstream code uses scores as probabilities. Default `false` (raw logits).
+     */
+    sigmoid?: boolean;
+}
+/**
+ * Minimal pipeline contract that {@link Reranker} depends on.
+ *
+ * The default implementation wraps `@huggingface/transformers`. Tests inject
+ * a fake satisfying the same shape — they never load the real runtime.
+ */
+export declare interface RerankPipeline {
+    /**
+     * Score `(query, doc)` pairs. One score per doc, in the same order.
+     *
+     * @param query - Single query string.
+     * @param docs - Documents to score against the query.
+     */
+    score(query: string, docs: string[]): Promise<number[]>;
+    /** Release pipeline resources. */
+    unload?(): Promise<void>;
+}
 /** Internal payload returned by {@link LMTask.createEngine}. */
 declare interface ResolvedEngine {
     engine: Engine;
     preset: ModelPreset;
 }
+/**
+ * Resolve a friendly embedding model id to its full preset metadata.
+ *
+ * @param modelId - Friendly id (e.g. `"bge-small-en-v1.5"`).
+ * @returns The matching preset.
+ * @throws UnknownModelError if no preset matches.
+ */
+export declare function resolveEmbeddingPreset(modelId: string): EmbeddingPreset;
 /**
  * Resolve a friendly model id to its full preset metadata.
  *
@@ -331,12 +834,28 @@ declare interface ResolvedEngine {
  */
 export declare function resolveModelPreset(modelId: string): ModelPreset;
+/**
+ * Resolve a friendly reranker model id to its full preset metadata.
+ *
+ * @param modelId - Friendly id (e.g. `"bge-reranker-base"`).
+ * @returns The matching preset.
+ * @throws UnknownModelError if no preset matches.
+ */
+export declare function resolveRerankerPreset(modelId: string): RerankerPreset;
 /**
  * Public type primitives for localm-web.
  */
 /** Conversation roles supported by chat templates. */
 export declare type Role = "system" | "user" | "assistant" | "tool";
+/**
+ * Subset of {@link GenerationOptions} that survives `postMessage`.
+ *
+ * `AbortSignal` cannot be cloned across the worker boundary, so it is replaced
+ * by a separate `{ op: "abort" }` {@link WorkerRequest} message keyed on the
+ * same operation id.
+ */
+declare type SerializableGenerationOptions = Omit<GenerationOptions, "signal">;
 /**
  * Wrap an async iterable so that each `TokenChunk` is also passed to a
  * caller-supplied side-effect callback before being yielded downstream.
@@ -370,4 +889,119 @@ export declare const VERSION: string;
 export declare class WebGPUUnavailableError extends LocalmWebError {
 }
+/**
+ * Engine implementation that proxies all calls to a Web Worker.
+ *
+ * The worker holds the actual {@link WebLLMEngine}; this class is a thin RPC
+ * shell that serializes requests, tracks pending operations by a numeric id,
+ * and turns worker responses back into Promises and async iterables.
+ *
+ * Use {@link createInferenceWorker} to obtain a real worker. Tests can pass a
+ * {@link WorkerLike} mock implementing the same `postMessage` /
+ * `addEventListener` surface.
+ */
+export declare class WorkerEngine implements Engine {
+    private readonly worker;
+    private nextId;
+    private loaded;
+    private currentLoad;
+    private currentLoadId;
+    private currentLoadProgress;
+    private currentUnload;
+    private currentUnloadId;
+    private pendingGenerates;
+    private pendingStreams;
+    private readonly listener;
+    constructor(worker: WorkerLike);
+    isLoaded(): boolean;
+    load(modelId: string, onProgress?: ProgressCallback): Promise<void>;
+    generate(messages: Message[], options?: GenerationOptions): Promise<string>;
+    stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
+    complete(prompt: string, options?: GenerationOptions): Promise<string>;
+    streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
+    unload(): Promise<void>;
+    /** Tear down the underlying worker. The engine is unusable after this. */
+    terminate(): void;
+    private allocateId;
+    private send;
+    private handleMessage;
+}
+/** Subset of `Worker` we depend on. Lets tests inject a mock. */
+export declare interface WorkerLike {
+    postMessage(message: WorkerRequest): void;
+    addEventListener(type: "message", listener: (event: MessageEvent<WorkerResponse>) => void): void;
+    removeEventListener(type: "message", listener: (event: MessageEvent<WorkerResponse>) => void): void;
+    terminate(): void;
+}
+/**
+ * Operation request sent from the main thread to the worker.
+ *
+ * Discriminated on `op`; every variant carries the numeric `id` of the
+ * operation. The generation variants take
+ * {@link SerializableGenerationOptions} (no `signal`); cancellation is
+ * requested with a separate `"abort"` message keyed on the same `id`.
+ *
+ * NOTE(review): `"isLoaded"` is camelCase while its response is
+ * `"is-loaded"` and the other multi-word op is `"stream-completion"`. The
+ * literals are the wire protocol, so any rename must change both sides.
+ */
+declare type WorkerRequest = {
+    op: "load";
+    id: number;
+    modelId: string;
+} | {
+    op: "generate";
+    id: number;
+    messages: Message[];
+    options: SerializableGenerationOptions;
+} | {
+    op: "stream";
+    id: number;
+    messages: Message[];
+    options: SerializableGenerationOptions;
+} | {
+    op: "complete";
+    id: number;
+    prompt: string;
+    options: SerializableGenerationOptions;
+} | {
+    op: "stream-completion";
+    id: number;
+    prompt: string;
+    options: SerializableGenerationOptions;
+} | {
+    op: "abort";
+    id: number;
+} | {
+    op: "unload";
+    id: number;
+} | {
+    op: "isLoaded";
+    id: number;
+};
+/**
+ * Operation response sent from the worker back to the main thread.
+ *
+ * Discriminated on `op`; every variant carries a numeric `id`, which
+ * {@link WorkerEngine} uses to track pending operations. The `"error"`
+ * variant carries the error's `name` and `message` as plain strings.
+ */
+declare type WorkerResponse = {
+    op: "loaded";
+    id: number;
+} | {
+    op: "generated";
+    id: number;
+    text: string;
+} | {
+    op: "progress";
+    id: number;
+    payload: ModelLoadProgress;
+} | {
+    op: "token";
+    id: number;
+    chunk: TokenChunk;
+} | {
+    op: "stream-end";
+    id: number;
+} | {
+    op: "error";
+    id: number;
+    name: string;
+    message: string;
+} | {
+    op: "unloaded";
+    id: number;
+} | {
+    op: "is-loaded";
+    id: number;
+    value: boolean;
+};
 export { }