localm-web 0.1.0 → 0.3.0

package/dist/index.d.ts CHANGED
@@ -10,6 +10,26 @@
  export declare class BackendNotAvailableError extends LocalmWebError {
  }

+ /** Snapshot of a single cached model's metadata. */
+ export declare interface CachedModelEntry {
+     /** Friendly id from the registry (e.g. `"llama-3.2-1b-int4"`). */
+     id: string;
+     /** Backend-specific id (e.g. WebLLM `webllmId`). */
+     backendId: string;
+     /** Human-readable family name. */
+     family: string;
+     /** Approx parameter count, e.g. `"1B"`. */
+     parameters: string;
+ }
+
+ /** Aggregate storage usage reported by the browser. */
+ export declare interface CacheUsage {
+     /** Bytes used by the entire origin's storage (not just our cache). */
+     usage: number;
+     /** Bytes the browser is willing to give the origin. */
+     quota: number;
+ }
+
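
A minimal usage sketch for `CacheUsage` (the `formatUsage` helper below is illustrative, not part of the package):

```ts
import { ModelCache, type CacheUsage } from "localm-web";

// Illustrative helper: render a CacheUsage snapshot for a settings UI.
function formatUsage({ usage, quota }: CacheUsage): string {
  const gib = (n: number) => (n / 1024 ** 3).toFixed(2);
  return `${gib(usage)} GiB used of ${gib(quota)} GiB available`;
}

console.log(formatUsage(await new ModelCache().estimateUsage()));
```
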
  /**
   * Multi-turn chat task.
   *
@@ -111,6 +131,209 @@ export declare class ChatReply {
   */
  export declare function collectStream(stream: AsyncIterable<TokenChunk>): Promise<string>;

+ /**
+  * Raw text-completion task.
+  *
+  * Unlike {@link Chat}, `Completion` does not maintain a conversation history
+  * and does not apply a chat template. The prompt is fed to the model verbatim
+  * and the model continues it. Useful for "Once upon a time…" style generation,
+  * code completion, or any scenario where chat formatting would interfere.
+  *
+  * Use {@link Completion.create} to construct an instance — the constructor is
+  * private.
+  *
+  * @example
+  * ```ts
+  * const comp = await Completion.create("qwen2.5-1.5b-int4");
+  * const result = await comp.predict("Once upon a time", { maxTokens: 50 });
+  * console.log(result.text);
+  * ```
+  *
+  * @example Streaming
+  * ```ts
+  * const controller = new AbortController();
+  * for await (const token of comp.stream("def fibonacci(n):", { signal: controller.signal })) {
+  *     process.stdout.write(token.text);
+  * }
+  * ```
+  */
+ export declare class Completion extends LMTask {
+     private constructor();
+     /**
+      * Create and load a `Completion` task for the given model.
+      *
+      * @param modelId - Friendly model id from the registry (e.g. `"qwen2.5-1.5b-int4"`).
+      * @param options - Optional creation options (progress callback, engine override).
+      */
+     static create(modelId: string, options?: LMTaskCreateOptions): Promise<Completion>;
+     /**
+      * Generate a continuation for the given prompt.
+      *
+      * @param prompt - Raw text fed to the model.
+      * @param options - Generation options.
+      * @returns A {@link CompletionResult} with the generated continuation.
+      */
+     predict(prompt: string, options?: GenerationOptions): Promise<CompletionResult>;
+     /**
+      * Stream a continuation for the given prompt as an async iterable of token
+      * chunks.
+      *
+      * @param prompt - Raw text fed to the model.
+      * @param options - Generation options including an optional `signal`.
+      */
+     stream(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
+ }
+
+ /**
+  * Result returned by `Completion.predict()`.
+  *
+  * Holds the generated continuation text (the prompt itself is not included)
+  * plus metadata about the generation loop.
+  */
+ export declare class CompletionResult {
+     /** The generated text (continuation only, prompt excluded). */
+     readonly text: string;
+     /** The original prompt that was fed to the model. */
+     readonly prompt: string;
+     /** Number of tokens generated. 0 when the engine does not report it. */
+     readonly tokensGenerated: number;
+     /** Why the generation loop stopped. */
+     readonly finishReason: FinishReason;
+     constructor(
+     /** The generated text (continuation only, prompt excluded). */
+     text: string,
+     /** The original prompt that was fed to the model. */
+     prompt: string,
+     /** Number of tokens generated. 0 when the engine does not report it. */
+     tokensGenerated: number,
+     /** Why the generation loop stopped. */
+     finishReason: FinishReason);
+ }
+
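
Since `CompletionResult` carries the stop reason, callers can detect truncated generations. A sketch, assuming `FinishReason` (defined elsewhere in this file) includes a `"length"` variant for token-budget exhaustion:

```ts
const result = await comp.predict("Once upon a time", { maxTokens: 50 });
// "length" is an assumed FinishReason variant — check the FinishReason type.
if (result.finishReason === "length") {
  console.warn(`truncated after ${result.tokensGenerated} tokens, retrying`);
  const longer = await comp.predict(result.prompt, { maxTokens: 200 });
  console.log(longer.text);
}
```
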
+ /**
+  * Spawn a new inference Web Worker.
+  *
+  * Uses Vite/webpack-friendly `new Worker(new URL(...), { type: "module" })`
+  * syntax. The bundler emits the worker as a separate ES module chunk.
+  *
+  * Consumers normally do not call this directly — `LMTask.create()` invokes it
+  * when `inWorker: true` is set. It is exported for advanced scenarios (custom
+  * worker management, pooling, lifecycle integration with a host app).
+  *
+  * @returns A {@link WorkerLike}-compatible Worker instance.
+  */
+ export declare function createInferenceWorker(): WorkerLike;
+
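
For the advanced path, a sketch wiring `createInferenceWorker` into a `WorkerEngine` by hand and passing it through the `engine` override (normally `inWorker: true` does all of this for you):

```ts
import { Completion, WorkerEngine, createInferenceWorker } from "localm-web";

// Own the worker lifecycle explicitly instead of relying on inWorker: true.
const engine = new WorkerEngine(createInferenceWorker());
const comp = await Completion.create("qwen2.5-1.5b-int4", { engine });
const result = await comp.predict("Once upon a time", { maxTokens: 50 });

await comp.unload();
engine.terminate(); // tear down the worker once the host app is done with it
```
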
+ /**
+  * Curated registry of supported embedding models for v0.3.
+  *
+  * Each entry maps a friendly id to the underlying transformers.js model id.
+  */
+ export declare const EMBEDDING_PRESETS: Readonly<Record<string, EmbeddingPreset>>;
+
+ /** Curated metadata for a supported embedding model. */
+ export declare interface EmbeddingPreset {
+     /** Friendly identifier (e.g. `"bge-small-en-v1.5"`). */
+     id: string;
+     /** Family name (e.g. `"BGE"`). */
+     family: string;
+     /** Embedding dimension. */
+     dimension: number;
+     /** Maximum input length in tokens. */
+     maxTokens: number;
+     /** Identifier passed to `@huggingface/transformers`. */
+     transformersId: string;
+     /** Approximate quantization scheme (e.g. `"fp32"`, `"int8"`). */
+     quantization: string;
+     /** Short human description. */
+     description: string;
+ }
+
+ /**
+  * Sentence embedding task backed by `@huggingface/transformers`.
+  *
+  * Use {@link Embeddings.create} to construct an instance — the constructor is
+  * private. The default backend lazy-loads the transformers.js runtime; tests
+  * inject an {@link EmbedPipeline} mock instead.
+  *
+  * @example
+  * ```ts
+  * const emb = await Embeddings.create("bge-small-en-v1.5");
+  * const vectors = await emb.embed(["hello world", "another sentence"]);
+  * console.log(vectors[0].length); // 384
+  * ```
+  */
+ export declare class Embeddings {
+     private readonly pipeline;
+     /** Resolved metadata for the loaded model. */
+     readonly preset: EmbeddingPreset;
+     private constructor();
+     /**
+      * Create and load an `Embeddings` task for the given model.
+      *
+      * @param modelId - Friendly id from the embedding registry.
+      * @param options - Optional creation options.
+      * @throws UnknownModelError if `modelId` is not in the registry.
+      * @throws ModelLoadError if the underlying pipeline fails to load.
+      */
+     static create(modelId: string, options?: EmbeddingsCreateOptions): Promise<Embeddings>;
+     /**
+      * Encode an array of strings into dense vectors.
+      *
+      * Returns one vector per input, in the same order. An empty input array
+      * returns an empty array (no error).
+      *
+      * @param texts - Input strings.
+      * @param options - Pooling + normalization. Defaults: `pooling: "mean"`, `normalize: true`.
+      */
+     embed(texts: string[], options?: EmbedOptions): Promise<number[][]>;
+     /**
+      * Convenience: encode a single string and return its vector.
+      *
+      * @param text - Input string.
+      * @param options - Forwarded to {@link Embeddings.embed}.
+      */
+     embedSingle(text: string, options?: EmbedOptions): Promise<number[]>;
+     /** Embedding dimension exposed by the loaded model. */
+     get dimension(): number;
+     /** Release pipeline resources. Safe to call multiple times. */
+     unload(): Promise<void>;
+ }
+
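
Because `embed` L2-normalizes by default, cosine similarity between two outputs reduces to a dot product. A small sketch:

```ts
const emb = await Embeddings.create("bge-small-en-v1.5");
const [a, b] = await emb.embed(["how do I cache a model?", "deleting cached weights"]);

// With normalize: true (the default) the dot product is the cosine similarity.
const cosine = a.reduce((sum, v, i) => sum + v * b[i], 0);
console.log(cosine.toFixed(3));
await emb.unload();
```
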
+ /** Options accepted by {@link Embeddings.create}. */
+ export declare interface EmbeddingsCreateOptions {
+     /** Optional callback for model load progress updates. */
+     onProgress?: ProgressCallback;
+     /** Override the embedding pipeline. Intended for testing. */
+     pipeline?: EmbedPipeline;
+ }
+
+ /** Options accepted by {@link Embeddings.embed}. */
+ export declare interface EmbedOptions {
+     /** L2-normalize each vector. Recommended for cosine similarity downstream. Default `true`. */
+     normalize?: boolean;
+     /** Pooling strategy. BGE-style models use `"cls"`. Most sentence-transformers use `"mean"`. Default `"mean"`. */
+     pooling?: "mean" | "cls";
+ }
+
+ /**
+  * Minimal pipeline contract that {@link Embeddings} depends on.
+  *
+  * The default implementation wraps `@huggingface/transformers`. Tests inject
+  * a fake satisfying the same shape — they never load the real runtime.
+  */
+ export declare interface EmbedPipeline {
+     /**
+      * Run the encoder on a batch of inputs and return raw vectors.
+      *
+      * @param texts - Input strings.
+      * @param options - Pooling + normalization passed to the underlying pipeline.
+      */
+     embed(texts: string[], options: Required<EmbedOptions>): Promise<number[][]>;
+     /** Release pipeline resources. */
+     unload?(): Promise<void>;
+ }
+
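
A sketch of the testing seam: inject a fake `EmbedPipeline` through `EmbeddingsCreateOptions.pipeline` so unit tests never touch the real transformers.js runtime (the zero vectors are stand-ins, not meaningful output):

```ts
const fakePipeline: EmbedPipeline = {
  // Return fixed 384-dim zero vectors, matching bge-small-en-v1.5's dimension.
  async embed(texts, _options) {
    return texts.map(() => new Array<number>(384).fill(0));
  },
  async unload() {},
};

const emb = await Embeddings.create("bge-small-en-v1.5", { pipeline: fakePipeline });
console.log((await emb.embedSingle("hello")).length); // 384
```
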
  /**
   * Runtime-agnostic inference contract.
   *
@@ -148,6 +371,32 @@ export declare interface Engine {
       * @throws GenerationAbortedError if `options.signal` is triggered.
       */
      stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
+     /**
+      * Generate a non-streaming raw text completion.
+      *
+      * Unlike {@link Engine.generate}, this skips the chat template and feeds the
+      * prompt to the underlying model verbatim. Useful for "Once upon a time…"
+      * style continuation.
+      *
+      * @param prompt - Raw text fed to the model.
+      * @param options - Generation options.
+      * @returns The full generated text (excluding the prompt).
+      * @throws ModelNotLoadedError if called before {@link Engine.load}.
+      * @throws GenerationAbortedError if `options.signal` is triggered.
+      */
+     complete(prompt: string, options?: GenerationOptions): Promise<string>;
+     /**
+      * Stream a raw text completion as an async iterable of token chunks.
+      *
+      * Unlike {@link Engine.stream}, this skips the chat template.
+      *
+      * @param prompt - Raw text fed to the model.
+      * @param options - Generation options.
+      * @returns Async iterable yielding token chunks. The final chunk has `done: true`.
+      * @throws ModelNotLoadedError if called before {@link Engine.load}.
+      * @throws GenerationAbortedError if `options.signal` is triggered.
+      */
+     streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
      /** Release any resources held by the engine. Safe to call when not loaded. */
      unload(): Promise<void>;
      /** Whether a model is currently loaded and ready for inference. */
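
At the contract level, a sketch driving the two new methods directly through an `Engine` (here a `WorkerEngine`), bypassing the task layer; `maxTokens` is assumed from the v0.1 `GenerationOptions`:

```ts
const engine = new WorkerEngine(createInferenceWorker());
await engine.load("qwen2.5-1.5b-int4", (p) => console.log(p.phase, p.progress));

// Raw completion: no chat template, the prompt is continued verbatim.
const text = await engine.complete("The three primary colors are", { maxTokens: 40 });
console.log(text);
await engine.unload();
```
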
@@ -180,9 +429,15 @@ export declare interface GenerationOptions {
      jsonSchema?: object;
  }

+ /** Return the list of supported embedding model ids. */
+ export declare function listSupportedEmbeddingModels(): string[];
+
  /** Return the list of supported friendly model ids. */
  export declare function listSupportedModels(): string[];

+ /** Return the list of supported reranker model ids. */
+ export declare function listSupportedRerankerModels(): string[];
+
  /**
   * Base class shared by all language-model tasks (`Chat` for v0.1; `Completion`,
   * `Embeddings` and `Reranker` planned for later versions).
@@ -213,6 +468,7 @@ export declare abstract class LMTask {
       * @param options - Task creation options.
       */
      protected static createEngine(modelId: string, options?: LMTaskCreateOptions): Promise<ResolvedEngine>;
+     private static defaultEngine;
      /** Release engine resources. Safe to call multiple times. */
      unload(): Promise<void>;
      /** Whether the underlying engine has a loaded model. */
@@ -228,6 +484,16 @@ export declare interface LMTaskCreateOptions {
       * Production callers should let the SDK pick a backend automatically.
       */
      engine?: Engine;
+     /**
+      * Run inference inside a Web Worker, isolating the UI thread from
+      * tokenization and generation. **Default `true` from v0.3** — the
+      * `WorkerEngine` is the recommended path. Pass `false` to keep
+      * inference on the main thread (useful for environments without
+      * `Worker` support or when debugging the runtime directly).
+      *
+      * Ignored when {@link engine} is provided.
+      */
+     inWorker?: boolean;
  }

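
A sketch of opting out of the worker path, e.g. where `Worker` is unavailable (the feature check is illustrative):

```ts
// Fall back to main-thread inference in environments without Worker support.
const comp = await Completion.create("qwen2.5-1.5b-int4", {
  inWorker: typeof Worker !== "undefined",
});
```
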
  /**
@@ -269,10 +535,110 @@ export declare interface Message {
   */
  export declare const MODEL_PRESETS: Readonly<Record<string, ModelPreset>>;

+ /**
+  * Inspect and manage cached model weights.
+  *
+  * `localm-web` does not download or cache weights itself — that work is owned
+  * by `@mlc-ai/web-llm`, which writes to the browser Cache API. `ModelCache`
+  * is a thin wrapper that lets a consuming app surface cache state in its UI:
+  * "this model is downloaded", "you have 1.4 GB cached, free up space?",
+  * "clear all models on logout".
+  *
+  * @example
+  * ```ts
+  * const cache = new ModelCache();
+  * if (await cache.has("llama-3.2-1b-int4")) {
+  *     console.log("ready offline");
+  * }
+  * const cached = await cache.list();
+  * await cache.delete("phi-3.5-mini-int4");
+  * const usage = await cache.estimateUsage();
+  * console.log(`${usage.usage} / ${usage.quota} bytes`);
+  * ```
+  */
+ export declare class ModelCache {
+     private readonly hasModelHook;
+     private readonly deleteModelHook;
+     private readonly estimateHook;
+     constructor(options?: ModelCacheOptions);
+     /**
+      * Whether the model's weights are present in the browser cache.
+      *
+      * @param modelId - Friendly id from the registry.
+      * @throws UnknownModelError if `modelId` is not in the registry.
+      */
+     has(modelId: string): Promise<boolean>;
+     /**
+      * Delete a single model's weights from the browser cache. No-op when the
+      * model is not cached.
+      *
+      * @param modelId - Friendly id from the registry.
+      * @throws UnknownModelError if `modelId` is not in the registry.
+      */
+     delete(modelId: string): Promise<void>;
+     /**
+      * List the registry models that are currently cached.
+      *
+      * Iterates `MODEL_PRESETS` and probes each one. Only returns models known
+      * to the SDK — models cached by external WebLLM calls outside our registry
+      * are not included.
+      *
+      * @returns Empty list when nothing is cached.
+      */
+     list(): Promise<CachedModelEntry[]>;
+     /**
+      * Delete every registry model from the cache. Useful for logout flows or
+      * "reset" buttons. Models cached outside the registry are not touched.
+      */
+     clear(): Promise<void>;
+     /**
+      * Aggregate storage stats from the browser. Returned numbers cover the
+      * entire origin (Cache API + IndexedDB + Service Workers + OPFS), not
+      * just our model cache — use it for "you have X of Y available" hints.
+      */
+     estimateUsage(): Promise<CacheUsage>;
+     /**
+      * Throw a descriptive error if the given id is not in the registry.
+      * Exposed for code paths that want to validate before calling other
+      * methods (those already throw on their own).
+      *
+      * @throws UnknownModelError
+      */
+     static assertKnown(modelId: string): void;
+ }
+
+ /**
+  * Hooks the {@link ModelCache} uses to talk to the underlying runtime and
+  * the browser. Tests inject mocks; production code leaves them undefined,
+  * letting `ModelCache` resolve the real `@mlc-ai/web-llm` helpers and
+  * `navigator.storage.estimate()` lazily.
+  */
+ export declare interface ModelCacheOptions {
+     /** Override `hasModelInCache` from the runtime. */
+     hasModel?: (backendId: string) => Promise<boolean>;
+     /** Override `deleteModelInCache` from the runtime. */
+     deleteModel?: (backendId: string) => Promise<void>;
+     /** Override `navigator.storage.estimate()`. */
+     estimate?: () => Promise<CacheUsage>;
+ }
+
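
A sketch of the test seam: back a `ModelCache` with in-memory hooks so unit tests never load `@mlc-ai/web-llm` or touch real browser storage (the backend id below is a hypothetical example):

```ts
// In-memory stand-ins for the runtime helpers and navigator.storage.estimate().
const cached = new Set<string>(["Llama-3.2-1B-Instruct-q4f16_1-MLC"]); // hypothetical backendId
const cache = new ModelCache({
  hasModel: async (backendId) => cached.has(backendId),
  deleteModel: async (backendId) => void cached.delete(backendId),
  estimate: async () => ({ usage: 1_500_000_000, quota: 10_000_000_000 }),
});

console.log(await cache.list()); // registry entries whose backendId is in `cached`
```
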
  /** Thrown when a model fails to load (network, parsing, runtime init). */
  export declare class ModelLoadError extends LocalmWebError {
  }

+ /**
+  * Lifecycle phase of a model load.
+  *
+  * - `downloading`: weight files are being fetched from the network or cache.
+  * - `compiling`: the runtime is preparing the model (shader compilation,
+  *   tensor allocation, KV cache setup).
+  * - `loading`: a generic "still working" phase reported by the runtime when
+  *   it has not classified the work into download or compile.
+  * - `ready`: the model is loaded and the engine is ready for inference.
+  *   Emitted exactly once, at the end of a successful load.
+  */
+ export declare type ModelLoadPhase = "downloading" | "compiling" | "loading" | "ready";
+
  /** Progress event emitted while a model is loading. */
  export declare interface ModelLoadProgress {
      /** Fraction of total work completed, in [0, 1]. */
@@ -283,6 +649,8 @@ export declare interface ModelLoadProgress {
      loaded: number;
      /** Total bytes to load. 0 when unavailable. */
      total: number;
+     /** Lifecycle phase classified from the runtime's status text. */
+     phase: ModelLoadPhase;
  }

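
With the new `phase` field, a load indicator can tell downloading from compiling. A sketch, assuming the task creation options carry the `onProgress` callback referenced earlier in this file:

```ts
const comp = await Completion.create("qwen2.5-1.5b-int4", {
  onProgress: ({ phase, progress, loaded, total }) => {
    const pct = Math.round(progress * 100);
    // total is 0 when the runtime does not report byte counts.
    const bytes = total > 0 ? ` (${loaded}/${total} bytes)` : "";
    console.log(`${phase}: ${pct}%${bytes}`);
  },
});
```
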
  /** Thrown when an inference call is made before a model has loaded. */
@@ -316,12 +684,147 @@ export declare type ProgressCallback = (progress: ModelLoadProgress) => void;
  export declare class QuotaExceededError extends LocalmWebError {
  }

+ /** A document paired with its score, for {@link Reranker.rank}. */
+ export declare interface RankedDocument {
+     /** The document text. */
+     text: string;
+     /** Score from the cross-encoder. */
+     score: number;
+     /** Original index of the document in the input array. */
+     index: number;
+ }
+
+ /**
+  * Cross-encoder reranking task backed by `@huggingface/transformers`.
+  *
+  * Use {@link Reranker.create} to construct an instance — the constructor is
+  * private. Useful as a second-stage step in a retrieve-then-rerank pipeline:
+  * pull top-K candidates with a fast embedding similarity, then rerank with
+  * a cross-encoder for higher precision.
+  *
+  * @example
+  * ```ts
+  * const rerank = await Reranker.create("bge-reranker-base");
+  * const scores = await rerank.score("what is webgpu?", [
+  *     "WebGPU is a modern graphics API",
+  *     "Bananas grow on trees",
+  * ]);
+  * // scores[0] >> scores[1]
+  * ```
+  *
+  * @example Ranked output sorted by score
+  * ```ts
+  * const ranked = await rerank.rank("what is webgpu?", docs);
+  * for (const r of ranked) console.log(r.score, r.text);
+  * ```
+  */
+ export declare class Reranker {
+     private readonly pipeline;
+     /** Resolved metadata for the loaded model. */
+     readonly preset: RerankerPreset;
+     private constructor();
+     /**
+      * Create and load a `Reranker` task for the given model.
+      *
+      * @param modelId - Friendly id from the reranker registry.
+      * @param options - Optional creation options.
+      * @throws UnknownModelError if `modelId` is not in the registry.
+      * @throws ModelLoadError if the underlying pipeline fails to load.
+      */
+     static create(modelId: string, options?: RerankerCreateOptions): Promise<Reranker>;
+     /**
+      * Score each document against the query. Returns one score per doc, in
+      * the same order. Empty `docs` returns `[]` (no error).
+      *
+      * @param query - Query string.
+      * @param docs - Documents to score.
+      * @param options - `sigmoid: true` maps logits into `[0, 1]`.
+      */
+     score(query: string, docs: string[], options?: RerankOptions): Promise<number[]>;
+     /**
+      * Score and sort documents by score in descending order. Returns a list of
+      * {@link RankedDocument}s carrying the original index.
+      *
+      * @param query - Query string.
+      * @param docs - Documents to rank.
+      * @param options - Forwarded to {@link Reranker.score}.
+      */
+     rank(query: string, docs: string[], options?: RerankOptions): Promise<RankedDocument[]>;
+     /** Release pipeline resources. Safe to call multiple times. */
+     unload(): Promise<void>;
+ }
+
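
Putting the two new retrieval tasks together — a sketch of the retrieve-then-rerank pipeline described above:

```ts
const docs = ["WebGPU is a modern graphics API", "Bananas grow on trees" /* … */];
const emb = await Embeddings.create("bge-small-en-v1.5");
const rerank = await Reranker.create("bge-reranker-base");

const query = "what is webgpu?";
const [qVec, ...docVecs] = await emb.embed([query, ...docs]);

// Stage 1: fast similarity shortlist (normalized vectors, dot = cosine).
const topK = docVecs
  .map((v, index) => ({ index, sim: v.reduce((s, x, i) => s + x * qVec[i], 0) }))
  .sort((a, b) => b.sim - a.sim)
  .slice(0, 10);

// Stage 2: precise cross-encoder scores on the shortlist only.
const shortlist = topK.map(({ index }) => docs[index]);
const ranked = await rerank.rank(query, shortlist, { sigmoid: true });
console.log(ranked[0]?.text);
```
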
+ /**
+  * Curated registry of supported reranker models for v0.3.
+  */
+ export declare const RERANKER_PRESETS: Readonly<Record<string, RerankerPreset>>;
+
+ /** Options accepted by {@link Reranker.create}. */
+ export declare interface RerankerCreateOptions {
+     /** Optional callback for model load progress updates. */
+     onProgress?: ProgressCallback;
+     /** Override the rerank pipeline. Intended for testing. */
+     pipeline?: RerankPipeline;
+ }
+
+ /** Curated metadata for a supported reranker (cross-encoder) model. */
+ export declare interface RerankerPreset {
+     /** Friendly identifier (e.g. `"bge-reranker-base"`). */
+     id: string;
+     /** Family name (e.g. `"BGE Reranker"`). */
+     family: string;
+     /** Maximum input length in tokens (combined query + document). */
+     maxTokens: number;
+     /** Identifier passed to `@huggingface/transformers`. */
+     transformersId: string;
+     /** Approximate quantization (e.g. `"fp32"`). */
+     quantization: string;
+     /** Short human description. */
+     description: string;
+ }
+
+ /** Options accepted by {@link Reranker.score}. */
+ export declare interface RerankOptions {
+     /**
+      * Apply sigmoid to logits to map scores into `[0, 1]`. Recommended when the
+      * downstream code uses scores as probabilities. Default `false` (raw logits).
+      */
+     sigmoid?: boolean;
+ }
+
+ /**
+  * Minimal pipeline contract that {@link Reranker} depends on.
+  *
+  * The default implementation wraps `@huggingface/transformers`. Tests inject
+  * a fake satisfying the same shape — they never load the real runtime.
+  */
+ export declare interface RerankPipeline {
+     /**
+      * Score `(query, doc)` pairs. One score per doc, in the same order.
+      *
+      * @param query - Single query string.
+      * @param docs - Documents to score against the query.
+      */
+     score(query: string, docs: string[]): Promise<number[]>;
+     /** Release pipeline resources. */
+     unload?(): Promise<void>;
+ }
+
  /** Internal payload returned by {@link LMTask.createEngine}. */
  declare interface ResolvedEngine {
      engine: Engine;
      preset: ModelPreset;
  }

+ /**
+  * Resolve a friendly embedding model id to its full preset metadata.
+  *
+  * @param modelId - Friendly id (e.g. `"bge-small-en-v1.5"`).
+  * @returns The matching preset.
+  * @throws UnknownModelError if no preset matches.
+  */
+ export declare function resolveEmbeddingPreset(modelId: string): EmbeddingPreset;
+
  /**
   * Resolve a friendly model id to its full preset metadata.
   *
@@ -331,12 +834,28 @@ declare interface ResolvedEngine {
   */
  export declare function resolveModelPreset(modelId: string): ModelPreset;

+ /**
+  * Resolve a friendly reranker model id to its full preset metadata.
+  *
+  * @param modelId - Friendly id (e.g. `"bge-reranker-base"`).
+  * @throws UnknownModelError if no preset matches.
+  */
+ export declare function resolveRerankerPreset(modelId: string): RerankerPreset;
+
  /**
   * Public type primitives for localm-web.
   */
  /** Conversation roles supported by chat templates. */
  export declare type Role = "system" | "user" | "assistant" | "tool";

+ /**
+  * Subset of {@link GenerationOptions} that survives `postMessage`.
+  *
+  * `AbortSignal` cannot be cloned across the worker boundary, so it is replaced
+  * by a separate {@link AbortRequest} message keyed on the same operation id.
+  */
+ declare type SerializableGenerationOptions = Omit<GenerationOptions, "signal">;
+
  /**
   * Wrap an async iterable so that each `TokenChunk` is also passed to a
   * caller-supplied side-effect callback before being yielded downstream.
@@ -370,4 +889,119 @@ export declare const VERSION: string;
  export declare class WebGPUUnavailableError extends LocalmWebError {
  }

+ /**
+  * Engine implementation that proxies all calls to a Web Worker.
+  *
+  * The worker holds the actual {@link WebLLMEngine}; this class is a thin RPC
+  * shell that serializes requests, tracks pending operations by a numeric id,
+  * and turns worker responses back into Promises and async iterables.
+  *
+  * Use {@link createInferenceWorker} to obtain a real worker. Tests can pass a
+  * {@link WorkerLike} mock implementing the same `postMessage` /
+  * `addEventListener` surface.
+  */
+ export declare class WorkerEngine implements Engine {
+     private readonly worker;
+     private nextId;
+     private loaded;
+     private currentLoad;
+     private currentLoadId;
+     private currentLoadProgress;
+     private currentUnload;
+     private currentUnloadId;
+     private pendingGenerates;
+     private pendingStreams;
+     private readonly listener;
+     constructor(worker: WorkerLike);
+     isLoaded(): boolean;
+     load(modelId: string, onProgress?: ProgressCallback): Promise<void>;
+     generate(messages: Message[], options?: GenerationOptions): Promise<string>;
+     stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
+     complete(prompt: string, options?: GenerationOptions): Promise<string>;
+     streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
+     unload(): Promise<void>;
+     /** Tear down the underlying worker. The engine is unusable after this. */
+     terminate(): void;
+     private allocateId;
+     private send;
+     private handleMessage;
+ }
+
+ /** Subset of `Worker` we depend on. Lets tests inject a mock. */
+ export declare interface WorkerLike {
+     postMessage(message: WorkerRequest): void;
+     addEventListener(type: "message", listener: (event: MessageEvent<WorkerResponse>) => void): void;
+     removeEventListener(type: "message", listener: (event: MessageEvent<WorkerResponse>) => void): void;
+     terminate(): void;
+ }
+
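
A sketch of a `WorkerLike` mock for exercising `WorkerEngine` in unit tests. The request/response payload types are internal to the package, so they are handled as `unknown` here:

```ts
class MockWorker {
  sent: unknown[] = []; // requests the engine posted, for assertions
  private listeners = new Set<(event: MessageEvent) => void>();
  postMessage(message: unknown): void {
    this.sent.push(message);
  }
  addEventListener(_type: "message", listener: (event: MessageEvent) => void): void {
    this.listeners.add(listener);
  }
  removeEventListener(_type: "message", listener: (event: MessageEvent) => void): void {
    this.listeners.delete(listener);
  }
  terminate(): void {}
  /** Test helper: deliver a synthetic worker response to the engine. */
  emit(data: unknown): void {
    for (const l of this.listeners) l(new MessageEvent("message", { data }));
  }
}

const worker = new MockWorker();
const engine = new WorkerEngine(worker); // structurally compatible with WorkerLike
```
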
+ /** Operation request sent from the main thread to the worker. */
+ declare type WorkerRequest = {
+     op: "load";
+     id: number;
+     modelId: string;
+ } | {
+     op: "generate";
+     id: number;
+     messages: Message[];
+     options: SerializableGenerationOptions;
+ } | {
+     op: "stream";
+     id: number;
+     messages: Message[];
+     options: SerializableGenerationOptions;
+ } | {
+     op: "complete";
+     id: number;
+     prompt: string;
+     options: SerializableGenerationOptions;
+ } | {
+     op: "stream-completion";
+     id: number;
+     prompt: string;
+     options: SerializableGenerationOptions;
+ } | {
+     op: "abort";
+     id: number;
+ } | {
+     op: "unload";
+     id: number;
+ } | {
+     op: "isLoaded";
+     id: number;
+ };
+
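
The abort leg of the protocol, seen from the consumer side: the `AbortSignal` never crosses the worker boundary. A sketch (the `messages` shape assumes the v0.1 `Message` fields `role` and `content`):

```ts
const controller = new AbortController();
const messages = [{ role: "user" as const, content: "Tell me a very long story." }];

// WorkerEngine watches the signal on the main thread and, when it fires,
// posts { op: "abort", id } for the matching in-flight operation.
setTimeout(() => controller.abort(), 2000);

let text = "";
try {
  for await (const chunk of engine.stream(messages, { signal: controller.signal })) {
    text += chunk.text;
  }
} catch (err) {
  console.log((err as Error).name); // GenerationAbortedError per the Engine contract
}
```
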
+ /** Operation response sent from the worker back to the main thread. */
+ declare type WorkerResponse = {
+     op: "loaded";
+     id: number;
+ } | {
+     op: "generated";
+     id: number;
+     text: string;
+ } | {
+     op: "progress";
+     id: number;
+     payload: ModelLoadProgress;
+ } | {
+     op: "token";
+     id: number;
+     chunk: TokenChunk;
+ } | {
+     op: "stream-end";
+     id: number;
+ } | {
+     op: "error";
+     id: number;
+     name: string;
+     message: string;
+ } | {
+     op: "unloaded";
+     id: number;
+ } | {
+     op: "is-loaded";
+     id: number;
+     value: boolean;
+ };
+
  export { }