localm-web 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +154 -0
- package/README.md +3 -3
- package/dist/assets/index-ChQoBCqA.js +23168 -0
- package/dist/assets/index-ChQoBCqA.js.map +1 -0
- package/dist/assets/inference.worker-CwvQtobb.js +330 -0
- package/dist/assets/inference.worker-CwvQtobb.js.map +1 -0
- package/dist/index.d.ts +634 -0
- package/dist/index.js +807 -3
- package/dist/index.js.map +1 -1
- package/package.json +9 -2
package/dist/index.d.ts
CHANGED
|
@@ -10,6 +10,26 @@
|
|
|
10
10
|
export declare class BackendNotAvailableError extends LocalmWebError {
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
+
/** Snapshot of a single cached model's metadata. */
|
|
14
|
+
export declare interface CachedModelEntry {
|
|
15
|
+
/** Friendly id from the registry (e.g. `"llama-3.2-1b-int4"`). */
|
|
16
|
+
id: string;
|
|
17
|
+
/** Backend-specific id (e.g. WebLLM `webllmId`). */
|
|
18
|
+
backendId: string;
|
|
19
|
+
/** Human-readable family name. */
|
|
20
|
+
family: string;
|
|
21
|
+
/** Approx parameter count, e.g. `"1B"`. */
|
|
22
|
+
parameters: string;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/** Aggregate storage usage reported by the browser. */
|
|
26
|
+
export declare interface CacheUsage {
|
|
27
|
+
/** Bytes used by the entire origin's storage (not just our cache). */
|
|
28
|
+
usage: number;
|
|
29
|
+
/** Bytes the browser is willing to give the origin. */
|
|
30
|
+
quota: number;
|
|
31
|
+
}
|
|
32
|
+
|
|
13
33
|
/**
|
|
14
34
|
* Multi-turn chat task.
|
|
15
35
|
*
|
|
@@ -111,6 +131,209 @@ export declare class ChatReply {
|
|
|
111
131
|
*/
|
|
112
132
|
export declare function collectStream(stream: AsyncIterable<TokenChunk>): Promise<string>;
|
|
113
133
|
|
|
134
|
+
/**
|
|
135
|
+
* Raw text-completion task.
|
|
136
|
+
*
|
|
137
|
+
* Unlike {@link Chat}, `Completion` does not maintain a conversation history
|
|
138
|
+
* and does not apply a chat template. The prompt is fed to the model verbatim
|
|
139
|
+
* and the model continues it. Useful for "Once upon a time…" style generation,
|
|
140
|
+
* code completion, or any scenario where chat formatting would interfere.
|
|
141
|
+
*
|
|
142
|
+
* Use {@link Completion.create} to construct an instance — the constructor is
|
|
143
|
+
* private.
|
|
144
|
+
*
|
|
145
|
+
* @example
|
|
146
|
+
* ```ts
|
|
147
|
+
* const comp = await Completion.create("qwen2.5-1.5b-int4");
|
|
148
|
+
* const result = await comp.predict("Once upon a time", { maxTokens: 50 });
|
|
149
|
+
* console.log(result.text);
|
|
150
|
+
* ```
|
|
151
|
+
*
|
|
152
|
+
* @example Streaming
|
|
153
|
+
* ```ts
|
|
154
|
+
* const controller = new AbortController();
|
|
155
|
+
* for await (const token of comp.stream("def fibonacci(n):", { signal: controller.signal })) {
|
|
156
|
+
* process.stdout.write(token.text);
|
|
157
|
+
* }
|
|
158
|
+
* ```
|
|
159
|
+
*/
|
|
160
|
+
export declare class Completion extends LMTask {
|
|
161
|
+
private constructor();
|
|
162
|
+
/**
|
|
163
|
+
* Create and load a `Completion` task for the given model.
|
|
164
|
+
*
|
|
165
|
+
* @param modelId - Friendly model id from the registry (e.g. `"qwen2.5-1.5b-int4"`).
|
|
166
|
+
* @param options - Optional creation options (progress callback, engine override).
|
|
167
|
+
*/
|
|
168
|
+
static create(modelId: string, options?: LMTaskCreateOptions): Promise<Completion>;
|
|
169
|
+
/**
|
|
170
|
+
* Generate a continuation for the given prompt.
|
|
171
|
+
*
|
|
172
|
+
* @param prompt - Raw text fed to the model.
|
|
173
|
+
* @param options - Generation options.
|
|
174
|
+
* @returns A {@link CompletionResult} with the generated continuation.
|
|
175
|
+
*/
|
|
176
|
+
predict(prompt: string, options?: GenerationOptions): Promise<CompletionResult>;
|
|
177
|
+
/**
|
|
178
|
+
* Stream a continuation for the given prompt as an async iterable of token
|
|
179
|
+
* chunks.
|
|
180
|
+
*
|
|
181
|
+
* @param prompt - Raw text fed to the model.
|
|
182
|
+
* @param options - Generation options including an optional `signal`.
|
|
183
|
+
*/
|
|
184
|
+
stream(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Result returned by `Completion.predict()`.
|
|
189
|
+
*
|
|
190
|
+
* Holds the generated continuation text (the prompt itself is not included)
|
|
191
|
+
* plus metadata about the generation loop.
|
|
192
|
+
*/
|
|
193
|
+
export declare class CompletionResult {
|
|
194
|
+
/** The generated text (continuation only, prompt excluded). */
|
|
195
|
+
readonly text: string;
|
|
196
|
+
/** The original prompt that was fed to the model. */
|
|
197
|
+
readonly prompt: string;
|
|
198
|
+
/** Number of tokens generated. 0 when the engine does not report it. */
|
|
199
|
+
readonly tokensGenerated: number;
|
|
200
|
+
/** Why the generation loop stopped. */
|
|
201
|
+
readonly finishReason: FinishReason;
|
|
202
|
+
constructor(
|
|
203
|
+
/** The generated text (continuation only, prompt excluded). */
|
|
204
|
+
text: string,
|
|
205
|
+
/** The original prompt that was fed to the model. */
|
|
206
|
+
prompt: string,
|
|
207
|
+
/** Number of tokens generated. 0 when the engine does not report it. */
|
|
208
|
+
tokensGenerated: number,
|
|
209
|
+
/** Why the generation loop stopped. */
|
|
210
|
+
finishReason: FinishReason);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/**
|
|
214
|
+
* Spawn a new inference Web Worker.
|
|
215
|
+
*
|
|
216
|
+
* Uses Vite/webpack-friendly `new Worker(new URL(...), { type: "module" })`
|
|
217
|
+
* syntax. The bundler emits the worker as a separate ES module chunk.
|
|
218
|
+
*
|
|
219
|
+
* Consumers normally do not call this directly — `LMTask.create()` invokes it
|
|
220
|
+
* when `inWorker` is enabled (the default from v0.3). It is exported for advanced scenarios (custom
|
|
221
|
+
* worker management, pooling, lifecycle integration with a host app).
|
|
222
|
+
*
|
|
223
|
+
* @returns A {@link WorkerLike}-compatible Worker instance.
|
|
224
|
+
*/
|
|
225
|
+
export declare function createInferenceWorker(): WorkerLike;
|
|
226
|
+
|
|
227
|
+
/**
|
|
228
|
+
* Curated registry of supported embedding models for v0.3.
|
|
229
|
+
*
|
|
230
|
+
* Each entry maps a friendly id to the underlying transformers.js model id.
|
|
231
|
+
*/
|
|
232
|
+
export declare const EMBEDDING_PRESETS: Readonly<Record<string, EmbeddingPreset>>;
|
|
233
|
+
|
|
234
|
+
/** Curated metadata for a supported embedding model. */
|
|
235
|
+
export declare interface EmbeddingPreset {
|
|
236
|
+
/** Friendly identifier (e.g. `"bge-small-en-v1.5"`). */
|
|
237
|
+
id: string;
|
|
238
|
+
/** Family name (e.g. `"BGE"`). */
|
|
239
|
+
family: string;
|
|
240
|
+
/** Embedding dimension. */
|
|
241
|
+
dimension: number;
|
|
242
|
+
/** Maximum input length in tokens. */
|
|
243
|
+
maxTokens: number;
|
|
244
|
+
/** Identifier passed to `@huggingface/transformers`. */
|
|
245
|
+
transformersId: string;
|
|
246
|
+
/** Approximate quantization scheme (e.g. `"fp32"`, `"int8"`). */
|
|
247
|
+
quantization: string;
|
|
248
|
+
/** Short human description. */
|
|
249
|
+
description: string;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Sentence embedding task backed by `@huggingface/transformers`.
|
|
254
|
+
*
|
|
255
|
+
* Use {@link Embeddings.create} to construct an instance — the constructor is
|
|
256
|
+
* private. The default backend lazy-loads the transformers.js runtime; tests
|
|
257
|
+
* inject a {@link EmbedPipeline} mock instead.
|
|
258
|
+
*
|
|
259
|
+
* @example
|
|
260
|
+
* ```ts
|
|
261
|
+
* const emb = await Embeddings.create("bge-small-en-v1.5");
|
|
262
|
+
* const vectors = await emb.embed(["hello world", "another sentence"]);
|
|
263
|
+
* console.log(vectors[0].length); // 384
|
|
264
|
+
* ```
|
|
265
|
+
*/
|
|
266
|
+
export declare class Embeddings {
|
|
267
|
+
private readonly pipeline;
|
|
268
|
+
/** Resolved metadata for the loaded model. */
|
|
269
|
+
readonly preset: EmbeddingPreset;
|
|
270
|
+
private constructor();
|
|
271
|
+
/**
|
|
272
|
+
* Create and load an `Embeddings` task for the given model.
|
|
273
|
+
*
|
|
274
|
+
* @param modelId - Friendly id from the embedding registry.
|
|
275
|
+
* @param options - Optional creation options.
|
|
276
|
+
* @throws UnknownModelError if `modelId` is not in the registry.
|
|
277
|
+
* @throws ModelLoadError if the underlying pipeline fails to load.
|
|
278
|
+
*/
|
|
279
|
+
static create(modelId: string, options?: EmbeddingsCreateOptions): Promise<Embeddings>;
|
|
280
|
+
/**
|
|
281
|
+
* Encode an array of strings into dense vectors.
|
|
282
|
+
*
|
|
283
|
+
* Returns one vector per input, in the same order. Empty input array
|
|
284
|
+
* returns an empty array (no error).
|
|
285
|
+
*
|
|
286
|
+
* @param texts - Input strings.
|
|
287
|
+
* @param options - Pooling + normalization. Defaults: `pooling: "mean"`, `normalize: true`.
|
|
288
|
+
*/
|
|
289
|
+
embed(texts: string[], options?: EmbedOptions): Promise<number[][]>;
|
|
290
|
+
/**
|
|
291
|
+
* Convenience: encode a single string and return its vector.
|
|
292
|
+
*
|
|
293
|
+
* @param text - Input string.
|
|
294
|
+
* @param options - Forwarded to {@link Embeddings.embed}.
|
|
295
|
+
*/
|
|
296
|
+
embedSingle(text: string, options?: EmbedOptions): Promise<number[]>;
|
|
297
|
+
/** Embedding dimension exposed by the loaded model. */
|
|
298
|
+
get dimension(): number;
|
|
299
|
+
/** Release pipeline resources. Safe to call multiple times. */
|
|
300
|
+
unload(): Promise<void>;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
/** Options accepted by {@link Embeddings.create}. */
|
|
304
|
+
export declare interface EmbeddingsCreateOptions {
|
|
305
|
+
/** Optional callback for model load progress updates. */
|
|
306
|
+
onProgress?: ProgressCallback;
|
|
307
|
+
/** Override the embedding pipeline. Intended for testing. */
|
|
308
|
+
pipeline?: EmbedPipeline;
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
/** Options accepted by {@link Embeddings.embed}. */
|
|
312
|
+
export declare interface EmbedOptions {
|
|
313
|
+
/** L2-normalize each vector. Recommended for cosine similarity downstream. Default `true`. */
|
|
314
|
+
normalize?: boolean;
|
|
315
|
+
/** Pooling strategy. BGE-style models use `"cls"`. Most sentence-transformers use `"mean"`. Default `"mean"`. */
|
|
316
|
+
pooling?: "mean" | "cls";
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
/**
|
|
320
|
+
* Minimal pipeline contract that {@link Embeddings} depends on.
|
|
321
|
+
*
|
|
322
|
+
* The default implementation wraps `@huggingface/transformers`. Tests inject
|
|
323
|
+
* a fake satisfying the same shape — they never load the real runtime.
|
|
324
|
+
*/
|
|
325
|
+
export declare interface EmbedPipeline {
|
|
326
|
+
/**
|
|
327
|
+
* Run the encoder on a batch of inputs and return raw vectors.
|
|
328
|
+
*
|
|
329
|
+
* @param texts - Input strings.
|
|
330
|
+
* @param options - Pooling + normalization passed to the underlying pipeline.
|
|
331
|
+
*/
|
|
332
|
+
embed(texts: string[], options: Required<EmbedOptions>): Promise<number[][]>;
|
|
333
|
+
/** Release pipeline resources. */
|
|
334
|
+
unload?(): Promise<void>;
|
|
335
|
+
}
|
|
336
|
+
|
|
114
337
|
/**
|
|
115
338
|
* Runtime-agnostic inference contract.
|
|
116
339
|
*
|
|
@@ -148,6 +371,32 @@ export declare interface Engine {
|
|
|
148
371
|
* @throws GenerationAbortedError if `options.signal` is triggered.
|
|
149
372
|
*/
|
|
150
373
|
stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
|
|
374
|
+
/**
|
|
375
|
+
* Generate a non-streaming raw text completion.
|
|
376
|
+
*
|
|
377
|
+
* Unlike {@link Engine.generate}, this skips the chat template and feeds the
|
|
378
|
+
* prompt to the underlying model verbatim. Useful for "Once upon a time…"
|
|
379
|
+
* style continuation.
|
|
380
|
+
*
|
|
381
|
+
* @param prompt - Raw text fed to the model.
|
|
382
|
+
* @param options - Generation options.
|
|
383
|
+
* @returns The full generated text (excluding the prompt).
|
|
384
|
+
* @throws ModelNotLoadedError if called before {@link Engine.load}.
|
|
385
|
+
* @throws GenerationAbortedError if `options.signal` is triggered.
|
|
386
|
+
*/
|
|
387
|
+
complete(prompt: string, options?: GenerationOptions): Promise<string>;
|
|
388
|
+
/**
|
|
389
|
+
* Stream a raw text completion as an async iterable of token chunks.
|
|
390
|
+
*
|
|
391
|
+
* Unlike {@link Engine.stream}, this skips the chat template.
|
|
392
|
+
*
|
|
393
|
+
* @param prompt - Raw text fed to the model.
|
|
394
|
+
* @param options - Generation options.
|
|
395
|
+
* @returns Async iterable yielding token chunks. The final chunk has `done: true`.
|
|
396
|
+
* @throws ModelNotLoadedError if called before {@link Engine.load}.
|
|
397
|
+
* @throws GenerationAbortedError if `options.signal` is triggered.
|
|
398
|
+
*/
|
|
399
|
+
streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
|
|
151
400
|
/** Release any resources held by the engine. Safe to call when not loaded. */
|
|
152
401
|
unload(): Promise<void>;
|
|
153
402
|
/** Whether a model is currently loaded and ready for inference. */
|
|
@@ -180,9 +429,15 @@ export declare interface GenerationOptions {
|
|
|
180
429
|
jsonSchema?: object;
|
|
181
430
|
}
|
|
182
431
|
|
|
432
|
+
/** Return the list of supported embedding model ids. */
|
|
433
|
+
export declare function listSupportedEmbeddingModels(): string[];
|
|
434
|
+
|
|
183
435
|
/** Return the list of supported friendly model ids. */
|
|
184
436
|
export declare function listSupportedModels(): string[];
|
|
185
437
|
|
|
438
|
+
/** Return the list of supported reranker model ids. */
|
|
439
|
+
export declare function listSupportedRerankerModels(): string[];
|
|
440
|
+
|
|
186
441
|
/**
|
|
187
442
|
* Base class shared by the language-model tasks `Chat` and `Completion`.
|
|
188
443
|
* `Embeddings` and `Reranker` are standalone classes that do not extend it.
|
|
@@ -213,6 +468,7 @@ export declare abstract class LMTask {
|
|
|
213
468
|
* @param options - Task creation options.
|
|
214
469
|
*/
|
|
215
470
|
protected static createEngine(modelId: string, options?: LMTaskCreateOptions): Promise<ResolvedEngine>;
|
|
471
|
+
private static defaultEngine;
|
|
216
472
|
/** Release engine resources. Safe to call multiple times. */
|
|
217
473
|
unload(): Promise<void>;
|
|
218
474
|
/** Whether the underlying engine has a loaded model. */
|
|
@@ -228,6 +484,16 @@ export declare interface LMTaskCreateOptions {
|
|
|
228
484
|
* Production callers should let the SDK pick a backend automatically.
|
|
229
485
|
*/
|
|
230
486
|
engine?: Engine;
|
|
487
|
+
/**
|
|
488
|
+
* Run inference inside a Web Worker, isolating the UI thread from
|
|
489
|
+
* tokenization and generation. **Default `true` from v0.3** — the
|
|
490
|
+
* `WorkerEngine` is the recommended path. Pass `false` to keep
|
|
491
|
+
* inference on the main thread (useful for environments without
|
|
492
|
+
* `Worker` support or when debugging the runtime directly).
|
|
493
|
+
*
|
|
494
|
+
* Ignored when {@link engine} is provided.
|
|
495
|
+
*/
|
|
496
|
+
inWorker?: boolean;
|
|
231
497
|
}
|
|
232
498
|
|
|
233
499
|
/**
|
|
@@ -269,10 +535,110 @@ export declare interface Message {
|
|
|
269
535
|
*/
|
|
270
536
|
export declare const MODEL_PRESETS: Readonly<Record<string, ModelPreset>>;
|
|
271
537
|
|
|
538
|
+
/**
|
|
539
|
+
* Inspect and manage cached model weights.
|
|
540
|
+
*
|
|
541
|
+
* `localm-web` does not download or cache weights itself — that work is owned
|
|
542
|
+
* by `@mlc-ai/web-llm`, which writes to the browser Cache API. `ModelCache`
|
|
543
|
+
* is a thin wrapper that lets a consuming app surface cache state in its UI:
|
|
544
|
+
* "this model is downloaded", "you have 1.4 GB cached, free up space?",
|
|
545
|
+
* "clear all models on logout".
|
|
546
|
+
*
|
|
547
|
+
* @example
|
|
548
|
+
* ```ts
|
|
549
|
+
* const cache = new ModelCache();
|
|
550
|
+
* if (await cache.has("llama-3.2-1b-int4")) {
|
|
551
|
+
* console.log("ready offline");
|
|
552
|
+
* }
|
|
553
|
+
* const cached = await cache.list();
|
|
554
|
+
* await cache.delete("phi-3.5-mini-int4");
|
|
555
|
+
* const usage = await cache.estimateUsage();
|
|
556
|
+
* console.log(`${usage.usage} / ${usage.quota} bytes`);
|
|
557
|
+
* ```
|
|
558
|
+
*/
|
|
559
|
+
export declare class ModelCache {
|
|
560
|
+
private readonly hasModelHook;
|
|
561
|
+
private readonly deleteModelHook;
|
|
562
|
+
private readonly estimateHook;
|
|
563
|
+
constructor(options?: ModelCacheOptions);
|
|
564
|
+
/**
|
|
565
|
+
* Whether the model's weights are present in the browser cache.
|
|
566
|
+
*
|
|
567
|
+
* @param modelId - Friendly id from the registry.
|
|
568
|
+
* @throws UnknownModelError if `modelId` is not in the registry.
|
|
569
|
+
*/
|
|
570
|
+
has(modelId: string): Promise<boolean>;
|
|
571
|
+
/**
|
|
572
|
+
* Delete a single model's weights from the browser cache. No-op when the
|
|
573
|
+
* model is not cached.
|
|
574
|
+
*
|
|
575
|
+
* @param modelId - Friendly id from the registry.
|
|
576
|
+
* @throws UnknownModelError if `modelId` is not in the registry.
|
|
577
|
+
*/
|
|
578
|
+
delete(modelId: string): Promise<void>;
|
|
579
|
+
/**
|
|
580
|
+
* List the registry models that are currently cached.
|
|
581
|
+
*
|
|
582
|
+
* Iterates `MODEL_PRESETS` and probes each one. Only returns models known
|
|
583
|
+
* to the SDK — models cached by external WebLLM calls outside our registry
|
|
584
|
+
* are not included.
|
|
585
|
+
*
|
|
586
|
+
* @returns Empty list when nothing is cached.
|
|
587
|
+
*/
|
|
588
|
+
list(): Promise<CachedModelEntry[]>;
|
|
589
|
+
/**
|
|
590
|
+
* Delete every registry model from the cache. Useful for logout flows or
|
|
591
|
+
* "reset" buttons. Models cached outside the registry are not touched.
|
|
592
|
+
*/
|
|
593
|
+
clear(): Promise<void>;
|
|
594
|
+
/**
|
|
595
|
+
* Aggregate storage stats from the browser. Returned numbers cover the
|
|
596
|
+
* entire origin (Cache API + IndexedDB + Service Workers + OPFS), not
|
|
597
|
+
* just our model cache — use it for "you have X of Y available" hints.
|
|
598
|
+
*/
|
|
599
|
+
estimateUsage(): Promise<CacheUsage>;
|
|
600
|
+
/**
|
|
601
|
+
* Throw a descriptive error if the given id is not in the registry.
|
|
602
|
+
* Exposed for code paths that want to validate before calling other
|
|
603
|
+
* methods (those already throw on their own).
|
|
604
|
+
*
|
|
605
|
+
* @throws UnknownModelError
|
|
606
|
+
*/
|
|
607
|
+
static assertKnown(modelId: string): void;
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
/**
|
|
611
|
+
* Hooks the {@link ModelCache} uses to talk to the underlying runtime and
|
|
612
|
+
* the browser. Tests inject mocks; production code leaves them undefined,
|
|
613
|
+
* letting `ModelCache` resolve the real `@mlc-ai/web-llm` helpers and
|
|
614
|
+
* `navigator.storage.estimate()` lazily.
|
|
615
|
+
*/
|
|
616
|
+
export declare interface ModelCacheOptions {
|
|
617
|
+
/** Override `hasModelInCache` from the runtime. */
|
|
618
|
+
hasModel?: (backendId: string) => Promise<boolean>;
|
|
619
|
+
/** Override `deleteModelInCache` from the runtime. */
|
|
620
|
+
deleteModel?: (backendId: string) => Promise<void>;
|
|
621
|
+
/** Override `navigator.storage.estimate()`. */
|
|
622
|
+
estimate?: () => Promise<CacheUsage>;
|
|
623
|
+
}
|
|
624
|
+
|
|
272
625
|
/** Thrown when a model fails to load (network, parsing, runtime init). */
|
|
273
626
|
export declare class ModelLoadError extends LocalmWebError {
|
|
274
627
|
}
|
|
275
628
|
|
|
629
|
+
/**
|
|
630
|
+
* Lifecycle phase of a model load.
|
|
631
|
+
*
|
|
632
|
+
* - `downloading`: weight files are being fetched from the network or cache.
|
|
633
|
+
* - `compiling`: the runtime is preparing the model (shader compilation,
|
|
634
|
+
* tensor allocation, KV cache setup).
|
|
635
|
+
* - `loading`: a generic "still working" phase reported by the runtime when
|
|
636
|
+
* it has not classified the work into download or compile.
|
|
637
|
+
* - `ready`: the model is loaded and the engine is ready for inference.
|
|
638
|
+
* Emitted exactly once, at the end of a successful load.
|
|
639
|
+
*/
|
|
640
|
+
export declare type ModelLoadPhase = "downloading" | "compiling" | "loading" | "ready";
|
|
641
|
+
|
|
276
642
|
/** Progress event emitted while a model is loading. */
|
|
277
643
|
export declare interface ModelLoadProgress {
|
|
278
644
|
/** Fraction of total work completed, in [0, 1]. */
|
|
@@ -283,6 +649,8 @@ export declare interface ModelLoadProgress {
|
|
|
283
649
|
loaded: number;
|
|
284
650
|
/** Total bytes to load. 0 when unavailable. */
|
|
285
651
|
total: number;
|
|
652
|
+
/** Lifecycle phase classified from the runtime's status text. */
|
|
653
|
+
phase: ModelLoadPhase;
|
|
286
654
|
}
|
|
287
655
|
|
|
288
656
|
/** Thrown when an inference call is made before a model has loaded. */
|
|
@@ -316,12 +684,147 @@ export declare type ProgressCallback = (progress: ModelLoadProgress) => void;
|
|
|
316
684
|
export declare class QuotaExceededError extends LocalmWebError {
|
|
317
685
|
}
|
|
318
686
|
|
|
687
|
+
/** A document paired with its score, for {@link Reranker.rank}. */
|
|
688
|
+
export declare interface RankedDocument {
|
|
689
|
+
/** The document text. */
|
|
690
|
+
text: string;
|
|
691
|
+
/** Score from the cross-encoder. */
|
|
692
|
+
score: number;
|
|
693
|
+
/** Original index of the document in the input array. */
|
|
694
|
+
index: number;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
/**
|
|
698
|
+
* Cross-encoder reranking task backed by `@huggingface/transformers`.
|
|
699
|
+
*
|
|
700
|
+
* Use {@link Reranker.create} to construct an instance — the constructor is
|
|
701
|
+
* private. Useful as a second-stage step in a retrieve-then-rerank pipeline:
|
|
702
|
+
* pull top-K candidates with a fast embedding similarity, then rerank with
|
|
703
|
+
* a cross-encoder for higher precision.
|
|
704
|
+
*
|
|
705
|
+
* @example
|
|
706
|
+
* ```ts
|
|
707
|
+
* const rerank = await Reranker.create("bge-reranker-base");
|
|
708
|
+
* const scores = await rerank.score("what is webgpu?", [
|
|
709
|
+
* "WebGPU is a modern graphics API",
|
|
710
|
+
* "Bananas grow on trees",
|
|
711
|
+
* ]);
|
|
712
|
+
* // scores[0] >> scores[1]
|
|
713
|
+
* ```
|
|
714
|
+
*
|
|
715
|
+
* @example Ranked output sorted by score
|
|
716
|
+
* ```ts
|
|
717
|
+
* const ranked = await rerank.rank("what is webgpu?", docs);
|
|
718
|
+
* for (const r of ranked) console.log(r.score, r.text);
|
|
719
|
+
* ```
|
|
720
|
+
*/
|
|
721
|
+
export declare class Reranker {
|
|
722
|
+
private readonly pipeline;
|
|
723
|
+
/** Resolved metadata for the loaded model. */
|
|
724
|
+
readonly preset: RerankerPreset;
|
|
725
|
+
private constructor();
|
|
726
|
+
/**
|
|
727
|
+
* Create and load a `Reranker` task for the given model.
|
|
728
|
+
*
|
|
729
|
+
* @param modelId - Friendly id from the reranker registry.
|
|
730
|
+
* @param options - Optional creation options.
|
|
731
|
+
* @throws UnknownModelError if `modelId` is not in the registry.
|
|
732
|
+
* @throws ModelLoadError if the underlying pipeline fails to load.
|
|
733
|
+
*/
|
|
734
|
+
static create(modelId: string, options?: RerankerCreateOptions): Promise<Reranker>;
|
|
735
|
+
/**
|
|
736
|
+
* Score each document against the query. Returns one score per doc, in
|
|
737
|
+
* the same order. Empty `docs` returns `[]` (no error).
|
|
738
|
+
*
|
|
739
|
+
* @param query - Query string.
|
|
740
|
+
* @param docs - Documents to score.
|
|
741
|
+
* @param options - `sigmoid: true` maps logits into `[0, 1]`.
|
|
742
|
+
*/
|
|
743
|
+
score(query: string, docs: string[], options?: RerankOptions): Promise<number[]>;
|
|
744
|
+
/**
|
|
745
|
+
* Score and sort documents by score in descending order. Returns a list of
|
|
746
|
+
* {@link RankedDocument}s carrying the original index.
|
|
747
|
+
*
|
|
748
|
+
* @param query - Query string.
|
|
749
|
+
* @param docs - Documents to rank.
|
|
750
|
+
* @param options - Forwarded to {@link Reranker.score}.
|
|
751
|
+
*/
|
|
752
|
+
rank(query: string, docs: string[], options?: RerankOptions): Promise<RankedDocument[]>;
|
|
753
|
+
/** Release pipeline resources. Safe to call multiple times. */
|
|
754
|
+
unload(): Promise<void>;
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
/**
|
|
758
|
+
* Curated registry of supported reranker models for v0.3.
|
|
759
|
+
*/
|
|
760
|
+
export declare const RERANKER_PRESETS: Readonly<Record<string, RerankerPreset>>;
|
|
761
|
+
|
|
762
|
+
/** Options accepted by {@link Reranker.create}. */
|
|
763
|
+
export declare interface RerankerCreateOptions {
|
|
764
|
+
/** Optional callback for model load progress updates. */
|
|
765
|
+
onProgress?: ProgressCallback;
|
|
766
|
+
/** Override the rerank pipeline. Intended for testing. */
|
|
767
|
+
pipeline?: RerankPipeline;
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
/** Curated metadata for a supported reranker (cross-encoder) model. */
|
|
771
|
+
export declare interface RerankerPreset {
|
|
772
|
+
/** Friendly identifier (e.g. `"bge-reranker-base"`). */
|
|
773
|
+
id: string;
|
|
774
|
+
/** Family name (e.g. `"BGE Reranker"`). */
|
|
775
|
+
family: string;
|
|
776
|
+
/** Maximum input length in tokens (combined query + document). */
|
|
777
|
+
maxTokens: number;
|
|
778
|
+
/** Identifier passed to `@huggingface/transformers`. */
|
|
779
|
+
transformersId: string;
|
|
780
|
+
/** Approximate quantization (e.g. `"fp32"`). */
|
|
781
|
+
quantization: string;
|
|
782
|
+
/** Short human description. */
|
|
783
|
+
description: string;
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
/** Options accepted by {@link Reranker.score}. */
|
|
787
|
+
export declare interface RerankOptions {
|
|
788
|
+
/**
|
|
789
|
+
* Apply sigmoid to logits to map scores into `[0, 1]`. Recommended when the
|
|
790
|
+
* downstream code uses scores as probabilities. Default `false` (raw logits).
|
|
791
|
+
*/
|
|
792
|
+
sigmoid?: boolean;
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
/**
|
|
796
|
+
* Minimal pipeline contract that {@link Reranker} depends on.
|
|
797
|
+
*
|
|
798
|
+
* The default implementation wraps `@huggingface/transformers`. Tests inject
|
|
799
|
+
* a fake satisfying the same shape — they never load the real runtime.
|
|
800
|
+
*/
|
|
801
|
+
export declare interface RerankPipeline {
|
|
802
|
+
/**
|
|
803
|
+
* Score `(query, doc)` pairs. One score per doc, in the same order.
|
|
804
|
+
*
|
|
805
|
+
* @param query - Single query string.
|
|
806
|
+
* @param docs - Documents to score against the query.
|
|
807
|
+
*/
|
|
808
|
+
score(query: string, docs: string[]): Promise<number[]>;
|
|
809
|
+
/** Release pipeline resources. */
|
|
810
|
+
unload?(): Promise<void>;
|
|
811
|
+
}
|
|
812
|
+
|
|
319
813
|
/** Internal payload returned by {@link LMTask.createEngine}. */
|
|
320
814
|
declare interface ResolvedEngine {
|
|
321
815
|
engine: Engine;
|
|
322
816
|
preset: ModelPreset;
|
|
323
817
|
}
|
|
324
818
|
|
|
819
|
+
/**
|
|
820
|
+
* Resolve a friendly embedding model id to its full preset metadata.
|
|
821
|
+
*
|
|
822
|
+
* @param modelId - Friendly id (e.g. `"bge-small-en-v1.5"`).
|
|
823
|
+
* @returns The matching preset.
|
|
824
|
+
* @throws UnknownModelError if no preset matches.
|
|
825
|
+
*/
|
|
826
|
+
export declare function resolveEmbeddingPreset(modelId: string): EmbeddingPreset;
|
|
827
|
+
|
|
325
828
|
/**
|
|
326
829
|
* Resolve a friendly model id to its full preset metadata.
|
|
327
830
|
*
|
|
@@ -331,12 +834,28 @@ declare interface ResolvedEngine {
|
|
|
331
834
|
*/
|
|
332
835
|
export declare function resolveModelPreset(modelId: string): ModelPreset;
|
|
333
836
|
|
|
837
|
+
/**
|
|
838
|
+
* Resolve a friendly reranker model id to its full preset metadata.
|
|
839
|
+
*
|
|
840
|
+
* @param modelId - Friendly id (e.g. `"bge-reranker-base"`).
|
|
841
|
+
* @throws UnknownModelError if no preset matches.
|
|
842
|
+
*/
|
|
843
|
+
export declare function resolveRerankerPreset(modelId: string): RerankerPreset;
|
|
844
|
+
|
|
334
845
|
/**
|
|
335
846
|
* Public type primitives for localm-web.
|
|
336
847
|
*/
|
|
337
848
|
/** Conversation roles supported by chat templates. */
|
|
338
849
|
export declare type Role = "system" | "user" | "assistant" | "tool";
|
|
339
850
|
|
|
851
|
+
/**
|
|
852
|
+
* Subset of {@link GenerationOptions} that survives `postMessage`.
|
|
853
|
+
*
|
|
854
|
+
* `AbortSignal` cannot be cloned across the worker boundary, so it is replaced
|
|
855
|
+
* by a separate `abort` {@link WorkerRequest} message keyed on the same operation id.
|
|
856
|
+
*/
|
|
857
|
+
declare type SerializableGenerationOptions = Omit<GenerationOptions, "signal">;
|
|
858
|
+
|
|
340
859
|
/**
|
|
341
860
|
* Wrap an async iterable so that each `TokenChunk` is also passed to a
|
|
342
861
|
* caller-supplied side-effect callback before being yielded downstream.
|
|
@@ -370,4 +889,119 @@ export declare const VERSION: string;
|
|
|
370
889
|
export declare class WebGPUUnavailableError extends LocalmWebError {
|
|
371
890
|
}
|
|
372
891
|
|
|
892
|
+
/**
|
|
893
|
+
* Engine implementation that proxies all calls to a Web Worker.
|
|
894
|
+
*
|
|
895
|
+
* The worker holds the actual `WebLLMEngine`; this class is a thin RPC
|
|
896
|
+
* shell that serializes requests, tracks pending operations by a numeric id,
|
|
897
|
+
* and turns worker responses back into Promises and async iterables.
|
|
898
|
+
*
|
|
899
|
+
* Use {@link createInferenceWorker} to obtain a real worker. Tests can pass a
|
|
900
|
+
* {@link WorkerLike} mock implementing the same `postMessage` /
|
|
901
|
+
* `addEventListener` surface.
|
|
902
|
+
*/
|
|
903
|
+
export declare class WorkerEngine implements Engine {
|
|
904
|
+
private readonly worker;
|
|
905
|
+
private nextId;
|
|
906
|
+
private loaded;
|
|
907
|
+
private currentLoad;
|
|
908
|
+
private currentLoadId;
|
|
909
|
+
private currentLoadProgress;
|
|
910
|
+
private currentUnload;
|
|
911
|
+
private currentUnloadId;
|
|
912
|
+
private pendingGenerates;
|
|
913
|
+
private pendingStreams;
|
|
914
|
+
private readonly listener;
|
|
915
|
+
constructor(worker: WorkerLike);
|
|
916
|
+
isLoaded(): boolean;
|
|
917
|
+
load(modelId: string, onProgress?: ProgressCallback): Promise<void>;
|
|
918
|
+
generate(messages: Message[], options?: GenerationOptions): Promise<string>;
|
|
919
|
+
stream(messages: Message[], options?: GenerationOptions): AsyncIterable<TokenChunk>;
|
|
920
|
+
complete(prompt: string, options?: GenerationOptions): Promise<string>;
|
|
921
|
+
streamCompletion(prompt: string, options?: GenerationOptions): AsyncIterable<TokenChunk>;
|
|
922
|
+
unload(): Promise<void>;
|
|
923
|
+
/** Tear down the underlying worker. The engine is unusable after this. */
|
|
924
|
+
terminate(): void;
|
|
925
|
+
private allocateId;
|
|
926
|
+
private send;
|
|
927
|
+
private handleMessage;
|
|
928
|
+
}
|
|
929
|
+
|
|
930
|
+
/** Subset of `Worker` we depend on. Lets tests inject a mock. */
|
|
931
|
+
export declare interface WorkerLike {
|
|
932
|
+
postMessage(message: WorkerRequest): void;
|
|
933
|
+
addEventListener(type: "message", listener: (event: MessageEvent<WorkerResponse>) => void): void;
|
|
934
|
+
removeEventListener(type: "message", listener: (event: MessageEvent<WorkerResponse>) => void): void;
|
|
935
|
+
terminate(): void;
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
/** Operation request sent from the main thread to the worker. */
|
|
939
|
+
declare type WorkerRequest = {
|
|
940
|
+
op: "load";
|
|
941
|
+
id: number;
|
|
942
|
+
modelId: string;
|
|
943
|
+
} | {
|
|
944
|
+
op: "generate";
|
|
945
|
+
id: number;
|
|
946
|
+
messages: Message[];
|
|
947
|
+
options: SerializableGenerationOptions;
|
|
948
|
+
} | {
|
|
949
|
+
op: "stream";
|
|
950
|
+
id: number;
|
|
951
|
+
messages: Message[];
|
|
952
|
+
options: SerializableGenerationOptions;
|
|
953
|
+
} | {
|
|
954
|
+
op: "complete";
|
|
955
|
+
id: number;
|
|
956
|
+
prompt: string;
|
|
957
|
+
options: SerializableGenerationOptions;
|
|
958
|
+
} | {
|
|
959
|
+
op: "stream-completion";
|
|
960
|
+
id: number;
|
|
961
|
+
prompt: string;
|
|
962
|
+
options: SerializableGenerationOptions;
|
|
963
|
+
} | {
|
|
964
|
+
op: "abort";
|
|
965
|
+
id: number;
|
|
966
|
+
} | {
|
|
967
|
+
op: "unload";
|
|
968
|
+
id: number;
|
|
969
|
+
} | {
|
|
970
|
+
op: "isLoaded";
|
|
971
|
+
id: number;
|
|
972
|
+
};
|
|
973
|
+
|
|
974
|
+
/** Operation response sent from the worker back to the main thread. */
|
|
975
|
+
declare type WorkerResponse = {
|
|
976
|
+
op: "loaded";
|
|
977
|
+
id: number;
|
|
978
|
+
} | {
|
|
979
|
+
op: "generated";
|
|
980
|
+
id: number;
|
|
981
|
+
text: string;
|
|
982
|
+
} | {
|
|
983
|
+
op: "progress";
|
|
984
|
+
id: number;
|
|
985
|
+
payload: ModelLoadProgress;
|
|
986
|
+
} | {
|
|
987
|
+
op: "token";
|
|
988
|
+
id: number;
|
|
989
|
+
chunk: TokenChunk;
|
|
990
|
+
} | {
|
|
991
|
+
op: "stream-end";
|
|
992
|
+
id: number;
|
|
993
|
+
} | {
|
|
994
|
+
op: "error";
|
|
995
|
+
id: number;
|
|
996
|
+
name: string;
|
|
997
|
+
message: string;
|
|
998
|
+
} | {
|
|
999
|
+
op: "unloaded";
|
|
1000
|
+
id: number;
|
|
1001
|
+
} | {
|
|
1002
|
+
op: "is-loaded";
|
|
1003
|
+
id: number;
|
|
1004
|
+
value: boolean;
|
|
1005
|
+
};
|
|
1006
|
+
|
|
373
1007
|
export { }
|