@tryhamster/gerbil 1.0.0-rc.9 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +318 -104
- package/dist/architectures-C1I5V3Dt.mjs +6070 -0
- package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
- package/dist/browser/index.d.ts +276 -590
- package/dist/browser/index.d.ts.map +1 -1
- package/dist/browser/index.js +592 -2334
- package/dist/browser/index.js.map +1 -1
- package/dist/cli.mjs +625 -1098
- package/dist/cli.mjs.map +1 -1
- package/dist/defaults-9komdrbY.mjs +24 -0
- package/dist/defaults-9komdrbY.mjs.map +1 -0
- package/dist/frameworks/express.d.mts +1 -3
- package/dist/frameworks/express.d.mts.map +1 -1
- package/dist/frameworks/express.mjs +7 -7
- package/dist/frameworks/express.mjs.map +1 -1
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.d.mts.map +1 -1
- package/dist/frameworks/fastify.mjs +3 -3
- package/dist/frameworks/fastify.mjs.map +1 -1
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.d.mts.map +1 -1
- package/dist/frameworks/hono.mjs +4 -4
- package/dist/frameworks/hono.mjs.map +1 -1
- package/dist/frameworks/next.d.mts +3 -2
- package/dist/frameworks/next.d.mts.map +1 -1
- package/dist/frameworks/next.mjs +4 -4
- package/dist/frameworks/next.mjs.map +1 -1
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts.map +1 -1
- package/dist/frameworks/trpc.mjs +4 -4
- package/dist/frameworks/trpc.mjs.map +1 -1
- package/dist/gerbil-BetB5xb0.d.mts +488 -0
- package/dist/gerbil-BetB5xb0.d.mts.map +1 -0
- package/dist/gerbil-CTZUa8EZ.mjs +4 -0
- package/dist/gerbil-DNniplr4.mjs +1656 -0
- package/dist/gerbil-DNniplr4.mjs.map +1 -0
- package/dist/gpu/hooks.d.mts +640 -0
- package/dist/gpu/hooks.d.mts.map +1 -0
- package/dist/gpu/hooks.mjs +1369 -0
- package/dist/gpu/hooks.mjs.map +1 -0
- package/dist/gpu/index.d.mts +2 -0
- package/dist/gpu/index.mjs +6 -0
- package/dist/gpu-DFuglcEx.mjs +3790 -0
- package/dist/gpu-DFuglcEx.mjs.map +1 -0
- package/dist/index-Dgmb2kE3.d.mts +245 -0
- package/dist/index-Dgmb2kE3.d.mts.map +1 -0
- package/dist/index-DukkJRMj.d.mts +2114 -0
- package/dist/index-DukkJRMj.d.mts.map +1 -0
- package/dist/index.d.mts +22 -487
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +13 -8
- package/dist/index.mjs.map +1 -1
- package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
- package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
- package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
- package/dist/integrations/ai-sdk.d.mts +75 -6
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +131 -15
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +1 -1
- package/dist/integrations/langchain.d.mts.map +1 -1
- package/dist/integrations/langchain.mjs +5 -5
- package/dist/integrations/langchain.mjs.map +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.d.mts.map +1 -1
- package/dist/integrations/llamaindex.mjs +5 -5
- package/dist/integrations/llamaindex.mjs.map +1 -1
- package/dist/integrations/mcp-client.mjs +3 -3
- package/dist/integrations/mcp-client.mjs.map +1 -1
- package/dist/integrations/mcp.d.mts +3 -2
- package/dist/integrations/mcp.d.mts.map +1 -1
- package/dist/integrations/mcp.mjs +5 -5
- package/dist/{mcp-BvbriaBy.mjs → mcp-D2vvH1Xc.mjs} +4 -4
- package/dist/mcp-D2vvH1Xc.mjs.map +1 -0
- package/dist/memory/index.d.mts +3 -0
- package/dist/memory/index.mjs +6 -0
- package/dist/memory-D1P7Tmda.mjs +4 -0
- package/dist/memory-DVN0MnIG.mjs +132 -0
- package/dist/memory-DVN0MnIG.mjs.map +1 -0
- package/dist/memory-Dj0J1v88.mjs +294 -0
- package/dist/memory-Dj0J1v88.mjs.map +1 -0
- package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
- package/dist/moonshine-stt-4ojLtMq7.mjs +11962 -0
- package/dist/moonshine-stt-4ojLtMq7.mjs.map +1 -0
- package/dist/{one-liner-s-lD8rCC.mjs → one-liner-JhdIPxzF.mjs} +14 -16
- package/dist/one-liner-JhdIPxzF.mjs.map +1 -0
- package/dist/repl-BDRkwPGX.mjs +9 -0
- package/dist/skills/index.d.mts +270 -320
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +5 -5
- package/dist/{skills-CD3Orlex.mjs → skills-CU694Dc8.mjs} +187 -32
- package/dist/skills-CU694Dc8.mjs.map +1 -0
- package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
- package/dist/tools-DQ1mPUw5.mjs.map +1 -0
- package/dist/types-DQBe2lFo.d.mts +165 -0
- package/dist/types-DQBe2lFo.d.mts.map +1 -0
- package/dist/{types-CiTc7ez3.d.mts → types-LlyYILII.d.mts} +112 -14
- package/dist/types-LlyYILII.d.mts.map +1 -0
- package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
- package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
- package/dist/vector-B0panuy6.mjs +95 -0
- package/dist/vector-B0panuy6.mjs.map +1 -0
- package/docs/PROJECT-STATE.md +321 -0
- package/docs/adding-a-model-family.md +280 -0
- package/docs/ai-sdk.md +70 -61
- package/docs/architecture/overview.md +17 -7
- package/docs/browser.md +203 -8
- package/docs/embeddings.md +156 -0
- package/docs/gerbil-site-native-migration.md +217 -0
- package/docs/gpu-engine/architectures.md +398 -0
- package/docs/gpu-engine/ir.md +372 -0
- package/docs/gpu-engine/kernels.md +718 -0
- package/docs/gpu-engine/paper.html +1759 -0
- package/docs/gpu-engine/paper.md +2109 -0
- package/docs/gpu-engine/safetensors.md +312 -0
- package/docs/gpu-engine/tokenizer.md +302 -0
- package/docs/memory-rag.md +91 -0
- package/docs/metal-safari-intel.md +190 -0
- package/docs/mobile-failure-diagnosis.md +124 -0
- package/docs/mobile.md +99 -0
- package/docs/observability.md +230 -0
- package/docs/onnx-removal-plan.md +339 -0
- package/docs/research/autoresearch-portable.md +904 -0
- package/docs/research/dispatch-reduction-hivemind.md +84 -0
- package/docs/research/ios-safari-model-caching.md +117 -0
- package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
- package/docs/research/native-stt-model-selection.md +49 -0
- package/docs/research/native-tts-model-selection.md +90 -0
- package/docs/research/native-vs-chromium-decision.md +152 -0
- package/docs/research/nemotron-mamba2-inference.md +910 -0
- package/docs/research/qwen35-multimodal.md +293 -0
- package/docs/research/qwen36-gemma4-targets.md +337 -0
- package/docs/research/sota-embedding-models.md +179 -0
- package/docs/research/sota-mobile-models-2026.md +263 -0
- package/docs/research/sota-modality-models.md +202 -0
- package/docs/research/tps-baselines.md +71 -0
- package/docs/research/webgpu-m4-reference.md +104 -0
- package/docs/site-update-plan.md +155 -0
- package/docs/structured-output.md +123 -0
- package/docs/stt.md +63 -446
- package/docs/tts.md +77 -499
- package/docs/vision.md +100 -338
- package/package.json +22 -7
- package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
- package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
- package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
- package/dist/gerbil-CJ3ifloF.mjs +0 -4
- package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
- package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
- package/dist/gerbil-qOTe1nl2.d.mts +0 -431
- package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
- package/dist/kokoro-BNTb6egA.mjs +0 -20210
- package/dist/kokoro-BNTb6egA.mjs.map +0 -1
- package/dist/kokoro-CMOGDSgT.js +0 -20212
- package/dist/kokoro-CMOGDSgT.js.map +0 -1
- package/dist/mcp-BvbriaBy.mjs.map +0 -1
- package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
- package/dist/repl-DveXw36T.mjs +0 -9
- package/dist/skills-CD3Orlex.mjs.map +0 -1
- package/dist/stt-Bu-E23Sc.js +0 -433
- package/dist/stt-Bu-E23Sc.js.map +0 -1
- package/dist/stt-CpLYbGFd.mjs +0 -433
- package/dist/stt-CpLYbGFd.mjs.map +0 -1
- package/dist/stt-DRPLEEHB.mjs +0 -3
- package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
- package/dist/transformers.web-DiD1gTwk.js +0 -44695
- package/dist/transformers.web-DiD1gTwk.js.map +0 -1
- package/dist/transformers.web-u34VxRFM.js +0 -3
- package/dist/tts-CqroPaSK.js +0 -724
- package/dist/tts-CqroPaSK.js.map +0 -1
- package/dist/tts-DXgsKGCe.mjs +0 -3
- package/dist/tts-DeGANMNV.mjs +0 -730
- package/dist/tts-DeGANMNV.mjs.map +0 -1
- package/dist/types-CiTc7ez3.d.mts.map +0 -1
- /package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
- /package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
- /package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0
package/dist/browser/index.d.ts
CHANGED
|
@@ -41,8 +41,12 @@ type GenerateOptions = {
|
|
|
41
41
|
system?: string;
|
|
42
42
|
/** Enable thinking/reasoning mode (Qwen3) */
|
|
43
43
|
thinking?: boolean;
|
|
44
|
-
/** Callback for each token (streaming) */
|
|
45
|
-
onToken?: (token: string
|
|
44
|
+
/** Callback for each token (streaming); `meta` carries live decode-only tok/s */
|
|
45
|
+
onToken?: (token: string, meta?: {
|
|
46
|
+
tokenIndex: number;
|
|
47
|
+
tps: number;
|
|
48
|
+
elapsedMs: number;
|
|
49
|
+
}) => void;
|
|
46
50
|
/** Images to include (only used if model supports vision) */
|
|
47
51
|
images?: ImageInput[];
|
|
48
52
|
/** Enable response caching (default: false) */
|
|
@@ -92,16 +96,47 @@ type EmbedResult = {
|
|
|
92
96
|
/** Time in ms */
|
|
93
97
|
totalTime: number;
|
|
94
98
|
};
|
|
99
|
+
type SearchResult = {
|
|
100
|
+
/** The matched text */
|
|
101
|
+
text: string;
|
|
102
|
+
/** Similarity score (0-1, higher is more similar) */
|
|
103
|
+
score: number;
|
|
104
|
+
/** Index in the original corpus */
|
|
105
|
+
index: number;
|
|
106
|
+
};
|
|
107
|
+
type SimilarityResult = {
|
|
108
|
+
/** Similarity score (0-1, higher is more similar) */
|
|
109
|
+
score: number;
|
|
110
|
+
/** First text */
|
|
111
|
+
textA: string;
|
|
112
|
+
/** Second text */
|
|
113
|
+
textB: string;
|
|
114
|
+
/** Time in ms */
|
|
115
|
+
totalTime: number;
|
|
116
|
+
};
|
|
95
117
|
type LoadOptions = {
|
|
96
118
|
/** Progress callback */
|
|
97
119
|
onProgress?: (info: ProgressInfo) => void;
|
|
98
|
-
/**
|
|
99
|
-
|
|
100
|
-
|
|
120
|
+
/**
|
|
121
|
+
* Compute device. The only inference backend is the native WebGPU engine
|
|
122
|
+
* (Dawn in Node, WebGPU in the browser); "auto" resolves to "webgpu". There
|
|
123
|
+
* is no CPU/WASM or ONNX path.
|
|
124
|
+
*/
|
|
125
|
+
device?: "auto" | "webgpu";
|
|
126
|
+
/**
|
|
127
|
+
* Weight quantization. The engine quantizes to INT4 ("q4") on load; the other
|
|
128
|
+
* values are accepted for forward-compat but currently map to q4.
|
|
129
|
+
*/
|
|
101
130
|
dtype?: "q4" | "q8" | "fp16" | "fp32";
|
|
102
131
|
/** Override context length */
|
|
103
132
|
contextLength?: number;
|
|
104
133
|
};
|
|
134
|
+
type PreloadOptions = {
|
|
135
|
+
/** Progress callback for download status */
|
|
136
|
+
onProgress?: (info: ProgressInfo) => void;
|
|
137
|
+
/** Keep model loaded in memory after preload (default: false - disposes to free memory) */
|
|
138
|
+
keepLoaded?: boolean;
|
|
139
|
+
};
|
|
105
140
|
type ProgressInfo = {
|
|
106
141
|
status: string;
|
|
107
142
|
progress?: number;
|
|
@@ -112,14 +147,18 @@ type ProgressInfo = {
|
|
|
112
147
|
type GerbilConfig = {
|
|
113
148
|
/** Default model */
|
|
114
149
|
model?: string;
|
|
115
|
-
/** Default device */
|
|
116
|
-
device?: "auto" | "
|
|
117
|
-
/** Default quantization */
|
|
150
|
+
/** Default device (native WebGPU only; "auto" resolves to "webgpu") */
|
|
151
|
+
device?: "auto" | "webgpu";
|
|
152
|
+
/** Default quantization (engine uses INT4 "q4") */
|
|
118
153
|
dtype?: "q4" | "q8" | "fp16" | "fp32";
|
|
119
154
|
/** Cache configuration */
|
|
120
155
|
cache?: CacheConfig;
|
|
121
156
|
/** Fallback configuration */
|
|
122
157
|
fallback?: FallbackConfig;
|
|
158
|
+
/** Telemetry hooks for observability (Sentry, logging, etc.) */
|
|
159
|
+
telemetry?: TelemetryConfig;
|
|
160
|
+
/** Concurrency control for request queuing */
|
|
161
|
+
concurrency?: ConcurrencyConfig;
|
|
123
162
|
};
|
|
124
163
|
type CacheConfig = {
|
|
125
164
|
/** Enable caching (default: true) */
|
|
@@ -183,14 +222,14 @@ type SystemInfo = {
|
|
|
183
222
|
type GerbilModelSettings = {
|
|
184
223
|
/** Enable thinking mode */
|
|
185
224
|
thinking?: boolean;
|
|
186
|
-
/** Device to use */
|
|
187
|
-
device?: "auto" | "
|
|
225
|
+
/** Device to use (native WebGPU only) */
|
|
226
|
+
device?: "auto" | "webgpu";
|
|
188
227
|
/** Quantization level */
|
|
189
228
|
dtype?: "q4" | "q8" | "fp16" | "fp32";
|
|
190
229
|
};
|
|
191
230
|
type GerbilProviderSettings = {
|
|
192
|
-
/** Default device */
|
|
193
|
-
device?: "auto" | "
|
|
231
|
+
/** Default device (native WebGPU only) */
|
|
232
|
+
device?: "auto" | "webgpu";
|
|
194
233
|
/** Default quantization */
|
|
195
234
|
dtype?: "q4" | "q8" | "fp16" | "fp32";
|
|
196
235
|
};
|
|
@@ -348,381 +387,150 @@ type StreamingTranscriptionSession = {
|
|
|
348
387
|
/** Reset session (clear buffer and transcript) */
|
|
349
388
|
reset: () => void;
|
|
350
389
|
};
|
|
351
|
-
//#endregion
|
|
352
|
-
//#region src/core/models.d.ts
|
|
353
|
-
declare const BUILTIN_MODELS: Record<string, ModelConfig>;
|
|
354
|
-
//#endregion
|
|
355
|
-
//#region src/browser/index.d.ts
|
|
356
|
-
|
|
357
|
-
type WorkerProgress = {
|
|
358
|
-
status: "loading" | "downloading" | "ready" | "error";
|
|
359
|
-
message?: string;
|
|
360
|
-
file?: string;
|
|
361
|
-
progress?: number;
|
|
362
|
-
/** Number of files being downloaded (0 = loading from cache) */
|
|
363
|
-
downloadCount?: number;
|
|
364
|
-
/** Total files to process */
|
|
365
|
-
totalFiles?: number;
|
|
366
|
-
error?: string;
|
|
367
|
-
};
|
|
368
|
-
type WorkerToken = {
|
|
369
|
-
status: "token";
|
|
370
|
-
text: string;
|
|
371
|
-
state: "thinking" | "answering";
|
|
372
|
-
numTokens: number;
|
|
373
|
-
tps: number;
|
|
374
|
-
};
|
|
375
|
-
type WorkerComplete = {
|
|
376
|
-
status: "complete";
|
|
377
|
-
text: string;
|
|
378
|
-
numTokens: number;
|
|
379
|
-
totalTime: number;
|
|
380
|
-
tps: number;
|
|
381
|
-
};
|
|
382
|
-
type GerbilWorkerOptions = {
|
|
383
|
-
/** Model ID to load (default: "qwen3-0.6b") */
|
|
384
|
-
modelId?: string;
|
|
385
|
-
/** Called during model loading with progress updates */
|
|
386
|
-
onProgress?: (progress: WorkerProgress) => void;
|
|
387
|
-
/** Called for each token during streaming generation */
|
|
388
|
-
onToken?: (token: WorkerToken) => void;
|
|
389
|
-
/** Called when generation is complete */
|
|
390
|
-
onComplete?: (result: WorkerComplete) => void;
|
|
391
|
-
/** Called on errors */
|
|
392
|
-
onError?: (error: string) => void;
|
|
393
|
-
/** Worker script URL (auto-detected if not provided) */
|
|
394
|
-
workerUrl?: string;
|
|
395
|
-
};
|
|
396
|
-
type GenerateStreamOptions = {
|
|
397
|
-
/** Maximum tokens to generate */
|
|
398
|
-
maxTokens?: number;
|
|
399
|
-
/** Temperature for sampling (0 = deterministic) */
|
|
400
|
-
temperature?: number;
|
|
401
|
-
/** Top-p nucleus sampling */
|
|
402
|
-
topP?: number;
|
|
403
|
-
/** Top-k sampling */
|
|
404
|
-
topK?: number;
|
|
405
|
-
/** Enable thinking mode (Qwen3) */
|
|
406
|
-
thinking?: boolean;
|
|
407
|
-
/** System prompt */
|
|
408
|
-
system?: string;
|
|
409
|
-
/** Image URLs or data URIs (for vision models) */
|
|
410
|
-
images?: string[];
|
|
411
|
-
/** Conversation history for multi-turn (includes all previous messages) */
|
|
412
|
-
history?: Array<{
|
|
413
|
-
role: "user" | "assistant" | "system";
|
|
414
|
-
content: string;
|
|
415
|
-
}>;
|
|
416
|
-
};
|
|
417
|
-
type GerbilWorker = {
|
|
418
|
-
/** Generate text with streaming */
|
|
419
|
-
generate: (prompt: string, options?: GenerateStreamOptions) => Promise<string>;
|
|
420
|
-
/** Interrupt current generation */
|
|
421
|
-
interrupt: () => void;
|
|
422
|
-
/** Reset conversation cache */
|
|
423
|
-
reset: () => void;
|
|
424
|
-
/** Terminate the worker */
|
|
425
|
-
terminate: () => void;
|
|
426
|
-
/** Check if model is loaded */
|
|
427
|
-
isReady: () => boolean;
|
|
428
|
-
};
|
|
429
390
|
/**
|
|
430
|
-
*
|
|
431
|
-
*
|
|
432
|
-
* Uses a Web Worker to keep the UI responsive during model loading
|
|
433
|
-
* and text generation, with real-time token streaming.
|
|
391
|
+
* Telemetry hooks for production observability.
|
|
392
|
+
* Pass your own Sentry instance or custom logging functions.
|
|
434
393
|
*/
|
|
435
|
-
|
|
436
|
-
/**
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
/**
|
|
457
|
-
|
|
458
|
-
/**
|
|
459
|
-
|
|
460
|
-
/**
|
|
461
|
-
|
|
462
|
-
/**
|
|
463
|
-
|
|
464
|
-
/** Max tokens per response */
|
|
465
|
-
maxTokens?: number;
|
|
466
|
-
/** Temperature (0-2) */
|
|
467
|
-
temperature?: number;
|
|
468
|
-
/** Initial messages */
|
|
469
|
-
initialMessages?: Message[];
|
|
470
|
-
/** Auto-load model on mount (default: false - loads on first generate or load()) */
|
|
471
|
-
autoLoad?: boolean;
|
|
472
|
-
/** Called when model is ready */
|
|
473
|
-
onReady?: () => void;
|
|
474
|
-
/** Called on error */
|
|
475
|
-
onError?: (error: string) => void;
|
|
476
|
-
};
|
|
477
|
-
/** Return type for useChat hook */
|
|
478
|
-
type UseChatReturn = {
|
|
479
|
-
/** Chat messages */
|
|
480
|
-
messages: Message[];
|
|
481
|
-
/** Current input value */
|
|
482
|
-
input: string;
|
|
483
|
-
/** Set input value */
|
|
484
|
-
setInput: (value: string) => void;
|
|
485
|
-
/** Submit current input */
|
|
486
|
-
handleSubmit: (e?: {
|
|
487
|
-
preventDefault?: () => void;
|
|
488
|
-
}) => void;
|
|
489
|
-
/** Whether model is loading */
|
|
490
|
-
isLoading: boolean;
|
|
491
|
-
/** Loading progress */
|
|
492
|
-
loadingProgress: LoadingProgress | null;
|
|
493
|
-
/** Whether generating a response */
|
|
494
|
-
isGenerating: boolean;
|
|
495
|
-
/** Current thinking content (streaming) */
|
|
496
|
-
thinking: string;
|
|
497
|
-
/** Stop generation */
|
|
498
|
-
stop: () => void;
|
|
499
|
-
/** Clear all messages */
|
|
500
|
-
clear: () => void;
|
|
501
|
-
/** Current tokens per second */
|
|
502
|
-
tps: number;
|
|
503
|
-
/** Whether model is ready */
|
|
504
|
-
isReady: boolean;
|
|
505
|
-
/** Error message if any */
|
|
506
|
-
error: string | null;
|
|
507
|
-
/** Load the model (only needed if lazy: true) */
|
|
508
|
-
load: () => void;
|
|
509
|
-
/** Currently attached images (for next message) */
|
|
510
|
-
attachedImages: string[];
|
|
511
|
-
/** Attach an image to the next message */
|
|
512
|
-
attachImage: (imageUrl: string) => void;
|
|
513
|
-
/** Remove an attached image */
|
|
514
|
-
removeImage: (index: number) => void;
|
|
515
|
-
/** Clear all attached images */
|
|
516
|
-
clearImages: () => void;
|
|
517
|
-
/** Send message with specific images (convenience method) */
|
|
518
|
-
sendWithImages: (text: string, images: string[]) => void;
|
|
394
|
+
type TelemetryConfig = {
|
|
395
|
+
/**
|
|
396
|
+
* Called after successful generation with full result and timing.
|
|
397
|
+
* Use for logging, metrics, or analytics.
|
|
398
|
+
*/
|
|
399
|
+
onGenerate?: (event: GenerateEvent) => void;
|
|
400
|
+
/**
|
|
401
|
+
* Called when any error occurs during Gerbil operations.
|
|
402
|
+
* Perfect for Sentry.captureException() or similar.
|
|
403
|
+
*/
|
|
404
|
+
onError?: (error: Error, context: ErrorContext) => void;
|
|
405
|
+
/**
|
|
406
|
+
* Called after model loading completes (success or failure).
|
|
407
|
+
*/
|
|
408
|
+
onModelLoad?: (event: ModelLoadEvent) => void;
|
|
409
|
+
/**
|
|
410
|
+
* Called when a request is queued (if concurrency limit reached).
|
|
411
|
+
*/
|
|
412
|
+
onQueueWait?: (waitTimeMs: number) => void;
|
|
413
|
+
};
|
|
414
|
+
type GenerateEvent = {
|
|
415
|
+
/** Model used for generation */
|
|
416
|
+
modelId: string;
|
|
417
|
+
/** Generation result */
|
|
418
|
+
result: GenerateResult;
|
|
419
|
+
/** Whether response came from cache */
|
|
420
|
+
cached: boolean;
|
|
421
|
+
/** Time spent waiting in queue (if any) */
|
|
422
|
+
queueTimeMs?: number;
|
|
519
423
|
};
|
|
520
424
|
/**
|
|
521
|
-
*
|
|
522
|
-
*
|
|
523
|
-
* @example
|
|
524
|
-
* ```tsx
|
|
525
|
-
* import { useChat } from "@tryhamster/gerbil/browser";
|
|
526
|
-
*
|
|
527
|
-
* function Chat() {
|
|
528
|
-
* const { messages, input, setInput, handleSubmit, isLoading, isGenerating } = useChat();
|
|
529
|
-
*
|
|
530
|
-
* if (isLoading) return <div>Loading model...</div>;
|
|
531
|
-
*
|
|
532
|
-
* return (
|
|
533
|
-
* <div>
|
|
534
|
-
* {messages.map(m => (
|
|
535
|
-
* <div key={m.id}>{m.role}: {m.content}</div>
|
|
536
|
-
* ))}
|
|
537
|
-
* <form onSubmit={handleSubmit}>
|
|
538
|
-
* <input value={input} onChange={e => setInput(e.target.value)} />
|
|
539
|
-
* <button disabled={isGenerating}>Send</button>
|
|
540
|
-
* </form>
|
|
541
|
-
* </div>
|
|
542
|
-
* );
|
|
543
|
-
* }
|
|
544
|
-
* ```
|
|
425
|
+
* Context passed to telemetry onError callback.
|
|
426
|
+
* Flexible record to allow any relevant context data.
|
|
545
427
|
*/
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
autoLoad?: boolean;
|
|
561
|
-
/** Called when model is ready */
|
|
562
|
-
onReady?: () => void;
|
|
563
|
-
/** Called on error */
|
|
564
|
-
onError?: (error: string) => void;
|
|
428
|
+
type ErrorContext = Record<string, unknown>;
|
|
429
|
+
type ModelLoadEvent = {
|
|
430
|
+
/** Model that was loaded */
|
|
431
|
+
modelId: string;
|
|
432
|
+
/** Time to load in ms */
|
|
433
|
+
loadTimeMs: number;
|
|
434
|
+
/** Whether loaded from cache */
|
|
435
|
+
fromCache: boolean;
|
|
436
|
+
/** Device used */
|
|
437
|
+
device: "webgpu" | "cpu" | "wasm";
|
|
438
|
+
/** Whether load succeeded */
|
|
439
|
+
success: boolean;
|
|
440
|
+
/** Error message if failed */
|
|
441
|
+
error?: string;
|
|
565
442
|
};
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
/** Return type for useCompletion hook */
|
|
572
|
-
type UseCompletionReturn = {
|
|
573
|
-
/** Generated completion */
|
|
574
|
-
completion: string;
|
|
575
|
-
/** Thinking content (if enabled) */
|
|
576
|
-
thinking: string;
|
|
577
|
-
/** Generate completion (optionally with images for vision models) */
|
|
578
|
-
complete: (prompt: string, options?: CompleteOptions) => Promise<string>;
|
|
579
|
-
/** Whether model is loading */
|
|
580
|
-
isLoading: boolean;
|
|
581
|
-
/** Loading progress */
|
|
582
|
-
loadingProgress: LoadingProgress | null;
|
|
583
|
-
/** Whether generating */
|
|
584
|
-
isGenerating: boolean;
|
|
585
|
-
/** Stop generation */
|
|
586
|
-
stop: () => void;
|
|
587
|
-
/** Current tokens per second */
|
|
588
|
-
tps: number;
|
|
589
|
-
/** Whether model is ready */
|
|
590
|
-
isReady: boolean;
|
|
591
|
-
/** Error message if any */
|
|
592
|
-
error: string | null;
|
|
593
|
-
/** Load the model (only needed if lazy: true) */
|
|
594
|
-
load: () => void;
|
|
443
|
+
type ConcurrencyConfig = {
|
|
444
|
+
/** Maximum concurrent generation requests (default: 1 for LLM) */
|
|
445
|
+
maxConcurrent?: number;
|
|
446
|
+
/** Request timeout in ms (default: 300000 = 5 min) */
|
|
447
|
+
timeout?: number;
|
|
595
448
|
};
|
|
449
|
+
//#endregion
|
|
450
|
+
//#region src/core/models.d.ts
|
|
451
|
+
declare const BUILTIN_MODELS: Record<string, ModelConfig>;
|
|
452
|
+
//#endregion
|
|
453
|
+
//#region src/browser/pwa.d.ts
|
|
596
454
|
/**
|
|
597
|
-
*
|
|
455
|
+
* Mobile / PWA storage helpers.
|
|
598
456
|
*
|
|
599
|
-
*
|
|
600
|
-
*
|
|
601
|
-
*
|
|
457
|
+
* On-device models are large (a 4-bit 0.8B is ~400 MB; vision/larger models are
|
|
458
|
+
* GBs). Mobile browsers — iOS Safari especially — wall a web origin off from the
|
|
459
|
+
* real disk with TWO independent ceilings:
|
|
602
460
|
*
|
|
603
|
-
*
|
|
604
|
-
*
|
|
461
|
+
* 1. **Storage quota** (disk for the model cache). An *uninstalled* Safari tab
|
|
462
|
+
* gets only ~1 GB, best-effort and evictable, regardless of how much free
|
|
463
|
+
* disk the device has. Exceed it and every cache write fails → the model
|
|
464
|
+
* re-downloads on every visit.
|
|
465
|
+
* 2. **Tab memory** (RAM during load/inference) — a separate, smaller ceiling.
|
|
605
466
|
*
|
|
606
|
-
*
|
|
467
|
+
* The unlock for the storage ceiling is **persistent storage**, which iOS Safari
|
|
468
|
+
* grants when the site is **installed to the Home Screen** (a PWA). Installed, the
|
|
469
|
+
* quota jumps to a large fraction of actual disk and is never evicted — so models
|
|
470
|
+
* cache once and stay. These helpers let an app surface that to its users and
|
|
471
|
+
* request it, so on-device AI is actually practical on mobile.
|
|
607
472
|
*
|
|
608
|
-
*
|
|
609
|
-
* <div>
|
|
610
|
-
* <button onClick={() => complete("Write a haiku")}>Generate</button>
|
|
611
|
-
* <p>{completion}</p>
|
|
612
|
-
* </div>
|
|
613
|
-
* );
|
|
614
|
-
* }
|
|
615
|
-
* ```
|
|
473
|
+
* All functions are SSR/Node-safe (guarded; return conservative defaults).
|
|
616
474
|
*/
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
/**
|
|
627
|
-
|
|
628
|
-
/**
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
/**
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
type UseSpeechReturn = {
|
|
657
|
-
/** Speak text aloud */
|
|
658
|
-
speak: (text: string, options?: {
|
|
659
|
-
voice?: string;
|
|
660
|
-
speed?: number;
|
|
661
|
-
}) => Promise<void>;
|
|
662
|
-
/** Stop current speech */
|
|
663
|
-
stop: () => void;
|
|
664
|
-
/** Whether TTS model is loading */
|
|
665
|
-
isLoading: boolean;
|
|
666
|
-
/** Loading progress */
|
|
667
|
-
loadingProgress: TTSProgress | null;
|
|
668
|
-
/** Whether currently speaking */
|
|
669
|
-
isSpeaking: boolean;
|
|
670
|
-
/** Whether TTS model is ready */
|
|
671
|
-
isReady: boolean;
|
|
672
|
-
/** Load the TTS model */
|
|
673
|
-
load: () => void;
|
|
674
|
-
/** Error message if any */
|
|
675
|
-
error: string | null;
|
|
676
|
-
/** List available voices for current model */
|
|
677
|
-
listVoices: () => BrowserVoiceInfo[];
|
|
678
|
-
/** Current voice ID */
|
|
679
|
-
currentVoice: string;
|
|
680
|
-
/** Set current voice */
|
|
681
|
-
setVoice: (voiceId: string) => void;
|
|
682
|
-
/** Current speed */
|
|
683
|
-
currentSpeed: number;
|
|
684
|
-
/** Set speed */
|
|
685
|
-
setSpeed: (speed: number) => void;
|
|
686
|
-
/** Current TTS model ID */
|
|
687
|
-
currentModel: TTSModelId;
|
|
688
|
-
/** Sample rate for current model (24000 for Kokoro, 44100 for Supertonic) */
|
|
689
|
-
sampleRate: number;
|
|
475
|
+
/** True when the page is running as an installed/standalone PWA (Home Screen). */
|
|
476
|
+
declare function isStandalone(): boolean;
|
|
477
|
+
/** True when running on iOS/iPadOS (where install is the quota unlock and the
|
|
478
|
+
* install flow is manual: Share → Add to Home Screen). iPadOS masquerades as
|
|
479
|
+
* macOS, so we also treat touch-capable WebKit-on-Mac as iOS. */
|
|
480
|
+
declare function isIOS(): boolean;
|
|
481
|
+
type StorageStatus = {
|
|
482
|
+
/** Total quota granted to this origin, in MB (best-effort estimate). */
|
|
483
|
+
quotaMB: number;
|
|
484
|
+
/** Bytes currently used by this origin, in MB. */
|
|
485
|
+
usageMB: number;
|
|
486
|
+
/** quota − usage, in MB. */
|
|
487
|
+
availableMB: number;
|
|
488
|
+
/** Storage is persistent (exempt from eviction). On iOS this is effectively
|
|
489
|
+
* only true once the site is installed to the Home Screen. */
|
|
490
|
+
persisted: boolean;
|
|
491
|
+
/** Running as an installed/standalone PWA. */
|
|
492
|
+
installed: boolean;
|
|
493
|
+
/** Platform is iOS/iPadOS (install is the quota unlock here). */
|
|
494
|
+
ios: boolean;
|
|
495
|
+
};
|
|
496
|
+
/** Snapshot of the origin's storage situation — quota, usage, persistence, and
|
|
497
|
+
* whether the app is installed. Use it to decide whether to recommend install
|
|
498
|
+
* before downloading a large model. */
|
|
499
|
+
declare function getStorageStatus(): Promise<StorageStatus>;
|
|
500
|
+
/**
|
|
501
|
+
* Request persistent storage (exempt from eviction). Returns whether the origin
|
|
502
|
+
* is persistent afterwards. Browsers grant this based on engagement/installation;
|
|
503
|
+
* on iOS Safari it is effectively granted only to an installed (Home Screen) PWA,
|
|
504
|
+
* so call this AND guide users to install when it returns false on iOS.
|
|
505
|
+
*/
|
|
506
|
+
declare function requestPersistentStorage(): Promise<boolean>;
|
|
507
|
+
type ModelFit = {
|
|
508
|
+
/** The model likely fits in the currently-available quota. */
|
|
509
|
+
fits: boolean;
|
|
510
|
+
availableMB: number;
|
|
511
|
+
/** Caching durably would benefit from installing to the Home Screen — true when
|
|
512
|
+
* not installed on iOS, or when the model doesn't fit the current quota. */
|
|
513
|
+
recommendInstall: boolean;
|
|
690
514
|
};
|
|
691
515
|
/**
|
|
692
|
-
*
|
|
693
|
-
*
|
|
694
|
-
*
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
*
|
|
699
|
-
*
|
|
700
|
-
*
|
|
701
|
-
*
|
|
702
|
-
* const { speak, stop, isLoading, isSpeaking, listVoices, setVoice } = useSpeech();
|
|
703
|
-
*
|
|
704
|
-
* // Or use Supertonic (44.1kHz, faster)
|
|
705
|
-
* // const { speak, listVoices } = useSpeech({ model: "supertonic-66m" });
|
|
706
|
-
*
|
|
707
|
-
* if (isLoading) return <div>Loading TTS...</div>;
|
|
708
|
-
*
|
|
709
|
-
* return (
|
|
710
|
-
* <div>
|
|
711
|
-
* <select onChange={e => setVoice(e.target.value)}>
|
|
712
|
-
* {listVoices().map(v => (
|
|
713
|
-
* <option key={v.id} value={v.id}>{v.name}</option>
|
|
714
|
-
* ))}
|
|
715
|
-
* </select>
|
|
716
|
-
* <button onClick={() => speak("Hello world!")}>
|
|
717
|
-
* {isSpeaking ? "Speaking..." : "Speak"}
|
|
718
|
-
* </button>
|
|
719
|
-
* {isSpeaking && <button onClick={stop}>Stop</button>}
|
|
720
|
-
* </div>
|
|
721
|
-
* );
|
|
722
|
-
* }
|
|
723
|
-
* ```
|
|
516
|
+
* Estimate whether a model of `sizeMB` will cache in the current quota, and
|
|
517
|
+
* whether you should recommend installing to the Home Screen first. Pair with a
|
|
518
|
+
* one-time "Install for offline use" prompt before a large download on mobile.
|
|
519
|
+
*/
|
|
520
|
+
declare function canCacheModel(sizeMB: number): Promise<ModelFit>;
|
|
521
|
+
/**
|
|
522
|
+
* Platform-appropriate install guidance. iOS Safari has NO programmatic install
|
|
523
|
+
* prompt — installation is manual (Share → Add to Home Screen), so apps should
|
|
524
|
+
* show these instructions. Other platforms (Android/Chrome) fire
|
|
525
|
+
* `beforeinstallprompt`, which apps can capture for a one-tap button.
|
|
724
526
|
*/
|
|
725
|
-
declare function
|
|
527
|
+
declare function getInstallGuidance(): {
|
|
528
|
+
installed: boolean;
|
|
529
|
+
manual: boolean;
|
|
530
|
+
steps: string;
|
|
531
|
+
};
|
|
532
|
+
//#endregion
|
|
533
|
+
//#region src/browser/audio.d.ts
|
|
726
534
|
/**
|
|
727
535
|
* Play audio from Float32Array using Web Audio API
|
|
728
536
|
*
|
|
@@ -763,247 +571,125 @@ declare function createAudioPlayer(sampleRate?: number): {
|
|
|
763
571
|
stop: () => void;
|
|
764
572
|
isPlaying: () => boolean;
|
|
765
573
|
};
|
|
574
|
+
//#endregion
|
|
575
|
+
//#region src/browser/device-guards.d.ts
|
|
766
576
|
/**
|
|
767
|
-
*
|
|
577
|
+
* Approximate on-device (INT4) memory footprint in MB for the models the native
|
|
578
|
+
* engine actually ships. Used for memory-aware selection and messaging.
|
|
768
579
|
*/
|
|
769
|
-
|
|
770
|
-
status: "downloading" | "loading" | "ready" | "error";
|
|
771
|
-
message?: string;
|
|
772
|
-
progress?: number;
|
|
773
|
-
file?: string;
|
|
774
|
-
};
|
|
580
|
+
declare const MODEL_SIZES: Record<string, number>;
|
|
775
581
|
/**
|
|
776
|
-
*
|
|
582
|
+
* Check if a model is safe to load on the current device.
|
|
583
|
+
* Returns guidance specific to iOS memory constraints. Matches on the real
|
|
584
|
+
* native-engine repo ids (MLX 4-bit / upstream Qwen / Liquid).
|
|
777
585
|
*/
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
/** Callback during loading */
|
|
790
|
-
onProgress?: (progress: STTProgress) => void;
|
|
791
|
-
/** Enable streaming transcription - transcribes audio in chunks as you speak */
|
|
792
|
-
streaming?: boolean;
|
|
793
|
-
/** Chunk duration in ms for streaming mode (default: 3000 = 3 seconds) */
|
|
794
|
-
chunkDuration?: number;
|
|
795
|
-
/** Callback for each streaming chunk with partial transcript */
|
|
796
|
-
onChunk?: (text: string, chunkIndex: number) => void;
|
|
586
|
+
declare function isModelSafeForDevice(modelId: string): {
|
|
587
|
+
safe: boolean;
|
|
588
|
+
/**
|
|
589
|
+
* Borderline: may run on the newest hardware but is prone to OOM. Reported
|
|
590
|
+
* unsafe (`safe: false`) so callers block by default; a UI can use `risky` to
|
|
591
|
+
* offer an explicit "load anyway" on capable devices.
|
|
592
|
+
*/
|
|
593
|
+
risky: boolean;
|
|
594
|
+
reason: string;
|
|
595
|
+
recommendation?: string;
|
|
596
|
+
maxSafeModel?: string;
|
|
797
597
|
};
|
|
798
598
|
/**
|
|
799
|
-
*
|
|
599
|
+
* Get recommended models based on device memory and capabilities.
|
|
600
|
+
* Helps prevent OOM crashes on low-memory mobile devices.
|
|
800
601
|
*/
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
transcript: string;
|
|
820
|
-
/** Current streaming chunk being transcribed (streaming mode only) */
|
|
821
|
-
streamingChunk: string;
|
|
822
|
-
/** Number of chunks transcribed so far (streaming mode only) */
|
|
823
|
-
chunkCount: number;
|
|
824
|
-
/** Loading progress */
|
|
825
|
-
loadingProgress: STTProgress | null;
|
|
826
|
-
/** Error message */
|
|
827
|
-
error: string | null;
|
|
828
|
-
/** Manually load the model */
|
|
829
|
-
load: () => void;
|
|
602
|
+
declare function getRecommendedModels(): {
|
|
603
|
+
chat: string;
|
|
604
|
+
tts: string;
|
|
605
|
+
stt: string;
|
|
606
|
+
embedding: string;
|
|
607
|
+
reason: string;
|
|
608
|
+
deviceMemory: number | null;
|
|
609
|
+
isMobile: boolean;
|
|
610
|
+
};
|
|
611
|
+
type DownloadPhase = "idle" | "downloading" | "caching" | "initializing" | "ready" | "error";
|
|
612
|
+
declare const SESSION_STORAGE_KEY = "gerbil_session_phase";
|
|
613
|
+
type SessionState = {
|
|
614
|
+
phase: DownloadPhase;
|
|
615
|
+
modelId: string | null;
|
|
616
|
+
sessionId: string;
|
|
617
|
+
timestamp: number;
|
|
618
|
+
bytesDownloaded?: number;
|
|
619
|
+
totalBytes?: number;
|
|
830
620
|
};
|
|
831
621
|
/**
|
|
832
|
-
*
|
|
833
|
-
*
|
|
834
|
-
* Uses MediaRecorder to capture audio and Whisper for transcription.
|
|
835
|
-
* Supports both one-shot and streaming transcription modes.
|
|
836
|
-
*
|
|
837
|
-
* @example Basic usage (one-shot)
|
|
838
|
-
* ```tsx
|
|
839
|
-
* function VoiceInput() {
|
|
840
|
-
* const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
|
|
841
|
-
* onTranscript: (text) => console.log("User said:", text),
|
|
842
|
-
* });
|
|
843
|
-
*
|
|
844
|
-
* return (
|
|
845
|
-
* <button onClick={isRecording ? stopRecording : startRecording}>
|
|
846
|
-
* {isRecording ? "Stop" : "Record"}
|
|
847
|
-
* </button>
|
|
848
|
-
* );
|
|
849
|
-
* }
|
|
850
|
-
* ```
|
|
851
|
-
*
|
|
852
|
-
* @example Streaming transcription (real-time)
|
|
853
|
-
* ```tsx
|
|
854
|
-
* function LiveTranscription() {
|
|
855
|
-
* const { startRecording, stopRecording, isRecording, transcript, streamingChunk } = useVoiceInput({
|
|
856
|
-
* streaming: true, // Enable streaming mode
|
|
857
|
-
* chunkDuration: 1500, // Transcribe every 1.5 seconds (default)
|
|
858
|
-
* onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
|
|
859
|
-
* });
|
|
860
|
-
*
|
|
861
|
-
* return (
|
|
862
|
-
* <div>
|
|
863
|
-
* <button onClick={isRecording ? stopRecording : startRecording}>
|
|
864
|
-
* {isRecording ? "Stop" : "Start Live Transcription"}
|
|
865
|
-
* </button>
|
|
866
|
-
* <p>Current chunk: {streamingChunk}</p>
|
|
867
|
-
* <p>Full transcript: {transcript}</p>
|
|
868
|
-
* </div>
|
|
869
|
-
* );
|
|
870
|
-
* }
|
|
871
|
-
* ```
|
|
622
|
+
* Set the current download/initialization phase.
|
|
623
|
+
* Used to detect if a reload happened during a critical operation.
|
|
872
624
|
*/
|
|
873
|
-
declare function
|
|
625
|
+
declare function setDownloadPhase(phase: DownloadPhase, modelId?: string, progress?: {
|
|
626
|
+
bytesDownloaded: number;
|
|
627
|
+
totalBytes: number;
|
|
628
|
+
}): void;
|
|
874
629
|
/**
|
|
875
|
-
*
|
|
630
|
+
* Get the last known download phase from storage.
|
|
876
631
|
*/
|
|
877
|
-
|
|
878
|
-
/** LLM model ID (default: qwen3-0.6b) */
|
|
879
|
-
llmModel?: string;
|
|
880
|
-
/** STT model ID (default: whisper-tiny.en) */
|
|
881
|
-
sttModel?: string;
|
|
882
|
-
/** TTS model ID (default: kokoro-82m, also supports supertonic-66m) */
|
|
883
|
-
ttsModel?: TTSModelId;
|
|
884
|
-
/** System prompt for LLM */
|
|
885
|
-
system?: string;
|
|
886
|
-
/** Enable thinking mode (default: false) */
|
|
887
|
-
thinking?: boolean;
|
|
888
|
-
/** TTS voice ID (default: model's default voice) */
|
|
889
|
-
voice?: string;
|
|
890
|
-
/** TTS speech speed (default: 1.0) */
|
|
891
|
-
speed?: number;
|
|
892
|
-
/** Auto-load all models on mount (default: false) */
|
|
893
|
-
autoLoad?: boolean;
|
|
894
|
-
/** Callback when user speaks */
|
|
895
|
-
onUserSpeak?: (text: string) => void;
|
|
896
|
-
/** Callback when assistant responds */
|
|
897
|
-
onAssistantSpeak?: (text: string) => void;
|
|
898
|
-
/** Callback on error */
|
|
899
|
-
onError?: (error: string) => void;
|
|
900
|
-
};
|
|
632
|
+
declare function getDownloadPhase(): SessionState | null;
|
|
901
633
|
/**
|
|
902
|
-
*
|
|
634
|
+
* Detect if the page reloaded during a model download/initialization.
|
|
635
|
+
* This typically indicates an iOS memory crash.
|
|
636
|
+
*
|
|
637
|
+
* @returns Detection result with recommended action
|
|
903
638
|
*/
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
639
|
+
declare function detectMemoryCrash(): {
|
|
640
|
+
crashed: boolean;
|
|
641
|
+
phase?: DownloadPhase;
|
|
642
|
+
modelId?: string;
|
|
643
|
+
timeSinceCrash?: number;
|
|
644
|
+
recommendation?: string;
|
|
910
645
|
};
|
|
911
646
|
/**
|
|
912
|
-
*
|
|
647
|
+
* Clear session phase (call when model loads successfully).
|
|
913
648
|
*/
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
/** Cancel current operation */
|
|
922
|
-
cancel: () => void;
|
|
923
|
-
/** Clear conversation history */
|
|
924
|
-
clear: () => void;
|
|
925
|
-
/** Whether recording user speech */
|
|
926
|
-
isListening: boolean;
|
|
927
|
-
/** Whether processing (STT/LLM/TTS) */
|
|
928
|
-
isProcessing: boolean;
|
|
929
|
-
/** Whether assistant is speaking */
|
|
930
|
-
isSpeaking: boolean;
|
|
931
|
-
/** Current stage: idle, listening, transcribing, thinking, speaking */
|
|
932
|
-
stage: "idle" | "listening" | "transcribing" | "thinking" | "speaking";
|
|
933
|
-
/** Whether all models are loaded */
|
|
934
|
-
isReady: boolean;
|
|
935
|
-
/** Whether loading models */
|
|
936
|
-
isLoading: boolean;
|
|
937
|
-
/** Loading progress message */
|
|
938
|
-
loadingMessage: string;
|
|
939
|
-
/** Error message */
|
|
940
|
-
error: string | null;
|
|
941
|
-
/** Manually load all models */
|
|
942
|
-
load: () => void;
|
|
943
|
-
};
|
|
649
|
+
declare function clearDownloadPhase(): void;
|
|
650
|
+
//#endregion
|
|
651
|
+
//#region src/browser/download.d.ts
|
|
652
|
+
/** Chunk size for downloads: 1.5MB (safe for iOS IndexedDB transactions) */
|
|
653
|
+
declare const CHUNK_SIZE_BYTES: number;
|
|
654
|
+
/** IndexedDB database name for chunked downloads */
|
|
655
|
+
declare const DOWNLOAD_DB_NAME = "gerbil-model-chunks";
|
|
944
656
|
/**
|
|
945
|
-
*
|
|
946
|
-
*
|
|
947
|
-
* Complete voice-to-voice conversation loop:
|
|
948
|
-
* 1. User presses button to speak
|
|
949
|
-
* 2. Speech is transcribed (Whisper)
|
|
950
|
-
* 3. LLM generates response
|
|
951
|
-
* 4. Response is spoken aloud (Kokoro or Supertonic TTS)
|
|
952
|
-
*
|
|
953
|
-
* @example
|
|
954
|
-
* ```tsx
|
|
955
|
-
* function VoiceChat() {
|
|
956
|
-
* const {
|
|
957
|
-
* messages,
|
|
958
|
-
* startListening,
|
|
959
|
-
* stopListening,
|
|
960
|
-
* isListening,
|
|
961
|
-
* isSpeaking,
|
|
962
|
-
* stage,
|
|
963
|
-
* } = useVoiceChat({
|
|
964
|
-
* system: "You are a helpful voice assistant.",
|
|
965
|
-
* voice: "af_bella",
|
|
966
|
-
* // Or use Supertonic for faster synthesis:
|
|
967
|
-
* // ttsModel: "supertonic-66m",
|
|
968
|
-
* // voice: "F1",
|
|
969
|
-
* });
|
|
970
|
-
*
|
|
971
|
-
* return (
|
|
972
|
-
* <div>
|
|
973
|
-
* {messages.map(m => (
|
|
974
|
-
* <div key={m.id}>{m.role}: {m.content}</div>
|
|
975
|
-
* ))}
|
|
976
|
-
* <button
|
|
977
|
-
* onMouseDown={startListening}
|
|
978
|
-
* onMouseUp={stopListening}
|
|
979
|
-
* >
|
|
980
|
-
* {stage === "idle" ? "🎤 Hold to Speak" : stage}
|
|
981
|
-
* </button>
|
|
982
|
-
* </div>
|
|
983
|
-
* );
|
|
984
|
-
* }
|
|
985
|
-
* ```
|
|
657
|
+
* Chunked resumable downloader for large model files.
|
|
658
|
+
* Downloads in 1.5MB chunks to avoid iOS memory pressure.
|
|
986
659
|
*/
|
|
987
|
-
declare function
|
|
660
|
+
declare function downloadModelChunked(url: string, modelId: string, options?: {
|
|
661
|
+
onProgress?: (info: {
|
|
662
|
+
phase: string;
|
|
663
|
+
bytesDownloaded: number;
|
|
664
|
+
totalBytes: number;
|
|
665
|
+
percent: number;
|
|
666
|
+
}) => void;
|
|
667
|
+
signal?: AbortSignal;
|
|
668
|
+
}): Promise<ArrayBuffer>;
|
|
988
669
|
/**
|
|
989
|
-
* Check if
|
|
670
|
+
* Check if a model has an incomplete download.
|
|
990
671
|
*/
|
|
991
|
-
declare function
|
|
672
|
+
declare function hasIncompleteDownload(modelId: string): Promise<{
|
|
673
|
+
incomplete: boolean;
|
|
674
|
+
bytesDownloaded?: number;
|
|
675
|
+
totalBytes?: number;
|
|
676
|
+
percent?: number;
|
|
677
|
+
}>;
|
|
992
678
|
/**
|
|
993
|
-
*
|
|
679
|
+
* Clear incomplete download data for a model.
|
|
994
680
|
*/
|
|
995
|
-
declare function
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
declare
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
}
|
|
681
|
+
declare function clearIncompleteDownload(modelId: string): Promise<void>;
|
|
682
|
+
/**
|
|
683
|
+
* Check if there's enough storage quota for a model download.
|
|
684
|
+
* Returns estimated available space and whether download should proceed.
|
|
685
|
+
*/
|
|
686
|
+
declare function checkStorageQuota(requiredMB?: number): Promise<{
|
|
687
|
+
ok: boolean;
|
|
688
|
+
availableMB: number;
|
|
689
|
+
usedMB: number;
|
|
690
|
+
quotaMB: number;
|
|
691
|
+
message?: string;
|
|
692
|
+
}>;
|
|
1007
693
|
//#endregion
|
|
1008
|
-
export { AudioChunk, BUILTIN_MODELS,
|
|
694
|
+
export { AudioChunk, BUILTIN_MODELS, CHUNK_SIZE_BYTES, CacheConfig, ConcurrencyConfig, DOWNLOAD_DB_NAME, EmbedOptions, EmbedResult, ErrorContext, FallbackConfig, GenerateEvent, GenerateOptions, GenerateResult, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, ImageInput, JsonOptions, LoadOptions, LoadSTTOptions, LoadTTSOptions, MODEL_SIZES, ModelConfig, type ModelFit, ModelLoadEvent, ModelSource, ModelStats, PreloadOptions, ProgressInfo, SESSION_STORAGE_KEY, STTModelConfig, SearchResult, type SessionState, SessionStats, SimilarityResult, SpeakOptions, SpeakResult, type StorageStatus, StreamingTranscriptionOptions, StreamingTranscriptionSession, SystemInfo, TTSModelConfig, TelemetryConfig, TranscribeOptions, TranscribeResult, TranscribeSegment, VoiceInfo, canCacheModel, checkStorageQuota, clearDownloadPhase, clearIncompleteDownload, createAudioPlayer, detectMemoryCrash, downloadModelChunked, getDownloadPhase, getInstallGuidance, getRecommendedModels, getStorageStatus, hasIncompleteDownload, isIOS, isModelSafeForDevice, isStandalone, playAudio, requestPersistentStorage, setDownloadPhase };
|
|
1009
695
|
//# sourceMappingURL=index.d.ts.map
|