@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.10

This diff compares the contents of two publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (114)
  1. package/README.md +79 -14
  2. package/dist/auto-update-S9s5-g0C.mjs +3 -0
  3. package/dist/browser/index.d.ts +1009 -0
  4. package/dist/browser/index.d.ts.map +1 -0
  5. package/dist/browser/index.js +2492 -0
  6. package/dist/browser/index.js.map +1 -0
  7. package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-CORwaIyC.mjs} +514 -73
  8. package/dist/chrome-backend-CORwaIyC.mjs.map +1 -0
  9. package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-DIKYoWj-.mjs} +1 -1
  10. package/dist/cli.mjs +3359 -647
  11. package/dist/cli.mjs.map +1 -1
  12. package/dist/frameworks/express.d.mts +1 -1
  13. package/dist/frameworks/express.mjs +3 -4
  14. package/dist/frameworks/express.mjs.map +1 -1
  15. package/dist/frameworks/fastify.d.mts +1 -1
  16. package/dist/frameworks/fastify.mjs +2 -3
  17. package/dist/frameworks/fastify.mjs.map +1 -1
  18. package/dist/frameworks/hono.d.mts +1 -1
  19. package/dist/frameworks/hono.mjs +2 -3
  20. package/dist/frameworks/hono.mjs.map +1 -1
  21. package/dist/frameworks/next.d.mts +2 -2
  22. package/dist/frameworks/next.mjs +2 -3
  23. package/dist/frameworks/next.mjs.map +1 -1
  24. package/dist/frameworks/react.d.mts +1 -1
  25. package/dist/frameworks/trpc.d.mts +1 -1
  26. package/dist/frameworks/trpc.mjs +2 -3
  27. package/dist/frameworks/trpc.mjs.map +1 -1
  28. package/dist/gerbil-DJGqq7BX.mjs +4 -0
  29. package/dist/gerbil-DoDGHe6Z.mjs +1631 -0
  30. package/dist/gerbil-DoDGHe6Z.mjs.map +1 -0
  31. package/dist/gerbil-qOTe1nl2.d.mts +431 -0
  32. package/dist/gerbil-qOTe1nl2.d.mts.map +1 -0
  33. package/dist/index.d.mts +411 -9
  34. package/dist/index.d.mts.map +1 -1
  35. package/dist/index.mjs +7 -6
  36. package/dist/index.mjs.map +1 -1
  37. package/dist/integrations/ai-sdk.d.mts +122 -4
  38. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  39. package/dist/integrations/ai-sdk.mjs +238 -11
  40. package/dist/integrations/ai-sdk.mjs.map +1 -1
  41. package/dist/integrations/langchain.d.mts +132 -2
  42. package/dist/integrations/langchain.d.mts.map +1 -1
  43. package/dist/integrations/langchain.mjs +175 -8
  44. package/dist/integrations/langchain.mjs.map +1 -1
  45. package/dist/integrations/llamaindex.d.mts +1 -1
  46. package/dist/integrations/llamaindex.mjs +2 -3
  47. package/dist/integrations/llamaindex.mjs.map +1 -1
  48. package/dist/integrations/mcp-client.mjs +4 -4
  49. package/dist/integrations/mcp-client.mjs.map +1 -1
  50. package/dist/integrations/mcp.d.mts +2 -2
  51. package/dist/integrations/mcp.d.mts.map +1 -1
  52. package/dist/integrations/mcp.mjs +5 -6
  53. package/dist/kokoro-BNTb6egA.mjs +20210 -0
  54. package/dist/kokoro-BNTb6egA.mjs.map +1 -0
  55. package/dist/kokoro-CMOGDSgT.js +20212 -0
  56. package/dist/kokoro-CMOGDSgT.js.map +1 -0
  57. package/dist/{mcp-R8kRLIKb.mjs → mcp-kzDDWIoS.mjs} +10 -37
  58. package/dist/mcp-kzDDWIoS.mjs.map +1 -0
  59. package/dist/microphone-DaMZFRuR.mjs +3 -0
  60. package/dist/{one-liner-BUQR0nqq.mjs → one-liner-DxnNs_JK.mjs} +2 -2
  61. package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-DxnNs_JK.mjs.map} +1 -1
  62. package/dist/repl-DGUw4fCc.mjs +9 -0
  63. package/dist/skills/index.d.mts +305 -14
  64. package/dist/skills/index.d.mts.map +1 -1
  65. package/dist/skills/index.mjs +5 -6
  66. package/dist/skills-DulrOPeP.mjs +1435 -0
  67. package/dist/skills-DulrOPeP.mjs.map +1 -0
  68. package/dist/stt-1WIefHwc.mjs +3 -0
  69. package/dist/stt-CG_7KB_0.mjs +434 -0
  70. package/dist/stt-CG_7KB_0.mjs.map +1 -0
  71. package/dist/stt-Dne6SENv.js +434 -0
  72. package/dist/stt-Dne6SENv.js.map +1 -0
  73. package/dist/{tools-BsiEE6f2.mjs → tools-Bi1P7Xoy.mjs} +6 -7
  74. package/dist/{tools-BsiEE6f2.mjs.map → tools-Bi1P7Xoy.mjs.map} +1 -1
  75. package/dist/transformers.web-DiD1gTwk.js +44695 -0
  76. package/dist/transformers.web-DiD1gTwk.js.map +1 -0
  77. package/dist/transformers.web-u34VxRFM.js +3 -0
  78. package/dist/tts-B1pZMlDv.mjs +3 -0
  79. package/dist/tts-C2FzKuSx.js +725 -0
  80. package/dist/tts-C2FzKuSx.js.map +1 -0
  81. package/dist/tts-CyHhcLtN.mjs +731 -0
  82. package/dist/tts-CyHhcLtN.mjs.map +1 -0
  83. package/dist/types-CiTc7ez3.d.mts +353 -0
  84. package/dist/types-CiTc7ez3.d.mts.map +1 -0
  85. package/dist/{utils-7vXqtq2Q.mjs → utils-CZBZ8dgR.mjs} +1 -1
  86. package/dist/{utils-7vXqtq2Q.mjs.map → utils-CZBZ8dgR.mjs.map} +1 -1
  87. package/docs/ai-sdk.md +137 -21
  88. package/docs/browser.md +241 -2
  89. package/docs/memory.md +72 -0
  90. package/docs/stt.md +494 -0
  91. package/docs/tts.md +569 -0
  92. package/docs/vision.md +396 -0
  93. package/package.json +21 -22
  94. package/dist/auto-update-BbNHbSU1.mjs +0 -3
  95. package/dist/browser/index.d.mts +0 -262
  96. package/dist/browser/index.d.mts.map +0 -1
  97. package/dist/browser/index.mjs +0 -755
  98. package/dist/browser/index.mjs.map +0 -1
  99. package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
  100. package/dist/gerbil-BfnsFWRE.mjs +0 -644
  101. package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
  102. package/dist/gerbil-BjW-z7Fq.mjs +0 -5
  103. package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
  104. package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
  105. package/dist/mcp-R8kRLIKb.mjs.map +0 -1
  106. package/dist/models-DKULvhOr.mjs +0 -136
  107. package/dist/models-DKULvhOr.mjs.map +0 -1
  108. package/dist/models-De2-_GmQ.d.mts +0 -22
  109. package/dist/models-De2-_GmQ.d.mts.map +0 -1
  110. package/dist/skills-D3CEpgDc.mjs +0 -630
  111. package/dist/skills-D3CEpgDc.mjs.map +0 -1
  112. package/dist/types-BS1N92Jt.d.mts +0 -183
  113. package/dist/types-BS1N92Jt.d.mts.map +0 -1
  114. package/dist/{chunk-Ct1HF2bE.mjs → chunk-CkXuGtQK.mjs} +0 -0
@@ -0,0 +1,1009 @@
+ import { z } from "zod";
+
+ //#region src/core/types.d.ts
+
+ type ModelConfig = {
+   id: string;
+   repo: string;
+   description: string;
+   size: string;
+   contextLength: number;
+   supportsThinking: boolean;
+   supportsJson: boolean;
+   /** Whether model supports vision/image input */
+   supportsVision?: boolean;
+   /** Vision encoder size (for display, e.g., "0.4B") */
+   visionEncoderSize?: string;
+   family: "qwen" | "smollm" | "phi" | "mistral" | "llama" | "other";
+ };
+ type ModelSource = {
+   type: "builtin" | "huggingface" | "local";
+   path: string;
+ };
+ type ImageInput = {
+   /** Image source: URL, base64 data URI, or local file path */
+   source: string;
+   /** Optional alt text for context */
+   alt?: string;
+ };
+ type GenerateOptions = {
+   /** Maximum tokens to generate (default: 256) */
+   maxTokens?: number;
+   /** Temperature for sampling, 0-2 (default: 0.7) */
+   temperature?: number;
+   /** Top-p sampling (default: 0.9) */
+   topP?: number;
+   /** Top-k sampling (default: 50) */
+   topK?: number;
+   /** Stop sequences */
+   stopSequences?: string[];
+   /** System prompt */
+   system?: string;
+   /** Enable thinking/reasoning mode (Qwen3) */
+   thinking?: boolean;
+   /** Callback for each token (streaming) */
+   onToken?: (token: string) => void;
+   /** Images to include (only used if model supports vision) */
+   images?: ImageInput[];
+   /** Enable response caching (default: false) */
+   cache?: boolean;
+   /** Cache TTL in milliseconds (default: 5 minutes) */
+   cacheTtl?: number;
+ };
+ type GenerateResult = {
+   /** Generated text */
+   text: string;
+   /** Thinking/reasoning (if enabled) */
+   thinking?: string;
+   /** Tokens generated */
+   tokensGenerated: number;
+   /** Generation speed */
+   tokensPerSecond: number;
+   /** Total time in ms */
+   totalTime: number;
+   /** Why generation stopped */
+   finishReason: "stop" | "length" | "error";
+   /** Which provider was used (for hybrid mode) */
+   provider?: "local" | "openai" | "anthropic";
+   /** Whether result came from cache */
+   cached?: boolean;
+ };
+ type JsonOptions<T = unknown> = {
+   /** Zod schema for validation */
+   schema: z.ZodType<T>;
+   /** Number of retries on invalid JSON (default: 3) */
+   retries?: number;
+   /** Temperature (lower = more deterministic, default: 0.3) */
+   temperature?: number;
+   /** System prompt override */
+   system?: string;
+ };
+ type EmbedOptions = {
+   /** Model to use for embeddings */
+   model?: string;
+   /** Normalize vectors (default: true) */
+   normalize?: boolean;
+ };
+ type EmbedResult = {
+   /** Embedding vector */
+   vector: number[];
+   /** Original text */
+   text: string;
+   /** Time in ms */
+   totalTime: number;
+ };
+ type LoadOptions = {
+   /** Progress callback */
+   onProgress?: (info: ProgressInfo) => void;
+   /** Device: 'auto', 'gpu', 'cpu', 'webgpu' (default: 'auto') */
+   device?: "auto" | "gpu" | "cpu" | "webgpu";
+   /** Quantization: 'q4', 'q8', 'fp16', 'fp32' (default: 'q4') */
+   dtype?: "q4" | "q8" | "fp16" | "fp32";
+   /** Override context length */
+   contextLength?: number;
+ };
+ type ProgressInfo = {
+   status: string;
+   progress?: number;
+   file?: string;
+   loaded?: number;
+   total?: number;
+ };
+ type GerbilConfig = {
+   /** Default model */
+   model?: string;
+   /** Default device */
+   device?: "auto" | "gpu" | "cpu";
+   /** Default quantization */
+   dtype?: "q4" | "q8" | "fp16" | "fp32";
+   /** Cache configuration */
+   cache?: CacheConfig;
+   /** Fallback configuration */
+   fallback?: FallbackConfig;
+ };
+ type CacheConfig = {
+   /** Enable caching (default: true) */
+   enabled?: boolean;
+   /** Time-to-live in seconds (default: 3600) */
+   ttl?: number;
+   /** Max cache size (default: "500mb") */
+   maxSize?: string;
+   /** Storage backend */
+   storage?: "memory" | "disk" | "redis";
+   /** Redis URL (if storage is redis) */
+   redisUrl?: string;
+ };
+ type FallbackConfig = {
+   /** Fallback provider */
+   provider: "openai" | "anthropic";
+   /** API key */
+   apiKey: string;
+   /** Model to use */
+   model: string;
+   /** When to fallback */
+   when: "timeout" | "error" | "always-verify";
+   /** Timeout in ms before fallback (default: 5000) */
+   timeout?: number;
+ };
+ type SessionStats = {
+   prompts: number;
+   tokensIn: number;
+   tokensOut: number;
+   avgSpeed: number;
+   totalTime: number;
+   cacheHits: number;
+   cacheMisses: number;
+ };
+ type ModelStats = {
+   modelId: string;
+   avgSpeed: number;
+   totalGenerations: number;
+   totalTokens: number;
+ };
+ type SystemInfo = {
+   version: string;
+   model: ModelConfig | null;
+   device: {
+     backend: string;
+     gpu: string | null;
+     vram: string | null;
+     status: "ready" | "loading" | "error";
+   };
+   context: {
+     max: number;
+     used: number;
+     available: number;
+   };
+   cache: {
+     location: string;
+     size: string;
+     modelCount: number;
+   };
+ };
+ type GerbilModelSettings = {
+   /** Enable thinking mode */
+   thinking?: boolean;
+   /** Device to use */
+   device?: "auto" | "gpu" | "cpu";
+   /** Quantization level */
+   dtype?: "q4" | "q8" | "fp16" | "fp32";
+ };
+ type GerbilProviderSettings = {
+   /** Default device */
+   device?: "auto" | "gpu" | "cpu";
+   /** Default quantization */
+   dtype?: "q4" | "q8" | "fp16" | "fp32";
+ };
+ type VoiceInfo = {
+   /** Voice ID (e.g., "af_bella", "am_adam") */
+   id: string;
+   /** Display name (e.g., "Bella", "Adam") */
+   name: string;
+   /** Gender: male, female, or neutral */
+   gender: "male" | "female" | "neutral";
+   /** Language code (e.g., "en", "en-us", "zh") */
+   language: string;
+   /** Optional description */
+   description?: string;
+   /** Speaker embedding file name (internal) */
+   embeddingFile?: string;
+ };
+ type TTSModelConfig = {
+   /** Model ID (e.g., "kokoro-82m") */
+   id: string;
+   /** HuggingFace repo path */
+   repo: string;
+   /** Human-readable description */
+   description: string;
+   /** Approximate model size */
+   size: string;
+   /** Output sample rate in Hz (e.g., 24000) */
+   sampleRate: number;
+   /** Available voices */
+   voices: VoiceInfo[];
+   /** Default voice ID */
+   defaultVoice: string;
+   /** Supported languages */
+   languages: string[];
+ };
+ type SpeakOptions = {
+   /** Voice ID to use (default: model's default voice) */
+   voice?: string;
+   /** Speech speed multiplier (0.5 = half speed, 2.0 = double speed, default: 1.0) */
+   speed?: number;
+   /** Progress callback */
+   onProgress?: (info: ProgressInfo) => void;
+   /** Callback for audio chunks during streaming */
+   onAudioChunk?: (chunk: AudioChunk) => void;
+ };
+ type AudioChunk = {
+   /** Raw PCM audio samples (Float32Array) */
+   samples: Float32Array;
+   /** Sample rate in Hz */
+   sampleRate: number;
+   /** Chunk index (0-based) */
+   index: number;
+   /** Whether this is the final chunk */
+   isFinal: boolean;
+ };
+ type SpeakResult = {
+   /** Raw PCM audio samples (Float32Array) */
+   audio: Float32Array;
+   /** Sample rate in Hz */
+   sampleRate: number;
+   /** Audio duration in seconds */
+   duration: number;
+   /** Voice used */
+   voice: string;
+   /** Total processing time in ms */
+   totalTime: number;
+ };
+ type LoadTTSOptions = {
+   /** Progress callback */
+   onProgress?: (info: ProgressInfo) => void;
+   /** Device: 'auto', 'webgpu', 'cpu' (default: 'auto') */
+   device?: "auto" | "webgpu" | "cpu";
+ };
+ type STTModelConfig = {
+   /** Model ID (e.g., "whisper-tiny.en") */
+   id: string;
+   /** HuggingFace repo path */
+   repo: string;
+   /** Human-readable description */
+   description: string;
+   /** Model size (e.g., "39M", "244M") */
+   size: string;
+   /** Whether model supports multiple languages */
+   multilingual: boolean;
+   /** Supported languages (ISO 639-1 codes) */
+   languages: string[];
+   /** Expected sample rate (default: 16000) */
+   sampleRate: number;
+ };
+ type TranscribeOptions = {
+   /** Language hint (ISO 639-1 code like "en", "es", "fr") - only for multilingual models */
+   language?: string;
+   /** Return word/segment timestamps */
+   timestamps?: boolean;
+   /** Progress callback */
+   onProgress?: (info: ProgressInfo) => void;
+ };
+ type TranscribeSegment = {
+   /** Segment text */
+   text: string;
+   /** Start time in seconds */
+   start: number;
+   /** End time in seconds */
+   end: number;
+ };
+ type TranscribeResult = {
+   /** Full transcribed text */
+   text: string;
+   /** Detected or specified language */
+   language: string;
+   /** Segments with timestamps (if timestamps option enabled) */
+   segments?: TranscribeSegment[];
+   /** Audio duration in seconds */
+   duration: number;
+   /** Total processing time in ms */
+   totalTime: number;
+ };
+ type LoadSTTOptions = {
+   /** Progress callback */
+   onProgress?: (info: ProgressInfo) => void;
+   /** Device: 'auto', 'webgpu', 'cpu' (default: 'auto') */
+   device?: "auto" | "webgpu" | "cpu";
+ };
+ type StreamingTranscriptionOptions = {
+   /** Interval between transcriptions in ms (default: 3000) */
+   chunkDuration?: number;
+   /** Minimum audio samples before transcribing (default: 8000 = 0.5s at 16kHz) */
+   minChunkSize?: number;
+   /** Callback for each transcribed chunk */
+   onChunk?: (text: string, chunkIndex: number) => void;
+   /** Callback with full accumulated transcript */
+   onTranscript?: (fullText: string) => void;
+   /** Callback on transcription error */
+   onError?: (error: string) => void;
+   /** Language hint (for multilingual models) */
+   language?: string;
+ };
+ type StreamingTranscriptionSession = {
+   /** Feed audio data to the buffer (Float32Array at 16kHz) */
+   feedAudio: (audio: Float32Array) => void;
+   /** Manually trigger transcription of buffered audio */
+   flush: () => Promise<string>;
+   /** Start automatic interval-based transcription */
+   start: () => void;
+   /** Stop transcription and return final transcript */
+   stop: () => Promise<string>;
+   /** Immediately abort without final transcription (for cleanup) */
+   abort: () => void;
+   /** Check if session is running */
+   isRunning: () => boolean;
+   /** Get current full transcript */
+   getTranscript: () => string;
+   /** Get number of chunks transcribed */
+   getChunkCount: () => number;
+   /** Reset session (clear buffer and transcript) */
+   reset: () => void;
+ };
+ //#endregion
+ //#region src/core/models.d.ts
+ declare const BUILTIN_MODELS: Record<string, ModelConfig>;
+ //#endregion
+ //#region src/browser/index.d.ts
+
+ type WorkerProgress = {
+   status: "loading" | "downloading" | "ready" | "error";
+   message?: string;
+   file?: string;
+   progress?: number;
+   /** Number of files being downloaded (0 = loading from cache) */
+   downloadCount?: number;
+   /** Total files to process */
+   totalFiles?: number;
+   error?: string;
+ };
+ type WorkerToken = {
+   status: "token";
+   text: string;
+   state: "thinking" | "answering";
+   numTokens: number;
+   tps: number;
+ };
+ type WorkerComplete = {
+   status: "complete";
+   text: string;
+   numTokens: number;
+   totalTime: number;
+   tps: number;
+ };
+ type GerbilWorkerOptions = {
+   /** Model ID to load (default: "qwen3-0.6b") */
+   modelId?: string;
+   /** Called during model loading with progress updates */
+   onProgress?: (progress: WorkerProgress) => void;
+   /** Called for each token during streaming generation */
+   onToken?: (token: WorkerToken) => void;
+   /** Called when generation is complete */
+   onComplete?: (result: WorkerComplete) => void;
+   /** Called on errors */
+   onError?: (error: string) => void;
+   /** Worker script URL (auto-detected if not provided) */
+   workerUrl?: string;
+ };
+ type GenerateStreamOptions = {
+   /** Maximum tokens to generate */
+   maxTokens?: number;
+   /** Temperature for sampling (0 = deterministic) */
+   temperature?: number;
+   /** Top-p nucleus sampling */
+   topP?: number;
+   /** Top-k sampling */
+   topK?: number;
+   /** Enable thinking mode (Qwen3) */
+   thinking?: boolean;
+   /** System prompt */
+   system?: string;
+   /** Image URLs or data URIs (for vision models) */
+   images?: string[];
+   /** Conversation history for multi-turn (includes all previous messages) */
+   history?: Array<{
+     role: "user" | "assistant" | "system";
+     content: string;
+   }>;
+ };
+ type GerbilWorker = {
+   /** Generate text with streaming */
+   generate: (prompt: string, options?: GenerateStreamOptions) => Promise<string>;
+   /** Interrupt current generation */
+   interrupt: () => void;
+   /** Reset conversation cache */
+   reset: () => void;
+   /** Terminate the worker */
+   terminate: () => void;
+   /** Check if model is loaded */
+   isReady: () => boolean;
+ };
+ /**
+  * Create a Gerbil worker for streaming WebGPU inference
+  *
+  * Uses a Web Worker to keep the UI responsive during model loading
+  * and text generation, with real-time token streaming.
+  */
+ declare function createGerbilWorker(options?: GerbilWorkerOptions): Promise<GerbilWorker>;
+ /** Message in a chat conversation */
+ type Message = {
+   id: string;
+   role: "user" | "assistant";
+   content: string;
+   thinking?: string;
+   /** Attached images (URLs or data URIs) - for vision models */
+   images?: string[];
+ };
+ /** Loading progress state */
+ type LoadingProgress = {
+   status: "loading" | "downloading" | "ready" | "error";
+   message?: string;
+   file?: string;
+   progress?: number;
+   /** Number of files being downloaded (0 = loading from cache) */
+   downloadCount?: number;
+   /** Total files to process */
+   totalFiles?: number;
+ };
+ /** Options for useChat hook */
+ type UseChatOptions = {
+   /** Model ID (default: "qwen3-0.6b") */
+   model?: string;
+   /** System prompt */
+   system?: string;
+   /** Enable thinking mode (Qwen3) */
+   thinking?: boolean;
+   /** Max tokens per response */
+   maxTokens?: number;
+   /** Temperature (0-2) */
+   temperature?: number;
+   /** Initial messages */
+   initialMessages?: Message[];
+   /** Auto-load model on mount (default: false - loads on first generate or load()) */
+   autoLoad?: boolean;
+   /** Called when model is ready */
+   onReady?: () => void;
+   /** Called on error */
+   onError?: (error: string) => void;
+ };
+ /** Return type for useChat hook */
+ type UseChatReturn = {
+   /** Chat messages */
+   messages: Message[];
+   /** Current input value */
+   input: string;
+   /** Set input value */
+   setInput: (value: string) => void;
+   /** Submit current input */
+   handleSubmit: (e?: {
+     preventDefault?: () => void;
+   }) => void;
+   /** Whether model is loading */
+   isLoading: boolean;
+   /** Loading progress */
+   loadingProgress: LoadingProgress | null;
+   /** Whether generating a response */
+   isGenerating: boolean;
+   /** Current thinking content (streaming) */
+   thinking: string;
+   /** Stop generation */
+   stop: () => void;
+   /** Clear all messages */
+   clear: () => void;
+   /** Current tokens per second */
+   tps: number;
+   /** Whether model is ready */
+   isReady: boolean;
+   /** Error message if any */
+   error: string | null;
+   /** Load the model (only needed if lazy: true) */
+   load: () => void;
+   /** Currently attached images (for next message) */
+   attachedImages: string[];
+   /** Attach an image to the next message */
+   attachImage: (imageUrl: string) => void;
+   /** Remove an attached image */
+   removeImage: (index: number) => void;
+   /** Clear all attached images */
+   clearImages: () => void;
+   /** Send message with specific images (convenience method) */
+   sendWithImages: (text: string, images: string[]) => void;
+ };
+ /**
+  * React hook for chat with local LLM
+  *
+  * @example
+  * ```tsx
+  * import { useChat } from "@tryhamster/gerbil/browser";
+  *
+  * function Chat() {
+  *   const { messages, input, setInput, handleSubmit, isLoading, isGenerating } = useChat();
+  *
+  *   if (isLoading) return <div>Loading model...</div>;
+  *
+  *   return (
+  *     <div>
+  *       {messages.map(m => (
+  *         <div key={m.id}>{m.role}: {m.content}</div>
+  *       ))}
+  *       <form onSubmit={handleSubmit}>
+  *         <input value={input} onChange={e => setInput(e.target.value)} />
+  *         <button disabled={isGenerating}>Send</button>
+  *       </form>
+  *     </div>
+  *   );
+  * }
+  * ```
+  */
+ declare function useChat(options?: UseChatOptions): UseChatReturn;
+ /** Options for useCompletion hook */
+ type UseCompletionOptions = {
+   /** Model ID (default: "qwen3-0.6b") */
+   model?: string;
+   /** System prompt */
+   system?: string;
+   /** Enable thinking mode (Qwen3) */
+   thinking?: boolean;
+   /** Max tokens */
+   maxTokens?: number;
+   /** Temperature (0-2) */
+   temperature?: number;
+   /** Auto-load model on mount (default: false - loads on first complete() or load()) */
+   autoLoad?: boolean;
+   /** Called when model is ready */
+   onReady?: () => void;
+   /** Called on error */
+   onError?: (error: string) => void;
+ };
+ /** Options for single completion call */
+ type CompleteOptions = {
+   /** Image URLs or data URIs to analyze (for vision models) */
+   images?: string[];
+ };
+ /** Return type for useCompletion hook */
+ type UseCompletionReturn = {
+   /** Generated completion */
+   completion: string;
+   /** Thinking content (if enabled) */
+   thinking: string;
+   /** Generate completion (optionally with images for vision models) */
+   complete: (prompt: string, options?: CompleteOptions) => Promise<string>;
+   /** Whether model is loading */
+   isLoading: boolean;
+   /** Loading progress */
+   loadingProgress: LoadingProgress | null;
+   /** Whether generating */
+   isGenerating: boolean;
+   /** Stop generation */
+   stop: () => void;
+   /** Current tokens per second */
+   tps: number;
+   /** Whether model is ready */
+   isReady: boolean;
+   /** Error message if any */
+   error: string | null;
+   /** Load the model (only needed if lazy: true) */
+   load: () => void;
+ };
+ /**
+  * React hook for text completion with local LLM
+  *
+  * @example
+  * ```tsx
+  * import { useCompletion } from "@tryhamster/gerbil/browser";
+  *
+  * function App() {
+  *   const { complete, completion, isLoading, isGenerating } = useCompletion();
+  *
+  *   if (isLoading) return <div>Loading...</div>;
+  *
+  *   return (
+  *     <div>
+  *       <button onClick={() => complete("Write a haiku")}>Generate</button>
+  *       <p>{completion}</p>
+  *     </div>
+  *   );
+  * }
+  * ```
+  */
+ declare function useCompletion(options?: UseCompletionOptions): UseCompletionReturn;
+ /** TTS loading progress */
+ type TTSProgress = {
+   status: "idle" | "loading" | "downloading" | "ready" | "error";
+   message?: string;
+   file?: string;
+   progress?: number;
+   error?: string;
+ };
+ /** Available TTS models */
+ type TTSModelId = "kokoro-82m" | "supertonic-66m";
+ /** Voice info for TTS models */
+ type BrowserVoiceInfo = {
+   id: string;
+   name: string;
+   gender: "male" | "female";
+   language: string;
+   description: string;
+ };
+ /** Options for useSpeech hook */
+ type UseSpeechOptions = {
+   /** TTS model to use (default: "kokoro-82m") */
+   model?: TTSModelId;
+   /** Default voice ID (default: model's default voice) */
+   voice?: string;
+   /** Speech speed multiplier (default: 1.0) */
+   speed?: number;
+   /** Auto-load TTS model on mount (default: false) */
+   autoLoad?: boolean;
+   /** Called when model is ready */
+   onReady?: () => void;
+   /** Called on error */
+   onError?: (error: string) => void;
+   /** Called when speech starts */
+   onStart?: () => void;
+   /** Called when speech ends */
+   onEnd?: () => void;
+ };
+ /** Return type for useSpeech hook */
+ type UseSpeechReturn = {
+   /** Speak text aloud */
+   speak: (text: string, options?: {
+     voice?: string;
+     speed?: number;
+   }) => Promise<void>;
+   /** Stop current speech */
+   stop: () => void;
+   /** Whether TTS model is loading */
+   isLoading: boolean;
+   /** Loading progress */
+   loadingProgress: TTSProgress | null;
+   /** Whether currently speaking */
+   isSpeaking: boolean;
+   /** Whether TTS model is ready */
+   isReady: boolean;
+   /** Load the TTS model */
+   load: () => void;
+   /** Error message if any */
+   error: string | null;
+   /** List available voices for current model */
+   listVoices: () => BrowserVoiceInfo[];
+   /** Current voice ID */
+   currentVoice: string;
+   /** Set current voice */
+   setVoice: (voiceId: string) => void;
+   /** Current speed */
+   currentSpeed: number;
+   /** Set speed */
+   setSpeed: (speed: number) => void;
+   /** Current TTS model ID */
+   currentModel: TTSModelId;
+   /** Sample rate for current model (24000 for Kokoro, 44100 for Supertonic) */
+   sampleRate: number;
+ };
+ /**
+  * React hook for text-to-speech with Web Audio API playback
+  *
+  * Supports both Kokoro (24kHz, high quality) and Supertonic (44.1kHz, faster).
+  *
+  * @example
+  * ```tsx
+  * import { useSpeech } from "@tryhamster/gerbil/browser";
+  *
+  * function App() {
+  *   // Default: Kokoro TTS
+  *   const { speak, stop, isLoading, isSpeaking, listVoices, setVoice } = useSpeech();
+  *
+  *   // Or use Supertonic (44.1kHz, faster)
+  *   // const { speak, listVoices } = useSpeech({ model: "supertonic-66m" });
+  *
+  *   if (isLoading) return <div>Loading TTS...</div>;
+  *
+  *   return (
+  *     <div>
+  *       <select onChange={e => setVoice(e.target.value)}>
+  *         {listVoices().map(v => (
+  *           <option key={v.id} value={v.id}>{v.name}</option>
+  *         ))}
+  *       </select>
+  *       <button onClick={() => speak("Hello world!")}>
+  *         {isSpeaking ? "Speaking..." : "Speak"}
+  *       </button>
+  *       {isSpeaking && <button onClick={stop}>Stop</button>}
+  *     </div>
+  *   );
+  * }
+  * ```
+  */
+ declare function useSpeech(options?: UseSpeechOptions): UseSpeechReturn;
+ /**
+  * Play audio from Float32Array using Web Audio API
+  *
+  * @example
+  * ```ts
+  * import { playAudio } from "@tryhamster/gerbil/browser";
+  *
+  * const audio = new Float32Array([...]); // TTS output
+  * const controller = await playAudio(audio, 24000);
+  *
+  * // Stop playback
+  * controller.stop();
+  * ```
+  */
+ declare function playAudio(audio: Float32Array, sampleRate?: number): Promise<{
+   stop: () => void;
+   onEnded: Promise<void>;
+ }>;
+ /**
+  * Create a reusable audio player for streaming TTS
+  *
+  * @example
+  * ```ts
+  * import { createAudioPlayer } from "@tryhamster/gerbil/browser";
+  *
+  * const player = createAudioPlayer(24000);
+  *
+  * // Queue audio chunks as they arrive
+  * player.queue(chunk1);
+  * player.queue(chunk2);
+  *
+  * // Stop and clear
+  * player.stop();
+  * ```
+  */
+ declare function createAudioPlayer(sampleRate?: number): {
+   queue: (audio: Float32Array) => void;
+   stop: () => void;
+   isPlaying: () => boolean;
+ };
+ /**
+  * Progress info for STT loading
+  */
+ type STTProgress = {
+   status: "downloading" | "loading" | "ready" | "error";
+   message?: string;
+   progress?: number;
+   file?: string;
+ };
+ /**
+  * Options for useVoiceInput hook
+  */
+ type UseVoiceInputOptions = {
+   /** STT model ID (default: whisper-tiny.en) */
+   model?: string;
+   /** Auto-load model on mount (default: false) */
+   autoLoad?: boolean;
+   /** Callback when model is ready */
+   onReady?: () => void;
+   /** Callback when transcription completes (or for each chunk in streaming mode) */
+   onTranscript?: (text: string) => void;
+   /** Callback on error */
+   onError?: (error: string) => void;
+   /** Callback during loading */
+   onProgress?: (progress: STTProgress) => void;
+   /** Enable streaming transcription - transcribes audio in chunks as you speak */
+   streaming?: boolean;
+   /** Chunk duration in ms for streaming mode (default: 3000 = 3 seconds) */
+   chunkDuration?: number;
+   /** Callback for each streaming chunk with partial transcript */
+   onChunk?: (text: string, chunkIndex: number) => void;
+ };
+ /**
+  * Return type for useVoiceInput hook
+  */
+ type UseVoiceInputReturn = {
+   /** Start recording audio */
+   startRecording: () => Promise<void>;
+   /** Stop recording and transcribe */
+   stopRecording: () => Promise<string>;
+   /** Cancel recording without transcribing */
+   cancelRecording: () => void;
+   /** Transcribe raw audio data (Float32Array at 16kHz) */
+   transcribe: (audio: Float32Array) => Promise<string>;
+   /** Whether currently recording */
+   isRecording: boolean;
+   /** Whether transcribing */
+   isTranscribing: boolean;
+   /** Whether model is loading */
+   isLoading: boolean;
+   /** Whether model is ready */
+   isReady: boolean;
+   /** Latest transcription result (full transcript in streaming mode) */
+   transcript: string;
+   /** Current streaming chunk being transcribed (streaming mode only) */
+   streamingChunk: string;
+   /** Number of chunks transcribed so far (streaming mode only) */
+   chunkCount: number;
+   /** Loading progress */
+   loadingProgress: STTProgress | null;
+   /** Error message */
+   error: string | null;
+   /** Manually load the model */
+   load: () => void;
+ };
+ /**
+  * React hook for voice input with browser microphone
+  *
+  * Uses MediaRecorder to capture audio and Whisper for transcription.
+  * Supports both one-shot and streaming transcription modes.
+  *
+  * @example Basic usage (one-shot)
+  * ```tsx
+  * function VoiceInput() {
+  *   const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
+  *     onTranscript: (text) => console.log("User said:", text),
+  *   });
+  *
+  *   return (
+  *     <button onClick={isRecording ? stopRecording : startRecording}>
+  *       {isRecording ? "Stop" : "Record"}
+  *     </button>
+  *   );
+  * }
+  * ```
+  *
+  * @example Streaming transcription (real-time)
+  * ```tsx
+  * function LiveTranscription() {
+  *   const { startRecording, stopRecording, isRecording, transcript, streamingChunk } = useVoiceInput({
+  *     streaming: true, // Enable streaming mode
+  *     chunkDuration: 1500, // Transcribe every 1.5 seconds (default)
+  *     onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
+  *   });
+  *
+  *   return (
+  *     <div>
+  *       <button onClick={isRecording ? stopRecording : startRecording}>
+  *         {isRecording ? "Stop" : "Start Live Transcription"}
+  *       </button>
+  *       <p>Current chunk: {streamingChunk}</p>
+  *       <p>Full transcript: {transcript}</p>
+  *     </div>
+  *   );
+  * }
+  * ```
+  */
+ declare function useVoiceInput(options?: UseVoiceInputOptions): UseVoiceInputReturn;
+ /**
+  * Options for useVoiceChat hook
+  */
+ type UseVoiceChatOptions = {
+   /** LLM model ID (default: qwen3-0.6b) */
+   llmModel?: string;
+   /** STT model ID (default: whisper-tiny.en) */
+   sttModel?: string;
+   /** TTS model ID (default: kokoro-82m, also supports supertonic-66m) */
+   ttsModel?: TTSModelId;
+   /** System prompt for LLM */
+   system?: string;
+   /** Enable thinking mode (default: false) */
+   thinking?: boolean;
+   /** TTS voice ID (default: model's default voice) */
+   voice?: string;
+   /** TTS speech speed (default: 1.0) */
+   speed?: number;
+   /** Auto-load all models on mount (default: false) */
+   autoLoad?: boolean;
+   /** Callback when user speaks */
+   onUserSpeak?: (text: string) => void;
+   /** Callback when assistant responds */
+   onAssistantSpeak?: (text: string) => void;
+   /** Callback on error */
+   onError?: (error: string) => void;
+ };
+ /**
+  * Message in voice chat
+  */
+ type VoiceChatMessage = {
+   id: string;
+   role: "user" | "assistant";
+   content: string;
+   thinking?: string;
+   audioUrl?: string;
+ };
+ /**
+  * Return type for useVoiceChat hook
+  */
+ type UseVoiceChatReturn = {
+   /** Messages in the conversation */
+   messages: VoiceChatMessage[];
+   /** Start recording user speech */
+   startListening: () => Promise<void>;
+   /** Stop recording and process (STT → LLM → TTS) */
+   stopListening: () => Promise<void>;
+   /** Cancel current operation */
+   cancel: () => void;
+   /** Clear conversation history */
+   clear: () => void;
+   /** Whether recording user speech */
+   isListening: boolean;
+   /** Whether processing (STT/LLM/TTS) */
+   isProcessing: boolean;
+   /** Whether assistant is speaking */
+   isSpeaking: boolean;
+   /** Current stage: idle, listening, transcribing, thinking, speaking */
+   stage: "idle" | "listening" | "transcribing" | "thinking" | "speaking";
+   /** Whether all models are loaded */
+   isReady: boolean;
+   /** Whether loading models */
+   isLoading: boolean;
+   /** Loading progress message */
+   loadingMessage: string;
+   /** Error message */
+   error: string | null;
+   /** Manually load all models */
+   load: () => void;
+ };
+ /**
+  * React hook for voice conversation with STT + LLM + TTS
+  *
+  * Complete voice-to-voice conversation loop:
+  * 1. User presses button to speak
+  * 2. Speech is transcribed (Whisper)
+  * 3. LLM generates response
+  * 4. Response is spoken aloud (Kokoro or Supertonic TTS)
+  *
+  * @example
+  * ```tsx
+  * function VoiceChat() {
+  *   const {
+  *     messages,
+  *     startListening,
+  *     stopListening,
+  *     isListening,
+  *     isSpeaking,
+  *     stage,
+  *   } = useVoiceChat({
+  *     system: "You are a helpful voice assistant.",
+  *     voice: "af_bella",
+  *     // Or use Supertonic for faster synthesis:
+  *     // ttsModel: "supertonic-66m",
+  *     // voice: "F1",
+  *   });
+  *
+  *   return (
+  *     <div>
+  *       {messages.map(m => (
+  *         <div key={m.id}>{m.role}: {m.content}</div>
+  *       ))}
+  *       <button
+  *         onMouseDown={startListening}
+  *         onMouseUp={stopListening}
+  *       >
+  *         {stage === "idle" ? "🎤 Hold to Speak" : stage}
+  *       </button>
+  *     </div>
+  *   );
+  * }
+  * ```
+  */
+ declare function useVoiceChat(options?: UseVoiceChatOptions): UseVoiceChatReturn;
+ /**
+  * Check if WebGPU is supported
+  */
+ declare function isWebGPUSupported(): boolean;
+ /**
+  * Get WebGPU adapter info
+  */
+ declare function getWebGPUInfo(): Promise<{
+   supported: boolean;
+   adapter?: string;
+   device?: string;
+ } | null>;
+ declare const _default: {
+   isWebGPUSupported: typeof isWebGPUSupported;
+   getWebGPUInfo: typeof getWebGPUInfo;
+   createGerbilWorker: typeof createGerbilWorker;
+   playAudio: typeof playAudio;
+   createAudioPlayer: typeof createAudioPlayer;
+ };
+ //#endregion
+ export { AudioChunk, BUILTIN_MODELS, BrowserVoiceInfo, CacheConfig, CompleteOptions, EmbedOptions, EmbedResult, FallbackConfig, GenerateOptions, GenerateResult, GenerateStreamOptions, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, GerbilWorker, GerbilWorkerOptions, ImageInput, JsonOptions, LoadOptions, LoadSTTOptions, LoadTTSOptions, LoadingProgress, Message, ModelConfig, ModelSource, ModelStats, ProgressInfo, STTModelConfig, STTProgress, SessionStats, SpeakOptions, SpeakResult, StreamingTranscriptionOptions, StreamingTranscriptionSession, SystemInfo, TTSModelConfig, TTSModelId, TTSProgress, TranscribeOptions, TranscribeResult, TranscribeSegment, UseChatOptions, UseChatReturn, UseCompletionOptions, UseCompletionReturn, UseSpeechOptions, UseSpeechReturn, UseVoiceChatOptions, UseVoiceChatReturn, UseVoiceInputOptions, UseVoiceInputReturn, VoiceChatMessage, VoiceInfo, WorkerComplete, WorkerProgress, WorkerToken, createAudioPlayer, createGerbilWorker, _default as default, getWebGPUInfo, isWebGPUSupported, playAudio, useChat, useCompletion, useSpeech, useVoiceChat, useVoiceInput };
+ //# sourceMappingURL=index.d.ts.map
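
Of the new browser exports above, createGerbilWorker is the one major entry point whose JSDoc ships without a usage example. The sketch below is written only against the type declarations in this diff (GerbilWorkerOptions, WorkerToken, GerbilWorker); the model ID is the documented default and the prompt is illustrative, and none of it is verified against rc.10 behavior:

```ts
// Minimal sketch: streaming generation with the worker API declared above.
import { createGerbilWorker, isWebGPUSupported } from "@tryhamster/gerbil/browser";

async function runDemo(): Promise<string> {
  if (!isWebGPUSupported()) {
    throw new Error("WebGPU is not available in this browser");
  }

  let streamed = "";

  // Loading and inference run in a Web Worker, so the page stays responsive.
  const worker = await createGerbilWorker({
    modelId: "qwen3-0.6b", // documented default per GerbilWorkerOptions
    onProgress: (p) => console.log(`[model] ${p.status}`, p.progress ?? ""),
    // Tokens arrive live, tagged "thinking" or "answering" (Qwen3 thinking mode).
    onToken: (t) => {
      if (t.state === "answering") streamed += t.text;
    },
  });

  // generate() resolves with the complete text once streaming finishes.
  const text = await worker.generate("Explain WebGPU in one sentence.", {
    maxTokens: 128,
    temperature: 0.7,
  });

  worker.terminate(); // release the worker and model memory
  return text || streamed;
}
```

Per the GerbilWorker type, the same instance can be reused across prompts; reset() clears the conversation cache between unrelated requests, and interrupt() cancels an in-flight generation.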