@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +79 -14
  2. package/dist/auto-update-DsWBBnEk.mjs +3 -0
  3. package/dist/browser/index.d.mts +401 -5
  4. package/dist/browser/index.d.mts.map +1 -1
  5. package/dist/browser/index.mjs +1772 -146
  6. package/dist/browser/index.mjs.map +1 -1
  7. package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-JEPeM2YE.mjs} +1 -1
  8. package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-Y9F7W5VQ.mjs} +514 -73
  9. package/dist/chrome-backend-Y9F7W5VQ.mjs.map +1 -0
  10. package/dist/cli.mjs +3359 -646
  11. package/dist/cli.mjs.map +1 -1
  12. package/dist/frameworks/express.d.mts +1 -1
  13. package/dist/frameworks/express.mjs +3 -3
  14. package/dist/frameworks/fastify.d.mts +1 -1
  15. package/dist/frameworks/fastify.mjs +3 -3
  16. package/dist/frameworks/hono.d.mts +1 -1
  17. package/dist/frameworks/hono.mjs +3 -3
  18. package/dist/frameworks/next.d.mts +2 -2
  19. package/dist/frameworks/next.mjs +3 -3
  20. package/dist/frameworks/react.d.mts +1 -1
  21. package/dist/frameworks/trpc.d.mts +1 -1
  22. package/dist/frameworks/trpc.mjs +3 -3
  23. package/dist/gerbil-DeQlX_Mt.mjs +5 -0
  24. package/dist/gerbil-POAz8peb.d.mts +431 -0
  25. package/dist/gerbil-POAz8peb.d.mts.map +1 -0
  26. package/dist/gerbil-yoSpRHgv.mjs +1463 -0
  27. package/dist/gerbil-yoSpRHgv.mjs.map +1 -0
  28. package/dist/index.d.mts +395 -9
  29. package/dist/index.d.mts.map +1 -1
  30. package/dist/index.mjs +8 -6
  31. package/dist/index.mjs.map +1 -1
  32. package/dist/integrations/ai-sdk.d.mts +122 -4
  33. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  34. package/dist/integrations/ai-sdk.mjs +239 -11
  35. package/dist/integrations/ai-sdk.mjs.map +1 -1
  36. package/dist/integrations/langchain.d.mts +132 -2
  37. package/dist/integrations/langchain.d.mts.map +1 -1
  38. package/dist/integrations/langchain.mjs +176 -8
  39. package/dist/integrations/langchain.mjs.map +1 -1
  40. package/dist/integrations/llamaindex.d.mts +1 -1
  41. package/dist/integrations/llamaindex.mjs +3 -3
  42. package/dist/integrations/mcp-client.mjs +4 -4
  43. package/dist/integrations/mcp-client.mjs.map +1 -1
  44. package/dist/integrations/mcp.d.mts +2 -2
  45. package/dist/integrations/mcp.d.mts.map +1 -1
  46. package/dist/integrations/mcp.mjs +6 -6
  47. package/dist/{mcp-R8kRLIKb.mjs → mcp-Bitg4sjX.mjs} +10 -37
  48. package/dist/mcp-Bitg4sjX.mjs.map +1 -0
  49. package/dist/microphone-D-6y9aiE.mjs +3 -0
  50. package/dist/{models-DKULvhOr.mjs → models-BAtL8qsA.mjs} +42 -7
  51. package/dist/models-BAtL8qsA.mjs.map +1 -0
  52. package/dist/{models-De2-_GmQ.d.mts → models-CE0fBq0U.d.mts} +2 -2
  53. package/dist/models-CE0fBq0U.d.mts.map +1 -0
  54. package/dist/{one-liner-BUQR0nqq.mjs → one-liner-B1rmFto6.mjs} +2 -2
  55. package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-B1rmFto6.mjs.map} +1 -1
  56. package/dist/repl-D20JO260.mjs +10 -0
  57. package/dist/skills/index.d.mts +303 -12
  58. package/dist/skills/index.d.mts.map +1 -1
  59. package/dist/skills/index.mjs +6 -6
  60. package/dist/skills-5DxAV-rn.mjs +1435 -0
  61. package/dist/skills-5DxAV-rn.mjs.map +1 -0
  62. package/dist/stt-Bv_dum-R.mjs +433 -0
  63. package/dist/stt-Bv_dum-R.mjs.map +1 -0
  64. package/dist/stt-KzSoNvwI.mjs +3 -0
  65. package/dist/{tools-BsiEE6f2.mjs → tools-IYPrqoek.mjs} +6 -7
  66. package/dist/{tools-BsiEE6f2.mjs.map → tools-IYPrqoek.mjs.map} +1 -1
  67. package/dist/tts-5yWeP_I0.mjs +3 -0
  68. package/dist/tts-DG6denWG.mjs +729 -0
  69. package/dist/tts-DG6denWG.mjs.map +1 -0
  70. package/dist/types-s6Py2_DL.d.mts +353 -0
  71. package/dist/types-s6Py2_DL.d.mts.map +1 -0
  72. package/dist/{utils-7vXqtq2Q.mjs → utils-CkB4Roi6.mjs} +1 -1
  73. package/dist/{utils-7vXqtq2Q.mjs.map → utils-CkB4Roi6.mjs.map} +1 -1
  74. package/docs/ai-sdk.md +137 -21
  75. package/docs/browser.md +241 -2
  76. package/docs/memory.md +72 -0
  77. package/docs/stt.md +494 -0
  78. package/docs/tts.md +569 -0
  79. package/docs/vision.md +396 -0
  80. package/package.json +17 -18
  81. package/dist/auto-update-BbNHbSU1.mjs +0 -3
  82. package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
  83. package/dist/gerbil-BfnsFWRE.mjs +0 -644
  84. package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
  85. package/dist/gerbil-BjW-z7Fq.mjs +0 -5
  86. package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
  87. package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
  88. package/dist/mcp-R8kRLIKb.mjs.map +0 -1
  89. package/dist/models-DKULvhOr.mjs.map +0 -1
  90. package/dist/models-De2-_GmQ.d.mts.map +0 -1
  91. package/dist/skills-D3CEpgDc.mjs +0 -630
  92. package/dist/skills-D3CEpgDc.mjs.map +0 -1
  93. package/dist/types-BS1N92Jt.d.mts +0 -183
  94. package/dist/types-BS1N92Jt.d.mts.map +0 -1
package/README.md CHANGED
@@ -5,15 +5,15 @@
5
5
  <h1 align="center">Gerbil</h1>
6
6
 
7
7
  <p align="center">
8
- <strong>Local LLM inference for Node.js. GPU-accelerated. Zero config.</strong>
8
+ <strong>Local AI inference for Node.js. LLM, TTS, STT. GPU-accelerated. Zero config.</strong>
9
9
  </p>
10
10
 
11
11
  <p align="center">
12
12
  <a href="#install">Install</a> •
13
13
  <a href="#quick-start">Quick Start</a> •
14
- <a href="#skills">Skills</a> •
14
+ <a href="#text-to-speech">TTS</a> •
15
+ <a href="#speech-to-text">STT</a> •
15
16
  <a href="./docs/ai-sdk.md">AI SDK</a> •
16
- <a href="./docs/frameworks.md">Frameworks</a> •
17
17
  <a href="./docs/cli.md">CLI</a>
18
18
  </p>
19
19
 
@@ -45,6 +45,7 @@ const text = await gerbil("Explain recursion in one sentence");
45
45
  - **Zero Config** — `npx @tryhamster/gerbil "your prompt"` just works
46
46
  - **Local & Private** — No API keys, no data leaves your machine
47
47
  - **GPU Accelerated** — WebGPU with CPU fallback
48
+ - **Complete Audio** — Text-to-Speech (Kokoro) & Speech-to-Text (Whisper)
48
49
  - **Framework Ready** — AI SDK v5, Next.js, Express, LangChain
49
50
  - **Skills System** — Built-in + custom skills with Zod validation
50
51
  - **Tool Calling** — Agentic capabilities with Qwen3 models
@@ -92,6 +93,55 @@ const data = await g.json("Extract: John, 32, NYC", {
92
93
  });
93
94
  ```
94
95
 
96
+ ## Text-to-Speech
97
+
98
+ Generate natural speech locally using Kokoro TTS (28 voices):
99
+
100
+ ```typescript
101
+ const result = await g.speak("Hello, I'm Gerbil!", { voice: "af_heart" });
102
+ // result.audio = Float32Array, result.sampleRate = 24000
103
+
104
+ // Stream long text
105
+ for await (const chunk of g.speakStream("Long paragraph...")) {
106
+ // Play each chunk as it's generated
107
+ }
108
+ ```
109
+
110
+ ```bash
111
+ # CLI
112
+ gerbil speak "Hello world" --voice bf_emma
113
+ ```
114
+
115
+ 📖 **[Full TTS Documentation →](./docs/tts.md)**
116
+
117
+ ## Speech-to-Text
118
+
119
+ Transcribe audio locally using Whisper (7 models, 80+ languages):
120
+
121
+ ```typescript
122
+ import { readFileSync } from "fs";
123
+
124
+ const audio = new Uint8Array(readFileSync("recording.wav"));
125
+ const result = await g.transcribe(audio);
126
+ console.log(result.text);
127
+
128
+ // With timestamps
129
+ const detailed = await g.transcribe(audio, { timestamps: true });
130
+ for (const seg of detailed.segments) {
131
+ console.log(`[${seg.start}s] ${seg.text}`);
132
+ }
133
+
134
+ // Record from microphone
135
+ const recording = await g.listen(5000); // 5 seconds
136
+ ```
137
+
138
+ ```bash
139
+ # CLI
140
+ gerbil transcribe audio.wav --timestamps
141
+ ```
142
+
143
+ 📖 **[Full STT Documentation →](./docs/stt.md)**
144
+
95
145
  ## Skills
96
146
 
97
147
  Built-in AI skills with Zod-validated inputs:
@@ -169,6 +219,8 @@ gerbil "Write a haiku" # Generate text
169
219
  gerbil commit # Commit message from staged changes
170
220
  gerbil summarize README.md # Summarize file
171
221
  gerbil chat --thinking # Interactive chat
222
+ gerbil speak "Hello world" --voice af_heart # Text-to-speech
223
+ gerbil transcribe audio.wav # Speech-to-text
172
224
  gerbil serve --mcp # MCP server for Claude/Cursor
173
225
  gerbil update # Update to latest version
174
226
  ```
@@ -204,17 +256,19 @@ function Chat() {
204
256
 
205
257
  | Integration | Import | Docs |
206
258
  |-------------|--------|------|
207
- | **Browser** | `@tryhamster/gerbil/browser` | [📖 Browser Guide](./docs/browser.md) |
208
- | **AI SDK v5** | `@tryhamster/gerbil/ai` | [📖 AI SDK Guide](./docs/ai-sdk.md) |
259
+ | **Browser** | `@tryhamster/gerbil/browser` | [📖 Browser](./docs/browser.md) |
260
+ | **AI SDK v5** | `@tryhamster/gerbil/ai` | [📖 AI SDK](./docs/ai-sdk.md) |
209
261
  | **Next.js** | `@tryhamster/gerbil/next` | [📖 Frameworks](./docs/frameworks.md) |
210
262
  | **Express** | `@tryhamster/gerbil/express` | [📖 Frameworks](./docs/frameworks.md) |
211
- | **React** | `@tryhamster/gerbil/react` | [📖 Frameworks](./docs/frameworks.md) |
212
263
  | **LangChain** | `@tryhamster/gerbil/langchain` | [📖 Frameworks](./docs/frameworks.md) |
213
- | **MCP Server** | `npx @tryhamster/gerbil serve --mcp` | [📖 MCP Guide](./docs/mcp.md) |
214
- | **MCP Client** | `@tryhamster/gerbil/mcp-client` | [📖 MCP Client](./docs/mcp-client.md) |
264
+ | **MCP Server** | `npx @tryhamster/gerbil serve --mcp` | [📖 MCP](./docs/mcp.md) |
265
+
266
+ **Audio capabilities:** TTS and STT are built into the core `Gerbil` class, `@tryhamster/gerbil/browser` hooks, and `@tryhamster/gerbil/ai` provider.
215
267
 
216
268
  ## Models
217
269
 
270
+ ### Language Models
271
+
218
272
  | Model | Size | Best For |
219
273
  |-------|------|----------|
220
274
  | `qwen3-0.6b` | ~400MB | General use, reasoning (thinking mode) |
@@ -223,18 +277,29 @@ function Chat() {
223
277
 
224
278
  Use any HuggingFace model: `npx @tryhamster/gerbil -m hf:org/model "prompt"`
225
279
 
280
+ ### Audio Models
281
+
282
+ | Model | Type | Size | Notes |
283
+ |-------|------|------|-------|
284
+ | `kokoro-82m` | TTS | ~330MB | 28 voices, English |
285
+ | `whisper-tiny.en` | STT | 39MB | English, fastest |
286
+ | `whisper-base.en` | STT | 74MB | English, balanced |
287
+ | `whisper-small` | STT | 244MB | 80+ languages |
288
+
226
289
  ## Documentation
227
290
 
228
291
  | Guide | Description |
229
292
  |-------|-------------|
230
- | [📖 Browser](./docs/browser.md) | WebGPU inference in the browser |
231
- | [📖 Skills](./docs/skills.md) | Built-in skills, custom skill development, registry API |
232
- | [📖 Tools](./docs/tools.md) | Tool calling, agentic workflows, custom tools |
293
+ | [📖 Text-to-Speech](./docs/tts.md) | Kokoro TTS, 28 voices, streaming audio |
294
+ | [📖 Speech-to-Text](./docs/stt.md) | Whisper STT, transcription, voice input |
295
+ | [📖 Browser](./docs/browser.md) | WebGPU inference, React hooks |
296
+ | [📖 Skills](./docs/skills.md) | Built-in skills, custom skill development |
297
+ | [📖 Tools](./docs/tools.md) | Tool calling, agentic workflows |
233
298
  | [📖 REPL](./docs/repl.md) | Interactive terminal dashboard |
234
- | [📖 AI SDK](./docs/ai-sdk.md) | Vercel AI SDK v5 integration |
235
- | [📖 Frameworks](./docs/frameworks.md) | Next.js, Express, React, Fastify, Hono, tRPC, LangChain |
299
+ | [📖 AI SDK](./docs/ai-sdk.md) | Vercel AI SDK v5 (LLM, TTS, STT) |
300
+ | [📖 Frameworks](./docs/frameworks.md) | Next.js, Express, React, LangChain |
236
301
  | [📖 CLI](./docs/cli.md) | All CLI commands and options |
237
- | [📖 MCP Server](./docs/mcp.md) | MCP server setup for Claude Desktop & Cursor |
302
+ | [📖 MCP Server](./docs/mcp.md) | MCP server for Claude Desktop & Cursor |
238
303
  | [📖 MCP Client](./docs/mcp-client.md) | Connect to external MCP servers |
239
304
 
240
305
  ## Requirements
@@ -0,0 +1,3 @@
1
+ import { c as checkForUpdate, l as compareVersions, s as CURRENT_VERSION, u as installUpdate } from "./cli.mjs";
2
+
3
+ export { CURRENT_VERSION, checkForUpdate, installUpdate };
@@ -1,5 +1,5 @@
1
- import { _ as SystemInfo, a as GenerateOptions, c as GerbilModelSettings, d as LoadOptions, f as ModelConfig, g as SessionStats, h as ProgressInfo, i as FallbackConfig, l as GerbilProviderSettings, m as ModelStats, n as EmbedOptions, o as GenerateResult, p as ModelSource, r as EmbedResult, s as GerbilConfig, t as CacheConfig, u as JsonOptions } from "../types-BS1N92Jt.mjs";
2
- import { t as BUILTIN_MODELS } from "../models-De2-_GmQ.mjs";
1
+ import { A as TranscribeSegment, C as SpeakResult, D as TTSModelConfig, E as SystemInfo, O as TranscribeOptions, S as SpeakOptions, T as StreamingTranscriptionSession, _ as ModelSource, a as FallbackConfig, b as STTModelConfig, c as GerbilConfig, d as ImageInput, f as JsonOptions, g as ModelConfig, h as LoadTTSOptions, i as EmbedResult, j as VoiceInfo, k as TranscribeResult, l as GerbilModelSettings, m as LoadSTTOptions, n as CacheConfig, o as GenerateOptions, p as LoadOptions, r as EmbedOptions, s as GenerateResult, t as AudioChunk, u as GerbilProviderSettings, v as ModelStats, w as StreamingTranscriptionOptions, x as SessionStats, y as ProgressInfo } from "../types-s6Py2_DL.mjs";
2
+ import { t as BUILTIN_MODELS } from "../models-CE0fBq0U.mjs";
3
3
 
4
4
  //#region src/browser/index.d.ts
5
5
 
@@ -55,6 +55,13 @@ type GenerateStreamOptions = {
55
55
  thinking?: boolean;
56
56
  /** System prompt */
57
57
  system?: string;
58
+ /** Image URLs or data URIs (for vision models) */
59
+ images?: string[];
60
+ /** Conversation history for multi-turn (includes all previous messages) */
61
+ history?: Array<{
62
+ role: "user" | "assistant" | "system";
63
+ content: string;
64
+ }>;
58
65
  };
59
66
  type GerbilWorker = {
60
67
  /** Generate text with streaming */
@@ -81,6 +88,8 @@ type Message = {
81
88
  role: "user" | "assistant";
82
89
  content: string;
83
90
  thinking?: string;
91
+ /** Attached images (URLs or data URIs) - for vision models */
92
+ images?: string[];
84
93
  };
85
94
  /** Loading progress state */
86
95
  type LoadingProgress = {
@@ -146,6 +155,16 @@ type UseChatReturn = {
146
155
  error: string | null;
147
156
  /** Load the model (only needed if lazy: true) */
148
157
  load: () => void;
158
+ /** Currently attached images (for next message) */
159
+ attachedImages: string[];
160
+ /** Attach an image to the next message */
161
+ attachImage: (imageUrl: string) => void;
162
+ /** Remove an attached image */
163
+ removeImage: (index: number) => void;
164
+ /** Clear all attached images */
165
+ clearImages: () => void;
166
+ /** Send message with specific images (convenience method) */
167
+ sendWithImages: (text: string, images: string[]) => void;
149
168
  };
150
169
  /**
151
170
  * React hook for chat with local LLM
@@ -193,14 +212,19 @@ type UseCompletionOptions = {
193
212
  /** Called on error */
194
213
  onError?: (error: string) => void;
195
214
  };
215
+ /** Options for single completion call */
216
+ type CompleteOptions = {
217
+ /** Image URLs or data URIs to analyze (for vision models) */
218
+ images?: string[];
219
+ };
196
220
  /** Return type for useCompletion hook */
197
221
  type UseCompletionReturn = {
198
222
  /** Generated completion */
199
223
  completion: string;
200
224
  /** Thinking content (if enabled) */
201
225
  thinking: string;
202
- /** Generate completion */
203
- complete: (prompt: string) => Promise<string>;
226
+ /** Generate completion (optionally with images for vision models) */
227
+ complete: (prompt: string, options?: CompleteOptions) => Promise<string>;
204
228
  /** Whether model is loading */
205
229
  isLoading: boolean;
206
230
  /** Loading progress */
@@ -240,6 +264,376 @@ type UseCompletionReturn = {
240
264
  * ```
241
265
  */
242
266
  declare function useCompletion(options?: UseCompletionOptions): UseCompletionReturn;
267
+ /** TTS loading progress */
268
+ type TTSProgress = {
269
+ status: "idle" | "loading" | "downloading" | "ready" | "error";
270
+ message?: string;
271
+ file?: string;
272
+ progress?: number;
273
+ error?: string;
274
+ };
275
+ /** Available TTS models */
276
+ type TTSModelId = "kokoro-82m" | "supertonic-66m";
277
+ /** Voice info for TTS models */
278
+ type BrowserVoiceInfo = {
279
+ id: string;
280
+ name: string;
281
+ gender: "male" | "female";
282
+ language: string;
283
+ description: string;
284
+ };
285
+ /** Options for useSpeech hook */
286
+ type UseSpeechOptions = {
287
+ /** TTS model to use (default: "kokoro-82m") */
288
+ model?: TTSModelId;
289
+ /** Default voice ID (default: model's default voice) */
290
+ voice?: string;
291
+ /** Speech speed multiplier (default: 1.0) */
292
+ speed?: number;
293
+ /** Auto-load TTS model on mount (default: false) */
294
+ autoLoad?: boolean;
295
+ /** Called when model is ready */
296
+ onReady?: () => void;
297
+ /** Called on error */
298
+ onError?: (error: string) => void;
299
+ /** Called when speech starts */
300
+ onStart?: () => void;
301
+ /** Called when speech ends */
302
+ onEnd?: () => void;
303
+ };
304
+ /** Return type for useSpeech hook */
305
+ type UseSpeechReturn = {
306
+ /** Speak text aloud */
307
+ speak: (text: string, options?: {
308
+ voice?: string;
309
+ speed?: number;
310
+ }) => Promise<void>;
311
+ /** Stop current speech */
312
+ stop: () => void;
313
+ /** Whether TTS model is loading */
314
+ isLoading: boolean;
315
+ /** Loading progress */
316
+ loadingProgress: TTSProgress | null;
317
+ /** Whether currently speaking */
318
+ isSpeaking: boolean;
319
+ /** Whether TTS model is ready */
320
+ isReady: boolean;
321
+ /** Load the TTS model */
322
+ load: () => void;
323
+ /** Error message if any */
324
+ error: string | null;
325
+ /** List available voices for current model */
326
+ listVoices: () => BrowserVoiceInfo[];
327
+ /** Current voice ID */
328
+ currentVoice: string;
329
+ /** Set current voice */
330
+ setVoice: (voiceId: string) => void;
331
+ /** Current speed */
332
+ currentSpeed: number;
333
+ /** Set speed */
334
+ setSpeed: (speed: number) => void;
335
+ /** Current TTS model ID */
336
+ currentModel: TTSModelId;
337
+ /** Sample rate for current model (24000 for Kokoro, 44100 for Supertonic) */
338
+ sampleRate: number;
339
+ };
340
+ /**
341
+ * React hook for text-to-speech with Web Audio API playback
342
+ *
343
+ * Supports both Kokoro (24kHz, high quality) and Supertonic (44.1kHz, faster).
344
+ *
345
+ * @example
346
+ * ```tsx
347
+ * import { useSpeech } from "@tryhamster/gerbil/browser";
348
+ *
349
+ * function App() {
350
+ * // Default: Kokoro TTS
351
+ * const { speak, stop, isLoading, isSpeaking, listVoices, setVoice } = useSpeech();
352
+ *
353
+ * // Or use Supertonic (44.1kHz, faster)
354
+ * // const { speak, listVoices } = useSpeech({ model: "supertonic-66m" });
355
+ *
356
+ * if (isLoading) return <div>Loading TTS...</div>;
357
+ *
358
+ * return (
359
+ * <div>
360
+ * <select onChange={e => setVoice(e.target.value)}>
361
+ * {listVoices().map(v => (
362
+ * <option key={v.id} value={v.id}>{v.name}</option>
363
+ * ))}
364
+ * </select>
365
+ * <button onClick={() => speak("Hello world!")}>
366
+ * {isSpeaking ? "Speaking..." : "Speak"}
367
+ * </button>
368
+ * {isSpeaking && <button onClick={stop}>Stop</button>}
369
+ * </div>
370
+ * );
371
+ * }
372
+ * ```
373
+ */
374
+ declare function useSpeech(options?: UseSpeechOptions): UseSpeechReturn;
375
+ /**
376
+ * Play audio from Float32Array using Web Audio API
377
+ *
378
+ * @example
379
+ * ```ts
380
+ * import { playAudio } from "@tryhamster/gerbil/browser";
381
+ *
382
+ * const audio = new Float32Array([...]); // TTS output
383
+ * const controller = await playAudio(audio, 24000);
384
+ *
385
+ * // Stop playback
386
+ * controller.stop();
387
+ * ```
388
+ */
389
+ declare function playAudio(audio: Float32Array, sampleRate?: number): Promise<{
390
+ stop: () => void;
391
+ onEnded: Promise<void>;
392
+ }>;
393
+ /**
394
+ * Create a reusable audio player for streaming TTS
395
+ *
396
+ * @example
397
+ * ```ts
398
+ * import { createAudioPlayer } from "@tryhamster/gerbil/browser";
399
+ *
400
+ * const player = createAudioPlayer(24000);
401
+ *
402
+ * // Queue audio chunks as they arrive
403
+ * player.queue(chunk1);
404
+ * player.queue(chunk2);
405
+ *
406
+ * // Stop and clear
407
+ * player.stop();
408
+ * ```
409
+ */
410
+ declare function createAudioPlayer(sampleRate?: number): {
411
+ queue: (audio: Float32Array) => void;
412
+ stop: () => void;
413
+ isPlaying: () => boolean;
414
+ };
415
+ /**
416
+ * Progress info for STT loading
417
+ */
418
+ type STTProgress = {
419
+ status: "downloading" | "loading" | "ready" | "error";
420
+ message?: string;
421
+ progress?: number;
422
+ file?: string;
423
+ };
424
+ /**
425
+ * Options for useVoiceInput hook
426
+ */
427
+ type UseVoiceInputOptions = {
428
+ /** STT model ID (default: whisper-tiny.en) */
429
+ model?: string;
430
+ /** Auto-load model on mount (default: false) */
431
+ autoLoad?: boolean;
432
+ /** Callback when model is ready */
433
+ onReady?: () => void;
434
+ /** Callback when transcription completes (or for each chunk in streaming mode) */
435
+ onTranscript?: (text: string) => void;
436
+ /** Callback on error */
437
+ onError?: (error: string) => void;
438
+ /** Callback during loading */
439
+ onProgress?: (progress: STTProgress) => void;
440
+ /** Enable streaming transcription - transcribes audio in chunks as you speak */
441
+ streaming?: boolean;
442
+ /** Chunk duration in ms for streaming mode (default: 3000 = 3 seconds) */
443
+ chunkDuration?: number;
444
+ /** Callback for each streaming chunk with partial transcript */
445
+ onChunk?: (text: string, chunkIndex: number) => void;
446
+ };
447
+ /**
448
+ * Return type for useVoiceInput hook
449
+ */
450
+ type UseVoiceInputReturn = {
451
+ /** Start recording audio */
452
+ startRecording: () => Promise<void>;
453
+ /** Stop recording and transcribe */
454
+ stopRecording: () => Promise<string>;
455
+ /** Cancel recording without transcribing */
456
+ cancelRecording: () => void;
457
+ /** Transcribe raw audio data (Float32Array at 16kHz) */
458
+ transcribe: (audio: Float32Array) => Promise<string>;
459
+ /** Whether currently recording */
460
+ isRecording: boolean;
461
+ /** Whether transcribing */
462
+ isTranscribing: boolean;
463
+ /** Whether model is loading */
464
+ isLoading: boolean;
465
+ /** Whether model is ready */
466
+ isReady: boolean;
467
+ /** Latest transcription result (full transcript in streaming mode) */
468
+ transcript: string;
469
+ /** Current streaming chunk being transcribed (streaming mode only) */
470
+ streamingChunk: string;
471
+ /** Number of chunks transcribed so far (streaming mode only) */
472
+ chunkCount: number;
473
+ /** Loading progress */
474
+ loadingProgress: STTProgress | null;
475
+ /** Error message */
476
+ error: string | null;
477
+ /** Manually load the model */
478
+ load: () => void;
479
+ };
480
+ /**
481
+ * React hook for voice input with browser microphone
482
+ *
483
+ * Uses MediaRecorder to capture audio and Whisper for transcription.
484
+ * Supports both one-shot and streaming transcription modes.
485
+ *
486
+ * @example Basic usage (one-shot)
487
+ * ```tsx
488
+ * function VoiceInput() {
489
+ * const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
490
+ * onTranscript: (text) => console.log("User said:", text),
491
+ * });
492
+ *
493
+ * return (
494
+ * <button onClick={isRecording ? stopRecording : startRecording}>
495
+ * {isRecording ? "Stop" : "Record"}
496
+ * </button>
497
+ * );
498
+ * }
499
+ * ```
500
+ *
501
+ * @example Streaming transcription (real-time)
502
+ * ```tsx
503
+ * function LiveTranscription() {
504
+ * const { startRecording, stopRecording, isRecording, transcript, streamingChunk } = useVoiceInput({
505
+ * streaming: true, // Enable streaming mode
506
+ * chunkDuration: 1500, // Transcribe every 1.5 seconds (default)
507
+ * onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
508
+ * });
509
+ *
510
+ * return (
511
+ * <div>
512
+ * <button onClick={isRecording ? stopRecording : startRecording}>
513
+ * {isRecording ? "Stop" : "Start Live Transcription"}
514
+ * </button>
515
+ * <p>Current chunk: {streamingChunk}</p>
516
+ * <p>Full transcript: {transcript}</p>
517
+ * </div>
518
+ * );
519
+ * }
520
+ * ```
521
+ */
522
+ declare function useVoiceInput(options?: UseVoiceInputOptions): UseVoiceInputReturn;
523
+ /**
524
+ * Options for useVoiceChat hook
525
+ */
526
+ type UseVoiceChatOptions = {
527
+ /** LLM model ID (default: qwen3-0.6b) */
528
+ llmModel?: string;
529
+ /** STT model ID (default: whisper-tiny.en) */
530
+ sttModel?: string;
531
+ /** TTS model ID (default: kokoro-82m, also supports supertonic-66m) */
532
+ ttsModel?: TTSModelId;
533
+ /** System prompt for LLM */
534
+ system?: string;
535
+ /** Enable thinking mode (default: false) */
536
+ thinking?: boolean;
537
+ /** TTS voice ID (default: model's default voice) */
538
+ voice?: string;
539
+ /** TTS speech speed (default: 1.0) */
540
+ speed?: number;
541
+ /** Auto-load all models on mount (default: false) */
542
+ autoLoad?: boolean;
543
+ /** Callback when user speaks */
544
+ onUserSpeak?: (text: string) => void;
545
+ /** Callback when assistant responds */
546
+ onAssistantSpeak?: (text: string) => void;
547
+ /** Callback on error */
548
+ onError?: (error: string) => void;
549
+ };
550
+ /**
551
+ * Message in voice chat
552
+ */
553
+ type VoiceChatMessage = {
554
+ id: string;
555
+ role: "user" | "assistant";
556
+ content: string;
557
+ thinking?: string;
558
+ audioUrl?: string;
559
+ };
560
+ /**
561
+ * Return type for useVoiceChat hook
562
+ */
563
+ type UseVoiceChatReturn = {
564
+ /** Messages in the conversation */
565
+ messages: VoiceChatMessage[];
566
+ /** Start recording user speech */
567
+ startListening: () => Promise<void>;
568
+ /** Stop recording and process (STT → LLM → TTS) */
569
+ stopListening: () => Promise<void>;
570
+ /** Cancel current operation */
571
+ cancel: () => void;
572
+ /** Clear conversation history */
573
+ clear: () => void;
574
+ /** Whether recording user speech */
575
+ isListening: boolean;
576
+ /** Whether processing (STT/LLM/TTS) */
577
+ isProcessing: boolean;
578
+ /** Whether assistant is speaking */
579
+ isSpeaking: boolean;
580
+ /** Current stage: idle, listening, transcribing, thinking, speaking */
581
+ stage: "idle" | "listening" | "transcribing" | "thinking" | "speaking";
582
+ /** Whether all models are loaded */
583
+ isReady: boolean;
584
+ /** Whether loading models */
585
+ isLoading: boolean;
586
+ /** Loading progress message */
587
+ loadingMessage: string;
588
+ /** Error message */
589
+ error: string | null;
590
+ /** Manually load all models */
591
+ load: () => void;
592
+ };
593
+ /**
594
+ * React hook for voice conversation with STT + LLM + TTS
595
+ *
596
+ * Complete voice-to-voice conversation loop:
597
+ * 1. User presses button to speak
598
+ * 2. Speech is transcribed (Whisper)
599
+ * 3. LLM generates response
600
+ * 4. Response is spoken aloud (Kokoro or Supertonic TTS)
601
+ *
602
+ * @example
603
+ * ```tsx
604
+ * function VoiceChat() {
605
+ * const {
606
+ * messages,
607
+ * startListening,
608
+ * stopListening,
609
+ * isListening,
610
+ * isSpeaking,
611
+ * stage,
612
+ * } = useVoiceChat({
613
+ * system: "You are a helpful voice assistant.",
614
+ * voice: "af_bella",
615
+ * // Or use Supertonic for faster synthesis:
616
+ * // ttsModel: "supertonic-66m",
617
+ * // voice: "F1",
618
+ * });
619
+ *
620
+ * return (
621
+ * <div>
622
+ * {messages.map(m => (
623
+ * <div key={m.id}>{m.role}: {m.content}</div>
624
+ * ))}
625
+ * <button
626
+ * onMouseDown={startListening}
627
+ * onMouseUp={stopListening}
628
+ * >
629
+ * {stage === "idle" ? "🎤 Hold to Speak" : stage}
630
+ * </button>
631
+ * </div>
632
+ * );
633
+ * }
634
+ * ```
635
+ */
636
+ declare function useVoiceChat(options?: UseVoiceChatOptions): UseVoiceChatReturn;
243
637
  /**
244
638
  * Check if WebGPU is supported
245
639
  */
@@ -256,7 +650,9 @@ declare const _default: {
256
650
  isWebGPUSupported: typeof isWebGPUSupported;
257
651
  getWebGPUInfo: typeof getWebGPUInfo;
258
652
  createGerbilWorker: typeof createGerbilWorker;
653
+ playAudio: typeof playAudio;
654
+ createAudioPlayer: typeof createAudioPlayer;
259
655
  };
260
656
  //#endregion
261
- export { BUILTIN_MODELS, CacheConfig, EmbedOptions, EmbedResult, FallbackConfig, GenerateOptions, GenerateResult, GenerateStreamOptions, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, GerbilWorker, GerbilWorkerOptions, JsonOptions, LoadOptions, LoadingProgress, Message, ModelConfig, ModelSource, ModelStats, ProgressInfo, SessionStats, SystemInfo, UseChatOptions, UseChatReturn, UseCompletionOptions, UseCompletionReturn, WorkerComplete, WorkerProgress, WorkerToken, createGerbilWorker, _default as default, getWebGPUInfo, isWebGPUSupported, useChat, useCompletion };
657
+ export { AudioChunk, BUILTIN_MODELS, BrowserVoiceInfo, CacheConfig, CompleteOptions, EmbedOptions, EmbedResult, FallbackConfig, GenerateOptions, GenerateResult, GenerateStreamOptions, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, GerbilWorker, GerbilWorkerOptions, ImageInput, JsonOptions, LoadOptions, LoadSTTOptions, LoadTTSOptions, LoadingProgress, Message, ModelConfig, ModelSource, ModelStats, ProgressInfo, STTModelConfig, STTProgress, SessionStats, SpeakOptions, SpeakResult, StreamingTranscriptionOptions, StreamingTranscriptionSession, SystemInfo, TTSModelConfig, TTSModelId, TTSProgress, TranscribeOptions, TranscribeResult, TranscribeSegment, UseChatOptions, UseChatReturn, UseCompletionOptions, UseCompletionReturn, UseSpeechOptions, UseSpeechReturn, UseVoiceChatOptions, UseVoiceChatReturn, UseVoiceInputOptions, UseVoiceInputReturn, VoiceChatMessage, VoiceInfo, WorkerComplete, WorkerProgress, WorkerToken, createAudioPlayer, createGerbilWorker, _default as default, getWebGPUInfo, isWebGPUSupported, playAudio, useChat, useCompletion, useSpeech, useVoiceChat, useVoiceInput };
262
658
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../../src/browser/index.ts"],"sourcesContent":[],"mappings":";;;;;KA6DY,cAAA;;;;;;;;;;;KAYA,WAAA;;;;;;;KAQA,cAAA;;;;;;;KAQA,mBAAA;;;;0BAIc;;oBAEN;;wBAEI;;;;;;KAOZ,qBAAA;;;;;;;;;;;;;;KAeA,YAAA;;uCAE2B,0BAA0B;;;;;;;;;;;;;;;;iBAqB3C,kBAAA,WAA4B,sBAA2B,QAAQ;;KAwTzE,OAAA;;;;;;;KAQA,eAAA;;;;;;;;;;;KAYA,cAAA;;;;;;;;;;;;oBAYQ;;;;;;;;;KAUR,aAAA;;YAEA;;;;;;;;;;;;mBAUO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBA6CH,OAAA,WAAiB,iBAAsB;;KAmQ3C,oBAAA;;;;;;;;;;;;;;;;;;;KAoBA,mBAAA;;;;;;gCAMoB;;;;mBAIb;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAoCH,aAAA,WAAuB,uBAA4B;;;;iBAqMnD,iBAAA,CAAA;;;;iBAUM,aAAA,CAAA,GAAiB;;;;;cAwBtC"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../../src/browser/index.ts"],"sourcesContent":[],"mappings":";;;;;AAuhEgB,KA19DJ,cAAA,GA09DqB;EAgErB,MAAA,EAAA,SAAW,GAAA,aAAA,GAAA,OAAA,GAAA,OAAA;EAUX,OAAA,CAAA,EAAA,MAAA;EAwBA,IAAA,CAAA,EAAA,MAAA;EAEY,QAAA,CAAA,EAAA,MAAA;EAED;EAID,aAAA,CAAA,EAAA,MAAA;EAAiB;EAgBpB,UAAA,CAAA,EAAA,MAAA;EAAW,KAAA,CAAA,EAAA,MAAA;AAiD9B,CAAA;AA2iBY,KApqFA,WAAA,GAoqFmB;EA4BnB,MAAA,EAAA,OAAA;EAWA,IAAA,EAAA,MAAA;EAEA,KAAA,EAAA,UAAA,GAAA,WAAA;EAEY,SAAA,EAAA,MAAA;EAED,GAAA,EAAA,MAAA;CAAO;AAoEd,KA7wFJ,cAAA,GA6wFgB;EAgcZ,MAAA,EAAA,UAAA;EAUM,IAAA,EAAA,MAAA;EAwBrB,SAAA,EAAA,MAAA;;;;KAvuGW,mBAAA;;;;0BAIc;;oBAEN;;wBAEI;;;;;;KAOZ,qBAAA;;;;;;;;;;;;;;;;YAgBA;;;;;KAGA,YAAA;;uCAE2B,0BAA0B;;;;;;;;;;;;;;;;iBAqB3C,kBAAA,WAA4B,sBAA2B,QAAQ;;KA+ezE,OAAA;;;;;;;;;KAUA,eAAA;;;;;;;;;;;KAYA,cAAA;;;;;;;;;;;;oBAYQ;;;;;;;;;KAUR,aAAA;;YAEA;;;;;;;;;;;;mBAUO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAuDH,OAAA,WAAiB,iBAAsB;;KAwS3C,oBAAA;;;;;;;;;;;;;;;;;;;KAoBA,eAAA;;;;;KAMA,mBAAA;;;;;;uCAM2B,oBAAoB;;;;mBAIxC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAoCH,aAAA,WAAuB,uBAA4B;;KAsMvD,WAAA;;;;;;;;KASA,UAAA;;KAGA,gBAAA;;;;;;;;KA0NA,gBAAA;;UAEF;;;;;;;;;;;;;;;;;KAkBE,eAAA;;;;;QAE6D;;;;;;mBAMtD;;;;;;;;;;oBAUC;;;;;;;;;;gBAUJ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAuCA,SAAA,WAAmB,mBAAwB;;;;;;;;;;;;;;;iBA8XrC,SAAA,QACb,oCAEN;;WAAqC;;;;;;;;;;;;;;;;;;;iBAmDxB,iBAAA;iBACC;;;;;;;KA+DL,WAAA;;;;;;;;;KAUA,oBAAA;;;;;;;;;;;;0BAYc;;;;;;;;;;;KAYd,mBAAA;;wBAEY;;uBAED;;;;sBAID,iBAAiB;;;;;;;;;;;;;;;;mBAgBpB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAiDH,aAAA,WAAuB,uBAA4B;;;;KA2iBvD,mBAAA;;;;;;aAMC;;;;;;;;;;;;;;;;;;;;;KAsBD,gBAAA;;;;;;;;;;KAWA,kBAAA;;YAEA;;wBAEY;;uBAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAoEP,YAAA,WAAsB,sBAA2B;;;;iBAgcjD,iBAAA,CAAA;;;;iBAUM,aAAA,CAAA,GAAiB;;;;;cAwBtC"}