@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -14
- package/dist/auto-update-DsWBBnEk.mjs +3 -0
- package/dist/browser/index.d.mts +401 -5
- package/dist/browser/index.d.mts.map +1 -1
- package/dist/browser/index.mjs +1772 -146
- package/dist/browser/index.mjs.map +1 -1
- package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-JEPeM2YE.mjs} +1 -1
- package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-Y9F7W5VQ.mjs} +514 -73
- package/dist/chrome-backend-Y9F7W5VQ.mjs.map +1 -0
- package/dist/cli.mjs +3359 -646
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +3 -3
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +3 -3
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +3 -3
- package/dist/frameworks/next.d.mts +2 -2
- package/dist/frameworks/next.mjs +3 -3
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +3 -3
- package/dist/gerbil-DeQlX_Mt.mjs +5 -0
- package/dist/gerbil-POAz8peb.d.mts +431 -0
- package/dist/gerbil-POAz8peb.d.mts.map +1 -0
- package/dist/gerbil-yoSpRHgv.mjs +1463 -0
- package/dist/gerbil-yoSpRHgv.mjs.map +1 -0
- package/dist/index.d.mts +395 -9
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +8 -6
- package/dist/index.mjs.map +1 -1
- package/dist/integrations/ai-sdk.d.mts +122 -4
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +239 -11
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +132 -2
- package/dist/integrations/langchain.d.mts.map +1 -1
- package/dist/integrations/langchain.mjs +176 -8
- package/dist/integrations/langchain.mjs.map +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +3 -3
- package/dist/integrations/mcp-client.mjs +4 -4
- package/dist/integrations/mcp-client.mjs.map +1 -1
- package/dist/integrations/mcp.d.mts +2 -2
- package/dist/integrations/mcp.d.mts.map +1 -1
- package/dist/integrations/mcp.mjs +6 -6
- package/dist/{mcp-R8kRLIKb.mjs → mcp-Bitg4sjX.mjs} +10 -37
- package/dist/mcp-Bitg4sjX.mjs.map +1 -0
- package/dist/microphone-D-6y9aiE.mjs +3 -0
- package/dist/{models-DKULvhOr.mjs → models-BAtL8qsA.mjs} +42 -7
- package/dist/models-BAtL8qsA.mjs.map +1 -0
- package/dist/{models-De2-_GmQ.d.mts → models-CE0fBq0U.d.mts} +2 -2
- package/dist/models-CE0fBq0U.d.mts.map +1 -0
- package/dist/{one-liner-BUQR0nqq.mjs → one-liner-B1rmFto6.mjs} +2 -2
- package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-B1rmFto6.mjs.map} +1 -1
- package/dist/repl-D20JO260.mjs +10 -0
- package/dist/skills/index.d.mts +303 -12
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +6 -6
- package/dist/skills-5DxAV-rn.mjs +1435 -0
- package/dist/skills-5DxAV-rn.mjs.map +1 -0
- package/dist/stt-Bv_dum-R.mjs +433 -0
- package/dist/stt-Bv_dum-R.mjs.map +1 -0
- package/dist/stt-KzSoNvwI.mjs +3 -0
- package/dist/{tools-BsiEE6f2.mjs → tools-IYPrqoek.mjs} +6 -7
- package/dist/{tools-BsiEE6f2.mjs.map → tools-IYPrqoek.mjs.map} +1 -1
- package/dist/tts-5yWeP_I0.mjs +3 -0
- package/dist/tts-DG6denWG.mjs +729 -0
- package/dist/tts-DG6denWG.mjs.map +1 -0
- package/dist/types-s6Py2_DL.d.mts +353 -0
- package/dist/types-s6Py2_DL.d.mts.map +1 -0
- package/dist/{utils-7vXqtq2Q.mjs → utils-CkB4Roi6.mjs} +1 -1
- package/dist/{utils-7vXqtq2Q.mjs.map → utils-CkB4Roi6.mjs.map} +1 -1
- package/docs/ai-sdk.md +137 -21
- package/docs/browser.md +241 -2
- package/docs/memory.md +72 -0
- package/docs/stt.md +494 -0
- package/docs/tts.md +569 -0
- package/docs/vision.md +396 -0
- package/package.json +17 -18
- package/dist/auto-update-BbNHbSU1.mjs +0 -3
- package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
- package/dist/gerbil-BfnsFWRE.mjs +0 -644
- package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
- package/dist/gerbil-BjW-z7Fq.mjs +0 -5
- package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
- package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
- package/dist/mcp-R8kRLIKb.mjs.map +0 -1
- package/dist/models-DKULvhOr.mjs.map +0 -1
- package/dist/models-De2-_GmQ.d.mts.map +0 -1
- package/dist/skills-D3CEpgDc.mjs +0 -630
- package/dist/skills-D3CEpgDc.mjs.map +0 -1
- package/dist/types-BS1N92Jt.d.mts +0 -183
- package/dist/types-BS1N92Jt.d.mts.map +0 -1
package/docs/ai-sdk.md
CHANGED
@@ -1,6 +1,6 @@
 # Gerbil + AI SDK

-Gerbil works as a [Vercel AI SDK v5](https://sdk.vercel.ai/) provider.
+Gerbil works as a [Vercel AI SDK v5](https://sdk.vercel.ai/) provider, supporting text generation, speech synthesis (TTS), and transcription (STT).

 ## Setup

@@ -9,7 +9,9 @@ import { generateText, streamText } from "ai";
 import { gerbil } from "@tryhamster/gerbil/ai";
 ```

-##
+## Text Generation
+
+### Generate Text

 ```typescript
 const { text } = await generateText({
@@ -18,7 +20,7 @@ const { text } = await generateText({
 });
 ```

-
+### Stream Text

 ```typescript
 const stream = streamText({
@@ -31,7 +33,7 @@ for await (const chunk of stream.textStream) {
 }
 ```

-
+### With System Prompt

 ```typescript
 const { text } = await generateText({
@@ -41,40 +43,154 @@ const { text } = await generateText({
 });
 ```

-
+### Thinking Mode

 ```typescript
 import { createGerbil } from "@tryhamster/gerbil/ai";

-
-const local = createGerbil({
-  device: "gpu",
-  dtype: "q4",
-});
+const local = createGerbil({ device: "gpu" });

-// Use with settings
 const { text } = await generateText({
   model: local("qwen3-0.6b", { thinking: true }),
   prompt: "What is 127 × 43?",
 });
 ```

-##
+## Speech Generation (TTS)
+
+Generate speech from text using Kokoro TTS:
+
+```typescript
+import { experimental_generateSpeech as generateSpeech } from "ai";
+import { gerbil } from "@tryhamster/gerbil/ai";
+
+const result = await generateSpeech({
+  model: gerbil.speech(), // kokoro-82m by default
+  text: "Hello, welcome to Gerbil!",
+  voice: "af_heart", // Female American voice
+});
+
+// result.audio is a Uint8Array in WAV format
+await writeFile("output.wav", result.audio);
+```
+
+### Available Voices
+
+```typescript
+const voices = gerbil.listVoices();
+// Returns: [{ id, name, gender, language }, ...]
+
+// Example voices:
+// - af_heart (Female, American)
+// - bf_emma (Female, British)
+// - am_fenrir (Male, American)
+// - bm_daniel (Male, British)
+```
+
+### Speech Options
+
+```typescript
+const result = await generateSpeech({
+  model: gerbil.speech("kokoro-82m", {
+    voice: "bf_emma", // Default voice
+    speed: 1.2, // Speed multiplier
+  }),
+  text: "Speak faster!",
+});
+```
+
+## Transcription (STT)
+
+Transcribe audio to text using Whisper:
+
+```typescript
+import { experimental_transcribe as transcribe } from "ai";
+import { gerbil } from "@tryhamster/gerbil/ai";
+import { readFile } from "fs/promises";
+
+const result = await transcribe({
+  model: gerbil.transcription(), // whisper-tiny.en by default
+  audio: await readFile("audio.wav"),
+});
+
+console.log(result.text); // "Hello world"
+console.log(result.language); // "en"
+console.log(result.durationInSeconds); // 2.5
+console.log(result.segments); // Timestamped segments
+```
+
+### Available Models

 ```typescript
-gerbil(
-
-
-
-
+const models = gerbil.listTranscriptionModels();
+
+// Models (smallest to largest):
+// - whisper-tiny.en (39M, English only, fastest)
+// - whisper-tiny (39M, multilingual)
+// - whisper-base.en (74M, English only)
+// - whisper-base (74M, multilingual)
+// - whisper-small.en (244M, English only)
+// - whisper-small (244M, multilingual)
+// - whisper-large-v3-turbo (809M, 80+ languages, best quality)
+```
+
+### Larger Models
+
+```typescript
+// Use a larger model for better accuracy
+const result = await transcribe({
+  model: gerbil.transcription("whisper-base"),
+  audio: audioBuffer,
+});
+
+// Use multilingual model with language hint
+const result = await transcribe({
+  model: gerbil.transcription("whisper-small", { language: "es" }),
+  audio: spanishAudio,
+});
+```
+
+## Custom Provider
+
+```typescript
+import { createGerbil } from "@tryhamster/gerbil/ai";
+
+const local = createGerbil({
+  device: "gpu",
+  dtype: "q4",
+});
+
+// Text generation
+const { text } = await generateText({
+  model: local("qwen3-0.6b"),
+  prompt: "Hello",
+});
+
+// Speech
+const speech = await generateSpeech({
+  model: local.speech(),
+  text: "Hello",
+});
+
+// Transcription
+const transcript = await transcribe({
+  model: local.transcription(),
+  audio: audioData,
+});
 ```

 ## Specification

-Gerbil implements
+Gerbil implements the following AI SDK v5 interfaces:

-
-
-
+| Interface | Purpose | Method |
+|-----------|---------|--------|
+| `LanguageModelV2` | Text generation | `gerbil(modelId)` |
+| `SpeechModelV2` | Text-to-Speech | `gerbil.speech()` |
+| `TranscriptionModelV2` | Speech-to-Text | `gerbil.transcription()` |

+All models support:
+- `specificationVersion: "v2"`
+- Proper warning reporting
+- Request/response metadata

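Read together, the ai-sdk.md changes mean one provider object now covers all three modalities, so they can be chained. A minimal round-trip sketch built only from calls shown in the diff above; the `.wav` paths are illustrative placeholders, and `result.audio` is written out as WAV bytes per the docs:

```typescript
import {
  generateText,
  experimental_generateSpeech as generateSpeech,
  experimental_transcribe as transcribe,
} from "ai";
import { gerbil } from "@tryhamster/gerbil/ai";
import { readFile, writeFile } from "fs/promises";

// STT: transcribe a recorded question (whisper-tiny.en by default);
// "question.wav" is a placeholder path
const { text: question } = await transcribe({
  model: gerbil.transcription(),
  audio: await readFile("question.wav"),
});

// LLM: answer the transcribed question
const { text: answer } = await generateText({
  model: gerbil("qwen3-0.6b"),
  prompt: question,
});

// TTS: speak the answer back (audio is a WAV Uint8Array per the docs above)
const spoken = await generateSpeech({
  model: gerbil.speech(),
  text: answer,
  voice: "af_heart",
});
await writeFile("answer.wav", spoken.audio);
```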
package/docs/browser.md
CHANGED
@@ -1,6 +1,6 @@
 # Browser Usage

-Run LLMs directly in the browser with WebGPU acceleration. No server required.
+Run LLMs, TTS, and STT directly in the browser with WebGPU acceleration. No server required.

 ## Quick Start (React)

@@ -203,7 +203,7 @@ function App() {
 const {
   completion, // string - generated text
   thinking, // string - thinking content
-  complete, // (prompt: string) => Promise<string>
+  complete, // (prompt: string, options?) => Promise<string>
   isLoading, // boolean - model loading
   loadingProgress, // { status, file?, progress? }
   isGenerating, // boolean - generating
@@ -218,6 +218,244 @@ const {
 });
 ```

+#### Vision (Image Analysis)
+
+Use `useCompletion` with a vision model to analyze images:
+
+```tsx
+import { useCompletion } from "@tryhamster/gerbil/browser";
+
+function ImageAnalyzer() {
+  const { complete, completion, isLoading, isGenerating } = useCompletion({
+    model: "ministral-3b", // Vision model
+    maxTokens: 2048,
+  });
+  const [imageUrl, setImageUrl] = useState<string | null>(null);
+
+  const handleFile = (e: React.ChangeEvent<HTMLInputElement>) => {
+    const file = e.target.files?.[0];
+    if (file) {
+      const reader = new FileReader();
+      reader.onload = () => setImageUrl(reader.result as string);
+      reader.readAsDataURL(file);
+    }
+  };
+
+  const analyze = () => {
+    if (imageUrl) {
+      // Pass images array in the second argument
+      complete("Describe this image in detail", { images: [imageUrl] });
+    }
+  };
+
+  if (isLoading) return <div>Loading vision model...</div>;
+
+  return (
+    <div>
+      <input type="file" accept="image/*" onChange={handleFile} />
+      {imageUrl && <img src={imageUrl} style={{ maxWidth: 300 }} />}
+      <button onClick={analyze} disabled={!imageUrl || isGenerating}>
+        Analyze Image
+      </button>
+      <p>{completion}</p>
+    </div>
+  );
+}
+```
+
+Images can be:
+- **Data URIs** (`data:image/png;base64,...`) — from FileReader or canvas
+- **HTTP URLs** — external image links (must be CORS-accessible)
+
+Both formats work:
+```tsx
+// Plain strings
+complete("Describe", { images: ["https://example.com/photo.jpg"] });
+
+// ImageInput objects (same as core Gerbil API)
+complete("Describe", { images: [{ source: "https://example.com/photo.jpg" }] });
+```
+
+## Voice Hooks
+
+### `useSpeech` (TTS)
+
+Generate speech from text in the browser:
+
+```tsx
+import { useSpeech } from "@tryhamster/gerbil/browser";
+
+function SpeechDemo() {
+  const { speak, stop, isSpeaking, isLoading, listVoices } = useSpeech();
+
+  if (isLoading) return <div>Loading TTS model...</div>;
+
+  return (
+    <div>
+      <button onClick={() => speak("Hello from the browser!")}>
+        {isSpeaking ? "Speaking..." : "Speak"}
+      </button>
+      {isSpeaking && <button onClick={stop}>Stop</button>}
+    </div>
+  );
+}
+```
+
+#### API
+
+```typescript
+const {
+  speak, // (text: string, opts?) => Promise<void>
+  stop, // () => void
+  isSpeaking, // boolean
+  isLoading, // boolean
+  isReady, // boolean
+  listVoices, // () => VoiceInfo[]
+  currentVoice, // string
+  setVoice, // (id: string) => void
+  currentSpeed, // number
+  setSpeed, // (speed: number) => void
+  loadingProgress, // { status, file?, progress? }
+  error, // string | null
+} = useSpeech({
+  voice: "af_heart", // Default voice
+  speed: 1.0, // Speed multiplier
+  autoLoad: false, // Loads on first speak()
+});
+```
+
+📖 See [TTS docs](./tts.md) for voice list and options.
+
+### `useVoiceInput` (STT)
+
+Record and transcribe audio:
+
+```tsx
+import { useVoiceInput } from "@tryhamster/gerbil/browser";
+
+function VoiceInput() {
+  const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
+    onTranscript: (text) => console.log("User said:", text),
+  });
+
+  return (
+    <button onClick={isRecording ? stopRecording : startRecording}>
+      {isRecording ? "🔴 Stop" : "🎤 Record"}
+    </button>
+  );
+}
+```
+
+#### Streaming Transcription (Real-time)
+
+Transcribe audio in chunks as the user speaks - perfect for live captioning or call transcription:
+
+```tsx
+function LiveTranscription() {
+  const {
+    startRecording,
+    stopRecording,
+    isRecording,
+    transcript, // Full accumulated transcript
+    streamingChunk, // Current chunk being transcribed
+    chunkCount, // Number of chunks processed
+  } = useVoiceInput({
+    streaming: true, // Enable streaming mode
+    chunkDuration: 3000, // Transcribe every 3 seconds
+    onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
+  });
+
+  return (
+    <div>
+      <button onClick={isRecording ? stopRecording : startRecording}>
+        {isRecording ? "Stop" : "Start Live Transcription"}
+      </button>
+      {streamingChunk && <p style={{ color: 'gray' }}>Current: {streamingChunk}</p>}
+      <p>Transcript: {transcript}</p>
+    </div>
+  );
+}
+```
+
+#### API
+
+```typescript
+const {
+  startRecording, // () => Promise<void>
+  stopRecording, // () => Promise<string>
+  cancelRecording, // () => void
+  transcribe, // (audio: Float32Array) => Promise<string>
+  isRecording, // boolean
+  isTranscribing, // boolean
+  isLoading, // boolean
+  isReady, // boolean
+  transcript, // string - full transcript
+  streamingChunk, // string - current chunk (streaming mode)
+  chunkCount, // number - chunks processed (streaming mode)
+  loadingProgress, // { status, file?, progress? }
+  error, // string | null
+} = useVoiceInput({
+  model: "whisper-tiny.en",
+  autoLoad: false,
+  onTranscript: (text) => {},
+  // Streaming options:
+  streaming: false, // Enable streaming mode
+  chunkDuration: 1500, // ms between transcriptions (default)
+  onChunk: (text, idx) => {}, // Called for each chunk
+});
+```
+
+📖 See [STT docs](./stt.md) for model options.
+
+### `useVoiceChat` (Full Voice Conversation)
+
+Complete voice-to-voice: record → transcribe → LLM → speak:
+
+```tsx
+import { useVoiceChat } from "@tryhamster/gerbil/browser";
+
+function VoiceAssistant() {
+  const {
+    messages,
+    startListening,
+    stopListening,
+    isListening,
+    isSpeaking,
+    stage, // "idle" | "listening" | "transcribing" | "thinking" | "speaking"
+  } = useVoiceChat({
+    llmModel: "qwen3-0.6b",
+    sttModel: "whisper-tiny.en",
+    voice: "af_bella",
+    system: "You are a helpful assistant.",
+  });
+
+  return (
+    <button
+      onMouseDown={startListening}
+      onMouseUp={stopListening}
+    >
+      {stage === "idle" ? "🎤 Hold to Speak" : stage}
+    </button>
+  );
+}
+```
+
+### Audio Playback Utilities
+
+```typescript
+import { playAudio, createAudioPlayer } from "@tryhamster/gerbil/browser";
+
+// One-shot playback
+const controller = await playAudio(audioFloat32Array, 24000);
+await controller.onEnded;
+
+// Streaming playback
+const player = createAudioPlayer(24000);
+for await (const chunk of gerbil.speakStream("Long text...")) {
+  player.queue(chunk.samples);
+}
+```
+
 ## Low-Level API

 For full control, use `createGerbilWorker` directly:
@@ -302,6 +540,7 @@ const info = await getWebGPUInfo();
 | `qwen3-0.6b` | ~400MB | General use, thinking mode |
 | `smollm2-360m` | ~250MB | Faster, smaller |
 | `smollm2-135m` | ~100MB | Fastest, basic tasks |
+| `ministral-3b` | ~2.5GB | **Vision** — image analysis |

 Models are cached in IndexedDB after first download.

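The new hooks also compose with one another: a finished `useVoiceInput` transcript can be handed straight to `useCompletion`. A minimal sketch assuming only the hook shapes documented in this diff (`AskByVoice` is a hypothetical component name):

```tsx
import { useCompletion, useVoiceInput } from "@tryhamster/gerbil/browser";

function AskByVoice() {
  const { complete, completion, isGenerating } = useCompletion({
    model: "qwen3-0.6b",
  });
  // onTranscript fires with the final transcript; feed it to the LLM
  const { startRecording, stopRecording, isRecording } = useVoiceInput({
    onTranscript: (text) => complete(text),
  });

  return (
    <div>
      <button
        onClick={isRecording ? stopRecording : startRecording}
        disabled={isGenerating}
      >
        {isRecording ? "🔴 Stop" : "🎤 Ask by voice"}
      </button>
      <p>{completion}</p>
    </div>
  );
}
```

For the full loop with spoken replies, the diff's own `useVoiceChat` hook already wires record → transcribe → LLM → speak end-to-end.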
package/docs/memory.md
CHANGED
@@ -212,6 +212,72 @@ await gerbil.dispose();
 // Closes Chrome page, releases memory
 ```

+## Response Caching
+
+Gerbil also supports caching inference **responses** (different from KV cache). This is useful for repeated prompts:
+
+### Enable Response Caching
+
+```typescript
+// First call: ~150ms (runs inference)
+const result = await g.generate("What is 2+2?", { cache: true });
+
+// Second call: ~0ms (returns from cache!)
+const cached = await g.generate("What is 2+2?", { cache: true });
+console.log(cached.cached); // true
+```
+
+### Custom TTL
+
+```typescript
+// Cache for 10 minutes (default: 5 min)
+await g.generate("prompt", {
+  cache: true,
+  cacheTtl: 10 * 60 * 1000
+});
+```
+
+### Cache Statistics
+
+```typescript
+const stats = g.getResponseCacheStats();
+console.log(stats);
+// { hits: 1, misses: 1, size: 1, hitRate: 50 }
+```
+
+### Clear Response Cache
+
+```typescript
+// Clear all cached responses
+g.clearResponseCache();
+```
+
+### Cache Key
+
+The cache key is a hash of:
+- Prompt text
+- Model ID
+- maxTokens, temperature, topP, topK
+- System prompt
+- Thinking mode
+
+Different parameters = different cache entries.
+
+### Limitations
+
+Response caching is **not supported** for:
+- Streaming calls (`onToken` callback)
+- Vision/image calls
+
+### KV Cache vs Response Cache
+
+| Feature | KV Cache | Response Cache |
+|---------|----------|----------------|
+| What's cached | Attention states | Full responses |
+| Purpose | Conversation context | Repeated prompts |
+| Clear method | `clearCache()` | `clearResponseCache()` |
+| Default | Always on | Off (`cache: true` to enable) |
+
 ## Testing

 Run the memory management test suite:
@@ -227,3 +293,9 @@ This verifies:
 - Threshold-based cleanup works
 - Proper cleanup on dispose

+Run response caching test:
+
+```bash
+npx tsx examples/test-cache.ts
+```
+