@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/README.md +79 -14
  2. package/dist/auto-update-S9s5-g0C.mjs +3 -0
  3. package/dist/browser/index.d.ts +1009 -0
  4. package/dist/browser/index.d.ts.map +1 -0
  5. package/dist/browser/index.js +2492 -0
  6. package/dist/browser/index.js.map +1 -0
  7. package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-CORwaIyC.mjs} +514 -73
  8. package/dist/chrome-backend-CORwaIyC.mjs.map +1 -0
  9. package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-DIKYoWj-.mjs} +1 -1
  10. package/dist/cli.mjs +3359 -647
  11. package/dist/cli.mjs.map +1 -1
  12. package/dist/frameworks/express.d.mts +1 -1
  13. package/dist/frameworks/express.mjs +3 -4
  14. package/dist/frameworks/express.mjs.map +1 -1
  15. package/dist/frameworks/fastify.d.mts +1 -1
  16. package/dist/frameworks/fastify.mjs +2 -3
  17. package/dist/frameworks/fastify.mjs.map +1 -1
  18. package/dist/frameworks/hono.d.mts +1 -1
  19. package/dist/frameworks/hono.mjs +2 -3
  20. package/dist/frameworks/hono.mjs.map +1 -1
  21. package/dist/frameworks/next.d.mts +2 -2
  22. package/dist/frameworks/next.mjs +2 -3
  23. package/dist/frameworks/next.mjs.map +1 -1
  24. package/dist/frameworks/react.d.mts +1 -1
  25. package/dist/frameworks/trpc.d.mts +1 -1
  26. package/dist/frameworks/trpc.mjs +2 -3
  27. package/dist/frameworks/trpc.mjs.map +1 -1
  28. package/dist/gerbil-DJGqq7BX.mjs +4 -0
  29. package/dist/gerbil-DoDGHe6Z.mjs +1631 -0
  30. package/dist/gerbil-DoDGHe6Z.mjs.map +1 -0
  31. package/dist/gerbil-qOTe1nl2.d.mts +431 -0
  32. package/dist/gerbil-qOTe1nl2.d.mts.map +1 -0
  33. package/dist/index.d.mts +411 -9
  34. package/dist/index.d.mts.map +1 -1
  35. package/dist/index.mjs +7 -6
  36. package/dist/index.mjs.map +1 -1
  37. package/dist/integrations/ai-sdk.d.mts +122 -4
  38. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  39. package/dist/integrations/ai-sdk.mjs +238 -11
  40. package/dist/integrations/ai-sdk.mjs.map +1 -1
  41. package/dist/integrations/langchain.d.mts +132 -2
  42. package/dist/integrations/langchain.d.mts.map +1 -1
  43. package/dist/integrations/langchain.mjs +175 -8
  44. package/dist/integrations/langchain.mjs.map +1 -1
  45. package/dist/integrations/llamaindex.d.mts +1 -1
  46. package/dist/integrations/llamaindex.mjs +2 -3
  47. package/dist/integrations/llamaindex.mjs.map +1 -1
  48. package/dist/integrations/mcp-client.mjs +4 -4
  49. package/dist/integrations/mcp-client.mjs.map +1 -1
  50. package/dist/integrations/mcp.d.mts +2 -2
  51. package/dist/integrations/mcp.d.mts.map +1 -1
  52. package/dist/integrations/mcp.mjs +5 -6
  53. package/dist/kokoro-BNTb6egA.mjs +20210 -0
  54. package/dist/kokoro-BNTb6egA.mjs.map +1 -0
  55. package/dist/kokoro-CMOGDSgT.js +20212 -0
  56. package/dist/kokoro-CMOGDSgT.js.map +1 -0
  57. package/dist/{mcp-R8kRLIKb.mjs → mcp-kzDDWIoS.mjs} +10 -37
  58. package/dist/mcp-kzDDWIoS.mjs.map +1 -0
  59. package/dist/microphone-DaMZFRuR.mjs +3 -0
  60. package/dist/{one-liner-BUQR0nqq.mjs → one-liner-DxnNs_JK.mjs} +2 -2
  61. package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-DxnNs_JK.mjs.map} +1 -1
  62. package/dist/repl-DGUw4fCc.mjs +9 -0
  63. package/dist/skills/index.d.mts +305 -14
  64. package/dist/skills/index.d.mts.map +1 -1
  65. package/dist/skills/index.mjs +5 -6
  66. package/dist/skills-DulrOPeP.mjs +1435 -0
  67. package/dist/skills-DulrOPeP.mjs.map +1 -0
  68. package/dist/stt-1WIefHwc.mjs +3 -0
  69. package/dist/stt-CG_7KB_0.mjs +434 -0
  70. package/dist/stt-CG_7KB_0.mjs.map +1 -0
  71. package/dist/stt-Dne6SENv.js +434 -0
  72. package/dist/stt-Dne6SENv.js.map +1 -0
  73. package/dist/{tools-BsiEE6f2.mjs → tools-Bi1P7Xoy.mjs} +6 -7
  74. package/dist/{tools-BsiEE6f2.mjs.map → tools-Bi1P7Xoy.mjs.map} +1 -1
  75. package/dist/transformers.web-DiD1gTwk.js +44695 -0
  76. package/dist/transformers.web-DiD1gTwk.js.map +1 -0
  77. package/dist/transformers.web-u34VxRFM.js +3 -0
  78. package/dist/tts-B1pZMlDv.mjs +3 -0
  79. package/dist/tts-C2FzKuSx.js +725 -0
  80. package/dist/tts-C2FzKuSx.js.map +1 -0
  81. package/dist/tts-CyHhcLtN.mjs +731 -0
  82. package/dist/tts-CyHhcLtN.mjs.map +1 -0
  83. package/dist/types-CiTc7ez3.d.mts +353 -0
  84. package/dist/types-CiTc7ez3.d.mts.map +1 -0
  85. package/dist/{utils-7vXqtq2Q.mjs → utils-CZBZ8dgR.mjs} +1 -1
  86. package/dist/{utils-7vXqtq2Q.mjs.map → utils-CZBZ8dgR.mjs.map} +1 -1
  87. package/docs/ai-sdk.md +137 -21
  88. package/docs/browser.md +241 -2
  89. package/docs/memory.md +72 -0
  90. package/docs/stt.md +494 -0
  91. package/docs/tts.md +569 -0
  92. package/docs/vision.md +396 -0
  93. package/package.json +21 -22
  94. package/dist/auto-update-BbNHbSU1.mjs +0 -3
  95. package/dist/browser/index.d.mts +0 -262
  96. package/dist/browser/index.d.mts.map +0 -1
  97. package/dist/browser/index.mjs +0 -755
  98. package/dist/browser/index.mjs.map +0 -1
  99. package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
  100. package/dist/gerbil-BfnsFWRE.mjs +0 -644
  101. package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
  102. package/dist/gerbil-BjW-z7Fq.mjs +0 -5
  103. package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
  104. package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
  105. package/dist/mcp-R8kRLIKb.mjs.map +0 -1
  106. package/dist/models-DKULvhOr.mjs +0 -136
  107. package/dist/models-DKULvhOr.mjs.map +0 -1
  108. package/dist/models-De2-_GmQ.d.mts +0 -22
  109. package/dist/models-De2-_GmQ.d.mts.map +0 -1
  110. package/dist/skills-D3CEpgDc.mjs +0 -630
  111. package/dist/skills-D3CEpgDc.mjs.map +0 -1
  112. package/dist/types-BS1N92Jt.d.mts +0 -183
  113. package/dist/types-BS1N92Jt.d.mts.map +0 -1
  114. package/dist/{chunk-Ct1HF2bE.mjs → chunk-CkXuGtQK.mjs} +0 -0
package/docs/ai-sdk.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Gerbil + AI SDK
2
2
 
3
- Gerbil works as a [Vercel AI SDK v5](https://sdk.vercel.ai/) provider.
3
+ Gerbil works as a [Vercel AI SDK v5](https://sdk.vercel.ai/) provider, supporting text generation, speech synthesis (TTS), and transcription (STT).
4
4
 
5
5
  ## Setup
6
6
 
@@ -9,7 +9,9 @@ import { generateText, streamText } from "ai";
9
9
  import { gerbil } from "@tryhamster/gerbil/ai";
10
10
  ```
11
11
 
12
- ## Generate Text
12
+ ## Text Generation
13
+
14
+ ### Generate Text
13
15
 
14
16
  ```typescript
15
17
  const { text } = await generateText({
@@ -18,7 +20,7 @@ const { text } = await generateText({
18
20
  });
19
21
  ```
20
22
 
21
- ## Stream Text
23
+ ### Stream Text
22
24
 
23
25
  ```typescript
24
26
  const stream = streamText({
@@ -31,7 +33,7 @@ for await (const chunk of stream.textStream) {
31
33
  }
32
34
  ```
33
35
 
34
- ## With System Prompt
36
+ ### With System Prompt
35
37
 
36
38
  ```typescript
37
39
  const { text } = await generateText({
@@ -41,40 +43,154 @@ const { text } = await generateText({
41
43
  });
42
44
  ```
43
45
 
44
- ## Model Settings
46
+ ### Thinking Mode
45
47
 
46
48
  ```typescript
47
49
  import { createGerbil } from "@tryhamster/gerbil/ai";
48
50
 
49
- // Create provider with defaults
50
- const local = createGerbil({
51
- device: "gpu",
52
- dtype: "q4",
53
- });
51
+ const local = createGerbil({ device: "gpu" });
54
52
 
55
- // Use with settings
56
53
  const { text } = await generateText({
57
54
  model: local("qwen3-0.6b", { thinking: true }),
58
55
  prompt: "What is 127 × 43?",
59
56
  });
60
57
  ```
61
58
 
62
- ## Available Options
59
+ ## Speech Generation (TTS)
60
+
61
+ Generate speech from text using Kokoro TTS:
62
+
63
+ ```typescript
64
+ import { experimental_generateSpeech as generateSpeech } from "ai";
65
+ import { gerbil } from "@tryhamster/gerbil/ai";
66
+
67
+ const result = await generateSpeech({
68
+ model: gerbil.speech(), // kokoro-82m by default
69
+ text: "Hello, welcome to Gerbil!",
70
+ voice: "af_heart", // Female American voice
71
+ });
72
+
73
+ // result.audio is a Uint8Array in WAV format
74
+ await writeFile("output.wav", result.audio);
75
+ ```
76
+
77
+ ### Available Voices
78
+
79
+ ```typescript
80
+ const voices = gerbil.listVoices();
81
+ // Returns: [{ id, name, gender, language }, ...]
82
+
83
+ // Example voices:
84
+ // - af_heart (Female, American)
85
+ // - bf_emma (Female, British)
86
+ // - am_fenrir (Male, American)
87
+ // - bm_daniel (Male, British)
88
+ ```
89
+
90
+ ### Speech Options
91
+
92
+ ```typescript
93
+ const result = await generateSpeech({
94
+ model: gerbil.speech("kokoro-82m", {
95
+ voice: "bf_emma", // Default voice
96
+ speed: 1.2, // Speed multiplier
97
+ }),
98
+ text: "Speak faster!",
99
+ });
100
+ ```
101
+
102
+ ## Transcription (STT)
103
+
104
+ Transcribe audio to text using Whisper:
105
+
106
+ ```typescript
107
+ import { experimental_transcribe as transcribe } from "ai";
108
+ import { gerbil } from "@tryhamster/gerbil/ai";
109
+ import { readFile } from "fs/promises";
110
+
111
+ const result = await transcribe({
112
+ model: gerbil.transcription(), // whisper-tiny.en by default
113
+ audio: await readFile("audio.wav"),
114
+ });
115
+
116
+ console.log(result.text); // "Hello world"
117
+ console.log(result.language); // "en"
118
+ console.log(result.durationInSeconds); // 2.5
119
+ console.log(result.segments); // Timestamped segments
120
+ ```
121
+
122
+ ### Available Models
63
123
 
64
124
  ```typescript
65
- gerbil(modelId, {
66
- thinking?: boolean, // Enable reasoning mode (Qwen3)
67
- device?: "auto" | "gpu" | "cpu",
68
- dtype?: "q4" | "q8" | "fp16" | "fp32",
69
- })
125
+ const models = gerbil.listTranscriptionModels();
126
+
127
+ // Models (smallest to largest):
128
+ // - whisper-tiny.en (39M, English only, fastest)
129
+ // - whisper-tiny (39M, multilingual)
130
+ // - whisper-base.en (74M, English only)
131
+ // - whisper-base (74M, multilingual)
132
+ // - whisper-small.en (244M, English only)
133
+ // - whisper-small (244M, multilingual)
134
+ // - whisper-large-v3-turbo (809M, 80+ languages, best quality)
135
+ ```
136
+
137
+ ### Larger Models
138
+
139
+ ```typescript
140
+ // Use a larger model for better accuracy
141
+ const result = await transcribe({
142
+ model: gerbil.transcription("whisper-base"),
143
+ audio: audioBuffer,
144
+ });
145
+
146
+ // Use multilingual model with language hint
147
+ const result = await transcribe({
148
+ model: gerbil.transcription("whisper-small", { language: "es" }),
149
+ audio: spanishAudio,
150
+ });
151
+ ```
152
+
153
+ ## Custom Provider
154
+
155
+ ```typescript
156
+ import { createGerbil } from "@tryhamster/gerbil/ai";
157
+
158
+ const local = createGerbil({
159
+ device: "gpu",
160
+ dtype: "q4",
161
+ });
162
+
163
+ // Text generation
164
+ const { text } = await generateText({
165
+ model: local("qwen3-0.6b"),
166
+ prompt: "Hello",
167
+ });
168
+
169
+ // Speech
170
+ const speech = await generateSpeech({
171
+ model: local.speech(),
172
+ text: "Hello",
173
+ });
174
+
175
+ // Transcription
176
+ const transcript = await transcribe({
177
+ model: local.transcription(),
178
+ audio: audioData,
179
+ });
70
180
  ```
71
181
 
72
182
  ## Specification
73
183
 
74
- Gerbil implements `LanguageModelV2` from `@ai-sdk/provider`:
184
+ Gerbil implements the following AI SDK v5 interfaces:
75
185
 
76
- - `specificationVersion: "v2"`
77
- - Streaming with `text-start`, `text-delta`, `text-end`, `finish` events
78
- - Reasoning content type for thinking mode
186
+ | Interface | Purpose | Method |
187
+ |-----------|---------|--------|
188
+ | `LanguageModelV2` | Text generation | `gerbil(modelId)` |
189
+ | `SpeechModelV2` | Text-to-Speech | `gerbil.speech()` |
190
+ | `TranscriptionModelV2` | Speech-to-Text | `gerbil.transcription()` |
79
191
 
192
+ All models support:
193
+ - `specificationVersion: "v2"`
194
+ - Proper warning reporting
195
+ - Request/response metadata
80
196
 
package/docs/browser.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Browser Usage
2
2
 
3
- Run LLMs directly in the browser with WebGPU acceleration. No server required.
3
+ Run LLMs, TTS, and STT directly in the browser with WebGPU acceleration. No server required.
4
4
 
5
5
  ## Quick Start (React)
6
6
 
@@ -203,7 +203,7 @@ function App() {
203
203
  const {
204
204
  completion, // string - generated text
205
205
  thinking, // string - thinking content
206
- complete, // (prompt: string) => Promise<string>
206
+ complete, // (prompt: string, options?) => Promise<string>
207
207
  isLoading, // boolean - model loading
208
208
  loadingProgress, // { status, file?, progress? }
209
209
  isGenerating, // boolean - generating
@@ -218,6 +218,244 @@ const {
218
218
  });
219
219
  ```
220
220
 
221
+ #### Vision (Image Analysis)
222
+
223
+ Use `useCompletion` with a vision model to analyze images:
224
+
225
+ ```tsx
226
+ import { useCompletion } from "@tryhamster/gerbil/browser";
227
+
228
+ function ImageAnalyzer() {
229
+ const { complete, completion, isLoading, isGenerating } = useCompletion({
230
+ model: "ministral-3b", // Vision model
231
+ maxTokens: 2048,
232
+ });
233
+ const [imageUrl, setImageUrl] = useState<string | null>(null);
234
+
235
+ const handleFile = (e: React.ChangeEvent<HTMLInputElement>) => {
236
+ const file = e.target.files?.[0];
237
+ if (file) {
238
+ const reader = new FileReader();
239
+ reader.onload = () => setImageUrl(reader.result as string);
240
+ reader.readAsDataURL(file);
241
+ }
242
+ };
243
+
244
+ const analyze = () => {
245
+ if (imageUrl) {
246
+ // Pass images array in the second argument
247
+ complete("Describe this image in detail", { images: [imageUrl] });
248
+ }
249
+ };
250
+
251
+ if (isLoading) return <div>Loading vision model...</div>;
252
+
253
+ return (
254
+ <div>
255
+ <input type="file" accept="image/*" onChange={handleFile} />
256
+ {imageUrl && <img src={imageUrl} style={{ maxWidth: 300 }} />}
257
+ <button onClick={analyze} disabled={!imageUrl || isGenerating}>
258
+ Analyze Image
259
+ </button>
260
+ <p>{completion}</p>
261
+ </div>
262
+ );
263
+ }
264
+ ```
265
+
266
+ Images can be:
267
+ - **Data URIs** (`data:image/png;base64,...`) — from FileReader or canvas
268
+ - **HTTP URLs** — external image links (must be CORS-accessible)
269
+
270
+ Both formats work:
271
+ ```tsx
272
+ // Plain strings
273
+ complete("Describe", { images: ["https://example.com/photo.jpg"] });
274
+
275
+ // ImageInput objects (same as core Gerbil API)
276
+ complete("Describe", { images: [{ source: "https://example.com/photo.jpg" }] });
277
+ ```
278
+
279
+ ## Voice Hooks
280
+
281
+ ### `useSpeech` (TTS)
282
+
283
+ Generate speech from text in the browser:
284
+
285
+ ```tsx
286
+ import { useSpeech } from "@tryhamster/gerbil/browser";
287
+
288
+ function SpeechDemo() {
289
+ const { speak, stop, isSpeaking, isLoading, listVoices } = useSpeech();
290
+
291
+ if (isLoading) return <div>Loading TTS model...</div>;
292
+
293
+ return (
294
+ <div>
295
+ <button onClick={() => speak("Hello from the browser!")}>
296
+ {isSpeaking ? "Speaking..." : "Speak"}
297
+ </button>
298
+ {isSpeaking && <button onClick={stop}>Stop</button>}
299
+ </div>
300
+ );
301
+ }
302
+ ```
303
+
304
+ #### API
305
+
306
+ ```typescript
307
+ const {
308
+ speak, // (text: string, opts?) => Promise<void>
309
+ stop, // () => void
310
+ isSpeaking, // boolean
311
+ isLoading, // boolean
312
+ isReady, // boolean
313
+ listVoices, // () => VoiceInfo[]
314
+ currentVoice, // string
315
+ setVoice, // (id: string) => void
316
+ currentSpeed, // number
317
+ setSpeed, // (speed: number) => void
318
+ loadingProgress, // { status, file?, progress? }
319
+ error, // string | null
320
+ } = useSpeech({
321
+ voice: "af_heart", // Default voice
322
+ speed: 1.0, // Speed multiplier
323
+ autoLoad: false, // Loads on first speak()
324
+ });
325
+ ```
326
+
327
+ 📖 See [TTS docs](./tts.md) for voice list and options.
328
+
329
+ ### `useVoiceInput` (STT)
330
+
331
+ Record and transcribe audio:
332
+
333
+ ```tsx
334
+ import { useVoiceInput } from "@tryhamster/gerbil/browser";
335
+
336
+ function VoiceInput() {
337
+ const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
338
+ onTranscript: (text) => console.log("User said:", text),
339
+ });
340
+
341
+ return (
342
+ <button onClick={isRecording ? stopRecording : startRecording}>
343
+ {isRecording ? "🔴 Stop" : "🎤 Record"}
344
+ </button>
345
+ );
346
+ }
347
+ ```
348
+
349
+ #### Streaming Transcription (Real-time)
350
+
351
+ Transcribe audio in chunks as the user speaks — perfect for live captioning or call transcription:
352
+
353
+ ```tsx
354
+ function LiveTranscription() {
355
+ const {
356
+ startRecording,
357
+ stopRecording,
358
+ isRecording,
359
+ transcript, // Full accumulated transcript
360
+ streamingChunk, // Current chunk being transcribed
361
+ chunkCount, // Number of chunks processed
362
+ } = useVoiceInput({
363
+ streaming: true, // Enable streaming mode
364
+ chunkDuration: 3000, // Transcribe every 3 seconds
365
+ onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
366
+ });
367
+
368
+ return (
369
+ <div>
370
+ <button onClick={isRecording ? stopRecording : startRecording}>
371
+ {isRecording ? "Stop" : "Start Live Transcription"}
372
+ </button>
373
+ {streamingChunk && <p style={{ color: 'gray' }}>Current: {streamingChunk}</p>}
374
+ <p>Transcript: {transcript}</p>
375
+ </div>
376
+ );
377
+ }
378
+ ```
379
+
380
+ #### API
381
+
382
+ ```typescript
383
+ const {
384
+ startRecording, // () => Promise<void>
385
+ stopRecording, // () => Promise<string>
386
+ cancelRecording, // () => void
387
+ transcribe, // (audio: Float32Array) => Promise<string>
388
+ isRecording, // boolean
389
+ isTranscribing, // boolean
390
+ isLoading, // boolean
391
+ isReady, // boolean
392
+ transcript, // string - full transcript
393
+ streamingChunk, // string - current chunk (streaming mode)
394
+ chunkCount, // number - chunks processed (streaming mode)
395
+ loadingProgress, // { status, file?, progress? }
396
+ error, // string | null
397
+ } = useVoiceInput({
398
+ model: "whisper-tiny.en",
399
+ autoLoad: false,
400
+ onTranscript: (text) => {},
401
+ // Streaming options:
402
+ streaming: false, // Enable streaming mode
403
+ chunkDuration: 1500, // ms between transcriptions (default)
404
+ onChunk: (text, idx) => {}, // Called for each chunk
405
+ });
406
+ ```
407
+
408
+ 📖 See [STT docs](./stt.md) for model options.
409
+
410
+ ### `useVoiceChat` (Full Voice Conversation)
411
+
412
+ Complete voice-to-voice: record → transcribe → LLM → speak:
413
+
414
+ ```tsx
415
+ import { useVoiceChat } from "@tryhamster/gerbil/browser";
416
+
417
+ function VoiceAssistant() {
418
+ const {
419
+ messages,
420
+ startListening,
421
+ stopListening,
422
+ isListening,
423
+ isSpeaking,
424
+ stage, // "idle" | "listening" | "transcribing" | "thinking" | "speaking"
425
+ } = useVoiceChat({
426
+ llmModel: "qwen3-0.6b",
427
+ sttModel: "whisper-tiny.en",
428
+ voice: "af_bella",
429
+ system: "You are a helpful assistant.",
430
+ });
431
+
432
+ return (
433
+ <button
434
+ onMouseDown={startListening}
435
+ onMouseUp={stopListening}
436
+ >
437
+ {stage === "idle" ? "🎤 Hold to Speak" : stage}
438
+ </button>
439
+ );
440
+ }
441
+ ```
442
+
443
+ ### Audio Playback Utilities
444
+
445
+ ```typescript
446
+ import { playAudio, createAudioPlayer } from "@tryhamster/gerbil/browser";
447
+
448
+ // One-shot playback
449
+ const controller = await playAudio(audioFloat32Array, 24000);
450
+ await controller.onEnded;
451
+
452
+ // Streaming playback
453
+ const player = createAudioPlayer(24000);
454
+ for await (const chunk of gerbil.speakStream("Long text...")) {
455
+ player.queue(chunk.samples);
456
+ }
457
+ ```
458
+
221
459
  ## Low-Level API
222
460
 
223
461
  For full control, use `createGerbilWorker` directly:
@@ -302,6 +540,7 @@ const info = await getWebGPUInfo();
302
540
  | `qwen3-0.6b` | ~400MB | General use, thinking mode |
303
541
  | `smollm2-360m` | ~250MB | Faster, smaller |
304
542
  | `smollm2-135m` | ~100MB | Fastest, basic tasks |
543
+ | `ministral-3b` | ~2.5GB | **Vision** — image analysis |
305
544
 
306
545
  Models are cached in IndexedDB after first download.
307
546
 
package/docs/memory.md CHANGED
@@ -212,6 +212,72 @@ await gerbil.dispose();
212
212
  // Closes Chrome page, releases memory
213
213
  ```
214
214
 
215
+ ## Response Caching
216
+
217
+ Gerbil also supports caching inference **responses** (different from KV cache). This is useful for repeated prompts:
218
+
219
+ ### Enable Response Caching
220
+
221
+ ```typescript
222
+ // First call: ~150ms (runs inference)
223
+ const result = await g.generate("What is 2+2?", { cache: true });
224
+
225
+ // Second call: ~0ms (returns from cache!)
226
+ const cached = await g.generate("What is 2+2?", { cache: true });
227
+ console.log(cached.cached); // true
228
+ ```
229
+
230
+ ### Custom TTL
231
+
232
+ ```typescript
233
+ // Cache for 10 minutes (default: 5 min)
234
+ await g.generate("prompt", {
235
+ cache: true,
236
+ cacheTtl: 10 * 60 * 1000
237
+ });
238
+ ```
239
+
240
+ ### Cache Statistics
241
+
242
+ ```typescript
243
+ const stats = g.getResponseCacheStats();
244
+ console.log(stats);
245
+ // { hits: 1, misses: 1, size: 1, hitRate: 50 }
246
+ ```
247
+
248
+ ### Clear Response Cache
249
+
250
+ ```typescript
251
+ // Clear all cached responses
252
+ g.clearResponseCache();
253
+ ```
254
+
255
+ ### Cache Key
256
+
257
+ The cache key is a hash of:
258
+ - Prompt text
259
+ - Model ID
260
+ - maxTokens, temperature, topP, topK
261
+ - System prompt
262
+ - Thinking mode
263
+
264
+ Different parameters = different cache entries.
265
+
266
+ ### Limitations
267
+
268
+ Response caching is **not supported** for:
269
+ - Streaming calls (`onToken` callback)
270
+ - Vision/image calls
271
+
272
+ ### KV Cache vs Response Cache
273
+
274
+ | Feature | KV Cache | Response Cache |
275
+ |---------|----------|----------------|
276
+ | What's cached | Attention states | Full responses |
277
+ | Purpose | Conversation context | Repeated prompts |
278
+ | Clear method | `clearCache()` | `clearResponseCache()` |
279
+ | Default | Always on | Off (`cache: true` to enable) |
280
+
215
281
  ## Testing
216
282
 
217
283
  Run the memory management test suite:
@@ -227,3 +293,9 @@ This verifies:
227
293
  - Threshold-based cleanup works
228
294
  - Proper cleanup on dispose
229
295
 
296
+ Run response caching test:
297
+
298
+ ```bash
299
+ npx tsx examples/test-cache.ts
300
+ ```
301
+