@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -14
- package/dist/auto-update-DsWBBnEk.mjs +3 -0
- package/dist/browser/index.d.mts +401 -5
- package/dist/browser/index.d.mts.map +1 -1
- package/dist/browser/index.mjs +1772 -146
- package/dist/browser/index.mjs.map +1 -1
- package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-JEPeM2YE.mjs} +1 -1
- package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-Y9F7W5VQ.mjs} +514 -73
- package/dist/chrome-backend-Y9F7W5VQ.mjs.map +1 -0
- package/dist/cli.mjs +3359 -646
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +3 -3
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +3 -3
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +3 -3
- package/dist/frameworks/next.d.mts +2 -2
- package/dist/frameworks/next.mjs +3 -3
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +3 -3
- package/dist/gerbil-DeQlX_Mt.mjs +5 -0
- package/dist/gerbil-POAz8peb.d.mts +431 -0
- package/dist/gerbil-POAz8peb.d.mts.map +1 -0
- package/dist/gerbil-yoSpRHgv.mjs +1463 -0
- package/dist/gerbil-yoSpRHgv.mjs.map +1 -0
- package/dist/index.d.mts +395 -9
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +8 -6
- package/dist/index.mjs.map +1 -1
- package/dist/integrations/ai-sdk.d.mts +122 -4
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +239 -11
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +132 -2
- package/dist/integrations/langchain.d.mts.map +1 -1
- package/dist/integrations/langchain.mjs +176 -8
- package/dist/integrations/langchain.mjs.map +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +3 -3
- package/dist/integrations/mcp-client.mjs +4 -4
- package/dist/integrations/mcp-client.mjs.map +1 -1
- package/dist/integrations/mcp.d.mts +2 -2
- package/dist/integrations/mcp.d.mts.map +1 -1
- package/dist/integrations/mcp.mjs +6 -6
- package/dist/{mcp-R8kRLIKb.mjs → mcp-Bitg4sjX.mjs} +10 -37
- package/dist/mcp-Bitg4sjX.mjs.map +1 -0
- package/dist/microphone-D-6y9aiE.mjs +3 -0
- package/dist/{models-DKULvhOr.mjs → models-BAtL8qsA.mjs} +42 -7
- package/dist/models-BAtL8qsA.mjs.map +1 -0
- package/dist/{models-De2-_GmQ.d.mts → models-CE0fBq0U.d.mts} +2 -2
- package/dist/models-CE0fBq0U.d.mts.map +1 -0
- package/dist/{one-liner-BUQR0nqq.mjs → one-liner-B1rmFto6.mjs} +2 -2
- package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-B1rmFto6.mjs.map} +1 -1
- package/dist/repl-D20JO260.mjs +10 -0
- package/dist/skills/index.d.mts +303 -12
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +6 -6
- package/dist/skills-5DxAV-rn.mjs +1435 -0
- package/dist/skills-5DxAV-rn.mjs.map +1 -0
- package/dist/stt-Bv_dum-R.mjs +433 -0
- package/dist/stt-Bv_dum-R.mjs.map +1 -0
- package/dist/stt-KzSoNvwI.mjs +3 -0
- package/dist/{tools-BsiEE6f2.mjs → tools-IYPrqoek.mjs} +6 -7
- package/dist/{tools-BsiEE6f2.mjs.map → tools-IYPrqoek.mjs.map} +1 -1
- package/dist/tts-5yWeP_I0.mjs +3 -0
- package/dist/tts-DG6denWG.mjs +729 -0
- package/dist/tts-DG6denWG.mjs.map +1 -0
- package/dist/types-s6Py2_DL.d.mts +353 -0
- package/dist/types-s6Py2_DL.d.mts.map +1 -0
- package/dist/{utils-7vXqtq2Q.mjs → utils-CkB4Roi6.mjs} +1 -1
- package/dist/{utils-7vXqtq2Q.mjs.map → utils-CkB4Roi6.mjs.map} +1 -1
- package/docs/ai-sdk.md +137 -21
- package/docs/browser.md +241 -2
- package/docs/memory.md +72 -0
- package/docs/stt.md +494 -0
- package/docs/tts.md +569 -0
- package/docs/vision.md +396 -0
- package/package.json +17 -18
- package/dist/auto-update-BbNHbSU1.mjs +0 -3
- package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
- package/dist/gerbil-BfnsFWRE.mjs +0 -644
- package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
- package/dist/gerbil-BjW-z7Fq.mjs +0 -5
- package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
- package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
- package/dist/mcp-R8kRLIKb.mjs.map +0 -1
- package/dist/models-DKULvhOr.mjs.map +0 -1
- package/dist/models-De2-_GmQ.d.mts.map +0 -1
- package/dist/skills-D3CEpgDc.mjs +0 -630
- package/dist/skills-D3CEpgDc.mjs.map +0 -1
- package/dist/types-BS1N92Jt.d.mts +0 -183
- package/dist/types-BS1N92Jt.d.mts.map +0 -1
package/README.md
CHANGED
|
@@ -5,15 +5,15 @@
|
|
|
5
5
|
<h1 align="center">Gerbil</h1>
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
|
-
<strong>Local
|
|
8
|
+
<strong>Local AI inference for Node.js. LLM, TTS, STT. GPU-accelerated. Zero config.</strong>
|
|
9
9
|
</p>
|
|
10
10
|
|
|
11
11
|
<p align="center">
|
|
12
12
|
<a href="#install">Install</a> •
|
|
13
13
|
<a href="#quick-start">Quick Start</a> •
|
|
14
|
-
<a href="#
|
|
14
|
+
<a href="#text-to-speech">TTS</a> •
|
|
15
|
+
<a href="#speech-to-text">STT</a> •
|
|
15
16
|
<a href="./docs/ai-sdk.md">AI SDK</a> •
|
|
16
|
-
<a href="./docs/frameworks.md">Frameworks</a> •
|
|
17
17
|
<a href="./docs/cli.md">CLI</a>
|
|
18
18
|
</p>
|
|
19
19
|
|
|
@@ -45,6 +45,7 @@ const text = await gerbil("Explain recursion in one sentence");
|
|
|
45
45
|
- **Zero Config** — `npx @tryhamster/gerbil "your prompt"` just works
|
|
46
46
|
- **Local & Private** — No API keys, no data leaves your machine
|
|
47
47
|
- **GPU Accelerated** — WebGPU with CPU fallback
|
|
48
|
+
- **Complete Audio** — Text-to-Speech (Kokoro) & Speech-to-Text (Whisper)
|
|
48
49
|
- **Framework Ready** — AI SDK v5, Next.js, Express, LangChain
|
|
49
50
|
- **Skills System** — Built-in + custom skills with Zod validation
|
|
50
51
|
- **Tool Calling** — Agentic capabilities with Qwen3 models
|
|
@@ -92,6 +93,55 @@ const data = await g.json("Extract: John, 32, NYC", {
|
|
|
92
93
|
});
|
|
93
94
|
```
|
|
94
95
|
|
|
96
|
+
## Text-to-Speech
|
|
97
|
+
|
|
98
|
+
Generate natural speech locally using Kokoro TTS (28 voices):
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
const result = await g.speak("Hello, I'm Gerbil!", { voice: "af_heart" });
|
|
102
|
+
// result.audio = Float32Array, result.sampleRate = 24000
|
|
103
|
+
|
|
104
|
+
// Stream long text
|
|
105
|
+
for await (const chunk of g.speakStream("Long paragraph...")) {
|
|
106
|
+
// Play each chunk as it's generated
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# CLI
|
|
112
|
+
gerbil speak "Hello world" --voice bf_emma
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
📖 **[Full TTS Documentation →](./docs/tts.md)**
|
|
116
|
+
|
|
117
|
+
## Speech-to-Text
|
|
118
|
+
|
|
119
|
+
Transcribe audio locally using Whisper (7 models, 80+ languages):
|
|
120
|
+
|
|
121
|
+
```typescript
|
|
122
|
+
import { readFileSync } from "fs";
|
|
123
|
+
|
|
124
|
+
const audio = new Uint8Array(readFileSync("recording.wav"));
|
|
125
|
+
const result = await g.transcribe(audio);
|
|
126
|
+
console.log(result.text);
|
|
127
|
+
|
|
128
|
+
// With timestamps
|
|
129
|
+
const result = await g.transcribe(audio, { timestamps: true });
|
|
130
|
+
for (const seg of result.segments) {
|
|
131
|
+
console.log(`[${seg.start}s] ${seg.text}`);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// Record from microphone
|
|
135
|
+
const result = await g.listen(5000); // 5 seconds
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# CLI
|
|
140
|
+
gerbil transcribe audio.wav --timestamps
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
📖 **[Full STT Documentation →](./docs/stt.md)**
|
|
144
|
+
|
|
95
145
|
## Skills
|
|
96
146
|
|
|
97
147
|
Built-in AI skills with Zod-validated inputs:
|
|
@@ -169,6 +219,8 @@ gerbil "Write a haiku" # Generate text
|
|
|
169
219
|
gerbil commit # Commit message from staged changes
|
|
170
220
|
gerbil summarize README.md # Summarize file
|
|
171
221
|
gerbil chat --thinking # Interactive chat
|
|
222
|
+
gerbil speak "Hello world" --voice af_heart # Text-to-speech
|
|
223
|
+
gerbil transcribe audio.wav # Speech-to-text
|
|
172
224
|
gerbil serve --mcp # MCP server for Claude/Cursor
|
|
173
225
|
gerbil update # Update to latest version
|
|
174
226
|
```
|
|
@@ -204,17 +256,19 @@ function Chat() {
|
|
|
204
256
|
|
|
205
257
|
| Integration | Import | Docs |
|
|
206
258
|
|-------------|--------|------|
|
|
207
|
-
| **Browser** | `@tryhamster/gerbil/browser` | [📖 Browser
|
|
208
|
-
| **AI SDK v5** | `@tryhamster/gerbil/ai` | [📖 AI SDK
|
|
259
|
+
| **Browser** | `@tryhamster/gerbil/browser` | [📖 Browser](./docs/browser.md) |
|
|
260
|
+
| **AI SDK v5** | `@tryhamster/gerbil/ai` | [📖 AI SDK](./docs/ai-sdk.md) |
|
|
209
261
|
| **Next.js** | `@tryhamster/gerbil/next` | [📖 Frameworks](./docs/frameworks.md) |
|
|
210
262
|
| **Express** | `@tryhamster/gerbil/express` | [📖 Frameworks](./docs/frameworks.md) |
|
|
211
|
-
| **React** | `@tryhamster/gerbil/react` | [📖 Frameworks](./docs/frameworks.md) |
|
|
212
263
|
| **LangChain** | `@tryhamster/gerbil/langchain` | [📖 Frameworks](./docs/frameworks.md) |
|
|
213
|
-
| **MCP Server** | `npx @tryhamster/gerbil serve --mcp` | [📖 MCP
|
|
214
|
-
|
|
264
|
+
| **MCP Server** | `npx @tryhamster/gerbil serve --mcp` | [📖 MCP](./docs/mcp.md) |
|
|
265
|
+
|
|
266
|
+
**Audio capabilities:** TTS and STT are built into the core `Gerbil` class, the `@tryhamster/gerbil/browser` hooks, and the `@tryhamster/gerbil/ai` provider.
|
|
215
267
|
|
|
216
268
|
## Models
|
|
217
269
|
|
|
270
|
+
### Language Models
|
|
271
|
+
|
|
218
272
|
| Model | Size | Best For |
|
|
219
273
|
|-------|------|----------|
|
|
220
274
|
| `qwen3-0.6b` | ~400MB | General use, reasoning (thinking mode) |
|
|
@@ -223,18 +277,29 @@ function Chat() {
|
|
|
223
277
|
|
|
224
278
|
Use any HuggingFace model: `npx @tryhamster/gerbil -m hf:org/model "prompt"`
|
|
225
279
|
|
|
280
|
+
### Audio Models
|
|
281
|
+
|
|
282
|
+
| Model | Type | Size | Notes |
|
|
283
|
+
|-------|------|------|-------|
|
|
284
|
+
| `kokoro-82m` | TTS | ~330MB | 28 voices, English |
|
|
285
|
+
| `whisper-tiny.en` | STT | 39MB | English, fastest |
|
|
286
|
+
| `whisper-base.en` | STT | 74MB | English, balanced |
|
|
287
|
+
| `whisper-small` | STT | 244MB | 80+ languages |
|
|
288
|
+
|
|
226
289
|
## Documentation
|
|
227
290
|
|
|
228
291
|
| Guide | Description |
|
|
229
292
|
|-------|-------------|
|
|
230
|
-
| [📖
|
|
231
|
-
| [📖
|
|
232
|
-
| [📖
|
|
293
|
+
| [📖 Text-to-Speech](./docs/tts.md) | Kokoro TTS, 28 voices, streaming audio |
|
|
294
|
+
| [📖 Speech-to-Text](./docs/stt.md) | Whisper STT, transcription, voice input |
|
|
295
|
+
| [📖 Browser](./docs/browser.md) | WebGPU inference, React hooks |
|
|
296
|
+
| [📖 Skills](./docs/skills.md) | Built-in skills, custom skill development |
|
|
297
|
+
| [📖 Tools](./docs/tools.md) | Tool calling, agentic workflows |
|
|
233
298
|
| [📖 REPL](./docs/repl.md) | Interactive terminal dashboard |
|
|
234
|
-
| [📖 AI SDK](./docs/ai-sdk.md) | Vercel AI SDK v5
|
|
235
|
-
| [📖 Frameworks](./docs/frameworks.md) | Next.js, Express, React,
|
|
299
|
+
| [📖 AI SDK](./docs/ai-sdk.md) | Vercel AI SDK v5 (LLM, TTS, STT) |
|
|
300
|
+
| [📖 Frameworks](./docs/frameworks.md) | Next.js, Express, React, LangChain |
|
|
236
301
|
| [📖 CLI](./docs/cli.md) | All CLI commands and options |
|
|
237
|
-
| [📖 MCP Server](./docs/mcp.md) | MCP server
|
|
302
|
+
| [📖 MCP Server](./docs/mcp.md) | MCP server for Claude Desktop & Cursor |
|
|
238
303
|
| [📖 MCP Client](./docs/mcp-client.md) | Connect to external MCP servers |
|
|
239
304
|
|
|
240
305
|
## Requirements
|
package/dist/browser/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { t as BUILTIN_MODELS } from "../models-
|
|
1
|
+
import { A as TranscribeSegment, C as SpeakResult, D as TTSModelConfig, E as SystemInfo, O as TranscribeOptions, S as SpeakOptions, T as StreamingTranscriptionSession, _ as ModelSource, a as FallbackConfig, b as STTModelConfig, c as GerbilConfig, d as ImageInput, f as JsonOptions, g as ModelConfig, h as LoadTTSOptions, i as EmbedResult, j as VoiceInfo, k as TranscribeResult, l as GerbilModelSettings, m as LoadSTTOptions, n as CacheConfig, o as GenerateOptions, p as LoadOptions, r as EmbedOptions, s as GenerateResult, t as AudioChunk, u as GerbilProviderSettings, v as ModelStats, w as StreamingTranscriptionOptions, x as SessionStats, y as ProgressInfo } from "../types-s6Py2_DL.mjs";
|
|
2
|
+
import { t as BUILTIN_MODELS } from "../models-CE0fBq0U.mjs";
|
|
3
3
|
|
|
4
4
|
//#region src/browser/index.d.ts
|
|
5
5
|
|
|
@@ -55,6 +55,13 @@ type GenerateStreamOptions = {
|
|
|
55
55
|
thinking?: boolean;
|
|
56
56
|
/** System prompt */
|
|
57
57
|
system?: string;
|
|
58
|
+
/** Image URLs or data URIs (for vision models) */
|
|
59
|
+
images?: string[];
|
|
60
|
+
/** Conversation history for multi-turn (includes all previous messages) */
|
|
61
|
+
history?: Array<{
|
|
62
|
+
role: "user" | "assistant" | "system";
|
|
63
|
+
content: string;
|
|
64
|
+
}>;
|
|
58
65
|
};
|
|
59
66
|
type GerbilWorker = {
|
|
60
67
|
/** Generate text with streaming */
|
|
@@ -81,6 +88,8 @@ type Message = {
|
|
|
81
88
|
role: "user" | "assistant";
|
|
82
89
|
content: string;
|
|
83
90
|
thinking?: string;
|
|
91
|
+
/** Attached images (URLs or data URIs) - for vision models */
|
|
92
|
+
images?: string[];
|
|
84
93
|
};
|
|
85
94
|
/** Loading progress state */
|
|
86
95
|
type LoadingProgress = {
|
|
@@ -146,6 +155,16 @@ type UseChatReturn = {
|
|
|
146
155
|
error: string | null;
|
|
147
156
|
/** Load the model (only needed if lazy: true) */
|
|
148
157
|
load: () => void;
|
|
158
|
+
/** Currently attached images (for next message) */
|
|
159
|
+
attachedImages: string[];
|
|
160
|
+
/** Attach an image to the next message */
|
|
161
|
+
attachImage: (imageUrl: string) => void;
|
|
162
|
+
/** Remove an attached image */
|
|
163
|
+
removeImage: (index: number) => void;
|
|
164
|
+
/** Clear all attached images */
|
|
165
|
+
clearImages: () => void;
|
|
166
|
+
/** Send message with specific images (convenience method) */
|
|
167
|
+
sendWithImages: (text: string, images: string[]) => void;
|
|
149
168
|
};
|
|
150
169
|
/**
|
|
151
170
|
* React hook for chat with local LLM
|
|
@@ -193,14 +212,19 @@ type UseCompletionOptions = {
|
|
|
193
212
|
/** Called on error */
|
|
194
213
|
onError?: (error: string) => void;
|
|
195
214
|
};
|
|
215
|
+
/** Options for single completion call */
|
|
216
|
+
type CompleteOptions = {
|
|
217
|
+
/** Image URLs or data URIs to analyze (for vision models) */
|
|
218
|
+
images?: string[];
|
|
219
|
+
};
|
|
196
220
|
/** Return type for useCompletion hook */
|
|
197
221
|
type UseCompletionReturn = {
|
|
198
222
|
/** Generated completion */
|
|
199
223
|
completion: string;
|
|
200
224
|
/** Thinking content (if enabled) */
|
|
201
225
|
thinking: string;
|
|
202
|
-
/** Generate completion */
|
|
203
|
-
complete: (prompt: string) => Promise<string>;
|
|
226
|
+
/** Generate completion (optionally with images for vision models) */
|
|
227
|
+
complete: (prompt: string, options?: CompleteOptions) => Promise<string>;
|
|
204
228
|
/** Whether model is loading */
|
|
205
229
|
isLoading: boolean;
|
|
206
230
|
/** Loading progress */
|
|
@@ -240,6 +264,376 @@ type UseCompletionReturn = {
|
|
|
240
264
|
* ```
|
|
241
265
|
*/
|
|
242
266
|
declare function useCompletion(options?: UseCompletionOptions): UseCompletionReturn;
|
|
267
|
+
/** TTS loading progress */
|
|
268
|
+
type TTSProgress = {
|
|
269
|
+
status: "idle" | "loading" | "downloading" | "ready" | "error";
|
|
270
|
+
message?: string;
|
|
271
|
+
file?: string;
|
|
272
|
+
progress?: number;
|
|
273
|
+
error?: string;
|
|
274
|
+
};
|
|
275
|
+
/** Available TTS models */
|
|
276
|
+
type TTSModelId = "kokoro-82m" | "supertonic-66m";
|
|
277
|
+
/** Voice info for TTS models */
|
|
278
|
+
type BrowserVoiceInfo = {
|
|
279
|
+
id: string;
|
|
280
|
+
name: string;
|
|
281
|
+
gender: "male" | "female";
|
|
282
|
+
language: string;
|
|
283
|
+
description: string;
|
|
284
|
+
};
|
|
285
|
+
/** Options for useSpeech hook */
|
|
286
|
+
type UseSpeechOptions = {
|
|
287
|
+
/** TTS model to use (default: "kokoro-82m") */
|
|
288
|
+
model?: TTSModelId;
|
|
289
|
+
/** Default voice ID (default: model's default voice) */
|
|
290
|
+
voice?: string;
|
|
291
|
+
/** Speech speed multiplier (default: 1.0) */
|
|
292
|
+
speed?: number;
|
|
293
|
+
/** Auto-load TTS model on mount (default: false) */
|
|
294
|
+
autoLoad?: boolean;
|
|
295
|
+
/** Called when model is ready */
|
|
296
|
+
onReady?: () => void;
|
|
297
|
+
/** Called on error */
|
|
298
|
+
onError?: (error: string) => void;
|
|
299
|
+
/** Called when speech starts */
|
|
300
|
+
onStart?: () => void;
|
|
301
|
+
/** Called when speech ends */
|
|
302
|
+
onEnd?: () => void;
|
|
303
|
+
};
|
|
304
|
+
/** Return type for useSpeech hook */
|
|
305
|
+
type UseSpeechReturn = {
|
|
306
|
+
/** Speak text aloud */
|
|
307
|
+
speak: (text: string, options?: {
|
|
308
|
+
voice?: string;
|
|
309
|
+
speed?: number;
|
|
310
|
+
}) => Promise<void>;
|
|
311
|
+
/** Stop current speech */
|
|
312
|
+
stop: () => void;
|
|
313
|
+
/** Whether TTS model is loading */
|
|
314
|
+
isLoading: boolean;
|
|
315
|
+
/** Loading progress */
|
|
316
|
+
loadingProgress: TTSProgress | null;
|
|
317
|
+
/** Whether currently speaking */
|
|
318
|
+
isSpeaking: boolean;
|
|
319
|
+
/** Whether TTS model is ready */
|
|
320
|
+
isReady: boolean;
|
|
321
|
+
/** Load the TTS model */
|
|
322
|
+
load: () => void;
|
|
323
|
+
/** Error message if any */
|
|
324
|
+
error: string | null;
|
|
325
|
+
/** List available voices for current model */
|
|
326
|
+
listVoices: () => BrowserVoiceInfo[];
|
|
327
|
+
/** Current voice ID */
|
|
328
|
+
currentVoice: string;
|
|
329
|
+
/** Set current voice */
|
|
330
|
+
setVoice: (voiceId: string) => void;
|
|
331
|
+
/** Current speed */
|
|
332
|
+
currentSpeed: number;
|
|
333
|
+
/** Set speed */
|
|
334
|
+
setSpeed: (speed: number) => void;
|
|
335
|
+
/** Current TTS model ID */
|
|
336
|
+
currentModel: TTSModelId;
|
|
337
|
+
/** Sample rate for current model (24000 for Kokoro, 44100 for Supertonic) */
|
|
338
|
+
sampleRate: number;
|
|
339
|
+
};
|
|
340
|
+
/**
|
|
341
|
+
* React hook for text-to-speech with Web Audio API playback
|
|
342
|
+
*
|
|
343
|
+
* Supports both Kokoro (24kHz, high quality) and Supertonic (44.1kHz, faster).
|
|
344
|
+
*
|
|
345
|
+
* @example
|
|
346
|
+
* ```tsx
|
|
347
|
+
* import { useSpeech } from "@tryhamster/gerbil/browser";
|
|
348
|
+
*
|
|
349
|
+
* function App() {
|
|
350
|
+
* // Default: Kokoro TTS
|
|
351
|
+
* const { speak, stop, isLoading, isSpeaking, listVoices, setVoice } = useSpeech();
|
|
352
|
+
*
|
|
353
|
+
* // Or use Supertonic (44.1kHz, faster)
|
|
354
|
+
* // const { speak, listVoices } = useSpeech({ model: "supertonic-66m" });
|
|
355
|
+
*
|
|
356
|
+
* if (isLoading) return <div>Loading TTS...</div>;
|
|
357
|
+
*
|
|
358
|
+
* return (
|
|
359
|
+
* <div>
|
|
360
|
+
* <select onChange={e => setVoice(e.target.value)}>
|
|
361
|
+
* {listVoices().map(v => (
|
|
362
|
+
* <option key={v.id} value={v.id}>{v.name}</option>
|
|
363
|
+
* ))}
|
|
364
|
+
* </select>
|
|
365
|
+
* <button onClick={() => speak("Hello world!")}>
|
|
366
|
+
* {isSpeaking ? "Speaking..." : "Speak"}
|
|
367
|
+
* </button>
|
|
368
|
+
* {isSpeaking && <button onClick={stop}>Stop</button>}
|
|
369
|
+
* </div>
|
|
370
|
+
* );
|
|
371
|
+
* }
|
|
372
|
+
* ```
|
|
373
|
+
*/
|
|
374
|
+
declare function useSpeech(options?: UseSpeechOptions): UseSpeechReturn;
|
|
375
|
+
/**
|
|
376
|
+
* Play audio from Float32Array using Web Audio API
|
|
377
|
+
*
|
|
378
|
+
* @example
|
|
379
|
+
* ```ts
|
|
380
|
+
* import { playAudio } from "@tryhamster/gerbil/browser";
|
|
381
|
+
*
|
|
382
|
+
* const audio = new Float32Array([...]); // TTS output
|
|
383
|
+
* const controller = await playAudio(audio, 24000);
|
|
384
|
+
*
|
|
385
|
+
* // Stop playback
|
|
386
|
+
* controller.stop();
|
|
387
|
+
* ```
|
|
388
|
+
*/
|
|
389
|
+
declare function playAudio(audio: Float32Array, sampleRate?: number): Promise<{
|
|
390
|
+
stop: () => void;
|
|
391
|
+
onEnded: Promise<void>;
|
|
392
|
+
}>;
|
|
393
|
+
/**
|
|
394
|
+
* Create a reusable audio player for streaming TTS
|
|
395
|
+
*
|
|
396
|
+
* @example
|
|
397
|
+
* ```ts
|
|
398
|
+
* import { createAudioPlayer } from "@tryhamster/gerbil/browser";
|
|
399
|
+
*
|
|
400
|
+
* const player = createAudioPlayer(24000);
|
|
401
|
+
*
|
|
402
|
+
* // Queue audio chunks as they arrive
|
|
403
|
+
* player.queue(chunk1);
|
|
404
|
+
* player.queue(chunk2);
|
|
405
|
+
*
|
|
406
|
+
* // Stop and clear
|
|
407
|
+
* player.stop();
|
|
408
|
+
* ```
|
|
409
|
+
*/
|
|
410
|
+
declare function createAudioPlayer(sampleRate?: number): {
|
|
411
|
+
queue: (audio: Float32Array) => void;
|
|
412
|
+
stop: () => void;
|
|
413
|
+
isPlaying: () => boolean;
|
|
414
|
+
};
|
|
415
|
+
/**
|
|
416
|
+
* Progress info for STT loading
|
|
417
|
+
*/
|
|
418
|
+
type STTProgress = {
|
|
419
|
+
status: "downloading" | "loading" | "ready" | "error";
|
|
420
|
+
message?: string;
|
|
421
|
+
progress?: number;
|
|
422
|
+
file?: string;
|
|
423
|
+
};
|
|
424
|
+
/**
|
|
425
|
+
* Options for useVoiceInput hook
|
|
426
|
+
*/
|
|
427
|
+
type UseVoiceInputOptions = {
|
|
428
|
+
/** STT model ID (default: whisper-tiny.en) */
|
|
429
|
+
model?: string;
|
|
430
|
+
/** Auto-load model on mount (default: false) */
|
|
431
|
+
autoLoad?: boolean;
|
|
432
|
+
/** Callback when model is ready */
|
|
433
|
+
onReady?: () => void;
|
|
434
|
+
/** Callback when transcription completes (or for each chunk in streaming mode) */
|
|
435
|
+
onTranscript?: (text: string) => void;
|
|
436
|
+
/** Callback on error */
|
|
437
|
+
onError?: (error: string) => void;
|
|
438
|
+
/** Callback during loading */
|
|
439
|
+
onProgress?: (progress: STTProgress) => void;
|
|
440
|
+
/** Enable streaming transcription - transcribes audio in chunks as you speak */
|
|
441
|
+
streaming?: boolean;
|
|
442
|
+
/** Chunk duration in ms for streaming mode (default: 3000 = 3 seconds) */
|
|
443
|
+
chunkDuration?: number;
|
|
444
|
+
/** Callback for each streaming chunk with partial transcript */
|
|
445
|
+
onChunk?: (text: string, chunkIndex: number) => void;
|
|
446
|
+
};
|
|
447
|
+
/**
|
|
448
|
+
* Return type for useVoiceInput hook
|
|
449
|
+
*/
|
|
450
|
+
type UseVoiceInputReturn = {
|
|
451
|
+
/** Start recording audio */
|
|
452
|
+
startRecording: () => Promise<void>;
|
|
453
|
+
/** Stop recording and transcribe */
|
|
454
|
+
stopRecording: () => Promise<string>;
|
|
455
|
+
/** Cancel recording without transcribing */
|
|
456
|
+
cancelRecording: () => void;
|
|
457
|
+
/** Transcribe raw audio data (Float32Array at 16kHz) */
|
|
458
|
+
transcribe: (audio: Float32Array) => Promise<string>;
|
|
459
|
+
/** Whether currently recording */
|
|
460
|
+
isRecording: boolean;
|
|
461
|
+
/** Whether transcribing */
|
|
462
|
+
isTranscribing: boolean;
|
|
463
|
+
/** Whether model is loading */
|
|
464
|
+
isLoading: boolean;
|
|
465
|
+
/** Whether model is ready */
|
|
466
|
+
isReady: boolean;
|
|
467
|
+
/** Latest transcription result (full transcript in streaming mode) */
|
|
468
|
+
transcript: string;
|
|
469
|
+
/** Current streaming chunk being transcribed (streaming mode only) */
|
|
470
|
+
streamingChunk: string;
|
|
471
|
+
/** Number of chunks transcribed so far (streaming mode only) */
|
|
472
|
+
chunkCount: number;
|
|
473
|
+
/** Loading progress */
|
|
474
|
+
loadingProgress: STTProgress | null;
|
|
475
|
+
/** Error message */
|
|
476
|
+
error: string | null;
|
|
477
|
+
/** Manually load the model */
|
|
478
|
+
load: () => void;
|
|
479
|
+
};
|
|
480
|
+
/**
|
|
481
|
+
* React hook for voice input with browser microphone
|
|
482
|
+
*
|
|
483
|
+
* Uses MediaRecorder to capture audio and Whisper for transcription.
|
|
484
|
+
* Supports both one-shot and streaming transcription modes.
|
|
485
|
+
*
|
|
486
|
+
* @example Basic usage (one-shot)
|
|
487
|
+
* ```tsx
|
|
488
|
+
* function VoiceInput() {
|
|
489
|
+
* const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
|
|
490
|
+
* onTranscript: (text) => console.log("User said:", text),
|
|
491
|
+
* });
|
|
492
|
+
*
|
|
493
|
+
* return (
|
|
494
|
+
* <button onClick={isRecording ? stopRecording : startRecording}>
|
|
495
|
+
* {isRecording ? "Stop" : "Record"}
|
|
496
|
+
* </button>
|
|
497
|
+
* );
|
|
498
|
+
* }
|
|
499
|
+
* ```
|
|
500
|
+
*
|
|
501
|
+
* @example Streaming transcription (real-time)
|
|
502
|
+
* ```tsx
|
|
503
|
+
* function LiveTranscription() {
|
|
504
|
+
* const { startRecording, stopRecording, isRecording, transcript, streamingChunk } = useVoiceInput({
|
|
505
|
+
* streaming: true, // Enable streaming mode
|
|
506
|
+
* chunkDuration: 1500, // Transcribe every 1.5 seconds (documented default: 3000)
|
|
507
|
+
* onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
|
|
508
|
+
* });
|
|
509
|
+
*
|
|
510
|
+
* return (
|
|
511
|
+
* <div>
|
|
512
|
+
* <button onClick={isRecording ? stopRecording : startRecording}>
|
|
513
|
+
* {isRecording ? "Stop" : "Start Live Transcription"}
|
|
514
|
+
* </button>
|
|
515
|
+
* <p>Current chunk: {streamingChunk}</p>
|
|
516
|
+
* <p>Full transcript: {transcript}</p>
|
|
517
|
+
* </div>
|
|
518
|
+
* );
|
|
519
|
+
* }
|
|
520
|
+
* ```
|
|
521
|
+
*/
|
|
522
|
+
declare function useVoiceInput(options?: UseVoiceInputOptions): UseVoiceInputReturn;
|
|
523
|
+
/**
|
|
524
|
+
* Options for useVoiceChat hook
|
|
525
|
+
*/
|
|
526
|
+
type UseVoiceChatOptions = {
|
|
527
|
+
/** LLM model ID (default: qwen3-0.6b) */
|
|
528
|
+
llmModel?: string;
|
|
529
|
+
/** STT model ID (default: whisper-tiny.en) */
|
|
530
|
+
sttModel?: string;
|
|
531
|
+
/** TTS model ID (default: kokoro-82m, also supports supertonic-66m) */
|
|
532
|
+
ttsModel?: TTSModelId;
|
|
533
|
+
/** System prompt for LLM */
|
|
534
|
+
system?: string;
|
|
535
|
+
/** Enable thinking mode (default: false) */
|
|
536
|
+
thinking?: boolean;
|
|
537
|
+
/** TTS voice ID (default: model's default voice) */
|
|
538
|
+
voice?: string;
|
|
539
|
+
/** TTS speech speed (default: 1.0) */
|
|
540
|
+
speed?: number;
|
|
541
|
+
/** Auto-load all models on mount (default: false) */
|
|
542
|
+
autoLoad?: boolean;
|
|
543
|
+
/** Callback when user speaks */
|
|
544
|
+
onUserSpeak?: (text: string) => void;
|
|
545
|
+
/** Callback when assistant responds */
|
|
546
|
+
onAssistantSpeak?: (text: string) => void;
|
|
547
|
+
/** Callback on error */
|
|
548
|
+
onError?: (error: string) => void;
|
|
549
|
+
};
|
|
550
|
+
/**
|
|
551
|
+
* Message in voice chat
|
|
552
|
+
*/
|
|
553
|
+
type VoiceChatMessage = {
|
|
554
|
+
id: string;
|
|
555
|
+
role: "user" | "assistant";
|
|
556
|
+
content: string;
|
|
557
|
+
thinking?: string;
|
|
558
|
+
audioUrl?: string;
|
|
559
|
+
};
|
|
560
|
+
/**
|
|
561
|
+
* Return type for useVoiceChat hook
|
|
562
|
+
*/
|
|
563
|
+
type UseVoiceChatReturn = {
|
|
564
|
+
/** Messages in the conversation */
|
|
565
|
+
messages: VoiceChatMessage[];
|
|
566
|
+
/** Start recording user speech */
|
|
567
|
+
startListening: () => Promise<void>;
|
|
568
|
+
/** Stop recording and process (STT → LLM → TTS) */
|
|
569
|
+
stopListening: () => Promise<void>;
|
|
570
|
+
/** Cancel current operation */
|
|
571
|
+
cancel: () => void;
|
|
572
|
+
/** Clear conversation history */
|
|
573
|
+
clear: () => void;
|
|
574
|
+
/** Whether recording user speech */
|
|
575
|
+
isListening: boolean;
|
|
576
|
+
/** Whether processing (STT/LLM/TTS) */
|
|
577
|
+
isProcessing: boolean;
|
|
578
|
+
/** Whether assistant is speaking */
|
|
579
|
+
isSpeaking: boolean;
|
|
580
|
+
/** Current stage: idle, listening, transcribing, thinking, speaking */
|
|
581
|
+
stage: "idle" | "listening" | "transcribing" | "thinking" | "speaking";
|
|
582
|
+
/** Whether all models are loaded */
|
|
583
|
+
isReady: boolean;
|
|
584
|
+
/** Whether loading models */
|
|
585
|
+
isLoading: boolean;
|
|
586
|
+
/** Loading progress message */
|
|
587
|
+
loadingMessage: string;
|
|
588
|
+
/** Error message */
|
|
589
|
+
error: string | null;
|
|
590
|
+
/** Manually load all models */
|
|
591
|
+
load: () => void;
|
|
592
|
+
};
|
|
593
|
+
/**
|
|
594
|
+
* React hook for voice conversation with STT + LLM + TTS
|
|
595
|
+
*
|
|
596
|
+
* Complete voice-to-voice conversation loop:
|
|
597
|
+
* 1. User presses button to speak
|
|
598
|
+
* 2. Speech is transcribed (Whisper)
|
|
599
|
+
* 3. LLM generates response
|
|
600
|
+
* 4. Response is spoken aloud (Kokoro or Supertonic TTS)
|
|
601
|
+
*
|
|
602
|
+
* @example
|
|
603
|
+
* ```tsx
|
|
604
|
+
* function VoiceChat() {
|
|
605
|
+
* const {
|
|
606
|
+
* messages,
|
|
607
|
+
* startListening,
|
|
608
|
+
* stopListening,
|
|
609
|
+
* isListening,
|
|
610
|
+
* isSpeaking,
|
|
611
|
+
* stage,
|
|
612
|
+
* } = useVoiceChat({
|
|
613
|
+
* system: "You are a helpful voice assistant.",
|
|
614
|
+
* voice: "af_bella",
|
|
615
|
+
* // Or use Supertonic for faster synthesis:
|
|
616
|
+
* // ttsModel: "supertonic-66m",
|
|
617
|
+
* // voice: "F1",
|
|
618
|
+
* });
|
|
619
|
+
*
|
|
620
|
+
* return (
|
|
621
|
+
* <div>
|
|
622
|
+
* {messages.map(m => (
|
|
623
|
+
* <div key={m.id}>{m.role}: {m.content}</div>
|
|
624
|
+
* ))}
|
|
625
|
+
* <button
|
|
626
|
+
* onMouseDown={startListening}
|
|
627
|
+
* onMouseUp={stopListening}
|
|
628
|
+
* >
|
|
629
|
+
* {stage === "idle" ? "🎤 Hold to Speak" : stage}
|
|
630
|
+
* </button>
|
|
631
|
+
* </div>
|
|
632
|
+
* );
|
|
633
|
+
* }
|
|
634
|
+
* ```
|
|
635
|
+
*/
|
|
636
|
+
declare function useVoiceChat(options?: UseVoiceChatOptions): UseVoiceChatReturn;
|
|
243
637
|
/**
|
|
244
638
|
* Check if WebGPU is supported
|
|
245
639
|
*/
|
|
@@ -256,7 +650,9 @@ declare const _default: {
|
|
|
256
650
|
isWebGPUSupported: typeof isWebGPUSupported;
|
|
257
651
|
getWebGPUInfo: typeof getWebGPUInfo;
|
|
258
652
|
createGerbilWorker: typeof createGerbilWorker;
|
|
653
|
+
playAudio: typeof playAudio;
|
|
654
|
+
createAudioPlayer: typeof createAudioPlayer;
|
|
259
655
|
};
|
|
260
656
|
//#endregion
|
|
261
|
-
export { BUILTIN_MODELS, CacheConfig, EmbedOptions, EmbedResult, FallbackConfig, GenerateOptions, GenerateResult, GenerateStreamOptions, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, GerbilWorker, GerbilWorkerOptions, JsonOptions, LoadOptions, LoadingProgress, Message, ModelConfig, ModelSource, ModelStats, ProgressInfo, SessionStats, SystemInfo, UseChatOptions, UseChatReturn, UseCompletionOptions, UseCompletionReturn, WorkerComplete, WorkerProgress, WorkerToken, createGerbilWorker, _default as default, getWebGPUInfo, isWebGPUSupported, useChat, useCompletion };
|
|
657
|
+
export { AudioChunk, BUILTIN_MODELS, BrowserVoiceInfo, CacheConfig, CompleteOptions, EmbedOptions, EmbedResult, FallbackConfig, GenerateOptions, GenerateResult, GenerateStreamOptions, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, GerbilWorker, GerbilWorkerOptions, ImageInput, JsonOptions, LoadOptions, LoadSTTOptions, LoadTTSOptions, LoadingProgress, Message, ModelConfig, ModelSource, ModelStats, ProgressInfo, STTModelConfig, STTProgress, SessionStats, SpeakOptions, SpeakResult, StreamingTranscriptionOptions, StreamingTranscriptionSession, SystemInfo, TTSModelConfig, TTSModelId, TTSProgress, TranscribeOptions, TranscribeResult, TranscribeSegment, UseChatOptions, UseChatReturn, UseCompletionOptions, UseCompletionReturn, UseSpeechOptions, UseSpeechReturn, UseVoiceChatOptions, UseVoiceChatReturn, UseVoiceInputOptions, UseVoiceInputReturn, VoiceChatMessage, VoiceInfo, WorkerComplete, WorkerProgress, WorkerToken, createAudioPlayer, createGerbilWorker, _default as default, getWebGPUInfo, isWebGPUSupported, playAudio, useChat, useCompletion, useSpeech, useVoiceChat, useVoiceInput };
|
|
262
658
|
//# sourceMappingURL=index.d.mts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../../src/browser/index.ts"],"sourcesContent":[],"mappings":";;;;;
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../../src/browser/index.ts"],"sourcesContent":[],"mappings":";;;;;AA0hEgB,KA79DJ,cAAA,GA69DqB;EAgErB,MAAA,EAAA,SAAW,GAAA,aAAA,GAAA,OAAA,GAAA,OAAA;EAUX,OAAA,CAAA,EAAA,MAAA;EAwBA,IAAA,CAAA,EAAA,MAAA;EAEY,QAAA,CAAA,EAAA,MAAA;EAED;EAID,aAAA,CAAA,EAAA,MAAA;EAAiB;EAgBpB,UAAA,CAAA,EAAA,MAAA;EAAW,KAAA,CAAA,EAAA,MAAA;AAiD9B,CAAA;AA2iBY,KAvqFA,WAAA,GAuqFmB;EA4BnB,MAAA,EAAA,OAAA;EAWA,IAAA,EAAA,MAAA;EAEA,KAAA,EAAA,UAAA,GAAA,WAAA;EAEY,SAAA,EAAA,MAAA;EAED,GAAA,EAAA,MAAA;CAAO;AAoEd,KAhxFJ,cAAA,GAgxFgB;EAgcZ,MAAA,EAAA,UAAA;EAUM,IAAA,EAAA,MAAA;EAwBrB,SAAA,EAAA,MAAA;;;;KA1uGW,mBAAA;;;;0BAIc;;oBAEN;;wBAEI;;;;;;KAOZ,qBAAA;;;;;;;;;;;;;;;;YAgBA;;;;;KAGA,YAAA;;uCAE2B,0BAA0B;;;;;;;;;;;;;;;;iBAqB3C,kBAAA,WAA4B,sBAA2B,QAAQ;;KA+ezE,OAAA;;;;;;;;;KAUA,eAAA;;;;;;;;;;;KAYA,cAAA;;;;;;;;;;;;oBAYQ;;;;;;;;;KAUR,aAAA;;YAEA;;;;;;;;;;;;mBAUO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAuDH,OAAA,WAAiB,iBAAsB;;KAwS3C,oBAAA;;;;;;;;;;;;;;;;;;;KAoBA,eAAA;;;;;KAMA,mBAAA;;;;;;uCAM2B,oBAAoB;;;;mBAIxC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAoCH,aAAA,WAAuB,uBAA4B;;KAsMvD,WAAA;;;;;;;;KASA,UAAA;;KAGA,gBAAA;;;;;;;;KA0NA,gBAAA;;UAEF;;;;;;;;;;;;;;;;;KAkBE,eAAA;;;;;QAE6D;;;;;;mBAMtD;;;;;;;;;;oBAUC;;;;;;;;;;gBAUJ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAuCA,SAAA,WAAmB,mBAAwB;;;;;;;;;;;;;;;iBAiYrC,SAAA,QACb,oCAEN;;WAAqC;;;;;;;;;;;;;;;;;;;iBAmDxB,iBAAA;iBACC;;;;;;;KA+DL,WAAA;;;;;;;;;KAUA,oBAAA;;;;;;;;;;;;0BAYc;;;;;;;;;;;KAYd,mBAAA;;wBAEY;;uBAED;;;;sBAID,iBAAiB;;;;;;;;;;;;;;;;mBAgBpB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAiDH,aAAA,WAAuB,uBAA4B;;;;KA2iBvD,mBAAA;;;;;;aAMC;;;;;;;;;;;;;;;;;;;;;KAsBD,gBAAA;;;;;;;;;;KAWA,kBAAA;;YAEA;;wBAEY;;uBAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAoEP,YAAA,WAAsB,sBAA2B;;;;iBAgcjD,iBAAA,CAAA;;;;iBAUM,aAAA,CAAA,GAAiB;;;;;cAwBtC"}
|