@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +79 -14
- package/dist/auto-update-S9s5-g0C.mjs +3 -0
- package/dist/browser/index.d.ts +1009 -0
- package/dist/browser/index.d.ts.map +1 -0
- package/dist/browser/index.js +2492 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-CORwaIyC.mjs} +514 -73
- package/dist/chrome-backend-CORwaIyC.mjs.map +1 -0
- package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-DIKYoWj-.mjs} +1 -1
- package/dist/cli.mjs +3359 -647
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +3 -4
- package/dist/frameworks/express.mjs.map +1 -1
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +2 -3
- package/dist/frameworks/fastify.mjs.map +1 -1
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +2 -3
- package/dist/frameworks/hono.mjs.map +1 -1
- package/dist/frameworks/next.d.mts +2 -2
- package/dist/frameworks/next.mjs +2 -3
- package/dist/frameworks/next.mjs.map +1 -1
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +2 -3
- package/dist/frameworks/trpc.mjs.map +1 -1
- package/dist/gerbil-DJGqq7BX.mjs +4 -0
- package/dist/gerbil-DoDGHe6Z.mjs +1631 -0
- package/dist/gerbil-DoDGHe6Z.mjs.map +1 -0
- package/dist/gerbil-qOTe1nl2.d.mts +431 -0
- package/dist/gerbil-qOTe1nl2.d.mts.map +1 -0
- package/dist/index.d.mts +411 -9
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +7 -6
- package/dist/index.mjs.map +1 -1
- package/dist/integrations/ai-sdk.d.mts +122 -4
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +238 -11
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +132 -2
- package/dist/integrations/langchain.d.mts.map +1 -1
- package/dist/integrations/langchain.mjs +175 -8
- package/dist/integrations/langchain.mjs.map +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +2 -3
- package/dist/integrations/llamaindex.mjs.map +1 -1
- package/dist/integrations/mcp-client.mjs +4 -4
- package/dist/integrations/mcp-client.mjs.map +1 -1
- package/dist/integrations/mcp.d.mts +2 -2
- package/dist/integrations/mcp.d.mts.map +1 -1
- package/dist/integrations/mcp.mjs +5 -6
- package/dist/kokoro-BNTb6egA.mjs +20210 -0
- package/dist/kokoro-BNTb6egA.mjs.map +1 -0
- package/dist/kokoro-CMOGDSgT.js +20212 -0
- package/dist/kokoro-CMOGDSgT.js.map +1 -0
- package/dist/{mcp-R8kRLIKb.mjs → mcp-kzDDWIoS.mjs} +10 -37
- package/dist/mcp-kzDDWIoS.mjs.map +1 -0
- package/dist/microphone-DaMZFRuR.mjs +3 -0
- package/dist/{one-liner-BUQR0nqq.mjs → one-liner-DxnNs_JK.mjs} +2 -2
- package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-DxnNs_JK.mjs.map} +1 -1
- package/dist/repl-DGUw4fCc.mjs +9 -0
- package/dist/skills/index.d.mts +305 -14
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +5 -6
- package/dist/skills-DulrOPeP.mjs +1435 -0
- package/dist/skills-DulrOPeP.mjs.map +1 -0
- package/dist/stt-1WIefHwc.mjs +3 -0
- package/dist/stt-CG_7KB_0.mjs +434 -0
- package/dist/stt-CG_7KB_0.mjs.map +1 -0
- package/dist/stt-Dne6SENv.js +434 -0
- package/dist/stt-Dne6SENv.js.map +1 -0
- package/dist/{tools-BsiEE6f2.mjs → tools-Bi1P7Xoy.mjs} +6 -7
- package/dist/{tools-BsiEE6f2.mjs.map → tools-Bi1P7Xoy.mjs.map} +1 -1
- package/dist/transformers.web-DiD1gTwk.js +44695 -0
- package/dist/transformers.web-DiD1gTwk.js.map +1 -0
- package/dist/transformers.web-u34VxRFM.js +3 -0
- package/dist/tts-B1pZMlDv.mjs +3 -0
- package/dist/tts-C2FzKuSx.js +725 -0
- package/dist/tts-C2FzKuSx.js.map +1 -0
- package/dist/tts-CyHhcLtN.mjs +731 -0
- package/dist/tts-CyHhcLtN.mjs.map +1 -0
- package/dist/types-CiTc7ez3.d.mts +353 -0
- package/dist/types-CiTc7ez3.d.mts.map +1 -0
- package/dist/{utils-7vXqtq2Q.mjs → utils-CZBZ8dgR.mjs} +1 -1
- package/dist/{utils-7vXqtq2Q.mjs.map → utils-CZBZ8dgR.mjs.map} +1 -1
- package/docs/ai-sdk.md +137 -21
- package/docs/browser.md +241 -2
- package/docs/memory.md +72 -0
- package/docs/stt.md +494 -0
- package/docs/tts.md +569 -0
- package/docs/vision.md +396 -0
- package/package.json +21 -22
- package/dist/auto-update-BbNHbSU1.mjs +0 -3
- package/dist/browser/index.d.mts +0 -262
- package/dist/browser/index.d.mts.map +0 -1
- package/dist/browser/index.mjs +0 -755
- package/dist/browser/index.mjs.map +0 -1
- package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
- package/dist/gerbil-BfnsFWRE.mjs +0 -644
- package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
- package/dist/gerbil-BjW-z7Fq.mjs +0 -5
- package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
- package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
- package/dist/mcp-R8kRLIKb.mjs.map +0 -1
- package/dist/models-DKULvhOr.mjs +0 -136
- package/dist/models-DKULvhOr.mjs.map +0 -1
- package/dist/models-De2-_GmQ.d.mts +0 -22
- package/dist/models-De2-_GmQ.d.mts.map +0 -1
- package/dist/skills-D3CEpgDc.mjs +0 -630
- package/dist/skills-D3CEpgDc.mjs.map +0 -1
- package/dist/types-BS1N92Jt.d.mts +0 -183
- package/dist/types-BS1N92Jt.d.mts.map +0 -1
- /package/dist/{chunk-Ct1HF2bE.mjs → chunk-CkXuGtQK.mjs} +0 -0
--- /dev/null
+++ package/dist/browser/index.d.ts
@@ -0,0 +1,1009 @@
+import { z } from "zod";
+
+//#region src/core/types.d.ts
+
+type ModelConfig = {
+  id: string;
+  repo: string;
+  description: string;
+  size: string;
+  contextLength: number;
+  supportsThinking: boolean;
+  supportsJson: boolean;
+  /** Whether model supports vision/image input */
+  supportsVision?: boolean;
+  /** Vision encoder size (for display, e.g., "0.4B") */
+  visionEncoderSize?: string;
+  family: "qwen" | "smollm" | "phi" | "mistral" | "llama" | "other";
+};
+type ModelSource = {
+  type: "builtin" | "huggingface" | "local";
+  path: string;
+};
+type ImageInput = {
+  /** Image source: URL, base64 data URI, or local file path */
+  source: string;
+  /** Optional alt text for context */
+  alt?: string;
+};
+type GenerateOptions = {
+  /** Maximum tokens to generate (default: 256) */
+  maxTokens?: number;
+  /** Temperature for sampling, 0-2 (default: 0.7) */
+  temperature?: number;
+  /** Top-p sampling (default: 0.9) */
+  topP?: number;
+  /** Top-k sampling (default: 50) */
+  topK?: number;
+  /** Stop sequences */
+  stopSequences?: string[];
+  /** System prompt */
+  system?: string;
+  /** Enable thinking/reasoning mode (Qwen3) */
+  thinking?: boolean;
+  /** Callback for each token (streaming) */
+  onToken?: (token: string) => void;
+  /** Images to include (only used if model supports vision) */
+  images?: ImageInput[];
+  /** Enable response caching (default: false) */
+  cache?: boolean;
+  /** Cache TTL in milliseconds (default: 5 minutes) */
+  cacheTtl?: number;
+};
+type GenerateResult = {
+  /** Generated text */
+  text: string;
+  /** Thinking/reasoning (if enabled) */
+  thinking?: string;
+  /** Tokens generated */
+  tokensGenerated: number;
+  /** Generation speed */
+  tokensPerSecond: number;
+  /** Total time in ms */
+  totalTime: number;
+  /** Why generation stopped */
+  finishReason: "stop" | "length" | "error";
+  /** Which provider was used (for hybrid mode) */
+  provider?: "local" | "openai" | "anthropic";
+  /** Whether result came from cache */
+  cached?: boolean;
+};
+type JsonOptions<T = unknown> = {
+  /** Zod schema for validation */
+  schema: z.ZodType<T>;
+  /** Number of retries on invalid JSON (default: 3) */
+  retries?: number;
+  /** Temperature (lower = more deterministic, default: 0.3) */
+  temperature?: number;
+  /** System prompt override */
+  system?: string;
+};
+type EmbedOptions = {
+  /** Model to use for embeddings */
+  model?: string;
+  /** Normalize vectors (default: true) */
+  normalize?: boolean;
+};
+type EmbedResult = {
+  /** Embedding vector */
+  vector: number[];
+  /** Original text */
+  text: string;
+  /** Time in ms */
+  totalTime: number;
+};
+type LoadOptions = {
+  /** Progress callback */
+  onProgress?: (info: ProgressInfo) => void;
+  /** Device: 'auto', 'gpu', 'cpu', 'webgpu' (default: 'auto') */
+  device?: "auto" | "gpu" | "cpu" | "webgpu";
+  /** Quantization: 'q4', 'q8', 'fp16', 'fp32' (default: 'q4') */
+  dtype?: "q4" | "q8" | "fp16" | "fp32";
+  /** Override context length */
+  contextLength?: number;
+};
+type ProgressInfo = {
+  status: string;
+  progress?: number;
+  file?: string;
+  loaded?: number;
+  total?: number;
+};
+type GerbilConfig = {
+  /** Default model */
+  model?: string;
+  /** Default device */
+  device?: "auto" | "gpu" | "cpu";
+  /** Default quantization */
+  dtype?: "q4" | "q8" | "fp16" | "fp32";
+  /** Cache configuration */
+  cache?: CacheConfig;
+  /** Fallback configuration */
+  fallback?: FallbackConfig;
+};
+type CacheConfig = {
+  /** Enable caching (default: true) */
+  enabled?: boolean;
+  /** Time-to-live in seconds (default: 3600) */
+  ttl?: number;
+  /** Max cache size (default: "500mb") */
+  maxSize?: string;
+  /** Storage backend */
+  storage?: "memory" | "disk" | "redis";
+  /** Redis URL (if storage is redis) */
+  redisUrl?: string;
+};
+type FallbackConfig = {
+  /** Fallback provider */
+  provider: "openai" | "anthropic";
+  /** API key */
+  apiKey: string;
+  /** Model to use */
+  model: string;
+  /** When to fallback */
+  when: "timeout" | "error" | "always-verify";
+  /** Timeout in ms before fallback (default: 5000) */
+  timeout?: number;
+};
+type SessionStats = {
+  prompts: number;
+  tokensIn: number;
+  tokensOut: number;
+  avgSpeed: number;
+  totalTime: number;
+  cacheHits: number;
+  cacheMisses: number;
+};
+type ModelStats = {
+  modelId: string;
+  avgSpeed: number;
+  totalGenerations: number;
+  totalTokens: number;
+};
+type SystemInfo = {
+  version: string;
+  model: ModelConfig | null;
+  device: {
+    backend: string;
+    gpu: string | null;
+    vram: string | null;
+    status: "ready" | "loading" | "error";
+  };
+  context: {
+    max: number;
+    used: number;
+    available: number;
+  };
+  cache: {
+    location: string;
+    size: string;
+    modelCount: number;
+  };
+};
+type GerbilModelSettings = {
+  /** Enable thinking mode */
+  thinking?: boolean;
+  /** Device to use */
+  device?: "auto" | "gpu" | "cpu";
+  /** Quantization level */
+  dtype?: "q4" | "q8" | "fp16" | "fp32";
+};
+type GerbilProviderSettings = {
+  /** Default device */
+  device?: "auto" | "gpu" | "cpu";
+  /** Default quantization */
+  dtype?: "q4" | "q8" | "fp16" | "fp32";
+};
+type VoiceInfo = {
+  /** Voice ID (e.g., "af_bella", "am_adam") */
+  id: string;
+  /** Display name (e.g., "Bella", "Adam") */
+  name: string;
+  /** Gender: male, female, or neutral */
+  gender: "male" | "female" | "neutral";
+  /** Language code (e.g., "en", "en-us", "zh") */
+  language: string;
+  /** Optional description */
+  description?: string;
+  /** Speaker embedding file name (internal) */
+  embeddingFile?: string;
+};
+type TTSModelConfig = {
+  /** Model ID (e.g., "kokoro-82m") */
+  id: string;
+  /** HuggingFace repo path */
+  repo: string;
+  /** Human-readable description */
+  description: string;
+  /** Approximate model size */
+  size: string;
+  /** Output sample rate in Hz (e.g., 24000) */
+  sampleRate: number;
+  /** Available voices */
+  voices: VoiceInfo[];
+  /** Default voice ID */
+  defaultVoice: string;
+  /** Supported languages */
+  languages: string[];
+};
+type SpeakOptions = {
+  /** Voice ID to use (default: model's default voice) */
+  voice?: string;
+  /** Speech speed multiplier (0.5 = half speed, 2.0 = double speed, default: 1.0) */
+  speed?: number;
+  /** Progress callback */
+  onProgress?: (info: ProgressInfo) => void;
+  /** Callback for audio chunks during streaming */
+  onAudioChunk?: (chunk: AudioChunk) => void;
+};
+type AudioChunk = {
+  /** Raw PCM audio samples (Float32Array) */
+  samples: Float32Array;
+  /** Sample rate in Hz */
+  sampleRate: number;
+  /** Chunk index (0-based) */
+  index: number;
+  /** Whether this is the final chunk */
+  isFinal: boolean;
+};
+type SpeakResult = {
+  /** Raw PCM audio samples (Float32Array) */
+  audio: Float32Array;
+  /** Sample rate in Hz */
+  sampleRate: number;
+  /** Audio duration in seconds */
+  duration: number;
+  /** Voice used */
+  voice: string;
+  /** Total processing time in ms */
+  totalTime: number;
+};
+type LoadTTSOptions = {
+  /** Progress callback */
+  onProgress?: (info: ProgressInfo) => void;
+  /** Device: 'auto', 'webgpu', 'cpu' (default: 'auto') */
+  device?: "auto" | "webgpu" | "cpu";
+};
+type STTModelConfig = {
+  /** Model ID (e.g., "whisper-tiny.en") */
+  id: string;
+  /** HuggingFace repo path */
+  repo: string;
+  /** Human-readable description */
+  description: string;
+  /** Model size (e.g., "39M", "244M") */
+  size: string;
+  /** Whether model supports multiple languages */
+  multilingual: boolean;
+  /** Supported languages (ISO 639-1 codes) */
+  languages: string[];
+  /** Expected sample rate (default: 16000) */
+  sampleRate: number;
+};
+type TranscribeOptions = {
+  /** Language hint (ISO 639-1 code like "en", "es", "fr") - only for multilingual models */
+  language?: string;
+  /** Return word/segment timestamps */
+  timestamps?: boolean;
+  /** Progress callback */
+  onProgress?: (info: ProgressInfo) => void;
+};
+type TranscribeSegment = {
+  /** Segment text */
+  text: string;
+  /** Start time in seconds */
+  start: number;
+  /** End time in seconds */
+  end: number;
+};
+type TranscribeResult = {
+  /** Full transcribed text */
+  text: string;
+  /** Detected or specified language */
+  language: string;
+  /** Segments with timestamps (if timestamps option enabled) */
+  segments?: TranscribeSegment[];
+  /** Audio duration in seconds */
+  duration: number;
+  /** Total processing time in ms */
+  totalTime: number;
+};
+type LoadSTTOptions = {
+  /** Progress callback */
+  onProgress?: (info: ProgressInfo) => void;
+  /** Device: 'auto', 'webgpu', 'cpu' (default: 'auto') */
+  device?: "auto" | "webgpu" | "cpu";
+};
+type StreamingTranscriptionOptions = {
+  /** Interval between transcriptions in ms (default: 3000) */
+  chunkDuration?: number;
+  /** Minimum audio samples before transcribing (default: 8000 = 0.5s at 16kHz) */
+  minChunkSize?: number;
+  /** Callback for each transcribed chunk */
+  onChunk?: (text: string, chunkIndex: number) => void;
+  /** Callback with full accumulated transcript */
+  onTranscript?: (fullText: string) => void;
+  /** Callback on transcription error */
+  onError?: (error: string) => void;
+  /** Language hint (for multilingual models) */
+  language?: string;
+};
+type StreamingTranscriptionSession = {
+  /** Feed audio data to the buffer (Float32Array at 16kHz) */
+  feedAudio: (audio: Float32Array) => void;
+  /** Manually trigger transcription of buffered audio */
+  flush: () => Promise<string>;
+  /** Start automatic interval-based transcription */
+  start: () => void;
+  /** Stop transcription and return final transcript */
+  stop: () => Promise<string>;
+  /** Immediately abort without final transcription (for cleanup) */
+  abort: () => void;
+  /** Check if session is running */
+  isRunning: () => boolean;
+  /** Get current full transcript */
+  getTranscript: () => string;
+  /** Get number of chunks transcribed */
+  getChunkCount: () => number;
+  /** Reset session (clear buffer and transcript) */
+  reset: () => void;
+};
+//#endregion
+//#region src/core/models.d.ts
+declare const BUILTIN_MODELS: Record<string, ModelConfig>;
+//#endregion
+//#region src/browser/index.d.ts
+
+type WorkerProgress = {
+  status: "loading" | "downloading" | "ready" | "error";
+  message?: string;
+  file?: string;
+  progress?: number;
+  /** Number of files being downloaded (0 = loading from cache) */
+  downloadCount?: number;
+  /** Total files to process */
+  totalFiles?: number;
+  error?: string;
+};
+type WorkerToken = {
+  status: "token";
+  text: string;
+  state: "thinking" | "answering";
+  numTokens: number;
+  tps: number;
+};
+type WorkerComplete = {
+  status: "complete";
+  text: string;
+  numTokens: number;
+  totalTime: number;
+  tps: number;
+};
+type GerbilWorkerOptions = {
+  /** Model ID to load (default: "qwen3-0.6b") */
+  modelId?: string;
+  /** Called during model loading with progress updates */
+  onProgress?: (progress: WorkerProgress) => void;
+  /** Called for each token during streaming generation */
+  onToken?: (token: WorkerToken) => void;
+  /** Called when generation is complete */
+  onComplete?: (result: WorkerComplete) => void;
+  /** Called on errors */
+  onError?: (error: string) => void;
+  /** Worker script URL (auto-detected if not provided) */
+  workerUrl?: string;
+};
+type GenerateStreamOptions = {
+  /** Maximum tokens to generate */
+  maxTokens?: number;
+  /** Temperature for sampling (0 = deterministic) */
+  temperature?: number;
+  /** Top-p nucleus sampling */
+  topP?: number;
+  /** Top-k sampling */
+  topK?: number;
+  /** Enable thinking mode (Qwen3) */
+  thinking?: boolean;
+  /** System prompt */
+  system?: string;
+  /** Image URLs or data URIs (for vision models) */
+  images?: string[];
+  /** Conversation history for multi-turn (includes all previous messages) */
+  history?: Array<{
+    role: "user" | "assistant" | "system";
+    content: string;
+  }>;
+};
+type GerbilWorker = {
+  /** Generate text with streaming */
+  generate: (prompt: string, options?: GenerateStreamOptions) => Promise<string>;
+  /** Interrupt current generation */
+  interrupt: () => void;
+  /** Reset conversation cache */
+  reset: () => void;
+  /** Terminate the worker */
+  terminate: () => void;
+  /** Check if model is loaded */
+  isReady: () => boolean;
+};
+/**
+ * Create a Gerbil worker for streaming WebGPU inference
+ *
+ * Uses a Web Worker to keep the UI responsive during model loading
+ * and text generation, with real-time token streaming.
+ */
+declare function createGerbilWorker(options?: GerbilWorkerOptions): Promise<GerbilWorker>;
+/** Message in a chat conversation */
+type Message = {
+  id: string;
+  role: "user" | "assistant";
+  content: string;
+  thinking?: string;
+  /** Attached images (URLs or data URIs) - for vision models */
+  images?: string[];
+};
+/** Loading progress state */
+type LoadingProgress = {
+  status: "loading" | "downloading" | "ready" | "error";
+  message?: string;
+  file?: string;
+  progress?: number;
+  /** Number of files being downloaded (0 = loading from cache) */
+  downloadCount?: number;
+  /** Total files to process */
+  totalFiles?: number;
+};
+/** Options for useChat hook */
+type UseChatOptions = {
+  /** Model ID (default: "qwen3-0.6b") */
+  model?: string;
+  /** System prompt */
+  system?: string;
+  /** Enable thinking mode (Qwen3) */
+  thinking?: boolean;
+  /** Max tokens per response */
+  maxTokens?: number;
+  /** Temperature (0-2) */
+  temperature?: number;
+  /** Initial messages */
+  initialMessages?: Message[];
+  /** Auto-load model on mount (default: false - loads on first generate or load()) */
+  autoLoad?: boolean;
+  /** Called when model is ready */
+  onReady?: () => void;
+  /** Called on error */
+  onError?: (error: string) => void;
+};
+/** Return type for useChat hook */
+type UseChatReturn = {
+  /** Chat messages */
+  messages: Message[];
+  /** Current input value */
+  input: string;
+  /** Set input value */
+  setInput: (value: string) => void;
+  /** Submit current input */
+  handleSubmit: (e?: {
+    preventDefault?: () => void;
+  }) => void;
+  /** Whether model is loading */
+  isLoading: boolean;
+  /** Loading progress */
+  loadingProgress: LoadingProgress | null;
+  /** Whether generating a response */
+  isGenerating: boolean;
+  /** Current thinking content (streaming) */
+  thinking: string;
+  /** Stop generation */
+  stop: () => void;
+  /** Clear all messages */
+  clear: () => void;
+  /** Current tokens per second */
+  tps: number;
+  /** Whether model is ready */
+  isReady: boolean;
+  /** Error message if any */
+  error: string | null;
+  /** Load the model (only needed if lazy: true) */
+  load: () => void;
+  /** Currently attached images (for next message) */
+  attachedImages: string[];
+  /** Attach an image to the next message */
+  attachImage: (imageUrl: string) => void;
+  /** Remove an attached image */
+  removeImage: (index: number) => void;
+  /** Clear all attached images */
+  clearImages: () => void;
+  /** Send message with specific images (convenience method) */
+  sendWithImages: (text: string, images: string[]) => void;
+};
+/**
+ * React hook for chat with local LLM
+ *
+ * @example
+ * ```tsx
+ * import { useChat } from "@tryhamster/gerbil/browser";
+ *
+ * function Chat() {
+ *   const { messages, input, setInput, handleSubmit, isLoading, isGenerating } = useChat();
+ *
+ *   if (isLoading) return <div>Loading model...</div>;
+ *
+ *   return (
+ *     <div>
+ *       {messages.map(m => (
+ *         <div key={m.id}>{m.role}: {m.content}</div>
+ *       ))}
+ *       <form onSubmit={handleSubmit}>
+ *         <input value={input} onChange={e => setInput(e.target.value)} />
+ *         <button disabled={isGenerating}>Send</button>
+ *       </form>
+ *     </div>
+ *   );
+ * }
+ * ```
+ */
+declare function useChat(options?: UseChatOptions): UseChatReturn;
+/** Options for useCompletion hook */
+type UseCompletionOptions = {
+  /** Model ID (default: "qwen3-0.6b") */
+  model?: string;
+  /** System prompt */
+  system?: string;
+  /** Enable thinking mode (Qwen3) */
+  thinking?: boolean;
+  /** Max tokens */
+  maxTokens?: number;
+  /** Temperature (0-2) */
+  temperature?: number;
+  /** Auto-load model on mount (default: false - loads on first complete() or load()) */
+  autoLoad?: boolean;
+  /** Called when model is ready */
+  onReady?: () => void;
+  /** Called on error */
+  onError?: (error: string) => void;
+};
+/** Options for single completion call */
+type CompleteOptions = {
+  /** Image URLs or data URIs to analyze (for vision models) */
+  images?: string[];
+};
+/** Return type for useCompletion hook */
+type UseCompletionReturn = {
+  /** Generated completion */
+  completion: string;
+  /** Thinking content (if enabled) */
+  thinking: string;
+  /** Generate completion (optionally with images for vision models) */
+  complete: (prompt: string, options?: CompleteOptions) => Promise<string>;
+  /** Whether model is loading */
+  isLoading: boolean;
+  /** Loading progress */
+  loadingProgress: LoadingProgress | null;
+  /** Whether generating */
+  isGenerating: boolean;
+  /** Stop generation */
+  stop: () => void;
+  /** Current tokens per second */
+  tps: number;
+  /** Whether model is ready */
+  isReady: boolean;
+  /** Error message if any */
+  error: string | null;
+  /** Load the model (only needed if lazy: true) */
+  load: () => void;
+};
+/**
+ * React hook for text completion with local LLM
+ *
+ * @example
+ * ```tsx
+ * import { useCompletion } from "@tryhamster/gerbil/browser";
+ *
+ * function App() {
+ *   const { complete, completion, isLoading, isGenerating } = useCompletion();
+ *
+ *   if (isLoading) return <div>Loading...</div>;
+ *
+ *   return (
+ *     <div>
+ *       <button onClick={() => complete("Write a haiku")}>Generate</button>
+ *       <p>{completion}</p>
+ *     </div>
+ *   );
+ * }
+ * ```
+ */
+declare function useCompletion(options?: UseCompletionOptions): UseCompletionReturn;
+/** TTS loading progress */
+type TTSProgress = {
+  status: "idle" | "loading" | "downloading" | "ready" | "error";
+  message?: string;
+  file?: string;
+  progress?: number;
+  error?: string;
+};
+/** Available TTS models */
+type TTSModelId = "kokoro-82m" | "supertonic-66m";
+/** Voice info for TTS models */
+type BrowserVoiceInfo = {
+  id: string;
+  name: string;
+  gender: "male" | "female";
+  language: string;
+  description: string;
+};
+/** Options for useSpeech hook */
+type UseSpeechOptions = {
+  /** TTS model to use (default: "kokoro-82m") */
+  model?: TTSModelId;
+  /** Default voice ID (default: model's default voice) */
+  voice?: string;
+  /** Speech speed multiplier (default: 1.0) */
+  speed?: number;
+  /** Auto-load TTS model on mount (default: false) */
+  autoLoad?: boolean;
+  /** Called when model is ready */
+  onReady?: () => void;
+  /** Called on error */
+  onError?: (error: string) => void;
+  /** Called when speech starts */
+  onStart?: () => void;
+  /** Called when speech ends */
+  onEnd?: () => void;
+};
+/** Return type for useSpeech hook */
+type UseSpeechReturn = {
+  /** Speak text aloud */
+  speak: (text: string, options?: {
+    voice?: string;
+    speed?: number;
+  }) => Promise<void>;
+  /** Stop current speech */
+  stop: () => void;
+  /** Whether TTS model is loading */
+  isLoading: boolean;
+  /** Loading progress */
+  loadingProgress: TTSProgress | null;
+  /** Whether currently speaking */
+  isSpeaking: boolean;
+  /** Whether TTS model is ready */
+  isReady: boolean;
+  /** Load the TTS model */
+  load: () => void;
+  /** Error message if any */
+  error: string | null;
+  /** List available voices for current model */
+  listVoices: () => BrowserVoiceInfo[];
+  /** Current voice ID */
+  currentVoice: string;
+  /** Set current voice */
+  setVoice: (voiceId: string) => void;
+  /** Current speed */
+  currentSpeed: number;
+  /** Set speed */
+  setSpeed: (speed: number) => void;
+  /** Current TTS model ID */
+  currentModel: TTSModelId;
+  /** Sample rate for current model (24000 for Kokoro, 44100 for Supertonic) */
+  sampleRate: number;
+};
+/**
+ * React hook for text-to-speech with Web Audio API playback
+ *
+ * Supports both Kokoro (24kHz, high quality) and Supertonic (44.1kHz, faster).
+ *
+ * @example
+ * ```tsx
+ * import { useSpeech } from "@tryhamster/gerbil/browser";
+ *
+ * function App() {
+ *   // Default: Kokoro TTS
+ *   const { speak, stop, isLoading, isSpeaking, listVoices, setVoice } = useSpeech();
+ *
+ *   // Or use Supertonic (44.1kHz, faster)
+ *   // const { speak, listVoices } = useSpeech({ model: "supertonic-66m" });
+ *
+ *   if (isLoading) return <div>Loading TTS...</div>;
+ *
+ *   return (
+ *     <div>
+ *       <select onChange={e => setVoice(e.target.value)}>
+ *         {listVoices().map(v => (
+ *           <option key={v.id} value={v.id}>{v.name}</option>
+ *         ))}
+ *       </select>
+ *       <button onClick={() => speak("Hello world!")}>
+ *         {isSpeaking ? "Speaking..." : "Speak"}
+ *       </button>
+ *       {isSpeaking && <button onClick={stop}>Stop</button>}
+ *     </div>
+ *   );
+ * }
+ * ```
+ */
+declare function useSpeech(options?: UseSpeechOptions): UseSpeechReturn;
+/**
+ * Play audio from Float32Array using Web Audio API
+ *
+ * @example
+ * ```ts
+ * import { playAudio } from "@tryhamster/gerbil/browser";
+ *
+ * const audio = new Float32Array([...]); // TTS output
+ * const controller = await playAudio(audio, 24000);
+ *
+ * // Stop playback
+ * controller.stop();
+ * ```
+ */
+declare function playAudio(audio: Float32Array, sampleRate?: number): Promise<{
+  stop: () => void;
+  onEnded: Promise<void>;
+}>;
+/**
+ * Create a reusable audio player for streaming TTS
+ *
+ * @example
+ * ```ts
+ * import { createAudioPlayer } from "@tryhamster/gerbil/browser";
+ *
+ * const player = createAudioPlayer(24000);
+ *
+ * // Queue audio chunks as they arrive
+ * player.queue(chunk1);
+ * player.queue(chunk2);
+ *
+ * // Stop and clear
+ * player.stop();
+ * ```
+ */
+declare function createAudioPlayer(sampleRate?: number): {
+  queue: (audio: Float32Array) => void;
+  stop: () => void;
+  isPlaying: () => boolean;
+};
+/**
+ * Progress info for STT loading
+ */
+type STTProgress = {
+  status: "downloading" | "loading" | "ready" | "error";
+  message?: string;
+  progress?: number;
+  file?: string;
+};
+/**
+ * Options for useVoiceInput hook
+ */
+type UseVoiceInputOptions = {
+  /** STT model ID (default: whisper-tiny.en) */
+  model?: string;
+  /** Auto-load model on mount (default: false) */
+  autoLoad?: boolean;
+  /** Callback when model is ready */
+  onReady?: () => void;
+  /** Callback when transcription completes (or for each chunk in streaming mode) */
+  onTranscript?: (text: string) => void;
+  /** Callback on error */
+  onError?: (error: string) => void;
+  /** Callback during loading */
+  onProgress?: (progress: STTProgress) => void;
+  /** Enable streaming transcription - transcribes audio in chunks as you speak */
+  streaming?: boolean;
+  /** Chunk duration in ms for streaming mode (default: 3000 = 3 seconds) */
+  chunkDuration?: number;
+  /** Callback for each streaming chunk with partial transcript */
+  onChunk?: (text: string, chunkIndex: number) => void;
+};
+/**
+ * Return type for useVoiceInput hook
+ */
+type UseVoiceInputReturn = {
+  /** Start recording audio */
+  startRecording: () => Promise<void>;
+  /** Stop recording and transcribe */
+  stopRecording: () => Promise<string>;
+  /** Cancel recording without transcribing */
+  cancelRecording: () => void;
+  /** Transcribe raw audio data (Float32Array at 16kHz) */
+  transcribe: (audio: Float32Array) => Promise<string>;
+  /** Whether currently recording */
+  isRecording: boolean;
+  /** Whether transcribing */
+  isTranscribing: boolean;
+  /** Whether model is loading */
+  isLoading: boolean;
+  /** Whether model is ready */
+  isReady: boolean;
+  /** Latest transcription result (full transcript in streaming mode) */
+  transcript: string;
+  /** Current streaming chunk being transcribed (streaming mode only) */
+  streamingChunk: string;
+  /** Number of chunks transcribed so far (streaming mode only) */
+  chunkCount: number;
+  /** Loading progress */
+  loadingProgress: STTProgress | null;
+  /** Error message */
+  error: string | null;
+  /** Manually load the model */
+  load: () => void;
+};
+/**
+ * React hook for voice input with browser microphone
+ *
+ * Uses MediaRecorder to capture audio and Whisper for transcription.
+ * Supports both one-shot and streaming transcription modes.
+ *
+ * @example Basic usage (one-shot)
+ * ```tsx
+ * function VoiceInput() {
+ *   const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
+ *     onTranscript: (text) => console.log("User said:", text),
+ *   });
+ *
+ *   return (
+ *     <button onClick={isRecording ? stopRecording : startRecording}>
+ *       {isRecording ? "Stop" : "Record"}
+ *     </button>
+ *   );
+ * }
+ * ```
+ *
+ * @example Streaming transcription (real-time)
+ * ```tsx
+ * function LiveTranscription() {
+ *   const { startRecording, stopRecording, isRecording, transcript, streamingChunk } = useVoiceInput({
+ *     streaming: true, // Enable streaming mode
+ *     chunkDuration: 1500, // Transcribe every 1.5 seconds (default)
+ *     onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
+ *   });
+ *
+ *   return (
+ *     <div>
+ *       <button onClick={isRecording ? stopRecording : startRecording}>
+ *         {isRecording ? "Stop" : "Start Live Transcription"}
+ *       </button>
+ *       <p>Current chunk: {streamingChunk}</p>
+ *       <p>Full transcript: {transcript}</p>
+ *     </div>
+ *   );
+ * }
+ * ```
+ */
+declare function useVoiceInput(options?: UseVoiceInputOptions): UseVoiceInputReturn;
+/**
+ * Options for useVoiceChat hook
+ */
+type UseVoiceChatOptions = {
+  /** LLM model ID (default: qwen3-0.6b) */
+  llmModel?: string;
+  /** STT model ID (default: whisper-tiny.en) */
+  sttModel?: string;
+  /** TTS model ID (default: kokoro-82m, also supports supertonic-66m) */
+  ttsModel?: TTSModelId;
+  /** System prompt for LLM */
+  system?: string;
+  /** Enable thinking mode (default: false) */
+  thinking?: boolean;
+  /** TTS voice ID (default: model's default voice) */
+  voice?: string;
+  /** TTS speech speed (default: 1.0) */
+  speed?: number;
+  /** Auto-load all models on mount (default: false) */
+  autoLoad?: boolean;
+  /** Callback when user speaks */
+  onUserSpeak?: (text: string) => void;
+  /** Callback when assistant responds */
+  onAssistantSpeak?: (text: string) => void;
+  /** Callback on error */
+  onError?: (error: string) => void;
+};
+/**
+ * Message in voice chat
+ */
+type VoiceChatMessage = {
+  id: string;
+  role: "user" | "assistant";
+  content: string;
+  thinking?: string;
+  audioUrl?: string;
+};
+/**
+ * Return type for useVoiceChat hook
+ */
+type UseVoiceChatReturn = {
+  /** Messages in the conversation */
+  messages: VoiceChatMessage[];
+  /** Start recording user speech */
+  startListening: () => Promise<void>;
+  /** Stop recording and process (STT → LLM → TTS) */
+  stopListening: () => Promise<void>;
+  /** Cancel current operation */
+  cancel: () => void;
+  /** Clear conversation history */
+  clear: () => void;
+  /** Whether recording user speech */
+  isListening: boolean;
+  /** Whether processing (STT/LLM/TTS) */
+  isProcessing: boolean;
+  /** Whether assistant is speaking */
+  isSpeaking: boolean;
+  /** Current stage: idle, listening, transcribing, thinking, speaking */
+  stage: "idle" | "listening" | "transcribing" | "thinking" | "speaking";
+  /** Whether all models are loaded */
+  isReady: boolean;
+  /** Whether loading models */
+  isLoading: boolean;
+  /** Loading progress message */
+  loadingMessage: string;
+  /** Error message */
+  error: string | null;
+  /** Manually load all models */
+  load: () => void;
+};
+/**
+ * React hook for voice conversation with STT + LLM + TTS
+ *
+ * Complete voice-to-voice conversation loop:
+ * 1. User presses button to speak
+ * 2. Speech is transcribed (Whisper)
+ * 3. LLM generates response
+ * 4. Response is spoken aloud (Kokoro or Supertonic TTS)
+ *
+ * @example
+ * ```tsx
+ * function VoiceChat() {
+ *   const {
+ *     messages,
+ *     startListening,
+ *     stopListening,
+ *     isListening,
+ *     isSpeaking,
+ *     stage,
+ *   } = useVoiceChat({
+ *     system: "You are a helpful voice assistant.",
+ *     voice: "af_bella",
+ *     // Or use Supertonic for faster synthesis:
+ *     // ttsModel: "supertonic-66m",
+ *     // voice: "F1",
+ *   });
+ *
+ *   return (
+ *     <div>
+ *       {messages.map(m => (
+ *         <div key={m.id}>{m.role}: {m.content}</div>
+ *       ))}
+ *       <button
+ *         onMouseDown={startListening}
+ *         onMouseUp={stopListening}
+ *       >
+ *         {stage === "idle" ? "🎤 Hold to Speak" : stage}
+ *       </button>
+ *     </div>
+ *   );
+ * }
+ * ```
+ */
+declare function useVoiceChat(options?: UseVoiceChatOptions): UseVoiceChatReturn;
+/**
+ * Check if WebGPU is supported
+ */
+declare function isWebGPUSupported(): boolean;
+/**
+ * Get WebGPU adapter info
+ */
+declare function getWebGPUInfo(): Promise<{
+  supported: boolean;
+  adapter?: string;
+  device?: string;
+} | null>;
+declare const _default: {
+  isWebGPUSupported: typeof isWebGPUSupported;
+  getWebGPUInfo: typeof getWebGPUInfo;
+  createGerbilWorker: typeof createGerbilWorker;
+  playAudio: typeof playAudio;
+  createAudioPlayer: typeof createAudioPlayer;
+};
+//#endregion
+export { AudioChunk, BUILTIN_MODELS, BrowserVoiceInfo, CacheConfig, CompleteOptions, EmbedOptions, EmbedResult, FallbackConfig, GenerateOptions, GenerateResult, GenerateStreamOptions, GerbilConfig, GerbilModelSettings, GerbilProviderSettings, GerbilWorker, GerbilWorkerOptions, ImageInput, JsonOptions, LoadOptions, LoadSTTOptions, LoadTTSOptions, LoadingProgress, Message, ModelConfig, ModelSource, ModelStats, ProgressInfo, STTModelConfig, STTProgress, SessionStats, SpeakOptions, SpeakResult, StreamingTranscriptionOptions, StreamingTranscriptionSession, SystemInfo, TTSModelConfig, TTSModelId, TTSProgress, TranscribeOptions, TranscribeResult, TranscribeSegment, UseChatOptions, UseChatReturn, UseCompletionOptions, UseCompletionReturn, UseSpeechOptions, UseSpeechReturn, UseVoiceChatOptions, UseVoiceChatReturn, UseVoiceInputOptions, UseVoiceInputReturn, VoiceChatMessage, VoiceInfo, WorkerComplete, WorkerProgress, WorkerToken, createAudioPlayer, createGerbilWorker, _default as default, getWebGPUInfo, isWebGPUSupported, playAudio, useChat, useCompletion, useSpeech, useVoiceChat, useVoiceInput };
+//# sourceMappingURL=index.d.ts.map
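Note: unlike the React hooks above, `createGerbilWorker` (the framework-free entry point) ships without an inline `@example`. The sketch below is assembled only from the declarations in this diff (`GerbilWorkerOptions`, `GerbilWorker`, `GenerateStreamOptions`); the model ID is the documented default written out explicitly, and the sampling values are illustrative rather than library defaults.

```ts
import { createGerbilWorker, isWebGPUSupported } from "@tryhamster/gerbil/browser";

async function demo() {
  // Check WebGPU support before paying the model-download cost.
  if (!isWebGPUSupported()) {
    console.warn("WebGPU is not available in this browser");
    return;
  }

  let streamed = "";
  // The model loads inside a Web Worker, so the UI thread stays responsive.
  const worker = await createGerbilWorker({
    modelId: "qwen3-0.6b", // documented default, written out for clarity
    onProgress: (p) => console.log(`[load] ${p.status}`, p.progress ?? ""),
    onToken: (t) => {
      streamed += t.text; // tokens arrive incrementally, with numTokens/tps attached
    },
  });

  // generate() streams through onToken and resolves with the complete text.
  const text = await worker.generate("Explain WebGPU in one paragraph.", {
    maxTokens: 256, // illustrative values, not prescribed defaults
    temperature: 0.7,
    thinking: false, // leave Qwen3 reasoning mode off for a plain answer
  });
  console.log("streamed:", streamed.length, "chars; final:", text);

  worker.terminate(); // free the worker and model memory when done
}

void demo();
```

Per the declarations, `interrupt()` cancels an in-flight generation and `reset()` clears the conversation cache, so a long-lived worker can be reused across unrelated prompts instead of being recreated each time.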