@tryhamster/gerbil 1.0.0-rc.1 → 1.0.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/dist/browser/{index.d.mts → index.d.ts} +354 -3
  2. package/dist/browser/index.d.ts.map +1 -0
  3. package/dist/browser/{index.mjs → index.js} +119 -8
  4. package/dist/browser/index.js.map +1 -0
  5. package/dist/{chrome-backend-Y9F7W5VQ.mjs → chrome-backend-CORwaIyC.mjs} +1 -1
  6. package/dist/{chrome-backend-Y9F7W5VQ.mjs.map → chrome-backend-CORwaIyC.mjs.map} +1 -1
  7. package/dist/{chrome-backend-JEPeM2YE.mjs → chrome-backend-DIKYoWj-.mjs} +1 -1
  8. package/dist/cli.mjs +14 -15
  9. package/dist/cli.mjs.map +1 -1
  10. package/dist/frameworks/express.d.mts +1 -1
  11. package/dist/frameworks/express.mjs +3 -4
  12. package/dist/frameworks/express.mjs.map +1 -1
  13. package/dist/frameworks/fastify.d.mts +1 -1
  14. package/dist/frameworks/fastify.mjs +2 -3
  15. package/dist/frameworks/fastify.mjs.map +1 -1
  16. package/dist/frameworks/hono.d.mts +1 -1
  17. package/dist/frameworks/hono.mjs +2 -3
  18. package/dist/frameworks/hono.mjs.map +1 -1
  19. package/dist/frameworks/next.d.mts +2 -2
  20. package/dist/frameworks/next.mjs +2 -3
  21. package/dist/frameworks/next.mjs.map +1 -1
  22. package/dist/frameworks/react.d.mts +1 -1
  23. package/dist/frameworks/trpc.d.mts +1 -1
  24. package/dist/frameworks/trpc.mjs +2 -3
  25. package/dist/frameworks/trpc.mjs.map +1 -1
  26. package/dist/gerbil-DJGqq7BX.mjs +4 -0
  27. package/dist/{gerbil-yoSpRHgv.mjs → gerbil-DoDGHe6Z.mjs} +187 -19
  28. package/dist/gerbil-DoDGHe6Z.mjs.map +1 -0
  29. package/dist/{gerbil-POAz8peb.d.mts → gerbil-qOTe1nl2.d.mts} +2 -2
  30. package/dist/{gerbil-POAz8peb.d.mts.map → gerbil-qOTe1nl2.d.mts.map} +1 -1
  31. package/dist/index.d.mts +19 -3
  32. package/dist/index.d.mts.map +1 -1
  33. package/dist/index.mjs +6 -7
  34. package/dist/index.mjs.map +1 -1
  35. package/dist/integrations/ai-sdk.d.mts +1 -1
  36. package/dist/integrations/ai-sdk.mjs +4 -5
  37. package/dist/integrations/ai-sdk.mjs.map +1 -1
  38. package/dist/integrations/langchain.d.mts +1 -1
  39. package/dist/integrations/langchain.mjs +2 -3
  40. package/dist/integrations/langchain.mjs.map +1 -1
  41. package/dist/integrations/llamaindex.d.mts +1 -1
  42. package/dist/integrations/llamaindex.mjs +2 -3
  43. package/dist/integrations/llamaindex.mjs.map +1 -1
  44. package/dist/integrations/mcp-client.mjs +2 -2
  45. package/dist/integrations/mcp.d.mts +2 -2
  46. package/dist/integrations/mcp.mjs +5 -6
  47. package/dist/kokoro-BNTb6egA.mjs +20210 -0
  48. package/dist/kokoro-BNTb6egA.mjs.map +1 -0
  49. package/dist/kokoro-CMOGDSgT.js +20212 -0
  50. package/dist/kokoro-CMOGDSgT.js.map +1 -0
  51. package/dist/{mcp-Bitg4sjX.mjs → mcp-kzDDWIoS.mjs} +3 -3
  52. package/dist/{mcp-Bitg4sjX.mjs.map → mcp-kzDDWIoS.mjs.map} +1 -1
  53. package/dist/{one-liner-B1rmFto6.mjs → one-liner-DxnNs_JK.mjs} +2 -2
  54. package/dist/{one-liner-B1rmFto6.mjs.map → one-liner-DxnNs_JK.mjs.map} +1 -1
  55. package/dist/repl-DGUw4fCc.mjs +9 -0
  56. package/dist/skills/index.d.mts +24 -24
  57. package/dist/skills/index.d.mts.map +1 -1
  58. package/dist/skills/index.mjs +4 -5
  59. package/dist/{skills-5DxAV-rn.mjs → skills-DulrOPeP.mjs} +12 -12
  60. package/dist/skills-DulrOPeP.mjs.map +1 -0
  61. package/dist/stt-1WIefHwc.mjs +3 -0
  62. package/dist/{stt-Bv_dum-R.mjs → stt-CG_7KB_0.mjs} +3 -2
  63. package/dist/stt-CG_7KB_0.mjs.map +1 -0
  64. package/dist/stt-Dne6SENv.js +434 -0
  65. package/dist/stt-Dne6SENv.js.map +1 -0
  66. package/dist/{tools-IYPrqoek.mjs → tools-Bi1P7Xoy.mjs} +2 -2
  67. package/dist/{tools-IYPrqoek.mjs.map → tools-Bi1P7Xoy.mjs.map} +1 -1
  68. package/dist/transformers.web-DiD1gTwk.js +44695 -0
  69. package/dist/transformers.web-DiD1gTwk.js.map +1 -0
  70. package/dist/transformers.web-u34VxRFM.js +3 -0
  71. package/dist/{tts-5yWeP_I0.mjs → tts-B1pZMlDv.mjs} +1 -1
  72. package/dist/tts-C2FzKuSx.js +725 -0
  73. package/dist/tts-C2FzKuSx.js.map +1 -0
  74. package/dist/{tts-DG6denWG.mjs → tts-CyHhcLtN.mjs} +6 -4
  75. package/dist/tts-CyHhcLtN.mjs.map +1 -0
  76. package/dist/{types-s6Py2_DL.d.mts → types-CiTc7ez3.d.mts} +1 -1
  77. package/dist/{types-s6Py2_DL.d.mts.map → types-CiTc7ez3.d.mts.map} +1 -1
  78. package/dist/{utils-CkB4Roi6.mjs → utils-CZBZ8dgR.mjs} +1 -1
  79. package/dist/{utils-CkB4Roi6.mjs.map → utils-CZBZ8dgR.mjs.map} +1 -1
  80. package/package.json +6 -6
  81. package/dist/browser/index.d.mts.map +0 -1
  82. package/dist/browser/index.mjs.map +0 -1
  83. package/dist/gerbil-DeQlX_Mt.mjs +0 -5
  84. package/dist/gerbil-yoSpRHgv.mjs.map +0 -1
  85. package/dist/models-BAtL8qsA.mjs +0 -171
  86. package/dist/models-BAtL8qsA.mjs.map +0 -1
  87. package/dist/models-CE0fBq0U.d.mts +0 -22
  88. package/dist/models-CE0fBq0U.d.mts.map +0 -1
  89. package/dist/repl-D20JO260.mjs +0 -10
  90. package/dist/skills-5DxAV-rn.mjs.map +0 -1
  91. package/dist/stt-Bv_dum-R.mjs.map +0 -1
  92. package/dist/stt-KzSoNvwI.mjs +0 -3
  93. package/dist/tts-DG6denWG.mjs.map +0 -1
  94. /package/dist/{auto-update-DsWBBnEk.mjs → auto-update-S9s5-g0C.mjs} +0 -0
  95. /package/dist/{chunk-Ct1HF2bE.mjs → chunk-CkXuGtQK.mjs} +0 -0
  96. /package/dist/{microphone-D-6y9aiE.mjs → microphone-DaMZFRuR.mjs} +0 -0
@@ -0,0 +1,725 @@
1
//#region src/core/tts.ts
// Splits text on whitespace that follows sentence-ending punctuation;
// the lookbehind keeps the ., !, or ? attached to the preceding sentence.
const SENTENCE_SPLIT_REGEX = /(?<=[.!?])\s+/;
3
/**
 * Kokoro voice definitions.
 *
 * Voice IDs follow the pattern {language}{gender}_{name}:
 * - a = American English, b = British English
 * - f = female, m = male
 *
 * Entries below are compact [id, name, description] rows; gender and
 * language are derived from the id prefix and the embedding file path
 * from the id itself.
 */
const KOKORO_VOICES = [
  ["af_heart", "Heart", "American female, highest quality voice (Grade A)"],
  ["af_bella", "Bella", "American female, warm and friendly (Grade A-)"],
  ["af_nicole", "Nicole", "American female, soft and gentle (Grade B-)"],
  ["af_sarah", "Sarah", "American female, clear and professional (Grade C+)"],
  ["af_sky", "Sky", "American female, young and energetic (Grade C-)"],
  ["af_alloy", "Alloy", "American female (Grade C)"],
  ["af_aoede", "Aoede", "American female (Grade C+)"],
  ["af_kore", "Kore", "American female (Grade C+)"],
  ["af_nova", "Nova", "American female (Grade C)"],
  ["af_river", "River", "American female (Grade D)"],
  ["af_jessica", "Jessica", "American female (Grade D)"],
  ["am_fenrir", "Fenrir", "American male, best quality (Grade C+)"],
  ["am_michael", "Michael", "American male, warm and friendly (Grade C+)"],
  ["am_puck", "Puck", "American male (Grade C+)"],
  ["am_adam", "Adam", "American male, deep voice (Grade F+)"],
  ["am_echo", "Echo", "American male (Grade D)"],
  ["am_eric", "Eric", "American male (Grade D)"],
  ["am_liam", "Liam", "American male (Grade D)"],
  ["am_onyx", "Onyx", "American male (Grade D)"],
  ["am_santa", "Santa", "American male, festive (Grade D-)"],
  ["bf_emma", "Emma", "British female, elegant and clear (Grade B-)"],
  ["bf_isabella", "Isabella", "British female, sophisticated (Grade C)"],
  ["bf_alice", "Alice", "British female (Grade D)"],
  ["bf_lily", "Lily", "British female (Grade D)"],
  ["bm_george", "George", "British male, distinguished (Grade C)"],
  ["bm_fable", "Fable", "British male (Grade C)"],
  ["bm_lewis", "Lewis", "British male, friendly (Grade D+)"],
  ["bm_daniel", "Daniel", "British male (Grade D)"],
].map(([id, name, description]) => ({
  id,
  name,
  gender: id[1] === "f" ? "female" : "male",
  language: id[0] === "b" ? "en-gb" : "en-us",
  description,
  embeddingFile: `voices/${id}.bin`,
}));
236
/**
 * Supertonic voice definitions.
 *
 * Four built-in voices: F1, F2 (female) and M1, M2 (male). All are
 * English ("en"); the embedding file path is derived from the voice id.
 */
const SUPERTONIC_VOICES = [
  ["F1", "Female 1", "female", "Female voice 1 - Clear and natural"],
  ["F2", "Female 2", "female", "Female voice 2 - Warm and expressive"],
  ["M1", "Male 1", "male", "Male voice 1 - Deep and confident"],
  ["M2", "Male 2", "male", "Male voice 2 - Friendly and casual"],
].map(([id, name, gender, description]) => ({
  id,
  name,
  gender,
  language: "en",
  description,
  embeddingFile: `voices/${id}.bin`,
}));
274
/**
 * Registry of supported TTS models, keyed by model ID.
 * Each entry bundles the Hugging Face repo, the engine's output sample
 * rate, its voice list, and the default voice.
 */
const TTS_MODELS = {
  "kokoro-82m": {
    id: "kokoro-82m",
    repo: "onnx-community/Kokoro-82M-v1.0-ONNX",
    description: "Kokoro 82M - High-quality multilingual TTS",
    size: "~330MB",
    sampleRate: 24000,
    voices: KOKORO_VOICES,
    defaultVoice: "af_heart",
    languages: ["en-us", "en-gb"],
  },
  "supertonic-66m": {
    id: "supertonic-66m",
    repo: "onnx-community/Supertonic-TTS-ONNX",
    description: "Supertonic 66M - Fast on-device TTS (167x realtime)",
    size: "~250MB",
    sampleRate: 44100,
    voices: SUPERTONIC_VOICES,
    defaultVoice: "F1",
    languages: ["en"],
  },
};
296
/**
 * Get a TTS model config by ID.
 *
 * Uses an own-property check so that inherited `Object.prototype`
 * members are never mistaken for models: the previous
 * `TTS_MODELS[modelId] || null` returned a function for IDs such as
 * "toString" or "constructor".
 *
 * @param {string} modelId - Key into TTS_MODELS.
 * @returns {object|null} The model config, or null when unknown.
 */
function getTTSModelConfig(modelId) {
  return Object.hasOwn(TTS_MODELS, modelId) ? TTS_MODELS[modelId] : null;
}
302
/**
 * Kokoro TTS - Local text-to-speech with voice selection
 *
 * Wraps kokoro-js (the official Kokoro library by xenova) for
 * high-quality speech synthesis, including proper G2P
 * (grapheme-to-phoneme) conversion for accurate pronunciation.
 */
var KokoroTTS = class {
  kokoroInstance = null; // lazily created kokoro-js engine
  modelConfig; // entry from TTS_MODELS
  loadPromise = null; // in-flight load shared by concurrent callers
  _isLoaded = false;
  _deviceMode = "cpu"; // "cpu" | "webgpu", decided during _load

  /**
   * @param {string} [modelId="kokoro-82m"] - Key into TTS_MODELS.
   * @throws {Error} When the ID is not a known TTS model.
   */
  constructor(modelId = "kokoro-82m") {
    const config = getTTSModelConfig(modelId);
    if (!config) {
      throw new Error(`Unknown TTS model: ${modelId}. Available: ${Object.keys(TTS_MODELS).join(", ")}`);
    }
    this.modelConfig = config;
  }

  /**
   * Load the TTS model. Safe to call repeatedly; concurrent callers
   * share a single in-flight load.
   *
   * @example
   * ```ts
   * const tts = new KokoroTTS();
   * await tts.load({
   *   onProgress: (p) => console.log(p.status, p.progress),
   *   device: "webgpu",
   * });
   * ```
   */
  async load(options = {}) {
    if (this._isLoaded) return;
    if (!this.loadPromise) {
      this.loadPromise = this._load(options);
    }
    await this.loadPromise;
  }

  /** Actual load work; clears loadPromise on failure so load() can retry. */
  async _load(options = {}) {
    const { onProgress, device = "auto" } = options;
    onProgress?.({ status: `Loading TTS model (${this.modelConfig.id})...` });
    try {
      const { KokoroTTS: KokoroJS } = await import("./kokoro-CMOGDSgT.js");
      const inBrowser = typeof window !== "undefined";
      // WebGPU gets fp16 weights; CPU/WASM falls back to fp32.
      const useWebGpu = device === "webgpu" || (device === "auto" && inBrowser && "gpu" in navigator);
      this._deviceMode = useWebGpu ? "webgpu" : "cpu";
      const dtype = useWebGpu ? "fp16" : "fp32";
      onProgress?.({ status: `Loading model with ${dtype} precision...` });
      this.kokoroInstance = await KokoroJS.from_pretrained(this.modelConfig.repo, {
        dtype,
        progress_callback: (progress) => {
          if (progress.status === "progress" && progress.file) {
            onProgress?.({
              status: `Downloading ${progress.file}`,
              progress: Math.round(progress.progress || 0),
              file: progress.file,
            });
          } else if (progress.status === "ready") {
            onProgress?.({ status: "Model ready" });
          }
        },
      });
      this._isLoaded = true;
      onProgress?.({ status: `Ready (${this._deviceMode.toUpperCase()})!` });
    } catch (error) {
      this.loadPromise = null; // allow a retry after a failed load
      throw error;
    }
  }

  /** Ensure the model is loaded (lazy loading). */
  async ensureLoaded(options) {
    if (!this._isLoaded) await this.load(options);
  }

  /**
   * Get the list of available voices (a copy; safe to mutate).
   *
   * @example
   * ```ts
   * const voices = tts.listVoices();
   * // [{ id: "af_heart", name: "Heart", gender: "female", ... }, ...]
   * ```
   */
  listVoices() {
    return [...this.modelConfig.voices];
  }

  /** Get a specific voice by ID, or null when unknown. */
  getVoice(voiceId) {
    const match = this.modelConfig.voices.find((v) => v.id === voiceId);
    return match ?? null;
  }

  /** Get voices filtered by gender. */
  getVoicesByGender(gender) {
    return this.modelConfig.voices.filter((v) => v.gender === gender);
  }

  /** Get voices whose language equals or starts with the given code. */
  getVoicesByLanguage(language) {
    return this.modelConfig.voices.filter(
      (v) => v.language === language || v.language.startsWith(language)
    );
  }

  /**
   * Generate speech from text.
   *
   * @example
   * ```ts
   * const result = await tts.speak("Hello world", {
   *   voice: "af_heart",
   *   speed: 1.0,
   * });
   *
   * // Play in browser
   * const audioContext = new AudioContext();
   * const buffer = audioContext.createBuffer(1, result.audio.length, result.sampleRate);
   * buffer.copyToChannel(result.audio, 0);
   * const source = audioContext.createBufferSource();
   * source.buffer = buffer;
   * source.connect(audioContext.destination);
   * source.start();
   * ```
   */
  async speak(text, options = {}) {
    await this.ensureLoaded({ onProgress: options.onProgress });
    const { voice = this.modelConfig.defaultVoice, speed = 1 } = options;
    if (!this.getVoice(voice)) {
      throw new Error(`Unknown voice: ${voice}. Use listVoices() to see available options.`);
    }
    if (!this.kokoroInstance) {
      throw new Error("Model not loaded");
    }
    const startedAt = performance.now();
    const generated = await this.kokoroInstance.generate(text, { voice, speed });
    return {
      audio: generated.audio,
      sampleRate: generated.sampling_rate,
      duration: generated.audio.length / generated.sampling_rate,
      voice,
      totalTime: performance.now() - startedAt,
    };
  }

  /**
   * Stream speech generation, yielding one audio chunk per sentence.
   * The generator's return value is the full concatenated result.
   *
   * @example
   * ```ts
   * for await (const chunk of tts.speakStream("Long text...")) {
   *   // chunk.samples = Float32Array
   *   // chunk.sampleRate = 24000
   *   // chunk.isFinal = boolean
   *   playChunk(chunk);
   * }
   * ```
   */
  async *speakStream(text, options = {}) {
    await this.ensureLoaded({ onProgress: options.onProgress });
    const { voice = this.modelConfig.defaultVoice, speed = 1 } = options;
    if (!this.getVoice(voice)) {
      throw new Error(`Unknown voice: ${voice}. Use listVoices() to see available options.`);
    }
    if (!this.kokoroInstance) {
      throw new Error("Model not loaded");
    }
    const startedAt = performance.now();
    const sentences = this.splitIntoSentences(text);
    const pieces = [];
    let chunkIndex = 0;
    let sampleRate = this.modelConfig.sampleRate;
    for (const [i, sentence] of sentences.entries()) {
      if (!sentence.trim()) continue;
      const generated = await this.kokoroInstance.generate(sentence, { voice, speed });
      sampleRate = generated.sampling_rate;
      pieces.push(generated.audio);
      const chunk = {
        samples: generated.audio,
        sampleRate: generated.sampling_rate,
        index: chunkIndex++,
        isFinal: i === sentences.length - 1,
      };
      yield chunk;
      options.onAudioChunk?.(chunk);
    }
    // Stitch the per-sentence audio back into one buffer for the return value.
    const fullAudio = new Float32Array(pieces.reduce((sum, p) => sum + p.length, 0));
    let offset = 0;
    for (const piece of pieces) {
      fullAudio.set(piece, offset);
      offset += piece.length;
    }
    return {
      audio: fullAudio,
      sampleRate,
      duration: fullAudio.length / sampleRate,
      voice,
      totalTime: performance.now() - startedAt,
    };
  }

  /** Split text into sentences for streaming. */
  splitIntoSentences(text) {
    return text.split(SENTENCE_SPLIT_REGEX).filter((s) => s.trim());
  }

  /** Check if the model is loaded. */
  isLoaded() {
    return this._isLoaded;
  }

  /** Get the current device mode ("cpu" | "webgpu"). */
  getDeviceMode() {
    return this._deviceMode;
  }

  /** Get a shallow copy of the model configuration. */
  getModelInfo() {
    return { ...this.modelConfig };
  }

  /** Get the model's output sample rate in Hz. */
  getSampleRate() {
    return this.modelConfig.sampleRate;
  }

  /** Release references and return to the unloaded state. */
  async dispose() {
    this.kokoroInstance = null;
    this._isLoaded = false;
    this.loadPromise = null;
  }
};
541
/**
 * Supertonic TTS - Fast on-device text-to-speech
 *
 * Uses transformers.js with the Supertonic-TTS-ONNX model.
 * Generates speech at 167x realtime with 66M parameters;
 * outputs audio at a 44100 Hz sample rate.
 */
var SupertonicTTS = class {
  pipeline = null; // transformers.js text-to-speech pipeline
  modelConfig; // entry from TTS_MODELS
  loadPromise = null; // in-flight load shared by concurrent callers
  _isLoaded = false;
  _deviceMode = "cpu"; // "cpu" | "webgpu", decided during _load
  voiceEmbeddings = /* @__PURE__ */ new Map(); // voice id -> Float32Array speaker embedding

  /**
   * @param {string} [modelId="supertonic-66m"] - Key into TTS_MODELS.
   * @throws {Error} When the ID is not a known TTS model.
   */
  constructor(modelId = "supertonic-66m") {
    const config = getTTSModelConfig(modelId);
    if (!config) {
      throw new Error(`Unknown TTS model: ${modelId}. Available: ${Object.keys(TTS_MODELS).join(", ")}`);
    }
    this.modelConfig = config;
  }

  /** Load the TTS model; concurrent callers share one in-flight load. */
  async load(options = {}) {
    if (this._isLoaded) return;
    if (!this.loadPromise) {
      this.loadPromise = this._load(options);
    }
    await this.loadPromise;
  }

  /** Actual load work; clears loadPromise on failure so load() can retry. */
  async _load(options = {}) {
    const { onProgress, device = "auto" } = options;
    onProgress?.({ status: `Loading TTS model (${this.modelConfig.id})...` });
    try {
      const inBrowser = typeof window !== "undefined";
      const { pipeline, env } = await import("./transformers.web-u34VxRFM.js");
      // In the browser, pull the ONNX WASM runtime from the CDN.
      if (inBrowser && env.backends?.onnx?.wasm) {
        env.backends.onnx.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.21.0/dist/";
      }
      const useWebGpu = device === "webgpu" || (device === "auto" && inBrowser && "gpu" in navigator);
      this._deviceMode = useWebGpu ? "webgpu" : "cpu";
      onProgress?.({ status: `Loading Supertonic model...` });
      this.pipeline = await pipeline("text-to-speech", this.modelConfig.repo, {
        dtype: "fp32",
        device: this._deviceMode,
        progress_callback: (progress) => {
          if (progress.status === "progress" && progress.file) {
            onProgress?.({
              status: `Downloading ${progress.file}`,
              progress: Math.round(progress.progress || 0),
              file: progress.file,
            });
          }
        },
      });
      onProgress?.({ status: "Loading voice embeddings..." });
      await this.loadVoiceEmbeddings();
      this._isLoaded = true;
      onProgress?.({ status: `Ready (${this._deviceMode.toUpperCase()})!` });
    } catch (error) {
      this.loadPromise = null; // allow a retry after a failed load
      throw error;
    }
  }

  /**
   * Load speaker embeddings for all voices.
   * Supertonic uses 101x128 = 12,928 floats per voice.
   * Currently a no-op: embeddings are fetched lazily in speak().
   */
  async loadVoiceEmbeddings() {}

  /** Ensure the model is loaded (lazy loading). */
  async ensureLoaded(options) {
    if (!this._isLoaded) await this.load(options);
  }

  /** Get the list of available voices (a copy; safe to mutate). */
  listVoices() {
    return [...this.modelConfig.voices];
  }

  /** Get a specific voice by ID, or null when unknown. */
  getVoice(voiceId) {
    const match = this.modelConfig.voices.find((v) => v.id === voiceId);
    return match ?? null;
  }

  /** Get voices filtered by gender. */
  getVoicesByGender(gender) {
    return this.modelConfig.voices.filter((v) => v.gender === gender);
  }

  /**
   * Generate speech from text.
   * Speaker embeddings are fetched once per voice from the Hugging Face
   * repo and cached; on fetch failure a flat placeholder embedding is used.
   */
  async speak(text, options = {}) {
    await this.ensureLoaded({ onProgress: options.onProgress });
    const { voice = this.modelConfig.defaultVoice } = options;
    if (!this.getVoice(voice)) {
      throw new Error(`Unknown voice: ${voice}. Use listVoices() to see available options.`);
    }
    if (!this.pipeline) {
      throw new Error("Model not loaded");
    }
    const startedAt = performance.now();
    let speakerEmbedding = this.voiceEmbeddings.get(voice);
    if (!speakerEmbedding) {
      try {
        const voiceUrl = `https://huggingface.co/${this.modelConfig.repo}/resolve/main/voices/${voice}.bin`;
        const response = await fetch(voiceUrl);
        if (!response.ok) {
          throw new Error(`Failed to load voice: ${response.status}`);
        }
        speakerEmbedding = new Float32Array(await response.arrayBuffer());
      } catch {
        // Best-effort fallback: a flat placeholder embedding (101x128 floats).
        speakerEmbedding = new Float32Array(12928).fill(0.1);
      }
      this.voiceEmbeddings.set(voice, speakerEmbedding);
    }
    const result = await this.pipeline(text, { speaker_embeddings: speakerEmbedding });
    const audio = result.audio;
    const sampleRate = result.sampling_rate;
    return {
      audio,
      sampleRate,
      duration: audio.length / sampleRate,
      voice,
      totalTime: performance.now() - startedAt,
    };
  }

  /**
   * Stream speech generation, yielding one audio chunk per sentence.
   * Delegates each sentence to speak(); the generator's return value is
   * the full concatenated result.
   */
  async *speakStream(text, options = {}) {
    await this.ensureLoaded({ onProgress: options.onProgress });
    const { voice = this.modelConfig.defaultVoice, speed = 1 } = options;
    if (!this.getVoice(voice)) {
      throw new Error(`Unknown voice: ${voice}. Use listVoices() to see available options.`);
    }
    const startedAt = performance.now();
    const sentences = text.split(SENTENCE_SPLIT_REGEX).filter((s) => s.trim());
    const pieces = [];
    let chunkIndex = 0;
    let sampleRate = this.modelConfig.sampleRate;
    for (const [i, sentence] of sentences.entries()) {
      if (!sentence.trim()) continue;
      const result = await this.speak(sentence, { voice, speed });
      sampleRate = result.sampleRate;
      pieces.push(result.audio);
      const chunk = {
        samples: result.audio,
        sampleRate: result.sampleRate,
        index: chunkIndex++,
        isFinal: i === sentences.length - 1,
      };
      yield chunk;
      options.onAudioChunk?.(chunk);
    }
    // Stitch the per-sentence audio back into one buffer for the return value.
    const fullAudio = new Float32Array(pieces.reduce((sum, p) => sum + p.length, 0));
    let offset = 0;
    for (const piece of pieces) {
      fullAudio.set(piece, offset);
      offset += piece.length;
    }
    return {
      audio: fullAudio,
      sampleRate,
      duration: fullAudio.length / sampleRate,
      voice,
      totalTime: performance.now() - startedAt,
    };
  }

  /** Check if the model is loaded. */
  isLoaded() {
    return this._isLoaded;
  }

  /** Get the current device mode ("cpu" | "webgpu"). */
  getDeviceMode() {
    return this._deviceMode;
  }

  /** Get a shallow copy of the model configuration. */
  getModelInfo() {
    return { ...this.modelConfig };
  }

  /** Get the model's output sample rate in Hz. */
  getSampleRate() {
    return this.modelConfig.sampleRate;
  }

  /** Release references, drop cached embeddings, and reset state. */
  async dispose() {
    this.pipeline = null;
    this.voiceEmbeddings.clear();
    this._isLoaded = false;
    this.loadPromise = null;
  }
};
715
/**
 * Factory: create the TTS implementation matching a model ID.
 * IDs beginning with "supertonic" map to SupertonicTTS; every other ID
 * is handled by the Kokoro engine.
 */
function createTTS(modelId = "kokoro-82m") {
  return modelId.startsWith("supertonic")
    ? new SupertonicTTS(modelId)
    : new KokoroTTS(modelId);
}
722
+
723
+ //#endregion
724
+ export { createTTS };
725
+ //# sourceMappingURL=tts-C2FzKuSx.js.map