agentgui 1.0.168 → 1.0.170

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/speech.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { createRequire } from 'module';
2
2
  import fs from 'fs';
3
+ import os from 'os';
3
4
  import path from 'path';
4
5
  import { fileURLToPath } from 'url';
5
6
 
@@ -7,19 +8,74 @@ const require = createRequire(import.meta.url);
7
8
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
8
9
  const ROOT = path.dirname(__dirname);
9
10
  const DATA_DIR = path.join(ROOT, 'data');
11
+ const VOICES_DIR = path.join(ROOT, 'voices');
12
+ const HOME_VOICES_DIR = path.join(os.homedir(), 'voices');
13
+ const AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.flac', '.m4a'];
14
+ const MIN_WAV_SIZE = 1000;
10
15
 
11
- const SPEAKER_EMBEDDINGS_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
16
+ const BASE_VOICES = [
17
+ { id: 'default', name: 'Default', gender: 'male', accent: 'US' },
18
+ { id: 'bdl', name: 'BDL', gender: 'male', accent: 'US' },
19
+ { id: 'slt', name: 'SLT', gender: 'female', accent: 'US' },
20
+ { id: 'clb', name: 'CLB', gender: 'female', accent: 'US' },
21
+ { id: 'rms', name: 'RMS', gender: 'male', accent: 'US' },
22
+ { id: 'awb', name: 'AWB', gender: 'male', accent: 'Scottish' },
23
+ { id: 'jmk', name: 'JMK', gender: 'male', accent: 'Canadian' },
24
+ { id: 'ksp', name: 'KSP', gender: 'male', accent: 'Indian' },
25
+ ];
26
+
27
/**
 * Lists the audio files found directly inside `dir` as custom voice descriptors.
 *
 * A file qualifies when its extension, compared case-insensitively, is one of
 * AUDIO_EXTENSIONS. The voice id is `custom_` plus the file's base name with
 * every character outside [a-zA-Z0-9_-] replaced by `_`; the display name is
 * the base name with underscores turned into spaces.
 *
 * @param {string} dir - Directory to scan; a missing directory yields [].
 * @returns {Array<{id: string, name: string, gender: string, accent: string, isCustom: boolean, sourceDir: string}>}
 *   Descriptors in directory-listing order. Scan errors are logged and the
 *   voices collected so far are returned.
 */
function scanVoiceDir(dir) {
  const voices = [];
  try {
    if (!fs.existsSync(dir)) return voices;
    for (const file of fs.readdirSync(dir)) {
      // Keep the extension exactly as written on disk: path.basename() only
      // strips a suffix that matches case-sensitively, so stripping the
      // lower-cased form would leave ".WAV" inside the voice name and id.
      const ext = path.extname(file);
      if (!AUDIO_EXTENSIONS.includes(ext.toLowerCase())) continue;
      const baseName = path.basename(file, ext);
      const id = 'custom_' + baseName.replace(/[^a-zA-Z0-9_-]/g, '_');
      const name = baseName.replace(/_/g, ' ');
      voices.push({ id, name, gender: 'custom', accent: 'custom', isCustom: true, sourceDir: dir });
    }
  } catch (err) {
    console.error('[VOICES] Error scanning', dir + ':', err.message);
  }
  return voices;
}
44
+
45
/**
 * Collects custom voices from VOICES_DIR and then HOME_VOICES_DIR.
 *
 * When the same voice id turns up more than once, the first occurrence
 * (in scan order) is kept and later ones are dropped.
 *
 * @returns {Array<object>} De-duplicated voice descriptors in discovery order.
 */
function loadCustomVoices() {
  // A Map iterates in insertion order, so the first-seen order is preserved.
  const firstById = new Map();
  [VOICES_DIR, HOME_VOICES_DIR].forEach((directory) => {
    scanVoiceDir(directory).forEach((voice) => {
      if (!firstById.has(voice.id)) {
        firstById.set(voice.id, voice);
      }
    });
  });
  return Array.from(firstById.values());
}
57
+
58
/**
 * Returns every selectable voice: the BASE_VOICES entries followed by the
 * custom voices discovered on disk at call time.
 *
 * @returns {Array<object>} A new array on each call.
 */
function getVoices() {
  const customVoices = loadCustomVoices();
  return BASE_VOICES.concat(customVoices);
}
61
+
62
+ const SPEAKER_OFFSETS = { awb: 0, bdl: 1200, clb: 2300, jmk: 3500, ksp: 4700, rms: 5900, slt: 7100 };
63
+ const SPEAKER_EMBEDDINGS_URL = 'https://huggingface.co/datasets/Xenova/speaker_embeddings/resolve/main/spkrec-xvectors-voxceleb.hf';
12
64
  const SPEAKER_EMBEDDINGS_PATH = path.join(DATA_DIR, 'speaker_embeddings.bin');
13
- const SAMPLE_RATE_TTS = 16000;
14
- const SAMPLE_RATE_STT = 16000;
15
- const MIN_WAV_SIZE = 44;
65
+ const DATASET_API = 'https://datasets-server.huggingface.co/rows?dataset=Xenova%2Fspeaker_embeddings&config=default&split=train';
66
+ const SAMPLES_TO_AVERAGE = 30;
16
67
 
17
68
  let transformersModule = null;
18
69
  let sttPipeline = null;
19
70
  let ttsPipeline = null;
20
71
  let speakerEmbeddings = null;
72
+ let speakerEmbeddingPipeline = null;
21
73
  let sttLoading = false;
22
74
  let ttsLoading = false;
75
+ let speakerEmbeddingLoading = false;
76
+ const voiceEmbeddingsCache = new Map();
77
+ const SAMPLE_RATE_STT = 16000;
78
+ const SAMPLE_RATE_TTS = 16000;
23
79
 
24
80
  const TTS_CACHE_MAX = 100;
25
81
  const ttsCache = new Map();
@@ -52,6 +108,115 @@ async function ensureSpeakerEmbeddings() {
52
108
  return speakerEmbeddings;
53
109
  }
54
110
 
111
/**
 * Resolves the speaker embedding to use for `voiceId`.
 *
 * Lookup order: in-memory cache, then the `speaker_<voiceId>.bin` file in
 * DATA_DIR, then (for `custom_*` ids) generation from the custom voice audio,
 * then (for ids listed in SPEAKER_OFFSETS) an average of matching x-vectors
 * fetched from DATASET_API. Anything else resolves to the default embedding.
 *
 * @param {string|null|undefined} voiceId - Requested voice id. Empty,
 *   'default', non-string, malformed or unknown ids all resolve to the default
 *   embedding from ensureSpeakerEmbeddings().
 * @returns {Promise<Float32Array>} A 512-float embedding for known voices, or
 *   whatever ensureSpeakerEmbeddings() resolves to for the default.
 * @throws {Error} When the dataset request returns a non-OK status.
 */
async function loadVoiceEmbedding(voiceId) {
  const EMBEDDING_DIM = 512;
  if (!voiceId || voiceId === 'default') return ensureSpeakerEmbeddings();
  // The id is interpolated into a file name below and callers pass it through
  // from request bodies, so accept only the characters real voice ids use.
  if (typeof voiceId !== 'string' || !/^[A-Za-z0-9_-]+$/.test(voiceId)) {
    return ensureSpeakerEmbeddings();
  }
  if (voiceEmbeddingsCache.has(voiceId)) return voiceEmbeddingsCache.get(voiceId);
  const binPath = path.join(DATA_DIR, `speaker_${voiceId}.bin`);
  if (fs.existsSync(binPath)) {
    const buf = fs.readFileSync(binPath);
    if (buf.length === EMBEDDING_DIM * Float32Array.BYTES_PER_ELEMENT) {
      // Copy into a fresh, exactly-sized ArrayBuffer before viewing as floats.
      const emb = new Float32Array(new Uint8Array(buf).buffer);
      voiceEmbeddingsCache.set(voiceId, emb);
      return emb;
    }
    // A truncated or foreign file would otherwise throw a RangeError or yield
    // a wrongly sized vector; ignore it and rebuild the embedding below.
    console.error('[VOICES] Ignoring malformed embedding file:', binPath);
  }
  if (voiceId.startsWith('custom_')) {
    return generateEmbeddingFromCustomVoice(voiceId);
  }
  // Own-property check so inherited keys such as "constructor" are not
  // mistaken for a dataset offset.
  if (!Object.hasOwn(SPEAKER_OFFSETS, voiceId)) return ensureSpeakerEmbeddings();
  const offset = SPEAKER_OFFSETS[voiceId];
  const url = `${DATASET_API}&offset=${offset}&length=${SAMPLES_TO_AVERAGE}`;
  const resp = await fetch(url);
  if (!resp.ok) throw new Error('Failed to fetch voice embeddings for ' + voiceId);
  const data = await resp.json();
  const rows = Array.isArray(data?.rows) ? data.rows : [];
  const avg = new Float32Array(EMBEDDING_DIM);
  let count = 0;
  for (const item of rows) {
    const filename = item?.row?.filename;
    const xvector = item?.row?.xvector;
    // Skip rows that do not carry a usable filename and full-length vector.
    if (typeof filename !== 'string' || !xvector || xvector.length < EMBEDDING_DIM) continue;
    const match = filename.match(/cmu_us_(\w+)_arctic/);
    if (!match || match[1] !== voiceId) continue;
    for (let i = 0; i < EMBEDDING_DIM; i++) avg[i] += xvector[i];
    count++;
  }
  if (count === 0) return ensureSpeakerEmbeddings();
  for (let i = 0; i < EMBEDDING_DIM; i++) avg[i] /= count;
  if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
  // Persist so later runs can skip the network round-trip.
  fs.writeFileSync(binPath, Buffer.from(avg.buffer));
  voiceEmbeddingsCache.set(voiceId, avg);
  return avg;
}
146
+
147
/**
 * Returns the shared speaker-embedding pipeline, creating it on first use.
 *
 * The first caller loads the 'speechbrain/spkrec-xvectors-voxceleb'
 * feature-extraction pipeline; callers that arrive while that load is in
 * progress poll every 100 ms until it finishes.
 *
 * @returns {Promise<Function>} The pipeline instance.
 * @throws {Error} If this call's load fails, or if the load this call was
 *   waiting on finished without producing a pipeline.
 */
async function getSpeakerEmbeddingPipeline() {
  if (speakerEmbeddingPipeline) return speakerEmbeddingPipeline;

  if (!speakerEmbeddingLoading) {
    // This call owns the load; the flag is always cleared in `finally`.
    speakerEmbeddingLoading = true;
    try {
      const { pipeline: createPipeline, env: runtimeEnv } = await loadTransformers();
      runtimeEnv.allowRemoteModels = true;
      const options = { device: 'cpu', dtype: 'fp32' };
      speakerEmbeddingPipeline = await createPipeline(
        'feature-extraction',
        'speechbrain/spkrec-xvectors-voxceleb',
        options,
      );
      return speakerEmbeddingPipeline;
    } catch (err) {
      speakerEmbeddingPipeline = null;
      throw new Error('Speaker embedding model load failed: ' + err.message);
    } finally {
      speakerEmbeddingLoading = false;
    }
  }

  // Another call is already loading: wait for it to settle.
  while (speakerEmbeddingLoading) {
    await new Promise((wake) => setTimeout(wake, 100));
  }
  if (!speakerEmbeddingPipeline) {
    throw new Error('Speaker embedding pipeline failed to load');
  }
  return speakerEmbeddingPipeline;
}
170
+
171
/**
 * Locates the audio file behind a custom voice id.
 *
 * Voice ids are built from file names by replacing every character outside
 * [a-zA-Z0-9_-] with `_`, so the id cannot simply be turned back into a file
 * name (e.g. `custom_My_Voice` may come from "My Voice.wav"). Instead, each
 * voice directory is listed and every audio file's sanitised base name is
 * compared with the id. Only files directly inside VOICES_DIR or
 * HOME_VOICES_DIR can be returned, so an id containing path separators
 * cannot reach outside them.
 *
 * @param {string} voiceId - Voice id, normally prefixed with `custom_`.
 * @returns {string|null} Path of the first match (VOICES_DIR is searched
 *   before HOME_VOICES_DIR, each in directory-listing order), or null.
 */
function findCustomVoiceFile(voiceId) {
  if (typeof voiceId !== 'string') return null;
  const wanted = voiceId.replace(/^custom_/, '');
  if (!wanted) return null;
  const sanitize = (stem) => stem.replace(/[^a-zA-Z0-9_-]/g, '_');
  for (const dir of [VOICES_DIR, HOME_VOICES_DIR]) {
    let entries;
    try {
      entries = fs.readdirSync(dir);
    } catch {
      continue; // Directory missing or unreadable: nothing to match there.
    }
    for (const file of entries) {
      const ext = path.extname(file);
      if (!AUDIO_EXTENSIONS.includes(ext.toLowerCase())) continue;
      const stemId = sanitize(path.basename(file, ext));
      // Ids may also have been derived with the lower-cased extension, which
      // leaves an upper-case extension in the stem ("Foo.WAV" -> "Foo_WAV").
      const legacyId = sanitize(path.basename(file, ext.toLowerCase()));
      if (stemId === wanted || legacyId === wanted) return path.join(dir, file);
    }
  }
  return null;
}
181
+
182
/**
 * Reads an audio file and returns its samples after passing them through
 * resampleTo16k().
 *
 * `.wav` files (extension compared case-insensitively) are decoded with
 * decodeWavToFloat32(); every other extension is decoded with the
 * `audio-decode` package, using only the first channel.
 *
 * @param {string} filePath - Path of the audio file to read.
 * @returns {Promise<*>} Whatever resampleTo16k() returns for the decoded audio.
 */
async function decodeAudioFile(filePath) {
  const fileBytes = fs.readFileSync(filePath);
  const isWav = path.extname(filePath).toLowerCase() === '.wav';

  if (!isWav) {
    // Imported lazily so the decoder is only loaded when a non-WAV file is used.
    const { default: decodeAudio } = await import('audio-decode');
    const decodedBuffer = await decodeAudio(fileBytes);
    const firstChannel = decodedBuffer.getChannelData(0);
    return resampleTo16k(firstChannel, decodedBuffer.sampleRate);
  }

  const { audio, sampleRate } = decodeWavToFloat32(fileBytes);
  return resampleTo16k(audio, sampleRate);
}
194
+
195
/**
 * Builds a speaker embedding from the audio file of a custom voice, stores it
 * in `speaker_<voiceId>.bin` under DATA_DIR, and caches it in memory.
 *
 * @param {string} voiceId - Custom voice id used to locate the audio file.
 * @returns {Promise<Float32Array>} The 512-float embedding; when no audio
 *   file is found, the result of ensureSpeakerEmbeddings() instead.
 * @throws {Error} When the decoded audio has fewer than SAMPLE_RATE_STT * 0.5 samples.
 */
async function generateEmbeddingFromCustomVoice(voiceId) {
  const sourceFile = findCustomVoiceFile(voiceId);
  if (!sourceFile) {
    console.error('[VOICES] Custom voice file not found for:', voiceId);
    return ensureSpeakerEmbeddings();
  }

  console.log('[VOICES] Generating embedding from:', sourceFile);
  const samples = await decodeAudioFile(sourceFile);
  const minimumSamples = SAMPLE_RATE_STT * 0.5;
  if (samples.length < minimumSamples) {
    throw new Error('Audio too short for embedding extraction (need at least 0.5 seconds)');
  }

  const extractor = await getSpeakerEmbeddingPipeline();
  const features = await extractor(samples, { pooling: 'mean', normalize: true });

  // Copy at most 512 values; any remaining slots stay zero.
  const vector = new Float32Array(512);
  const copyCount = Math.min(512, features.data.length);
  for (let idx = 0; idx < copyCount; idx += 1) {
    vector[idx] = features.data[idx];
  }

  if (!fs.existsSync(DATA_DIR)) {
    fs.mkdirSync(DATA_DIR, { recursive: true });
  }
  const targetPath = path.join(DATA_DIR, `speaker_${voiceId}.bin`);
  fs.writeFileSync(targetPath, Buffer.from(vector.buffer));
  voiceEmbeddingsCache.set(voiceId, vector);
  console.log('[VOICES] Generated embedding for custom voice:', voiceId);
  return vector;
}
219
+
55
220
  async function getSTT() {
56
221
  if (sttPipeline) return sttPipeline;
57
222
  if (sttLoading) {
@@ -233,36 +398,38 @@ function cachePut(key, buf) {
233
398
  ttsCache.set(key, buf);
234
399
  }
235
400
 
236
- async function synthesize(text) {
237
- const cached = ttsCache.get(text);
401
+ async function synthesize(text, voiceId) {
402
+ const cacheKey = (voiceId || 'default') + ':' + text;
403
+ const cached = ttsCache.get(cacheKey);
238
404
  if (cached) {
239
- ttsCache.delete(text);
240
- ttsCache.set(text, cached);
405
+ ttsCache.delete(cacheKey);
406
+ ttsCache.set(cacheKey, cached);
241
407
  return cached;
242
408
  }
243
409
  const tts = await getTTS();
244
- const embeddings = await ensureSpeakerEmbeddings();
410
+ const embeddings = await loadVoiceEmbedding(voiceId);
245
411
  const result = await tts(text, { speaker_embeddings: embeddings });
246
412
  const wav = encodeWav(result.audio, result.sampling_rate || SAMPLE_RATE_TTS);
247
- cachePut(text, wav);
413
+ cachePut(cacheKey, wav);
248
414
  return wav;
249
415
  }
250
416
 
251
- async function* synthesizeStream(text) {
417
+ async function* synthesizeStream(text, voiceId) {
252
418
  const sentences = splitSentences(text);
253
419
  const tts = await getTTS();
254
- const embeddings = await ensureSpeakerEmbeddings();
420
+ const embeddings = await loadVoiceEmbedding(voiceId);
255
421
  for (const sentence of sentences) {
256
- const cached = ttsCache.get(sentence);
422
+ const cacheKey = (voiceId || 'default') + ':' + sentence;
423
+ const cached = ttsCache.get(cacheKey);
257
424
  if (cached) {
258
- ttsCache.delete(sentence);
259
- ttsCache.set(sentence, cached);
425
+ ttsCache.delete(cacheKey);
426
+ ttsCache.set(cacheKey, cached);
260
427
  yield cached;
261
428
  continue;
262
429
  }
263
430
  const result = await tts(sentence, { speaker_embeddings: embeddings });
264
431
  const wav = encodeWav(result.audio, result.sampling_rate || SAMPLE_RATE_TTS);
265
- cachePut(sentence, wav);
432
+ cachePut(cacheKey, wav);
266
433
  yield wav;
267
434
  }
268
435
  }
@@ -276,4 +443,4 @@ function getStatus() {
276
443
  };
277
444
  }
278
445
 
279
- export { transcribe, synthesize, synthesizeStream, getSTT, getTTS, getStatus };
446
+ export { transcribe, synthesize, synthesizeStream, getSTT, getTTS, getStatus, getVoices };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.168",
3
+ "version": "1.0.170",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
@@ -23,6 +23,7 @@
23
23
  "dependencies": {
24
24
  "@anthropic-ai/claude-code": "^2.1.37",
25
25
  "@huggingface/transformers": "^3.8.1",
26
+ "audio-decode": "^2.2.3",
26
27
  "better-sqlite3": "^12.6.2",
27
28
  "busboy": "^1.6.0",
28
29
  "express": "^5.2.1",
package/server.js CHANGED
@@ -535,16 +535,27 @@ const server = http.createServer(async (req, res) => {
535
535
  return;
536
536
  }
537
537
 
538
+ if (routePath === '/api/voices' && req.method === 'GET') {
539
+ try {
540
+ const { getVoices } = await getSpeech();
541
+ sendJSON(req, res, 200, { ok: true, voices: getVoices() });
542
+ } catch (err) {
543
+ sendJSON(req, res, 200, { ok: true, voices: [] });
544
+ }
545
+ return;
546
+ }
547
+
538
548
  if (routePath === '/api/tts' && req.method === 'POST') {
539
549
  try {
540
550
  const body = await parseBody(req);
541
551
  const text = body.text || '';
552
+ const voiceId = body.voiceId || null;
542
553
  if (!text) {
543
- sendJSON(req, res, 400, { error: 'No text provided' });
554
+ sendJSON(req, res, 400, { error: 'No text provided' });
544
555
  return;
545
556
  }
546
557
  const { synthesize } = await getSpeech();
547
- const wavBuffer = await synthesize(text);
558
+ const wavBuffer = await synthesize(text, voiceId);
548
559
  res.writeHead(200, { 'Content-Type': 'audio/wav', 'Content-Length': wavBuffer.length });
549
560
  res.end(wavBuffer);
550
561
  } catch (err) {
@@ -558,6 +569,7 @@ const server = http.createServer(async (req, res) => {
558
569
  try {
559
570
  const body = await parseBody(req);
560
571
  const text = body.text || '';
572
+ const voiceId = body.voiceId || null;
561
573
  if (!text) {
562
574
  sendJSON(req, res, 400, { error: 'No text provided' });
563
575
  return;
@@ -569,7 +581,7 @@ const server = http.createServer(async (req, res) => {
569
581
  'X-Content-Type': 'audio/wav-stream',
570
582
  'Cache-Control': 'no-cache'
571
583
  });
572
- for await (const wavChunk of synthesizeStream(text)) {
584
+ for await (const wavChunk of synthesizeStream(text, voiceId)) {
573
585
  const lenBuf = Buffer.alloc(4);
574
586
  lenBuf.writeUInt32BE(wavChunk.length, 0);
575
587
  res.write(lenBuf);
@@ -589,7 +601,7 @@ const server = http.createServer(async (req, res) => {
589
601
  const { getStatus } = await getSpeech();
590
602
  sendJSON(req, res, 200, getStatus());
591
603
  } catch (err) {
592
- sendJSON(req, res, 200, { sttReady: false, ttsReady: false, sttLoading: false, ttsLoading: false });
604
+ sendJSON(req, res, 200, { sttReady: false, ttsReady: false, sttLoading: false, ttsLoading: false });
593
605
  }
594
606
  return;
595
607
  }
package/static/index.html CHANGED
@@ -1056,6 +1056,28 @@
1056
1056
  border-color: var(--color-error);
1057
1057
  }
1058
1058
 
1059
+ .voice-selector-wrapper {
1060
+ display: flex;
1061
+ align-items: center;
1062
+ gap: 0.25rem;
1063
+ }
1064
+
1065
+ .voice-selector {
1066
+ padding: 0.2rem 0.5rem;
1067
+ border: 1px solid var(--color-border);
1068
+ border-radius: 0.375rem;
1069
+ background: var(--color-bg-secondary);
1070
+ color: var(--color-text-primary);
1071
+ font-size: 0.75rem;
1072
+ cursor: pointer;
1073
+ max-width: 160px;
1074
+ }
1075
+
1076
+ .voice-selector:focus {
1077
+ outline: none;
1078
+ border-color: var(--color-primary);
1079
+ }
1080
+
1059
1081
  .voice-empty {
1060
1082
  text-align: center;
1061
1083
  color: var(--color-text-secondary);
@@ -2146,6 +2168,11 @@
2146
2168
  <input type="checkbox" id="voiceTTSToggle" checked>
2147
2169
  <span>Auto-speak responses</span>
2148
2170
  </label>
2171
+ <div class="voice-selector-wrapper">
2172
+ <select class="voice-selector" id="voiceSelector" title="Select voice">
2173
+ <option value="default">Default</option>
2174
+ </select>
2175
+ </div>
2149
2176
  <button class="voice-stop-btn" id="voiceStopSpeaking" title="Stop speaking">Stop</button>
2150
2177
  </div>
2151
2178
  </div>
@@ -14,12 +14,62 @@
14
14
  var TARGET_SAMPLE_RATE = 16000;
15
15
  var spokenChunks = new Set();
16
16
  var isLoadingHistory = false;
17
+ var selectedVoiceId = localStorage.getItem('voice-selected-id') || 'default';
17
18
 
18
19
  function init() {
19
20
  setupTTSToggle();
20
21
  setupUI();
21
22
  setupStreamingListener();
22
23
  setupAgentSelector();
24
+ setupVoiceSelector();
25
+ }
26
+
27
/**
 * Fills the #voiceSelector dropdown from GET /api/voices and keeps
 * `selectedVoiceId` (persisted under the 'voice-selected-id' localStorage
 * key) in step with the user's choice.
 *
 * Built-in and custom voices are placed in separate option groups. If the
 * voice list cannot be loaded or is empty, the dropdown's existing options
 * are left untouched.
 */
function setupVoiceSelector() {
  var selector = document.getElementById('voiceSelector');
  if (!selector) return;
  var saved = localStorage.getItem('voice-selected-id');
  if (saved) selectedVoiceId = saved;

  // Builds one <optgroup>; `describe` turns a voice into its option label.
  function appendGroup(label, voices, describe) {
    if (!voices.length) return;
    var group = document.createElement('optgroup');
    group.label = label;
    voices.forEach(function(voice) {
      var opt = document.createElement('option');
      opt.value = voice.id;
      opt.textContent = describe(voice);
      group.appendChild(opt);
    });
    selector.appendChild(group);
  }

  fetch(BASE + '/api/voices')
    .then(function(res) {
      if (!res.ok) throw new Error('Voice list request failed: ' + res.status);
      return res.json();
    })
    .then(function(data) {
      // Keep the existing options when there is nothing to show; clearing
      // them for an empty list would leave a blank dropdown.
      if (!data.ok || !Array.isArray(data.voices) || !data.voices.length) return;
      selector.innerHTML = '';
      var builtIn = data.voices.filter(function(v) { return !v.isCustom; });
      var custom = data.voices.filter(function(v) { return v.isCustom; });
      appendGroup('Built-in Voices', builtIn, function(voice) {
        var parts = [];
        if (voice.gender) parts.push(voice.gender);
        if (voice.accent) parts.push(voice.accent);
        return voice.name + (parts.length ? ' (' + parts.join(', ') + ')' : '');
      });
      appendGroup('Custom Voices', custom, function(voice) { return voice.name; });
      // Compare ids directly rather than building a CSS selector from the
      // stored value, which would throw on quotes or other special characters.
      var savedAvailable = !!saved && data.voices.some(function(v) { return v.id === saved; });
      if (savedAvailable) {
        selector.value = saved;
      } else {
        // The stored voice is gone: follow what the dropdown now shows so the
        // UI and the id sent with requests agree.
        selectedVoiceId = selector.value;
      }
    })
    .catch(function(err) {
      console.warn('[voice] Could not load voice list:', err);
    });

  selector.addEventListener('change', function() {
    selectedVoiceId = selector.value;
    localStorage.setItem('voice-selected-id', selectedVoiceId);
  });
}
24
74
 
25
75
  function syncVoiceSelector() {
@@ -286,53 +336,78 @@
286
336
  var text = speechQueue.shift();
287
337
  audioChunkQueue = [];
288
338
  isPlayingChunk = false;
289
- fetch(BASE + '/api/tts-stream', {
290
- method: 'POST',
291
- headers: { 'Content-Type': 'application/json' },
292
- body: JSON.stringify({ text: text })
293
- }).then(function(resp) {
294
- if (!resp.ok) throw new Error('TTS failed');
295
- var reader = resp.body.getReader();
296
- var buffer = new Uint8Array(0);
297
-
298
- function concat(a, b) {
299
- var c = new Uint8Array(a.length + b.length);
300
- c.set(a, 0);
301
- c.set(b, a.length);
302
- return c;
303
- }
339
+
340
+ function tryStreaming() {
341
+ fetch(BASE + '/api/tts-stream', {
342
+ method: 'POST',
343
+ headers: { 'Content-Type': 'application/json' },
344
+ body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
345
+ }).then(function(resp) {
346
+ if (!resp.ok) throw new Error('TTS stream failed');
347
+ var reader = resp.body.getReader();
348
+ var buffer = new Uint8Array(0);
304
349
 
305
- function pump() {
306
- return reader.read().then(function(result) {
307
- if (result.done) {
308
- streamDone = true;
309
- if (!isPlayingChunk && audioChunkQueue.length === 0) {
310
- isSpeaking = false;
311
- processQueue();
350
+ function concat(a, b) {
351
+ var c = new Uint8Array(a.length + b.length);
352
+ c.set(a, 0);
353
+ c.set(b, a.length);
354
+ return c;
355
+ }
356
+
357
+ function pump() {
358
+ return reader.read().then(function(result) {
359
+ if (result.done) {
360
+ streamDone = true;
361
+ if (!isPlayingChunk && audioChunkQueue.length === 0) {
362
+ isSpeaking = false;
363
+ processQueue();
364
+ }
365
+ return;
312
366
  }
313
- return;
314
- }
315
- buffer = concat(buffer, result.value);
316
- while (buffer.length >= 4) {
317
- var view = new DataView(buffer.buffer, buffer.byteOffset, 4);
318
- var chunkLen = view.getUint32(0, false);
319
- if (buffer.length < 4 + chunkLen) break;
320
- var wavData = buffer.slice(4, 4 + chunkLen);
321
- buffer = buffer.slice(4 + chunkLen);
322
- var blob = new Blob([wavData], { type: 'audio/wav' });
323
- audioChunkQueue.push(blob);
324
- if (!isPlayingChunk) playNextChunk();
325
- }
326
- return pump();
327
- });
328
- }
367
+ buffer = concat(buffer, result.value);
368
+ while (buffer.length >= 4) {
369
+ var view = new DataView(buffer.buffer, buffer.byteOffset, 4);
370
+ var chunkLen = view.getUint32(0, false);
371
+ if (buffer.length < 4 + chunkLen) break;
372
+ var wavData = buffer.slice(4, 4 + chunkLen);
373
+ buffer = buffer.slice(4 + chunkLen);
374
+ var blob = new Blob([wavData], { type: 'audio/wav' });
375
+ audioChunkQueue.push(blob);
376
+ if (!isPlayingChunk) playNextChunk();
377
+ }
378
+ return pump();
379
+ });
380
+ }
329
381
 
330
- return pump();
331
- }).catch(function() {
332
- streamDone = true;
333
- isSpeaking = false;
334
- processQueue();
335
- });
382
+ return pump();
383
+ }).catch(function() {
384
+ tryNonStreaming(text);
385
+ });
386
+ }
387
+
388
/**
 * Fallback used when the streaming request fails: fetches the whole
 * utterance from POST /api/tts as a single WAV and queues it for playback.
 *
 * On success the speech queue only advances immediately when nothing is
 * playing and no chunk is pending; this is the same guard the streaming
 * path applies when its stream ends. Otherwise the queue is left for the
 * playback side to advance once the clip has finished
 * (NOTE(review): presumably playNextChunk does this when `streamDone` is
 * set, as the streaming path relies on — confirm).
 * On failure the current item is abandoned and the queue advances.
 *
 * @param {string} txt - Text to synthesise.
 */
function tryNonStreaming(txt) {
  fetch(BASE + '/api/tts', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text: txt, voiceId: selectedVoiceId })
  }).then(function(resp) {
    if (!resp.ok) throw new Error('TTS failed');
    return resp.arrayBuffer();
  }).then(function(buf) {
    var blob = new Blob([buf], { type: 'audio/wav' });
    audioChunkQueue.push(blob);
    if (!isPlayingChunk) playNextChunk();
    streamDone = true;
    // Advancing while the clip is still playing would start the next
    // utterance on top of it.
    if (!isPlayingChunk && audioChunkQueue.length === 0) {
      isSpeaking = false;
      processQueue();
    }
  }).catch(function() {
    streamDone = true;
    isSpeaking = false;
    processQueue();
  });
}
409
+
410
+ tryStreaming();
336
411
  }
337
412
 
338
413
  function stopSpeaking() {