agentgui 1.0.174 → 1.0.175

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/lib/speech.js CHANGED
@@ -115,6 +115,8 @@ let speakerEmbeddingPipeline = null;
115
115
  let sttLoading = false;
116
116
  let ttsLoading = false;
117
117
  let speakerEmbeddingLoading = false;
118
+ let ttsLoadError = null;
119
+ let sttLoadError = null;
118
120
  const voiceEmbeddingsCache = new Map();
119
121
  const SAMPLE_RATE_STT = 16000;
120
122
  const SAMPLE_RATE_TTS = 16000;
@@ -261,8 +263,10 @@ async function generateEmbeddingFromCustomVoice(voiceId) {
261
263
 
262
264
  async function getSTT() {
263
265
  if (sttPipeline) return sttPipeline;
266
+ if (sttLoadError) throw sttLoadError;
264
267
  if (sttLoading) {
265
268
  while (sttLoading) await new Promise(r => setTimeout(r, 100));
269
+ if (sttLoadError) throw sttLoadError;
266
270
  if (!sttPipeline) throw new Error('STT pipeline failed to load');
267
271
  return sttPipeline;
268
272
  }
@@ -278,10 +282,12 @@ async function getSTT() {
278
282
  device: 'cpu',
279
283
  local_files_only: isLocal,
280
284
  });
285
+ sttLoadError = null;
281
286
  return sttPipeline;
282
287
  } catch (err) {
283
288
  sttPipeline = null;
284
- throw new Error('STT model load failed: ' + err.message);
289
+ sttLoadError = new Error('STT model load failed: ' + err.message);
290
+ throw sttLoadError;
285
291
  } finally {
286
292
  sttLoading = false;
287
293
  }
@@ -289,8 +295,10 @@ async function getSTT() {
289
295
 
290
296
  async function getTTS() {
291
297
  if (ttsPipeline) return ttsPipeline;
298
+ if (ttsLoadError) throw ttsLoadError;
292
299
  if (ttsLoading) {
293
300
  while (ttsLoading) await new Promise(r => setTimeout(r, 100));
301
+ if (ttsLoadError) throw ttsLoadError;
294
302
  if (!ttsPipeline) throw new Error('TTS pipeline failed to load');
295
303
  return ttsPipeline;
296
304
  }
@@ -303,10 +311,12 @@ async function getTTS() {
303
311
  dtype: 'fp32',
304
312
  });
305
313
  await ensureSpeakerEmbeddings();
314
+ ttsLoadError = null;
306
315
  return ttsPipeline;
307
316
  } catch (err) {
308
317
  ttsPipeline = null;
309
- throw new Error('TTS model load failed: ' + err.message);
318
+ ttsLoadError = new Error('TTS model load failed: ' + err.message);
319
+ throw ttsLoadError;
310
320
  } finally {
311
321
  ttsLoading = false;
312
322
  }
@@ -482,6 +492,8 @@ function getStatus() {
482
492
  ttsReady: !!ttsPipeline,
483
493
  sttLoading,
484
494
  ttsLoading,
495
+ sttError: sttLoadError ? sttLoadError.message : null,
496
+ ttsError: ttsLoadError ? ttsLoadError.message : null,
485
497
  };
486
498
  }
487
499
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.174",
3
+ "version": "1.0.175",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
package/server.js CHANGED
@@ -554,13 +554,20 @@ const server = http.createServer(async (req, res) => {
554
554
  sendJSON(req, res, 400, { error: 'No text provided' });
555
555
  return;
556
556
  }
557
- const { synthesize } = await getSpeech();
558
- const wavBuffer = await synthesize(text, voiceId);
557
+ const speech = await getSpeech();
558
+ const status = speech.getStatus();
559
+ if (status.ttsError) {
560
+ sendJSON(req, res, 503, { error: status.ttsError, retryable: false });
561
+ return;
562
+ }
563
+ const wavBuffer = await speech.synthesize(text, voiceId);
559
564
  res.writeHead(200, { 'Content-Type': 'audio/wav', 'Content-Length': wavBuffer.length });
560
565
  res.end(wavBuffer);
561
566
  } catch (err) {
562
567
  debugLog('[TTS] Error: ' + err.message);
563
- if (!res.headersSent) sendJSON(req, res, 500, { error: err.message || 'TTS failed' });
568
+ const isModelError = /model.*load|pipeline.*failed|failed to load/i.test(err.message);
569
+ const statusCode = isModelError ? 503 : 500;
570
+ if (!res.headersSent) sendJSON(req, res, statusCode, { error: err.message || 'TTS failed', retryable: !isModelError });
564
571
  }
565
572
  return;
566
573
  }
@@ -574,14 +581,19 @@ const server = http.createServer(async (req, res) => {
574
581
  sendJSON(req, res, 400, { error: 'No text provided' });
575
582
  return;
576
583
  }
577
- const { synthesizeStream } = await getSpeech();
584
+ const speech = await getSpeech();
585
+ const status = speech.getStatus();
586
+ if (status.ttsError) {
587
+ sendJSON(req, res, 503, { error: status.ttsError, retryable: false });
588
+ return;
589
+ }
578
590
  res.writeHead(200, {
579
591
  'Content-Type': 'application/octet-stream',
580
592
  'Transfer-Encoding': 'chunked',
581
593
  'X-Content-Type': 'audio/wav-stream',
582
594
  'Cache-Control': 'no-cache'
583
595
  });
584
- for await (const wavChunk of synthesizeStream(text, voiceId)) {
596
+ for await (const wavChunk of speech.synthesizeStream(text, voiceId)) {
585
597
  const lenBuf = Buffer.alloc(4);
586
598
  lenBuf.writeUInt32BE(wavChunk.length, 0);
587
599
  res.write(lenBuf);
@@ -590,7 +602,9 @@ const server = http.createServer(async (req, res) => {
590
602
  res.end();
591
603
  } catch (err) {
592
604
  debugLog('[TTS-STREAM] Error: ' + err.message);
593
- if (!res.headersSent) sendJSON(req, res, 500, { error: err.message || 'TTS stream failed' });
605
+ const isModelError = /model.*load|pipeline.*failed|failed to load/i.test(err.message);
606
+ const statusCode = isModelError ? 503 : 500;
607
+ if (!res.headersSent) sendJSON(req, res, statusCode, { error: err.message || 'TTS stream failed', retryable: !isModelError });
594
608
  else res.end();
595
609
  }
596
610
  return;
@@ -298,6 +298,9 @@
298
298
  var audioChunkQueue = [];
299
299
  var isPlayingChunk = false;
300
300
  var streamDone = false;
301
+ var ttsConsecutiveFailures = 0;
302
+ var TTS_MAX_FAILURES = 3;
303
+ var ttsDisabledUntilReset = false;
301
304
 
302
305
  function playNextChunk() {
303
306
  if (audioChunkQueue.length === 0) {
@@ -331,19 +334,41 @@
331
334
 
332
335
  function processQueue() {
333
336
  if (isSpeaking || speechQueue.length === 0) return;
337
+ if (ttsDisabledUntilReset) {
338
+ speechQueue = [];
339
+ return;
340
+ }
334
341
  isSpeaking = true;
335
342
  streamDone = false;
336
343
  var text = speechQueue.shift();
337
344
  audioChunkQueue = [];
338
345
  isPlayingChunk = false;
339
-
346
+
347
+ function onTtsSuccess() {
348
+ ttsConsecutiveFailures = 0;
349
+ }
350
+
351
+ function onTtsFailed() {
352
+ ttsConsecutiveFailures++;
353
+ if (ttsConsecutiveFailures >= TTS_MAX_FAILURES) {
354
+ console.warn('[Voice] TTS failed ' + ttsConsecutiveFailures + ' times consecutively, disabling until reset');
355
+ ttsDisabledUntilReset = true;
356
+ speechQueue = [];
357
+ }
358
+ streamDone = true;
359
+ isSpeaking = false;
360
+ if (!ttsDisabledUntilReset) {
361
+ processQueue();
362
+ }
363
+ }
364
+
340
365
  function tryStreaming() {
341
366
  fetch(BASE + '/api/tts-stream', {
342
367
  method: 'POST',
343
368
  headers: { 'Content-Type': 'application/json' },
344
369
  body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
345
370
  }).then(function(resp) {
346
- if (!resp.ok) throw new Error('TTS stream failed');
371
+ if (!resp.ok) throw new Error('TTS stream failed: ' + resp.status);
347
372
  var reader = resp.body.getReader();
348
373
  var buffer = new Uint8Array(0);
349
374
 
@@ -357,6 +382,7 @@
357
382
  function pump() {
358
383
  return reader.read().then(function(result) {
359
384
  if (result.done) {
385
+ onTtsSuccess();
360
386
  streamDone = true;
361
387
  if (!isPlayingChunk && audioChunkQueue.length === 0) {
362
388
  isSpeaking = false;
@@ -384,16 +410,17 @@
384
410
  tryNonStreaming(text);
385
411
  });
386
412
  }
387
-
413
+
388
414
  function tryNonStreaming(txt) {
389
415
  fetch(BASE + '/api/tts', {
390
416
  method: 'POST',
391
417
  headers: { 'Content-Type': 'application/json' },
392
418
  body: JSON.stringify({ text: txt, voiceId: selectedVoiceId })
393
419
  }).then(function(resp) {
394
- if (!resp.ok) throw new Error('TTS failed');
420
+ if (!resp.ok) throw new Error('TTS failed: ' + resp.status);
395
421
  return resp.arrayBuffer();
396
422
  }).then(function(buf) {
423
+ onTtsSuccess();
397
424
  var blob = new Blob([buf], { type: 'audio/wav' });
398
425
  audioChunkQueue.push(blob);
399
426
  if (!isPlayingChunk) playNextChunk();
@@ -401,12 +428,10 @@
401
428
  isSpeaking = false;
402
429
  processQueue();
403
430
  }).catch(function() {
404
- streamDone = true;
405
- isSpeaking = false;
406
- processQueue();
431
+ onTtsFailed();
407
432
  });
408
433
  }
409
-
434
+
410
435
  tryStreaming();
411
436
  }
412
437
 
@@ -415,6 +440,8 @@
415
440
  audioChunkQueue = [];
416
441
  isPlayingChunk = false;
417
442
  isSpeaking = false;
443
+ ttsConsecutiveFailures = 0;
444
+ ttsDisabledUntilReset = false;
418
445
  if (currentAudio) {
419
446
  currentAudio.pause();
420
447
  currentAudio = null;