agentgui 1.0.173 → 1.0.175

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/lib/speech.js CHANGED
@@ -115,6 +115,8 @@ let speakerEmbeddingPipeline = null;
115
115
  let sttLoading = false;
116
116
  let ttsLoading = false;
117
117
  let speakerEmbeddingLoading = false;
118
+ let ttsLoadError = null;
119
+ let sttLoadError = null;
118
120
  const voiceEmbeddingsCache = new Map();
119
121
  const SAMPLE_RATE_STT = 16000;
120
122
  const SAMPLE_RATE_TTS = 16000;
@@ -261,8 +263,10 @@ async function generateEmbeddingFromCustomVoice(voiceId) {
261
263
 
262
264
  async function getSTT() {
263
265
  if (sttPipeline) return sttPipeline;
266
+ if (sttLoadError) throw sttLoadError;
264
267
  if (sttLoading) {
265
268
  while (sttLoading) await new Promise(r => setTimeout(r, 100));
269
+ if (sttLoadError) throw sttLoadError;
266
270
  if (!sttPipeline) throw new Error('STT pipeline failed to load');
267
271
  return sttPipeline;
268
272
  }
@@ -278,10 +282,12 @@ async function getSTT() {
278
282
  device: 'cpu',
279
283
  local_files_only: isLocal,
280
284
  });
285
+ sttLoadError = null;
281
286
  return sttPipeline;
282
287
  } catch (err) {
283
288
  sttPipeline = null;
284
- throw new Error('STT model load failed: ' + err.message);
289
+ sttLoadError = new Error('STT model load failed: ' + err.message);
290
+ throw sttLoadError;
285
291
  } finally {
286
292
  sttLoading = false;
287
293
  }
@@ -289,8 +295,10 @@ async function getSTT() {
289
295
 
290
296
  async function getTTS() {
291
297
  if (ttsPipeline) return ttsPipeline;
298
+ if (ttsLoadError) throw ttsLoadError;
292
299
  if (ttsLoading) {
293
300
  while (ttsLoading) await new Promise(r => setTimeout(r, 100));
301
+ if (ttsLoadError) throw ttsLoadError;
294
302
  if (!ttsPipeline) throw new Error('TTS pipeline failed to load');
295
303
  return ttsPipeline;
296
304
  }
@@ -303,10 +311,12 @@ async function getTTS() {
303
311
  dtype: 'fp32',
304
312
  });
305
313
  await ensureSpeakerEmbeddings();
314
+ ttsLoadError = null;
306
315
  return ttsPipeline;
307
316
  } catch (err) {
308
317
  ttsPipeline = null;
309
- throw new Error('TTS model load failed: ' + err.message);
318
+ ttsLoadError = new Error('TTS model load failed: ' + err.message);
319
+ throw ttsLoadError;
310
320
  } finally {
311
321
  ttsLoading = false;
312
322
  }
@@ -482,6 +492,8 @@ function getStatus() {
482
492
  ttsReady: !!ttsPipeline,
483
493
  sttLoading,
484
494
  ttsLoading,
495
+ sttError: sttLoadError ? sttLoadError.message : null,
496
+ ttsError: ttsLoadError ? ttsLoadError.message : null,
485
497
  };
486
498
  }
487
499
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.173",
3
+ "version": "1.0.175",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
package/server.js CHANGED
@@ -554,13 +554,20 @@ const server = http.createServer(async (req, res) => {
554
554
  sendJSON(req, res, 400, { error: 'No text provided' });
555
555
  return;
556
556
  }
557
- const { synthesize } = await getSpeech();
558
- const wavBuffer = await synthesize(text, voiceId);
557
+ const speech = await getSpeech();
558
+ const status = speech.getStatus();
559
+ if (status.ttsError) {
560
+ sendJSON(req, res, 503, { error: status.ttsError, retryable: false });
561
+ return;
562
+ }
563
+ const wavBuffer = await speech.synthesize(text, voiceId);
559
564
  res.writeHead(200, { 'Content-Type': 'audio/wav', 'Content-Length': wavBuffer.length });
560
565
  res.end(wavBuffer);
561
566
  } catch (err) {
562
567
  debugLog('[TTS] Error: ' + err.message);
563
- if (!res.headersSent) sendJSON(req, res, 500, { error: err.message || 'TTS failed' });
568
+ const isModelError = /model.*load|pipeline.*failed|failed to load/i.test(err.message);
569
+ const statusCode = isModelError ? 503 : 500;
570
+ if (!res.headersSent) sendJSON(req, res, statusCode, { error: err.message || 'TTS failed', retryable: !isModelError });
564
571
  }
565
572
  return;
566
573
  }
@@ -574,14 +581,19 @@ const server = http.createServer(async (req, res) => {
574
581
  sendJSON(req, res, 400, { error: 'No text provided' });
575
582
  return;
576
583
  }
577
- const { synthesizeStream } = await getSpeech();
584
+ const speech = await getSpeech();
585
+ const status = speech.getStatus();
586
+ if (status.ttsError) {
587
+ sendJSON(req, res, 503, { error: status.ttsError, retryable: false });
588
+ return;
589
+ }
578
590
  res.writeHead(200, {
579
591
  'Content-Type': 'application/octet-stream',
580
592
  'Transfer-Encoding': 'chunked',
581
593
  'X-Content-Type': 'audio/wav-stream',
582
594
  'Cache-Control': 'no-cache'
583
595
  });
584
- for await (const wavChunk of synthesizeStream(text, voiceId)) {
596
+ for await (const wavChunk of speech.synthesizeStream(text, voiceId)) {
585
597
  const lenBuf = Buffer.alloc(4);
586
598
  lenBuf.writeUInt32BE(wavChunk.length, 0);
587
599
  res.write(lenBuf);
@@ -590,7 +602,9 @@ const server = http.createServer(async (req, res) => {
590
602
  res.end();
591
603
  } catch (err) {
592
604
  debugLog('[TTS-STREAM] Error: ' + err.message);
593
- if (!res.headersSent) sendJSON(req, res, 500, { error: err.message || 'TTS stream failed' });
605
+ const isModelError = /model.*load|pipeline.*failed|failed to load/i.test(err.message);
606
+ const statusCode = isModelError ? 503 : 500;
607
+ if (!res.headersSent) sendJSON(req, res, statusCode, { error: err.message || 'TTS stream failed', retryable: !isModelError });
594
608
  else res.end();
595
609
  }
596
610
  return;
package/static/index.html CHANGED
@@ -415,7 +415,7 @@
415
415
 
416
416
  /* --- Messages --- */
417
417
  .message {
418
- margin-bottom: 0.125rem;
418
+ margin-bottom: 0;
419
419
  padding: 0.5rem 0.75rem;
420
420
  border-radius: 0.75rem;
421
421
  max-width: 85%;
@@ -466,7 +466,7 @@
466
466
  .message-blocks {
467
467
  display: flex;
468
468
  flex-direction: column;
469
- gap: 0.375rem;
469
+ gap: 0.125rem;
470
470
  }
471
471
 
472
472
  .message-text {
@@ -503,7 +503,7 @@
503
503
  /* --- Streaming block types --- */
504
504
  .streaming-block-system {
505
505
  padding: 0.375rem 0.75rem;
506
- margin: 0.0625rem 0;
506
+ margin: 0;
507
507
  background: rgba(59,130,246,0.08);
508
508
  border-radius: 0.375rem;
509
509
  font-size: 0.8rem;
@@ -558,7 +558,7 @@
558
558
  html.dark .tool-input-pre { background: rgba(255,255,255,0.03); }
559
559
 
560
560
  .streaming-block-tool-result {
561
- margin: 0.0625rem 0;
561
+ margin: 0;
562
562
  border-radius: 0.375rem;
563
563
  background: var(--color-bg-code);
564
564
  overflow: hidden;
@@ -601,7 +601,7 @@
601
601
 
602
602
  .streaming-block-result {
603
603
  padding: 0.375rem 0.75rem;
604
- margin: 0.0625rem 0;
604
+ margin: 0;
605
605
  border-radius: 0.375rem;
606
606
  background: rgba(16,185,129,0.08);
607
607
  font-size: 0.8rem;
@@ -1149,7 +1149,7 @@
1149
1149
 
1150
1150
  /* ===== STREAMING BLOCK STYLES ===== */
1151
1151
  .block-text {
1152
- margin-bottom: 0.0625rem;
1152
+ margin-bottom: 0;
1153
1153
  padding: 0.5rem 0.75rem;
1154
1154
  background: var(--color-bg-primary);
1155
1155
  border-radius: 0.5rem;
@@ -1171,7 +1171,7 @@
1171
1171
  }
1172
1172
 
1173
1173
  .block-code {
1174
- margin-bottom: 0.0625rem;
1174
+ margin-bottom: 0;
1175
1175
  border-radius: 0.5rem;
1176
1176
  overflow: hidden;
1177
1177
  }
@@ -1218,7 +1218,7 @@
1218
1218
  }
1219
1219
 
1220
1220
  .block-thinking {
1221
- margin-bottom: 0.0625rem;
1221
+ margin-bottom: 0;
1222
1222
  border-radius: 0.5rem;
1223
1223
  background: #f5f3ff;
1224
1224
  overflow: hidden;
@@ -1255,7 +1255,7 @@
1255
1255
 
1256
1256
  /* --- Tool Use Block --- */
1257
1257
  .block-tool-use {
1258
- margin-bottom: 0.0625rem;
1258
+ margin-bottom: 0;
1259
1259
  border-radius: 0.5rem;
1260
1260
  background: #ecfeff;
1261
1261
  overflow: hidden;
@@ -1497,7 +1497,7 @@
1497
1497
 
1498
1498
  /* --- Folded Tool Use (compact success-style bar) --- */
1499
1499
  .folded-tool {
1500
- margin: 0.0625rem 0;
1500
+ margin: 0;
1501
1501
  border-radius: 0.375rem;
1502
1502
  overflow: hidden;
1503
1503
  background: #f0fdf4;
@@ -1681,7 +1681,7 @@
1681
1681
 
1682
1682
  /* --- Collapsible Code Summary --- */
1683
1683
  .collapsible-code {
1684
- margin: 0.0625rem 0;
1684
+ margin: 0;
1685
1685
  border-radius: 0.375rem;
1686
1686
  overflow: hidden;
1687
1687
  background: #1e293b;
@@ -1736,7 +1736,7 @@
1736
1736
 
1737
1737
  /* --- Tool Result Block --- */
1738
1738
  .block-tool-result {
1739
- margin-bottom: 0.0625rem;
1739
+ margin-bottom: 0;
1740
1740
  border-radius: 0.5rem;
1741
1741
  overflow: hidden;
1742
1742
  }
@@ -1822,7 +1822,7 @@
1822
1822
 
1823
1823
  /* --- Result Summary Block --- */
1824
1824
  .block-result {
1825
- margin-bottom: 0.0625rem;
1825
+ margin-bottom: 0;
1826
1826
  border-radius: 0.5rem;
1827
1827
  overflow: hidden;
1828
1828
  }
@@ -1882,7 +1882,7 @@
1882
1882
 
1883
1883
  /* --- System Block --- */
1884
1884
  .block-system {
1885
- margin-bottom: 0.0625rem;
1885
+ margin-bottom: 0;
1886
1886
  border-radius: 0.5rem;
1887
1887
  background: #eef2ff;
1888
1888
  overflow: hidden;
@@ -1939,7 +1939,7 @@
1939
1939
 
1940
1940
  /* --- Bash Block --- */
1941
1941
  .block-bash {
1942
- margin-bottom: 0.0625rem;
1942
+ margin-bottom: 0;
1943
1943
  border-radius: 0.5rem;
1944
1944
  overflow: hidden;
1945
1945
  background: #111827;
@@ -1973,7 +1973,7 @@
1973
1973
 
1974
1974
  /* --- Generic Block --- */
1975
1975
  .block-generic {
1976
- margin-bottom: 0.0625rem;
1976
+ margin-bottom: 0;
1977
1977
  padding: 0.5rem 0.75rem;
1978
1978
  border-radius: 0.5rem;
1979
1979
  background: var(--color-bg-secondary);
@@ -2016,7 +2016,7 @@
2016
2016
 
2017
2017
  /* --- Error Block --- */
2018
2018
  .block-error {
2019
- margin-bottom: 0.0625rem;
2019
+ margin-bottom: 0;
2020
2020
  padding: 0.5rem 0.75rem;
2021
2021
  border-radius: 0.5rem;
2022
2022
  background: #fef2f2;
@@ -2026,7 +2026,7 @@
2026
2026
 
2027
2027
  /* --- Image Block --- */
2028
2028
  .block-image {
2029
- margin-bottom: 0.0625rem;
2029
+ margin-bottom: 0;
2030
2030
  border-radius: 0.5rem;
2031
2031
  overflow: hidden;
2032
2032
  }
@@ -2036,7 +2036,7 @@
2036
2036
 
2037
2037
  /* ===== STREAMING EVENTS ===== */
2038
2038
  .event-streaming-start {
2039
- margin-bottom: 0.0625rem;
2039
+ margin-bottom: 0;
2040
2040
  padding: 0.375rem 0.75rem;
2041
2041
  background: #eff6ff;
2042
2042
  border-radius: 0.5rem;
@@ -2048,7 +2048,7 @@
2048
2048
  html.dark .event-streaming-start { background: #0c1a2e; }
2049
2049
 
2050
2050
  .event-streaming-complete {
2051
- margin-bottom: 0.0625rem;
2051
+ margin-bottom: 0;
2052
2052
  padding: 0.375rem 0.75rem;
2053
2053
  background: linear-gradient(135deg, #ecfdf5, #f0fdf4);
2054
2054
  border-radius: 0.5rem;
@@ -298,6 +298,9 @@
298
298
  var audioChunkQueue = [];
299
299
  var isPlayingChunk = false;
300
300
  var streamDone = false;
301
+ var ttsConsecutiveFailures = 0;
302
+ var TTS_MAX_FAILURES = 3;
303
+ var ttsDisabledUntilReset = false;
301
304
 
302
305
  function playNextChunk() {
303
306
  if (audioChunkQueue.length === 0) {
@@ -331,19 +334,41 @@
331
334
 
332
335
  function processQueue() {
333
336
  if (isSpeaking || speechQueue.length === 0) return;
337
+ if (ttsDisabledUntilReset) {
338
+ speechQueue = [];
339
+ return;
340
+ }
334
341
  isSpeaking = true;
335
342
  streamDone = false;
336
343
  var text = speechQueue.shift();
337
344
  audioChunkQueue = [];
338
345
  isPlayingChunk = false;
339
-
346
+
347
+ function onTtsSuccess() {
348
+ ttsConsecutiveFailures = 0;
349
+ }
350
+
351
+ function onTtsFailed() {
352
+ ttsConsecutiveFailures++;
353
+ if (ttsConsecutiveFailures >= TTS_MAX_FAILURES) {
354
+ console.warn('[Voice] TTS failed ' + ttsConsecutiveFailures + ' times consecutively, disabling until reset');
355
+ ttsDisabledUntilReset = true;
356
+ speechQueue = [];
357
+ }
358
+ streamDone = true;
359
+ isSpeaking = false;
360
+ if (!ttsDisabledUntilReset) {
361
+ processQueue();
362
+ }
363
+ }
364
+
340
365
  function tryStreaming() {
341
366
  fetch(BASE + '/api/tts-stream', {
342
367
  method: 'POST',
343
368
  headers: { 'Content-Type': 'application/json' },
344
369
  body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
345
370
  }).then(function(resp) {
346
- if (!resp.ok) throw new Error('TTS stream failed');
371
+ if (!resp.ok) throw new Error('TTS stream failed: ' + resp.status);
347
372
  var reader = resp.body.getReader();
348
373
  var buffer = new Uint8Array(0);
349
374
 
@@ -357,6 +382,7 @@
357
382
  function pump() {
358
383
  return reader.read().then(function(result) {
359
384
  if (result.done) {
385
+ onTtsSuccess();
360
386
  streamDone = true;
361
387
  if (!isPlayingChunk && audioChunkQueue.length === 0) {
362
388
  isSpeaking = false;
@@ -384,16 +410,17 @@
384
410
  tryNonStreaming(text);
385
411
  });
386
412
  }
387
-
413
+
388
414
  function tryNonStreaming(txt) {
389
415
  fetch(BASE + '/api/tts', {
390
416
  method: 'POST',
391
417
  headers: { 'Content-Type': 'application/json' },
392
418
  body: JSON.stringify({ text: txt, voiceId: selectedVoiceId })
393
419
  }).then(function(resp) {
394
- if (!resp.ok) throw new Error('TTS failed');
420
+ if (!resp.ok) throw new Error('TTS failed: ' + resp.status);
395
421
  return resp.arrayBuffer();
396
422
  }).then(function(buf) {
423
+ onTtsSuccess();
397
424
  var blob = new Blob([buf], { type: 'audio/wav' });
398
425
  audioChunkQueue.push(blob);
399
426
  if (!isPlayingChunk) playNextChunk();
@@ -401,12 +428,10 @@
401
428
  isSpeaking = false;
402
429
  processQueue();
403
430
  }).catch(function() {
404
- streamDone = true;
405
- isSpeaking = false;
406
- processQueue();
431
+ onTtsFailed();
407
432
  });
408
433
  }
409
-
434
+
410
435
  tryStreaming();
411
436
  }
412
437
 
@@ -415,6 +440,8 @@
415
440
  audioChunkQueue = [];
416
441
  isPlayingChunk = false;
417
442
  isSpeaking = false;
443
+ ttsConsecutiveFailures = 0;
444
+ ttsDisabledUntilReset = false;
418
445
  if (currentAudio) {
419
446
  currentAudio.pause();
420
447
  currentAudio = null;