agentgui 1.0.216 → 1.0.217

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/static/js/voice.js +47 -3
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.216",
3
+ "version": "1.0.217",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
package/static/js/voice.js CHANGED
@@ -13,6 +13,7 @@
13
13
  var recordedChunks = [];
14
14
  var TARGET_SAMPLE_RATE = 16000;
15
15
  var spokenChunks = new Set();
16
+ var renderedSeqs = new Set();
16
17
  var isLoadingHistory = false;
17
18
  var selectedVoiceId = localStorage.getItem('voice-selected-id') || 'default';
18
19
  var ttsAudioCache = new Map();
@@ -313,6 +314,12 @@
313
314
  return ttsAudioCache.get(key) || null;
314
315
  }
315
316
 
317
+ function splitSentences(text) {
318
+ var raw = text.match(/[^.!?]+[.!?]+[\s]?|[^.!?]+$/g);
319
+ if (!raw) return [text];
320
+ return raw.map(function(s) { return s.trim(); }).filter(function(s) { return s.length > 0; });
321
+ }
322
+
316
323
  var audioChunkQueue = [];
317
324
  var isPlayingChunk = false;
318
325
  var streamDone = false;
@@ -373,6 +380,38 @@
373
380
  return;
374
381
  }
375
382
 
383
+ var sentences = splitSentences(text);
384
+ var cachedSentences = [];
385
+ var uncachedText = [];
386
+ for (var i = 0; i < sentences.length; i++) {
387
+ var blob = getCachedTTSBlob(sentences[i]);
388
+ if (blob) {
389
+ cachedSentences.push({ idx: i, blob: blob });
390
+ } else {
391
+ uncachedText.push(sentences[i]);
392
+ }
393
+ }
394
+
395
+ if (cachedSentences.length === sentences.length) {
396
+ ttsConsecutiveFailures = 0;
397
+ for (var j = 0; j < cachedSentences.length; j++) {
398
+ audioChunkQueue.push(cachedSentences[j].blob);
399
+ }
400
+ streamDone = true;
401
+ if (!isPlayingChunk) playNextChunk();
402
+ return;
403
+ }
404
+
405
+ if (cachedSentences.length > 0) {
406
+ ttsConsecutiveFailures = 0;
407
+ for (var k = 0; k < cachedSentences.length; k++) {
408
+ audioChunkQueue.push(cachedSentences[k].blob);
409
+ }
410
+ if (!isPlayingChunk) playNextChunk();
411
+ }
412
+
413
+ var remainingText = uncachedText.join(' ');
414
+
376
415
  function onTtsSuccess() {
377
416
  ttsConsecutiveFailures = 0;
378
417
  }
@@ -392,11 +431,11 @@
392
431
  }
393
432
 
394
433
  function tryStreaming() {
395
- if (!streamingSupported) { tryNonStreaming(text); return; }
434
+ if (!streamingSupported) { tryNonStreaming(remainingText); return; }
396
435
  fetch(BASE + '/api/tts-stream', {
397
436
  method: 'POST',
398
437
  headers: { 'Content-Type': 'application/json' },
399
- body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
438
+ body: JSON.stringify({ text: remainingText, voiceId: selectedVoiceId })
400
439
  }).then(function(resp) {
401
440
  if (!resp.ok) {
402
441
  streamingSupported = false;
@@ -441,7 +480,7 @@
441
480
 
442
481
  return pump();
443
482
  }).catch(function() {
444
- tryNonStreaming(text);
483
+ tryNonStreaming(remainingText);
445
484
  });
446
485
  }
447
486
 
@@ -583,17 +622,21 @@
583
622
  if (!voiceActive) return;
584
623
  if (data.type === 'streaming_progress' && data.block) {
585
624
  if (data.conversationId && data.conversationId !== currentConversationId) return;
625
+ if (data.seq !== undefined && renderedSeqs.has(data.seq)) return;
626
+ if (data.seq !== undefined) renderedSeqs.add(data.seq);
586
627
  handleVoiceBlock(data.block, true);
587
628
  }
588
629
  if (data.type === 'streaming_start') {
589
630
  if (data.conversationId && data.conversationId !== currentConversationId) return;
590
631
  spokenChunks = new Set();
632
+ renderedSeqs = new Set();
591
633
  }
592
634
  });
593
635
  window.addEventListener('conversation-selected', function(e) {
594
636
  currentConversationId = e.detail.conversationId;
595
637
  stopSpeaking();
596
638
  spokenChunks = new Set();
639
+ renderedSeqs = new Set();
597
640
  if (voiceActive) {
598
641
  loadVoiceBlocks(currentConversationId);
599
642
  }
@@ -633,6 +676,7 @@
633
676
  }
634
677
  var hasContent = false;
635
678
  data.chunks.forEach(function(chunk) {
679
+ if (chunk.sequence !== undefined) renderedSeqs.add(chunk.sequence);
636
680
  var block = typeof chunk.data === 'string' ? JSON.parse(chunk.data) : chunk.data;
637
681
  if (!block) return;
638
682
  if (block.type === 'text' && block.text) {