agentgui 1.0.215 → 1.0.217

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.215",
3
+ "version": "1.0.217",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
package/server.js CHANGED
@@ -99,7 +99,7 @@ const express = require('express');
99
99
  const Busboy = require('busboy');
100
100
  const fsbrowse = require('fsbrowse');
101
101
 
102
- const SYSTEM_PROMPT = `Your output will be spoken aloud by a text-to-speech system. Write ONLY plain conversational sentences that sound natural when read aloud. Never use markdown, bold, italics, headers, bullet points, numbered lists, tables, or any formatting. Never use colons to introduce lists or options. Never use labels like "Option A" or "1." followed by a title. Instead of listing options, describe them conversationally in flowing sentences. For example, instead of "**Option 1**: Do X" say "One approach would be to do X." Keep sentences short and simple. Use transition words like "also", "another option", "or alternatively" to connect ideas. Write as if you are speaking to someone in a casual conversation.`;
102
+ const SYSTEM_PROMPT = `Your output will be spoken aloud by a text-to-speech system. Write ONLY plain conversational sentences that sound natural when read aloud. Never use markdown, bold, italics, headers, bullet points, numbered lists, tables, or any formatting. Never use colons to introduce lists or options. Never use labels like "Option A" or "1." followed by a title. Instead of listing options, describe them conversationally in flowing sentences. For example, instead of "**Option 1**: Do X" say "One approach would be to do X." Keep sentences short and simple. Use transition words like "also", "another option", "or alternatively" to connect ideas. When mentioning file names, spell out the dot between the name and extension as the word "dot" so it is spoken clearly. For example, say "server dot js" instead of "server.js", "index dot html" instead of "index.html", and "package dot json" instead of "package.json". Write as if you are speaking to someone in a casual conversation.`;
103
103
 
104
104
  const activeExecutions = new Map();
105
105
  const activeScripts = new Map();
@@ -13,6 +13,7 @@
13
13
  var recordedChunks = [];
14
14
  var TARGET_SAMPLE_RATE = 16000;
15
15
  var spokenChunks = new Set();
16
+ var renderedSeqs = new Set();
16
17
  var isLoadingHistory = false;
17
18
  var selectedVoiceId = localStorage.getItem('voice-selected-id') || 'default';
18
19
  var ttsAudioCache = new Map();
@@ -313,6 +314,12 @@
313
314
  return ttsAudioCache.get(key) || null;
314
315
  }
315
316
 
317
+ function splitSentences(text) {
318
+ var raw = text.match(/[^.!?]+[.!?]+[\s]?|[^.!?]+$/g);
319
+ if (!raw) return [text];
320
+ return raw.map(function(s) { return s.trim(); }).filter(function(s) { return s.length > 0; });
321
+ }
322
+
316
323
  var audioChunkQueue = [];
317
324
  var isPlayingChunk = false;
318
325
  var streamDone = false;
@@ -373,6 +380,38 @@
373
380
  return;
374
381
  }
375
382
 
383
+ var sentences = splitSentences(text);
384
+ var cachedSentences = [];
385
+ var uncachedText = [];
386
+ for (var i = 0; i < sentences.length; i++) {
387
+ var blob = getCachedTTSBlob(sentences[i]);
388
+ if (blob) {
389
+ cachedSentences.push({ idx: i, blob: blob });
390
+ } else {
391
+ uncachedText.push(sentences[i]);
392
+ }
393
+ }
394
+
395
+ if (cachedSentences.length === sentences.length) {
396
+ ttsConsecutiveFailures = 0;
397
+ for (var j = 0; j < cachedSentences.length; j++) {
398
+ audioChunkQueue.push(cachedSentences[j].blob);
399
+ }
400
+ streamDone = true;
401
+ if (!isPlayingChunk) playNextChunk();
402
+ return;
403
+ }
404
+
405
+ if (cachedSentences.length > 0) {
406
+ ttsConsecutiveFailures = 0;
407
+ for (var k = 0; k < cachedSentences.length; k++) {
408
+ audioChunkQueue.push(cachedSentences[k].blob);
409
+ }
410
+ if (!isPlayingChunk) playNextChunk();
411
+ }
412
+
413
+ var remainingText = uncachedText.join(' ');
414
+
376
415
  function onTtsSuccess() {
377
416
  ttsConsecutiveFailures = 0;
378
417
  }
@@ -392,11 +431,11 @@
392
431
  }
393
432
 
394
433
  function tryStreaming() {
395
- if (!streamingSupported) { tryNonStreaming(text); return; }
434
+ if (!streamingSupported) { tryNonStreaming(remainingText); return; }
396
435
  fetch(BASE + '/api/tts-stream', {
397
436
  method: 'POST',
398
437
  headers: { 'Content-Type': 'application/json' },
399
- body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
438
+ body: JSON.stringify({ text: remainingText, voiceId: selectedVoiceId })
400
439
  }).then(function(resp) {
401
440
  if (!resp.ok) {
402
441
  streamingSupported = false;
@@ -441,7 +480,7 @@
441
480
 
442
481
  return pump();
443
482
  }).catch(function() {
444
- tryNonStreaming(text);
483
+ tryNonStreaming(remainingText);
445
484
  });
446
485
  }
447
486
 
@@ -583,17 +622,21 @@
583
622
  if (!voiceActive) return;
584
623
  if (data.type === 'streaming_progress' && data.block) {
585
624
  if (data.conversationId && data.conversationId !== currentConversationId) return;
625
+ if (data.seq !== undefined && renderedSeqs.has(data.seq)) return;
626
+ if (data.seq !== undefined) renderedSeqs.add(data.seq);
586
627
  handleVoiceBlock(data.block, true);
587
628
  }
588
629
  if (data.type === 'streaming_start') {
589
630
  if (data.conversationId && data.conversationId !== currentConversationId) return;
590
631
  spokenChunks = new Set();
632
+ renderedSeqs = new Set();
591
633
  }
592
634
  });
593
635
  window.addEventListener('conversation-selected', function(e) {
594
636
  currentConversationId = e.detail.conversationId;
595
637
  stopSpeaking();
596
638
  spokenChunks = new Set();
639
+ renderedSeqs = new Set();
597
640
  if (voiceActive) {
598
641
  loadVoiceBlocks(currentConversationId);
599
642
  }
@@ -633,6 +676,7 @@
633
676
  }
634
677
  var hasContent = false;
635
678
  data.chunks.forEach(function(chunk) {
679
+ if (chunk.sequence !== undefined) renderedSeqs.add(chunk.sequence);
636
680
  var block = typeof chunk.data === 'string' ? JSON.parse(chunk.data) : chunk.data;
637
681
  if (!block) return;
638
682
  if (block.type === 'text' && block.text) {