agentgui 1.0.215 → 1.0.217
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server.js +1 -1
- package/static/js/voice.js +47 -3
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -99,7 +99,7 @@ const express = require('express');
|
|
|
99
99
|
const Busboy = require('busboy');
|
|
100
100
|
const fsbrowse = require('fsbrowse');
|
|
101
101
|
|
|
102
|
-
const SYSTEM_PROMPT = `Your output will be spoken aloud by a text-to-speech system. Write ONLY plain conversational sentences that sound natural when read aloud. Never use markdown, bold, italics, headers, bullet points, numbered lists, tables, or any formatting. Never use colons to introduce lists or options. Never use labels like "Option A" or "1." followed by a title. Instead of listing options, describe them conversationally in flowing sentences. For example, instead of "**Option 1**: Do X" say "One approach would be to do X." Keep sentences short and simple. Use transition words like "also", "another option", "or alternatively" to connect ideas. Write as if you are speaking to someone in a casual conversation.`;
|
|
102
|
+
const SYSTEM_PROMPT = `Your output will be spoken aloud by a text-to-speech system. Write ONLY plain conversational sentences that sound natural when read aloud. Never use markdown, bold, italics, headers, bullet points, numbered lists, tables, or any formatting. Never use colons to introduce lists or options. Never use labels like "Option A" or "1." followed by a title. Instead of listing options, describe them conversationally in flowing sentences. For example, instead of "**Option 1**: Do X" say "One approach would be to do X." Keep sentences short and simple. Use transition words like "also", "another option", "or alternatively" to connect ideas. When mentioning file names, spell out the dot between the name and extension as the word "dot" so it is spoken clearly. For example, say "server dot js" instead of "server.js", "index dot html" instead of "index.html", and "package dot json" instead of "package.json". Write as if you are speaking to someone in a casual conversation.`;
|
|
103
103
|
|
|
104
104
|
const activeExecutions = new Map();
|
|
105
105
|
const activeScripts = new Map();
|
package/static/js/voice.js
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
var recordedChunks = [];
|
|
14
14
|
var TARGET_SAMPLE_RATE = 16000;
|
|
15
15
|
var spokenChunks = new Set();
|
|
16
|
+
var renderedSeqs = new Set();
|
|
16
17
|
var isLoadingHistory = false;
|
|
17
18
|
var selectedVoiceId = localStorage.getItem('voice-selected-id') || 'default';
|
|
18
19
|
var ttsAudioCache = new Map();
|
|
@@ -313,6 +314,12 @@
|
|
|
313
314
|
return ttsAudioCache.get(key) || null;
|
|
314
315
|
}
|
|
315
316
|
|
|
317
|
+
/**
 * Splits text into trimmed sentence strings.
 *
 * A run of terminators (".", "!" or "?") ends a sentence only when it is
 * followed by whitespace or the end of the input. Dots inside a token, such
 * as "server.js", "1.0.217" or "e.g.x", therefore stay inside their sentence
 * instead of being cut in half.
 *
 * @param {string} text - Text to split. null/undefined are treated as ''.
 * @returns {string[]} Trimmed, non-empty sentences in their original order.
 *   Empty input yields [''] (a single empty entry); whitespace-only input
 *   yields [].
 */
function splitSentences(text) {
  var source = text == null ? '' : String(text);

  // Terminator run that is immediately followed by whitespace or end of input.
  // A fresh literal per call keeps the stateful lastIndex of the /g regex local.
  var boundary = /[.!?]+(?=\s|$)/g;
  var sentences = [];
  var start = 0;
  var match;

  while ((match = boundary.exec(source)) !== null) {
    var end = match.index + match[0].length;
    var sentence = source.slice(start, end).trim();
    if (sentence.length > 0) sentences.push(sentence);
    start = end;
  }

  // Trailing text that has no closing terminator is still a sentence.
  var tail = source.slice(start).trim();
  if (tail.length > 0) sentences.push(tail);

  // Empty input is handed back as a single entry rather than an empty list.
  if (sentences.length === 0 && source.length === 0) return [source];
  return sentences;
}
|
|
322
|
+
|
|
316
323
|
var audioChunkQueue = [];
|
|
317
324
|
var isPlayingChunk = false;
|
|
318
325
|
var streamDone = false;
|
|
@@ -373,6 +380,38 @@
|
|
|
373
380
|
return;
|
|
374
381
|
}
|
|
375
382
|
|
|
383
|
+
var sentences = splitSentences(text);
|
|
384
|
+
var cachedSentences = [];
|
|
385
|
+
var uncachedText = [];
|
|
386
|
+
for (var i = 0; i < sentences.length; i++) {
|
|
387
|
+
var blob = getCachedTTSBlob(sentences[i]);
|
|
388
|
+
if (blob) {
|
|
389
|
+
cachedSentences.push({ idx: i, blob: blob });
|
|
390
|
+
} else {
|
|
391
|
+
uncachedText.push(sentences[i]);
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
if (cachedSentences.length === sentences.length) {
|
|
396
|
+
ttsConsecutiveFailures = 0;
|
|
397
|
+
for (var j = 0; j < cachedSentences.length; j++) {
|
|
398
|
+
audioChunkQueue.push(cachedSentences[j].blob);
|
|
399
|
+
}
|
|
400
|
+
streamDone = true;
|
|
401
|
+
if (!isPlayingChunk) playNextChunk();
|
|
402
|
+
return;
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
if (cachedSentences.length > 0) {
|
|
406
|
+
ttsConsecutiveFailures = 0;
|
|
407
|
+
for (var k = 0; k < cachedSentences.length; k++) {
|
|
408
|
+
audioChunkQueue.push(cachedSentences[k].blob);
|
|
409
|
+
}
|
|
410
|
+
if (!isPlayingChunk) playNextChunk();
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
var remainingText = uncachedText.join(' ');
|
|
414
|
+
|
|
376
415
|
// Success hook for a TTS attempt: resets the ttsConsecutiveFailures counter to 0.
// NOTE(review): the call sites are outside this hunk — presumably invoked once a
// TTS request has produced playable audio; confirm against the callers.
function onTtsSuccess() {
|
|
377
416
|
ttsConsecutiveFailures = 0;
|
|
378
417
|
}
|
|
@@ -392,11 +431,11 @@
|
|
|
392
431
|
}
|
|
393
432
|
|
|
394
433
|
function tryStreaming() {
|
|
395
|
-
if (!streamingSupported) { tryNonStreaming(
|
|
434
|
+
if (!streamingSupported) { tryNonStreaming(remainingText); return; }
|
|
396
435
|
fetch(BASE + '/api/tts-stream', {
|
|
397
436
|
method: 'POST',
|
|
398
437
|
headers: { 'Content-Type': 'application/json' },
|
|
399
|
-
body: JSON.stringify({ text:
|
|
438
|
+
body: JSON.stringify({ text: remainingText, voiceId: selectedVoiceId })
|
|
400
439
|
}).then(function(resp) {
|
|
401
440
|
if (!resp.ok) {
|
|
402
441
|
streamingSupported = false;
|
|
@@ -441,7 +480,7 @@
|
|
|
441
480
|
|
|
442
481
|
return pump();
|
|
443
482
|
}).catch(function() {
|
|
444
|
-
tryNonStreaming(
|
|
483
|
+
tryNonStreaming(remainingText);
|
|
445
484
|
});
|
|
446
485
|
}
|
|
447
486
|
|
|
@@ -583,17 +622,21 @@
|
|
|
583
622
|
if (!voiceActive) return;
|
|
584
623
|
if (data.type === 'streaming_progress' && data.block) {
|
|
585
624
|
if (data.conversationId && data.conversationId !== currentConversationId) return;
|
|
625
|
+
if (data.seq !== undefined && renderedSeqs.has(data.seq)) return;
|
|
626
|
+
if (data.seq !== undefined) renderedSeqs.add(data.seq);
|
|
586
627
|
handleVoiceBlock(data.block, true);
|
|
587
628
|
}
|
|
588
629
|
if (data.type === 'streaming_start') {
|
|
589
630
|
if (data.conversationId && data.conversationId !== currentConversationId) return;
|
|
590
631
|
spokenChunks = new Set();
|
|
632
|
+
renderedSeqs = new Set();
|
|
591
633
|
}
|
|
592
634
|
});
|
|
593
635
|
window.addEventListener('conversation-selected', function(e) {
|
|
594
636
|
currentConversationId = e.detail.conversationId;
|
|
595
637
|
stopSpeaking();
|
|
596
638
|
spokenChunks = new Set();
|
|
639
|
+
renderedSeqs = new Set();
|
|
597
640
|
if (voiceActive) {
|
|
598
641
|
loadVoiceBlocks(currentConversationId);
|
|
599
642
|
}
|
|
@@ -633,6 +676,7 @@
|
|
|
633
676
|
}
|
|
634
677
|
var hasContent = false;
|
|
635
678
|
data.chunks.forEach(function(chunk) {
|
|
679
|
+
if (chunk.sequence !== undefined) renderedSeqs.add(chunk.sequence);
|
|
636
680
|
var block = typeof chunk.data === 'string' ? JSON.parse(chunk.data) : chunk.data;
|
|
637
681
|
if (!block) return;
|
|
638
682
|
if (block.type === 'text' && block.text) {
|