agentgui 1.0.264 → 1.0.266
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server.js +22 -2
- package/static/js/client.js +29 -0
- package/static/js/tts-websocket-handler.js +152 -0
- package/static/js/voice.js +59 -1
- package/static/js/websocket-manager.js +22 -0
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -2418,11 +2418,15 @@ const server = http.createServer(async (req, res) => {
|
|
|
2418
2418
|
sendJSON(req, res, 400, { error: 'No audio data' });
|
|
2419
2419
|
return;
|
|
2420
2420
|
}
|
|
2421
|
+
broadcastSync({ type: 'stt_progress', status: 'transcribing', percentComplete: 0 });
|
|
2421
2422
|
const { transcribe } = await getSpeech();
|
|
2422
2423
|
const text = await transcribe(audioBuffer);
|
|
2423
|
-
|
|
2424
|
+
const finalText = (text || '').trim();
|
|
2425
|
+
broadcastSync({ type: 'stt_progress', status: 'completed', percentComplete: 100, transcript: finalText });
|
|
2426
|
+
sendJSON(req, res, 200, { text: finalText });
|
|
2424
2427
|
} catch (err) {
|
|
2425
2428
|
debugLog('[STT] Error: ' + err.message);
|
|
2429
|
+
broadcastSync({ type: 'stt_progress', status: 'failed', percentComplete: 0, error: err.message });
|
|
2426
2430
|
if (!res.headersSent) sendJSON(req, res, 500, { error: err.message || 'STT failed' });
|
|
2427
2431
|
}
|
|
2428
2432
|
return;
|
|
@@ -3303,7 +3307,7 @@ const BROADCAST_TYPES = new Set([
|
|
|
3303
3307
|
'conversations_updated', 'conversation_deleted', 'queue_status', 'queue_updated',
|
|
3304
3308
|
'rate_limit_hit', 'rate_limit_clear',
|
|
3305
3309
|
'script_started', 'script_stopped', 'script_output',
|
|
3306
|
-
'model_download_progress'
|
|
3310
|
+
'model_download_progress', 'stt_progress', 'tts_setup_progress', 'voice_list'
|
|
3307
3311
|
]);
|
|
3308
3312
|
|
|
3309
3313
|
const wsBatchQueues = new Map();
|
|
@@ -3600,8 +3604,24 @@ function onServerReady() {
|
|
|
3600
3604
|
ensureModelsDownloaded().then(ok => {
|
|
3601
3605
|
if (ok) console.log('[MODELS] Speech models ready');
|
|
3602
3606
|
else console.log('[MODELS] Speech model download failed');
|
|
3607
|
+
try {
|
|
3608
|
+
const { getVoices } = require('./lib/speech.js');
|
|
3609
|
+
const voices = getVoices();
|
|
3610
|
+
broadcastSync({ type: 'voice_list', voices });
|
|
3611
|
+
} catch (err) {
|
|
3612
|
+
debugLog('[VOICE] Failed to broadcast voices: ' + err.message);
|
|
3613
|
+
broadcastSync({ type: 'voice_list', voices: [] });
|
|
3614
|
+
}
|
|
3603
3615
|
}).catch(err => {
|
|
3604
3616
|
console.error('[MODELS] Download error:', err.message);
|
|
3617
|
+
try {
|
|
3618
|
+
const { getVoices } = require('./lib/speech.js');
|
|
3619
|
+
const voices = getVoices();
|
|
3620
|
+
broadcastSync({ type: 'voice_list', voices });
|
|
3621
|
+
} catch (err2) {
|
|
3622
|
+
debugLog('[VOICE] Failed to broadcast voices: ' + err2.message);
|
|
3623
|
+
broadcastSync({ type: 'voice_list', voices: [] });
|
|
3624
|
+
}
|
|
3605
3625
|
});
|
|
3606
3626
|
|
|
3607
3627
|
getSpeech().then(s => s.preloadTTS()).catch(e => debugLog('[TTS] Preload failed: ' + e.message));
|
package/static/js/client.js
CHANGED
|
@@ -496,6 +496,12 @@ class AgentGUIClient {
|
|
|
496
496
|
case 'model_download_progress':
|
|
497
497
|
this._handleModelDownloadProgress(data.progress || data);
|
|
498
498
|
break;
|
|
499
|
+
case 'stt_progress':
|
|
500
|
+
this._handleSTTProgress(data);
|
|
501
|
+
break;
|
|
502
|
+
case 'tts_setup_progress':
|
|
503
|
+
this._handleTTSSetupProgress(data);
|
|
504
|
+
break;
|
|
499
505
|
default:
|
|
500
506
|
break;
|
|
501
507
|
}
|
|
@@ -2100,6 +2106,29 @@ class AgentGUIClient {
|
|
|
2100
2106
|
}
|
|
2101
2107
|
}
|
|
2102
2108
|
|
|
2109
|
+
_handleSTTProgress(data) {
|
|
2110
|
+
const el = document.getElementById('voiceTranscript');
|
|
2111
|
+
if (!el) return;
|
|
2112
|
+
|
|
2113
|
+
if (data.status === 'transcribing') {
|
|
2114
|
+
el.textContent = 'Transcribing...';
|
|
2115
|
+
el.classList.add('transcribing');
|
|
2116
|
+
} else if (data.status === 'completed') {
|
|
2117
|
+
el.textContent = data.transcript || '';
|
|
2118
|
+
el.setAttribute('data-final', data.transcript || '');
|
|
2119
|
+
el.classList.remove('transcribing');
|
|
2120
|
+
} else if (data.status === 'failed') {
|
|
2121
|
+
el.textContent = 'Transcription failed: ' + (data.error || 'unknown error');
|
|
2122
|
+
el.classList.remove('transcribing');
|
|
2123
|
+
}
|
|
2124
|
+
}
|
|
2125
|
+
|
|
2126
|
+
_handleTTSSetupProgress(data) {
|
|
2127
|
+
if (data.step && data.status) {
|
|
2128
|
+
console.log('[TTS Setup]', data.step, ':', data.status, data.message || '');
|
|
2129
|
+
}
|
|
2130
|
+
}
|
|
2131
|
+
|
|
2103
2132
|
_updateVoiceTabState() {
|
|
2104
2133
|
var voiceBtn = document.querySelector('[data-view="voice"]');
|
|
2105
2134
|
if (voiceBtn) {
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
(function() {
  /**
   * Per-stream buffering and sequence bookkeeping for TTS audio chunks.
   *
   * For every stream id the handler keeps:
   *  - a receive buffer of chunks that have not been marked as played,
   *  - a playback buffer of chunks that have been marked as played,
   *  - a tracker with the highest sequence number seen, the list of sequence
   *    numbers considered missing, a count of rejected late/duplicate chunks
   *    and a completion flag.
   *
   * A watchdog timer is armed for the NEXT expected sequence number each time a
   * chunk is accepted; if that chunk does not arrive within `chunkTimeoutMs`
   * it is recorded as missing.
   */
  class TTSWebSocketHandler {
    /**
     * @param {*} wsManager - Stored on the instance; not otherwise used by this class.
     */
    constructor(wsManager) {
      this.wsManager = wsManager;
      this.streamBuffers = new Map();
      this.playbackBuffers = new Map();
      this.sequenceTrackers = new Map();
      // Minimum number of buffered chunks before playback may start.
      this.MIN_BUFFER_CHUNKS = 2;
      // Maximum receive-buffer length; the oldest entry is dropped beyond this.
      this.JITTER_BUFFER_SIZE = 10;
      // How long to wait for an expected chunk before flagging it as missing.
      this.chunkTimeoutMs = 5000;
      // Pending watchdog timers keyed by `${streamId}:${seq}`.
      this.chunkTimers = new Map();
    }

    /**
     * Create the buffers and tracker for a stream if they do not exist yet.
     *
     * @param {*} streamId - Stream identifier used as the map key.
     */
    initStream(streamId) {
      if (this.streamBuffers.has(streamId)) return;

      this.streamBuffers.set(streamId, []);
      this.playbackBuffers.set(streamId, []);
      this.sequenceTrackers.set(streamId, {
        lastSeq: -1,
        missing: [],
        outOfOrder: 0,
        complete: false
      });
    }

    /**
     * Accept one chunk of a stream.
     *
     * @param {*} streamId - Stream identifier.
     * @param {*} chunk - Chunk payload; stored as-is.
     * @param {number} seq - Sequence number of the chunk.
     * @param {boolean} isLast - Whether this chunk ends the stream.
     * @returns {boolean} `true` when the chunk was buffered, `false` when it was
     *   rejected because its sequence number is not newer than the latest one seen.
     */
    receiveChunk(streamId, chunk, seq, isLast) {
      this.initStream(streamId);
      const tracker = this.sequenceTrackers.get(streamId);
      const buffer = this.streamBuffers.get(streamId);

      // The chunk is here, so a watchdog waiting for it is obsolete.
      this.clearChunkTimeout(streamId, seq);

      if (seq <= tracker.lastSeq) {
        // Late or duplicate chunk: counted and dropped.
        tracker.outOfOrder++;
        return false;
      }

      // Everything skipped between the previous and the current chunk is a gap.
      for (let i = tracker.lastSeq + 1; i < seq; i++) {
        // The watchdog may already have flagged `i`; avoid duplicate entries.
        if (!tracker.missing.includes(i)) tracker.missing.push(i);
        this.clearChunkTimeout(streamId, i);
      }

      // A chunk that was flagged by the watchdog but still arrived is not lost.
      const flaggedIdx = tracker.missing.indexOf(seq);
      if (flaggedIdx !== -1) tracker.missing.splice(flaggedIdx, 1);

      tracker.lastSeq = seq;
      buffer.push({ chunk, seq, isLast, receivedAt: Date.now() });

      if (buffer.length > this.JITTER_BUFFER_SIZE) {
        buffer.shift();
      }

      if (isLast) {
        this.markStreamComplete(streamId);
      } else {
        // Watch for the NEXT chunk. Arming the timer for the chunk that was
        // just received would flag every delivered chunk as missing once the
        // timeout elapsed.
        this.setChunkTimeout(streamId, seq + 1);
      }

      return true;
    }

    /**
     * Arm a watchdog for an expected chunk. When it fires, `seq` is added to
     * the stream's missing list unless the stream is gone or complete, the
     * chunk has been received in the meantime, or it is already listed.
     *
     * @param {*} streamId - Stream identifier.
     * @param {number} seq - Sequence number that is expected to arrive.
     */
    setChunkTimeout(streamId, seq) {
      const key = `${streamId}:${seq}`;
      // Never leave an older timer for the same key running unreferenced.
      clearTimeout(this.chunkTimers.get(key));

      const timer = setTimeout(() => {
        this.chunkTimers.delete(key);
        const tracker = this.sequenceTrackers.get(streamId);
        if (!tracker || tracker.complete) return;
        if (seq <= tracker.lastSeq) return;
        if (!tracker.missing.includes(seq)) {
          tracker.missing.push(seq);
        }
      }, this.chunkTimeoutMs);

      this.chunkTimers.set(key, timer);
    }

    /**
     * Cancel and forget the watchdog for one sequence number, if any.
     *
     * @param {*} streamId - Stream identifier.
     * @param {number} seq - Sequence number whose watchdog should be cancelled.
     */
    clearChunkTimeout(streamId, seq) {
      const key = `${streamId}:${seq}`;
      if (!this.chunkTimers.has(key)) return;
      clearTimeout(this.chunkTimers.get(key));
      this.chunkTimers.delete(key);
    }

    /**
     * Buffered chunks whose sequence number is higher than the last chunk in
     * the playback buffer.
     *
     * @param {*} streamId - Stream identifier.
     * @returns {Array<Object>} Buffered chunk records; empty when nothing is buffered.
     */
    getPlayableChunks(streamId) {
      const buffer = this.streamBuffers.get(streamId);
      if (!buffer || buffer.length === 0) return [];

      const playback = this.playbackBuffers.get(streamId) || [];
      const lastPlayedSeq = playback.length > 0
        ? playback[playback.length - 1].seq
        : -1;

      return buffer.filter(c => c.seq > lastPlayedSeq);
    }

    /**
     * Move every buffered chunk with `seq <= upToSeq` to the playback buffer.
     * Unknown streams are ignored.
     *
     * @param {*} streamId - Stream identifier.
     * @param {number} upToSeq - Highest sequence number that has been played.
     */
    markChunksPlayed(streamId, upToSeq) {
      const buffer = this.streamBuffers.get(streamId);
      const playback = this.playbackBuffers.get(streamId);
      if (!buffer || !playback) return;

      playback.push(...buffer.filter(c => c.seq <= upToSeq));
      this.streamBuffers.set(streamId, buffer.filter(c => c.seq > upToSeq));
    }

    /**
     * Whether playback may start: either enough chunks are buffered or the
     * stream has been marked complete.
     *
     * @param {*} streamId - Stream identifier.
     * @returns {boolean}
     */
    canStartPlayback(streamId) {
      const buffer = this.streamBuffers.get(streamId);
      const tracker = this.sequenceTrackers.get(streamId);

      if (!buffer || !tracker) return false;
      return buffer.length >= this.MIN_BUFFER_CHUNKS || tracker.complete;
    }

    /**
     * Flag a stream as complete. Unknown streams are ignored.
     *
     * @param {*} streamId - Stream identifier.
     */
    markStreamComplete(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      if (tracker) tracker.complete = true;
    }

    /**
     * @param {*} streamId - Stream identifier.
     * @returns {boolean|undefined} Completion flag; `undefined` for unknown streams.
     */
    isStreamComplete(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      return tracker && tracker.complete;
    }

    /**
     * @param {*} streamId - Stream identifier.
     * @returns {boolean|undefined} Whether any sequence number is currently
     *   listed as missing; `undefined` for unknown streams.
     */
    hasLostPackets(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      return tracker && tracker.missing.length > 0;
    }

    /**
     * Snapshot of the counters kept for a stream. Unknown streams yield zeros
     * and `complete: false`.
     *
     * @param {*} streamId - Stream identifier.
     * @returns {{buffered: number, played: number, totalSeq: number,
     *   missing: number, outOfOrder: number, complete: boolean}}
     */
    getStreamStats(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      const buffer = this.streamBuffers.get(streamId);
      const playback = this.playbackBuffers.get(streamId);

      return {
        buffered: buffer ? buffer.length : 0,
        played: playback ? playback.length : 0,
        totalSeq: tracker ? tracker.lastSeq + 1 : 0,
        missing: tracker ? tracker.missing.length : 0,
        outOfOrder: tracker ? tracker.outOfOrder : 0,
        complete: tracker ? tracker.complete : false
      };
    }

    /**
     * Drop all state for a stream and cancel its pending watchdog timers.
     *
     * @param {*} streamId - Stream identifier.
     */
    cleanupStream(streamId) {
      this.streamBuffers.delete(streamId);
      this.playbackBuffers.delete(streamId);
      this.sequenceTrackers.delete(streamId);

      const prefix = `${streamId}:`;
      for (const key of Array.from(this.chunkTimers.keys())) {
        if (key.startsWith(prefix)) {
          clearTimeout(this.chunkTimers.get(key));
          this.chunkTimers.delete(key);
        }
      }
    }
  }

  // Publish on `window` in browsers; fall back to the global object elsewhere
  // so the class can also be loaded outside a browser (e.g. for tests).
  const root = typeof window !== 'undefined' ? window : globalThis;
  root.TTSWebSocketHandler = TTSWebSocketHandler;
})();
|
package/static/js/voice.js
CHANGED
|
@@ -35,6 +35,43 @@
|
|
|
35
35
|
if (!selector) return;
|
|
36
36
|
var saved = localStorage.getItem('voice-selected-id');
|
|
37
37
|
if (saved) selectedVoiceId = saved;
|
|
38
|
+
if (window.wsManager) {
|
|
39
|
+
window.wsManager.subscribeToVoiceList(function(voices) {
|
|
40
|
+
if (!Array.isArray(voices)) return;
|
|
41
|
+
selector.innerHTML = '';
|
|
42
|
+
var builtIn = voices.filter(function(v) { return !v.isCustom; });
|
|
43
|
+
var custom = voices.filter(function(v) { return v.isCustom; });
|
|
44
|
+
if (builtIn.length) {
|
|
45
|
+
var grp1 = document.createElement('optgroup');
|
|
46
|
+
grp1.label = 'Built-in Voices';
|
|
47
|
+
builtIn.forEach(function(voice) {
|
|
48
|
+
var opt = document.createElement('option');
|
|
49
|
+
opt.value = voice.id;
|
|
50
|
+
var parts = [];
|
|
51
|
+
if (voice.gender) parts.push(voice.gender);
|
|
52
|
+
if (voice.accent) parts.push(voice.accent);
|
|
53
|
+
opt.textContent = voice.name + (parts.length ? ' (' + parts.join(', ') + ')' : '');
|
|
54
|
+
grp1.appendChild(opt);
|
|
55
|
+
});
|
|
56
|
+
selector.appendChild(grp1);
|
|
57
|
+
}
|
|
58
|
+
if (custom.length) {
|
|
59
|
+
var grp2 = document.createElement('optgroup');
|
|
60
|
+
grp2.label = 'Custom Voices';
|
|
61
|
+
custom.forEach(function(voice) {
|
|
62
|
+
var opt = document.createElement('option');
|
|
63
|
+
opt.value = voice.id;
|
|
64
|
+
opt.textContent = voice.name;
|
|
65
|
+
grp2.appendChild(opt);
|
|
66
|
+
});
|
|
67
|
+
selector.appendChild(grp2);
|
|
68
|
+
}
|
|
69
|
+
if (saved && selector.querySelector('option[value="' + saved + '"]')) {
|
|
70
|
+
selector.value = saved;
|
|
71
|
+
}
|
|
72
|
+
});
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
38
75
|
fetch(BASE + '/api/voices')
|
|
39
76
|
.then(function(res) { return res.json(); })
|
|
40
77
|
.then(function(data) {
|
|
@@ -672,7 +709,11 @@
|
|
|
672
709
|
}
|
|
673
710
|
});
|
|
674
711
|
window.addEventListener('conversation-selected', function(e) {
|
|
675
|
-
|
|
712
|
+
var newConversationId = e.detail.conversationId;
|
|
713
|
+
if (currentConversationId && currentConversationId !== newConversationId) {
|
|
714
|
+
unsubscribeFromConversation();
|
|
715
|
+
}
|
|
716
|
+
currentConversationId = newConversationId;
|
|
676
717
|
stopSpeaking();
|
|
677
718
|
spokenChunks = new Set();
|
|
678
719
|
renderedSeqs = new Set();
|
|
@@ -715,9 +756,11 @@
|
|
|
715
756
|
_voiceBreakNext = false;
|
|
716
757
|
if (!conversationId) {
|
|
717
758
|
showVoiceEmpty(container);
|
|
759
|
+
unsubscribeFromConversation();
|
|
718
760
|
return;
|
|
719
761
|
}
|
|
720
762
|
isLoadingHistory = true;
|
|
763
|
+
subscribeToConversation(conversationId);
|
|
721
764
|
fetch(BASE + '/api/conversations/' + conversationId + '/chunks')
|
|
722
765
|
.then(function(res) { return res.json(); })
|
|
723
766
|
.then(function(data) {
|
|
@@ -751,6 +794,20 @@
|
|
|
751
794
|
});
|
|
752
795
|
}
|
|
753
796
|
|
|
797
|
+
/**
 * Send a `subscribe` message for a conversation over the client's WebSocket manager.
 *
 * Nothing is sent when the id is falsy or when the global `agentGUIClient`
 * (or its `wsManager`) is not available.
 *
 * @param {*} conversationId - Conversation to subscribe to.
 */
function subscribeToConversation(conversationId) {
  if (!conversationId) return;

  var clientAvailable = typeof agentGUIClient !== 'undefined' && agentGUIClient;
  if (!clientAvailable || !agentGUIClient.wsManager) return;

  var request = {
    type: 'subscribe',
    conversationId: conversationId,
    timestamp: Date.now()
  };
  agentGUIClient.wsManager.sendMessage(request);
}
|
|
803
|
+
|
|
804
|
+
/**
 * Send an `unsubscribe` message for the conversation held in
 * `currentConversationId`.
 *
 * Nothing is sent when the global `agentGUIClient` (or its `wsManager`) is not
 * available, or when no conversation id is currently set.
 */
function unsubscribeFromConversation() {
  var managerAvailable = typeof agentGUIClient !== 'undefined' &&
    agentGUIClient &&
    agentGUIClient.wsManager;
  if (!managerAvailable || !currentConversationId) return;

  var request = {
    type: 'unsubscribe',
    conversationId: currentConversationId,
    timestamp: Date.now()
  };
  agentGUIClient.wsManager.sendMessage(request);
}
|
|
810
|
+
|
|
754
811
|
/**
 * Replace the container's content with the empty-state placeholder: a
 * microphone icon followed by short usage instructions.
 *
 * @param {Object} container - Element whose `innerHTML` is overwritten.
 */
function showVoiceEmpty(container) {
  // Static markup only; nothing is interpolated into it.
  var emptyStateMarkup = '<div class="voice-empty"><div class="voice-empty-icon"><svg viewBox="0 0 24 24" width="64" height="64" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" y1="19" x2="12" y2="23"/><line x1="8" y1="23" x2="16" y2="23"/></svg></div><div>Hold the microphone button to record.<br>Release to transcribe. Tap Send to submit.<br>New responses will be read aloud.</div></div>';
  container.innerHTML = emptyStateMarkup;
}
|
|
@@ -770,6 +827,7 @@
|
|
|
770
827
|
/**
 * Turn the voice view off: clear the `voiceActive` flag, stop any speech
 * output via `stopSpeaking()`, then drop the conversation subscription via
 * `unsubscribeFromConversation()`.
 */
function deactivate() {
  // The flag is cleared first, before the two teardown calls run.
  voiceActive = false;

  stopSpeaking();
  unsubscribeFromConversation();
}
|
|
774
832
|
|
|
775
833
|
function escapeHtml(text) {
|
|
@@ -139,6 +139,13 @@ class WebSocketManager {
|
|
|
139
139
|
continue;
|
|
140
140
|
}
|
|
141
141
|
|
|
142
|
+
if (data.type === 'voice_list') {
|
|
143
|
+
this.cachedVoiceList = data.voices || [];
|
|
144
|
+
for (const listener of this.voiceListListeners) {
|
|
145
|
+
try { listener(this.cachedVoiceList); } catch (_) {}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
142
149
|
if (data.seq !== undefined && data.sessionId) {
|
|
143
150
|
this.lastSeqBySession[data.sessionId] = Math.max(
|
|
144
151
|
this.lastSeqBySession[data.sessionId] || -1, data.seq
|
|
@@ -565,6 +572,21 @@ class WebSocketManager {
|
|
|
565
572
|
this.disconnect();
|
|
566
573
|
this.listeners = {};
|
|
567
574
|
}
|
|
575
|
+
subscribeToVoiceList(callback) {
|
|
576
|
+
if (!this.voiceListListeners.includes(callback)) {
|
|
577
|
+
this.voiceListListeners.push(callback);
|
|
578
|
+
}
|
|
579
|
+
if (this.cachedVoiceList !== null) {
|
|
580
|
+
callback(this.cachedVoiceList);
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
unsubscribeFromVoiceList(callback) {
|
|
585
|
+
const idx = this.voiceListListeners.indexOf(callback);
|
|
586
|
+
if (idx > -1) {
|
|
587
|
+
this.voiceListListeners.splice(idx, 1);
|
|
588
|
+
}
|
|
589
|
+
}
|
|
568
590
|
}
|
|
569
591
|
|
|
570
592
|
if (typeof module !== 'undefined' && module.exports) {
|