agentgui 1.0.265 → 1.0.266
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server.js +22 -2
- package/static/js/client.js +29 -0
- package/static/js/tts-websocket-handler.js +152 -0
- package/static/js/voice.js +37 -0
- package/static/js/websocket-manager.js +22 -0
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -2418,11 +2418,15 @@ const server = http.createServer(async (req, res) => {
|
|
|
2418
2418
|
sendJSON(req, res, 400, { error: 'No audio data' });
|
|
2419
2419
|
return;
|
|
2420
2420
|
}
|
|
2421
|
+
broadcastSync({ type: 'stt_progress', status: 'transcribing', percentComplete: 0 });
|
|
2421
2422
|
const { transcribe } = await getSpeech();
|
|
2422
2423
|
const text = await transcribe(audioBuffer);
|
|
2423
|
-
|
|
2424
|
+
const finalText = (text || '').trim();
|
|
2425
|
+
broadcastSync({ type: 'stt_progress', status: 'completed', percentComplete: 100, transcript: finalText });
|
|
2426
|
+
sendJSON(req, res, 200, { text: finalText });
|
|
2424
2427
|
} catch (err) {
|
|
2425
2428
|
debugLog('[STT] Error: ' + err.message);
|
|
2429
|
+
broadcastSync({ type: 'stt_progress', status: 'failed', percentComplete: 0, error: err.message });
|
|
2426
2430
|
if (!res.headersSent) sendJSON(req, res, 500, { error: err.message || 'STT failed' });
|
|
2427
2431
|
}
|
|
2428
2432
|
return;
|
|
@@ -3303,7 +3307,7 @@ const BROADCAST_TYPES = new Set([
|
|
|
3303
3307
|
'conversations_updated', 'conversation_deleted', 'queue_status', 'queue_updated',
|
|
3304
3308
|
'rate_limit_hit', 'rate_limit_clear',
|
|
3305
3309
|
'script_started', 'script_stopped', 'script_output',
|
|
3306
|
-
'model_download_progress'
|
|
3310
|
+
'model_download_progress', 'stt_progress', 'tts_setup_progress', 'voice_list'
|
|
3307
3311
|
]);
|
|
3308
3312
|
|
|
3309
3313
|
const wsBatchQueues = new Map();
|
|
@@ -3600,8 +3604,24 @@ function onServerReady() {
|
|
|
3600
3604
|
// Sends a 'voice_list' message through broadcastSync once the model check has
// settled. If loading or querying ./lib/speech.js throws, the failure is logged
// via debugLog and an empty voice list is sent instead.
const announceVoiceList = () => {
  try {
    const { getVoices } = require('./lib/speech.js');
    const voices = getVoices();
    broadcastSync({ type: 'voice_list', voices });
  } catch (voiceErr) {
    debugLog('[VOICE] Failed to broadcast voices: ' + voiceErr.message);
    broadcastSync({ type: 'voice_list', voices: [] });
  }
};

// The voice list is announced on both the success and the failure path of the
// model download.
ensureModelsDownloaded().then(ok => {
  if (ok) {
    console.log('[MODELS] Speech models ready');
  } else {
    console.log('[MODELS] Speech model download failed');
  }
  announceVoiceList();
}).catch(err => {
  console.error('[MODELS] Download error:', err.message);
  announceVoiceList();
});
|
|
3606
3626
|
|
|
3607
3627
|
getSpeech().then(s => s.preloadTTS()).catch(e => debugLog('[TTS] Preload failed: ' + e.message));
|
package/static/js/client.js
CHANGED
|
@@ -496,6 +496,12 @@ class AgentGUIClient {
|
|
|
496
496
|
case 'model_download_progress':
|
|
497
497
|
this._handleModelDownloadProgress(data.progress || data);
|
|
498
498
|
break;
|
|
499
|
+
case 'stt_progress':
|
|
500
|
+
this._handleSTTProgress(data);
|
|
501
|
+
break;
|
|
502
|
+
case 'tts_setup_progress':
|
|
503
|
+
this._handleTTSSetupProgress(data);
|
|
504
|
+
break;
|
|
499
505
|
default:
|
|
500
506
|
break;
|
|
501
507
|
}
|
|
@@ -2100,6 +2106,29 @@ class AgentGUIClient {
|
|
|
2100
2106
|
}
|
|
2101
2107
|
}
|
|
2102
2108
|
|
|
2109
|
+
_handleSTTProgress(data) {
|
|
2110
|
+
const el = document.getElementById('voiceTranscript');
|
|
2111
|
+
if (!el) return;
|
|
2112
|
+
|
|
2113
|
+
if (data.status === 'transcribing') {
|
|
2114
|
+
el.textContent = 'Transcribing...';
|
|
2115
|
+
el.classList.add('transcribing');
|
|
2116
|
+
} else if (data.status === 'completed') {
|
|
2117
|
+
el.textContent = data.transcript || '';
|
|
2118
|
+
el.setAttribute('data-final', data.transcript || '');
|
|
2119
|
+
el.classList.remove('transcribing');
|
|
2120
|
+
} else if (data.status === 'failed') {
|
|
2121
|
+
el.textContent = 'Transcription failed: ' + (data.error || 'unknown error');
|
|
2122
|
+
el.classList.remove('transcribing');
|
|
2123
|
+
}
|
|
2124
|
+
}
|
|
2125
|
+
|
|
2126
|
+
_handleTTSSetupProgress(data) {
|
|
2127
|
+
if (data.step && data.status) {
|
|
2128
|
+
console.log('[TTS Setup]', data.step, ':', data.status, data.message || '');
|
|
2129
|
+
}
|
|
2130
|
+
}
|
|
2131
|
+
|
|
2103
2132
|
_updateVoiceTabState() {
|
|
2104
2133
|
var voiceBtn = document.querySelector('[data-view="voice"]');
|
|
2105
2134
|
if (voiceBtn) {
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
(function() {
  /**
   * Per-stream buffer for sequenced TTS audio chunks.
   *
   * For every stream id the handler keeps:
   *   - a buffer of received, not-yet-played chunks (bounded by
   *     JITTER_BUFFER_SIZE; the oldest entry is dropped on overflow),
   *   - a playback list of chunks already handed to markChunksPlayed,
   *   - a tracker with the highest accepted sequence number, the sequence
   *     numbers considered missing, a count of rejected late/duplicate
   *     chunks, and a completion flag.
   *
   * Chunks are only accepted in increasing sequence order; a chunk whose
   * sequence number is not greater than the last accepted one is rejected.
   */
  class TTSWebSocketHandler {
    /**
     * @param {*} wsManager - Stored on the instance; not used by this class.
     */
    constructor(wsManager) {
      this.wsManager = wsManager;
      this.streamBuffers = new Map();
      this.playbackBuffers = new Map();
      this.sequenceTrackers = new Map();
      this.MIN_BUFFER_CHUNKS = 2;
      this.JITTER_BUFFER_SIZE = 10;
      this.chunkTimeoutMs = 5000;
      // Pending loss timers, keyed by `${streamId}:${seq}` of the awaited chunk.
      this.chunkTimers = new Map();
    }

    /**
     * Creates the buffers and tracker for a stream if it is not known yet.
     * @param {*} streamId
     */
    initStream(streamId) {
      if (this.streamBuffers.has(streamId)) return;

      this.streamBuffers.set(streamId, []);
      this.playbackBuffers.set(streamId, []);
      this.sequenceTrackers.set(streamId, {
        lastSeq: -1,
        missing: [],
        outOfOrder: 0,
        complete: false
      });
    }

    /**
     * Records an incoming chunk.
     *
     * @param {*} streamId
     * @param {*} chunk - Opaque payload, stored as-is.
     * @param {number} seq - Sequence number of the chunk.
     * @param {boolean} isLast - Marks the stream complete when truthy.
     * @returns {boolean} false when the chunk was rejected because its
     *   sequence number is not greater than the last accepted one.
     */
    receiveChunk(streamId, chunk, seq, isLast) {
      this.initStream(streamId);
      const tracker = this.sequenceTrackers.get(streamId);
      const buffer = this.streamBuffers.get(streamId);

      // This chunk is no longer awaited, so its loss timer must not fire.
      this.clearChunkTimeout(streamId, seq);

      if (seq <= tracker.lastSeq) {
        tracker.outOfOrder++;
        return false;
      }

      // Everything between the last accepted chunk and this one was skipped.
      // Later arrivals of those numbers are rejected above, so they are lost.
      for (let i = tracker.lastSeq + 1; i < seq; i++) {
        this.clearChunkTimeout(streamId, i);
        if (!tracker.missing.includes(i)) tracker.missing.push(i);
      }

      // A slow chunk may already have been flagged by its timer; it is here now.
      const flaggedAt = tracker.missing.indexOf(seq);
      if (flaggedAt !== -1) tracker.missing.splice(flaggedAt, 1);

      tracker.lastSeq = seq;
      buffer.push({ chunk, seq, isLast, receivedAt: Date.now() });

      if (buffer.length > this.JITTER_BUFFER_SIZE) {
        buffer.shift();
      }

      if (isLast) {
        this.markStreamComplete(streamId);
      }

      // Watch for the NEXT chunk. Arming the timer for `seq` itself would
      // report every successfully received chunk as missing after the timeout.
      if (!tracker.complete) {
        this.setChunkTimeout(streamId, seq + 1);
      }
      return true;
    }

    /**
     * Arms a timer that records `seq` as missing unless it is accepted (or
     * the stream completes / is cleaned up) within chunkTimeoutMs.
     * Re-arming the same key replaces the previous timer.
     *
     * @param {*} streamId
     * @param {number} seq - Sequence number being awaited.
     */
    setChunkTimeout(streamId, seq) {
      const key = `${streamId}:${seq}`;
      clearTimeout(this.chunkTimers.get(key));

      const timer = setTimeout(() => {
        this.chunkTimers.delete(key);
        const tracker = this.sequenceTrackers.get(streamId);
        if (!tracker || tracker.complete) return;
        if (seq > tracker.lastSeq && !tracker.missing.includes(seq)) {
          tracker.missing.push(seq);
        }
      }, this.chunkTimeoutMs);
      this.chunkTimers.set(key, timer);
    }

    /**
     * Cancels and forgets the loss timer for one awaited chunk, if any.
     * @param {*} streamId
     * @param {number} seq
     */
    clearChunkTimeout(streamId, seq) {
      const key = `${streamId}:${seq}`;
      if (!this.chunkTimers.has(key)) return;
      clearTimeout(this.chunkTimers.get(key));
      this.chunkTimers.delete(key);
    }

    /**
     * @param {*} streamId
     * @returns {Array} Buffered chunk records whose seq is greater than the
     *   last one recorded as played; empty for an unknown or empty stream.
     */
    getPlayableChunks(streamId) {
      const buffer = this.streamBuffers.get(streamId);
      if (!buffer || buffer.length === 0) return [];

      const playback = this.playbackBuffers.get(streamId) || [];
      const lastPlayedSeq = playback.length > 0
        ? playback[playback.length - 1].seq
        : -1;

      return buffer.filter(c => c.seq > lastPlayedSeq);
    }

    /**
     * Moves every buffered chunk with seq <= upToSeq to the playback list.
     * Unknown streams are ignored.
     *
     * @param {*} streamId
     * @param {number} upToSeq
     */
    markChunksPlayed(streamId, upToSeq) {
      const buffer = this.streamBuffers.get(streamId);
      const playback = this.playbackBuffers.get(streamId);
      if (!buffer || !playback) return;

      playback.push(...buffer.filter(c => c.seq <= upToSeq));
      this.streamBuffers.set(streamId, buffer.filter(c => c.seq > upToSeq));
    }

    /**
     * @param {*} streamId
     * @returns {boolean} true once the stream is complete or at least
     *   MIN_BUFFER_CHUNKS chunks are buffered; false for unknown streams.
     */
    canStartPlayback(streamId) {
      const buffer = this.streamBuffers.get(streamId);
      const tracker = this.sequenceTrackers.get(streamId);
      if (!buffer || !tracker) return false;

      return tracker.complete || buffer.length >= this.MIN_BUFFER_CHUNKS;
    }

    /**
     * Flags a known stream as complete; unknown streams are ignored.
     * @param {*} streamId
     */
    markStreamComplete(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      if (tracker) tracker.complete = true;
    }

    /**
     * @param {*} streamId
     * @returns {boolean|undefined} Completion flag; undefined for an unknown stream.
     */
    isStreamComplete(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      return tracker && tracker.complete;
    }

    /**
     * @param {*} streamId
     * @returns {boolean|undefined} Whether any sequence number is recorded as
     *   missing; undefined for an unknown stream.
     */
    hasLostPackets(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      return tracker && tracker.missing.length > 0;
    }

    /**
     * @param {*} streamId
     * @returns {{buffered: number, played: number, totalSeq: number,
     *   missing: number, outOfOrder: number, complete: boolean}} Counters for
     *   the stream; all zero/false when the stream is unknown.
     */
    getStreamStats(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      const buffer = this.streamBuffers.get(streamId);
      const playback = this.playbackBuffers.get(streamId);

      return {
        buffered: buffer ? buffer.length : 0,
        played: playback ? playback.length : 0,
        totalSeq: tracker ? tracker.lastSeq + 1 : 0,
        missing: tracker ? tracker.missing.length : 0,
        outOfOrder: tracker ? tracker.outOfOrder : 0,
        complete: tracker ? tracker.complete : false
      };
    }

    /**
     * Drops all state of a stream and cancels its pending loss timers.
     * @param {*} streamId
     */
    cleanupStream(streamId) {
      this.streamBuffers.delete(streamId);
      this.playbackBuffers.delete(streamId);
      this.sequenceTrackers.delete(streamId);

      const prefix = `${streamId}:`;
      for (const key of Array.from(this.chunkTimers.keys())) {
        // The separator written by this stream must be the key's last colon;
        // otherwise the key belongs to another stream whose id merely starts
        // with `${streamId}:`.
        if (key.startsWith(prefix) && key.lastIndexOf(':') === prefix.length - 1) {
          clearTimeout(this.chunkTimers.get(key));
          this.chunkTimers.delete(key);
        }
      }
    }
  }

  // Publish on `window` in browsers; fall back to the global object elsewhere
  // so that merely loading the script does not throw.
  const root = typeof window !== 'undefined'
    ? window
    : (typeof globalThis !== 'undefined' ? globalThis : null);
  if (root) root.TTSWebSocketHandler = TTSWebSocketHandler;
})();
|
package/static/js/voice.js
CHANGED
|
@@ -35,6 +35,43 @@
|
|
|
35
35
|
if (!selector) return;
|
|
36
36
|
var saved = localStorage.getItem('voice-selected-id');
|
|
37
37
|
if (saved) selectedVoiceId = saved;
|
|
38
|
+
if (window.wsManager) {
|
|
39
|
+
window.wsManager.subscribeToVoiceList(function(voices) {
|
|
40
|
+
if (!Array.isArray(voices)) return;
|
|
41
|
+
selector.innerHTML = '';
|
|
42
|
+
var builtIn = voices.filter(function(v) { return !v.isCustom; });
|
|
43
|
+
var custom = voices.filter(function(v) { return v.isCustom; });
|
|
44
|
+
if (builtIn.length) {
|
|
45
|
+
var grp1 = document.createElement('optgroup');
|
|
46
|
+
grp1.label = 'Built-in Voices';
|
|
47
|
+
builtIn.forEach(function(voice) {
|
|
48
|
+
var opt = document.createElement('option');
|
|
49
|
+
opt.value = voice.id;
|
|
50
|
+
var parts = [];
|
|
51
|
+
if (voice.gender) parts.push(voice.gender);
|
|
52
|
+
if (voice.accent) parts.push(voice.accent);
|
|
53
|
+
opt.textContent = voice.name + (parts.length ? ' (' + parts.join(', ') + ')' : '');
|
|
54
|
+
grp1.appendChild(opt);
|
|
55
|
+
});
|
|
56
|
+
selector.appendChild(grp1);
|
|
57
|
+
}
|
|
58
|
+
if (custom.length) {
|
|
59
|
+
var grp2 = document.createElement('optgroup');
|
|
60
|
+
grp2.label = 'Custom Voices';
|
|
61
|
+
custom.forEach(function(voice) {
|
|
62
|
+
var opt = document.createElement('option');
|
|
63
|
+
opt.value = voice.id;
|
|
64
|
+
opt.textContent = voice.name;
|
|
65
|
+
grp2.appendChild(opt);
|
|
66
|
+
});
|
|
67
|
+
selector.appendChild(grp2);
|
|
68
|
+
}
|
|
69
|
+
if (saved && selector.querySelector('option[value="' + saved + '"]')) {
|
|
70
|
+
selector.value = saved;
|
|
71
|
+
}
|
|
72
|
+
});
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
38
75
|
fetch(BASE + '/api/voices')
|
|
39
76
|
.then(function(res) { return res.json(); })
|
|
40
77
|
.then(function(data) {
|
|
@@ -139,6 +139,13 @@ class WebSocketManager {
|
|
|
139
139
|
continue;
|
|
140
140
|
}
|
|
141
141
|
|
|
142
|
+
// Cache the latest voice list and notify every subscriber.
if (data.type === 'voice_list') {
  this.cachedVoiceList = data.voices || [];
  // The listener array may not exist yet if nobody has subscribed; iterating
  // an unset property would throw. Iterate over a copy so a listener that
  // unsubscribes itself does not cause its neighbour to be skipped.
  const voiceListeners = Array.isArray(this.voiceListListeners)
    ? this.voiceListListeners.slice()
    : [];
  for (const listener of voiceListeners) {
    try {
      listener(this.cachedVoiceList);
    } catch (err) {
      // One failing listener must not stop the others, but should be visible.
      console.error('[WS] voice_list listener failed:', err);
    }
  }
}
|
|
148
|
+
|
|
142
149
|
if (data.seq !== undefined && data.sessionId) {
|
|
143
150
|
this.lastSeqBySession[data.sessionId] = Math.max(
|
|
144
151
|
this.lastSeqBySession[data.sessionId] || -1, data.seq
|
|
@@ -565,6 +572,21 @@ class WebSocketManager {
|
|
|
565
572
|
this.disconnect();
|
|
566
573
|
this.listeners = {};
|
|
567
574
|
}
|
|
575
|
+
subscribeToVoiceList(callback) {
|
|
576
|
+
if (!this.voiceListListeners.includes(callback)) {
|
|
577
|
+
this.voiceListListeners.push(callback);
|
|
578
|
+
}
|
|
579
|
+
if (this.cachedVoiceList !== null) {
|
|
580
|
+
callback(this.cachedVoiceList);
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
unsubscribeFromVoiceList(callback) {
|
|
585
|
+
const idx = this.voiceListListeners.indexOf(callback);
|
|
586
|
+
if (idx > -1) {
|
|
587
|
+
this.voiceListListeners.splice(idx, 1);
|
|
588
|
+
}
|
|
589
|
+
}
|
|
568
590
|
}
|
|
569
591
|
|
|
570
592
|
if (typeof module !== 'undefined' && module.exports) {
|