agentgui 1.0.265 → 1.0.267

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.265",
3
+ "version": "1.0.267",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
package/server.js CHANGED
@@ -2418,11 +2418,15 @@ const server = http.createServer(async (req, res) => {
2418
2418
  sendJSON(req, res, 400, { error: 'No audio data' });
2419
2419
  return;
2420
2420
  }
2421
+ broadcastSync({ type: 'stt_progress', status: 'transcribing', percentComplete: 0 });
2421
2422
  const { transcribe } = await getSpeech();
2422
2423
  const text = await transcribe(audioBuffer);
2423
- sendJSON(req, res, 200, { text: (text || '').trim() });
2424
+ const finalText = (text || '').trim();
2425
+ broadcastSync({ type: 'stt_progress', status: 'completed', percentComplete: 100, transcript: finalText });
2426
+ sendJSON(req, res, 200, { text: finalText });
2424
2427
  } catch (err) {
2425
2428
  debugLog('[STT] Error: ' + err.message);
2429
+ broadcastSync({ type: 'stt_progress', status: 'failed', percentComplete: 0, error: err.message });
2426
2430
  if (!res.headersSent) sendJSON(req, res, 500, { error: err.message || 'STT failed' });
2427
2431
  }
2428
2432
  return;
@@ -3303,7 +3307,7 @@ const BROADCAST_TYPES = new Set([
3303
3307
  'conversations_updated', 'conversation_deleted', 'queue_status', 'queue_updated',
3304
3308
  'rate_limit_hit', 'rate_limit_clear',
3305
3309
  'script_started', 'script_stopped', 'script_output',
3306
- 'model_download_progress'
3310
+ 'model_download_progress', 'stt_progress', 'tts_setup_progress', 'voice_list'
3307
3311
  ]);
3308
3312
 
3309
3313
  const wsBatchQueues = new Map();
@@ -3600,8 +3604,24 @@ function onServerReady() {
3600
3604
  ensureModelsDownloaded().then(ok => {
3601
3605
  if (ok) console.log('[MODELS] Speech models ready');
3602
3606
  else console.log('[MODELS] Speech model download failed');
3607
+ try {
3608
+ const { getVoices } = require('./lib/speech.js');
3609
+ const voices = getVoices();
3610
+ broadcastSync({ type: 'voice_list', voices });
3611
+ } catch (err) {
3612
+ debugLog('[VOICE] Failed to broadcast voices: ' + err.message);
3613
+ broadcastSync({ type: 'voice_list', voices: [] });
3614
+ }
3603
3615
  }).catch(err => {
3604
3616
  console.error('[MODELS] Download error:', err.message);
3617
+ try {
3618
+ const { getVoices } = require('./lib/speech.js');
3619
+ const voices = getVoices();
3620
+ broadcastSync({ type: 'voice_list', voices });
3621
+ } catch (err2) {
3622
+ debugLog('[VOICE] Failed to broadcast voices: ' + err2.message);
3623
+ broadcastSync({ type: 'voice_list', voices: [] });
3624
+ }
3605
3625
  });
3606
3626
 
3607
3627
  getSpeech().then(s => s.preloadTTS()).catch(e => debugLog('[TTS] Preload failed: ' + e.message));
@@ -496,6 +496,12 @@ class AgentGUIClient {
496
496
  case 'model_download_progress':
497
497
  this._handleModelDownloadProgress(data.progress || data);
498
498
  break;
499
+ case 'stt_progress':
500
+ this._handleSTTProgress(data);
501
+ break;
502
+ case 'tts_setup_progress':
503
+ this._handleTTSSetupProgress(data);
504
+ break;
499
505
  default:
500
506
  break;
501
507
  }
@@ -2100,6 +2106,29 @@ class AgentGUIClient {
2100
2106
  }
2101
2107
  }
2102
2108
 
2109
+ _handleSTTProgress(data) {
2110
+ const el = document.getElementById('voiceTranscript');
2111
+ if (!el) return;
2112
+
2113
+ if (data.status === 'transcribing') {
2114
+ el.textContent = 'Transcribing...';
2115
+ el.classList.add('transcribing');
2116
+ } else if (data.status === 'completed') {
2117
+ el.textContent = data.transcript || '';
2118
+ el.setAttribute('data-final', data.transcript || '');
2119
+ el.classList.remove('transcribing');
2120
+ } else if (data.status === 'failed') {
2121
+ el.textContent = 'Transcription failed: ' + (data.error || 'unknown error');
2122
+ el.classList.remove('transcribing');
2123
+ }
2124
+ }
2125
+
2126
+ _handleTTSSetupProgress(data) {
2127
+ if (data.step && data.status) {
2128
+ console.log('[TTS Setup]', data.step, ':', data.status, data.message || '');
2129
+ }
2130
+ }
2131
+
2103
2132
  _updateVoiceTabState() {
2104
2133
  var voiceBtn = document.querySelector('[data-view="voice"]');
2105
2134
  if (voiceBtn) {
@@ -0,0 +1,152 @@
1
(function() {
  /**
   * Per-stream buffer and sequence tracker for chunks delivered over a
   * WebSocket.
   *
   * For every stream id the handler keeps:
   *  - a bounded buffer of received-but-not-yet-played chunks,
   *  - a list of chunks already handed to playback,
   *  - a tracker with the highest accepted sequence number, the sequence
   *    numbers considered missing, a count of rejected (late/duplicate)
   *    chunks and a completion flag.
   *
   * Chunks are only accepted in strictly increasing sequence order; a chunk
   * whose sequence number is not greater than the last accepted one is
   * rejected and counted in `outOfOrder`.
   */
  class TTSWebSocketHandler {
    /**
     * @param {*} wsManager - Stored on the instance; not otherwise used by
     *   this class.
     */
    constructor(wsManager) {
      this.wsManager = wsManager;
      this.streamBuffers = new Map();
      this.playbackBuffers = new Map();
      this.sequenceTrackers = new Map();
      this.MIN_BUFFER_CHUNKS = 2;
      this.JITTER_BUFFER_SIZE = 10;
      this.chunkTimeoutMs = 5000;
      // Keyed by `${streamId}:${seq}`; holds the watchdog timer that will
      // flag `seq` as missing if it has not arrived in time.
      this.chunkTimers = new Map();
    }

    /**
     * Create the buffers and tracker for a stream if they do not exist yet.
     * @param {*} streamId
     */
    initStream(streamId) {
      if (this.streamBuffers.has(streamId)) return;

      this.streamBuffers.set(streamId, []);
      this.playbackBuffers.set(streamId, []);
      this.sequenceTrackers.set(streamId, {
        lastSeq: -1,
        missing: [],
        outOfOrder: 0,
        complete: false
      });
    }

    /**
     * Record an incoming chunk.
     *
     * @param {*} streamId
     * @param {*} chunk - Opaque payload, stored as-is.
     * @param {number} seq - Integer sequence number of the chunk.
     * @param {boolean} isLast - Truthy when this is the final chunk.
     * @returns {boolean} true when the chunk was buffered; false when it was
     *   rejected (non-integer `seq`, duplicate, or older than the last
     *   accepted chunk).
     */
    receiveChunk(streamId, chunk, seq, isLast) {
      // A non-integer seq would be stored in lastSeq and make every later
      // comparison meaningless, so refuse it up front.
      if (!Number.isInteger(seq)) return false;

      this.initStream(streamId);
      const tracker = this.sequenceTrackers.get(streamId);
      const buffer = this.streamBuffers.get(streamId);

      // This seq has shown up (in whatever state), so nobody needs to wait
      // for it any longer.
      this._clearChunkTimer(`${streamId}:${seq}`);

      if (seq <= tracker.lastSeq) {
        tracker.outOfOrder++;
        return false;
      }

      // The stream moved forward: the watchdog for the previously expected
      // chunk is obsolete (any gap is recorded just below).
      this._clearChunkTimer(`${streamId}:${tracker.lastSeq + 1}`);

      for (let i = tracker.lastSeq + 1; i < seq; i++) {
        // The watchdog may already have flagged i; avoid double-counting.
        if (!tracker.missing.includes(i)) tracker.missing.push(i);
      }

      // A chunk the watchdog gave up on can still arrive after a stall.
      const flaggedAt = tracker.missing.indexOf(seq);
      if (flaggedAt !== -1) tracker.missing.splice(flaggedAt, 1);

      tracker.lastSeq = seq;
      buffer.push({ chunk, seq, isLast, receivedAt: Date.now() });

      // Bounded buffer: the oldest buffered chunk is dropped on overflow,
      // whether or not it has been played.
      if (buffer.length > this.JITTER_BUFFER_SIZE) {
        buffer.shift();
      }

      if (isLast) {
        this.markStreamComplete(streamId);
      } else {
        // Wait for the NEXT chunk. Arming the timer for the chunk that just
        // arrived would report every delivered chunk as lost once the
        // timeout elapsed.
        this.setChunkTimeout(streamId, seq + 1);
      }

      return true;
    }

    /**
     * Arm a watchdog for chunk `seq` of a stream. If the timer fires while
     * the stream is still open and has not advanced to `seq`, `seq` is added
     * to the tracker's `missing` list. Re-arming the same key replaces the
     * previous timer.
     *
     * @param {*} streamId
     * @param {number} seq - Sequence number being waited for.
     */
    setChunkTimeout(streamId, seq) {
      const key = `${streamId}:${seq}`;
      this._clearChunkTimer(key);

      const timer = setTimeout(() => {
        // Drop the bookkeeping entry so fired timers do not accumulate.
        this.chunkTimers.delete(key);

        const tracker = this.sequenceTrackers.get(streamId);
        if (!tracker || tracker.complete) return;
        // lastSeq >= seq means seq either arrived or was already recorded
        // as a gap when a later chunk was accepted.
        if (tracker.lastSeq >= seq) return;
        if (!tracker.missing.includes(seq)) {
          tracker.missing.push(seq);
        }
      }, this.chunkTimeoutMs);

      this.chunkTimers.set(key, timer);
    }

    /**
     * Cancel and forget the watchdog stored under `key`, if any.
     * @param {string} key - `${streamId}:${seq}`
     */
    _clearChunkTimer(key) {
      if (!this.chunkTimers.has(key)) return;
      clearTimeout(this.chunkTimers.get(key));
      this.chunkTimers.delete(key);
    }

    /**
     * @param {*} streamId
     * @returns {Array<object>} Buffered entries whose seq is greater than
     *   the seq of the most recently played entry; empty for unknown or
     *   empty streams.
     */
    getPlayableChunks(streamId) {
      const buffer = this.streamBuffers.get(streamId);
      if (!buffer || buffer.length === 0) return [];

      const playback = this.playbackBuffers.get(streamId);
      const lastPlayedSeq = playback.length > 0
        ? playback[playback.length - 1].seq
        : -1;

      return buffer.filter(c => c.seq > lastPlayedSeq);
    }

    /**
     * Move every buffered entry with `seq <= upToSeq` to the played list.
     * Unknown streams are ignored.
     *
     * @param {*} streamId
     * @param {number} upToSeq
     */
    markChunksPlayed(streamId, upToSeq) {
      const buffer = this.streamBuffers.get(streamId);
      const playback = this.playbackBuffers.get(streamId);
      if (!buffer || !playback) return;

      const remaining = [];
      for (const entry of buffer) {
        if (entry.seq <= upToSeq) playback.push(entry);
        else remaining.push(entry);
      }
      this.streamBuffers.set(streamId, remaining);
    }

    /**
     * @param {*} streamId
     * @returns {boolean} true when the stream is complete or at least
     *   MIN_BUFFER_CHUNKS entries are buffered; false for unknown streams.
     */
    canStartPlayback(streamId) {
      const buffer = this.streamBuffers.get(streamId);
      const tracker = this.sequenceTrackers.get(streamId);

      if (!buffer || !tracker) return false;
      if (buffer.length === 0 && !tracker.complete) return false;

      return buffer.length >= this.MIN_BUFFER_CHUNKS || tracker.complete;
    }

    /**
     * Flag a known stream as complete; unknown streams are ignored.
     * @param {*} streamId
     */
    markStreamComplete(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      if (tracker) tracker.complete = true;
    }

    /**
     * @param {*} streamId
     * @returns {boolean} false for unknown streams.
     */
    isStreamComplete(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      return Boolean(tracker && tracker.complete);
    }

    /**
     * @param {*} streamId
     * @returns {boolean} true when at least one sequence number is currently
     *   recorded as missing; false for unknown streams.
     */
    hasLostPackets(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      return Boolean(tracker && tracker.missing.length > 0);
    }

    /**
     * @param {*} streamId
     * @returns {{buffered: number, played: number, totalSeq: number,
     *   missing: number, outOfOrder: number, complete: boolean}} Counters
     *   for the stream; all zero/false when the stream is unknown.
     */
    getStreamStats(streamId) {
      const tracker = this.sequenceTrackers.get(streamId);
      const buffer = this.streamBuffers.get(streamId);
      const playback = this.playbackBuffers.get(streamId);

      return {
        buffered: buffer ? buffer.length : 0,
        played: playback ? playback.length : 0,
        totalSeq: tracker ? tracker.lastSeq + 1 : 0,
        missing: tracker ? tracker.missing.length : 0,
        outOfOrder: tracker ? tracker.outOfOrder : 0,
        complete: tracker ? tracker.complete : false
      };
    }

    /**
     * Drop all state for a stream and cancel its pending watchdogs.
     * @param {*} streamId
     */
    cleanupStream(streamId) {
      this.streamBuffers.delete(streamId);
      this.playbackBuffers.delete(streamId);
      this.sequenceTrackers.delete(streamId);

      // Compare the full stream-id part of the key rather than a prefix:
      // with a prefix test, cleaning "a" would also cancel timers of "a:b".
      const owner = String(streamId);
      for (const [key, timer] of this.chunkTimers) {
        if (key.slice(0, key.lastIndexOf(':')) === owner) {
          clearTimeout(timer);
          this.chunkTimers.delete(key);
        }
      }
    }
  }

  // Publish on `window` in browsers; fall back to the global object so that
  // loading this file outside a browser does not throw.
  const root = typeof window !== 'undefined' ? window : globalThis;
  root.TTSWebSocketHandler = TTSWebSocketHandler;
})();
@@ -35,6 +35,43 @@
35
35
  if (!selector) return;
36
36
  var saved = localStorage.getItem('voice-selected-id');
37
37
  if (saved) selectedVoiceId = saved;
38
+ if (window.wsManager) {
39
+ window.wsManager.subscribeToVoiceList(function(voices) {
40
+ if (!Array.isArray(voices)) return;
41
+ selector.innerHTML = '';
42
+ var builtIn = voices.filter(function(v) { return !v.isCustom; });
43
+ var custom = voices.filter(function(v) { return v.isCustom; });
44
+ if (builtIn.length) {
45
+ var grp1 = document.createElement('optgroup');
46
+ grp1.label = 'Built-in Voices';
47
+ builtIn.forEach(function(voice) {
48
+ var opt = document.createElement('option');
49
+ opt.value = voice.id;
50
+ var parts = [];
51
+ if (voice.gender) parts.push(voice.gender);
52
+ if (voice.accent) parts.push(voice.accent);
53
+ opt.textContent = voice.name + (parts.length ? ' (' + parts.join(', ') + ')' : '');
54
+ grp1.appendChild(opt);
55
+ });
56
+ selector.appendChild(grp1);
57
+ }
58
+ if (custom.length) {
59
+ var grp2 = document.createElement('optgroup');
60
+ grp2.label = 'Custom Voices';
61
+ custom.forEach(function(voice) {
62
+ var opt = document.createElement('option');
63
+ opt.value = voice.id;
64
+ opt.textContent = voice.name;
65
+ grp2.appendChild(opt);
66
+ });
67
+ selector.appendChild(grp2);
68
+ }
69
+ if (saved && selector.querySelector('option[value="' + saved + '"]')) {
70
+ selector.value = saved;
71
+ }
72
+ });
73
+ return;
74
+ }
38
75
  fetch(BASE + '/api/voices')
39
76
  .then(function(res) { return res.json(); })
40
77
  .then(function(data) {
@@ -24,6 +24,8 @@ class WebSocketManager {
24
24
  this.connectionState = 'disconnected';
25
25
  this.activeSubscriptions = new Set();
26
26
  this.connectionEstablishedAt = 0;
27
+ this.cachedVoiceList = null;
28
+ this.voiceListListeners = [];
27
29
 
28
30
  this.latency = {
29
31
  samples: [],
@@ -139,6 +141,13 @@ class WebSocketManager {
139
141
  continue;
140
142
  }
141
143
 
144
+ if (data.type === 'voice_list') {
145
+ this.cachedVoiceList = data.voices || [];
146
+ for (const listener of this.voiceListListeners) {
147
+ try { listener(this.cachedVoiceList); } catch (_) {}
148
+ }
149
+ }
150
+
142
151
  if (data.seq !== undefined && data.sessionId) {
143
152
  this.lastSeqBySession[data.sessionId] = Math.max(
144
153
  this.lastSeqBySession[data.sessionId] || -1, data.seq
@@ -565,6 +574,21 @@ class WebSocketManager {
565
574
  this.disconnect();
566
575
  this.listeners = {};
567
576
  }
577
+ subscribeToVoiceList(callback) {
578
+ if (!this.voiceListListeners.includes(callback)) {
579
+ this.voiceListListeners.push(callback);
580
+ }
581
+ if (this.cachedVoiceList !== null) {
582
+ callback(this.cachedVoiceList);
583
+ }
584
+ }
585
+
586
+ unsubscribeFromVoiceList(callback) {
587
+ const idx = this.voiceListListeners.indexOf(callback);
588
+ if (idx > -1) {
589
+ this.voiceListListeners.splice(idx, 1);
590
+ }
591
+ }
568
592
  }
569
593
 
570
594
  if (typeof module !== 'undefined' && module.exports) {