agentgui 1.0.529 → 1.0.530
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/static/index.html +1 -237
- package/static/js/client.js +0 -38
- package/static/js/features.js +0 -8
- package/static/js/voice.js +73 -946
package/static/js/voice.js
CHANGED
|
@@ -1,415 +1,12 @@
|
|
|
1
1
|
(function() {
|
|
2
2
|
var BASE = window.__BASE_URL || '';
|
|
3
|
-
var isRecording = false;
|
|
4
3
|
var ttsEnabled = true;
|
|
5
|
-
var voiceActive = false;
|
|
6
|
-
var currentConversationId = null;
|
|
7
4
|
var speechQueue = [];
|
|
8
5
|
var isSpeaking = false;
|
|
9
6
|
var currentAudio = null;
|
|
10
|
-
var mediaStream = null;
|
|
11
|
-
var audioContext = null;
|
|
12
|
-
var workletNode = null;
|
|
13
|
-
var recordedChunks = [];
|
|
14
|
-
var TARGET_SAMPLE_RATE = 16000;
|
|
15
|
-
var spokenChunks = new Set();
|
|
16
|
-
var renderedSeqs = new Set();
|
|
17
|
-
var isLoadingHistory = false;
|
|
18
|
-
var _lastVoiceBlockText = null;
|
|
19
|
-
var _lastVoiceBlockTime = 0;
|
|
20
|
-
var _voiceBreakNext = false;
|
|
21
7
|
var selectedVoiceId = localStorage.getItem('gmgui-voice-selection') || 'default';
|
|
22
8
|
var ttsAudioCache = new Map();
|
|
23
9
|
var TTS_CLIENT_CACHE_MAX = 50;
|
|
24
|
-
|
|
25
|
-
function init() {
|
|
26
|
-
setupTTSToggle();
|
|
27
|
-
setupUI();
|
|
28
|
-
setupStreamingListener();
|
|
29
|
-
setupAgentSelector();
|
|
30
|
-
setupVoiceSelector();
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
function setupVoiceSelector() {
|
|
34
|
-
var selector = document.getElementById('voiceSelector');
|
|
35
|
-
if (!selector) return;
|
|
36
|
-
var saved = localStorage.getItem('gmgui-voice-selection');
|
|
37
|
-
if (saved) selectedVoiceId = saved;
|
|
38
|
-
if (window.wsManager) {
|
|
39
|
-
window.wsManager.subscribeToVoiceList(function(voices) {
|
|
40
|
-
if (!Array.isArray(voices)) return;
|
|
41
|
-
selector.innerHTML = '';
|
|
42
|
-
var builtIn = voices.filter(function(v) { return !v.isCustom; });
|
|
43
|
-
var custom = voices.filter(function(v) { return v.isCustom; });
|
|
44
|
-
if (builtIn.length) {
|
|
45
|
-
var grp1 = document.createElement('optgroup');
|
|
46
|
-
grp1.label = 'Built-in Voices';
|
|
47
|
-
builtIn.forEach(function(voice) {
|
|
48
|
-
var opt = document.createElement('option');
|
|
49
|
-
opt.value = voice.id;
|
|
50
|
-
var parts = [];
|
|
51
|
-
if (voice.gender) parts.push(voice.gender);
|
|
52
|
-
if (voice.accent) parts.push(voice.accent);
|
|
53
|
-
opt.textContent = voice.name + (parts.length ? ' (' + parts.join(', ') + ')' : '');
|
|
54
|
-
grp1.appendChild(opt);
|
|
55
|
-
});
|
|
56
|
-
selector.appendChild(grp1);
|
|
57
|
-
}
|
|
58
|
-
if (custom.length) {
|
|
59
|
-
var grp2 = document.createElement('optgroup');
|
|
60
|
-
grp2.label = 'Custom Voices';
|
|
61
|
-
custom.forEach(function(voice) {
|
|
62
|
-
var opt = document.createElement('option');
|
|
63
|
-
opt.value = voice.id;
|
|
64
|
-
opt.textContent = voice.name;
|
|
65
|
-
grp2.appendChild(opt);
|
|
66
|
-
});
|
|
67
|
-
selector.appendChild(grp2);
|
|
68
|
-
}
|
|
69
|
-
if (selectedVoiceId && selector.querySelector('option[value="' + selectedVoiceId + '"]')) {
|
|
70
|
-
selector.value = selectedVoiceId;
|
|
71
|
-
}
|
|
72
|
-
});
|
|
73
|
-
return;
|
|
74
|
-
}
|
|
75
|
-
if (window.wsClient) {
|
|
76
|
-
window.wsClient.rpc('voices')
|
|
77
|
-
.then(function(data) {
|
|
78
|
-
if (!data.ok || !Array.isArray(data.voices)) return;
|
|
79
|
-
selector.innerHTML = '';
|
|
80
|
-
var builtIn = data.voices.filter(function(v) { return !v.isCustom; });
|
|
81
|
-
var custom = data.voices.filter(function(v) { return v.isCustom; });
|
|
82
|
-
if (builtIn.length) {
|
|
83
|
-
var grp1 = document.createElement('optgroup');
|
|
84
|
-
grp1.label = 'Built-in Voices';
|
|
85
|
-
builtIn.forEach(function(voice) {
|
|
86
|
-
var opt = document.createElement('option');
|
|
87
|
-
opt.value = voice.id;
|
|
88
|
-
var parts = [];
|
|
89
|
-
if (voice.gender) parts.push(voice.gender);
|
|
90
|
-
if (voice.accent) parts.push(voice.accent);
|
|
91
|
-
opt.textContent = voice.name + (parts.length ? ' (' + parts.join(', ') + ')' : '');
|
|
92
|
-
grp1.appendChild(opt);
|
|
93
|
-
});
|
|
94
|
-
selector.appendChild(grp1);
|
|
95
|
-
}
|
|
96
|
-
if (custom.length) {
|
|
97
|
-
var grp2 = document.createElement('optgroup');
|
|
98
|
-
grp2.label = 'Custom Voices';
|
|
99
|
-
custom.forEach(function(voice) {
|
|
100
|
-
var opt = document.createElement('option');
|
|
101
|
-
opt.value = voice.id;
|
|
102
|
-
opt.textContent = voice.name;
|
|
103
|
-
grp2.appendChild(opt);
|
|
104
|
-
});
|
|
105
|
-
selector.appendChild(grp2);
|
|
106
|
-
}
|
|
107
|
-
if (selectedVoiceId && selector.querySelector('option[value="' + selectedVoiceId + '"]')) {
|
|
108
|
-
selector.value = selectedVoiceId;
|
|
109
|
-
}
|
|
110
|
-
})
|
|
111
|
-
.catch(function(err) { console.error('[Voice] Failed to load voices:', err); });
|
|
112
|
-
}
|
|
113
|
-
selector.addEventListener('change', function() {
|
|
114
|
-
selectedVoiceId = selector.value;
|
|
115
|
-
localStorage.setItem('gmgui-voice-selection', selectedVoiceId);
|
|
116
|
-
sendVoiceToServer();
|
|
117
|
-
});
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
function syncVoiceSelectorWithRetry(maxRetries) {
|
|
121
|
-
maxRetries = maxRetries || 20;
|
|
122
|
-
var voiceSelector = document.querySelector('[data-voice-agent-selector]');
|
|
123
|
-
var mainSelector = document.querySelector('[data-agent-selector]');
|
|
124
|
-
if (!voiceSelector || !mainSelector) return;
|
|
125
|
-
if (mainSelector.innerHTML.trim() === '' && maxRetries > 0) {
|
|
126
|
-
setTimeout(function() { syncVoiceSelectorWithRetry(maxRetries - 1); }, 250);
|
|
127
|
-
return;
|
|
128
|
-
}
|
|
129
|
-
voiceSelector.innerHTML = mainSelector.innerHTML;
|
|
130
|
-
if (mainSelector.value) voiceSelector.value = mainSelector.value;
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
function syncVoiceCliSelectorWithRetry(maxRetries) {
|
|
134
|
-
maxRetries = maxRetries || 20;
|
|
135
|
-
var voiceCliSelector = document.querySelector('[data-voice-cli-selector]');
|
|
136
|
-
var mainCliSelector = document.querySelector('[data-cli-selector]');
|
|
137
|
-
if (!voiceCliSelector || !mainCliSelector) return;
|
|
138
|
-
if (mainCliSelector.innerHTML.trim() === '' && maxRetries > 0) {
|
|
139
|
-
setTimeout(function() { syncVoiceCliSelectorWithRetry(maxRetries - 1); }, 250);
|
|
140
|
-
return;
|
|
141
|
-
}
|
|
142
|
-
voiceCliSelector.innerHTML = mainCliSelector.innerHTML;
|
|
143
|
-
if (mainCliSelector.value) voiceCliSelector.value = mainCliSelector.value;
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
function syncVoiceModelSelectorWithRetry(maxRetries) {
|
|
147
|
-
maxRetries = maxRetries || 20;
|
|
148
|
-
var voiceModelSelector = document.querySelector('[data-voice-model-selector]');
|
|
149
|
-
var mainModelSelector = document.querySelector('[data-model-selector]');
|
|
150
|
-
if (!voiceModelSelector || !mainModelSelector) return;
|
|
151
|
-
if (mainModelSelector.innerHTML.trim() === '' && maxRetries > 0) {
|
|
152
|
-
setTimeout(function() { syncVoiceModelSelectorWithRetry(maxRetries - 1); }, 250);
|
|
153
|
-
return;
|
|
154
|
-
}
|
|
155
|
-
voiceModelSelector.innerHTML = mainModelSelector.innerHTML;
|
|
156
|
-
if (mainModelSelector.value) voiceModelSelector.value = mainModelSelector.value;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
function setupAgentSelector() {
|
|
160
|
-
var voiceSelector = document.querySelector('[data-voice-agent-selector]');
|
|
161
|
-
if (!voiceSelector) return;
|
|
162
|
-
var mainSelector = document.querySelector('[data-agent-selector]');
|
|
163
|
-
if (mainSelector) {
|
|
164
|
-
syncVoiceSelectorWithRetry();
|
|
165
|
-
var observer = new MutationObserver(syncVoiceSelectorWithRetry);
|
|
166
|
-
observer.observe(mainSelector, { childList: true, subtree: true });
|
|
167
|
-
mainSelector.addEventListener('change', function() {
|
|
168
|
-
voiceSelector.value = mainSelector.value;
|
|
169
|
-
});
|
|
170
|
-
voiceSelector.addEventListener('change', function() {
|
|
171
|
-
mainSelector.value = voiceSelector.value;
|
|
172
|
-
});
|
|
173
|
-
}
|
|
174
|
-
window.addEventListener('agents-loaded', syncVoiceSelectorWithRetry);
|
|
175
|
-
|
|
176
|
-
var mainCliSelector = document.querySelector('[data-cli-selector]');
|
|
177
|
-
if (mainCliSelector) {
|
|
178
|
-
syncVoiceCliSelectorWithRetry();
|
|
179
|
-
var cliObserver = new MutationObserver(syncVoiceCliSelectorWithRetry);
|
|
180
|
-
cliObserver.observe(mainCliSelector, { childList: true, subtree: true });
|
|
181
|
-
mainCliSelector.addEventListener('change', function() {
|
|
182
|
-
var voiceCliSelector = document.querySelector('[data-voice-cli-selector]');
|
|
183
|
-
if (voiceCliSelector) voiceCliSelector.value = mainCliSelector.value;
|
|
184
|
-
});
|
|
185
|
-
var voiceCliSelector = document.querySelector('[data-voice-cli-selector]');
|
|
186
|
-
if (voiceCliSelector) {
|
|
187
|
-
voiceCliSelector.addEventListener('change', function() {
|
|
188
|
-
mainCliSelector.value = voiceCliSelector.value;
|
|
189
|
-
});
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
var mainModelSelector = document.querySelector('[data-model-selector]');
|
|
194
|
-
if (mainModelSelector) {
|
|
195
|
-
syncVoiceModelSelectorWithRetry();
|
|
196
|
-
var modelObserver = new MutationObserver(syncVoiceModelSelectorWithRetry);
|
|
197
|
-
modelObserver.observe(mainModelSelector, { childList: true, subtree: true });
|
|
198
|
-
mainModelSelector.addEventListener('change', function() {
|
|
199
|
-
var voiceModelSelector = document.querySelector('[data-voice-model-selector]');
|
|
200
|
-
if (voiceModelSelector) voiceModelSelector.value = mainModelSelector.value;
|
|
201
|
-
});
|
|
202
|
-
var voiceModelSelector = document.querySelector('[data-voice-model-selector]');
|
|
203
|
-
if (voiceModelSelector) {
|
|
204
|
-
voiceModelSelector.addEventListener('change', function() {
|
|
205
|
-
mainModelSelector.value = voiceModelSelector.value;
|
|
206
|
-
});
|
|
207
|
-
}
|
|
208
|
-
}
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
function setupTTSToggle() {
|
|
212
|
-
var toggle = document.getElementById('voiceTTSToggle');
|
|
213
|
-
if (toggle) {
|
|
214
|
-
var saved = localStorage.getItem('gmgui-auto-speak');
|
|
215
|
-
if (saved !== null) {
|
|
216
|
-
ttsEnabled = saved === 'true';
|
|
217
|
-
toggle.checked = ttsEnabled;
|
|
218
|
-
}
|
|
219
|
-
toggle.addEventListener('change', function() {
|
|
220
|
-
ttsEnabled = toggle.checked;
|
|
221
|
-
localStorage.setItem('gmgui-auto-speak', ttsEnabled);
|
|
222
|
-
if (!ttsEnabled) stopSpeaking();
|
|
223
|
-
});
|
|
224
|
-
}
|
|
225
|
-
var stopBtn = document.getElementById('voiceStopSpeaking');
|
|
226
|
-
if (stopBtn) {
|
|
227
|
-
stopBtn.addEventListener('click', stopSpeaking);
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
function setupUI() {
|
|
232
|
-
var micBtn = document.getElementById('voiceMicBtn');
|
|
233
|
-
if (micBtn) {
|
|
234
|
-
micBtn.removeAttribute('disabled');
|
|
235
|
-
micBtn.title = 'Hold to record';
|
|
236
|
-
micBtn.addEventListener('mousedown', function(e) {
|
|
237
|
-
e.preventDefault();
|
|
238
|
-
startRecording();
|
|
239
|
-
});
|
|
240
|
-
micBtn.addEventListener('mouseup', function(e) {
|
|
241
|
-
e.preventDefault();
|
|
242
|
-
stopRecording();
|
|
243
|
-
});
|
|
244
|
-
micBtn.addEventListener('mouseleave', function(e) {
|
|
245
|
-
if (isRecording) stopRecording();
|
|
246
|
-
});
|
|
247
|
-
micBtn.addEventListener('touchstart', function(e) {
|
|
248
|
-
e.preventDefault();
|
|
249
|
-
startRecording();
|
|
250
|
-
});
|
|
251
|
-
micBtn.addEventListener('touchend', function(e) {
|
|
252
|
-
e.preventDefault();
|
|
253
|
-
stopRecording();
|
|
254
|
-
});
|
|
255
|
-
micBtn.addEventListener('touchcancel', function(e) {
|
|
256
|
-
if (isRecording) stopRecording();
|
|
257
|
-
});
|
|
258
|
-
}
|
|
259
|
-
var sendBtn = document.getElementById('voiceSendBtn');
|
|
260
|
-
if (sendBtn) {
|
|
261
|
-
sendBtn.addEventListener('click', sendVoiceMessage);
|
|
262
|
-
}
|
|
263
|
-
var transcript = document.getElementById('voiceTranscript');
|
|
264
|
-
if (transcript) {
|
|
265
|
-
transcript.addEventListener('keydown', function(e) {
|
|
266
|
-
if (e.ctrlKey && e.key === 'Enter' || e.metaKey && e.key === 'Enter') {
|
|
267
|
-
e.preventDefault();
|
|
268
|
-
sendVoiceMessage();
|
|
269
|
-
}
|
|
270
|
-
});
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
async function startRecording() {
|
|
276
|
-
if (isRecording) return;
|
|
277
|
-
var el = document.getElementById('voiceTranscript');
|
|
278
|
-
if (el) {
|
|
279
|
-
if (el.value !== undefined) {
|
|
280
|
-
el.value = '';
|
|
281
|
-
} else {
|
|
282
|
-
el.textContent = '';
|
|
283
|
-
el.setAttribute('data-final', '');
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
var result = await window.STTHandler.startRecording();
|
|
287
|
-
if (result.success) {
|
|
288
|
-
isRecording = true;
|
|
289
|
-
var micBtn = document.getElementById('voiceMicBtn');
|
|
290
|
-
if (micBtn) micBtn.classList.add('recording');
|
|
291
|
-
} else {
|
|
292
|
-
if (el) el.textContent = 'Mic access denied: ' + result.error;
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
async function stopRecording() {
|
|
297
|
-
if (!isRecording) return;
|
|
298
|
-
isRecording = false;
|
|
299
|
-
var micBtn = document.getElementById('voiceMicBtn');
|
|
300
|
-
if (micBtn) micBtn.classList.remove('recording');
|
|
301
|
-
var el = document.getElementById('voiceTranscript');
|
|
302
|
-
|
|
303
|
-
if (el) {
|
|
304
|
-
if (el.value !== undefined) {
|
|
305
|
-
el.value = 'Transcribing...';
|
|
306
|
-
} else {
|
|
307
|
-
el.textContent = 'Transcribing...';
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
var result = await window.STTHandler.stopRecording();
|
|
312
|
-
if (result.success) {
|
|
313
|
-
if (el) {
|
|
314
|
-
if (el.value !== undefined) {
|
|
315
|
-
el.value = result.text;
|
|
316
|
-
} else {
|
|
317
|
-
el.textContent = result.text;
|
|
318
|
-
el.setAttribute('data-final', result.text);
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
} else {
|
|
322
|
-
if (el) {
|
|
323
|
-
if (el.value !== undefined) {
|
|
324
|
-
el.value = 'Error: ' + result.error;
|
|
325
|
-
} else {
|
|
326
|
-
el.textContent = 'Error: ' + result.error;
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
}
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
function sendVoiceMessage() {
|
|
333
|
-
var el = document.getElementById('voiceTranscript');
|
|
334
|
-
if (!el) return;
|
|
335
|
-
var text = (el.value || el.textContent || '').trim();
|
|
336
|
-
if (!text || text.startsWith('Transcribing') || text.startsWith('Error')) return;
|
|
337
|
-
addVoiceBlock(text, true);
|
|
338
|
-
if (el.value !== undefined) {
|
|
339
|
-
el.value = '';
|
|
340
|
-
} else {
|
|
341
|
-
el.textContent = '';
|
|
342
|
-
el.setAttribute('data-final', '');
|
|
343
|
-
}
|
|
344
|
-
if (typeof agentGUIClient !== 'undefined' && agentGUIClient) {
|
|
345
|
-
var input = agentGUIClient.ui.messageInput;
|
|
346
|
-
if (input) {
|
|
347
|
-
input.value = text;
|
|
348
|
-
agentGUIClient.startExecution();
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
function speak(text) {
|
|
354
|
-
if (!ttsEnabled) return;
|
|
355
|
-
speakDirect(text);
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
function speakDirect(text) {
|
|
359
|
-
var clean = text.replace(/<[^>]*>/g, '').trim();
|
|
360
|
-
if (!clean) return;
|
|
361
|
-
var parts = [];
|
|
362
|
-
if (typeof agentGUIClient !== 'undefined' && agentGUIClient && typeof agentGUIClient.parseMarkdownCodeBlocks === 'function') {
|
|
363
|
-
parts = agentGUIClient.parseMarkdownCodeBlocks(clean);
|
|
364
|
-
} else {
|
|
365
|
-
parts = [{ type: 'text', content: clean }];
|
|
366
|
-
}
|
|
367
|
-
parts.forEach(function(part) {
|
|
368
|
-
if (part.type === 'code') return;
|
|
369
|
-
var segment = part.content.trim();
|
|
370
|
-
if (segment) {
|
|
371
|
-
speechQueue.push(segment);
|
|
372
|
-
}
|
|
373
|
-
});
|
|
374
|
-
processQueue();
|
|
375
|
-
}
|
|
376
|
-
|
|
377
|
-
function cacheTTSAudio(cacheKey, b64) {
|
|
378
|
-
if (ttsAudioCache.size >= TTS_CLIENT_CACHE_MAX) {
|
|
379
|
-
var oldest = ttsAudioCache.keys().next().value;
|
|
380
|
-
ttsAudioCache.delete(oldest);
|
|
381
|
-
}
|
|
382
|
-
var binary = atob(b64);
|
|
383
|
-
var bytes = new Uint8Array(binary.length);
|
|
384
|
-
for (var i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
|
|
385
|
-
ttsAudioCache.set(cacheKey, new Blob([bytes], { type: 'audio/wav' }));
|
|
386
|
-
}
|
|
387
|
-
|
|
388
|
-
function getCachedTTSBlob(text) {
|
|
389
|
-
var key = selectedVoiceId + ':' + text;
|
|
390
|
-
return ttsAudioCache.get(key) || null;
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
function splitSentences(text) {
|
|
394
|
-
if (!text) return [text];
|
|
395
|
-
var raw = text.match(/[^.!?]+[.!?]+[\s]?|[^.!?]+$/g);
|
|
396
|
-
if (!raw) return [text];
|
|
397
|
-
var sentences = raw.map(function(s) { return s.trim(); }).filter(function(s) { return s.length > 0; });
|
|
398
|
-
var result = [];
|
|
399
|
-
for (var i = 0; i < sentences.length; i++) {
|
|
400
|
-
var s = sentences[i];
|
|
401
|
-
if (result.length > 0) {
|
|
402
|
-
var prev = result[result.length - 1];
|
|
403
|
-
if (s.match(/^(\d+[\.\)]|\d+\s)/) || prev.match(/\d+[\.\)]$/)) {
|
|
404
|
-
result[result.length - 1] = prev + ' ' + s;
|
|
405
|
-
continue;
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
result.push(s);
|
|
409
|
-
}
|
|
410
|
-
return result;
|
|
411
|
-
}
|
|
412
|
-
|
|
413
10
|
var audioChunkQueue = [];
|
|
414
11
|
var isPlayingChunk = false;
|
|
415
12
|
var streamDone = false;
|
|
@@ -417,577 +14,107 @@
|
|
|
417
14
|
var TTS_MAX_FAILURES = 3;
|
|
418
15
|
var ttsDisabledUntilReset = false;
|
|
419
16
|
var streamingSupported = true;
|
|
420
|
-
var streamingFailedAt = 0;
|
|
421
17
|
|
|
422
|
-
|
|
423
|
-
|
|
18
|
+
window.addEventListener('ws-message', function(e) {
|
|
19
|
+
var data = e.detail;
|
|
20
|
+
if (!data) return;
|
|
21
|
+
if (data.type === 'tts_audio' && data.audio && data.voiceId === selectedVoiceId) cacheTTSAudio(data.cacheKey, data.audio);
|
|
22
|
+
if (data.type === 'sync_connected') sendVoiceToServer();
|
|
23
|
+
});
|
|
424
24
|
|
|
425
|
-
function
|
|
426
|
-
|
|
427
|
-
|
|
25
|
+
function cacheTTSAudio(cacheKey, b64) {
|
|
26
|
+
if (ttsAudioCache.size >= TTS_CLIENT_CACHE_MAX) ttsAudioCache.delete(ttsAudioCache.keys().next().value);
|
|
27
|
+
var binary = atob(b64), bytes = new Uint8Array(binary.length);
|
|
28
|
+
for (var i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
|
|
29
|
+
ttsAudioCache.set(cacheKey, new Blob([bytes], { type: 'audio/wav' }));
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
function speakDirect(text) {
|
|
33
|
+
var clean = text.replace(/<[^>]*>/g, '').trim();
|
|
34
|
+
if (!clean) return;
|
|
35
|
+
var parts = (typeof agentGUIClient !== 'undefined' && agentGUIClient && typeof agentGUIClient.parseMarkdownCodeBlocks === 'function')
|
|
36
|
+
? agentGUIClient.parseMarkdownCodeBlocks(clean) : [{ type: 'text', content: clean }];
|
|
37
|
+
parts.forEach(function(p) { if (p.type !== 'code' && p.content.trim()) speechQueue.push(p.content.trim()); });
|
|
38
|
+
processQueue();
|
|
428
39
|
}
|
|
429
40
|
|
|
430
41
|
function playNextChunk() {
|
|
431
|
-
if (audioChunkQueue.length
|
|
42
|
+
if (!audioChunkQueue.length) {
|
|
432
43
|
isPlayingChunk = false;
|
|
433
|
-
if (streamDone) {
|
|
434
|
-
isSpeaking = false;
|
|
435
|
-
processQueue();
|
|
436
|
-
}
|
|
44
|
+
if (streamDone) { isSpeaking = false; processQueue(); }
|
|
437
45
|
return;
|
|
438
46
|
}
|
|
439
47
|
isPlayingChunk = true;
|
|
440
|
-
var blob = audioChunkQueue.shift();
|
|
441
|
-
var url = URL.createObjectURL(blob);
|
|
48
|
+
var blob = audioChunkQueue.shift(), url = URL.createObjectURL(blob);
|
|
442
49
|
currentAudio = new Audio(url);
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
playNextChunk();
|
|
447
|
-
};
|
|
448
|
-
currentAudio.onerror = function() {
|
|
449
|
-
URL.revokeObjectURL(url);
|
|
450
|
-
currentAudio = null;
|
|
451
|
-
playNextChunk();
|
|
452
|
-
};
|
|
453
|
-
currentAudio.play().catch(function() {
|
|
454
|
-
URL.revokeObjectURL(url);
|
|
455
|
-
currentAudio = null;
|
|
456
|
-
playNextChunk();
|
|
457
|
-
});
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
function preGenerateTTS(text) {
|
|
461
|
-
if (!ttsEnabled) return;
|
|
462
|
-
var clean = text.replace(/<[^>]*>/g, '').trim();
|
|
463
|
-
if (!clean) return;
|
|
464
|
-
var parts = [];
|
|
465
|
-
if (typeof agentGUIClient !== 'undefined' && agentGUIClient && typeof agentGUIClient.parseMarkdownCodeBlocks === 'function') {
|
|
466
|
-
parts = agentGUIClient.parseMarkdownCodeBlocks(clean);
|
|
467
|
-
} else {
|
|
468
|
-
parts = [{ type: 'text', content: clean }];
|
|
469
|
-
}
|
|
470
|
-
parts.forEach(function(part) {
|
|
471
|
-
if (part.type === 'code') return;
|
|
472
|
-
var segment = part.content.trim();
|
|
473
|
-
if (!segment) return;
|
|
474
|
-
var cacheKey = selectedVoiceId + ':' + segment;
|
|
475
|
-
if (ttsAudioCache.has(cacheKey)) return;
|
|
476
|
-
var optimizedText = optimizePromptForSpeech(segment);
|
|
477
|
-
fetch(BASE + '/api/tts', {
|
|
478
|
-
method: 'POST',
|
|
479
|
-
headers: { 'Content-Type': 'application/json' },
|
|
480
|
-
body: JSON.stringify({ text: optimizedText, voiceId: selectedVoiceId })
|
|
481
|
-
}).then(function(resp) {
|
|
482
|
-
if (!resp.ok) throw new Error('TTS pre-generation failed: ' + resp.status);
|
|
483
|
-
return resp.arrayBuffer();
|
|
484
|
-
}).then(function(buf) {
|
|
485
|
-
var blob = new Blob([buf], { type: 'audio/wav' });
|
|
486
|
-
if (ttsAudioCache.size >= TTS_CLIENT_CACHE_MAX) {
|
|
487
|
-
var oldest = ttsAudioCache.keys().next().value;
|
|
488
|
-
ttsAudioCache.delete(oldest);
|
|
489
|
-
}
|
|
490
|
-
ttsAudioCache.set(cacheKey, blob);
|
|
491
|
-
}).catch(function(err) {
|
|
492
|
-
console.warn('[Voice] TTS pre-generation failed:', err);
|
|
493
|
-
});
|
|
494
|
-
});
|
|
50
|
+
var next = function() { URL.revokeObjectURL(url); currentAudio = null; playNextChunk(); };
|
|
51
|
+
currentAudio.onended = next; currentAudio.onerror = next;
|
|
52
|
+
currentAudio.play().catch(next);
|
|
495
53
|
}
|
|
496
54
|
|
|
497
55
|
function processQueue() {
|
|
498
|
-
if (isSpeaking || speechQueue.length
|
|
499
|
-
if (ttsDisabledUntilReset) {
|
|
500
|
-
|
|
501
|
-
return;
|
|
502
|
-
}
|
|
503
|
-
isSpeaking = true;
|
|
504
|
-
streamDone = false;
|
|
56
|
+
if (isSpeaking || !speechQueue.length) return;
|
|
57
|
+
if (ttsDisabledUntilReset) { speechQueue = []; return; }
|
|
58
|
+
isSpeaking = true; streamDone = false;
|
|
505
59
|
var text = speechQueue.shift();
|
|
506
|
-
audioChunkQueue = [];
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
var
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
streamDone = true;
|
|
514
|
-
if (!
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
if (cachedSentences.length > 0) {
|
|
533
|
-
ttsConsecutiveFailures = 0;
|
|
534
|
-
for (var k = 0; k < cachedSentences.length; k++) {
|
|
535
|
-
audioChunkQueue.push(cachedSentences[k].blob);
|
|
536
|
-
}
|
|
537
|
-
if (!isPlayingChunk) playNextChunk();
|
|
538
|
-
}
|
|
539
|
-
|
|
540
|
-
var remainingText = uncachedText.join(' ');
|
|
541
|
-
var optimizedText = optimizePromptForSpeech(remainingText);
|
|
542
|
-
|
|
543
|
-
function onTtsSuccess() {
|
|
544
|
-
ttsConsecutiveFailures = 0;
|
|
545
|
-
}
|
|
546
|
-
|
|
547
|
-
function onTtsFailed() {
|
|
548
|
-
ttsConsecutiveFailures++;
|
|
549
|
-
if (ttsConsecutiveFailures >= TTS_MAX_FAILURES) {
|
|
550
|
-
console.warn('[Voice] TTS failed ' + ttsConsecutiveFailures + ' times consecutively, disabling until reset');
|
|
551
|
-
ttsDisabledUntilReset = true;
|
|
552
|
-
speechQueue = [];
|
|
553
|
-
}
|
|
554
|
-
streamDone = true;
|
|
555
|
-
isSpeaking = false;
|
|
556
|
-
if (!ttsDisabledUntilReset) {
|
|
557
|
-
processQueue();
|
|
558
|
-
}
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
function tryStreaming() {
|
|
562
|
-
if (!streamingSupported) { tryNonStreaming(optimizedText); return; }
|
|
563
|
-
fetch(BASE + '/api/tts-stream', {
|
|
564
|
-
method: 'POST',
|
|
565
|
-
headers: { 'Content-Type': 'application/json' },
|
|
566
|
-
body: JSON.stringify({ text: optimizedText, voiceId: selectedVoiceId })
|
|
567
|
-
}).then(function(resp) {
|
|
568
|
-
if (!resp.ok) {
|
|
569
|
-
streamingSupported = false;
|
|
570
|
-
streamingFailedAt = Date.now();
|
|
571
|
-
throw new Error('TTS stream failed: ' + resp.status);
|
|
572
|
-
}
|
|
573
|
-
var reader = resp.body.getReader();
|
|
574
|
-
var buffer = new Uint8Array(0);
|
|
575
|
-
|
|
576
|
-
function concat(a, b) {
|
|
577
|
-
var c = new Uint8Array(a.length + b.length);
|
|
578
|
-
c.set(a, 0);
|
|
579
|
-
c.set(b, a.length);
|
|
580
|
-
return c;
|
|
581
|
-
}
|
|
582
|
-
|
|
583
|
-
function pump() {
|
|
584
|
-
return reader.read().then(function(result) {
|
|
585
|
-
if (result.done) {
|
|
586
|
-
onTtsSuccess();
|
|
587
|
-
streamDone = true;
|
|
588
|
-
if (!isPlayingChunk && audioChunkQueue.length === 0) {
|
|
589
|
-
isSpeaking = false;
|
|
590
|
-
processQueue();
|
|
591
|
-
}
|
|
592
|
-
return;
|
|
593
|
-
}
|
|
594
|
-
buffer = concat(buffer, result.value);
|
|
595
|
-
while (buffer.length >= 4) {
|
|
596
|
-
var view = new DataView(buffer.buffer, buffer.byteOffset, 4);
|
|
597
|
-
var chunkLen = view.getUint32(0, false);
|
|
598
|
-
if (buffer.length < 4 + chunkLen) break;
|
|
599
|
-
var wavData = buffer.slice(4, 4 + chunkLen);
|
|
600
|
-
buffer = buffer.slice(4 + chunkLen);
|
|
601
|
-
var blob = new Blob([wavData], { type: 'audio/wav' });
|
|
602
|
-
audioChunkQueue.push(blob);
|
|
60
|
+
audioChunkQueue = []; isPlayingChunk = false;
|
|
61
|
+
var cached = ttsAudioCache.get(selectedVoiceId + ':' + text);
|
|
62
|
+
if (cached) { ttsConsecutiveFailures = 0; audioChunkQueue.push(cached); streamDone = true; if (!isPlayingChunk) playNextChunk(); return; }
|
|
63
|
+
var opt = text + ' [Optimize for speech: Keep it short. Use simple words. Use short sentences. Focus on clarity.]';
|
|
64
|
+
function ok() { ttsConsecutiveFailures = 0; }
|
|
65
|
+
function fail() {
|
|
66
|
+
if (++ttsConsecutiveFailures >= TTS_MAX_FAILURES) { ttsDisabledUntilReset = true; speechQueue = []; }
|
|
67
|
+
streamDone = true; isSpeaking = false;
|
|
68
|
+
if (!ttsDisabledUntilReset) processQueue();
|
|
69
|
+
}
|
|
70
|
+
function stream() {
|
|
71
|
+
if (!streamingSupported) { nonStream(opt); return; }
|
|
72
|
+
fetch(BASE + '/api/tts-stream', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text: opt, voiceId: selectedVoiceId }) })
|
|
73
|
+
.then(function(r) {
|
|
74
|
+
if (!r.ok) { streamingSupported = false; throw 0; }
|
|
75
|
+
var reader = r.body.getReader(), buf = new Uint8Array(0);
|
|
76
|
+
function cat(a, b) { var c = new Uint8Array(a.length + b.length); c.set(a); c.set(b, a.length); return c; }
|
|
77
|
+
function pump() { return reader.read().then(function(res) {
|
|
78
|
+
if (res.done) { ok(); streamDone = true; if (!isPlayingChunk && !audioChunkQueue.length) { isSpeaking = false; processQueue(); } return; }
|
|
79
|
+
buf = cat(buf, res.value);
|
|
80
|
+
while (buf.length >= 4) {
|
|
81
|
+
var len = new DataView(buf.buffer, buf.byteOffset, 4).getUint32(0, false);
|
|
82
|
+
if (buf.length < 4 + len) break;
|
|
83
|
+
audioChunkQueue.push(new Blob([buf.slice(4, 4 + len)], { type: 'audio/wav' }));
|
|
84
|
+
buf = buf.slice(4 + len);
|
|
603
85
|
if (!isPlayingChunk) playNextChunk();
|
|
604
86
|
}
|
|
605
87
|
return pump();
|
|
606
|
-
});
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
return pump();
|
|
610
|
-
}).catch(function() {
|
|
611
|
-
tryNonStreaming(remainingText);
|
|
612
|
-
});
|
|
88
|
+
}); }
|
|
89
|
+
return pump();
|
|
90
|
+
}).catch(function() { nonStream(text); });
|
|
613
91
|
}
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
body: JSON.stringify({ text: txt, voiceId: selectedVoiceId })
|
|
620
|
-
}).then(function(resp) {
|
|
621
|
-
if (!resp.ok) throw new Error('TTS failed: ' + resp.status);
|
|
622
|
-
return resp.arrayBuffer();
|
|
623
|
-
}).then(function(buf) {
|
|
624
|
-
onTtsSuccess();
|
|
625
|
-
var blob = new Blob([buf], { type: 'audio/wav' });
|
|
626
|
-
audioChunkQueue.push(blob);
|
|
627
|
-
streamDone = true;
|
|
628
|
-
if (!isPlayingChunk) playNextChunk();
|
|
629
|
-
}).catch(function() {
|
|
630
|
-
onTtsFailed();
|
|
631
|
-
});
|
|
92
|
+
function nonStream(txt) {
|
|
93
|
+
fetch(BASE + '/api/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text: txt, voiceId: selectedVoiceId }) })
|
|
94
|
+
.then(function(r) { if (!r.ok) throw 0; return r.arrayBuffer(); })
|
|
95
|
+
.then(function(b) { ok(); audioChunkQueue.push(new Blob([b], { type: 'audio/wav' })); streamDone = true; if (!isPlayingChunk) playNextChunk(); })
|
|
96
|
+
.catch(fail);
|
|
632
97
|
}
|
|
633
|
-
|
|
634
|
-
tryStreaming();
|
|
98
|
+
stream();
|
|
635
99
|
}
|
|
636
100
|
|
|
637
101
|
function stopSpeaking() {
|
|
638
|
-
speechQueue = [];
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
isSpeaking = false;
|
|
642
|
-
ttsConsecutiveFailures = 0;
|
|
643
|
-
ttsDisabledUntilReset = false;
|
|
644
|
-
if (currentAudio) {
|
|
645
|
-
currentAudio.pause();
|
|
646
|
-
currentAudio = null;
|
|
647
|
-
}
|
|
648
|
-
}
|
|
649
|
-
|
|
650
|
-
function stripHtml(text) {
|
|
651
|
-
return text.replace(/<[^>]*>/g, '').replace(/[ \t]+/g, ' ').trim();
|
|
652
|
-
}
|
|
653
|
-
|
|
654
|
-
function addVoiceBlock(text, isUser) {
|
|
655
|
-
var container = document.getElementById('voiceMessages');
|
|
656
|
-
if (!container) return;
|
|
657
|
-
var emptyMsg = container.querySelector('.voice-empty');
|
|
658
|
-
if (emptyMsg) emptyMsg.remove();
|
|
659
|
-
var lastChild = container.lastElementChild;
|
|
660
|
-
if (!isUser && !_voiceBreakNext && !isLoadingHistory && lastChild && lastChild.classList.contains('voice-block') && !lastChild.classList.contains('voice-block-user')) {
|
|
661
|
-
var contentSpan = lastChild.querySelector('.voice-block-content');
|
|
662
|
-
if (contentSpan) {
|
|
663
|
-
contentSpan.textContent += '\n' + stripHtml(text);
|
|
664
|
-
lastChild._fullText = (lastChild._fullText || contentSpan.textContent) + '\n' + text;
|
|
665
|
-
scrollVoiceToBottom();
|
|
666
|
-
return lastChild;
|
|
667
|
-
}
|
|
668
|
-
}
|
|
669
|
-
_voiceBreakNext = false;
|
|
670
|
-
var div = document.createElement('div');
|
|
671
|
-
div.className = 'voice-block' + (isUser ? ' voice-block-user' : '');
|
|
672
|
-
if (isUser) {
|
|
673
|
-
div.textContent = text;
|
|
674
|
-
} else {
|
|
675
|
-
var contentSpan = document.createElement('span');
|
|
676
|
-
contentSpan.className = 'voice-block-content';
|
|
677
|
-
contentSpan.textContent = stripHtml(text);
|
|
678
|
-
div.appendChild(contentSpan);
|
|
679
|
-
div._fullText = text;
|
|
680
|
-
var rereadBtn = document.createElement('button');
|
|
681
|
-
rereadBtn.className = 'voice-reread-btn';
|
|
682
|
-
rereadBtn.title = 'Re-read aloud';
|
|
683
|
-
rereadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
|
|
684
|
-
rereadBtn.addEventListener('click', function() {
|
|
685
|
-
speak(div._fullText || contentSpan.textContent);
|
|
686
|
-
});
|
|
687
|
-
div.appendChild(rereadBtn);
|
|
688
|
-
}
|
|
689
|
-
container.appendChild(div);
|
|
690
|
-
scrollVoiceToBottom();
|
|
691
|
-
return div;
|
|
692
|
-
}
|
|
693
|
-
|
|
694
|
-
function addVoiceResultBlock(block, autoSpeak) {
|
|
695
|
-
var container = document.getElementById('voiceMessages');
|
|
696
|
-
if (!container) return;
|
|
697
|
-
var emptyMsg = container.querySelector('.voice-empty');
|
|
698
|
-
if (emptyMsg) emptyMsg.remove();
|
|
699
|
-
var div = document.createElement('div');
|
|
700
|
-
div.className = 'voice-block';
|
|
701
|
-
var isError = block.is_error || false;
|
|
702
|
-
var duration = block.duration_ms ? (block.duration_ms / 1000).toFixed(1) + 's' : '';
|
|
703
|
-
var cost = block.total_cost_usd ? '$' + block.total_cost_usd.toFixed(4) : '';
|
|
704
|
-
var resultText = '';
|
|
705
|
-
if (block.result) {
|
|
706
|
-
resultText = typeof block.result === 'string' ? block.result : JSON.stringify(block.result);
|
|
707
|
-
}
|
|
708
|
-
var displayText = stripHtml(resultText);
|
|
709
|
-
var html = '';
|
|
710
|
-
if (displayText) {
|
|
711
|
-
html += '<div>' + escapeHtml(displayText) + '</div>';
|
|
712
|
-
}
|
|
713
|
-
if (duration || cost) {
|
|
714
|
-
html += '<div class="voice-result-stats">';
|
|
715
|
-
if (duration) html += duration;
|
|
716
|
-
if (duration && cost) html += ' | ';
|
|
717
|
-
if (cost) html += cost;
|
|
718
|
-
html += '</div>';
|
|
719
|
-
}
|
|
720
|
-
if (!html) {
|
|
721
|
-
html = isError ? 'Execution failed' : 'Execution complete';
|
|
722
|
-
}
|
|
723
|
-
div.innerHTML = html;
|
|
724
|
-
if (resultText) {
|
|
725
|
-
var rereadBtn = document.createElement('button');
|
|
726
|
-
rereadBtn.className = 'voice-reread-btn';
|
|
727
|
-
rereadBtn.title = 'Re-read aloud';
|
|
728
|
-
rereadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
|
|
729
|
-
rereadBtn.addEventListener('click', function() {
|
|
730
|
-
speak(resultText);
|
|
731
|
-
});
|
|
732
|
-
div.appendChild(rereadBtn);
|
|
733
|
-
}
|
|
734
|
-
container.appendChild(div);
|
|
735
|
-
scrollVoiceToBottom();
|
|
736
|
-
if (autoSpeak && ttsEnabled && resultText) {
|
|
737
|
-
speak(resultText);
|
|
738
|
-
}
|
|
739
|
-
return div;
|
|
740
|
-
}
|
|
741
|
-
|
|
742
|
-
function scrollVoiceToBottom() {
|
|
743
|
-
var scroll = document.getElementById('voiceScroll');
|
|
744
|
-
if (scroll) {
|
|
745
|
-
requestAnimationFrame(function() {
|
|
746
|
-
scroll.scrollTop = scroll.scrollHeight;
|
|
747
|
-
});
|
|
748
|
-
}
|
|
102
|
+
speechQueue = []; audioChunkQueue = []; isPlayingChunk = false; isSpeaking = false;
|
|
103
|
+
ttsConsecutiveFailures = 0; ttsDisabledUntilReset = false;
|
|
104
|
+
if (currentAudio) { currentAudio.pause(); currentAudio = null; }
|
|
749
105
|
}
|
|
750
106
|
|
|
751
107
|
function sendVoiceToServer() {
|
|
752
|
-
if (typeof agentGUIClient !== 'undefined' && agentGUIClient && agentGUIClient.wsManager && agentGUIClient.wsManager.isConnected)
|
|
108
|
+
if (typeof agentGUIClient !== 'undefined' && agentGUIClient && agentGUIClient.wsManager && agentGUIClient.wsManager.isConnected)
|
|
753
109
|
agentGUIClient.wsManager.sendMessage({ type: 'set_voice', voiceId: selectedVoiceId });
|
|
754
|
-
}
|
|
755
|
-
}
|
|
756
|
-
|
|
757
|
-
function setupStreamingListener() {
|
|
758
|
-
window.addEventListener('ws-message', function(e) {
|
|
759
|
-
var data = e.detail;
|
|
760
|
-
if (!data) return;
|
|
761
|
-
if (data.type === 'tts_audio' && data.audio && data.voiceId === selectedVoiceId) {
|
|
762
|
-
cacheTTSAudio(data.cacheKey, data.audio);
|
|
763
|
-
}
|
|
764
|
-
if (data.type === 'sync_connected') {
|
|
765
|
-
sendVoiceToServer();
|
|
766
|
-
}
|
|
767
|
-
if (data.type === 'streaming_progress' || data.type === 'message_created' || data.type === 'streaming_start') {
|
|
768
|
-
if (data.conversationId && data.conversationId !== currentConversationId) return;
|
|
769
|
-
if (!voiceActive) {
|
|
770
|
-
pendingVoiceUpdates.push(data);
|
|
771
|
-
if (pendingVoiceUpdates.length > MAX_PENDING_UPDATES) {
|
|
772
|
-
pendingVoiceUpdates.shift();
|
|
773
|
-
}
|
|
774
|
-
return;
|
|
775
|
-
}
|
|
776
|
-
}
|
|
777
|
-
if (!voiceActive) return;
|
|
778
|
-
if (data.type === 'streaming_progress' && data.block) {
|
|
779
|
-
if (data.seq !== undefined && renderedSeqs.has(data.seq)) return;
|
|
780
|
-
if (data.seq !== undefined) renderedSeqs.add(data.seq);
|
|
781
|
-
handleVoiceBlock(data.block, true, data.blockRole);
|
|
782
|
-
}
|
|
783
|
-
if (data.type === 'message_created' && data.message) {
|
|
784
|
-
var message = data.message;
|
|
785
|
-
if (message.role === 'user' && message.content) {
|
|
786
|
-
handleVoiceBlock({ type: 'text', text: message.content }, true, 'user');
|
|
787
|
-
}
|
|
788
|
-
}
|
|
789
|
-
if (data.type === 'streaming_start') {
|
|
790
|
-
spokenChunks = new Set();
|
|
791
|
-
renderedSeqs = new Set();
|
|
792
|
-
_voiceBreakNext = false;
|
|
793
|
-
}
|
|
794
|
-
});
|
|
795
|
-
window.addEventListener('conversation-selected', function(e) {
|
|
796
|
-
var newConversationId = e.detail.conversationId;
|
|
797
|
-
if (currentConversationId && currentConversationId !== newConversationId) {
|
|
798
|
-
unsubscribeFromConversation();
|
|
799
|
-
pendingVoiceUpdates = [];
|
|
800
|
-
}
|
|
801
|
-
currentConversationId = newConversationId;
|
|
802
|
-
stopSpeaking();
|
|
803
|
-
spokenChunks = new Set();
|
|
804
|
-
renderedSeqs = new Set();
|
|
805
|
-
if (voiceActive) {
|
|
806
|
-
subscribeToConversation(currentConversationId);
|
|
807
|
-
loadVoiceBlocks(currentConversationId);
|
|
808
|
-
processPendingUpdates();
|
|
809
|
-
}
|
|
810
|
-
});
|
|
811
|
-
}
|
|
812
|
-
|
|
813
|
-
function handleVoiceBlock(block, isNew, blockRole) {
|
|
814
|
-
if (!block || !block.type) return;
|
|
815
|
-
if (block.type === 'text' && block.text) {
|
|
816
|
-
var now = Date.now();
|
|
817
|
-
if (_lastVoiceBlockText === block.text && (now - _lastVoiceBlockTime) < 500) {
|
|
818
|
-
return;
|
|
819
|
-
}
|
|
820
|
-
_lastVoiceBlockText = block.text;
|
|
821
|
-
_lastVoiceBlockTime = now;
|
|
822
|
-
|
|
823
|
-
var isUser = blockRole === 'user' || blockRole === 'tool_result';
|
|
824
|
-
var div = addVoiceBlock(block.text, isUser);
|
|
825
|
-
if (div && isNew && ttsEnabled && blockRole === 'assistant') {
|
|
826
|
-
div.classList.add('speaking');
|
|
827
|
-
preGenerateTTS(block.text);
|
|
828
|
-
speak(block.text);
|
|
829
|
-
setTimeout(function() { div.classList.remove('speaking'); }, 2000);
|
|
830
|
-
}
|
|
831
|
-
} else if (block.type === 'result') {
|
|
832
|
-
_voiceBreakNext = true;
|
|
833
|
-
}
|
|
834
|
-
}
|
|
835
|
-
|
|
836
|
-
function loadVoiceBlocks(conversationId) {
|
|
837
|
-
var container = document.getElementById('voiceMessages');
|
|
838
|
-
if (!container) return;
|
|
839
|
-
container.innerHTML = '';
|
|
840
|
-
_lastVoiceBlockText = null;
|
|
841
|
-
_lastVoiceBlockTime = 0;
|
|
842
|
-
_voiceBreakNext = false;
|
|
843
|
-
if (!conversationId) {
|
|
844
|
-
showVoiceEmpty(container);
|
|
845
|
-
unsubscribeFromConversation();
|
|
846
|
-
return;
|
|
847
|
-
}
|
|
848
|
-
isLoadingHistory = true;
|
|
849
|
-
subscribeToConversation(conversationId);
|
|
850
|
-
if (window.wsClient) {
|
|
851
|
-
window.wsClient.rpc('conv.chunks', { id: conversationId })
|
|
852
|
-
.then(function(data) {
|
|
853
|
-
if (!data.ok || !Array.isArray(data.chunks) || data.chunks.length === 0) {
|
|
854
|
-
isLoadingHistory = false;
|
|
855
|
-
showVoiceEmpty(container);
|
|
856
|
-
return;
|
|
857
|
-
}
|
|
858
|
-
var hasContent = false;
|
|
859
|
-
_voiceBreakNext = false;
|
|
860
|
-
data.chunks.forEach(function(chunk) {
|
|
861
|
-
if (chunk.sequence !== undefined) renderedSeqs.add(chunk.sequence);
|
|
862
|
-
var block = typeof chunk.data === 'string' ? JSON.parse(chunk.data) : chunk.data;
|
|
863
|
-
if (!block) return;
|
|
864
|
-
if (block.type === 'text' && block.text) {
|
|
865
|
-
var isUser = chunk.type === 'user';
|
|
866
|
-
addVoiceBlock(block.text, isUser);
|
|
867
|
-
hasContent = true;
|
|
868
|
-
} else if (block.type === 'result') {
|
|
869
|
-
_voiceBreakNext = true;
|
|
870
|
-
}
|
|
871
|
-
});
|
|
872
|
-
if (!hasContent) showVoiceEmpty(container);
|
|
873
|
-
isLoadingHistory = false;
|
|
874
|
-
})
|
|
875
|
-
.catch(function() {
|
|
876
|
-
isLoadingHistory = false;
|
|
877
|
-
showVoiceEmpty(container);
|
|
878
|
-
});
|
|
879
|
-
} else {
|
|
880
|
-
isLoadingHistory = false;
|
|
881
|
-
showVoiceEmpty(container);
|
|
882
|
-
}
|
|
883
|
-
}
|
|
884
|
-
|
|
885
|
-
function subscribeToConversation(conversationId) {
|
|
886
|
-
if (!conversationId || typeof agentGUIClient === 'undefined' || !agentGUIClient || !agentGUIClient.wsManager) {
|
|
887
|
-
return;
|
|
888
|
-
}
|
|
889
|
-
agentGUIClient.wsManager.sendMessage({ type: 'subscribe', conversationId: conversationId, timestamp: Date.now() });
|
|
890
|
-
}
|
|
891
|
-
|
|
892
|
-
function unsubscribeFromConversation() {
|
|
893
|
-
if (typeof agentGUIClient === 'undefined' || !agentGUIClient || !agentGUIClient.wsManager || !currentConversationId) {
|
|
894
|
-
return;
|
|
895
|
-
}
|
|
896
|
-
agentGUIClient.wsManager.sendMessage({ type: 'unsubscribe', conversationId: currentConversationId, timestamp: Date.now() });
|
|
897
|
-
}
|
|
898
|
-
|
|
899
|
-
function showVoiceEmpty(container) {
|
|
900
|
-
container.innerHTML = '<div class="voice-empty"><div class="voice-empty-icon"><svg viewBox="0 0 24 24" width="64" height="64" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" y1="19" x2="12" y2="23"/><line x1="8" y1="23" x2="16" y2="23"/></svg></div><div>Hold the microphone button to record.<br>Release to transcribe. Tap Send to submit.<br>New responses will be read aloud.</div></div>';
|
|
901
|
-
}
|
|
902
|
-
|
|
903
|
-
function activate() {
|
|
904
|
-
voiceActive = true;
|
|
905
|
-
if (currentConversationId) {
|
|
906
|
-
subscribeToConversation(currentConversationId);
|
|
907
|
-
loadVoiceBlocks(currentConversationId);
|
|
908
|
-
processPendingUpdates();
|
|
909
|
-
} else {
|
|
910
|
-
var container = document.getElementById('voiceMessages');
|
|
911
|
-
if (container && !container.hasChildNodes()) {
|
|
912
|
-
showVoiceEmpty(container);
|
|
913
|
-
}
|
|
914
|
-
}
|
|
915
|
-
}
|
|
916
|
-
|
|
917
|
-
function processPendingUpdates() {
|
|
918
|
-
if (!voiceActive) return;
|
|
919
|
-
var updates = pendingVoiceUpdates.splice(0, pendingVoiceUpdates.length);
|
|
920
|
-
for (var i = 0; i < updates.length; i++) {
|
|
921
|
-
var data = updates[i];
|
|
922
|
-
if (data.type === 'streaming_progress' && data.block) {
|
|
923
|
-
if (data.seq !== undefined && renderedSeqs.has(data.seq)) continue;
|
|
924
|
-
if (data.seq !== undefined) renderedSeqs.add(data.seq);
|
|
925
|
-
handleVoiceBlock(data.block, true, data.blockRole);
|
|
926
|
-
}
|
|
927
|
-
if (data.type === 'message_created' && data.message) {
|
|
928
|
-
var message = data.message;
|
|
929
|
-
if (message.role === 'user' && message.content) {
|
|
930
|
-
handleVoiceBlock({ type: 'text', text: message.content }, true, 'user');
|
|
931
|
-
}
|
|
932
|
-
}
|
|
933
|
-
if (data.type === 'streaming_start') {
|
|
934
|
-
spokenChunks = new Set();
|
|
935
|
-
renderedSeqs = new Set();
|
|
936
|
-
_voiceBreakNext = false;
|
|
937
|
-
}
|
|
938
|
-
}
|
|
939
|
-
}
|
|
940
|
-
|
|
941
|
-
function deactivate() {
|
|
942
|
-
voiceActive = false;
|
|
943
|
-
stopSpeaking();
|
|
944
|
-
unsubscribeFromConversation();
|
|
945
|
-
pendingVoiceUpdates = [];
|
|
946
|
-
}
|
|
947
|
-
|
|
948
|
-
function escapeHtml(text) {
|
|
949
|
-
var map = { '&': '&', '<': '<', '>': '>', '"': '"', "'": ''' };
|
|
950
|
-
return text.replace(/[&<>"']/g, function(c) { return map[c]; });
|
|
951
|
-
}
|
|
952
|
-
|
|
953
|
-
function getAutoSpeak() {
|
|
954
|
-
return ttsEnabled;
|
|
955
|
-
}
|
|
956
|
-
|
|
957
|
-
function setAutoSpeak(value) {
|
|
958
|
-
ttsEnabled = Boolean(value);
|
|
959
|
-
localStorage.setItem('gmgui-auto-speak', ttsEnabled);
|
|
960
|
-
var toggle = document.getElementById('voiceTTSToggle');
|
|
961
|
-
if (toggle) toggle.checked = ttsEnabled;
|
|
962
|
-
if (!ttsEnabled) stopSpeaking();
|
|
963
|
-
}
|
|
964
|
-
|
|
965
|
-
function getVoice() {
|
|
966
|
-
return selectedVoiceId;
|
|
967
|
-
}
|
|
968
|
-
|
|
969
|
-
function setVoice(voiceId) {
|
|
970
|
-
selectedVoiceId = String(voiceId);
|
|
971
|
-
localStorage.setItem('gmgui-voice-selection', selectedVoiceId);
|
|
972
|
-
var selector = document.getElementById('voiceSelector');
|
|
973
|
-
if (selector) selector.value = selectedVoiceId;
|
|
974
|
-
sendVoiceToServer();
|
|
975
110
|
}
|
|
976
111
|
|
|
977
112
|
window.voiceModule = {
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
setVoice: setVoice,
|
|
985
|
-
speakText: speakDirect
|
|
113
|
+
getAutoSpeak: function() { return ttsEnabled; },
|
|
114
|
+
setAutoSpeak: function(v) { ttsEnabled = Boolean(v); localStorage.setItem('gmgui-auto-speak', ttsEnabled); if (!ttsEnabled) stopSpeaking(); },
|
|
115
|
+
getVoice: function() { return selectedVoiceId; },
|
|
116
|
+
setVoice: function(id) { selectedVoiceId = String(id); localStorage.setItem('gmgui-voice-selection', selectedVoiceId); sendVoiceToServer(); },
|
|
117
|
+
speakText: speakDirect,
|
|
118
|
+
stopSpeaking: stopSpeaking
|
|
986
119
|
};
|
|
987
|
-
|
|
988
|
-
if (document.readyState === 'loading') {
|
|
989
|
-
document.addEventListener('DOMContentLoaded', init);
|
|
990
|
-
} else {
|
|
991
|
-
init();
|
|
992
|
-
}
|
|
993
120
|
})();
|