agentgui 1.0.274 → 1.0.276

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/CLAUDE.md +280 -280
  2. package/IPFS_DOWNLOADER.md +277 -277
  3. package/TASK_2C_COMPLETION.md +334 -334
  4. package/agentgui.ico +0 -0
  5. package/bin/gmgui.cjs +54 -54
  6. package/build-portable.js +13 -42
  7. package/database.js +1422 -1406
  8. package/lib/claude-runner.js +1130 -1130
  9. package/lib/ipfs-downloader.js +459 -459
  10. package/lib/speech.js +159 -152
  11. package/package.json +1 -1
  12. package/readme.md +76 -76
  13. package/server.js +3787 -3794
  14. package/setup-npm-token.sh +68 -68
  15. package/static/app.js +773 -773
  16. package/static/event-rendering-showcase.html +708 -708
  17. package/static/index.html +3178 -3180
  18. package/static/js/agent-auth.js +298 -298
  19. package/static/js/audio-recorder-processor.js +18 -18
  20. package/static/js/client.js +2656 -2656
  21. package/static/js/conversations.js +583 -583
  22. package/static/js/dialogs.js +267 -267
  23. package/static/js/event-consolidator.js +101 -101
  24. package/static/js/event-filter.js +311 -311
  25. package/static/js/event-processor.js +452 -452
  26. package/static/js/features.js +413 -413
  27. package/static/js/kalman-filter.js +67 -67
  28. package/static/js/progress-dialog.js +130 -130
  29. package/static/js/script-runner.js +219 -219
  30. package/static/js/streaming-renderer.js +2123 -2120
  31. package/static/js/syntax-highlighter.js +269 -269
  32. package/static/js/tts-websocket-handler.js +152 -152
  33. package/static/js/ui-components.js +431 -431
  34. package/static/js/voice.js +849 -849
  35. package/static/js/websocket-manager.js +596 -596
  36. package/static/templates/INDEX.html +465 -465
  37. package/static/templates/README.md +190 -190
  38. package/static/templates/agent-capabilities.html +56 -56
  39. package/static/templates/agent-metadata-panel.html +44 -44
  40. package/static/templates/agent-status-badge.html +30 -30
  41. package/static/templates/code-annotation-panel.html +155 -155
  42. package/static/templates/code-suggestion-panel.html +184 -184
  43. package/static/templates/command-header.html +77 -77
  44. package/static/templates/command-output-scrollable.html +118 -118
  45. package/static/templates/elapsed-time.html +54 -54
  46. package/static/templates/error-alert.html +106 -106
  47. package/static/templates/error-history-timeline.html +160 -160
  48. package/static/templates/error-recovery-options.html +109 -109
  49. package/static/templates/error-stack-trace.html +95 -95
  50. package/static/templates/error-summary.html +80 -80
  51. package/static/templates/event-counter.html +48 -48
  52. package/static/templates/execution-actions.html +97 -97
  53. package/static/templates/execution-progress-bar.html +80 -80
  54. package/static/templates/execution-stepper.html +120 -120
  55. package/static/templates/file-breadcrumb.html +118 -118
  56. package/static/templates/file-diff-viewer.html +121 -121
  57. package/static/templates/file-metadata.html +133 -133
  58. package/static/templates/file-read-panel.html +66 -66
  59. package/static/templates/file-write-panel.html +120 -120
  60. package/static/templates/git-branch-remote.html +107 -107
  61. package/static/templates/git-diff-list.html +101 -101
  62. package/static/templates/git-log-visualization.html +153 -153
  63. package/static/templates/git-status-panel.html +115 -115
  64. package/static/templates/quality-metrics-display.html +170 -170
  65. package/static/templates/terminal-output-panel.html +87 -87
  66. package/static/templates/test-results-display.html +144 -144
  67. package/static/theme.js +72 -72
  68. package/test-download-progress.js +223 -223
  69. package/test-websocket-broadcast.js +147 -147
  70. package/tests/ipfs-downloader.test.js +370 -370
@@ -1,849 +1,849 @@
1
- (function() {
2
// ---- Module-level state shared by every function in this voice module ----
// URL prefix prepended to every fetch() in this file; window.__BASE_URL when set, else ''.
- var BASE = window.__BASE_URL || '';
3
// True while a microphone capture started by startRecording() is active.
- var isRecording = false;
4
// Whether responses are read aloud; overwritten from localStorage 'voice-tts-enabled' in setupTTSToggle().
- var ttsEnabled = true;
5
// Set by activate()/deactivate(); streaming events are ignored while false.
- var voiceActive = false;
6
- var currentConversationId = null;
7
// Text segments waiting for synthesis: pushed by speak(), shifted by processQueue().
- var speechQueue = [];
8
- var isSpeaking = false;
9
// The Audio element currently playing a TTS chunk, or null.
- var currentAudio = null;
10
// Recording resources created in startRecording() and released in stopRecording().
- var mediaStream = null;
11
- var audioContext = null;
12
- var workletNode = null;
13
// Sample chunks posted by the recorder worklet; merged in stopRecording().
- var recordedChunks = [];
14
// Rate (Hz) recordings are resampled to before WAV encoding and upload to /api/stt.
- var TARGET_SAMPLE_RATE = 16000;
15
// NOTE(review): spokenChunks is only reset in the visible code, never read - confirm it is still needed.
- var spokenChunks = new Set();
16
// Sequence numbers already rendered, used to drop duplicate streaming_progress events.
- var renderedSeqs = new Set();
17
- var isLoadingHistory = false;
18
// Last streamed text and its timestamp; handleVoiceBlock() drops an identical text within 500 ms.
- var _lastVoiceBlockText = null;
19
- var _lastVoiceBlockTime = 0;
20
// When true, the next assistant text starts a new bubble instead of appending to the previous one.
- var _voiceBreakNext = false;
21
- var selectedVoiceId = localStorage.getItem('voice-selected-id') || 'default';
22
// Cache of TTS audio Blobs, capped at TTS_CLIENT_CACHE_MAX entries (see cacheTTSAudio()).
- var ttsAudioCache = new Map();
23
- var TTS_CLIENT_CACHE_MAX = 50;
24
-
25
/**
 * Module entry point: runs every setup step once, in a fixed order
 * (TTS toggle, recorder UI, streaming listeners, agent selector, voice selector).
 */
function init() {
  const steps = [
    setupTTSToggle,
    setupUI,
    setupStreamingListener,
    setupAgentSelector,
    setupVoiceSelector,
  ];
  for (const step of steps) {
    step();
  }
}
32
-
33
/**
 * Populates the #voiceSelector dropdown and keeps the chosen voice in sync
 * with localStorage and the server.
 *
 * The voice list comes from window.wsManager.subscribeToVoiceList() when a
 * WebSocket manager exists, otherwise from a one-off GET to /api/voices.
 *
 * Fixes over the previous version:
 *  - the `change` listener is attached before choosing the data source, so it
 *    is also active on the WebSocket path (it used to be skipped by an early
 *    return, which meant selections were never saved or sent);
 *  - the duplicated list-building code is shared;
 *  - the selection is restored from the current `selectedVoiceId` by comparing
 *    ids directly, instead of building a CSS attribute selector from the
 *    stored string (which breaks on ids containing quotes and went stale once
 *    the list was pushed again after a user change).
 */
function setupVoiceSelector() {
  var selector = document.getElementById('voiceSelector');
  if (!selector) return;
  var saved = localStorage.getItem('voice-selected-id');
  if (saved) selectedVoiceId = saved;

  // Label for a built-in voice: "Name (gender, accent)" with missing parts omitted.
  function describeBuiltIn(voice) {
    var parts = [];
    if (voice.gender) parts.push(voice.gender);
    if (voice.accent) parts.push(voice.accent);
    return voice.name + (parts.length ? ' (' + parts.join(', ') + ')' : '');
  }

  // Appends one <optgroup> holding an <option> per voice; empty groups are skipped.
  function appendGroup(label, voices, toLabel) {
    if (!voices.length) return;
    var group = document.createElement('optgroup');
    group.label = label;
    voices.forEach(function(voice) {
      var opt = document.createElement('option');
      opt.value = voice.id;
      opt.textContent = toLabel(voice);
      group.appendChild(opt);
    });
    selector.appendChild(group);
  }

  // Rebuilds the dropdown from scratch and re-selects the active voice if it is listed.
  function populate(voices) {
    selector.innerHTML = '';
    appendGroup('Built-in Voices', voices.filter(function(v) { return !v.isCustom; }), describeBuiltIn);
    appendGroup('Custom Voices', voices.filter(function(v) { return v.isCustom; }), function(v) { return v.name; });
    var isListed = voices.some(function(v) { return String(v.id) === selectedVoiceId; });
    if (isListed) selector.value = selectedVoiceId;
  }

  // Must be registered before the early return below so both data sources get it.
  selector.addEventListener('change', function() {
    selectedVoiceId = selector.value;
    localStorage.setItem('voice-selected-id', selectedVoiceId);
    sendVoiceToServer();
  });

  if (window.wsManager) {
    window.wsManager.subscribeToVoiceList(function(voices) {
      if (!Array.isArray(voices)) return;
      populate(voices);
    });
    return;
  }

  fetch(BASE + '/api/voices')
    .then(function(res) { return res.json(); })
    .then(function(data) {
      if (!data.ok || !Array.isArray(data.voices)) return;
      populate(data.voices);
    })
    .catch(function(err) { console.error('[Voice] Failed to load voices:', err); });
}
118
-
119
/**
 * Copies the option markup of the main agent dropdown into the voice-tab
 * agent dropdown and, when the main dropdown has a value, mirrors it.
 * Does nothing unless both dropdowns exist.
 */
function syncVoiceSelector() {
  const target = document.querySelector('[data-voice-agent-selector]');
  const source = document.querySelector('[data-agent-selector]');
  if (!(target && source)) {
    return;
  }
  target.innerHTML = source.innerHTML;
  if (source.value) {
    target.value = source.value;
  }
}
126
-
127
/**
 * Keeps the voice-tab agent dropdown and the main agent dropdown in step:
 * an initial copy, a two-way `change` mirror, and a re-sync whenever the
 * window fires `agents-loaded`. No-op when the voice dropdown is absent.
 */
function setupAgentSelector() {
  const voiceDropdown = document.querySelector('[data-voice-agent-selector]');
  if (!voiceDropdown) {
    return;
  }

  const mainDropdown = document.querySelector('[data-agent-selector]');
  if (mainDropdown) {
    syncVoiceSelector();
    // Whenever `from` changes, push its value onto `to`.
    const mirror = (from, to) => {
      from.addEventListener('change', () => {
        to.value = from.value;
      });
    };
    mirror(mainDropdown, voiceDropdown);
    mirror(voiceDropdown, mainDropdown);
  }

  window.addEventListener('agents-loaded', syncVoiceSelector);
}
142
-
143
/**
 * Wires the "read aloud" checkbox (#voiceTTSToggle) and the stop button
 * (#voiceStopSpeaking). The checkbox state is restored from, and persisted
 * to, localStorage key 'voice-tts-enabled'; switching it off stops playback.
 */
function setupTTSToggle() {
  const checkbox = document.getElementById('voiceTTSToggle');
  if (checkbox) {
    const stored = localStorage.getItem('voice-tts-enabled');
    if (stored !== null) {
      ttsEnabled = stored === 'true';
      checkbox.checked = ttsEnabled;
    }
    checkbox.addEventListener('change', () => {
      ttsEnabled = checkbox.checked;
      localStorage.setItem('voice-tts-enabled', ttsEnabled);
      if (!ttsEnabled) {
        stopSpeaking();
      }
    });
  }

  const stopButton = document.getElementById('voiceStopSpeaking');
  if (stopButton) {
    stopButton.addEventListener('click', stopSpeaking);
  }
}
162
-
163
/**
 * Wires the push-to-talk microphone button (#voiceMicBtn) for mouse and touch
 * input and the send button (#voiceSendBtn).
 *
 * Press (mousedown/touchstart) starts recording, release (mouseup/touchend)
 * stops it; both call preventDefault(). Leaving the button or a cancelled
 * touch stops an in-progress recording without preventDefault().
 */
function setupUI() {
  const mic = document.getElementById('voiceMicBtn');
  if (mic) {
    mic.removeAttribute('disabled');
    mic.title = 'Hold to record';

    const onPress = (evt) => {
      evt.preventDefault();
      startRecording();
    };
    const onRelease = (evt) => {
      evt.preventDefault();
      stopRecording();
    };
    const onAbort = () => {
      if (isRecording) stopRecording();
    };

    const bindings = [
      ['mousedown', onPress],
      ['mouseup', onRelease],
      ['mouseleave', onAbort],
      ['touchstart', onPress],
      ['touchend', onRelease],
      ['touchcancel', onAbort],
    ];
    bindings.forEach(([eventName, handler]) => {
      mic.addEventListener(eventName, handler);
    });
  }

  const send = document.getElementById('voiceSendBtn');
  if (send) {
    send.addEventListener('click', sendVoiceMessage);
  }
}
196
-
197
/**
 * Resamples mono audio by linear interpolation between neighbouring samples.
 *
 * @param {Float32Array} inputBuffer - Source samples.
 * @param {number} fromRate - Sample rate of `inputBuffer`.
 * @param {number} toRate - Desired sample rate.
 * @returns {Float32Array} `inputBuffer` itself when the rates are equal,
 *   otherwise a new array of round(length * toRate / fromRate) samples.
 */
function resampleBuffer(inputBuffer, fromRate, toRate) {
  if (fromRate === toRate) {
    return inputBuffer;
  }

  const step = fromRate / toRate;
  const lastIndex = inputBuffer.length - 1;
  const outLength = Math.round(inputBuffer.length / step);
  const output = new Float32Array(outLength);

  for (let n = 0; n < outLength; n += 1) {
    const position = n * step;
    const left = Math.floor(position);
    // Clamp so the final output sample never reads past the end of the input.
    const right = Math.min(left + 1, lastIndex);
    const weight = position - left;
    output[n] = inputBuffer[left] * (1 - weight) + inputBuffer[right] * weight;
  }

  return output;
}
211
-
212
/**
 * Encodes float samples as a 16-bit PCM, mono, little-endian WAV file.
 *
 * @param {Float32Array} float32Audio - Samples; values are clamped to [-1, 1].
 * @param {number} sampleRate - Sample rate written into the header.
 * @returns {ArrayBuffer} A 44-byte RIFF/WAVE header followed by the sample data.
 */
function encodeWav(float32Audio, sampleRate) {
  const HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = 2;
  const sampleCount = float32Audio.length;
  const payloadBytes = sampleCount * BYTES_PER_SAMPLE;

  const wav = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const out = new DataView(wav);

  // Writes an ASCII tag one byte per character starting at `at`.
  const putTag = (at, tag) => {
    for (let k = 0; k < tag.length; k += 1) {
      out.setUint8(at + k, tag.charCodeAt(k));
    }
  };

  // RIFF container header.
  putTag(0, 'RIFF');
  out.setUint32(4, 36 + payloadBytes, true);
  putTag(8, 'WAVE');

  // "fmt " chunk: 16-byte body, PCM (format 1), one channel.
  putTag(12, 'fmt ');
  out.setUint32(16, 16, true);
  out.setUint16(20, 1, true);
  out.setUint16(22, 1, true);
  out.setUint32(24, sampleRate, true);
  out.setUint32(28, sampleRate * BYTES_PER_SAMPLE, true); // byte rate
  out.setUint16(32, BYTES_PER_SAMPLE, true);              // block align
  out.setUint16(34, 16, true);                            // bits per sample

  // "data" chunk.
  putTag(36, 'data');
  out.setUint32(40, payloadBytes, true);

  for (let n = 0; n < sampleCount; n += 1) {
    const clamped = Math.max(-1, Math.min(1, float32Audio[n]));
    // Negative and positive halves scale differently so -1 maps to -32768 and +1 to 32767.
    const scaled = clamped < 0 ? clamped * 32768 : clamped * 32767;
    out.setInt16(HEADER_BYTES + n * 2, scaled, true);
  }

  return wav;
}
240
-
241
/**
 * Starts capturing microphone audio into `recordedChunks` through an
 * AudioWorklet ('recorder-processor'). No-op while a recording is active.
 *
 * On success sets `isRecording` and marks #voiceMicBtn with the 'recording'
 * class. On failure shows the reason in #voiceTranscript.
 *
 * Fix: when setup fails after the microphone was already granted (for
 * example the worklet module cannot be loaded), the stream tracks, worklet
 * node and AudioContext are now released. Previously they were left open,
 * keeping the microphone active with no way to stop it, because
 * stopRecording() returns early while `isRecording` is false.
 *
 * NOTE(review): `isRecording` only becomes true after the awaits below, so a
 * release that arrives before setup finishes is ignored by stopRecording().
 *
 * @returns {Promise<void>}
 */
async function startRecording() {
  if (isRecording) return;
  var el = document.getElementById('voiceTranscript');
  if (el) {
    el.textContent = '';
    el.setAttribute('data-final', '');
  }
  try {
    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    var source = audioContext.createMediaStreamSource(mediaStream);
    recordedChunks = [];
    await audioContext.audioWorklet.addModule(BASE + '/js/audio-recorder-processor.js');
    workletNode = new AudioWorkletNode(audioContext, 'recorder-processor');
    workletNode.port.onmessage = function(e) {
      recordedChunks.push(e.data);
    };
    source.connect(workletNode);
    isRecording = true;
    var micBtn = document.getElementById('voiceMicBtn');
    if (micBtn) micBtn.classList.add('recording');
  } catch (e) {
    isRecording = false;
    // Release whatever was acquired before the failure so the mic is not left open.
    if (workletNode) {
      try { workletNode.disconnect(); } catch (ignored) { /* already detached */ }
      workletNode = null;
    }
    if (mediaStream) {
      mediaStream.getTracks().forEach(function(t) { t.stop(); });
      mediaStream = null;
    }
    if (audioContext) {
      audioContext.close().catch(function() {});
      audioContext = null;
    }
    if (el) el.textContent = 'Mic access denied or unavailable: ' + e.message;
  }
}
267
-
268
/**
 * Ends the active recording, releases the audio resources, and uploads the
 * captured audio for transcription.
 *
 * The recorded chunks are merged, resampled to TARGET_SAMPLE_RATE, encoded
 * as WAV and POSTed to /api/stt. The transcript (or an error message) is
 * written into #voiceTranscript; a successful transcript is also stored in
 * its `data-final` attribute. No-op when nothing is being recorded.
 *
 * @returns {Promise<void>}
 */
async function stopRecording() {
  if (!isRecording) return;
  isRecording = false;

  const micButton = document.getElementById('voiceMicBtn');
  if (micButton) micButton.classList.remove('recording');
  const transcriptEl = document.getElementById('voiceTranscript');
  const show = (message) => {
    if (transcriptEl) transcriptEl.textContent = message;
  };

  if (workletNode) {
    workletNode.port.postMessage('stop');
    workletNode.disconnect();
    workletNode = null;
  }
  if (mediaStream) {
    mediaStream.getTracks().forEach((track) => { track.stop(); });
    mediaStream = null;
  }

  // Read the capture rate before the context is closed; 48000 is the fallback.
  let captureRate = 48000;
  if (audioContext) {
    captureRate = audioContext.sampleRate;
    audioContext.close().catch(() => {});
    audioContext = null;
  }

  if (recordedChunks.length === 0) return;

  const totalSamples = recordedChunks.reduce((sum, chunk) => sum + chunk.length, 0);
  const combined = new Float32Array(totalSamples);
  let writeAt = 0;
  recordedChunks.forEach((chunk) => {
    combined.set(chunk, writeAt);
    writeAt += chunk.length;
  });
  recordedChunks = [];

  const speechAudio = resampleBuffer(combined, captureRate, TARGET_SAMPLE_RATE);
  show('Transcribing...');

  try {
    const wavBytes = encodeWav(speechAudio, TARGET_SAMPLE_RATE);
    const response = await fetch(BASE + '/api/stt', {
      method: 'POST',
      headers: { 'Content-Type': 'audio/wav' },
      body: wavBytes
    });
    const payload = await response.json();
    if (payload.text) {
      if (transcriptEl) {
        transcriptEl.textContent = payload.text;
        transcriptEl.setAttribute('data-final', payload.text);
      }
    } else if (payload.error) {
      show('Error: ' + payload.error);
    } else {
      show('');
    }
  } catch (err) {
    show('Transcription failed: ' + err.message);
  }
}
315
-
316
/**
 * Sends the current transcript in #voiceTranscript to the agent.
 *
 * The transcript is echoed into the voice log as a user block, the transcript
 * element is cleared, and - when the global `agentGUIClient` with a message
 * input is available - the text is placed in that input and execution starts.
 *
 * Fix: #voiceTranscript doubles as a status line. Besides "Transcribing..."
 * and "Error: ...", the recorder also writes "Transcription failed: ..." and
 * "Mic access denied or unavailable: ..." there; those two were not filtered
 * and were submitted to the agent as if they were user prompts.
 */
function sendVoiceMessage() {
  var el = document.getElementById('voiceTranscript');
  if (!el) return;
  var text = el.textContent.trim();
  if (!text) return;

  // Prefixes of status/error messages written into the transcript element by
  // startRecording()/stopRecording(); such content must never be sent.
  var statusPrefixes = ['Transcribing', 'Error', 'Transcription failed', 'Mic access denied'];
  var isStatusMessage = statusPrefixes.some(function(prefix) { return text.startsWith(prefix); });
  if (isStatusMessage) return;

  addVoiceBlock(text, true);
  el.textContent = '';
  el.setAttribute('data-final', '');
  if (typeof agentGUIClient !== 'undefined' && agentGUIClient) {
    var input = agentGUIClient.ui.messageInput;
    if (input) {
      input.value = text;
      agentGUIClient.startExecution();
    }
  }
}
332
-
333
/**
 * Queues text for speech synthesis and kicks the queue.
 *
 * Tags of the form <...> are removed first. When the global `agentGUIClient`
 * offers `parseMarkdownCodeBlocks`, the text is split with it and parts of
 * type 'code' are skipped; otherwise the whole text is one segment.
 * Does nothing when TTS is disabled or the cleaned text is empty.
 *
 * @param {string} text - Text to read aloud.
 */
function speak(text) {
  if (!ttsEnabled) return;

  const plain = text.replace(/<[^>]*>/g, '').trim();
  if (!plain) return;

  const canSplitCode =
    typeof agentGUIClient !== 'undefined' &&
    agentGUIClient &&
    typeof agentGUIClient.parseMarkdownCodeBlocks === 'function';
  const pieces = canSplitCode
    ? agentGUIClient.parseMarkdownCodeBlocks(plain)
    : [{ type: 'text', content: plain }];

  pieces.forEach((piece) => {
    if (piece.type === 'code') return;
    const spoken = piece.content.trim();
    if (spoken) {
      speechQueue.push(spoken);
    }
  });

  processQueue();
}
352
-
353
/**
 * Stores base64-encoded WAV audio in the client-side TTS cache as a Blob.
 *
 * The cache holds at most TTS_CLIENT_CACHE_MAX entries; when a new key would
 * exceed that, the oldest entry (first in Map insertion order) is dropped.
 *
 * Fixes:
 *  - the payload is decoded before the cache is touched, so a malformed
 *    base64 string (atob throws) no longer evicts an entry for nothing;
 *  - overwriting an existing key no longer evicts an unrelated entry; the
 *    key is re-inserted so it counts as the most recent one.
 *
 * @param {string} cacheKey - Key under which the audio is stored.
 * @param {string} b64 - Base64-encoded audio bytes.
 * @throws if `b64` is not valid base64 (propagated from atob).
 */
function cacheTTSAudio(cacheKey, b64) {
  var binary = atob(b64);
  var bytes = new Uint8Array(binary.length);
  for (var i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
  var blob = new Blob([bytes], { type: 'audio/wav' });

  if (ttsAudioCache.has(cacheKey)) {
    // Delete first so the set() below moves the key to the end of the order.
    ttsAudioCache.delete(cacheKey);
  } else if (ttsAudioCache.size >= TTS_CLIENT_CACHE_MAX) {
    var oldest = ttsAudioCache.keys().next().value;
    ttsAudioCache.delete(oldest);
  }
  ttsAudioCache.set(cacheKey, blob);
}
363
-
364
/**
 * Looks up cached TTS audio for `text` under the currently selected voice.
 * The lookup key is "<selectedVoiceId>:<text>".
 *
 * @param {string} text - The exact text that was synthesized.
 * @returns {Blob|null} The cached audio, or null when there is none.
 */
function getCachedTTSBlob(text) {
  const cached = ttsAudioCache.get(`${selectedVoiceId}:${text}`);
  if (cached) {
    return cached;
  }
  return null;
}
368
-
369
/**
 * Splits text into sentences at runs of '.', '!' or '?'.
 *
 * A piece is glued back onto the previous one (joined by a space) when it
 * begins with a list-style number ("2." / "2)" / "2 ") or when the previous
 * piece ends in a number followed by '.' or ')'.
 *
 * @param {string} text - Text to split.
 * @returns {Array} The sentences; `[text]` when `text` is falsy or nothing matches.
 */
function splitSentences(text) {
  if (!text) return [text];

  const pieces = text.match(/[^.!?]+[.!?]+[\s]?|[^.!?]+$/g);
  if (!pieces) return [text];

  const merged = [];
  pieces.forEach((piece) => {
    const sentence = piece.trim();
    if (sentence.length === 0) return;

    const lastIndex = merged.length - 1;
    if (lastIndex >= 0) {
      const previous = merged[lastIndex];
      const startsWithNumber = sentence.match(/^(\d+[\.\)]|\d+\s)/);
      const previousEndsWithNumber = previous.match(/\d+[\.\)]$/);
      if (startsWithNumber || previousEndsWithNumber) {
        merged[lastIndex] = previous + ' ' + sentence;
        return;
      }
    }
    merged.push(sentence);
  });

  return merged;
}
388
-
389
// ---- TTS playback state shared by playNextChunk(), processQueue() and stopSpeaking() ----
// Audio Blobs waiting to be played for the segment currently being spoken.
- var audioChunkQueue = [];
390
// True while playNextChunk() has a chunk playing.
- var isPlayingChunk = false;
391
// True once no more chunks will arrive for the current segment.
- var streamDone = false;
392
// After TTS_MAX_FAILURES consecutive failed requests, TTS is disabled until stopSpeaking() resets it.
- var ttsConsecutiveFailures = 0;
393
- var TTS_MAX_FAILURES = 3;
394
- var ttsDisabledUntilReset = false;
395
// Cleared when /api/tts-stream answers with a non-OK status; processQueue() then uses /api/tts only.
- var streamingSupported = true;
396
// NOTE(review): streamingFailedAt is written in processQueue() but never read in the visible code.
- var streamingFailedAt = 0;
397
-
398
/**
 * Plays the next Blob from `audioChunkQueue`, chaining to the following one
 * when playback ends or fails. When the queue is empty it clears
 * `isPlayingChunk` and, if the stream is finished, clears `isSpeaking` and
 * resumes processQueue().
 *
 * Fixes:
 *  - `onerror` and the rejected play() promise can both fire for the same
 *    element; each used to advance the queue, skipping a chunk and starting
 *    two playbacks at once. Completion is now handled at most once per
 *    element.
 *  - a late callback from an element that is no longer `currentAudio` (for
 *    example play() rejecting after stopSpeaking() paused it) used to null
 *    out `currentAudio` and restart playback; it now only releases its URL.
 */
function playNextChunk() {
  if (audioChunkQueue.length === 0) {
    isPlayingChunk = false;
    if (streamDone) {
      isSpeaking = false;
      processQueue();
    }
    return;
  }
  isPlayingChunk = true;
  var blob = audioChunkQueue.shift();
  var url = URL.createObjectURL(blob);
  var audio = new Audio(url);
  var settled = false;

  // Runs once per element, whichever of ended / error / play-rejection comes first.
  function finish() {
    if (settled) return;
    settled = true;
    URL.revokeObjectURL(url);
    // Superseded element: playback was stopped or replaced, so do not advance.
    if (currentAudio !== audio) return;
    currentAudio = null;
    playNextChunk();
  }

  currentAudio = audio;
  audio.onended = finish;
  audio.onerror = finish;
  audio.play().catch(finish);
}
427
-
428
/**
 * Takes the next text segment from `speechQueue` and gets it spoken.
 *
 * Order of attempts:
 *   1. client cache (getCachedTTSBlob) - played immediately;
 *   2. POST /api/tts-stream - a stream of frames, each a 4-byte big-endian
 *      length followed by that many bytes of WAV data, played as they arrive;
 *   3. POST /api/tts - a single WAV response, used when streaming is
 *      unsupported or fails.
 *
 * After TTS_MAX_FAILURES consecutive failures TTS is disabled (and the queue
 * flushed) until stopSpeaking() resets it. No-op while already speaking or
 * when the queue is empty.
 *
 * Changes: removed the `sentences` / `cachedSentences` / `uncachedText`
 * scaffolding, whose branches could never run because `cachedSentences` was
 * always empty; request failures that were silently swallowed are now logged.
 */
function processQueue() {
  if (isSpeaking || speechQueue.length === 0) return;
  if (ttsDisabledUntilReset) {
    speechQueue = [];
    return;
  }
  isSpeaking = true;
  streamDone = false;
  var text = speechQueue.shift();
  audioChunkQueue = [];
  isPlayingChunk = false;

  var cachedBlob = getCachedTTSBlob(text);
  if (cachedBlob) {
    ttsConsecutiveFailures = 0;
    audioChunkQueue.push(cachedBlob);
    streamDone = true;
    playNextChunk();
    return;
  }

  function onTtsSuccess() {
    ttsConsecutiveFailures = 0;
  }

  // Counts a failed segment; trips the circuit breaker at TTS_MAX_FAILURES.
  function onTtsFailed() {
    ttsConsecutiveFailures++;
    if (ttsConsecutiveFailures >= TTS_MAX_FAILURES) {
      console.warn('[Voice] TTS failed ' + ttsConsecutiveFailures + ' times consecutively, disabling until reset');
      ttsDisabledUntilReset = true;
      speechQueue = [];
    }
    streamDone = true;
    isSpeaking = false;
    if (!ttsDisabledUntilReset) {
      processQueue();
    }
  }

  // Fallback: fetch the whole utterance as one WAV file.
  function tryNonStreaming(txt) {
    fetch(BASE + '/api/tts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text: txt, voiceId: selectedVoiceId })
    }).then(function(resp) {
      if (!resp.ok) throw new Error('TTS failed: ' + resp.status);
      return resp.arrayBuffer();
    }).then(function(buf) {
      onTtsSuccess();
      var blob = new Blob([buf], { type: 'audio/wav' });
      audioChunkQueue.push(blob);
      streamDone = true;
      if (!isPlayingChunk) playNextChunk();
    }).catch(function(err) {
      console.warn('[Voice] TTS request failed:', err);
      onTtsFailed();
    });
  }

  // Preferred path: play length-prefixed WAV frames as they stream in.
  function tryStreaming() {
    if (!streamingSupported) { tryNonStreaming(text); return; }
    fetch(BASE + '/api/tts-stream', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
    }).then(function(resp) {
      if (!resp.ok) {
        // Remember that the endpoint is unusable so later segments skip it.
        streamingSupported = false;
        streamingFailedAt = Date.now();
        throw new Error('TTS stream failed: ' + resp.status);
      }
      var reader = resp.body.getReader();
      var buffer = new Uint8Array(0);

      function concat(a, b) {
        var c = new Uint8Array(a.length + b.length);
        c.set(a, 0);
        c.set(b, a.length);
        return c;
      }

      function pump() {
        return reader.read().then(function(result) {
          if (result.done) {
            onTtsSuccess();
            streamDone = true;
            // Nothing playing and nothing queued: this segment is finished.
            if (!isPlayingChunk && audioChunkQueue.length === 0) {
              isSpeaking = false;
              processQueue();
            }
            return;
          }
          buffer = concat(buffer, result.value);
          // Extract every complete frame currently in the buffer.
          while (buffer.length >= 4) {
            var view = new DataView(buffer.buffer, buffer.byteOffset, 4);
            var chunkLen = view.getUint32(0, false);
            if (buffer.length < 4 + chunkLen) break;
            var wavData = buffer.slice(4, 4 + chunkLen);
            buffer = buffer.slice(4 + chunkLen);
            var blob = new Blob([wavData], { type: 'audio/wav' });
            audioChunkQueue.push(blob);
            if (!isPlayingChunk) playNextChunk();
          }
          return pump();
        });
      }

      return pump();
    }).catch(function(err) {
      console.warn('[Voice] TTS streaming failed, falling back to /api/tts:', err);
      tryNonStreaming(text);
    });
  }

  tryStreaming();
}
566
-
567
/**
 * Cancels all speech: empties the text and audio queues, clears the
 * speaking/playing flags, resets the TTS failure circuit breaker, and pauses
 * the audio element that is currently playing.
 *
 * Fix: the blob: URL of the interrupted audio element is now revoked. A
 * paused element never fires `ended`, so its object URL (and the audio data
 * behind it) was previously never released.
 */
function stopSpeaking() {
  speechQueue = [];
  audioChunkQueue = [];
  isPlayingChunk = false;
  isSpeaking = false;
  ttsConsecutiveFailures = 0;
  ttsDisabledUntilReset = false;
  if (currentAudio) {
    var interrupted = currentAudio;
    currentAudio = null;
    interrupted.pause();
    var src = interrupted.src;
    if (typeof src === 'string' && src.indexOf('blob:') === 0) {
      URL.revokeObjectURL(src);
    }
  }
}
579
-
580
/**
 * Removes everything of the form <...> from the text, collapses each run of
 * whitespace into a single space, and trims the ends.
 *
 * @param {string} text - Text that may contain markup.
 * @returns {string} The cleaned, single-spaced text.
 */
function stripHtml(text) {
  const withoutTags = text.replace(/<[^>]*>/g, '');
  const singleSpaced = withoutTags.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
583
-
584
/**
 * Adds a message to the voice log (#voiceMessages) and scrolls to it.
 *
 * Consecutive assistant texts are appended to the previous assistant bubble
 * unless `_voiceBreakNext` asks for a new one. A new assistant bubble gets a
 * "Re-read aloud" button that speaks its accumulated original text.
 *
 * @param {string} text - Message text.
 * @param {boolean} isUser - True for a user message, false for assistant text.
 * @returns {Element|undefined} The bubble that holds the text, or undefined
 *   when the log container is missing.
 */
function addVoiceBlock(text, isUser) {
  const log = document.getElementById('voiceMessages');
  if (!log) return;

  const placeholder = log.querySelector('.voice-empty');
  if (placeholder) placeholder.remove();

  const tail = log.lastElementChild;
  const canAppend =
    !isUser &&
    !_voiceBreakNext &&
    tail &&
    tail.classList.contains('voice-block') &&
    !tail.classList.contains('voice-block-user');
  if (canAppend) {
    const existingContent = tail.querySelector('.voice-block-content');
    if (existingContent) {
      existingContent.textContent += '\n' + stripHtml(text);
      tail._fullText = (tail._fullText || existingContent.textContent) + '\n' + text;
      scrollVoiceToBottom();
      return tail;
    }
  }

  _voiceBreakNext = false;
  const bubble = document.createElement('div');
  bubble.className = isUser ? 'voice-block voice-block-user' : 'voice-block';

  if (isUser) {
    bubble.textContent = text;
  } else {
    const content = document.createElement('span');
    content.className = 'voice-block-content';
    content.textContent = stripHtml(text);
    bubble.appendChild(content);
    // Keep the unstripped text so re-reading goes through speak()'s own cleaning.
    bubble._fullText = text;

    const reread = document.createElement('button');
    reread.className = 'voice-reread-btn';
    reread.title = 'Re-read aloud';
    reread.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
    reread.addEventListener('click', () => {
      speak(bubble._fullText || content.textContent);
    });
    bubble.appendChild(reread);
  }

  log.appendChild(bubble);
  scrollVoiceToBottom();
  return bubble;
}
623
-
624
/**
 * Adds an execution-result bubble to the voice log (#voiceMessages).
 *
 * Shows the (escaped) result text, plus duration and cost when present; with
 * neither, shows "Execution failed" or "Execution complete". A result with
 * text gets a "Re-read aloud" button and is spoken when `autoSpeak` is set
 * and TTS is enabled.
 *
 * @param {Object} block - Result block; reads `result`, `is_error`,
 *   `duration_ms` and `total_cost_usd`.
 * @param {boolean} autoSpeak - Whether to read the result aloud right away.
 * @returns {Element|undefined} The new bubble, or undefined when the log
 *   container is missing.
 */
function addVoiceResultBlock(block, autoSpeak) {
  const log = document.getElementById('voiceMessages');
  if (!log) return;

  const placeholder = log.querySelector('.voice-empty');
  if (placeholder) placeholder.remove();

  const bubble = document.createElement('div');
  bubble.className = 'voice-block';

  const failed = block.is_error || false;
  const duration = block.duration_ms ? (block.duration_ms / 1000).toFixed(1) + 's' : '';
  const cost = block.total_cost_usd ? '$' + block.total_cost_usd.toFixed(4) : '';

  let resultText = '';
  if (block.result) {
    resultText = typeof block.result === 'string' ? block.result : JSON.stringify(block.result);
  }
  const shownText = stripHtml(resultText);

  const sections = [];
  if (shownText) {
    sections.push('<div>' + escapeHtml(shownText) + '</div>');
  }
  if (duration || cost) {
    const stats = [duration, cost].filter(Boolean).join(' | ');
    sections.push('<div class="voice-result-stats">' + stats + '</div>');
  }
  if (sections.length > 0) {
    bubble.innerHTML = sections.join('');
  } else {
    bubble.innerHTML = failed ? 'Execution failed' : 'Execution complete';
  }

  if (resultText) {
    const reread = document.createElement('button');
    reread.className = 'voice-reread-btn';
    reread.title = 'Re-read aloud';
    reread.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
    reread.addEventListener('click', () => {
      speak(resultText);
    });
    bubble.appendChild(reread);
  }

  log.appendChild(bubble);
  scrollVoiceToBottom();

  if (autoSpeak && ttsEnabled && resultText) {
    speak(resultText);
  }
  return bubble;
}
671
-
672
/**
 * Scrolls the voice log viewport (#voiceScroll) to its bottom on the next
 * animation frame. No-op when the element is absent.
 */
function scrollVoiceToBottom() {
  const viewport = document.getElementById('voiceScroll');
  if (!viewport) {
    return;
  }
  requestAnimationFrame(() => {
    viewport.scrollTop = viewport.scrollHeight;
  });
}
680
-
681
/**
 * Tells the server which voice is selected by sending a `set_voice` message
 * over the client's WebSocket manager. Silently does nothing when the global
 * `agentGUIClient` or a connected `wsManager` is not available.
 */
function sendVoiceToServer() {
  if (typeof agentGUIClient === 'undefined' || !agentGUIClient) {
    return;
  }
  const socket = agentGUIClient.wsManager;
  if (!socket || !socket.isConnected) {
    return;
  }
  agentGUIClient.wsManager.sendMessage({ type: 'set_voice', voiceId: selectedVoiceId });
}
686
-
687
/**
 * Registers the two window-level listeners that feed the voice view.
 *
 * `ws-message`:
 *   - `tts_audio` for the selected voice is stored in the client cache;
 *   - `sync_connected` re-sends the selected voice to the server;
 *   - while the voice view is active, `streaming_progress` blocks for the
 *     current conversation are rendered once per sequence number, and
 *     `streaming_start` resets the per-stream bookkeeping.
 *
 * `conversation-selected`: switches the tracked conversation, stops speech,
 * resets bookkeeping and, when the view is active, reloads its history.
 */
function setupStreamingListener() {
  window.addEventListener('ws-message', (event) => {
    const msg = event.detail;
    if (!msg) return;

    if (msg.type === 'tts_audio' && msg.audio && msg.voiceId === selectedVoiceId) {
      cacheTTSAudio(msg.cacheKey, msg.audio);
    }
    if (msg.type === 'sync_connected') {
      sendVoiceToServer();
    }

    // Everything below only matters while the voice view is showing.
    if (!voiceActive) return;

    const isOtherConversation =
      Boolean(msg.conversationId) && msg.conversationId !== currentConversationId;

    if (msg.type === 'streaming_progress' && msg.block) {
      if (isOtherConversation) return;
      const hasSeq = msg.seq !== undefined;
      if (hasSeq && renderedSeqs.has(msg.seq)) return;
      if (hasSeq) renderedSeqs.add(msg.seq);
      handleVoiceBlock(msg.block, true);
    }

    if (msg.type === 'streaming_start') {
      if (isOtherConversation) return;
      spokenChunks = new Set();
      renderedSeqs = new Set();
      _voiceBreakNext = false;
    }
  });

  window.addEventListener('conversation-selected', (event) => {
    const nextId = event.detail.conversationId;
    // Leave the previous conversation's subscription before switching.
    if (currentConversationId && currentConversationId !== nextId) {
      unsubscribeFromConversation();
    }
    currentConversationId = nextId;
    stopSpeaking();
    spokenChunks = new Set();
    renderedSeqs = new Set();
    if (voiceActive) {
      loadVoiceBlocks(currentConversationId);
    }
  });
}
725
-
726
/**
 * Renders one streamed block in the voice view (exported as
 * `voiceModule.handleBlock`).
 *
 * Text blocks are appended to the log and, when `isNew` and TTS is enabled,
 * read aloud with the bubble highlighted for two seconds. An identical text
 * arriving within 500 ms of the previous one is ignored. Result blocks get a
 * result bubble; every non-text block forces the next text into a new bubble.
 *
 * @param {Object} block - Block with a `type` ('text', 'result', ...).
 * @param {boolean} isNew - True for live blocks that may be spoken.
 */
function handleVoiceBlock(block, isNew) {
  if (!block || !block.type) return;

  const isTextWithContent = block.type === 'text' && block.text;
  if (!isTextWithContent) {
    _voiceBreakNext = true;
    if (block.type === 'result') {
      addVoiceResultBlock(block, isNew);
    }
    return;
  }

  const timestamp = Date.now();
  const isRepeat = _lastVoiceBlockText === block.text && (timestamp - _lastVoiceBlockTime) < 500;
  if (isRepeat) return;
  _lastVoiceBlockText = block.text;
  _lastVoiceBlockTime = timestamp;

  const bubble = addVoiceBlock(block.text, false);
  if (bubble && isNew && ttsEnabled) {
    bubble.classList.add('speaking');
    speak(block.text);
    setTimeout(() => { bubble.classList.remove('speaking'); }, 2000);
  }
}
749
-
750
/**
 * Clears the voice log and fills it with the stored history of a
 * conversation, subscribing to its live updates.
 *
 * History is fetched from /api/conversations/<id>/chunks; text and result
 * chunks are rendered (without speaking them) and every chunk's sequence
 * number is recorded so live duplicates are skipped. Without a conversation
 * id the empty-state placeholder is shown and the subscription is dropped.
 *
 * Fixes:
 *  - a chunk whose `data` is not valid JSON is now skipped (and logged);
 *    previously the exception aborted the loop and the catch handler
 *    replaced everything already rendered with the placeholder;
 *  - a response that arrives after another conversation was selected is
 *    ignored instead of being rendered into the newer conversation's view.
 *
 * @param {string} conversationId - Conversation whose history to show.
 */
function loadVoiceBlocks(conversationId) {
  var container = document.getElementById('voiceMessages');
  if (!container) return;
  container.innerHTML = '';
  _lastVoiceBlockText = null;
  _lastVoiceBlockTime = 0;
  _voiceBreakNext = false;
  if (!conversationId) {
    showVoiceEmpty(container);
    unsubscribeFromConversation();
    return;
  }
  isLoadingHistory = true;
  subscribeToConversation(conversationId);

  // True once the user has moved on to a different conversation.
  function isStale() {
    return conversationId !== currentConversationId;
  }

  fetch(BASE + '/api/conversations/' + conversationId + '/chunks')
    .then(function(res) { return res.json(); })
    .then(function(data) {
      if (isStale()) return;
      isLoadingHistory = false;
      if (!data.ok || !Array.isArray(data.chunks) || data.chunks.length === 0) {
        showVoiceEmpty(container);
        return;
      }
      var hasContent = false;
      _voiceBreakNext = false;
      data.chunks.forEach(function(chunk) {
        if (chunk.sequence !== undefined) renderedSeqs.add(chunk.sequence);
        var block;
        try {
          block = typeof chunk.data === 'string' ? JSON.parse(chunk.data) : chunk.data;
        } catch (err) {
          console.warn('[Voice] Skipping malformed history chunk:', err);
          return;
        }
        if (!block) return;
        if (block.type === 'text' && block.text) {
          addVoiceBlock(block.text, false);
          hasContent = true;
        } else if (block.type === 'result') {
          _voiceBreakNext = true;
          addVoiceResultBlock(block, false);
          hasContent = true;
        } else {
          _voiceBreakNext = true;
        }
      });
      if (!hasContent) showVoiceEmpty(container);
    })
    .catch(function(err) {
      if (isStale()) return;
      console.warn('[Voice] Failed to load conversation history:', err);
      isLoadingHistory = false;
      showVoiceEmpty(container);
    });
}
796
-
797
/**
 * Sends a `subscribe` message for the given conversation over the client's
 * WebSocket manager. Does nothing without a conversation id or when the
 * global `agentGUIClient` / its `wsManager` is unavailable.
 *
 * @param {string} conversationId - Conversation to subscribe to.
 */
function subscribeToConversation(conversationId) {
  const hasSocket =
    typeof agentGUIClient !== 'undefined' && agentGUIClient && agentGUIClient.wsManager;
  if (!conversationId || !hasSocket) {
    return;
  }
  agentGUIClient.wsManager.sendMessage({
    type: 'subscribe',
    conversationId: conversationId,
    timestamp: Date.now()
  });
}
803
-
804
/**
 * Tell the server to stop streaming events for the conversation currently
 * held in `currentConversationId`. Does nothing when there is no current
 * conversation or the client/websocket manager is not available.
 *
 * @returns {void}
 */
function unsubscribeFromConversation() {
  const clientReady =
    typeof agentGUIClient !== 'undefined' && agentGUIClient && agentGUIClient.wsManager;
  if (clientReady && currentConversationId) {
    agentGUIClient.wsManager.sendMessage({
      type: 'unsubscribe',
      conversationId: currentConversationId,
      timestamp: Date.now()
    });
  }
}
810
-
811
/**
 * Replace the container's content with the "empty transcript" placeholder:
 * a microphone icon followed by the usage hint.
 *
 * @param {HTMLElement} container Element whose innerHTML is overwritten.
 * @returns {void}
 */
function showVoiceEmpty(container) {
  const micIcon = '<div class="voice-empty-icon"><svg viewBox="0 0 24 24" width="64" height="64" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" y1="19" x2="12" y2="23"/><line x1="8" y1="23" x2="16" y2="23"/></svg></div>';
  const usageHint = '<div>Hold the microphone button to record.<br>Release to transcribe. Tap Send to submit.<br>New responses will be read aloud.</div>';
  container.innerHTML = '<div class="voice-empty">' + micIcon + usageHint + '</div>';
}
814
-
815
/**
 * Mark the voice view as active. With a current conversation its history is
 * (re)loaded; otherwise the empty placeholder is shown, but only if the
 * message container exists and has no child nodes yet.
 *
 * @returns {void}
 */
function activate() {
  voiceActive = true;
  if (currentConversationId) {
    loadVoiceBlocks(currentConversationId);
    return;
  }
  const messages = document.getElementById('voiceMessages');
  if (!messages || messages.hasChildNodes()) return;
  showVoiceEmpty(messages);
}
826
-
827
/**
 * Mark the voice view as inactive: stop any speech in progress and
 * unsubscribe from the current conversation.
 *
 * @returns {void}
 */
function deactivate() {
  voiceActive = false;
  [stopSpeaking, unsubscribeFromConversation].forEach(function(teardown) {
    teardown();
  });
}
832
-
833
/**
 * Escape the five HTML-significant characters (& < > " ') so the text can be
 * inserted into markup.
 *
 * @param {string} text Raw text.
 * @returns {string} Text with the characters replaced by entities.
 */
function escapeHtml(text) {
  // '&' must go first so the entities produced below are not re-escaped.
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
837
-
838
// Public surface of the voice module, exposed on window.
window.voiceModule = {
  activate,
  deactivate,
  handleBlock: handleVoiceBlock
};

// Run init right away when the DOM is already parsed, otherwise wait for it.
if (document.readyState !== 'loading') {
  init();
} else {
  document.addEventListener('DOMContentLoaded', init);
}
849
- })();
1
+ (function() {
2
// ---- Constants -------------------------------------------------------------
// URL prefix put in front of every API/asset path requested by this module.
const BASE = window.__BASE_URL || '';
// Sample rate (Hz) recordings are resampled to before being uploaded.
const TARGET_SAMPLE_RATE = 16000;
// Maximum number of entries kept in ttsAudioCache.
const TTS_CLIENT_CACHE_MAX = 50;

// ---- Recording state ---------------------------------------------------------
let isRecording = false;
let mediaStream = null;
let audioContext = null;
let workletNode = null;
let recordedChunks = [];

// ---- Text-to-speech state ----------------------------------------------------
let ttsEnabled = true;
let speechQueue = [];
let isSpeaking = false;
let currentAudio = null;
let selectedVoiceId = localStorage.getItem('voice-selected-id') || 'default';
// Cache key -> audio Blob, filled from 'tts_audio' websocket messages.
const ttsAudioCache = new Map();

// ---- View / conversation state -------------------------------------------------
let voiceActive = false;
let currentConversationId = null;
// Reset on stream start and conversation change; not read in this file.
let spokenChunks = new Set();
// Sequence numbers already rendered, used to drop repeated stream events.
let renderedSeqs = new Set();
// Set while history is being fetched; not read in this file.
let isLoadingHistory = false;
// Last text block and when it arrived, for short-window duplicate suppression.
let _lastVoiceBlockText = null;
let _lastVoiceBlockTime = 0;
// When true, the next assistant text starts a new block instead of appending.
let _voiceBreakNext = false;
24
+
25
/**
 * Wire up the voice tab by running each setup step once, in a fixed order.
 *
 * @returns {void}
 */
function init() {
  const steps = [
    setupTTSToggle,
    setupUI,
    setupStreamingListener,
    setupAgentSelector,
    setupVoiceSelector
  ];
  steps.forEach(function(step) { step(); });
}
32
+
33
/**
 * Populate the `#voiceSelector` dropdown and keep the chosen voice in sync
 * with localStorage and the server.
 *
 * The voice list comes from `window.wsManager.subscribeToVoiceList` when a
 * websocket manager is present, otherwise from a one-off `GET /api/voices`.
 * Voices are grouped into "Built-in Voices" and "Custom Voices".
 *
 * Changes from the previous implementation:
 *  - the `change` listener is attached before the data source is chosen, so
 *    it is also active on the websocket path (it used to be skipped by an
 *    early return);
 *  - both data sources share one populate routine;
 *  - the remembered voice is matched by comparing option values instead of
 *    interpolating the id into a CSS selector;
 *  - the option restored after a (re)population is the currently selected
 *    voice, so a re-pushed list does not revert a newer choice.
 *
 * @returns {void}
 */
function setupVoiceSelector() {
  var selector = document.getElementById('voiceSelector');
  if (!selector) return;
  var saved = localStorage.getItem('voice-selected-id');
  if (saved) selectedVoiceId = saved;

  // Append one <optgroup> holding an <option> per voice.
  function appendGroup(label, voices, describe) {
    if (!voices.length) return;
    var group = document.createElement('optgroup');
    group.label = label;
    voices.forEach(function(voice) {
      var opt = document.createElement('option');
      opt.value = voice.id;
      opt.textContent = describe(voice);
      group.appendChild(opt);
    });
    selector.appendChild(group);
  }

  // Built-in voices show gender/accent in parentheses when available.
  function describeBuiltIn(voice) {
    var parts = [];
    if (voice.gender) parts.push(voice.gender);
    if (voice.accent) parts.push(voice.accent);
    return voice.name + (parts.length ? ' (' + parts.join(', ') + ')' : '');
  }

  function describeCustom(voice) {
    return voice.name;
  }

  // Rebuild the dropdown from a voice list and re-select the active voice.
  function populate(voices) {
    if (!Array.isArray(voices)) return;
    selector.innerHTML = '';
    appendGroup('Built-in Voices', voices.filter(function(v) { return !v.isCustom; }), describeBuiltIn);
    appendGroup('Custom Voices', voices.filter(function(v) { return v.isCustom; }), describeCustom);
    var wanted = selectedVoiceId;
    var known = Array.prototype.some.call(selector.options || [], function(opt) {
      return opt.value === wanted;
    });
    if (known) selector.value = wanted;
  }

  selector.addEventListener('change', function() {
    selectedVoiceId = selector.value;
    localStorage.setItem('voice-selected-id', selectedVoiceId);
    sendVoiceToServer();
  });

  if (window.wsManager) {
    window.wsManager.subscribeToVoiceList(populate);
    return;
  }

  fetch(BASE + '/api/voices')
    .then(function(res) { return res.json(); })
    .then(function(data) {
      if (!data.ok) return;
      populate(data.voices);
    })
    .catch(function(err) { console.error('[Voice] Failed to load voices:', err); });
}
118
+
119
/**
 * Copy the main agent selector's options (and its current value, if any)
 * into the voice tab's agent selector. Does nothing unless both exist.
 *
 * @returns {void}
 */
function syncVoiceSelector() {
  const target = document.querySelector('[data-voice-agent-selector]');
  const source = document.querySelector('[data-agent-selector]');
  if (target && source) {
    target.innerHTML = source.innerHTML;
    if (source.value) {
      target.value = source.value;
    }
  }
}
126
+
127
/**
 * Keep the voice tab's agent selector mirrored with the main agent selector.
 *
 * When both selectors exist the voice one is filled from the main one and a
 * change on either copies its value to the other. The voice selector is also
 * re-synced on every `agents-loaded` window event.
 *
 * @returns {void}
 */
function setupAgentSelector() {
  const voicePicker = document.querySelector('[data-voice-agent-selector]');
  if (!voicePicker) return;

  const mainPicker = document.querySelector('[data-agent-selector]');
  if (mainPicker) {
    syncVoiceSelector();
    const mirror = function(from, to) {
      from.addEventListener('change', function() {
        to.value = from.value;
      });
    };
    mirror(mainPicker, voicePicker);
    mirror(voicePicker, mainPicker);
  }

  window.addEventListener('agents-loaded', syncVoiceSelector);
}
142
+
143
/**
 * Hook up the text-to-speech controls.
 *
 * The `#voiceTTSToggle` checkbox is initialised from the
 * `voice-tts-enabled` localStorage entry (when present), and every change is
 * stored back; turning speech off also stops anything being spoken. The
 * `#voiceStopSpeaking` button stops speech on click.
 *
 * @returns {void}
 */
function setupTTSToggle() {
  const ttsCheckbox = document.getElementById('voiceTTSToggle');
  if (ttsCheckbox) {
    const stored = localStorage.getItem('voice-tts-enabled');
    if (stored !== null) {
      ttsEnabled = stored === 'true';
      ttsCheckbox.checked = ttsEnabled;
    }
    ttsCheckbox.addEventListener('change', function() {
      ttsEnabled = ttsCheckbox.checked;
      localStorage.setItem('voice-tts-enabled', ttsEnabled);
      if (ttsEnabled) return;
      stopSpeaking();
    });
  }

  const stopButton = document.getElementById('voiceStopSpeaking');
  if (stopButton) stopButton.addEventListener('click', stopSpeaking);
}
162
+
163
/**
 * Attach the hold-to-record handlers to the microphone button and the click
 * handler to the send button.
 *
 * Press (mousedown/touchstart) starts recording and release
 * (mouseup/touchend) stops it, both suppressing the default action. Leaving
 * the button or a cancelled touch stops recording only if one is running.
 *
 * @returns {void}
 */
function setupUI() {
  const micButton = document.getElementById('voiceMicBtn');
  if (micButton) {
    micButton.removeAttribute('disabled');
    micButton.title = 'Hold to record';

    const onPress = function(evt) {
      evt.preventDefault();
      startRecording();
    };
    const onRelease = function(evt) {
      evt.preventDefault();
      stopRecording();
    };
    const onAbort = function() {
      if (isRecording) stopRecording();
    };

    [
      ['mousedown', onPress],
      ['mouseup', onRelease],
      ['mouseleave', onAbort],
      ['touchstart', onPress],
      ['touchend', onRelease],
      ['touchcancel', onAbort]
    ].forEach(function(entry) {
      micButton.addEventListener(entry[0], entry[1]);
    });
  }

  const sendButton = document.getElementById('voiceSendBtn');
  if (sendButton) sendButton.addEventListener('click', sendVoiceMessage);
}
196
+
197
/**
 * Resample mono audio samples with linear interpolation.
 *
 * @param {Float32Array} inputBuffer Source samples.
 * @param {number} fromRate Sample rate of `inputBuffer` in Hz.
 * @param {number} toRate Desired sample rate in Hz.
 * @returns {Float32Array} `inputBuffer` itself when the rates are equal,
 *   otherwise a new array of `round(length * toRate / fromRate)` samples.
 */
function resampleBuffer(inputBuffer, fromRate, toRate) {
  if (fromRate === toRate) return inputBuffer;

  const step = fromRate / toRate;
  const lastIndex = inputBuffer.length - 1;
  const output = new Float32Array(Math.round(inputBuffer.length / step));

  for (let n = 0; n < output.length; n += 1) {
    const position = n * step;
    const left = Math.floor(position);
    // Clamp the right neighbour so the final sample never reads past the end.
    const right = Math.min(left + 1, lastIndex);
    const weight = position - left;
    output[n] = inputBuffer[left] * (1 - weight) + inputBuffer[right] * weight;
  }
  return output;
}
211
+
212
/**
 * Encode mono float samples as a 16-bit PCM WAV file.
 *
 * Writes the 44-byte RIFF/WAVE header (format 1 = PCM, 1 channel, 16 bits
 * per sample) followed by the samples, each clamped to [-1, 1] and scaled to
 * the signed 16-bit range.
 *
 * @param {Float32Array} float32Audio Samples in the range [-1, 1].
 * @param {number} sampleRate Sample rate in Hz written into the header.
 * @returns {ArrayBuffer} The complete WAV file.
 */
function encodeWav(float32Audio, sampleRate) {
  const HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = 2;
  const sampleCount = float32Audio.length;
  const payloadBytes = sampleCount * BYTES_PER_SAMPLE;

  const wav = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const out = new DataView(wav);
  const putTag = function(at, tag) {
    for (let k = 0; k < tag.length; k += 1) {
      out.setUint8(at + k, tag.charCodeAt(k));
    }
  };

  // RIFF container header.
  putTag(0, 'RIFF');
  out.setUint32(4, 36 + payloadBytes, true);
  putTag(8, 'WAVE');

  // "fmt " sub-chunk.
  putTag(12, 'fmt ');
  out.setUint32(16, 16, true);
  out.setUint16(20, 1, true);
  out.setUint16(22, 1, true);
  out.setUint32(24, sampleRate, true);
  out.setUint32(28, sampleRate * BYTES_PER_SAMPLE, true);
  out.setUint16(32, BYTES_PER_SAMPLE, true);
  out.setUint16(34, 16, true);

  // "data" sub-chunk.
  putTag(36, 'data');
  out.setUint32(40, payloadBytes, true);
  for (let k = 0; k < sampleCount; k += 1) {
    const clamped = Math.max(-1, Math.min(1, float32Audio[k]));
    // Negative and positive halves use different scales so -1 maps to -32768
    // and +1 to 32767.
    out.setInt16(HEADER_BYTES + k * 2, clamped < 0 ? clamped * 32768 : clamped * 32767, true);
  }
  return wav;
}
240
+
241
/**
 * Start capturing microphone audio into `recordedChunks`.
 *
 * Clears the transcript element, requests the microphone, creates an
 * AudioContext and routes the stream into the `recorder-processor` worklet,
 * whose messages are appended to `recordedChunks`. On success `isRecording`
 * becomes true and the mic button gets the `recording` class.
 *
 * If any step fails, everything acquired so far (worklet node, media tracks,
 * AudioContext) is released before the error is shown in the transcript
 * element; previously a failure after `getUserMedia` succeeded left the
 * microphone open.
 *
 * @returns {Promise<void>}
 */
async function startRecording() {
  if (isRecording) return;
  var el = document.getElementById('voiceTranscript');
  if (el) {
    el.textContent = '';
    el.setAttribute('data-final', '');
  }
  try {
    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    var source = audioContext.createMediaStreamSource(mediaStream);
    recordedChunks = [];
    await audioContext.audioWorklet.addModule(BASE + '/js/audio-recorder-processor.js');
    workletNode = new AudioWorkletNode(audioContext, 'recorder-processor');
    workletNode.port.onmessage = function(e) {
      recordedChunks.push(e.data);
    };
    source.connect(workletNode);
    isRecording = true;
    var micBtn = document.getElementById('voiceMicBtn');
    if (micBtn) micBtn.classList.add('recording');
  } catch (e) {
    isRecording = false;
    // Release whatever was acquired before the failure.
    if (workletNode) {
      try { workletNode.disconnect(); } catch (ignored) { /* already detached */ }
      workletNode = null;
    }
    if (mediaStream) {
      mediaStream.getTracks().forEach(function(t) { t.stop(); });
      mediaStream = null;
    }
    if (audioContext) {
      audioContext.close().catch(function() {});
      audioContext = null;
    }
    recordedChunks = [];
    if (el) el.textContent = 'Mic access denied or unavailable: ' + e.message;
  }
}
267
+
268
/**
 * Stop the active recording, upload it for transcription and show the result.
 *
 * Tears down the worklet node, media tracks and AudioContext, concatenates
 * the captured chunks, resamples them to TARGET_SAMPLE_RATE, encodes them as
 * WAV and POSTs that to `/api/stt`. The transcript element shows
 * "Transcribing..." while waiting, then the recognised text (also stored in
 * its `data-final` attribute), the server's error, or an empty string.
 * Returns early when not recording or when nothing was captured.
 *
 * @returns {Promise<void>}
 */
async function stopRecording() {
  if (!isRecording) return;
  isRecording = false;

  const micButton = document.getElementById('voiceMicBtn');
  if (micButton) micButton.classList.remove('recording');
  const transcriptEl = document.getElementById('voiceTranscript');
  const show = function(message) {
    if (transcriptEl) transcriptEl.textContent = message;
  };

  if (workletNode) {
    workletNode.port.postMessage('stop');
    workletNode.disconnect();
    workletNode = null;
  }
  if (mediaStream) {
    mediaStream.getTracks().forEach(function(track) { track.stop(); });
    mediaStream = null;
  }

  // Read the capture rate before the context is closed; 48000 is the fallback.
  let captureRate = 48000;
  if (audioContext) {
    captureRate = audioContext.sampleRate;
    audioContext.close().catch(function() {});
    audioContext = null;
  }

  if (recordedChunks.length === 0) return;

  const sampleCount = recordedChunks.reduce(function(sum, chunk) {
    return sum + chunk.length;
  }, 0);
  const pcm = new Float32Array(sampleCount);
  let writeAt = 0;
  recordedChunks.forEach(function(chunk) {
    pcm.set(chunk, writeAt);
    writeAt += chunk.length;
  });
  recordedChunks = [];

  const resampled = resampleBuffer(pcm, captureRate, TARGET_SAMPLE_RATE);
  show('Transcribing...');
  try {
    const wav = encodeWav(resampled, TARGET_SAMPLE_RATE);
    const response = await fetch(BASE + '/api/stt', {
      method: 'POST',
      headers: { 'Content-Type': 'audio/wav' },
      body: wav
    });
    const data = await response.json();
    if (data.text) {
      if (transcriptEl) {
        transcriptEl.textContent = data.text;
        transcriptEl.setAttribute('data-final', data.text);
      }
    } else if (data.error) {
      show('Error: ' + data.error);
    } else {
      show('');
    }
  } catch (e) {
    show('Transcription failed: ' + e.message);
  }
}
315
+
316
/**
 * Send the current transcript as a user message.
 *
 * Reads the text of `#voiceTranscript`, echoes it into the voice view as a
 * user block, clears the transcript and, when the main client is available,
 * puts the text into its message input and starts execution.
 *
 * Nothing is sent when the transcript is empty or still holds one of the
 * status texts this module writes there. The "Transcription failed: " and
 * "Mic access denied or unavailable: " messages are now filtered too;
 * previously they could be submitted as if the user had said them.
 *
 * @returns {void}
 */
function sendVoiceMessage() {
  var el = document.getElementById('voiceTranscript');
  if (!el) return;
  var text = el.textContent.trim();
  if (!text) return;
  // Prefixes of the status/error texts written into the transcript element
  // by startRecording and stopRecording.
  var statusPrefixes = [
    'Transcribing',
    'Error',
    'Transcription failed: ',
    'Mic access denied or unavailable: '
  ];
  var isStatus = statusPrefixes.some(function(prefix) { return text.startsWith(prefix); });
  if (isStatus) return;
  addVoiceBlock(text, true);
  el.textContent = '';
  el.setAttribute('data-final', '');
  if (typeof agentGUIClient !== 'undefined' && agentGUIClient) {
    var input = agentGUIClient.ui.messageInput;
    if (input) {
      input.value = text;
      agentGUIClient.startExecution();
    }
  }
}
332
+
333
/**
 * Queue text to be read aloud, then kick the speech queue.
 *
 * HTML tags are stripped first. When the main client offers
 * `parseMarkdownCodeBlocks`, the text is split with it and parts of type
 * `code` are skipped; otherwise the whole text is one segment. Does nothing
 * when speech is disabled or no text remains after stripping.
 *
 * @param {string} text Text (may contain HTML/markdown) to speak.
 * @returns {void}
 */
function speak(text) {
  if (!ttsEnabled) return;

  const plain = text.replace(/<[^>]*>/g, '').trim();
  if (!plain) return;

  const canSplit =
    typeof agentGUIClient !== 'undefined' &&
    agentGUIClient &&
    typeof agentGUIClient.parseMarkdownCodeBlocks === 'function';
  const parts = canSplit
    ? agentGUIClient.parseMarkdownCodeBlocks(plain)
    : [{ type: 'text', content: plain }];

  for (const part of parts) {
    if (part.type === 'code') continue;
    const segment = part.content.trim();
    if (segment) speechQueue.push(segment);
  }
  processQueue();
}
352
+
353
/**
 * Store base64-encoded WAV audio in the client-side TTS cache.
 *
 * The payload is decoded before the cache is touched, so an invalid payload
 * is logged and ignored instead of throwing after an entry was already
 * evicted. Re-caching an existing key replaces it in place (and makes it the
 * newest entry) without evicting anything else. When a new key would exceed
 * TTS_CLIENT_CACHE_MAX, the oldest inserted entry is dropped.
 *
 * @param {string} cacheKey Key under which the audio is stored.
 * @param {string} b64 Base64-encoded audio bytes.
 * @returns {void}
 */
function cacheTTSAudio(cacheKey, b64) {
  var binary;
  try {
    binary = atob(b64);
  } catch (err) {
    console.warn('[Voice] Ignoring TTS audio with invalid base64 payload:', err);
    return;
  }
  var bytes = new Uint8Array(binary.length);
  for (var i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);

  if (ttsAudioCache.has(cacheKey)) {
    // Delete first so the re-inserted key moves to the end of the Map's
    // insertion order, i.e. becomes the newest entry.
    ttsAudioCache.delete(cacheKey);
  } else if (ttsAudioCache.size >= TTS_CLIENT_CACHE_MAX) {
    var oldest = ttsAudioCache.keys().next().value;
    ttsAudioCache.delete(oldest);
  }
  ttsAudioCache.set(cacheKey, new Blob([bytes], { type: 'audio/wav' }));
}
363
+
364
/**
 * Look up cached audio for a text spoken with the currently selected voice.
 *
 * @param {string} text Text whose audio is wanted.
 * @returns {Blob|null} The cached blob stored under "<voiceId>:<text>", or
 *   null when there is none.
 */
function getCachedTTSBlob(text) {
  const hit = ttsAudioCache.get(selectedVoiceId + ':' + text);
  return hit ? hit : null;
}
368
+
369
/**
 * Split text into sentences on `.`, `!` and `?`, keeping numbered-list
 * markers together with their neighbours.
 *
 * A piece is glued onto the previous sentence when it starts with a number
 * followed by `.`, `)` or whitespace, or when the previous sentence ends in
 * a number followed by `.` or `)`.
 *
 * @param {string} text Text to split.
 * @returns {string[]} Trimmed sentences; `[text]` when the input is falsy or
 *   contains nothing the pattern can match.
 */
function splitSentences(text) {
  if (!text) return [text];

  const pieces = text.match(/[^.!?]+[.!?]+[\s]?|[^.!?]+$/g);
  if (!pieces) return [text];

  const merged = [];
  for (const piece of pieces) {
    const sentence = piece.trim();
    if (sentence.length === 0) continue;

    const tail = merged.length - 1;
    const joinsPrevious =
      tail >= 0 &&
      (/^(\d+[\.\)]|\d+\s)/.test(sentence) || /\d+[\.\)]$/.test(merged[tail]));

    if (joinsPrevious) {
      merged[tail] = merged[tail] + ' ' + sentence;
    } else {
      merged.push(sentence);
    }
  }
  return merged;
}
388
+
389
// ---- TTS playback state ------------------------------------------------------
// Audio blobs of the utterance being spoken that are waiting to be played.
let audioChunkQueue = [];
// True while a chunk from audioChunkQueue is being played.
let isPlayingChunk = false;
// True once no further chunks will arrive for the current utterance.
let streamDone = false;

// Consecutive failed TTS requests; speech is disabled once the maximum is hit
// and stays off until stopSpeaking() resets it.
let ttsConsecutiveFailures = 0;
const TTS_MAX_FAILURES = 3;
let ttsDisabledUntilReset = false;

// Cleared after /api/tts-stream returns a non-OK response; later requests then
// go straight to /api/tts.
let streamingSupported = true;
// Timestamp of that failure; written but not read in this file.
let streamingFailedAt = 0;
397
+
398
/**
 * Play the next queued audio chunk, chaining to the following one when it
 * finishes.
 *
 * With an empty queue playback is marked idle; if the current utterance has
 * delivered all of its chunks (`streamDone`), speaking ends and the speech
 * queue is processed for the next utterance.
 *
 * A failing element can report the same failure through both its `error`
 * event and the rejected `play()` promise. The chain therefore advances at
 * most once per chunk; previously both paths called playNextChunk, which
 * could skip a chunk or start two at once. A chunk that is no longer the
 * current audio (playback was stopped or superseded) releases its URL but
 * does not continue the chain.
 *
 * @returns {void}
 */
function playNextChunk() {
  if (audioChunkQueue.length === 0) {
    isPlayingChunk = false;
    if (streamDone) {
      isSpeaking = false;
      processQueue();
    }
    return;
  }
  isPlayingChunk = true;
  var blob = audioChunkQueue.shift();
  var url = URL.createObjectURL(blob);
  var audio = new Audio(url);
  var settled = false;

  function advance() {
    if (settled) return;
    settled = true;
    URL.revokeObjectURL(url);
    // stopSpeaking() (or a newer chunk) has taken over: do not touch the
    // shared playback state on behalf of this stale element.
    if (currentAudio !== audio) return;
    currentAudio = null;
    playNextChunk();
  }

  currentAudio = audio;
  audio.onended = advance;
  audio.onerror = advance;
  audio.play().catch(advance);
}
427
+
428
/**
 * Speak the next text in `speechQueue`, if nothing is being spoken.
 *
 * Audio comes from the client cache when available, otherwise from the
 * streaming endpoint `/api/tts-stream` (length-prefixed WAV chunks: a 4-byte
 * big-endian length followed by that many bytes), falling back to the
 * one-shot `/api/tts` endpoint when streaming is unsupported or fails.
 * After TTS_MAX_FAILURES consecutive failures speech is disabled and the
 * queue dropped until stopSpeaking() resets the flag.
 *
 * Changes from the previous implementation:
 *  - every asynchronous callback first checks that its utterance is still
 *    current. Each utterance owns a fresh `audioChunkQueue` array, and
 *    stopSpeaking() / a later processQueue() replace that array, so a
 *    differing identity means the utterance was cancelled. Previously a
 *    response arriving after "stop" still queued and played audio and
 *    flipped the shared speaking flags;
 *  - the never-executed per-sentence cache branches were removed.
 *
 * NOTE(review): if the stream fails after some chunks were already queued,
 * the fallback re-requests the whole text, which can repeat audio. That
 * behaviour is unchanged here.
 *
 * @returns {void}
 */
function processQueue() {
  if (isSpeaking || speechQueue.length === 0) return;
  if (ttsDisabledUntilReset) {
    speechQueue = [];
    return;
  }
  isSpeaking = true;
  streamDone = false;
  var text = speechQueue.shift();
  var ownQueue = [];
  audioChunkQueue = ownQueue;
  isPlayingChunk = false;

  // True once stopSpeaking() or a newer utterance replaced the chunk queue.
  function isStale() {
    return audioChunkQueue !== ownQueue;
  }

  var cachedBlob = getCachedTTSBlob(text);
  if (cachedBlob) {
    ttsConsecutiveFailures = 0;
    audioChunkQueue.push(cachedBlob);
    streamDone = true;
    playNextChunk();
    return;
  }

  function onTtsSuccess() {
    ttsConsecutiveFailures = 0;
  }

  function onTtsFailed() {
    ttsConsecutiveFailures++;
    if (ttsConsecutiveFailures >= TTS_MAX_FAILURES) {
      console.warn('[Voice] TTS failed ' + ttsConsecutiveFailures + ' times consecutively, disabling until reset');
      ttsDisabledUntilReset = true;
      speechQueue = [];
    }
    streamDone = true;
    isSpeaking = false;
    if (!ttsDisabledUntilReset) {
      processQueue();
    }
  }

  // One-shot synthesis: the whole utterance arrives as a single WAV body.
  function tryNonStreaming(txt) {
    fetch(BASE + '/api/tts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text: txt, voiceId: selectedVoiceId })
    }).then(function(resp) {
      if (!resp.ok) throw new Error('TTS failed: ' + resp.status);
      return resp.arrayBuffer();
    }).then(function(buf) {
      if (isStale()) return;
      onTtsSuccess();
      var blob = new Blob([buf], { type: 'audio/wav' });
      audioChunkQueue.push(blob);
      streamDone = true;
      if (!isPlayingChunk) playNextChunk();
    }).catch(function() {
      if (isStale()) return;
      onTtsFailed();
    });
  }

  // Streaming synthesis: chunks are played as soon as they are complete.
  function tryStreaming() {
    if (!streamingSupported) { tryNonStreaming(text); return; }
    fetch(BASE + '/api/tts-stream', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
    }).then(function(resp) {
      if (!resp.ok) {
        streamingSupported = false;
        streamingFailedAt = Date.now();
        throw new Error('TTS stream failed: ' + resp.status);
      }
      var reader = resp.body.getReader();
      if (isStale()) {
        reader.cancel().catch(function() {});
        return;
      }
      var buffer = new Uint8Array(0);

      function concat(a, b) {
        var c = new Uint8Array(a.length + b.length);
        c.set(a, 0);
        c.set(b, a.length);
        return c;
      }

      function pump() {
        return reader.read().then(function(result) {
          if (isStale()) {
            // Nobody will play the rest; stop the download.
            reader.cancel().catch(function() {});
            return;
          }
          if (result.done) {
            onTtsSuccess();
            streamDone = true;
            if (!isPlayingChunk && audioChunkQueue.length === 0) {
              isSpeaking = false;
              processQueue();
            }
            return;
          }
          buffer = concat(buffer, result.value);
          // Extract every complete length-prefixed chunk received so far.
          while (buffer.length >= 4) {
            var view = new DataView(buffer.buffer, buffer.byteOffset, 4);
            var chunkLen = view.getUint32(0, false);
            if (buffer.length < 4 + chunkLen) break;
            var wavData = buffer.slice(4, 4 + chunkLen);
            buffer = buffer.slice(4 + chunkLen);
            var blob = new Blob([wavData], { type: 'audio/wav' });
            audioChunkQueue.push(blob);
            if (!isPlayingChunk) playNextChunk();
          }
          return pump();
        });
      }

      return pump();
    }).catch(function() {
      if (isStale()) return;
      tryNonStreaming(text);
    });
  }

  tryStreaming();
}
566
+
567
/**
 * Stop speech immediately: drop all queued text and audio, clear the
 * speaking/playing flags, reset the TTS failure counter and the "disabled
 * until reset" flag, and pause the audio element that is playing, if any.
 *
 * @returns {void}
 */
function stopSpeaking() {
  // Empty both queues with fresh arrays.
  speechQueue = [];
  audioChunkQueue = [];

  isSpeaking = false;
  isPlayingChunk = false;

  // A manual stop also re-enables TTS after repeated failures.
  ttsDisabledUntilReset = false;
  ttsConsecutiveFailures = 0;

  if (!currentAudio) return;
  currentAudio.pause();
  currentAudio = null;
}
579
+
580
/**
 * Remove HTML tags, collapse every run of whitespace into a single space and
 * trim the ends.
 *
 * @param {string} text Text that may contain markup.
 * @returns {string} Plain single-line text.
 */
function stripHtml(text) {
  const withoutTags = text.replace(/<[^>]*>/g, '');
  const singleSpaced = withoutTags.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
583
+
584
/**
 * Add a message to the voice transcript.
 *
 * Assistant text is appended to the previous assistant block (on a new line)
 * unless a break was requested via `_voiceBreakNext` or the previous element
 * is not an assistant text block. Otherwise a new block is created: user
 * blocks show the raw text; assistant blocks show tag-stripped text, keep
 * the original in `_fullText` and get a "Re-read aloud" button.
 *
 * @param {string} text Message text.
 * @param {boolean} isUser True for a user message, false for assistant text.
 * @returns {HTMLElement|undefined} The block that now holds the text, or
 *   undefined when the message container does not exist.
 */
function addVoiceBlock(text, isUser) {
  const container = document.getElementById('voiceMessages');
  if (!container) return;

  const placeholder = container.querySelector('.voice-empty');
  if (placeholder) placeholder.remove();

  const previous = container.lastElementChild;
  const mayAppend =
    !isUser &&
    !_voiceBreakNext &&
    previous &&
    previous.classList.contains('voice-block') &&
    !previous.classList.contains('voice-block-user');
  if (mayAppend) {
    const existing = previous.querySelector('.voice-block-content');
    if (existing) {
      existing.textContent += '\n' + stripHtml(text);
      previous._fullText = (previous._fullText || existing.textContent) + '\n' + text;
      scrollVoiceToBottom();
      return previous;
    }
  }

  _voiceBreakNext = false;
  const block = document.createElement('div');
  block.className = isUser ? 'voice-block voice-block-user' : 'voice-block';

  if (isUser) {
    block.textContent = text;
  } else {
    const content = document.createElement('span');
    content.className = 'voice-block-content';
    content.textContent = stripHtml(text);
    block.appendChild(content);
    block._fullText = text;

    const reread = document.createElement('button');
    reread.className = 'voice-reread-btn';
    reread.title = 'Re-read aloud';
    reread.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
    // Read at click time so text appended later is included.
    reread.addEventListener('click', function() {
      speak(block._fullText || content.textContent);
    });
    block.appendChild(reread);
  }

  container.appendChild(block);
  scrollVoiceToBottom();
  return block;
}
623
+
624
/**
 * Add a block for a finished execution ("result" event) to the transcript.
 *
 * Shows the escaped, tag-stripped result text and, when available, a stats
 * line with duration and cost. With neither, a generic "Execution complete"
 * / "Execution failed" label is shown. A "Re-read aloud" button is added
 * when there is result text, which is also spoken right away if requested
 * and speech is enabled.
 *
 * @param {Object} block Result event; reads `is_error`, `duration_ms`,
 *   `total_cost_usd` and `result`.
 * @param {boolean} autoSpeak Whether to speak the result immediately.
 * @returns {HTMLElement|undefined} The created block, or undefined when the
 *   message container does not exist.
 */
function addVoiceResultBlock(block, autoSpeak) {
  const container = document.getElementById('voiceMessages');
  if (!container) return;

  const placeholder = container.querySelector('.voice-empty');
  if (placeholder) placeholder.remove();

  const wrapper = document.createElement('div');
  wrapper.className = 'voice-block';

  const failed = block.is_error || false;
  const duration = block.duration_ms ? (block.duration_ms / 1000).toFixed(1) + 's' : '';
  const cost = block.total_cost_usd ? '$' + block.total_cost_usd.toFixed(4) : '';

  let resultText = '';
  if (block.result) {
    resultText = typeof block.result === 'string' ? block.result : JSON.stringify(block.result);
  }
  const shownText = stripHtml(resultText);

  const pieces = [];
  if (shownText) {
    pieces.push('<div>' + escapeHtml(shownText) + '</div>');
  }
  if (duration || cost) {
    const stats = [duration, cost].filter(function(part) { return part; }).join(' | ');
    pieces.push('<div class="voice-result-stats">' + stats + '</div>');
  }
  let markup = pieces.join('');
  if (!markup) {
    markup = failed ? 'Execution failed' : 'Execution complete';
  }
  wrapper.innerHTML = markup;

  if (resultText) {
    const reread = document.createElement('button');
    reread.className = 'voice-reread-btn';
    reread.title = 'Re-read aloud';
    reread.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
    reread.addEventListener('click', function() {
      speak(resultText);
    });
    wrapper.appendChild(reread);
  }

  container.appendChild(wrapper);
  scrollVoiceToBottom();
  if (autoSpeak && ttsEnabled && resultText) {
    speak(resultText);
  }
  return wrapper;
}
671
+
672
/**
 * Scroll the `#voiceScroll` element to its bottom on the next animation
 * frame. Does nothing when the element does not exist.
 *
 * @returns {void}
 */
function scrollVoiceToBottom() {
  const scroller = document.getElementById('voiceScroll');
  if (!scroller) return;
  requestAnimationFrame(() => {
    scroller.scrollTop = scroller.scrollHeight;
  });
}
680
+
681
/**
 * Tell the server which voice is selected by sending a `set_voice` message.
 * Only sent when the main client exists and its websocket manager reports a
 * live connection.
 *
 * @returns {void}
 */
function sendVoiceToServer() {
  if (typeof agentGUIClient === 'undefined' || !agentGUIClient) return;
  const socket = agentGUIClient.wsManager;
  if (!socket || !socket.isConnected) return;
  agentGUIClient.wsManager.sendMessage({ type: 'set_voice', voiceId: selectedVoiceId });
}
686
+
687
/**
 * Register the window-level listeners that feed the voice view.
 *
 * `ws-message` events:
 *  - `tts_audio` for the selected voice is stored in the audio cache;
 *  - `sync_connected` re-sends the selected voice to the server;
 *  - while the voice view is active, `streaming_progress` blocks for the
 *    current conversation are rendered (each sequence number only once) and
 *    `streaming_start` resets the per-stream tracking.
 *
 * `conversation-selected` events switch the current conversation: the old
 * one is unsubscribed, speech stops, tracking is reset and, if the view is
 * active, the new history is loaded.
 *
 * @returns {void}
 */
function setupStreamingListener() {
  window.addEventListener('ws-message', function(event) {
    const msg = event.detail;
    if (!msg) return;

    // A message tagged with a conversation id other than the current one.
    const forOtherConversation = function() {
      return msg.conversationId && msg.conversationId !== currentConversationId;
    };

    switch (msg.type) {
      case 'tts_audio':
        if (msg.audio && msg.voiceId === selectedVoiceId) {
          cacheTTSAudio(msg.cacheKey, msg.audio);
        }
        break;

      case 'sync_connected':
        sendVoiceToServer();
        break;

      case 'streaming_progress':
        if (!voiceActive || !msg.block || forOtherConversation()) break;
        if (msg.seq !== undefined) {
          if (renderedSeqs.has(msg.seq)) break;
          renderedSeqs.add(msg.seq);
        }
        handleVoiceBlock(msg.block, true);
        break;

      case 'streaming_start':
        if (!voiceActive || forOtherConversation()) break;
        spokenChunks = new Set();
        renderedSeqs = new Set();
        _voiceBreakNext = false;
        break;

      default:
        break;
    }
  });

  window.addEventListener('conversation-selected', function(event) {
    const nextId = event.detail.conversationId;
    // Unsubscribe while currentConversationId still names the old conversation.
    if (currentConversationId && currentConversationId !== nextId) {
      unsubscribeFromConversation();
    }
    currentConversationId = nextId;
    stopSpeaking();
    spokenChunks = new Set();
    renderedSeqs = new Set();
    if (voiceActive) {
      loadVoiceBlocks(currentConversationId);
    }
  });
}
725
+
726
/**
 * Render one streamed block in the voice view.
 *
 * Text blocks are added to the transcript and, for new blocks with speech
 * enabled, spoken (the block carries the `speaking` class for two seconds).
 * The same text arriving again within 500 ms is ignored. Result blocks
 * are rendered through addVoiceResultBlock; every non-text block makes the
 * next text start a new transcript block.
 *
 * @param {Object} block Streamed block; reads `type` and `text`.
 * @param {boolean} isNew True for live blocks, which may be spoken.
 * @returns {void}
 */
function handleVoiceBlock(block, isNew) {
  if (!block || !block.type) return;

  const isText = block.type === 'text' && block.text;
  if (!isText) {
    _voiceBreakNext = true;
    if (block.type === 'result') {
      addVoiceResultBlock(block, isNew);
    }
    return;
  }

  const arrivedAt = Date.now();
  const isRepeat = _lastVoiceBlockText === block.text && (arrivedAt - _lastVoiceBlockTime) < 500;
  if (isRepeat) return;
  _lastVoiceBlockText = block.text;
  _lastVoiceBlockTime = arrivedAt;

  const element = addVoiceBlock(block.text, false);
  if (element && isNew && ttsEnabled) {
    element.classList.add('speaking');
    speak(block.text);
    setTimeout(function() { element.classList.remove('speaking'); }, 2000);
  }
}
749
+
750
/**
 * Clear the voice transcript and rebuild it from the stored chunks of a
 * conversation.
 *
 * Resets the duplicate/break tracking state, subscribes to the conversation
 * and fetches `/api/conversations/<id>/chunks`. Text chunks are rendered via
 * addVoiceBlock, result chunks via addVoiceResultBlock (never auto-spoken);
 * any other chunk type only forces the next text into a new block.
 *
 * Changes from the previous implementation:
 *  - a response that arrives after the user switched to another conversation
 *    is ignored instead of being rendered into the new view;
 *  - a chunk whose `data` is not valid JSON is skipped with a warning rather
 *    than aborting the load and wiping what was already rendered;
 *  - the conversation id is URL-encoded in the request path.
 *
 * @param {string|null} conversationId Conversation to load; a falsy value
 *   shows the empty placeholder and unsubscribes instead.
 * @returns {void}
 */
function loadVoiceBlocks(conversationId) {
  var container = document.getElementById('voiceMessages');
  if (!container) return;
  container.innerHTML = '';
  _lastVoiceBlockText = null;
  _lastVoiceBlockTime = 0;
  _voiceBreakNext = false;
  if (!conversationId) {
    showVoiceEmpty(container);
    unsubscribeFromConversation();
    return;
  }
  isLoadingHistory = true;
  subscribeToConversation(conversationId);

  // Every caller in this file passes currentConversationId, so a mismatch by
  // the time the response arrives means the user switched conversations and
  // this response must not be rendered into the new view.
  function isSuperseded() {
    return conversationId !== currentConversationId;
  }

  fetch(BASE + '/api/conversations/' + encodeURIComponent(conversationId) + '/chunks')
    .then(function(res) { return res.json(); })
    .then(function(data) {
      if (isSuperseded()) return;
      isLoadingHistory = false;
      if (!data.ok || !Array.isArray(data.chunks) || data.chunks.length === 0) {
        showVoiceEmpty(container);
        return;
      }
      var hasContent = false;
      _voiceBreakNext = false;
      data.chunks.forEach(function(chunk) {
        if (chunk.sequence !== undefined) renderedSeqs.add(chunk.sequence);
        var block;
        try {
          block = typeof chunk.data === 'string' ? JSON.parse(chunk.data) : chunk.data;
        } catch (err) {
          // One malformed chunk must not discard the rest of the history.
          console.warn('[Voice] Skipping unparsable chunk:', err);
          return;
        }
        if (!block) return;
        if (block.type === 'text' && block.text) {
          addVoiceBlock(block.text, false);
          hasContent = true;
        } else if (block.type === 'result') {
          _voiceBreakNext = true;
          addVoiceResultBlock(block, false);
          hasContent = true;
        } else {
          _voiceBreakNext = true;
        }
      });
      if (!hasContent) showVoiceEmpty(container);
    })
    .catch(function() {
      if (isSuperseded()) return;
      isLoadingHistory = false;
      showVoiceEmpty(container);
    });
}
796
+
797
/**
 * Ask the server, over the shared websocket manager, to stream events for a
 * conversation. Does nothing when no id is given or the client/websocket
 * manager is not available.
 *
 * @param {string} conversationId Conversation to subscribe to.
 * @returns {void}
 */
function subscribeToConversation(conversationId) {
  const clientReady =
    typeof agentGUIClient !== 'undefined' && agentGUIClient && agentGUIClient.wsManager;
  if (conversationId && clientReady) {
    agentGUIClient.wsManager.sendMessage({
      type: 'subscribe',
      conversationId: conversationId,
      timestamp: Date.now()
    });
  }
}
803
+
804
/**
 * Tell the server to stop streaming events for the conversation currently
 * held in `currentConversationId`. Does nothing when there is no current
 * conversation or the client/websocket manager is not available.
 *
 * @returns {void}
 */
function unsubscribeFromConversation() {
  const clientReady =
    typeof agentGUIClient !== 'undefined' && agentGUIClient && agentGUIClient.wsManager;
  if (clientReady && currentConversationId) {
    agentGUIClient.wsManager.sendMessage({
      type: 'unsubscribe',
      conversationId: currentConversationId,
      timestamp: Date.now()
    });
  }
}
810
+
811
/**
 * Replace the container's content with the "empty transcript" placeholder:
 * a microphone icon followed by the usage hint.
 *
 * @param {HTMLElement} container Element whose innerHTML is overwritten.
 * @returns {void}
 */
function showVoiceEmpty(container) {
  const micIcon = '<div class="voice-empty-icon"><svg viewBox="0 0 24 24" width="64" height="64" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" y1="19" x2="12" y2="23"/><line x1="8" y1="23" x2="16" y2="23"/></svg></div>';
  const usageHint = '<div>Hold the microphone button to record.<br>Release to transcribe. Tap Send to submit.<br>New responses will be read aloud.</div>';
  container.innerHTML = '<div class="voice-empty">' + micIcon + usageHint + '</div>';
}
814
+
815
/**
 * Mark the voice view as active. With a current conversation its history is
 * (re)loaded; otherwise the empty placeholder is shown, but only if the
 * message container exists and has no child nodes yet.
 *
 * @returns {void}
 */
function activate() {
  voiceActive = true;
  if (currentConversationId) {
    loadVoiceBlocks(currentConversationId);
    return;
  }
  const messages = document.getElementById('voiceMessages');
  if (!messages || messages.hasChildNodes()) return;
  showVoiceEmpty(messages);
}
826
+
827
/**
 * Mark the voice view as inactive: stop any speech in progress and
 * unsubscribe from the current conversation.
 *
 * @returns {void}
 */
function deactivate() {
  voiceActive = false;
  [stopSpeaking, unsubscribeFromConversation].forEach(function(teardown) {
    teardown();
  });
}
832
+
833
/**
 * Escape the five HTML-significant characters (& < > " ') so the text can be
 * inserted into markup.
 *
 * @param {string} text Raw text.
 * @returns {string} Text with the characters replaced by entities.
 */
function escapeHtml(text) {
  // '&' must go first so the entities produced below are not re-escaped.
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
837
+
838
// Public surface of the voice module, exposed on window.
window.voiceModule = {
  activate,
  deactivate,
  handleBlock: handleVoiceBlock
};

// Run init right away when the DOM is already parsed, otherwise wait for it.
if (document.readyState !== 'loading') {
  init();
} else {
  document.addEventListener('DOMContentLoaded', init);
}
849
+ })();