agentgui 1.0.146 → 1.0.148

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/speech.js CHANGED
@@ -1,4 +1,3 @@
1
- import { pipeline, env } from '@huggingface/transformers';
2
1
  import { createRequire } from 'module';
3
2
  import fs from 'fs';
4
3
  import path from 'path';
@@ -13,13 +12,21 @@ const SPEAKER_EMBEDDINGS_URL = 'https://huggingface.co/datasets/Xenova/transform
13
12
  const SPEAKER_EMBEDDINGS_PATH = path.join(DATA_DIR, 'speaker_embeddings.bin');
14
13
  const SAMPLE_RATE_TTS = 16000;
15
14
  const SAMPLE_RATE_STT = 16000;
15
+ const MIN_WAV_SIZE = 44;
16
16
 
17
+ let transformersModule = null;
17
18
  let sttPipeline = null;
18
19
  let ttsPipeline = null;
19
20
  let speakerEmbeddings = null;
20
21
  let sttLoading = false;
21
22
  let ttsLoading = false;
22
23
 
24
+ async function loadTransformers() {
25
+ if (transformersModule) return transformersModule;
26
+ transformersModule = await import('@huggingface/transformers');
27
+ return transformersModule;
28
+ }
29
+
23
30
  function whisperModelPath() {
24
31
  try {
25
32
  const webtalkDir = path.dirname(require.resolve('webtalk'));
@@ -46,10 +53,12 @@ async function getSTT() {
46
53
  if (sttPipeline) return sttPipeline;
47
54
  if (sttLoading) {
48
55
  while (sttLoading) await new Promise(r => setTimeout(r, 100));
56
+ if (!sttPipeline) throw new Error('STT pipeline failed to load');
49
57
  return sttPipeline;
50
58
  }
51
59
  sttLoading = true;
52
60
  try {
61
+ const { pipeline, env } = await loadTransformers();
53
62
  const modelPath = whisperModelPath();
54
63
  const isLocal = !modelPath.includes('/') || fs.existsSync(modelPath);
55
64
  env.allowLocalModels = true;
@@ -60,6 +69,9 @@ async function getSTT() {
60
69
  local_files_only: isLocal,
61
70
  });
62
71
  return sttPipeline;
72
+ } catch (err) {
73
+ sttPipeline = null;
74
+ throw new Error('STT model load failed: ' + err.message);
63
75
  } finally {
64
76
  sttLoading = false;
65
77
  }
@@ -69,10 +81,12 @@ async function getTTS() {
69
81
  if (ttsPipeline) return ttsPipeline;
70
82
  if (ttsLoading) {
71
83
  while (ttsLoading) await new Promise(r => setTimeout(r, 100));
84
+ if (!ttsPipeline) throw new Error('TTS pipeline failed to load');
72
85
  return ttsPipeline;
73
86
  }
74
87
  ttsLoading = true;
75
88
  try {
89
+ const { pipeline, env } = await loadTransformers();
76
90
  env.allowRemoteModels = true;
77
91
  ttsPipeline = await pipeline('text-to-speech', 'Xenova/speecht5_tts', {
78
92
  device: 'cpu',
@@ -80,6 +94,9 @@ async function getTTS() {
80
94
  });
81
95
  await ensureSpeakerEmbeddings();
82
96
  return ttsPipeline;
97
+ } catch (err) {
98
+ ttsPipeline = null;
99
+ throw new Error('TTS model load failed: ' + err.message);
83
100
  } finally {
84
101
  ttsLoading = false;
85
102
  }
@@ -159,18 +176,44 @@ function encodeWav(float32Audio, sampleRate) {
159
176
  }
160
177
 
161
178
  async function transcribe(audioBuffer) {
162
- const stt = await getSTT();
163
- let audio;
164
179
  const buf = Buffer.isBuffer(audioBuffer) ? audioBuffer : Buffer.from(audioBuffer);
180
+ if (buf.length < MIN_WAV_SIZE) {
181
+ throw new Error('Audio too short (' + buf.length + ' bytes)');
182
+ }
183
+ let audio;
165
184
  const isWav = buf.length > 4 && buf.toString('ascii', 0, 4) === 'RIFF';
166
185
  if (isWav) {
167
- const decoded = decodeWavToFloat32(buf);
186
+ let decoded;
187
+ try {
188
+ decoded = decodeWavToFloat32(buf);
189
+ } catch (err) {
190
+ throw new Error('WAV decode failed: ' + err.message);
191
+ }
192
+ if (!decoded.audio || decoded.audio.length === 0) {
193
+ throw new Error('WAV contains no audio samples');
194
+ }
168
195
  audio = resampleTo16k(decoded.audio, decoded.sampleRate);
169
196
  } else {
170
- audio = new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4);
197
+ const sampleCount = Math.floor(buf.byteLength / 4);
198
+ if (sampleCount === 0) throw new Error('Audio buffer too small');
199
+ const aligned = new ArrayBuffer(sampleCount * 4);
200
+ new Uint8Array(aligned).set(buf.subarray(0, sampleCount * 4));
201
+ audio = new Float32Array(aligned);
202
+ }
203
+ if (audio.length < 100) {
204
+ throw new Error('Audio too short for transcription');
205
+ }
206
+ const stt = await getSTT();
207
+ let result;
208
+ try {
209
+ result = await stt(audio);
210
+ } catch (err) {
211
+ throw new Error('Transcription engine error: ' + err.message);
212
+ }
213
+ if (!result || typeof result.text !== 'string') {
214
+ return '';
171
215
  }
172
- const result = await stt(audio);
173
- return result.text || '';
216
+ return result.text;
174
217
  }
175
218
 
176
219
  async function synthesize(text) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.146",
3
+ "version": "1.0.148",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
package/server.js CHANGED
@@ -8,14 +8,18 @@ import { execSync } from 'child_process';
8
8
  import { createRequire } from 'module';
9
9
  import { queries } from './database.js';
10
10
  import { runClaudeWithStreaming } from './lib/claude-runner.js';
11
- import { transcribe, synthesize, getStatus as getSpeechStatus } from './lib/speech.js';
11
+ let speechModule = null;
12
+ async function getSpeech() {
13
+ if (!speechModule) speechModule = await import('./lib/speech.js');
14
+ return speechModule;
15
+ }
12
16
 
13
17
  const require = createRequire(import.meta.url);
14
18
  const express = require('express');
15
19
  const Busboy = require('busboy');
16
20
  const fsbrowse = require('fsbrowse');
17
21
 
18
- const SYSTEM_PROMPT = `Always write your responses in ripple-ui enhanced HTML. Avoid overriding light/dark mode CSS variables. Use all the benefits of HTML to express technical details with proper semantic markup, tables, code blocks, headings, and lists. Write clean, well-structured HTML that respects the existing design system.`;
22
+ const SYSTEM_PROMPT = `Write all responses as clean semantic HTML. Use tags like <h3>, <p>, <ul>, <li>, <ol>, <table>, <code>, <pre>, <strong>, <em>, <a>, <blockquote>, <details>, <summary>. Your HTML will be rendered directly in a styled container that already provides fonts, colors, spacing, and dark mode support. Do not include <html>, <head>, <body>, <style>, or <script> tags. Do not use inline styles unless necessary for layout like tables. Do not use CSS class names. Just write semantic HTML content.`;
19
23
 
20
24
  const activeExecutions = new Map();
21
25
  const messageQueues = new Map();
@@ -327,6 +331,27 @@ const server = http.createServer(async (req, res) => {
327
331
  return;
328
332
  }
329
333
 
334
+ const fullLoadMatch = pathOnly.match(/^\/api\/conversations\/([^/]+)\/full$/);
335
+ if (fullLoadMatch && req.method === 'GET') {
336
+ const conversationId = fullLoadMatch[1];
337
+ const conv = queries.getConversation(conversationId);
338
+ if (!conv) { res.writeHead(404, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: 'Not found' })); return; }
339
+ const latestSession = queries.getLatestSession(conversationId);
340
+ const isActivelyStreaming = activeExecutions.has(conversationId) ||
341
+ (latestSession && latestSession.status === 'active');
342
+ const chunks = queries.getConversationChunks(conversationId);
343
+ const msgResult = queries.getPaginatedMessages(conversationId, 100, 0);
344
+ res.writeHead(200, { 'Content-Type': 'application/json' });
345
+ res.end(JSON.stringify({
346
+ conversation: conv,
347
+ isActivelyStreaming,
348
+ latestSession,
349
+ chunks,
350
+ messages: msgResult.messages
351
+ }));
352
+ return;
353
+ }
354
+
330
355
  const conversationChunksMatch = pathOnly.match(/^\/api\/conversations\/([^/]+)\/chunks$/);
331
356
  if (conversationChunksMatch && req.method === 'GET') {
332
357
  const conversationId = conversationChunksMatch[1];
@@ -450,13 +475,16 @@ const server = http.createServer(async (req, res) => {
450
475
  res.end(JSON.stringify({ error: 'No audio data' }));
451
476
  return;
452
477
  }
478
+ const { transcribe } = await getSpeech();
453
479
  const text = await transcribe(audioBuffer);
454
480
  res.writeHead(200, { 'Content-Type': 'application/json' });
455
- res.end(JSON.stringify({ text: text.trim() }));
481
+ res.end(JSON.stringify({ text: (text || '').trim() }));
456
482
  } catch (err) {
457
483
  debugLog('[STT] Error: ' + err.message);
458
- res.writeHead(500, { 'Content-Type': 'application/json' });
459
- res.end(JSON.stringify({ error: err.message }));
484
+ if (!res.headersSent) {
485
+ res.writeHead(500, { 'Content-Type': 'application/json' });
486
+ }
487
+ res.end(JSON.stringify({ error: err.message || 'STT failed' }));
460
488
  }
461
489
  return;
462
490
  }
@@ -470,20 +498,29 @@ const server = http.createServer(async (req, res) => {
470
498
  res.end(JSON.stringify({ error: 'No text provided' }));
471
499
  return;
472
500
  }
501
+ const { synthesize } = await getSpeech();
473
502
  const wavBuffer = await synthesize(text);
474
503
  res.writeHead(200, { 'Content-Type': 'audio/wav', 'Content-Length': wavBuffer.length });
475
504
  res.end(wavBuffer);
476
505
  } catch (err) {
477
506
  debugLog('[TTS] Error: ' + err.message);
478
- res.writeHead(500, { 'Content-Type': 'application/json' });
479
- res.end(JSON.stringify({ error: err.message }));
507
+ if (!res.headersSent) {
508
+ res.writeHead(500, { 'Content-Type': 'application/json' });
509
+ }
510
+ res.end(JSON.stringify({ error: err.message || 'TTS failed' }));
480
511
  }
481
512
  return;
482
513
  }
483
514
 
484
515
  if (routePath === '/api/speech-status' && req.method === 'GET') {
485
- res.writeHead(200, { 'Content-Type': 'application/json' });
486
- res.end(JSON.stringify(getSpeechStatus()));
516
+ try {
517
+ const { getStatus } = await getSpeech();
518
+ res.writeHead(200, { 'Content-Type': 'application/json' });
519
+ res.end(JSON.stringify(getStatus()));
520
+ } catch (err) {
521
+ res.writeHead(200, { 'Content-Type': 'application/json' });
522
+ res.end(JSON.stringify({ sttReady: false, ttsReady: false, sttLoading: false, ttsLoading: false }));
523
+ }
487
524
  return;
488
525
  }
489
526
 
@@ -522,7 +559,7 @@ const server = http.createServer(async (req, res) => {
522
559
  const mimeTypes = { '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.gif': 'image/gif', '.webp': 'image/webp', '.svg': 'image/svg+xml' };
523
560
  const contentType = mimeTypes[ext] || 'application/octet-stream';
524
561
  const fileContent = fs.readFileSync(normalizedPath);
525
- res.writeHead(200, { 'Content-Type': contentType, 'Cache-Control': 'public, max-age=3600' });
562
+ res.writeHead(200, { 'Content-Type': contentType, 'Cache-Control': 'no-cache' });
526
563
  res.end(fileContent);
527
564
  } catch (err) {
528
565
  res.writeHead(400, { 'Content-Type': 'application/json' });
@@ -574,7 +611,7 @@ function serveFile(filePath, res) {
574
611
  res.writeHead(200, {
575
612
  'Content-Type': contentType,
576
613
  'Content-Length': stats.size,
577
- 'Cache-Control': 'public, max-age=3600'
614
+ 'Cache-Control': 'no-cache, must-revalidate'
578
615
  });
579
616
  fs.createReadStream(filePath).pipe(res);
580
617
  });
package/static/index.html CHANGED
@@ -1106,6 +1106,26 @@
1106
1106
  border-top: 1px solid var(--color-border);
1107
1107
  }
1108
1108
 
1109
+ .voice-reread-btn {
1110
+ position: absolute;
1111
+ top: 0.5rem;
1112
+ right: 0.5rem;
1113
+ background: none;
1114
+ border: 1px solid var(--color-border);
1115
+ border-radius: 0.25rem;
1116
+ cursor: pointer;
1117
+ padding: 0.25rem;
1118
+ color: var(--color-text-secondary);
1119
+ opacity: 0;
1120
+ transition: opacity 0.15s, background-color 0.15s;
1121
+ display: flex;
1122
+ align-items: center;
1123
+ justify-content: center;
1124
+ }
1125
+
1126
+ .voice-block:hover .voice-reread-btn { opacity: 1; }
1127
+ .voice-reread-btn:hover { background: var(--color-bg-primary); color: var(--color-primary); }
1128
+
1109
1129
  /* ===== RESPONSIVE: TABLET ===== */
1110
1130
  @media (min-width: 769px) and (max-width: 1024px) {
1111
1131
  :root { --sidebar-width: 260px; }
@@ -420,7 +420,6 @@ class AgentGUIClient {
420
420
  if (outputEl) {
421
421
  let messagesEl = outputEl.querySelector('.conversation-messages');
422
422
  if (!messagesEl) {
423
- // Load existing conversation history before starting the stream
424
423
  const conv = this.state.currentConversation;
425
424
  const wdInfo = conv?.workingDirectory ? ` - ${this.escapeHtml(conv.workingDirectory)}` : '';
426
425
  outputEl.innerHTML = `
@@ -431,14 +430,16 @@ class AgentGUIClient {
431
430
  <div class="conversation-messages"></div>
432
431
  `;
433
432
  messagesEl = outputEl.querySelector('.conversation-messages');
434
- // Load prior messages into the container
435
433
  try {
436
- const msgResp = await fetch(window.__BASE_URL + `/api/conversations/${data.conversationId}/messages`);
437
- if (msgResp.ok) {
438
- const msgData = await msgResp.json();
439
- const priorChunks = await this.fetchChunks(data.conversationId, 0);
434
+ const fullResp = await fetch(window.__BASE_URL + `/api/conversations/${data.conversationId}/full`);
435
+ if (fullResp.ok) {
436
+ const fullData = await fullResp.json();
437
+ const priorChunks = (fullData.chunks || []).map(c => ({
438
+ ...c,
439
+ block: typeof c.data === 'string' ? JSON.parse(c.data) : c.data
440
+ }));
441
+ const userMsgs = (fullData.messages || []).filter(m => m.role === 'user');
440
442
  if (priorChunks.length > 0) {
441
- const userMsgs = (msgData.messages || []).filter(m => m.role === 'user');
442
443
  const sessionOrder = [];
443
444
  const sessionGroups = {};
444
445
  priorChunks.forEach(c => {
@@ -468,7 +469,7 @@ class AgentGUIClient {
468
469
  messagesEl.insertAdjacentHTML('beforeend', `<div class="message message-user" data-msg-id="${m.id}"><div class="message-role">User</div>${this.renderMessageContent(m.content)}<div class="message-timestamp">${new Date(m.created_at).toLocaleString()}</div></div>`);
469
470
  }
470
471
  } else {
471
- messagesEl.innerHTML = this.renderMessages(msgData.messages || []);
472
+ messagesEl.innerHTML = this.renderMessages(fullData.messages || []);
472
473
  }
473
474
  }
474
475
  } catch (e) {
@@ -516,7 +517,7 @@ class AgentGUIClient {
516
517
  if (block.type === 'text' && block.text) {
517
518
  const text = block.text;
518
519
  if (this.isHtmlContent(text)) {
519
- return `<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${text}</div>`;
520
+ return `<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(text)}</div>`;
520
521
  }
521
522
  const parts = this.parseMarkdownCodeBlocks(text);
522
523
  if (parts.length === 1 && parts[0].type === 'text') {
@@ -524,7 +525,7 @@ class AgentGUIClient {
524
525
  }
525
526
  return parts.map(part => {
526
527
  if (part.type === 'html') {
527
- return `<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${part.content}</div>`;
528
+ return `<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(part.content)}</div>`;
528
529
  } else if (part.type === 'code') {
529
530
  return this.renderCodeBlock(part.language, part.code);
530
531
  }
@@ -682,9 +683,17 @@ class AgentGUIClient {
682
683
  }
683
684
 
684
685
  isHtmlContent(text) {
685
- const openTag = /<(?:div|table|section|article|form|ul|ol|dl|nav|header|footer|main|aside|figure|details|summary|h[1-6])\b[^>]*>/i;
686
- const closeTag = /<\/(?:div|table|section|article|form|ul|ol|dl|nav|header|footer|main|aside|figure|details|summary|h[1-6])>/i;
687
- return openTag.test(text) && closeTag.test(text);
686
+ const htmlPattern = /<(?:div|table|section|article|ul|ol|dl|nav|header|footer|main|aside|figure|details|summary|h[1-6]|p|blockquote|pre|code|span|strong|em|a|img|br|hr|li|td|tr|th|thead|tbody|tfoot)\b[^>]*>/i;
687
+ return htmlPattern.test(text);
688
+ }
689
+
690
+ sanitizeHtml(html) {
691
+ const dangerous = /<\s*\/?\s*(script|iframe|object|embed|applet|form|input|button|select|textarea)\b[^>]*>/gi;
692
+ let cleaned = html.replace(dangerous, '');
693
+ cleaned = cleaned.replace(/\s+on\w+\s*=\s*["'][^"']*["']/gi, '');
694
+ cleaned = cleaned.replace(/\s+on\w+\s*=\s*[^\s>]+/gi, '');
695
+ cleaned = cleaned.replace(/javascript\s*:/gi, '');
696
+ return cleaned;
688
697
  }
689
698
 
690
699
  parseMarkdownCodeBlocks(text) {
@@ -735,7 +744,7 @@ class AgentGUIClient {
735
744
  Rendered HTML
736
745
  </div>
737
746
  <div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">
738
- ${code}
747
+ ${this.sanitizeHtml(code)}
739
748
  </div>
740
749
  </div>
741
750
  `;
@@ -751,7 +760,7 @@ class AgentGUIClient {
751
760
  renderMessageContent(content) {
752
761
  if (typeof content === 'string') {
753
762
  if (this.isHtmlContent(content)) {
754
- return `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${content}</div></div>`;
763
+ return `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(content)}</div></div>`;
755
764
  }
756
765
  return `<div class="message-text">${this.escapeHtml(content)}</div>`;
757
766
  } else if (content && typeof content === 'object' && content.type === 'claude_execution') {
@@ -762,7 +771,7 @@ class AgentGUIClient {
762
771
  const parts = this.parseMarkdownCodeBlocks(block.text);
763
772
  parts.forEach(part => {
764
773
  if (part.type === 'html') {
765
- html += `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${part.content}</div></div>`;
774
+ html += `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(part.content)}</div></div>`;
766
775
  } else if (part.type === 'text') {
767
776
  html += `<div class="message-text">${this.escapeHtml(part.content)}</div>`;
768
777
  } else if (part.type === 'code') {
@@ -778,7 +787,7 @@ class AgentGUIClient {
778
787
  Rendered HTML
779
788
  </div>
780
789
  <div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">
781
- ${block.code}
790
+ ${this.sanitizeHtml(block.code)}
782
791
  </div>
783
792
  </div>
784
793
  `;
@@ -1166,135 +1175,64 @@ class AgentGUIClient {
1166
1175
 
1167
1176
  async loadConversationMessages(conversationId) {
1168
1177
  try {
1169
- // Save scroll position of current conversation before switching
1170
1178
  if (this.state.currentConversation?.id) {
1171
1179
  this.saveScrollPosition(this.state.currentConversation.id);
1172
1180
  }
1173
-
1174
- // Stop any existing polling when switching conversations
1175
1181
  this.stopChunkPolling();
1176
-
1177
- // Clear streaming state from previous conversation view
1178
- // (the actual streaming continues on the server, we just stop tracking it on the UI side)
1179
1182
  if (this.state.isStreaming && this.state.currentConversation?.id !== conversationId) {
1180
1183
  this.state.isStreaming = false;
1181
1184
  this.state.currentSession = null;
1182
1185
  }
1183
1186
 
1184
- const convResponse = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}`);
1185
- const { conversation, isActivelyStreaming, latestSession } = await convResponse.json();
1186
- this.state.currentConversation = conversation;
1187
-
1188
- // Update URL with conversation ID
1189
1187
  this.updateUrlForConversation(conversationId);
1190
-
1191
1188
  if (this.wsManager.isConnected) {
1192
1189
  this.wsManager.sendMessage({ type: 'subscribe', conversationId });
1193
1190
  }
1194
1191
 
1195
- // Check if there's an active streaming session that needs to be resumed
1196
- // isActivelyStreaming comes from the server checking both in-memory activeExecutions map
1197
- // AND database session status. Use it as primary signal, with session status as confirmation.
1198
- const shouldResumeStreaming = isActivelyStreaming && latestSession &&
1199
- (latestSession.status === 'active' || latestSession.status === 'pending');
1192
+ const resp = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}/full`);
1193
+ if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
1194
+ const { conversation, isActivelyStreaming, latestSession, chunks: rawChunks, messages: allMessages } = await resp.json();
1200
1195
 
1201
- // Try to fetch chunks first (Wave 3 architecture)
1202
- try {
1203
- const chunks = await this.fetchChunks(conversationId, 0);
1204
-
1205
- const outputEl = document.getElementById('output');
1206
- if (outputEl) {
1207
- const wdInfo = conversation.workingDirectory ? ` - ${this.escapeHtml(conversation.workingDirectory)}` : '';
1208
- outputEl.innerHTML = `
1209
- <div class="conversation-header">
1210
- <h2>${this.escapeHtml(conversation.title || 'Conversation')}</h2>
1211
- <p class="text-secondary">${conversation.agentType || 'unknown'} - ${new Date(conversation.created_at).toLocaleDateString()}${wdInfo}</p>
1212
- </div>
1213
- <div class="conversation-messages"></div>
1214
- `;
1215
-
1216
- // Render all chunks
1217
- const messagesEl = outputEl.querySelector('.conversation-messages');
1218
- if (chunks.length > 0) {
1219
- // Fetch user messages to interleave with session chunks
1220
- let userMessages = [];
1221
- try {
1222
- const msgResp = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}/messages`);
1223
- if (msgResp.ok) {
1224
- const msgData = await msgResp.json();
1225
- userMessages = (msgData.messages || []).filter(m => m.role === 'user');
1226
- }
1227
- } catch (_) {}
1228
-
1229
- // Group chunks by session, preserving order
1230
- const sessionOrder = [];
1231
- const sessionChunks = {};
1232
- chunks.forEach(chunk => {
1233
- if (!sessionChunks[chunk.sessionId]) {
1234
- sessionChunks[chunk.sessionId] = [];
1235
- sessionOrder.push(chunk.sessionId);
1236
- }
1237
- sessionChunks[chunk.sessionId].push(chunk);
1238
- });
1196
+ this.state.currentConversation = conversation;
1239
1197
 
1240
- // Build a timeline: match user messages to sessions by timestamp
1241
- let userMsgIdx = 0;
1242
- sessionOrder.forEach((sessionId) => {
1243
- const sessionChunkList = sessionChunks[sessionId];
1244
- const sessionStart = sessionChunkList[0].created_at;
1245
-
1246
- // Render user messages that came before this session
1247
- while (userMsgIdx < userMessages.length && userMessages[userMsgIdx].created_at <= sessionStart) {
1248
- const msg = userMessages[userMsgIdx];
1249
- const userDiv = document.createElement('div');
1250
- userDiv.className = 'message message-user';
1251
- userDiv.setAttribute('data-msg-id', msg.id);
1252
- userDiv.innerHTML = `
1253
- <div class="message-role">User</div>
1254
- ${this.renderMessageContent(msg.content)}
1255
- <div class="message-timestamp">${new Date(msg.created_at).toLocaleString()}</div>
1256
- `;
1257
- messagesEl.appendChild(userDiv);
1258
- userMsgIdx++;
1259
- }
1198
+ const chunks = (rawChunks || []).map(chunk => ({
1199
+ ...chunk,
1200
+ block: typeof chunk.data === 'string' ? JSON.parse(chunk.data) : chunk.data
1201
+ }));
1202
+ const userMessages = (allMessages || []).filter(m => m.role === 'user');
1260
1203
 
1261
- const isCurrentActiveSession = shouldResumeStreaming && latestSession && latestSession.id === sessionId;
1262
- const messageDiv = document.createElement('div');
1263
- messageDiv.className = `message message-assistant${isCurrentActiveSession ? ' streaming-message' : ''}`;
1264
- messageDiv.id = isCurrentActiveSession ? `streaming-${sessionId}` : `message-${sessionId}`;
1265
- messageDiv.innerHTML = '<div class="message-role">Assistant</div><div class="message-blocks streaming-blocks"></div>';
1266
-
1267
- const blocksEl = messageDiv.querySelector('.message-blocks');
1268
- sessionChunkList.forEach(chunk => {
1269
- if (chunk.block && chunk.block.type) {
1270
- const element = this.renderer.renderBlock(chunk.block, chunk);
1271
- if (element) {
1272
- blocksEl.appendChild(element);
1273
- }
1274
- }
1275
- });
1204
+ const shouldResumeStreaming = isActivelyStreaming && latestSession &&
1205
+ (latestSession.status === 'active' || latestSession.status === 'pending');
1276
1206
 
1277
- if (isCurrentActiveSession) {
1278
- const indicatorDiv = document.createElement('div');
1279
- indicatorDiv.className = 'streaming-indicator';
1280
- indicatorDiv.style = 'display:flex;align-items:center;gap:0.5rem;padding:0.5rem 0;color:var(--color-text-secondary);font-size:0.875rem;';
1281
- indicatorDiv.innerHTML = `
1282
- <span class="animate-spin" style="display:inline-block;width:1rem;height:1rem;border:2px solid var(--color-border);border-top-color:var(--color-primary);border-radius:50%;"></span>
1283
- <span class="streaming-indicator-label">Processing...</span>
1284
- `;
1285
- messageDiv.appendChild(indicatorDiv);
1286
- } else {
1287
- const ts = document.createElement('div');
1288
- ts.className = 'message-timestamp';
1289
- ts.textContent = new Date(sessionChunkList[sessionChunkList.length - 1].created_at).toLocaleString();
1290
- messageDiv.appendChild(ts);
1291
- }
1207
+ const outputEl = document.getElementById('output');
1208
+ if (outputEl) {
1209
+ const wdInfo = conversation.workingDirectory ? ` - ${this.escapeHtml(conversation.workingDirectory)}` : '';
1210
+ outputEl.innerHTML = `
1211
+ <div class="conversation-header">
1212
+ <h2>${this.escapeHtml(conversation.title || 'Conversation')}</h2>
1213
+ <p class="text-secondary">${conversation.agentType || 'unknown'} - ${new Date(conversation.created_at).toLocaleDateString()}${wdInfo}</p>
1214
+ </div>
1215
+ <div class="conversation-messages"></div>
1216
+ `;
1292
1217
 
1293
- messagesEl.appendChild(messageDiv);
1294
- });
1218
+ const messagesEl = outputEl.querySelector('.conversation-messages');
1219
+ if (chunks.length > 0) {
1220
+ const sessionOrder = [];
1221
+ const sessionChunks = {};
1222
+ chunks.forEach(chunk => {
1223
+ if (!sessionChunks[chunk.sessionId]) {
1224
+ sessionChunks[chunk.sessionId] = [];
1225
+ sessionOrder.push(chunk.sessionId);
1226
+ }
1227
+ sessionChunks[chunk.sessionId].push(chunk);
1228
+ });
1229
+
1230
+ let userMsgIdx = 0;
1231
+ sessionOrder.forEach((sessionId) => {
1232
+ const sessionChunkList = sessionChunks[sessionId];
1233
+ const sessionStart = sessionChunkList[0].created_at;
1295
1234
 
1296
- // Render any remaining user messages after the last session
1297
- while (userMsgIdx < userMessages.length) {
1235
+ while (userMsgIdx < userMessages.length && userMessages[userMsgIdx].created_at <= sessionStart) {
1298
1236
  const msg = userMessages[userMsgIdx];
1299
1237
  const userDiv = document.createElement('div');
1300
1238
  userDiv.className = 'message message-user';
@@ -1307,78 +1245,83 @@ class AgentGUIClient {
1307
1245
  messagesEl.appendChild(userDiv);
1308
1246
  userMsgIdx++;
1309
1247
  }
1310
- } else {
1311
- // Fall back to messages if no chunks
1312
- const messagesResponse = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}/messages`);
1313
- if (messagesResponse.ok) {
1314
- const messagesData = await messagesResponse.json();
1315
- messagesEl.innerHTML = this.renderMessages(messagesData.messages || []);
1316
- }
1317
- }
1318
1248
 
1319
- // Resume streaming if needed
1320
- if (shouldResumeStreaming && latestSession) {
1321
- console.log('Resuming live streaming for session:', latestSession.id);
1322
-
1323
- // Set streaming state
1324
- this.state.isStreaming = true;
1325
- this.state.currentSession = {
1326
- id: latestSession.id,
1327
- conversationId: conversationId,
1328
- agentId: conversation.agentType || 'claude-code',
1329
- startTime: latestSession.created_at
1330
- };
1331
-
1332
- // Subscribe to WebSocket updates for BOTH conversation and session
1333
- if (this.wsManager.isConnected) {
1334
- this.wsManager.subscribeToSession(latestSession.id);
1335
- this.wsManager.sendMessage({ type: 'subscribe', conversationId });
1336
- }
1249
+ const isCurrentActiveSession = shouldResumeStreaming && latestSession && latestSession.id === sessionId;
1250
+ const messageDiv = document.createElement('div');
1251
+ messageDiv.className = `message message-assistant${isCurrentActiveSession ? ' streaming-message' : ''}`;
1252
+ messageDiv.id = isCurrentActiveSession ? `streaming-${sessionId}` : `message-${sessionId}`;
1253
+ messageDiv.innerHTML = '<div class="message-role">Assistant</div><div class="message-blocks streaming-blocks"></div>';
1254
+
1255
+ const blocksEl = messageDiv.querySelector('.message-blocks');
1256
+ sessionChunkList.forEach(chunk => {
1257
+ if (chunk.block && chunk.block.type) {
1258
+ const element = this.renderer.renderBlock(chunk.block, chunk);
1259
+ if (element) blocksEl.appendChild(element);
1260
+ }
1261
+ });
1337
1262
 
1338
- // Update URL with session ID
1339
- this.updateUrlForConversation(conversationId, latestSession.id);
1263
+ if (isCurrentActiveSession) {
1264
+ const indicatorDiv = document.createElement('div');
1265
+ indicatorDiv.className = 'streaming-indicator';
1266
+ indicatorDiv.style = 'display:flex;align-items:center;gap:0.5rem;padding:0.5rem 0;color:var(--color-text-secondary);font-size:0.875rem;';
1267
+ indicatorDiv.innerHTML = `
1268
+ <span class="animate-spin" style="display:inline-block;width:1rem;height:1rem;border:2px solid var(--color-border);border-top-color:var(--color-primary);border-radius:50%;"></span>
1269
+ <span class="streaming-indicator-label">Processing...</span>
1270
+ `;
1271
+ messageDiv.appendChild(indicatorDiv);
1272
+ } else {
1273
+ const ts = document.createElement('div');
1274
+ ts.className = 'message-timestamp';
1275
+ ts.textContent = new Date(sessionChunkList[sessionChunkList.length - 1].created_at).toLocaleString();
1276
+ messageDiv.appendChild(ts);
1277
+ }
1340
1278
 
1341
- // Get the timestamp of the last chunk to start polling from
1342
- // Use the last chunk's created_at to avoid re-fetching already-rendered chunks
1343
- const lastChunkTime = chunks.length > 0
1344
- ? chunks[chunks.length - 1].created_at
1345
- : 0;
1279
+ messagesEl.appendChild(messageDiv);
1280
+ });
1346
1281
 
1347
- // Start polling for new chunks from where we left off
1348
- this.chunkPollState.lastFetchTimestamp = lastChunkTime;
1349
- this.startChunkPolling(conversationId);
1282
+ while (userMsgIdx < userMessages.length) {
1283
+ const msg = userMessages[userMsgIdx];
1284
+ const userDiv = document.createElement('div');
1285
+ userDiv.className = 'message message-user';
1286
+ userDiv.setAttribute('data-msg-id', msg.id);
1287
+ userDiv.innerHTML = `
1288
+ <div class="message-role">User</div>
1289
+ ${this.renderMessageContent(msg.content)}
1290
+ <div class="message-timestamp">${new Date(msg.created_at).toLocaleString()}</div>
1291
+ `;
1292
+ messagesEl.appendChild(userDiv);
1293
+ userMsgIdx++;
1294
+ }
1295
+ } else {
1296
+ messagesEl.innerHTML = this.renderMessages(allMessages || []);
1297
+ }
1350
1298
 
1351
- // Disable controls while streaming
1352
- this.disableControls();
1299
+ if (shouldResumeStreaming && latestSession) {
1300
+ this.state.isStreaming = true;
1301
+ this.state.currentSession = {
1302
+ id: latestSession.id,
1303
+ conversationId: conversationId,
1304
+ agentId: conversation.agentType || 'claude-code',
1305
+ startTime: latestSession.created_at
1306
+ };
1307
+
1308
+ if (this.wsManager.isConnected) {
1309
+ this.wsManager.subscribeToSession(latestSession.id);
1310
+ this.wsManager.sendMessage({ type: 'subscribe', conversationId });
1353
1311
  }
1354
1312
 
1355
- // Restore scroll position after rendering
1356
- this.restoreScrollPosition(conversationId);
1357
- }
1358
- } catch (chunkError) {
1359
- console.warn('Failed to fetch chunks, falling back to messages:', chunkError);
1360
-
1361
- // Fallback: use messages
1362
- const messagesResponse = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}/messages`);
1363
- if (!messagesResponse.ok) throw new Error(`Failed to fetch messages: ${messagesResponse.status}`);
1364
- const messagesData = await messagesResponse.json();
1365
-
1366
- const outputEl = document.getElementById('output');
1367
- if (outputEl) {
1368
- const wdInfo = conversation.workingDirectory ? ` - ${this.escapeHtml(conversation.workingDirectory)}` : '';
1369
- outputEl.innerHTML = `
1370
- <div class="conversation-header">
1371
- <h2>${this.escapeHtml(conversation.title || 'Conversation')}</h2>
1372
- <p class="text-secondary">${conversation.agentType || 'unknown'} - ${new Date(conversation.created_at).toLocaleDateString()}${wdInfo}</p>
1373
- </div>
1374
- <div class="conversation-messages">
1375
- ${this.renderMessages(messagesData.messages || [])}
1376
- </div>
1377
- `;
1378
-
1379
- // Restore scroll position after rendering
1380
- this.restoreScrollPosition(conversationId);
1313
+ this.updateUrlForConversation(conversationId, latestSession.id);
1314
+
1315
+ const lastChunkTime = chunks.length > 0
1316
+ ? chunks[chunks.length - 1].created_at
1317
+ : 0;
1318
+
1319
+ this.chunkPollState.lastFetchTimestamp = lastChunkTime;
1320
+ this.startChunkPolling(conversationId);
1321
+ this.disableControls();
1381
1322
  }
1323
+
1324
+ this.restoreScrollPosition(conversationId);
1382
1325
  }
1383
1326
  } catch (error) {
1384
1327
  console.error('Failed to load conversation messages:', error);
@@ -1399,7 +1342,7 @@ class AgentGUIClient {
1399
1342
 
1400
1343
  if (typeof msg.content === 'string') {
1401
1344
  if (this.isHtmlContent(msg.content)) {
1402
- contentHtml = `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${msg.content}</div></div>`;
1345
+ contentHtml = `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(msg.content)}</div></div>`;
1403
1346
  } else {
1404
1347
  contentHtml = `<div class="message-text">${this.escapeHtml(msg.content)}</div>`;
1405
1348
  }
@@ -362,12 +362,30 @@ class StreamingRenderer {
362
362
  div.className = 'block-text';
363
363
 
364
364
  const text = block.text || '';
365
- const html = this.parseAndRenderMarkdown(text);
366
- div.innerHTML = html;
365
+ if (this.containsHtmlTags(text)) {
366
+ div.innerHTML = this.sanitizeHtml(text);
367
+ div.classList.add('html-content');
368
+ } else {
369
+ div.innerHTML = this.parseAndRenderMarkdown(text);
370
+ }
367
371
 
368
372
  return div;
369
373
  }
370
374
 
375
+ containsHtmlTags(text) {
376
+ const htmlPattern = /<(?:div|table|section|article|ul|ol|dl|nav|header|footer|main|aside|figure|details|summary|h[1-6]|p|blockquote|pre|code|span|strong|em|a|img|br|hr|li|td|tr|th|thead|tbody|tfoot)\b[^>]*>/i;
377
+ return htmlPattern.test(text);
378
+ }
379
+
380
+ sanitizeHtml(html) {
381
+ const dangerous = /<\s*\/?\s*(script|iframe|object|embed|applet|form|input|button|select|textarea)\b[^>]*>/gi;
382
+ let cleaned = html.replace(dangerous, '');
383
+ cleaned = cleaned.replace(/\s+on\w+\s*=\s*["'][^"']*["']/gi, '');
384
+ cleaned = cleaned.replace(/\s+on\w+\s*=\s*[^\s>]+/gi, '');
385
+ cleaned = cleaned.replace(/javascript\s*:/gi, '');
386
+ return cleaned;
387
+ }
388
+
371
389
  /**
372
390
  * Parse markdown and render links, code, bold, italic
373
391
  */
@@ -1259,7 +1277,7 @@ class StreamingRenderer {
1259
1277
  ${cost ? `<div class="result-stat"><span class="stat-icon">&#128176;</span><span class="stat-value">${this.escapeHtml(cost)}</span><span class="stat-label">cost</span></div>` : ''}
1260
1278
  ${turns ? `<div class="result-stat"><span class="stat-icon">&#128260;</span><span class="stat-value">${this.escapeHtml(String(turns))}</span><span class="stat-label">turns</span></div>` : ''}
1261
1279
  </div>
1262
- ${block.result ? `<div class="result-content">${this.escapeHtml(typeof block.result === 'string' ? block.result : JSON.stringify(block.result, null, 2))}</div>` : ''}
1280
+ ${block.result ? `<div class="result-content">${(() => { const r = typeof block.result === 'string' ? block.result : JSON.stringify(block.result, null, 2); return this.containsHtmlTags(r) ? '<div class="html-content">' + this.sanitizeHtml(r) + '</div>' : this.escapeHtml(r); })()}</div>` : ''}
1263
1281
  `;
1264
1282
 
1265
1283
  return div;
@@ -3,7 +3,6 @@
3
3
  var isRecording = false;
4
4
  var ttsEnabled = true;
5
5
  var voiceActive = false;
6
- var lastSpokenBlockIndex = -1;
7
6
  var currentConversationId = null;
8
7
  var speechQueue = [];
9
8
  var isSpeaking = false;
@@ -13,6 +12,8 @@
13
12
  var scriptNode = null;
14
13
  var recordedChunks = [];
15
14
  var TARGET_SAMPLE_RATE = 16000;
15
+ var spokenChunks = new Set();
16
+ var isLoadingHistory = false;
16
17
 
17
18
  function init() {
18
19
  setupTTSToggle();
@@ -61,14 +62,28 @@
61
62
  var micBtn = document.getElementById('voiceMicBtn');
62
63
  if (micBtn) {
63
64
  micBtn.removeAttribute('disabled');
64
- micBtn.title = 'Click to record';
65
- micBtn.addEventListener('click', function(e) {
65
+ micBtn.title = 'Hold to record';
66
+ micBtn.addEventListener('mousedown', function(e) {
66
67
  e.preventDefault();
67
- if (!isRecording) {
68
- startRecording();
69
- } else {
70
- stopRecording();
71
- }
68
+ startRecording();
69
+ });
70
+ micBtn.addEventListener('mouseup', function(e) {
71
+ e.preventDefault();
72
+ stopRecording();
73
+ });
74
+ micBtn.addEventListener('mouseleave', function(e) {
75
+ if (isRecording) stopRecording();
76
+ });
77
+ micBtn.addEventListener('touchstart', function(e) {
78
+ e.preventDefault();
79
+ startRecording();
80
+ });
81
+ micBtn.addEventListener('touchend', function(e) {
82
+ e.preventDefault();
83
+ stopRecording();
84
+ });
85
+ micBtn.addEventListener('touchcancel', function(e) {
86
+ if (isRecording) stopRecording();
72
87
  });
73
88
  }
74
89
  var sendBtn = document.getElementById('voiceSendBtn');
@@ -92,6 +107,35 @@
92
107
  return result;
93
108
  }
94
109
 
110
/**
 * Encode mono float PCM samples as a 16-bit little-endian PCM WAV file.
 *
 * @param {Float32Array|number[]} float32Audio - Samples, nominally in
 *   [-1, 1]; values outside that range are clamped, non-finite values are
 *   written as silence.
 * @param {number} sampleRate - Sample rate in Hz written to the header.
 * @returns {ArrayBuffer} A 44-byte RIFF/WAVE header followed by the samples.
 */
function encodeWav(float32Audio, sampleRate) {
  var HEADER_SIZE = 44;
  var NUM_CHANNELS = 1;
  var BITS_PER_SAMPLE = 16;
  var bytesPerSample = BITS_PER_SAMPLE / 8;
  var blockAlign = NUM_CHANNELS * bytesPerSample;
  var numSamples = float32Audio.length;
  var dataSize = numSamples * blockAlign;
  var buffer = new ArrayBuffer(HEADER_SIZE + dataSize);
  var view = new DataView(buffer);

  function writeStr(off, str) {
    for (var i = 0; i < str.length; i++) view.setUint8(off + i, str.charCodeAt(i));
  }

  // RIFF container header; the size field excludes the first 8 bytes.
  writeStr(0, 'RIFF');
  view.setUint32(4, HEADER_SIZE - 8 + dataSize, true);
  writeStr(8, 'WAVE');

  // "fmt " chunk: 16-byte body, format tag 1 (integer PCM).
  writeStr(12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, NUM_CHANNELS, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * blockAlign, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk.
  writeStr(36, 'data');
  view.setUint32(40, dataSize, true);

  for (var j = 0; j < numSamples; j++) {
    var s = Math.max(-1, Math.min(1, float32Audio[j]));
    var scaled = s < 0 ? s * 32768 : s * 32767;
    // Round to nearest instead of relying on setInt16's implicit truncation
    // toward zero; `|| 0` turns NaN into explicit silence.
    view.setInt16(HEADER_SIZE + j * bytesPerSample, Math.round(scaled) || 0, true);
  }
  return buffer;
}
138
+
95
139
  async function startRecording() {
96
140
  if (isRecording) return;
97
141
  var el = document.getElementById('voiceTranscript');
@@ -146,11 +190,11 @@
146
190
  var resampled = resampleBuffer(merged, sourceSampleRate, TARGET_SAMPLE_RATE);
147
191
  if (el) el.textContent = 'Transcribing...';
148
192
  try {
149
- var pcmBuffer = resampled.buffer;
193
+ var wavBuffer = encodeWav(resampled, TARGET_SAMPLE_RATE);
150
194
  var resp = await fetch(BASE + '/api/stt', {
151
195
  method: 'POST',
152
- headers: { 'Content-Type': 'application/octet-stream' },
153
- body: pcmBuffer
196
+ headers: { 'Content-Type': 'audio/wav' },
197
+ body: wavBuffer
154
198
  });
155
199
  var data = await resp.json();
156
200
  if (data.text) {
@@ -240,6 +284,10 @@
240
284
  }
241
285
  }
242
286
 
287
/**
 * Reduce an HTML-ish string to plain text for display in the voice view.
 *
 * Only sequences that look like real markup are removed: comments,
 * `<!...>` declarations, and tags whose name starts with a letter. A bare
 * `<` or `>` in ordinary text (e.g. "a < b and c > d") is left alone.
 * Block-level tags and `<br>` become a space so adjacent paragraphs do not
 * run together; inline tags are dropped without inserting a space.
 * Whitespace runs are then collapsed and the result trimmed.
 *
 * @param {string} text - Source text, possibly containing HTML.
 * @returns {string} Plain text; '' for null/undefined input.
 */
function stripHtml(text) {
  if (text == null) return '';
  var blockTags = /^(?:address|article|aside|blockquote|br|dd|details|div|dl|dt|figure|footer|h[1-6]|header|hr|li|main|nav|ol|p|pre|section|summary|table|tbody|td|tfoot|th|thead|tr|ul)$/i;
  return String(text)
    .replace(/<!--[\s\S]*?-->|<![^>]*>/g, ' ')
    .replace(/<\/?([a-zA-Z][a-zA-Z0-9-]*)[^>]*>/g, function(match, tagName) {
      return blockTags.test(tagName) ? ' ' : '';
    })
    .replace(/\s+/g, ' ')
    .trim();
}
290
+
243
291
  function addVoiceBlock(text, isUser) {
244
292
  var container = document.getElementById('voiceMessages');
245
293
  if (!container) return;
@@ -247,13 +295,23 @@
247
295
  if (emptyMsg) emptyMsg.remove();
248
296
  var div = document.createElement('div');
249
297
  div.className = 'voice-block' + (isUser ? ' voice-block-user' : '');
250
- div.textContent = text;
298
+ div.textContent = isUser ? text : stripHtml(text);
299
+ if (!isUser) {
300
+ var rereadBtn = document.createElement('button');
301
+ rereadBtn.className = 'voice-reread-btn';
302
+ rereadBtn.title = 'Re-read aloud';
303
+ rereadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
304
+ rereadBtn.addEventListener('click', function() {
305
+ speak(text);
306
+ });
307
+ div.appendChild(rereadBtn);
308
+ }
251
309
  container.appendChild(div);
252
310
  scrollVoiceToBottom();
253
311
  return div;
254
312
  }
255
313
 
256
- function addVoiceResultBlock(block) {
314
+ function addVoiceResultBlock(block, autoSpeak) {
257
315
  var container = document.getElementById('voiceMessages');
258
316
  if (!container) return;
259
317
  var emptyMsg = container.querySelector('.voice-empty');
@@ -267,9 +325,10 @@
267
325
  if (block.result) {
268
326
  resultText = typeof block.result === 'string' ? block.result : JSON.stringify(block.result);
269
327
  }
328
+ var displayText = stripHtml(resultText);
270
329
  var html = '';
271
- if (resultText) {
272
- html += '<div>' + escapeHtml(resultText) + '</div>';
330
+ if (displayText) {
331
+ html += '<div>' + escapeHtml(displayText) + '</div>';
273
332
  }
274
333
  if (duration || cost) {
275
334
  html += '<div class="voice-result-stats">';
@@ -282,9 +341,19 @@
282
341
  html = isError ? 'Execution failed' : 'Execution complete';
283
342
  }
284
343
  div.innerHTML = html;
344
+ if (resultText) {
345
+ var rereadBtn = document.createElement('button');
346
+ rereadBtn.className = 'voice-reread-btn';
347
+ rereadBtn.title = 'Re-read aloud';
348
+ rereadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
349
+ rereadBtn.addEventListener('click', function() {
350
+ speak(resultText);
351
+ });
352
+ div.appendChild(rereadBtn);
353
+ }
285
354
  container.appendChild(div);
286
355
  scrollVoiceToBottom();
287
- if (ttsEnabled && resultText) {
356
+ if (autoSpeak && ttsEnabled && resultText) {
288
357
  speak(resultText);
289
358
  }
290
359
  return div;
@@ -305,31 +374,33 @@
305
374
  var data = e.detail;
306
375
  if (!data) return;
307
376
  if (data.type === 'streaming_progress' && data.block) {
308
- handleVoiceBlock(data.block);
377
+ handleVoiceBlock(data.block, true);
309
378
  }
310
379
  if (data.type === 'streaming_start') {
311
- lastSpokenBlockIndex = -1;
380
+ spokenChunks = new Set();
312
381
  }
313
382
  });
314
383
  window.addEventListener('conversation-selected', function(e) {
315
384
  currentConversationId = e.detail.conversationId;
385
+ stopSpeaking();
386
+ spokenChunks = new Set();
316
387
  if (voiceActive) {
317
388
  loadVoiceBlocks(currentConversationId);
318
389
  }
319
390
  });
320
391
  }
321
392
 
322
/**
 * Route one streamed or replayed block into the voice view.
 *
 * Result blocks are forwarded to addVoiceResultBlock together with `isNew`.
 * Non-empty text blocks are appended via addVoiceBlock and, only when the
 * block is new and TTS is enabled, spoken aloud with a 2-second "speaking"
 * highlight on the appended element. Any other block is ignored.
 *
 * @param {{type: string, text?: string}} block - Block to display.
 * @param {boolean} isNew - True for live blocks, false for history replay.
 */
function handleVoiceBlock(block, isNew) {
  if (!block || !block.type) return;

  if (block.type === 'result') {
    addVoiceResultBlock(block, isNew);
    return;
  }
  if (block.type !== 'text' || !block.text) return;

  var blockEl = addVoiceBlock(block.text, false);
  if (!blockEl || !isNew || !ttsEnabled) return;

  blockEl.classList.add('speaking');
  speak(block.text);
  setTimeout(function() { blockEl.classList.remove('speaking'); }, 2000);
}
335
406
 
@@ -341,9 +412,11 @@
341
412
  showVoiceEmpty(container);
342
413
  return;
343
414
  }
415
+ isLoadingHistory = true;
344
416
  fetch(BASE + '/api/conversations/' + conversationId + '/chunks')
345
417
  .then(function(res) { return res.json(); })
346
418
  .then(function(data) {
419
+ isLoadingHistory = false;
347
420
  if (!data.ok || !Array.isArray(data.chunks) || data.chunks.length === 0) {
348
421
  showVoiceEmpty(container);
349
422
  return;
@@ -356,19 +429,20 @@
356
429
  addVoiceBlock(block.text, false);
357
430
  hasContent = true;
358
431
  } else if (block.type === 'result') {
359
- addVoiceResultBlock(block);
432
+ addVoiceResultBlock(block, false);
360
433
  hasContent = true;
361
434
  }
362
435
  });
363
436
  if (!hasContent) showVoiceEmpty(container);
364
437
  })
365
438
  .catch(function() {
439
+ isLoadingHistory = false;
366
440
  showVoiceEmpty(container);
367
441
  });
368
442
  }
369
443
 
370
444
  function showVoiceEmpty(container) {
371
- container.innerHTML = '<div class="voice-empty"><div class="voice-empty-icon"><svg viewBox="0 0 24 24" width="64" height="64" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" y1="19" x2="12" y2="23"/><line x1="8" y1="23" x2="16" y2="23"/></svg></div><div>Tap the microphone and speak to send a message.<br>Responses will be read aloud.</div></div>';
445
+ container.innerHTML = '<div class="voice-empty"><div class="voice-empty-icon"><svg viewBox="0 0 24 24" width="64" height="64" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" y1="19" x2="12" y2="23"/><line x1="8" y1="23" x2="16" y2="23"/></svg></div><div>Hold the microphone button to record.<br>Release to transcribe. Tap Send to submit.<br>New responses will be read aloud.</div></div>';
372
446
  }
373
447
 
374
448
  function activate() {