agentgui 1.0.146 → 1.0.148
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/speech.js +50 -7
- package/package.json +1 -1
- package/server.js +48 -11
- package/static/index.html +20 -0
- package/static/js/client.js +134 -191
- package/static/js/streaming-renderer.js +21 -3
- package/static/js/voice.js +97 -23
package/lib/speech.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { pipeline, env } from '@huggingface/transformers';
|
|
2
1
|
import { createRequire } from 'module';
|
|
3
2
|
import fs from 'fs';
|
|
4
3
|
import path from 'path';
|
|
@@ -13,13 +12,21 @@ const SPEAKER_EMBEDDINGS_URL = 'https://huggingface.co/datasets/Xenova/transform
|
|
|
13
12
|
const SPEAKER_EMBEDDINGS_PATH = path.join(DATA_DIR, 'speaker_embeddings.bin');
|
|
14
13
|
const SAMPLE_RATE_TTS = 16000;
|
|
15
14
|
const SAMPLE_RATE_STT = 16000;
|
|
15
|
+
const MIN_WAV_SIZE = 44;
|
|
16
16
|
|
|
17
|
+
let transformersModule = null;
|
|
17
18
|
let sttPipeline = null;
|
|
18
19
|
let ttsPipeline = null;
|
|
19
20
|
let speakerEmbeddings = null;
|
|
20
21
|
let sttLoading = false;
|
|
21
22
|
let ttsLoading = false;
|
|
22
23
|
|
|
24
|
+
/**
 * Lazily load the `@huggingface/transformers` package.
 *
 * The module namespace is stored in the module-level `transformersModule`
 * variable after the first successful import, so later calls return the
 * stored value without importing again.
 *
 * @returns {Promise<object>} The imported module namespace.
 */
async function loadTransformers() {
  if (!transformersModule) {
    // Deferred import: the package is only loaded when a caller asks for it.
    transformersModule = await import('@huggingface/transformers');
  }
  return transformersModule;
}
|
|
29
|
+
|
|
23
30
|
function whisperModelPath() {
|
|
24
31
|
try {
|
|
25
32
|
const webtalkDir = path.dirname(require.resolve('webtalk'));
|
|
@@ -46,10 +53,12 @@ async function getSTT() {
|
|
|
46
53
|
if (sttPipeline) return sttPipeline;
|
|
47
54
|
if (sttLoading) {
|
|
48
55
|
while (sttLoading) await new Promise(r => setTimeout(r, 100));
|
|
56
|
+
if (!sttPipeline) throw new Error('STT pipeline failed to load');
|
|
49
57
|
return sttPipeline;
|
|
50
58
|
}
|
|
51
59
|
sttLoading = true;
|
|
52
60
|
try {
|
|
61
|
+
const { pipeline, env } = await loadTransformers();
|
|
53
62
|
const modelPath = whisperModelPath();
|
|
54
63
|
const isLocal = !modelPath.includes('/') || fs.existsSync(modelPath);
|
|
55
64
|
env.allowLocalModels = true;
|
|
@@ -60,6 +69,9 @@ async function getSTT() {
|
|
|
60
69
|
local_files_only: isLocal,
|
|
61
70
|
});
|
|
62
71
|
return sttPipeline;
|
|
72
|
+
} catch (err) {
|
|
73
|
+
sttPipeline = null;
|
|
74
|
+
throw new Error('STT model load failed: ' + err.message);
|
|
63
75
|
} finally {
|
|
64
76
|
sttLoading = false;
|
|
65
77
|
}
|
|
@@ -69,10 +81,12 @@ async function getTTS() {
|
|
|
69
81
|
if (ttsPipeline) return ttsPipeline;
|
|
70
82
|
if (ttsLoading) {
|
|
71
83
|
while (ttsLoading) await new Promise(r => setTimeout(r, 100));
|
|
84
|
+
if (!ttsPipeline) throw new Error('TTS pipeline failed to load');
|
|
72
85
|
return ttsPipeline;
|
|
73
86
|
}
|
|
74
87
|
ttsLoading = true;
|
|
75
88
|
try {
|
|
89
|
+
const { pipeline, env } = await loadTransformers();
|
|
76
90
|
env.allowRemoteModels = true;
|
|
77
91
|
ttsPipeline = await pipeline('text-to-speech', 'Xenova/speecht5_tts', {
|
|
78
92
|
device: 'cpu',
|
|
@@ -80,6 +94,9 @@ async function getTTS() {
|
|
|
80
94
|
});
|
|
81
95
|
await ensureSpeakerEmbeddings();
|
|
82
96
|
return ttsPipeline;
|
|
97
|
+
} catch (err) {
|
|
98
|
+
ttsPipeline = null;
|
|
99
|
+
throw new Error('TTS model load failed: ' + err.message);
|
|
83
100
|
} finally {
|
|
84
101
|
ttsLoading = false;
|
|
85
102
|
}
|
|
@@ -159,18 +176,44 @@ function encodeWav(float32Audio, sampleRate) {
|
|
|
159
176
|
}
|
|
160
177
|
|
|
161
178
|
/**
 * Transcribe an audio buffer to text with the speech-to-text pipeline.
 *
 * Input starting with the ASCII tag `RIFF` is decoded with
 * `decodeWavToFloat32` and passed through `resampleTo16k`; any other input
 * is reinterpreted as raw 32-bit float samples (trailing bytes that do not
 * fill a whole sample are dropped).
 *
 * All validation happens before `getSTT()` is awaited, so malformed input is
 * rejected without triggering a model load.
 *
 * @param {Buffer|ArrayBuffer|Uint8Array} audioBuffer - Encoded or raw audio bytes.
 * @returns {Promise<string>} The recognised text, or `''` when the engine
 *   returns no string `text` field.
 * @throws {Error} When the input is shorter than `MIN_WAV_SIZE` bytes, cannot
 *   be decoded, holds fewer than 100 samples, or the engine call fails. Errors
 *   that wrap a lower-level failure expose it as `error.cause`.
 */
async function transcribe(audioBuffer) {
  const buf = Buffer.isBuffer(audioBuffer) ? audioBuffer : Buffer.from(audioBuffer);
  if (buf.length < MIN_WAV_SIZE) {
    throw new Error('Audio too short (' + buf.length + ' bytes)');
  }

  let audio;
  const isWav = buf.length > 4 && buf.toString('ascii', 0, 4) === 'RIFF';
  if (isWav) {
    let decoded;
    try {
      decoded = decodeWavToFloat32(buf);
    } catch (err) {
      // Keep the original error reachable for debugging instead of only its message.
      throw new Error('WAV decode failed: ' + err.message, { cause: err });
    }
    // Guard `decoded` itself so a null/undefined result reports the same
    // domain error instead of an unrelated TypeError.
    if (!decoded || !decoded.audio || decoded.audio.length === 0) {
      throw new Error('WAV contains no audio samples');
    }
    audio = resampleTo16k(decoded.audio, decoded.sampleRate);
  } else {
    const sampleCount = Math.floor(buf.byteLength / 4);
    if (sampleCount === 0) throw new Error('Audio buffer too small');
    // Copy into a fresh ArrayBuffer: a Float32Array view needs a byte offset
    // that is a multiple of 4, which a Buffer slice does not guarantee.
    const aligned = new ArrayBuffer(sampleCount * 4);
    new Uint8Array(aligned).set(buf.subarray(0, sampleCount * 4));
    audio = new Float32Array(aligned);
  }

  if (audio.length < 100) {
    throw new Error('Audio too short for transcription');
  }

  const stt = await getSTT();
  let result;
  try {
    result = await stt(audio);
  } catch (err) {
    throw new Error('Transcription engine error: ' + err.message, { cause: err });
  }
  if (!result || typeof result.text !== 'string') {
    return '';
  }
  return result.text;
}
|
|
175
218
|
|
|
176
219
|
async function synthesize(text) {
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -8,14 +8,18 @@ import { execSync } from 'child_process';
|
|
|
8
8
|
import { createRequire } from 'module';
|
|
9
9
|
import { queries } from './database.js';
|
|
10
10
|
import { runClaudeWithStreaming } from './lib/claude-runner.js';
|
|
11
|
-
|
|
11
|
+
let speechModule = null;
|
|
12
|
+
/**
 * Lazily load the local speech module (`./lib/speech.js`).
 *
 * The module namespace is stored in the module-level `speechModule` variable
 * on first use and returned directly on later calls.
 *
 * @returns {Promise<object>} The imported speech module namespace.
 */
async function getSpeech() {
  if (speechModule) return speechModule;
  speechModule = await import('./lib/speech.js');
  return speechModule;
}
|
|
12
16
|
|
|
13
17
|
const require = createRequire(import.meta.url);
|
|
14
18
|
const express = require('express');
|
|
15
19
|
const Busboy = require('busboy');
|
|
16
20
|
const fsbrowse = require('fsbrowse');
|
|
17
21
|
|
|
18
|
-
const SYSTEM_PROMPT = `
|
|
22
|
+
const SYSTEM_PROMPT = `Write all responses as clean semantic HTML. Use tags like <h3>, <p>, <ul>, <li>, <ol>, <table>, <code>, <pre>, <strong>, <em>, <a>, <blockquote>, <details>, <summary>. Your HTML will be rendered directly in a styled container that already provides fonts, colors, spacing, and dark mode support. Do not include <html>, <head>, <body>, <style>, or <script> tags. Do not use inline styles unless necessary for layout like tables. Do not use CSS class names. Just write semantic HTML content.`;
|
|
19
23
|
|
|
20
24
|
const activeExecutions = new Map();
|
|
21
25
|
const messageQueues = new Map();
|
|
@@ -327,6 +331,27 @@ const server = http.createServer(async (req, res) => {
|
|
|
327
331
|
return;
|
|
328
332
|
}
|
|
329
333
|
|
|
334
|
+
const fullLoadMatch = pathOnly.match(/^\/api\/conversations\/([^/]+)\/full$/);
|
|
335
|
+
if (fullLoadMatch && req.method === 'GET') {
|
|
336
|
+
const conversationId = fullLoadMatch[1];
|
|
337
|
+
const conv = queries.getConversation(conversationId);
|
|
338
|
+
if (!conv) { res.writeHead(404, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ error: 'Not found' })); return; }
|
|
339
|
+
const latestSession = queries.getLatestSession(conversationId);
|
|
340
|
+
const isActivelyStreaming = activeExecutions.has(conversationId) ||
|
|
341
|
+
(latestSession && latestSession.status === 'active');
|
|
342
|
+
const chunks = queries.getConversationChunks(conversationId);
|
|
343
|
+
const msgResult = queries.getPaginatedMessages(conversationId, 100, 0);
|
|
344
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
345
|
+
res.end(JSON.stringify({
|
|
346
|
+
conversation: conv,
|
|
347
|
+
isActivelyStreaming,
|
|
348
|
+
latestSession,
|
|
349
|
+
chunks,
|
|
350
|
+
messages: msgResult.messages
|
|
351
|
+
}));
|
|
352
|
+
return;
|
|
353
|
+
}
|
|
354
|
+
|
|
330
355
|
const conversationChunksMatch = pathOnly.match(/^\/api\/conversations\/([^/]+)\/chunks$/);
|
|
331
356
|
if (conversationChunksMatch && req.method === 'GET') {
|
|
332
357
|
const conversationId = conversationChunksMatch[1];
|
|
@@ -450,13 +475,16 @@ const server = http.createServer(async (req, res) => {
|
|
|
450
475
|
res.end(JSON.stringify({ error: 'No audio data' }));
|
|
451
476
|
return;
|
|
452
477
|
}
|
|
478
|
+
const { transcribe } = await getSpeech();
|
|
453
479
|
const text = await transcribe(audioBuffer);
|
|
454
480
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
455
|
-
res.end(JSON.stringify({ text: text.trim() }));
|
|
481
|
+
res.end(JSON.stringify({ text: (text || '').trim() }));
|
|
456
482
|
} catch (err) {
|
|
457
483
|
debugLog('[STT] Error: ' + err.message);
|
|
458
|
-
res.
|
|
459
|
-
|
|
484
|
+
if (!res.headersSent) {
|
|
485
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
486
|
+
}
|
|
487
|
+
res.end(JSON.stringify({ error: err.message || 'STT failed' }));
|
|
460
488
|
}
|
|
461
489
|
return;
|
|
462
490
|
}
|
|
@@ -470,20 +498,29 @@ const server = http.createServer(async (req, res) => {
|
|
|
470
498
|
res.end(JSON.stringify({ error: 'No text provided' }));
|
|
471
499
|
return;
|
|
472
500
|
}
|
|
501
|
+
const { synthesize } = await getSpeech();
|
|
473
502
|
const wavBuffer = await synthesize(text);
|
|
474
503
|
res.writeHead(200, { 'Content-Type': 'audio/wav', 'Content-Length': wavBuffer.length });
|
|
475
504
|
res.end(wavBuffer);
|
|
476
505
|
} catch (err) {
|
|
477
506
|
debugLog('[TTS] Error: ' + err.message);
|
|
478
|
-
res.
|
|
479
|
-
|
|
507
|
+
if (!res.headersSent) {
|
|
508
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
509
|
+
}
|
|
510
|
+
res.end(JSON.stringify({ error: err.message || 'TTS failed' }));
|
|
480
511
|
}
|
|
481
512
|
return;
|
|
482
513
|
}
|
|
483
514
|
|
|
484
515
|
if (routePath === '/api/speech-status' && req.method === 'GET') {
|
|
485
|
-
|
|
486
|
-
|
|
516
|
+
try {
|
|
517
|
+
const { getStatus } = await getSpeech();
|
|
518
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
519
|
+
res.end(JSON.stringify(getStatus()));
|
|
520
|
+
} catch (err) {
|
|
521
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
522
|
+
res.end(JSON.stringify({ sttReady: false, ttsReady: false, sttLoading: false, ttsLoading: false }));
|
|
523
|
+
}
|
|
487
524
|
return;
|
|
488
525
|
}
|
|
489
526
|
|
|
@@ -522,7 +559,7 @@ const server = http.createServer(async (req, res) => {
|
|
|
522
559
|
const mimeTypes = { '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.gif': 'image/gif', '.webp': 'image/webp', '.svg': 'image/svg+xml' };
|
|
523
560
|
const contentType = mimeTypes[ext] || 'application/octet-stream';
|
|
524
561
|
const fileContent = fs.readFileSync(normalizedPath);
|
|
525
|
-
res.writeHead(200, { 'Content-Type': contentType, 'Cache-Control': '
|
|
562
|
+
res.writeHead(200, { 'Content-Type': contentType, 'Cache-Control': 'no-cache' });
|
|
526
563
|
res.end(fileContent);
|
|
527
564
|
} catch (err) {
|
|
528
565
|
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
@@ -574,7 +611,7 @@ function serveFile(filePath, res) {
|
|
|
574
611
|
res.writeHead(200, {
|
|
575
612
|
'Content-Type': contentType,
|
|
576
613
|
'Content-Length': stats.size,
|
|
577
|
-
'Cache-Control': '
|
|
614
|
+
'Cache-Control': 'no-cache, must-revalidate'
|
|
578
615
|
});
|
|
579
616
|
fs.createReadStream(filePath).pipe(res);
|
|
580
617
|
});
|
package/static/index.html
CHANGED
|
@@ -1106,6 +1106,26 @@
|
|
|
1106
1106
|
border-top: 1px solid var(--color-border);
|
|
1107
1107
|
}
|
|
1108
1108
|
|
|
1109
|
+
.voice-reread-btn {
|
|
1110
|
+
position: absolute;
|
|
1111
|
+
top: 0.5rem;
|
|
1112
|
+
right: 0.5rem;
|
|
1113
|
+
background: none;
|
|
1114
|
+
border: 1px solid var(--color-border);
|
|
1115
|
+
border-radius: 0.25rem;
|
|
1116
|
+
cursor: pointer;
|
|
1117
|
+
padding: 0.25rem;
|
|
1118
|
+
color: var(--color-text-secondary);
|
|
1119
|
+
opacity: 0;
|
|
1120
|
+
transition: opacity 0.15s, background-color 0.15s;
|
|
1121
|
+
display: flex;
|
|
1122
|
+
align-items: center;
|
|
1123
|
+
justify-content: center;
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
.voice-block:hover .voice-reread-btn { opacity: 1; }
|
|
1127
|
+
.voice-reread-btn:hover { background: var(--color-bg-primary); color: var(--color-primary); }
|
|
1128
|
+
|
|
1109
1129
|
/* ===== RESPONSIVE: TABLET ===== */
|
|
1110
1130
|
@media (min-width: 769px) and (max-width: 1024px) {
|
|
1111
1131
|
:root { --sidebar-width: 260px; }
|
package/static/js/client.js
CHANGED
|
@@ -420,7 +420,6 @@ class AgentGUIClient {
|
|
|
420
420
|
if (outputEl) {
|
|
421
421
|
let messagesEl = outputEl.querySelector('.conversation-messages');
|
|
422
422
|
if (!messagesEl) {
|
|
423
|
-
// Load existing conversation history before starting the stream
|
|
424
423
|
const conv = this.state.currentConversation;
|
|
425
424
|
const wdInfo = conv?.workingDirectory ? ` - ${this.escapeHtml(conv.workingDirectory)}` : '';
|
|
426
425
|
outputEl.innerHTML = `
|
|
@@ -431,14 +430,16 @@ class AgentGUIClient {
|
|
|
431
430
|
<div class="conversation-messages"></div>
|
|
432
431
|
`;
|
|
433
432
|
messagesEl = outputEl.querySelector('.conversation-messages');
|
|
434
|
-
// Load prior messages into the container
|
|
435
433
|
try {
|
|
436
|
-
const
|
|
437
|
-
if (
|
|
438
|
-
const
|
|
439
|
-
const priorChunks =
|
|
434
|
+
const fullResp = await fetch(window.__BASE_URL + `/api/conversations/${data.conversationId}/full`);
|
|
435
|
+
if (fullResp.ok) {
|
|
436
|
+
const fullData = await fullResp.json();
|
|
437
|
+
const priorChunks = (fullData.chunks || []).map(c => ({
|
|
438
|
+
...c,
|
|
439
|
+
block: typeof c.data === 'string' ? JSON.parse(c.data) : c.data
|
|
440
|
+
}));
|
|
441
|
+
const userMsgs = (fullData.messages || []).filter(m => m.role === 'user');
|
|
440
442
|
if (priorChunks.length > 0) {
|
|
441
|
-
const userMsgs = (msgData.messages || []).filter(m => m.role === 'user');
|
|
442
443
|
const sessionOrder = [];
|
|
443
444
|
const sessionGroups = {};
|
|
444
445
|
priorChunks.forEach(c => {
|
|
@@ -468,7 +469,7 @@ class AgentGUIClient {
|
|
|
468
469
|
messagesEl.insertAdjacentHTML('beforeend', `<div class="message message-user" data-msg-id="${m.id}"><div class="message-role">User</div>${this.renderMessageContent(m.content)}<div class="message-timestamp">${new Date(m.created_at).toLocaleString()}</div></div>`);
|
|
469
470
|
}
|
|
470
471
|
} else {
|
|
471
|
-
messagesEl.innerHTML = this.renderMessages(
|
|
472
|
+
messagesEl.innerHTML = this.renderMessages(fullData.messages || []);
|
|
472
473
|
}
|
|
473
474
|
}
|
|
474
475
|
} catch (e) {
|
|
@@ -516,7 +517,7 @@ class AgentGUIClient {
|
|
|
516
517
|
if (block.type === 'text' && block.text) {
|
|
517
518
|
const text = block.text;
|
|
518
519
|
if (this.isHtmlContent(text)) {
|
|
519
|
-
return `<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${text}</div>`;
|
|
520
|
+
return `<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(text)}</div>`;
|
|
520
521
|
}
|
|
521
522
|
const parts = this.parseMarkdownCodeBlocks(text);
|
|
522
523
|
if (parts.length === 1 && parts[0].type === 'text') {
|
|
@@ -524,7 +525,7 @@ class AgentGUIClient {
|
|
|
524
525
|
}
|
|
525
526
|
return parts.map(part => {
|
|
526
527
|
if (part.type === 'html') {
|
|
527
|
-
return `<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${part.content}</div>`;
|
|
528
|
+
return `<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(part.content)}</div>`;
|
|
528
529
|
} else if (part.type === 'code') {
|
|
529
530
|
return this.renderCodeBlock(part.language, part.code);
|
|
530
531
|
}
|
|
@@ -682,9 +683,17 @@ class AgentGUIClient {
|
|
|
682
683
|
}
|
|
683
684
|
|
|
684
685
|
isHtmlContent(text) {
|
|
685
|
-
const
|
|
686
|
-
|
|
687
|
-
|
|
686
|
+
const htmlPattern = /<(?:div|table|section|article|ul|ol|dl|nav|header|footer|main|aside|figure|details|summary|h[1-6]|p|blockquote|pre|code|span|strong|em|a|img|br|hr|li|td|tr|th|thead|tbody|tfoot)\b[^>]*>/i;
|
|
687
|
+
return htmlPattern.test(text);
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
sanitizeHtml(html) {
|
|
691
|
+
const dangerous = /<\s*\/?\s*(script|iframe|object|embed|applet|form|input|button|select|textarea)\b[^>]*>/gi;
|
|
692
|
+
let cleaned = html.replace(dangerous, '');
|
|
693
|
+
cleaned = cleaned.replace(/\s+on\w+\s*=\s*["'][^"']*["']/gi, '');
|
|
694
|
+
cleaned = cleaned.replace(/\s+on\w+\s*=\s*[^\s>]+/gi, '');
|
|
695
|
+
cleaned = cleaned.replace(/javascript\s*:/gi, '');
|
|
696
|
+
return cleaned;
|
|
688
697
|
}
|
|
689
698
|
|
|
690
699
|
parseMarkdownCodeBlocks(text) {
|
|
@@ -735,7 +744,7 @@ class AgentGUIClient {
|
|
|
735
744
|
Rendered HTML
|
|
736
745
|
</div>
|
|
737
746
|
<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">
|
|
738
|
-
${code}
|
|
747
|
+
${this.sanitizeHtml(code)}
|
|
739
748
|
</div>
|
|
740
749
|
</div>
|
|
741
750
|
`;
|
|
@@ -751,7 +760,7 @@ class AgentGUIClient {
|
|
|
751
760
|
renderMessageContent(content) {
|
|
752
761
|
if (typeof content === 'string') {
|
|
753
762
|
if (this.isHtmlContent(content)) {
|
|
754
|
-
return `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${content}</div></div>`;
|
|
763
|
+
return `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(content)}</div></div>`;
|
|
755
764
|
}
|
|
756
765
|
return `<div class="message-text">${this.escapeHtml(content)}</div>`;
|
|
757
766
|
} else if (content && typeof content === 'object' && content.type === 'claude_execution') {
|
|
@@ -762,7 +771,7 @@ class AgentGUIClient {
|
|
|
762
771
|
const parts = this.parseMarkdownCodeBlocks(block.text);
|
|
763
772
|
parts.forEach(part => {
|
|
764
773
|
if (part.type === 'html') {
|
|
765
|
-
html += `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${part.content}</div></div>`;
|
|
774
|
+
html += `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(part.content)}</div></div>`;
|
|
766
775
|
} else if (part.type === 'text') {
|
|
767
776
|
html += `<div class="message-text">${this.escapeHtml(part.content)}</div>`;
|
|
768
777
|
} else if (part.type === 'code') {
|
|
@@ -778,7 +787,7 @@ class AgentGUIClient {
|
|
|
778
787
|
Rendered HTML
|
|
779
788
|
</div>
|
|
780
789
|
<div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">
|
|
781
|
-
${block.code}
|
|
790
|
+
${this.sanitizeHtml(block.code)}
|
|
782
791
|
</div>
|
|
783
792
|
</div>
|
|
784
793
|
`;
|
|
@@ -1166,135 +1175,64 @@ class AgentGUIClient {
|
|
|
1166
1175
|
|
|
1167
1176
|
async loadConversationMessages(conversationId) {
|
|
1168
1177
|
try {
|
|
1169
|
-
// Save scroll position of current conversation before switching
|
|
1170
1178
|
if (this.state.currentConversation?.id) {
|
|
1171
1179
|
this.saveScrollPosition(this.state.currentConversation.id);
|
|
1172
1180
|
}
|
|
1173
|
-
|
|
1174
|
-
// Stop any existing polling when switching conversations
|
|
1175
1181
|
this.stopChunkPolling();
|
|
1176
|
-
|
|
1177
|
-
// Clear streaming state from previous conversation view
|
|
1178
|
-
// (the actual streaming continues on the server, we just stop tracking it on the UI side)
|
|
1179
1182
|
if (this.state.isStreaming && this.state.currentConversation?.id !== conversationId) {
|
|
1180
1183
|
this.state.isStreaming = false;
|
|
1181
1184
|
this.state.currentSession = null;
|
|
1182
1185
|
}
|
|
1183
1186
|
|
|
1184
|
-
const convResponse = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}`);
|
|
1185
|
-
const { conversation, isActivelyStreaming, latestSession } = await convResponse.json();
|
|
1186
|
-
this.state.currentConversation = conversation;
|
|
1187
|
-
|
|
1188
|
-
// Update URL with conversation ID
|
|
1189
1187
|
this.updateUrlForConversation(conversationId);
|
|
1190
|
-
|
|
1191
1188
|
if (this.wsManager.isConnected) {
|
|
1192
1189
|
this.wsManager.sendMessage({ type: 'subscribe', conversationId });
|
|
1193
1190
|
}
|
|
1194
1191
|
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
const shouldResumeStreaming = isActivelyStreaming && latestSession &&
|
|
1199
|
-
(latestSession.status === 'active' || latestSession.status === 'pending');
|
|
1192
|
+
const resp = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}/full`);
|
|
1193
|
+
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
|
1194
|
+
const { conversation, isActivelyStreaming, latestSession, chunks: rawChunks, messages: allMessages } = await resp.json();
|
|
1200
1195
|
|
|
1201
|
-
|
|
1202
|
-
try {
|
|
1203
|
-
const chunks = await this.fetchChunks(conversationId, 0);
|
|
1204
|
-
|
|
1205
|
-
const outputEl = document.getElementById('output');
|
|
1206
|
-
if (outputEl) {
|
|
1207
|
-
const wdInfo = conversation.workingDirectory ? ` - ${this.escapeHtml(conversation.workingDirectory)}` : '';
|
|
1208
|
-
outputEl.innerHTML = `
|
|
1209
|
-
<div class="conversation-header">
|
|
1210
|
-
<h2>${this.escapeHtml(conversation.title || 'Conversation')}</h2>
|
|
1211
|
-
<p class="text-secondary">${conversation.agentType || 'unknown'} - ${new Date(conversation.created_at).toLocaleDateString()}${wdInfo}</p>
|
|
1212
|
-
</div>
|
|
1213
|
-
<div class="conversation-messages"></div>
|
|
1214
|
-
`;
|
|
1215
|
-
|
|
1216
|
-
// Render all chunks
|
|
1217
|
-
const messagesEl = outputEl.querySelector('.conversation-messages');
|
|
1218
|
-
if (chunks.length > 0) {
|
|
1219
|
-
// Fetch user messages to interleave with session chunks
|
|
1220
|
-
let userMessages = [];
|
|
1221
|
-
try {
|
|
1222
|
-
const msgResp = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}/messages`);
|
|
1223
|
-
if (msgResp.ok) {
|
|
1224
|
-
const msgData = await msgResp.json();
|
|
1225
|
-
userMessages = (msgData.messages || []).filter(m => m.role === 'user');
|
|
1226
|
-
}
|
|
1227
|
-
} catch (_) {}
|
|
1228
|
-
|
|
1229
|
-
// Group chunks by session, preserving order
|
|
1230
|
-
const sessionOrder = [];
|
|
1231
|
-
const sessionChunks = {};
|
|
1232
|
-
chunks.forEach(chunk => {
|
|
1233
|
-
if (!sessionChunks[chunk.sessionId]) {
|
|
1234
|
-
sessionChunks[chunk.sessionId] = [];
|
|
1235
|
-
sessionOrder.push(chunk.sessionId);
|
|
1236
|
-
}
|
|
1237
|
-
sessionChunks[chunk.sessionId].push(chunk);
|
|
1238
|
-
});
|
|
1196
|
+
this.state.currentConversation = conversation;
|
|
1239
1197
|
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
// Render user messages that came before this session
|
|
1247
|
-
while (userMsgIdx < userMessages.length && userMessages[userMsgIdx].created_at <= sessionStart) {
|
|
1248
|
-
const msg = userMessages[userMsgIdx];
|
|
1249
|
-
const userDiv = document.createElement('div');
|
|
1250
|
-
userDiv.className = 'message message-user';
|
|
1251
|
-
userDiv.setAttribute('data-msg-id', msg.id);
|
|
1252
|
-
userDiv.innerHTML = `
|
|
1253
|
-
<div class="message-role">User</div>
|
|
1254
|
-
${this.renderMessageContent(msg.content)}
|
|
1255
|
-
<div class="message-timestamp">${new Date(msg.created_at).toLocaleString()}</div>
|
|
1256
|
-
`;
|
|
1257
|
-
messagesEl.appendChild(userDiv);
|
|
1258
|
-
userMsgIdx++;
|
|
1259
|
-
}
|
|
1198
|
+
const chunks = (rawChunks || []).map(chunk => ({
|
|
1199
|
+
...chunk,
|
|
1200
|
+
block: typeof chunk.data === 'string' ? JSON.parse(chunk.data) : chunk.data
|
|
1201
|
+
}));
|
|
1202
|
+
const userMessages = (allMessages || []).filter(m => m.role === 'user');
|
|
1260
1203
|
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
messageDiv.className = `message message-assistant${isCurrentActiveSession ? ' streaming-message' : ''}`;
|
|
1264
|
-
messageDiv.id = isCurrentActiveSession ? `streaming-${sessionId}` : `message-${sessionId}`;
|
|
1265
|
-
messageDiv.innerHTML = '<div class="message-role">Assistant</div><div class="message-blocks streaming-blocks"></div>';
|
|
1266
|
-
|
|
1267
|
-
const blocksEl = messageDiv.querySelector('.message-blocks');
|
|
1268
|
-
sessionChunkList.forEach(chunk => {
|
|
1269
|
-
if (chunk.block && chunk.block.type) {
|
|
1270
|
-
const element = this.renderer.renderBlock(chunk.block, chunk);
|
|
1271
|
-
if (element) {
|
|
1272
|
-
blocksEl.appendChild(element);
|
|
1273
|
-
}
|
|
1274
|
-
}
|
|
1275
|
-
});
|
|
1204
|
+
const shouldResumeStreaming = isActivelyStreaming && latestSession &&
|
|
1205
|
+
(latestSession.status === 'active' || latestSession.status === 'pending');
|
|
1276
1206
|
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
const ts = document.createElement('div');
|
|
1288
|
-
ts.className = 'message-timestamp';
|
|
1289
|
-
ts.textContent = new Date(sessionChunkList[sessionChunkList.length - 1].created_at).toLocaleString();
|
|
1290
|
-
messageDiv.appendChild(ts);
|
|
1291
|
-
}
|
|
1207
|
+
const outputEl = document.getElementById('output');
|
|
1208
|
+
if (outputEl) {
|
|
1209
|
+
const wdInfo = conversation.workingDirectory ? ` - ${this.escapeHtml(conversation.workingDirectory)}` : '';
|
|
1210
|
+
outputEl.innerHTML = `
|
|
1211
|
+
<div class="conversation-header">
|
|
1212
|
+
<h2>${this.escapeHtml(conversation.title || 'Conversation')}</h2>
|
|
1213
|
+
<p class="text-secondary">${conversation.agentType || 'unknown'} - ${new Date(conversation.created_at).toLocaleDateString()}${wdInfo}</p>
|
|
1214
|
+
</div>
|
|
1215
|
+
<div class="conversation-messages"></div>
|
|
1216
|
+
`;
|
|
1292
1217
|
|
|
1293
|
-
|
|
1294
|
-
|
|
1218
|
+
const messagesEl = outputEl.querySelector('.conversation-messages');
|
|
1219
|
+
if (chunks.length > 0) {
|
|
1220
|
+
const sessionOrder = [];
|
|
1221
|
+
const sessionChunks = {};
|
|
1222
|
+
chunks.forEach(chunk => {
|
|
1223
|
+
if (!sessionChunks[chunk.sessionId]) {
|
|
1224
|
+
sessionChunks[chunk.sessionId] = [];
|
|
1225
|
+
sessionOrder.push(chunk.sessionId);
|
|
1226
|
+
}
|
|
1227
|
+
sessionChunks[chunk.sessionId].push(chunk);
|
|
1228
|
+
});
|
|
1229
|
+
|
|
1230
|
+
let userMsgIdx = 0;
|
|
1231
|
+
sessionOrder.forEach((sessionId) => {
|
|
1232
|
+
const sessionChunkList = sessionChunks[sessionId];
|
|
1233
|
+
const sessionStart = sessionChunkList[0].created_at;
|
|
1295
1234
|
|
|
1296
|
-
|
|
1297
|
-
while (userMsgIdx < userMessages.length) {
|
|
1235
|
+
while (userMsgIdx < userMessages.length && userMessages[userMsgIdx].created_at <= sessionStart) {
|
|
1298
1236
|
const msg = userMessages[userMsgIdx];
|
|
1299
1237
|
const userDiv = document.createElement('div');
|
|
1300
1238
|
userDiv.className = 'message message-user';
|
|
@@ -1307,78 +1245,83 @@ class AgentGUIClient {
|
|
|
1307
1245
|
messagesEl.appendChild(userDiv);
|
|
1308
1246
|
userMsgIdx++;
|
|
1309
1247
|
}
|
|
1310
|
-
} else {
|
|
1311
|
-
// Fall back to messages if no chunks
|
|
1312
|
-
const messagesResponse = await fetch(window.__BASE_URL + `/api/conversations/${conversationId}/messages`);
|
|
1313
|
-
if (messagesResponse.ok) {
|
|
1314
|
-
const messagesData = await messagesResponse.json();
|
|
1315
|
-
messagesEl.innerHTML = this.renderMessages(messagesData.messages || []);
|
|
1316
|
-
}
|
|
1317
|
-
}
|
|
1318
1248
|
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
// Subscribe to WebSocket updates for BOTH conversation and session
|
|
1333
|
-
if (this.wsManager.isConnected) {
|
|
1334
|
-
this.wsManager.subscribeToSession(latestSession.id);
|
|
1335
|
-
this.wsManager.sendMessage({ type: 'subscribe', conversationId });
|
|
1336
|
-
}
|
|
1249
|
+
const isCurrentActiveSession = shouldResumeStreaming && latestSession && latestSession.id === sessionId;
|
|
1250
|
+
const messageDiv = document.createElement('div');
|
|
1251
|
+
messageDiv.className = `message message-assistant${isCurrentActiveSession ? ' streaming-message' : ''}`;
|
|
1252
|
+
messageDiv.id = isCurrentActiveSession ? `streaming-${sessionId}` : `message-${sessionId}`;
|
|
1253
|
+
messageDiv.innerHTML = '<div class="message-role">Assistant</div><div class="message-blocks streaming-blocks"></div>';
|
|
1254
|
+
|
|
1255
|
+
const blocksEl = messageDiv.querySelector('.message-blocks');
|
|
1256
|
+
sessionChunkList.forEach(chunk => {
|
|
1257
|
+
if (chunk.block && chunk.block.type) {
|
|
1258
|
+
const element = this.renderer.renderBlock(chunk.block, chunk);
|
|
1259
|
+
if (element) blocksEl.appendChild(element);
|
|
1260
|
+
}
|
|
1261
|
+
});
|
|
1337
1262
|
|
|
1338
|
-
|
|
1339
|
-
|
|
1263
|
+
if (isCurrentActiveSession) {
|
|
1264
|
+
const indicatorDiv = document.createElement('div');
|
|
1265
|
+
indicatorDiv.className = 'streaming-indicator';
|
|
1266
|
+
indicatorDiv.style = 'display:flex;align-items:center;gap:0.5rem;padding:0.5rem 0;color:var(--color-text-secondary);font-size:0.875rem;';
|
|
1267
|
+
indicatorDiv.innerHTML = `
|
|
1268
|
+
<span class="animate-spin" style="display:inline-block;width:1rem;height:1rem;border:2px solid var(--color-border);border-top-color:var(--color-primary);border-radius:50%;"></span>
|
|
1269
|
+
<span class="streaming-indicator-label">Processing...</span>
|
|
1270
|
+
`;
|
|
1271
|
+
messageDiv.appendChild(indicatorDiv);
|
|
1272
|
+
} else {
|
|
1273
|
+
const ts = document.createElement('div');
|
|
1274
|
+
ts.className = 'message-timestamp';
|
|
1275
|
+
ts.textContent = new Date(sessionChunkList[sessionChunkList.length - 1].created_at).toLocaleString();
|
|
1276
|
+
messageDiv.appendChild(ts);
|
|
1277
|
+
}
|
|
1340
1278
|
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
const lastChunkTime = chunks.length > 0
|
|
1344
|
-
? chunks[chunks.length - 1].created_at
|
|
1345
|
-
: 0;
|
|
1279
|
+
messagesEl.appendChild(messageDiv);
|
|
1280
|
+
});
|
|
1346
1281
|
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1282
|
+
while (userMsgIdx < userMessages.length) {
|
|
1283
|
+
const msg = userMessages[userMsgIdx];
|
|
1284
|
+
const userDiv = document.createElement('div');
|
|
1285
|
+
userDiv.className = 'message message-user';
|
|
1286
|
+
userDiv.setAttribute('data-msg-id', msg.id);
|
|
1287
|
+
userDiv.innerHTML = `
|
|
1288
|
+
<div class="message-role">User</div>
|
|
1289
|
+
${this.renderMessageContent(msg.content)}
|
|
1290
|
+
<div class="message-timestamp">${new Date(msg.created_at).toLocaleString()}</div>
|
|
1291
|
+
`;
|
|
1292
|
+
messagesEl.appendChild(userDiv);
|
|
1293
|
+
userMsgIdx++;
|
|
1294
|
+
}
|
|
1295
|
+
} else {
|
|
1296
|
+
messagesEl.innerHTML = this.renderMessages(allMessages || []);
|
|
1297
|
+
}
|
|
1350
1298
|
|
|
1351
|
-
|
|
1352
|
-
|
|
1299
|
+
if (shouldResumeStreaming && latestSession) {
|
|
1300
|
+
this.state.isStreaming = true;
|
|
1301
|
+
this.state.currentSession = {
|
|
1302
|
+
id: latestSession.id,
|
|
1303
|
+
conversationId: conversationId,
|
|
1304
|
+
agentId: conversation.agentType || 'claude-code',
|
|
1305
|
+
startTime: latestSession.created_at
|
|
1306
|
+
};
|
|
1307
|
+
|
|
1308
|
+
if (this.wsManager.isConnected) {
|
|
1309
|
+
this.wsManager.subscribeToSession(latestSession.id);
|
|
1310
|
+
this.wsManager.sendMessage({ type: 'subscribe', conversationId });
|
|
1353
1311
|
}
|
|
1354
1312
|
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
const messagesData = await messagesResponse.json();
|
|
1365
|
-
|
|
1366
|
-
const outputEl = document.getElementById('output');
|
|
1367
|
-
if (outputEl) {
|
|
1368
|
-
const wdInfo = conversation.workingDirectory ? ` - ${this.escapeHtml(conversation.workingDirectory)}` : '';
|
|
1369
|
-
outputEl.innerHTML = `
|
|
1370
|
-
<div class="conversation-header">
|
|
1371
|
-
<h2>${this.escapeHtml(conversation.title || 'Conversation')}</h2>
|
|
1372
|
-
<p class="text-secondary">${conversation.agentType || 'unknown'} - ${new Date(conversation.created_at).toLocaleDateString()}${wdInfo}</p>
|
|
1373
|
-
</div>
|
|
1374
|
-
<div class="conversation-messages">
|
|
1375
|
-
${this.renderMessages(messagesData.messages || [])}
|
|
1376
|
-
</div>
|
|
1377
|
-
`;
|
|
1378
|
-
|
|
1379
|
-
// Restore scroll position after rendering
|
|
1380
|
-
this.restoreScrollPosition(conversationId);
|
|
1313
|
+
this.updateUrlForConversation(conversationId, latestSession.id);
|
|
1314
|
+
|
|
1315
|
+
const lastChunkTime = chunks.length > 0
|
|
1316
|
+
? chunks[chunks.length - 1].created_at
|
|
1317
|
+
: 0;
|
|
1318
|
+
|
|
1319
|
+
this.chunkPollState.lastFetchTimestamp = lastChunkTime;
|
|
1320
|
+
this.startChunkPolling(conversationId);
|
|
1321
|
+
this.disableControls();
|
|
1381
1322
|
}
|
|
1323
|
+
|
|
1324
|
+
this.restoreScrollPosition(conversationId);
|
|
1382
1325
|
}
|
|
1383
1326
|
} catch (error) {
|
|
1384
1327
|
console.error('Failed to load conversation messages:', error);
|
|
@@ -1399,7 +1342,7 @@ class AgentGUIClient {
|
|
|
1399
1342
|
|
|
1400
1343
|
if (typeof msg.content === 'string') {
|
|
1401
1344
|
if (this.isHtmlContent(msg.content)) {
|
|
1402
|
-
contentHtml = `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${msg.content}</div></div>`;
|
|
1345
|
+
contentHtml = `<div class="message-text"><div class="html-content bg-white dark:bg-gray-800 p-4 rounded border border-gray-200 dark:border-gray-700 overflow-x-auto">${this.sanitizeHtml(msg.content)}</div></div>`;
|
|
1403
1346
|
} else {
|
|
1404
1347
|
contentHtml = `<div class="message-text">${this.escapeHtml(msg.content)}</div>`;
|
|
1405
1348
|
}
|
|
@@ -362,12 +362,30 @@ class StreamingRenderer {
|
|
|
362
362
|
div.className = 'block-text';
|
|
363
363
|
|
|
364
364
|
const text = block.text || '';
|
|
365
|
-
|
|
366
|
-
|
|
365
|
+
if (this.containsHtmlTags(text)) {
|
|
366
|
+
div.innerHTML = this.sanitizeHtml(text);
|
|
367
|
+
div.classList.add('html-content');
|
|
368
|
+
} else {
|
|
369
|
+
div.innerHTML = this.parseAndRenderMarkdown(text);
|
|
370
|
+
}
|
|
367
371
|
|
|
368
372
|
return div;
|
|
369
373
|
}
|
|
370
374
|
|
|
375
|
+
containsHtmlTags(text) {
|
|
376
|
+
const htmlPattern = /<(?:div|table|section|article|ul|ol|dl|nav|header|footer|main|aside|figure|details|summary|h[1-6]|p|blockquote|pre|code|span|strong|em|a|img|br|hr|li|td|tr|th|thead|tbody|tfoot)\b[^>]*>/i;
|
|
377
|
+
return htmlPattern.test(text);
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
sanitizeHtml(html) {
|
|
381
|
+
const dangerous = /<\s*\/?\s*(script|iframe|object|embed|applet|form|input|button|select|textarea)\b[^>]*>/gi;
|
|
382
|
+
let cleaned = html.replace(dangerous, '');
|
|
383
|
+
cleaned = cleaned.replace(/\s+on\w+\s*=\s*["'][^"']*["']/gi, '');
|
|
384
|
+
cleaned = cleaned.replace(/\s+on\w+\s*=\s*[^\s>]+/gi, '');
|
|
385
|
+
cleaned = cleaned.replace(/javascript\s*:/gi, '');
|
|
386
|
+
return cleaned;
|
|
387
|
+
}
|
|
388
|
+
|
|
371
389
|
/**
|
|
372
390
|
* Parse markdown and render links, code, bold, italic
|
|
373
391
|
*/
|
|
@@ -1259,7 +1277,7 @@ class StreamingRenderer {
|
|
|
1259
1277
|
${cost ? `<div class="result-stat"><span class="stat-icon">💰</span><span class="stat-value">${this.escapeHtml(cost)}</span><span class="stat-label">cost</span></div>` : ''}
|
|
1260
1278
|
${turns ? `<div class="result-stat"><span class="stat-icon">🔄</span><span class="stat-value">${this.escapeHtml(String(turns))}</span><span class="stat-label">turns</span></div>` : ''}
|
|
1261
1279
|
</div>
|
|
1262
|
-
${block.result ? `<div class="result-content">${
|
|
1280
|
+
${block.result ? `<div class="result-content">${(() => { const r = typeof block.result === 'string' ? block.result : JSON.stringify(block.result, null, 2); return this.containsHtmlTags(r) ? '<div class="html-content">' + this.sanitizeHtml(r) + '</div>' : this.escapeHtml(r); })()}</div>` : ''}
|
|
1263
1281
|
`;
|
|
1264
1282
|
|
|
1265
1283
|
return div;
|
package/static/js/voice.js
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
var isRecording = false;
|
|
4
4
|
var ttsEnabled = true;
|
|
5
5
|
var voiceActive = false;
|
|
6
|
-
var lastSpokenBlockIndex = -1;
|
|
7
6
|
var currentConversationId = null;
|
|
8
7
|
var speechQueue = [];
|
|
9
8
|
var isSpeaking = false;
|
|
@@ -13,6 +12,8 @@
|
|
|
13
12
|
var scriptNode = null;
|
|
14
13
|
var recordedChunks = [];
|
|
15
14
|
var TARGET_SAMPLE_RATE = 16000;
|
|
15
|
+
var spokenChunks = new Set();
|
|
16
|
+
var isLoadingHistory = false;
|
|
16
17
|
|
|
17
18
|
function init() {
|
|
18
19
|
setupTTSToggle();
|
|
@@ -61,14 +62,28 @@
|
|
|
61
62
|
var micBtn = document.getElementById('voiceMicBtn');
|
|
62
63
|
if (micBtn) {
|
|
63
64
|
micBtn.removeAttribute('disabled');
|
|
64
|
-
micBtn.title = '
|
|
65
|
-
micBtn.addEventListener('
|
|
65
|
+
micBtn.title = 'Hold to record';
|
|
66
|
+
micBtn.addEventListener('mousedown', function(e) {
|
|
66
67
|
e.preventDefault();
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
68
|
+
startRecording();
|
|
69
|
+
});
|
|
70
|
+
micBtn.addEventListener('mouseup', function(e) {
|
|
71
|
+
e.preventDefault();
|
|
72
|
+
stopRecording();
|
|
73
|
+
});
|
|
74
|
+
micBtn.addEventListener('mouseleave', function(e) {
|
|
75
|
+
if (isRecording) stopRecording();
|
|
76
|
+
});
|
|
77
|
+
micBtn.addEventListener('touchstart', function(e) {
|
|
78
|
+
e.preventDefault();
|
|
79
|
+
startRecording();
|
|
80
|
+
});
|
|
81
|
+
micBtn.addEventListener('touchend', function(e) {
|
|
82
|
+
e.preventDefault();
|
|
83
|
+
stopRecording();
|
|
84
|
+
});
|
|
85
|
+
micBtn.addEventListener('touchcancel', function(e) {
|
|
86
|
+
if (isRecording) stopRecording();
|
|
72
87
|
});
|
|
73
88
|
}
|
|
74
89
|
var sendBtn = document.getElementById('voiceSendBtn');
|
|
@@ -92,6 +107,35 @@
|
|
|
92
107
|
return result;
|
|
93
108
|
}
|
|
94
109
|
|
|
110
|
+
/**
 * Encode mono floating-point audio as a 16-bit PCM WAV file.
 *
 * Writes a 44-byte RIFF/WAVE header followed by little-endian signed 16-bit
 * samples. Input samples are clamped to [-1, 1] before scaling.
 *
 * @param {Float32Array|number[]|null|undefined} float32Audio - Samples in
 *   [-1, 1]; a missing value is treated as zero samples and yields a
 *   header-only file instead of throwing.
 * @param {number} sampleRate - Sample rate in Hz written into the header.
 * @returns {ArrayBuffer} The complete WAV file contents.
 */
function encodeWav(float32Audio, sampleRate) {
  var HEADER_BYTES = 44;
  var BYTES_PER_SAMPLE = 2;
  var NUM_CHANNELS = 1;
  var samples = float32Audio || [];
  var numSamples = samples.length;
  var dataSize = numSamples * BYTES_PER_SAMPLE;
  var buffer = new ArrayBuffer(HEADER_BYTES + dataSize);
  var view = new DataView(buffer);
  var i;

  function writeStr(off, str) {
    for (var c = 0; c < str.length; c++) view.setUint8(off + c, str.charCodeAt(c));
  }

  writeStr(0, 'RIFF');
  view.setUint32(4, HEADER_BYTES - 8 + dataSize, true);   // RIFF chunk size = file size - 8
  writeStr(8, 'WAVE');
  writeStr(12, 'fmt ');
  view.setUint32(16, 16, true);                            // fmt chunk length
  view.setUint16(20, 1, true);                             // audio format 1 = PCM
  view.setUint16(22, NUM_CHANNELS, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * NUM_CHANNELS * BYTES_PER_SAMPLE, true); // byte rate
  view.setUint16(32, NUM_CHANNELS * BYTES_PER_SAMPLE, true);              // block align
  view.setUint16(34, 8 * BYTES_PER_SAMPLE, true);          // bits per sample
  writeStr(36, 'data');
  view.setUint32(40, dataSize, true);

  for (i = 0; i < numSamples; i++) {
    var s = Math.max(-1, Math.min(1, samples[i]));
    // Asymmetric scale so -1 maps to -32768 and +1 maps to 32767.
    view.setInt16(HEADER_BYTES + i * BYTES_PER_SAMPLE, s < 0 ? s * 32768 : s * 32767, true);
  }
  return buffer;
}
|
|
138
|
+
|
|
95
139
|
async function startRecording() {
|
|
96
140
|
if (isRecording) return;
|
|
97
141
|
var el = document.getElementById('voiceTranscript');
|
|
@@ -146,11 +190,11 @@
|
|
|
146
190
|
var resampled = resampleBuffer(merged, sourceSampleRate, TARGET_SAMPLE_RATE);
|
|
147
191
|
if (el) el.textContent = 'Transcribing...';
|
|
148
192
|
try {
|
|
149
|
-
var
|
|
193
|
+
var wavBuffer = encodeWav(resampled, TARGET_SAMPLE_RATE);
|
|
150
194
|
var resp = await fetch(BASE + '/api/stt', {
|
|
151
195
|
method: 'POST',
|
|
152
|
-
headers: { 'Content-Type': '
|
|
153
|
-
body:
|
|
196
|
+
headers: { 'Content-Type': 'audio/wav' },
|
|
197
|
+
body: wavBuffer
|
|
154
198
|
});
|
|
155
199
|
var data = await resp.json();
|
|
156
200
|
if (data.text) {
|
|
@@ -240,6 +284,10 @@
|
|
|
240
284
|
}
|
|
241
285
|
}
|
|
242
286
|
|
|
287
|
+
/**
 * Reduce HTML-ish text to plain text: remove markup, collapse whitespace
 * runs to single spaces, and trim the ends.
 *
 * Only spans that look like real markup are removed: comments, and `<`
 * followed by an optional `/`, `!` or `?` and then a letter. Plain text such
 * as "a < b and c > d" is therefore left intact rather than having
 * everything between the two angle brackets deleted.
 *
 * @param {*} text - Text to clean; null/undefined yields '', other
 *   non-strings are converted with String().
 * @returns {string} Plain text with no tags.
 */
function stripHtml(text) {
  if (text === null || text === undefined) return '';
  return String(text)
    .replace(/<!--[\s\S]*?-->|<[!?\/]?[a-zA-Z][^>]*>/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
290
|
+
|
|
243
291
|
function addVoiceBlock(text, isUser) {
|
|
244
292
|
var container = document.getElementById('voiceMessages');
|
|
245
293
|
if (!container) return;
|
|
@@ -247,13 +295,23 @@
|
|
|
247
295
|
if (emptyMsg) emptyMsg.remove();
|
|
248
296
|
var div = document.createElement('div');
|
|
249
297
|
div.className = 'voice-block' + (isUser ? ' voice-block-user' : '');
|
|
250
|
-
div.textContent = text;
|
|
298
|
+
div.textContent = isUser ? text : stripHtml(text);
|
|
299
|
+
if (!isUser) {
|
|
300
|
+
var rereadBtn = document.createElement('button');
|
|
301
|
+
rereadBtn.className = 'voice-reread-btn';
|
|
302
|
+
rereadBtn.title = 'Re-read aloud';
|
|
303
|
+
rereadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
|
|
304
|
+
rereadBtn.addEventListener('click', function() {
|
|
305
|
+
speak(text);
|
|
306
|
+
});
|
|
307
|
+
div.appendChild(rereadBtn);
|
|
308
|
+
}
|
|
251
309
|
container.appendChild(div);
|
|
252
310
|
scrollVoiceToBottom();
|
|
253
311
|
return div;
|
|
254
312
|
}
|
|
255
313
|
|
|
256
|
-
function addVoiceResultBlock(block) {
|
|
314
|
+
function addVoiceResultBlock(block, autoSpeak) {
|
|
257
315
|
var container = document.getElementById('voiceMessages');
|
|
258
316
|
if (!container) return;
|
|
259
317
|
var emptyMsg = container.querySelector('.voice-empty');
|
|
@@ -267,9 +325,10 @@
|
|
|
267
325
|
if (block.result) {
|
|
268
326
|
resultText = typeof block.result === 'string' ? block.result : JSON.stringify(block.result);
|
|
269
327
|
}
|
|
328
|
+
var displayText = stripHtml(resultText);
|
|
270
329
|
var html = '';
|
|
271
|
-
if (
|
|
272
|
-
html += '<div>' + escapeHtml(
|
|
330
|
+
if (displayText) {
|
|
331
|
+
html += '<div>' + escapeHtml(displayText) + '</div>';
|
|
273
332
|
}
|
|
274
333
|
if (duration || cost) {
|
|
275
334
|
html += '<div class="voice-result-stats">';
|
|
@@ -282,9 +341,19 @@
|
|
|
282
341
|
html = isError ? 'Execution failed' : 'Execution complete';
|
|
283
342
|
}
|
|
284
343
|
div.innerHTML = html;
|
|
344
|
+
if (resultText) {
|
|
345
|
+
var rereadBtn = document.createElement('button');
|
|
346
|
+
rereadBtn.className = 'voice-reread-btn';
|
|
347
|
+
rereadBtn.title = 'Re-read aloud';
|
|
348
|
+
rereadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
|
|
349
|
+
rereadBtn.addEventListener('click', function() {
|
|
350
|
+
speak(resultText);
|
|
351
|
+
});
|
|
352
|
+
div.appendChild(rereadBtn);
|
|
353
|
+
}
|
|
285
354
|
container.appendChild(div);
|
|
286
355
|
scrollVoiceToBottom();
|
|
287
|
-
if (ttsEnabled && resultText) {
|
|
356
|
+
if (autoSpeak && ttsEnabled && resultText) {
|
|
288
357
|
speak(resultText);
|
|
289
358
|
}
|
|
290
359
|
return div;
|
|
@@ -305,31 +374,33 @@
|
|
|
305
374
|
var data = e.detail;
|
|
306
375
|
if (!data) return;
|
|
307
376
|
if (data.type === 'streaming_progress' && data.block) {
|
|
308
|
-
handleVoiceBlock(data.block);
|
|
377
|
+
handleVoiceBlock(data.block, true);
|
|
309
378
|
}
|
|
310
379
|
if (data.type === 'streaming_start') {
|
|
311
|
-
|
|
380
|
+
spokenChunks = new Set();
|
|
312
381
|
}
|
|
313
382
|
});
|
|
314
383
|
window.addEventListener('conversation-selected', function(e) {
|
|
315
384
|
currentConversationId = e.detail.conversationId;
|
|
385
|
+
stopSpeaking();
|
|
386
|
+
spokenChunks = new Set();
|
|
316
387
|
if (voiceActive) {
|
|
317
388
|
loadVoiceBlocks(currentConversationId);
|
|
318
389
|
}
|
|
319
390
|
});
|
|
320
391
|
}
|
|
321
392
|
|
|
322
|
-
function handleVoiceBlock(block) {
|
|
393
|
+
/**
 * Route one streamed or historical block into the voice view.
 *
 * 'result' blocks are forwarded to addVoiceResultBlock together with `isNew`.
 * 'text' blocks with non-empty text are added via addVoiceBlock; when the
 * block is new and TTS is enabled the text is also spoken and the element
 * carries the 'speaking' class for two seconds. Anything else is ignored.
 *
 * @param {Object} block - Block with a `type` and, for text blocks, `text`.
 * @param {boolean} isNew - True for live blocks, false for replayed history.
 */
function handleVoiceBlock(block, isNew) {
  var kind = block ? block.type : null;
  if (!kind) return;

  if (kind === 'result') {
    addVoiceResultBlock(block, isNew);
    return;
  }
  if (kind !== 'text' || !block.text) return;

  var bubble = addVoiceBlock(block.text, false);
  if (!bubble || !isNew || !ttsEnabled) return;

  bubble.classList.add('speaking');
  speak(block.text);
  setTimeout(function() { bubble.classList.remove('speaking'); }, 2000);
}
|
|
335
406
|
|
|
@@ -341,9 +412,11 @@
|
|
|
341
412
|
showVoiceEmpty(container);
|
|
342
413
|
return;
|
|
343
414
|
}
|
|
415
|
+
isLoadingHistory = true;
|
|
344
416
|
fetch(BASE + '/api/conversations/' + conversationId + '/chunks')
|
|
345
417
|
.then(function(res) { return res.json(); })
|
|
346
418
|
.then(function(data) {
|
|
419
|
+
isLoadingHistory = false;
|
|
347
420
|
if (!data.ok || !Array.isArray(data.chunks) || data.chunks.length === 0) {
|
|
348
421
|
showVoiceEmpty(container);
|
|
349
422
|
return;
|
|
@@ -356,19 +429,20 @@
|
|
|
356
429
|
addVoiceBlock(block.text, false);
|
|
357
430
|
hasContent = true;
|
|
358
431
|
} else if (block.type === 'result') {
|
|
359
|
-
addVoiceResultBlock(block);
|
|
432
|
+
addVoiceResultBlock(block, false);
|
|
360
433
|
hasContent = true;
|
|
361
434
|
}
|
|
362
435
|
});
|
|
363
436
|
if (!hasContent) showVoiceEmpty(container);
|
|
364
437
|
})
|
|
365
438
|
.catch(function() {
|
|
439
|
+
isLoadingHistory = false;
|
|
366
440
|
showVoiceEmpty(container);
|
|
367
441
|
});
|
|
368
442
|
}
|
|
369
443
|
|
|
370
444
|
/**
 * Replace the container's content with the voice view's empty-state
 * placeholder: a microphone icon followed by short usage instructions.
 *
 * @param {Element} container - Element whose innerHTML is overwritten.
 */
function showVoiceEmpty(container) {
  var micIcon = '<svg viewBox="0 0 24 24" width="64" height="64" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">' +
    '<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>' +
    '<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>' +
    '<line x1="12" y1="19" x2="12" y2="23"/>' +
    '<line x1="8" y1="23" x2="16" y2="23"/>' +
    '</svg>';
  var instructions = 'Hold the microphone button to record.<br>Release to transcribe. Tap Send to submit.<br>New responses will be read aloud.';
  container.innerHTML = [
    '<div class="voice-empty">',
    '<div class="voice-empty-icon">', micIcon, '</div>',
    '<div>', instructions, '</div>',
    '</div>'
  ].join('');
}
|
|
373
447
|
|
|
374
448
|
function activate() {
|