agentgui 1.0.142 → 1.0.144
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/database.js +7 -0
- package/lib/claude-runner.js +8 -0
- package/lib/speech.js +192 -0
- package/package.json +3 -1
- package/server.js +122 -81
- package/static/js/client.js +14 -2
- package/static/js/streaming-renderer.js +22 -4
- package/static/js/voice.js +124 -173
package/database.js
CHANGED
|
@@ -510,6 +510,13 @@ export const queries = {
|
|
|
510
510
|
return stmt.all(conversationId, status);
|
|
511
511
|
},
|
|
512
512
|
|
|
513
|
+
getActiveSessions() {
|
|
514
|
+
const stmt = db.prepare(
|
|
515
|
+
"SELECT * FROM sessions WHERE status IN ('active', 'pending') ORDER BY started_at DESC"
|
|
516
|
+
);
|
|
517
|
+
return stmt.all();
|
|
518
|
+
},
|
|
519
|
+
|
|
513
520
|
createEvent(type, data, conversationId = null, sessionId = null) {
|
|
514
521
|
const id = generateId('evt');
|
|
515
522
|
const now = Date.now();
|
package/lib/claude-runner.js
CHANGED
|
@@ -52,6 +52,10 @@ class AgentRunner {
|
|
|
52
52
|
const args = this.buildArgs(prompt, config);
|
|
53
53
|
const proc = spawn(this.command, args, { cwd });
|
|
54
54
|
|
|
55
|
+
if (config.onPid) {
|
|
56
|
+
try { config.onPid(proc.pid); } catch (e) {}
|
|
57
|
+
}
|
|
58
|
+
|
|
55
59
|
let jsonBuffer = '';
|
|
56
60
|
const outputs = [];
|
|
57
61
|
let timedOut = false;
|
|
@@ -150,6 +154,10 @@ class AgentRunner {
|
|
|
150
154
|
|
|
151
155
|
const proc = spawn(cmd, args, { cwd });
|
|
152
156
|
|
|
157
|
+
if (config.onPid) {
|
|
158
|
+
try { config.onPid(proc.pid); } catch (e) {}
|
|
159
|
+
}
|
|
160
|
+
|
|
153
161
|
const outputs = [];
|
|
154
162
|
let timedOut = false;
|
|
155
163
|
let sessionId = null;
|
package/lib/speech.js
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { pipeline, env } from '@huggingface/transformers';
|
|
2
|
+
import { createRequire } from 'module';
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
6
|
+
|
|
7
|
+
const require = createRequire(import.meta.url);
|
|
8
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
9
|
+
const ROOT = path.dirname(__dirname);
|
|
10
|
+
const DATA_DIR = path.join(ROOT, 'data');
|
|
11
|
+
|
|
12
|
+
const SPEAKER_EMBEDDINGS_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
|
|
13
|
+
const SPEAKER_EMBEDDINGS_PATH = path.join(DATA_DIR, 'speaker_embeddings.bin');
|
|
14
|
+
const SAMPLE_RATE_TTS = 16000;
|
|
15
|
+
const SAMPLE_RATE_STT = 16000;
|
|
16
|
+
|
|
17
|
+
let sttPipeline = null;
|
|
18
|
+
let ttsPipeline = null;
|
|
19
|
+
let speakerEmbeddings = null;
|
|
20
|
+
let sttLoading = false;
|
|
21
|
+
let ttsLoading = false;
|
|
22
|
+
|
|
23
|
+
/**
 * Resolve which Whisper model identifier to load.
 *
 * Returns the `models/onnx-community/whisper-base` directory that sits next to
 * the resolved `webtalk` entry point when that directory exists on disk;
 * otherwise (package not resolvable, directory missing, or any other error)
 * returns the model id 'onnx-community/whisper-base'.
 *
 * @returns {string} A local directory path or a model id.
 */
function whisperModelPath() {
  const fallbackModelId = 'onnx-community/whisper-base';
  let bundledDir;
  try {
    const packageDir = path.dirname(require.resolve('webtalk'));
    bundledDir = path.join(packageDir, 'models', 'onnx-community', 'whisper-base');
    if (!fs.existsSync(bundledDir)) {
      return fallbackModelId;
    }
  } catch (_) {
    // Best effort: any failure while probing for a bundled copy means "use the model id".
    return fallbackModelId;
  }
  return bundledDir;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Load the speaker-embedding vector used for text-to-speech, downloading it to
 * SPEAKER_EMBEDDINGS_PATH on first use and caching the parsed result in the
 * module-level `speakerEmbeddings` variable.
 *
 * @returns {Promise<Float32Array>} The embedding values read from the file.
 * @throws {Error} If the download fails or the cached file is not a whole
 *   number of 32-bit floats.
 */
async function ensureSpeakerEmbeddings() {
  if (speakerEmbeddings) return speakerEmbeddings;

  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(DATA_DIR, { recursive: true });

  if (!fs.existsSync(SPEAKER_EMBEDDINGS_PATH)) {
    const resp = await fetch(SPEAKER_EMBEDDINGS_URL);
    if (!resp.ok) throw new Error('Failed to download speaker embeddings');
    // Write to a temporary name and rename, so an interrupted write never
    // leaves a partial file at the final path for later runs to trust.
    const tmpPath = `${SPEAKER_EMBEDDINGS_PATH}.${process.pid}.tmp`;
    fs.writeFileSync(tmpPath, Buffer.from(await resp.arrayBuffer()));
    fs.renameSync(tmpPath, SPEAKER_EMBEDDINGS_PATH);
  }

  const buf = fs.readFileSync(SPEAKER_EMBEDDINGS_PATH);
  if (buf.length === 0 || buf.length % Float32Array.BYTES_PER_ELEMENT !== 0) {
    // A damaged cache would otherwise fail on every call; remove it so the
    // next call downloads a fresh copy.
    fs.rmSync(SPEAKER_EMBEDDINGS_PATH, { force: true });
    throw new Error('Speaker embeddings file is corrupt; it will be re-downloaded on the next attempt');
  }

  // Copy into a fresh Uint8Array first: its ArrayBuffer starts at offset 0, so
  // the Float32Array view is always correctly aligned.
  speakerEmbeddings = new Float32Array(new Uint8Array(buf).buffer);
  return speakerEmbeddings;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Return the shared speech-to-text pipeline, creating it on first use.
 *
 * Only one load runs at a time. Callers that arrive while a load is in flight
 * poll until it finishes; if that load failed they attempt the load themselves
 * (and surface its real error) instead of receiving `null`.
 *
 * NOTE(review): this mutates the shared `env` object, which getTTS() also
 * writes to — overlapping STT/TTS loads may affect each other; confirm.
 *
 * @returns {Promise<Function>} The automatic-speech-recognition pipeline.
 * @throws Whatever `pipeline(...)` rejects with when the model cannot be loaded.
 */
async function getSTT() {
  // Wait out any load started by another caller before deciding what to do.
  while (sttLoading) await new Promise(r => setTimeout(r, 100));
  if (sttPipeline) return sttPipeline;

  // No await between the checks above and this assignment, so exactly one
  // caller becomes the loader.
  sttLoading = true;
  try {
    const modelPath = whisperModelPath();
    // A bare name (no '/') or an existing path is treated as local-only.
    const isLocal = !modelPath.includes('/') || fs.existsSync(modelPath);
    env.allowLocalModels = true;
    env.allowRemoteModels = !isLocal;
    if (isLocal) env.localModelPath = '';
    sttPipeline = await pipeline('automatic-speech-recognition', modelPath, {
      device: 'cpu',
      local_files_only: isLocal,
    });
    return sttPipeline;
  } finally {
    sttLoading = false;
  }
}
|
|
67
|
+
|
|
68
|
+
/**
 * Return the shared text-to-speech pipeline, creating it on first use and
 * making sure the speaker embeddings are loaded as part of the same step.
 *
 * Only one load runs at a time. Callers that arrive while a load is in flight
 * poll until it finishes; if that load failed they attempt the load themselves
 * (and surface its real error) instead of receiving `null`.
 *
 * @returns {Promise<Function>} The text-to-speech pipeline.
 * @throws Whatever `pipeline(...)` or `ensureSpeakerEmbeddings()` rejects with.
 */
async function getTTS() {
  // Wait out any load started by another caller before deciding what to do.
  while (ttsLoading) await new Promise(r => setTimeout(r, 100));
  if (ttsPipeline) return ttsPipeline;

  // No await between the checks above and this assignment, so exactly one
  // caller becomes the loader.
  ttsLoading = true;
  try {
    env.allowRemoteModels = true;
    ttsPipeline = await pipeline('text-to-speech', 'Xenova/speecht5_tts', {
      device: 'cpu',
      dtype: 'fp32',
    });
    await ensureSpeakerEmbeddings();
    return ttsPipeline;
  } finally {
    ttsLoading = false;
  }
}
|
|
87
|
+
|
|
88
|
+
/**
 * Decode a RIFF/WAV byte buffer into mono Float32 samples in [-1, 1).
 *
 * Only the first channel of each frame is returned. Supported sample layouts:
 * 8-bit unsigned, 16/24/32-bit signed integer PCM, and 32/64-bit float.
 *
 * @param {ArrayBuffer|ArrayBufferView} buffer - Complete WAV file contents.
 *   Typed arrays and Node Buffers are read through their own
 *   byteOffset/byteLength window, not the whole backing ArrayBuffer.
 * @returns {{audio: Float32Array, sampleRate: number}} First-channel samples
 *   and the sample rate from the header.
 * @throws {Error} If the input is not RIFF, the header is truncated or
 *   invalid, or the bit depth is unsupported.
 */
function decodeWavToFloat32(buffer) {
  // A Buffer is usually a window into a larger (pooled) ArrayBuffer, so the
  // view must be limited to that window.
  const view = ArrayBuffer.isView(buffer)
    ? new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength)
    : new DataView(buffer);
  const total = view.byteLength;
  const fourCC = (at) => String.fromCharCode(
    view.getUint8(at), view.getUint8(at + 1), view.getUint8(at + 2), view.getUint8(at + 3)
  );

  if (total < 12 || fourCC(0) !== 'RIFF') throw new Error('Not a WAV file');

  // Walk the chunk list (id + little-endian size + body, padded to even length).
  let fmtAt = -1;
  let fmtSize = 0;
  let dataAt = -1;
  let declaredDataSize = 0;
  for (let at = 12; at + 8 <= total;) {
    const id = fourCC(at);
    const size = view.getUint32(at + 4, true);
    const bodyAt = at + 8;
    if (id === 'data') {
      dataAt = bodyAt;
      declaredDataSize = size;
      break;
    }
    if (id === 'fmt ' && fmtAt < 0) {
      fmtAt = bodyAt;
      fmtSize = size;
    }
    at = bodyAt + size + (size % 2);
  }

  // Lenient fallbacks for files whose chunk sizes are unusable: scan for the
  // 'data' tag, then assume the canonical 44-byte header layout.
  if (dataAt < 0) {
    for (let i = 12; i + 8 <= total; i++) {
      if (fourCC(i) === 'data') {
        dataAt = i + 8;
        declaredDataSize = view.getUint32(i + 4, true);
        break;
      }
    }
  }
  if (dataAt < 0) dataAt = 44;
  if (fmtAt < 0) {
    fmtAt = 20;
    fmtSize = 16;
  }
  if (fmtAt + 16 > total) throw new Error('Truncated WAV header');

  let format = view.getUint16(fmtAt, true);
  const numChannels = view.getUint16(fmtAt + 2, true);
  const sampleRate = view.getUint32(fmtAt + 4, true);
  const bitsPerSample = view.getUint16(fmtAt + 14, true);
  // WAVE_FORMAT_EXTENSIBLE stores the real format code at the start of the
  // SubFormat GUID, 24 bytes into the fmt body.
  if (format === 0xfffe && fmtSize >= 26 && fmtAt + 26 <= total) {
    format = view.getUint16(fmtAt + 24, true);
  }
  if (numChannels === 0) throw new Error('Invalid WAV header: zero channels');

  let readSample;
  switch (bitsPerSample) {
    case 8:
      readSample = (at) => (view.getUint8(at) - 128) / 128;
      break;
    case 16:
      readSample = (at) => view.getInt16(at, true) / 32768;
      break;
    case 24:
      readSample = (at) =>
        (view.getUint8(at) | (view.getUint8(at + 1) << 8) | (view.getInt8(at + 2) << 16)) / 8388608;
      break;
    case 32:
      // Format code 1 is integer PCM; anything else is read as IEEE float.
      readSample = format === 1
        ? (at) => view.getInt32(at, true) / 2147483648
        : (at) => view.getFloat32(at, true);
      break;
    case 64:
      readSample = (at) => view.getFloat64(at, true);
      break;
    default:
      throw new Error(`Unsupported WAV bit depth: ${bitsPerSample}`);
  }

  // Streaming writers may leave the data size as 0 or 0xFFFFFFFF; in that case
  // (or when it overruns the buffer) use everything after the chunk header.
  const available = Math.max(0, total - dataAt);
  const dataBytes = declaredDataSize > 0 && declaredDataSize <= available
    ? declaredDataSize
    : available;
  const frameBytes = (bitsPerSample / 8) * numChannels;
  const numSamples = Math.floor(dataBytes / frameBytes);

  const audio = new Float32Array(numSamples);
  for (let i = 0; i < numSamples; i++) {
    audio[i] = readSample(dataAt + i * frameBytes);
  }
  return { audio, sampleRate };
}
|
|
118
|
+
|
|
119
|
+
/**
 * Resample audio to SAMPLE_RATE_STT using linear interpolation between the
 * two nearest source samples.
 *
 * @param {Float32Array} audio - Source samples.
 * @param {number} fromRate - Sample rate of `audio`.
 * @returns {Float32Array} `audio` itself when the rates already match,
 *   otherwise a new array of resampled values.
 */
function resampleTo16k(audio, fromRate) {
  if (fromRate === SAMPLE_RATE_STT) {
    return audio;
  }

  const step = fromRate / SAMPLE_RATE_STT;
  const lastSourceIndex = audio.length - 1;
  const resampled = new Float32Array(Math.round(audio.length / step));

  for (let outIndex = 0; outIndex < resampled.length; outIndex += 1) {
    const position = outIndex * step;
    const below = Math.floor(position);
    // Clamp so the final output samples never read past the end of the source.
    const above = Math.min(below + 1, lastSourceIndex);
    const weight = position - below;
    resampled[outIndex] = audio[below] * (1 - weight) + audio[above] * weight;
  }

  return resampled;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Serialize float samples as a mono, 16-bit PCM WAV file.
 *
 * Each sample is clamped to [-1, 1] and scaled to the signed 16-bit range
 * (negative values by 32768, non-negative values by 32767).
 *
 * @param {Float32Array} float32Audio - Samples to encode.
 * @param {number} sampleRate - Sample rate written into the header.
 * @returns {Buffer} A 44-byte header followed by little-endian samples.
 */
function encodeWav(float32Audio, sampleRate) {
  const HEADER_BYTES = 44;
  const SAMPLE_BYTES = 2;
  const sampleCount = float32Audio.length;
  const payloadBytes = sampleCount * SAMPLE_BYTES;

  const storage = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const out = new DataView(storage);
  const putTag = (at, tag) => {
    for (let k = 0; k < tag.length; k++) out.setUint8(at + k, tag.charCodeAt(k));
  };
  const putU16 = (at, value) => out.setUint16(at, value, true);
  const putU32 = (at, value) => out.setUint32(at, value, true);

  // RIFF container header.
  putTag(0, 'RIFF');
  putU32(4, 36 + payloadBytes);
  putTag(8, 'WAVE');

  // Format chunk: PCM (1), one channel, 16 bits per sample.
  putTag(12, 'fmt ');
  putU32(16, 16);
  putU16(20, 1);
  putU16(22, 1);
  putU32(24, sampleRate);
  putU32(28, sampleRate * SAMPLE_BYTES);
  putU16(32, SAMPLE_BYTES);
  putU16(34, 16);

  // Data chunk.
  putTag(36, 'data');
  putU32(40, payloadBytes);

  let cursor = HEADER_BYTES;
  for (let n = 0; n < sampleCount; n++) {
    const clamped = Math.max(-1, Math.min(1, float32Audio[n]));
    const scaled = clamped < 0 ? clamped * 32768 : clamped * 32767;
    out.setInt16(cursor, scaled, true);
    cursor += SAMPLE_BYTES;
  }

  return Buffer.from(storage);
}
|
|
160
|
+
|
|
161
|
+
/**
 * Transcribe audio to text with the speech-to-text pipeline.
 *
 * Input starting with the ASCII tag 'RIFF' is decoded as WAV and resampled;
 * anything else is interpreted as raw 32-bit float samples and passed to the
 * pipeline as-is.
 *
 * @param {Buffer|ArrayBuffer|ArrayBufferView} audioBuffer - Audio bytes.
 * @returns {Promise<string>} The pipeline's `text` result, or '' when absent.
 * @throws Whatever getSTT(), the WAV decoder, or the pipeline throws.
 */
async function transcribe(audioBuffer) {
  const stt = await getSTT();

  let buf;
  if (Buffer.isBuffer(audioBuffer)) {
    buf = audioBuffer;
  } else if (ArrayBuffer.isView(audioBuffer)) {
    // Wrap the same bytes; Buffer.from(typedArray) would instead convert each
    // element to a single byte and destroy non-byte data.
    buf = Buffer.from(audioBuffer.buffer, audioBuffer.byteOffset, audioBuffer.byteLength);
  } else {
    buf = Buffer.from(audioBuffer);
  }

  const isWav = buf.length > 4 && buf.toString('ascii', 0, 4) === 'RIFF';
  let audio;
  if (isWav) {
    const decoded = decodeWavToFloat32(buf);
    audio = resampleTo16k(decoded.audio, decoded.sampleRate);
  } else {
    // A Float32Array view needs a 4-byte-aligned offset and a whole number of
    // elements. Copy the usable bytes into fresh, aligned storage and drop any
    // trailing partial sample.
    const floatBytes = Float32Array.BYTES_PER_ELEMENT;
    const usableBytes = buf.byteLength - (buf.byteLength % floatBytes);
    const aligned = new Uint8Array(usableBytes);
    aligned.set(buf.subarray(0, usableBytes));
    audio = new Float32Array(aligned.buffer);
  }

  const result = await stt(audio);
  return result.text || '';
}
|
|
175
|
+
|
|
176
|
+
/**
 * Turn text into WAV audio using the text-to-speech pipeline and the cached
 * speaker embeddings.
 *
 * @param {string} text - Text to speak.
 * @returns {Promise<Buffer>} Output of encodeWav() for the generated audio,
 *   using the pipeline's `sampling_rate` or SAMPLE_RATE_TTS when it is falsy.
 */
async function synthesize(text) {
  const speak = await getTTS();
  const speakerVector = await ensureSpeakerEmbeddings();
  const generated = await speak(text, { speaker_embeddings: speakerVector });
  const rate = generated.sampling_rate || SAMPLE_RATE_TTS;
  return encodeWav(generated.audio, rate);
}
|
|
182
|
+
|
|
183
|
+
/**
 * Report the current state of the speech pipelines.
 *
 * @returns {{sttReady: boolean, ttsReady: boolean, sttLoading: boolean, ttsLoading: boolean}}
 *   `*Ready` is true once the corresponding pipeline variable is set;
 *   `*Loading` mirrors the module-level loading flags.
 */
function getStatus() {
  const sttReady = Boolean(sttPipeline);
  const ttsReady = Boolean(ttsPipeline);
  return { sttReady, ttsReady, sttLoading, ttsLoading };
}
|
|
191
|
+
|
|
192
|
+
export { transcribe, synthesize, getSTT, getTTS, getStatus };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentgui",
|
|
3
|
-
"version": "1.0.142",
|
|
3
|
+
"version": "1.0.144",
|
|
4
4
|
"description": "Multi-agent ACP client with real-time communication",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "server.js",
|
|
@@ -22,10 +22,12 @@
|
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"@anthropic-ai/claude-code": "^2.1.37",
|
|
25
|
+
"@huggingface/transformers": "^3.8.1",
|
|
25
26
|
"better-sqlite3": "^12.6.2",
|
|
26
27
|
"busboy": "^1.6.0",
|
|
27
28
|
"express": "^5.2.1",
|
|
28
29
|
"fsbrowse": "^0.2.13",
|
|
30
|
+
"onnxruntime-node": "^1.24.1",
|
|
29
31
|
"webtalk": "github:anEntrypoint/realtime-whisper-webgpu",
|
|
30
32
|
"ws": "^8.14.2"
|
|
31
33
|
}
|
package/server.js
CHANGED
|
@@ -8,12 +8,12 @@ import { execSync } from 'child_process';
|
|
|
8
8
|
import { createRequire } from 'module';
|
|
9
9
|
import { queries } from './database.js';
|
|
10
10
|
import { runClaudeWithStreaming } from './lib/claude-runner.js';
|
|
11
|
+
import { transcribe, synthesize, getStatus as getSpeechStatus } from './lib/speech.js';
|
|
11
12
|
|
|
12
13
|
const require = createRequire(import.meta.url);
|
|
13
14
|
const express = require('express');
|
|
14
15
|
const Busboy = require('busboy');
|
|
15
16
|
const fsbrowse = require('fsbrowse');
|
|
16
|
-
const { webtalk } = require('webtalk');
|
|
17
17
|
|
|
18
18
|
const SYSTEM_PROMPT = `Always write your responses in ripple-ui enhanced HTML. Avoid overriding light/dark mode CSS variables. Use all the benefits of HTML to express technical details with proper semantic markup, tables, code blocks, headings, and lists. Write clean, well-structured HTML that respects the existing design system.`;
|
|
19
19
|
|
|
@@ -37,28 +37,6 @@ if (!fs.existsSync(staticDir)) fs.mkdirSync(staticDir, { recursive: true });
|
|
|
37
37
|
// Express sub-app for fsbrowse file browser and file upload
|
|
38
38
|
const expressApp = express();
|
|
39
39
|
|
|
40
|
-
// Separate Express app for webtalk (STT/TTS) - isolated to contain COEP/COOP headers
|
|
41
|
-
const webtalkApp = express();
|
|
42
|
-
const webtalkInstance = webtalk(webtalkApp, { path: '/webtalk' });
|
|
43
|
-
|
|
44
|
-
const webtalkSdkDir = path.dirname(require.resolve('webtalk'));
|
|
45
|
-
const WASM_MIN_BYTES = 1000000;
|
|
46
|
-
const webtalkCriticalFiles = [
|
|
47
|
-
{ path: path.join(webtalkSdkDir, 'assets', 'ort-wasm-simd-threaded.jsep.wasm'), minBytes: WASM_MIN_BYTES }
|
|
48
|
-
];
|
|
49
|
-
for (const file of webtalkCriticalFiles) {
|
|
50
|
-
try {
|
|
51
|
-
if (fs.existsSync(file.path)) {
|
|
52
|
-
const stat = fs.statSync(file.path);
|
|
53
|
-
if (stat.size < file.minBytes) {
|
|
54
|
-
debugLog(`Removing corrupt file ${path.basename(file.path)} (${stat.size} bytes, need ${file.minBytes}+)`);
|
|
55
|
-
fs.unlinkSync(file.path);
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
} catch (e) { debugLog(`File check error: ${e.message}`); }
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
webtalkInstance.init().catch(err => debugLog('Webtalk init: ' + err.message));
|
|
62
40
|
|
|
63
41
|
// File upload endpoint - copies dropped files to conversation workingDirectory
|
|
64
42
|
expressApp.post(BASE_URL + '/api/upload/:conversationId', (req, res) => {
|
|
@@ -165,63 +143,9 @@ const server = http.createServer(async (req, res) => {
|
|
|
165
143
|
res.setHeader('Access-Control-Allow-Origin', '*');
|
|
166
144
|
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
|
|
167
145
|
res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
|
|
168
|
-
res.setHeader('Cross-Origin-Embedder-Policy', 'credentialless');
|
|
169
|
-
res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
|
|
170
|
-
res.setHeader('Cross-Origin-Resource-Policy', 'cross-origin');
|
|
171
146
|
if (req.method === 'OPTIONS') { res.writeHead(200); res.end(); return; }
|
|
172
147
|
|
|
173
148
|
const pathOnly = req.url.split('?')[0];
|
|
174
|
-
const webtalkPrefix = BASE_URL + '/webtalk';
|
|
175
|
-
const isWebtalkRoute = pathOnly.startsWith(webtalkPrefix) ||
|
|
176
|
-
pathOnly.startsWith(BASE_URL + '/api/tts-status') ||
|
|
177
|
-
pathOnly.startsWith(BASE_URL + '/assets/') ||
|
|
178
|
-
pathOnly.startsWith(BASE_URL + '/tts/') ||
|
|
179
|
-
pathOnly.startsWith(BASE_URL + '/models/') ||
|
|
180
|
-
pathOnly.startsWith('/webtalk') ||
|
|
181
|
-
pathOnly.startsWith('/assets/') ||
|
|
182
|
-
pathOnly.startsWith('/tts/') ||
|
|
183
|
-
pathOnly.startsWith('/models/');
|
|
184
|
-
if (isWebtalkRoute) {
|
|
185
|
-
const webtalkSdkDir = path.dirname(require.resolve('webtalk'));
|
|
186
|
-
const sdkFiles = { '/demo': 'app.html', '/sdk.js': 'sdk.js', '/stt.js': 'stt.js', '/tts.js': 'tts.js', '/tts-utils.js': 'tts-utils.js' };
|
|
187
|
-
let stripped = pathOnly.startsWith(webtalkPrefix) ? pathOnly.slice(webtalkPrefix.length) : (pathOnly.startsWith('/webtalk') ? pathOnly.slice('/webtalk'.length) : null);
|
|
188
|
-
if (stripped !== null && !sdkFiles[stripped] && !stripped.endsWith('.js') && sdkFiles[stripped + '.js']) stripped += '.js';
|
|
189
|
-
if (stripped !== null && sdkFiles[stripped]) {
|
|
190
|
-
const filePath = path.join(webtalkSdkDir, sdkFiles[stripped]);
|
|
191
|
-
return fs.readFile(filePath, 'utf-8', (err, content) => {
|
|
192
|
-
if (err) { res.writeHead(404); res.end('Not found'); return; }
|
|
193
|
-
if (stripped === '/demo') {
|
|
194
|
-
let patched = content
|
|
195
|
-
.replace(/from\s+['"](\/webtalk\/[^'"]+)['"]/g, (_, p) => `from '${BASE_URL}${p}'`)
|
|
196
|
-
.replace(/from\s+['"]\.\/([^'"]+)['"]/g, (_, p) => `from '${BASE_URL}/webtalk/${p}'`)
|
|
197
|
-
.replace('<head>', `<head>\n <script>window.__WEBTALK_BASE='${BASE_URL}';</script>`);
|
|
198
|
-
res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8', 'Cross-Origin-Embedder-Policy': 'credentialless', 'Cross-Origin-Opener-Policy': 'same-origin', 'Cross-Origin-Resource-Policy': 'cross-origin' });
|
|
199
|
-
return res.end(patched);
|
|
200
|
-
}
|
|
201
|
-
let js = content;
|
|
202
|
-
const ensureExt = (mod) => mod.endsWith('.js') ? mod : mod + '.js';
|
|
203
|
-
if (js.includes('require(') || js.includes('module.exports')) {
|
|
204
|
-
js = js.replace(/const\s*\{([^}]+)\}\s*=\s*require\(['"]\.\/([^'"]+)['"]\);?/g, (_, names, mod) => `import {${names}} from '${BASE_URL}/webtalk/${ensureExt(mod)}';`);
|
|
205
|
-
js = js.replace(/const\s+(\w+)\s*=\s*require\(['"]\.\/([^'"]+)['"]\);?/g, (_, name, mod) => `import ${name} from '${BASE_URL}/webtalk/${ensureExt(mod)}';`);
|
|
206
|
-
js = js.replace(/module\.exports\s*=\s*\{([^}]+)\};?/, (_, names) => `export {${names.trim().replace(/\s+/g, ' ')} };`);
|
|
207
|
-
}
|
|
208
|
-
js = js.replace(/from\s+['"]\.\/([^'"]+)['"]/g, (_, p) => `from '${BASE_URL}/webtalk/${ensureExt(p)}'`);
|
|
209
|
-
res.writeHead(200, { 'Content-Type': 'application/javascript; charset=utf-8', 'Cross-Origin-Resource-Policy': 'cross-origin' });
|
|
210
|
-
res.end(js);
|
|
211
|
-
});
|
|
212
|
-
}
|
|
213
|
-
if (req.url.startsWith(BASE_URL)) req.url = req.url.slice(BASE_URL.length) || '/';
|
|
214
|
-
const isModelOrAsset = pathOnly.includes('/models/') || pathOnly.includes('/assets/') || pathOnly.endsWith('.wasm') || pathOnly.endsWith('.onnx');
|
|
215
|
-
if (isModelOrAsset) {
|
|
216
|
-
res.setHeader('Cache-Control', 'public, max-age=604800, immutable');
|
|
217
|
-
}
|
|
218
|
-
const origSetHeader = res.setHeader.bind(res);
|
|
219
|
-
res.setHeader = (name, value) => {
|
|
220
|
-
if (name.toLowerCase() === 'cross-origin-embedder-policy') return;
|
|
221
|
-
origSetHeader(name, value);
|
|
222
|
-
};
|
|
223
|
-
return webtalkApp(req, res);
|
|
224
|
-
}
|
|
225
149
|
|
|
226
150
|
// Route file upload and fsbrowse requests through Express sub-app
|
|
227
151
|
if (pathOnly.startsWith(BASE_URL + '/api/upload/') || pathOnly.startsWith(BASE_URL + '/files/')) {
|
|
@@ -516,6 +440,53 @@ const server = http.createServer(async (req, res) => {
|
|
|
516
440
|
return;
|
|
517
441
|
}
|
|
518
442
|
|
|
443
|
+
if (routePath === '/api/stt' && req.method === 'POST') {
|
|
444
|
+
try {
|
|
445
|
+
const chunks = [];
|
|
446
|
+
for await (const chunk of req) chunks.push(chunk);
|
|
447
|
+
const audioBuffer = Buffer.concat(chunks);
|
|
448
|
+
if (audioBuffer.length === 0) {
|
|
449
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
450
|
+
res.end(JSON.stringify({ error: 'No audio data' }));
|
|
451
|
+
return;
|
|
452
|
+
}
|
|
453
|
+
const text = await transcribe(audioBuffer);
|
|
454
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
455
|
+
res.end(JSON.stringify({ text: text.trim() }));
|
|
456
|
+
} catch (err) {
|
|
457
|
+
debugLog('[STT] Error: ' + err.message);
|
|
458
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
459
|
+
res.end(JSON.stringify({ error: err.message }));
|
|
460
|
+
}
|
|
461
|
+
return;
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
if (routePath === '/api/tts' && req.method === 'POST') {
|
|
465
|
+
try {
|
|
466
|
+
const body = await parseBody(req);
|
|
467
|
+
const text = body.text || '';
|
|
468
|
+
if (!text) {
|
|
469
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
470
|
+
res.end(JSON.stringify({ error: 'No text provided' }));
|
|
471
|
+
return;
|
|
472
|
+
}
|
|
473
|
+
const wavBuffer = await synthesize(text);
|
|
474
|
+
res.writeHead(200, { 'Content-Type': 'audio/wav', 'Content-Length': wavBuffer.length });
|
|
475
|
+
res.end(wavBuffer);
|
|
476
|
+
} catch (err) {
|
|
477
|
+
debugLog('[TTS] Error: ' + err.message);
|
|
478
|
+
res.writeHead(500, { 'Content-Type': 'application/json' });
|
|
479
|
+
res.end(JSON.stringify({ error: err.message }));
|
|
480
|
+
}
|
|
481
|
+
return;
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
if (routePath === '/api/speech-status' && req.method === 'GET') {
|
|
485
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
486
|
+
res.end(JSON.stringify(getSpeechStatus()));
|
|
487
|
+
return;
|
|
488
|
+
}
|
|
489
|
+
|
|
519
490
|
if (routePath === '/api/folders' && req.method === 'POST') {
|
|
520
491
|
const body = await parseBody(req);
|
|
521
492
|
const folderPath = body.path || STARTUP_CWD;
|
|
@@ -591,7 +562,7 @@ const server = http.createServer(async (req, res) => {
|
|
|
591
562
|
}
|
|
592
563
|
});
|
|
593
564
|
|
|
594
|
-
const MIME_TYPES = { '.html': 'text/html; charset=utf-8', '.js': 'application/javascript; charset=utf-8', '.css': 'text/css; charset=utf-8', '.json': 'application/json', '.png': 'image/png', '.jpg': 'image/jpeg', '.svg': 'image/svg+xml'
|
|
565
|
+
const MIME_TYPES = { '.html': 'text/html; charset=utf-8', '.js': 'application/javascript; charset=utf-8', '.css': 'text/css; charset=utf-8', '.json': 'application/json', '.png': 'image/png', '.jpg': 'image/jpeg', '.svg': 'image/svg+xml' };
|
|
595
566
|
|
|
596
567
|
function serveFile(filePath, res) {
|
|
597
568
|
const ext = path.extname(filePath).toLowerCase();
|
|
@@ -613,7 +584,7 @@ function serveFile(filePath, res) {
|
|
|
613
584
|
fs.readFile(filePath, (err, data) => {
|
|
614
585
|
if (err) { res.writeHead(500); res.end('Server error'); return; }
|
|
615
586
|
let content = data.toString();
|
|
616
|
-
const baseTag = `<script>window.__BASE_URL='${BASE_URL}';</script
|
|
587
|
+
const baseTag = `<script>window.__BASE_URL='${BASE_URL}';</script>`;
|
|
617
588
|
content = content.replace('<head>', '<head>\n ' + baseTag);
|
|
618
589
|
if (watch) {
|
|
619
590
|
content += `\n<script>(function(){const ws=new WebSocket('ws://'+location.host+'${BASE_URL}/hot-reload');ws.onmessage=e=>{if(JSON.parse(e.data).type==='reload')location.reload()};})();</script>`;
|
|
@@ -640,7 +611,7 @@ function persistChunkWithRetry(sessionId, conversationId, sequence, blockType, b
|
|
|
640
611
|
|
|
641
612
|
async function processMessageWithStreaming(conversationId, messageId, sessionId, content, agentId) {
|
|
642
613
|
const startTime = Date.now();
|
|
643
|
-
activeExecutions.set(conversationId,
|
|
614
|
+
activeExecutions.set(conversationId, { pid: null, startTime, sessionId });
|
|
644
615
|
queries.setIsStreaming(conversationId, true);
|
|
645
616
|
queries.updateSession(sessionId, { status: 'active' });
|
|
646
617
|
|
|
@@ -756,7 +727,11 @@ async function processMessageWithStreaming(conversationId, messageId, sessionId,
|
|
|
756
727
|
print: true,
|
|
757
728
|
resumeSessionId,
|
|
758
729
|
systemPrompt: SYSTEM_PROMPT,
|
|
759
|
-
onEvent
|
|
730
|
+
onEvent,
|
|
731
|
+
onPid: (pid) => {
|
|
732
|
+
const entry = activeExecutions.get(conversationId);
|
|
733
|
+
if (entry) entry.pid = pid;
|
|
734
|
+
}
|
|
760
735
|
};
|
|
761
736
|
|
|
762
737
|
const { outputs, sessionId: claudeSessionId } = await runClaudeWithStreaming(content, cwd, agentId || 'claude-code', config);
|
|
@@ -1030,6 +1005,66 @@ server.on('error', (err) => {
|
|
|
1030
1005
|
}
|
|
1031
1006
|
});
|
|
1032
1007
|
|
|
1008
|
+
/**
 * Mark sessions that the database still lists as active/pending, but that have
 * no entry in `activeExecutions`, as failed.
 *
 * For each such session this updates the session row, clears the
 * conversation's streaming flag and broadcasts a `streaming_error` event.
 * Any error is logged and swallowed so the caller is never interrupted.
 *
 * NOTE(review): rows are assumed to expose `id` and `conversationId`
 * properties — confirm against the sessions table schema.
 */
function recoverStaleSessions() {
  const reason = 'Agent died unexpectedly (server restart)';
  try {
    const candidates = queries.getActiveSessions ? queries.getActiveSessions() : [];
    let recovered = 0;

    for (const session of candidates) {
      // Skip sessions that this process is actually running.
      if (activeExecutions.has(session.conversationId)) continue;

      queries.updateSession(session.id, {
        status: 'error',
        error: reason,
        completed_at: Date.now()
      });
      queries.setIsStreaming(session.conversationId, false);
      broadcastSync({
        type: 'streaming_error',
        sessionId: session.id,
        conversationId: session.conversationId,
        error: reason,
        recoverable: false,
        timestamp: Date.now()
      });
      recovered += 1;
    }

    if (recovered > 0) {
      console.log(`[RECOVERY] Recovered ${recovered} stale active session(s)`);
    }
  } catch (err) {
    console.error('[RECOVERY] Stale session recovery error:', err.message);
  }
}
|
|
1038
|
+
|
|
1039
|
+
/**
 * Probe every tracked agent process and clean up after those that are gone.
 *
 * Entries without a PID yet are skipped. For a dead process this removes the
 * entry from `activeExecutions`, clears the conversation's streaming flag,
 * marks the session as errored, broadcasts a `streaming_error` event and
 * drains the conversation's message queue.
 */
function performAgentHealthCheck() {
  for (const [conversationId, entry] of activeExecutions) {
    if (!entry || !entry.pid) continue;
    try {
      // Signal 0 sends nothing; it only checks that the process can be signalled.
      process.kill(entry.pid, 0);
    } catch (err) {
      // EPERM means the process exists but we may not signal it, so it is not dead.
      if (err && err.code === 'EPERM') continue;

      debugLog(`[HEALTH] Agent PID ${entry.pid} for conv ${conversationId} is dead`);
      activeExecutions.delete(conversationId);
      queries.setIsStreaming(conversationId, false);
      if (entry.sessionId) {
        queries.updateSession(entry.sessionId, {
          status: 'error',
          error: 'Agent process died unexpectedly',
          completed_at: Date.now()
        });
      }
      broadcastSync({
        type: 'streaming_error',
        sessionId: entry.sessionId,
        conversationId,
        error: 'Agent process died unexpectedly',
        recoverable: false,
        timestamp: Date.now()
      });
      drainMessageQueue(conversationId);
    }
  }
}
|
|
1067
|
+
|
|
1033
1068
|
function onServerReady() {
|
|
1034
1069
|
console.log(`GMGUI running on http://localhost:${PORT}${BASE_URL}/`);
|
|
1035
1070
|
console.log(`Agents: ${discoveredAgents.map(a => a.name).join(', ') || 'none'}`);
|
|
@@ -1041,12 +1076,18 @@ function onServerReady() {
|
|
|
1041
1076
|
console.log(`Cleaned up ${deletedCount} empty conversation(s) on startup`);
|
|
1042
1077
|
}
|
|
1043
1078
|
|
|
1079
|
+
// Recover stale active sessions from previous run
|
|
1080
|
+
recoverStaleSessions();
|
|
1081
|
+
|
|
1044
1082
|
// Run auto-import immediately
|
|
1045
1083
|
performAutoImport();
|
|
1046
1084
|
|
|
1047
1085
|
// Then run it every 30 seconds (constant automatic importing)
|
|
1048
1086
|
setInterval(performAutoImport, 30000);
|
|
1049
1087
|
|
|
1088
|
+
// Agent health check every 30 seconds
|
|
1089
|
+
setInterval(performAgentHealthCheck, 30000);
|
|
1090
|
+
|
|
1050
1091
|
}
|
|
1051
1092
|
|
|
1052
1093
|
function performAutoImport() {
|
package/static/js/client.js
CHANGED
|
@@ -792,7 +792,13 @@ class AgentGUIClient {
|
|
|
792
792
|
const inputStr = JSON.stringify(block.input, null, 2);
|
|
793
793
|
inputHtml = `<details class="tool-input-details"><summary class="tool-input-summary">Input</summary><pre class="tool-input-pre">${this.escapeHtml(inputStr)}</pre></details>`;
|
|
794
794
|
}
|
|
795
|
-
|
|
795
|
+
const tn = block.name || 'unknown';
|
|
796
|
+
const foldable = tn.startsWith('mcp__') || tn === 'Edit';
|
|
797
|
+
if (foldable) {
|
|
798
|
+
html += `<details class="streaming-block-tool-use"><summary class="tool-use-header" style="cursor:pointer;user-select:none;list-style:none;"><span class="tool-use-icon">⚙</span> <span class="tool-use-name">${this.escapeHtml(tn)}</span></summary>${inputHtml}</details>`;
|
|
799
|
+
} else {
|
|
800
|
+
html += `<div class="streaming-block-tool-use"><div class="tool-use-header"><span class="tool-use-icon">⚙</span> <span class="tool-use-name">${this.escapeHtml(tn)}</span></div>${inputHtml}</div>`;
|
|
801
|
+
}
|
|
796
802
|
} else if (block.type === 'tool_result') {
|
|
797
803
|
const content = typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
|
|
798
804
|
const smartHtml = typeof StreamingRenderer !== 'undefined' ? StreamingRenderer.renderSmartContentHTML(content, this.escapeHtml.bind(this)) : `<pre class="tool-result-pre">${this.escapeHtml(content.length > 2000 ? content.substring(0, 2000) + '\n... (truncated)' : content)}</pre>`;
|
|
@@ -1433,7 +1439,13 @@ class AgentGUIClient {
|
|
|
1433
1439
|
const inputStr = JSON.stringify(block.input, null, 2);
|
|
1434
1440
|
inputHtml = `<details class="tool-input-details"><summary class="tool-input-summary">Input</summary><pre class="tool-input-pre">${this.escapeHtml(inputStr)}</pre></details>`;
|
|
1435
1441
|
}
|
|
1436
|
-
|
|
1442
|
+
const tn2 = block.name || 'unknown';
|
|
1443
|
+
const foldable2 = tn2.startsWith('mcp__') || tn2 === 'Edit';
|
|
1444
|
+
if (foldable2) {
|
|
1445
|
+
contentHtml += `<details class="streaming-block-tool-use"><summary class="tool-use-header" style="cursor:pointer;user-select:none;list-style:none;"><span class="tool-use-icon">⚙</span> <span class="tool-use-name">${this.escapeHtml(tn2)}</span></summary>${inputHtml}</details>`;
|
|
1446
|
+
} else {
|
|
1447
|
+
contentHtml += `<div class="streaming-block-tool-use"><div class="tool-use-header"><span class="tool-use-icon">⚙</span> <span class="tool-use-name">${this.escapeHtml(tn2)}</span></div>${inputHtml}</div>`;
|
|
1448
|
+
}
|
|
1437
1449
|
} else if (block.type === 'tool_result') {
|
|
1438
1450
|
const content = typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
|
|
1439
1451
|
const smartHtml = typeof StreamingRenderer !== 'undefined' ? StreamingRenderer.renderSmartContentHTML(content, this.escapeHtml.bind(this)) : `<pre class="tool-result-pre">${this.escapeHtml(content.length > 2000 ? content.substring(0, 2000) + '\n... (truncated)' : content)}</pre>`;
|
|
@@ -624,12 +624,31 @@ class StreamingRenderer {
|
|
|
624
624
|
* Render tool use block with smart parameter display
|
|
625
625
|
*/
|
|
626
626
|
renderBlockToolUse(block, context) {
|
|
627
|
-
const div = document.createElement('div');
|
|
628
|
-
div.className = 'block-tool-use';
|
|
629
|
-
|
|
630
627
|
const toolName = block.name || 'unknown';
|
|
631
628
|
const input = block.input || {};
|
|
629
|
+
const shouldFold = toolName.startsWith('mcp__') || toolName === 'Edit';
|
|
630
|
+
|
|
631
|
+
if (shouldFold) {
|
|
632
|
+
const details = document.createElement('details');
|
|
633
|
+
details.className = 'block-tool-use';
|
|
634
|
+
const summary = document.createElement('summary');
|
|
635
|
+
summary.className = 'tool-header';
|
|
636
|
+
summary.style.cssText = 'cursor:pointer;user-select:none;list-style:none;';
|
|
637
|
+
summary.innerHTML = `
|
|
638
|
+
<span class="tool-icon">${this.getToolIcon(toolName)}</span>
|
|
639
|
+
<span class="tool-name"><code>${this.escapeHtml(toolName)}</code></span>
|
|
640
|
+
`;
|
|
641
|
+
details.appendChild(summary);
|
|
642
|
+
if (Object.keys(input).length > 0) {
|
|
643
|
+
const paramsDiv = document.createElement('div');
|
|
644
|
+
paramsDiv.innerHTML = this.renderSmartParams(toolName, input);
|
|
645
|
+
details.appendChild(paramsDiv);
|
|
646
|
+
}
|
|
647
|
+
return details;
|
|
648
|
+
}
|
|
632
649
|
|
|
650
|
+
const div = document.createElement('div');
|
|
651
|
+
div.className = 'block-tool-use';
|
|
633
652
|
div.innerHTML = `
|
|
634
653
|
<div class="tool-header">
|
|
635
654
|
<span class="tool-icon">${this.getToolIcon(toolName)}</span>
|
|
@@ -637,7 +656,6 @@ class StreamingRenderer {
|
|
|
637
656
|
</div>
|
|
638
657
|
${Object.keys(input).length > 0 ? this.renderSmartParams(toolName, input) : ''}
|
|
639
658
|
`;
|
|
640
|
-
|
|
641
659
|
return div;
|
|
642
660
|
}
|
|
643
661
|
|
package/static/js/voice.js
CHANGED
|
@@ -1,167 +1,24 @@
|
|
|
1
1
|
(function() {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
// URL prefix prepended to every API route requested by this module
// (e.g. BASE + '/api/stt'); empty when the page does not define one.
var BASE = window.__BASE_URL || '';

// Sample rate, in Hz, that captured audio is resampled to before it is
// uploaded for transcription.
var TARGET_SAMPLE_RATE = 16000;

// --- Microphone capture state ---------------------------------------------
// True while a capture session is running.
var isRecording = false;
// MediaStream returned by getUserMedia for the current capture, or null.
var mediaStream = null;
// AudioContext driving the current capture, or null.
var audioContext = null;
// ScriptProcessorNode that collects the raw samples, or null.
var scriptNode = null;
// Float32Array chunks collected since the capture started.
var recordedChunks = [];

// --- Text-to-speech playback state -----------------------------------------
// When false, speak() ignores every request.
var ttsEnabled = true;
// Cleaned text snippets waiting to be synthesized and played, in order.
var speechQueue = [];
// True while a snippet is being fetched or played.
var isSpeaking = false;
// Audio element that is currently playing, or null.
var currentAudio = null;

// --- Voice view state --------------------------------------------------------
// NOTE(review): the three values below are not referenced in the visible part
// of this file; presumably they track whether the voice view is shown, the
// last streamed block that was spoken, and the active conversation - confirm.
var voiceActive = false;
var lastSpokenBlockIndex = -1;
var currentConversationId = null;
|
|
5
16
|
|
|
6
|
-
|
|
7
|
-
try {
|
|
8
|
-
const mod = await import(BASE + '/webtalk/sdk.js');
|
|
9
|
-
STT = mod.STT;
|
|
10
|
-
TTS = mod.TTS;
|
|
11
|
-
return true;
|
|
12
|
-
} catch (e) {
|
|
13
|
-
console.warn('Webtalk SDK load failed:', e.message);
|
|
14
|
-
return false;
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
|
-
let stt = null;
|
|
18
|
-
let tts = null;
|
|
19
|
-
let isRecording = false;
|
|
20
|
-
let ttsEnabled = true;
|
|
21
|
-
let voiceActive = false;
|
|
22
|
-
let lastSpokenBlockIndex = -1;
|
|
23
|
-
let currentConversationId = null;
|
|
24
|
-
let sttReady = false;
|
|
25
|
-
let ttsReady = false;
|
|
26
|
-
let speechQueue = [];
|
|
27
|
-
let isSpeaking = false;
|
|
28
|
-
|
|
29
|
-
async function init() {
|
|
17
|
+
function init() {
|
|
30
18
|
setupTTSToggle();
|
|
31
19
|
setupUI();
|
|
32
20
|
setupStreamingListener();
|
|
33
21
|
setupAgentSelector();
|
|
34
|
-
var sdkLoaded = await loadSDK();
|
|
35
|
-
if (sdkLoaded) {
|
|
36
|
-
initSTT();
|
|
37
|
-
initTTS();
|
|
38
|
-
} else {
|
|
39
|
-
sttLoadPhase = 'failed';
|
|
40
|
-
updateMicState();
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
var sttLoadPhase = 'starting';
|
|
45
|
-
|
|
46
|
-
async function initSTT() {
|
|
47
|
-
try {
|
|
48
|
-
stt = new STT({
|
|
49
|
-
basePath: BASE + '/webtalk',
|
|
50
|
-
onTranscript: function(text) {
|
|
51
|
-
var el = document.getElementById('voiceTranscript');
|
|
52
|
-
if (el) {
|
|
53
|
-
el.textContent = text;
|
|
54
|
-
el.setAttribute('data-final', text);
|
|
55
|
-
}
|
|
56
|
-
},
|
|
57
|
-
onPartial: function(text) {
|
|
58
|
-
var el = document.getElementById('voiceTranscript');
|
|
59
|
-
if (el) {
|
|
60
|
-
var existing = el.getAttribute('data-final') || '';
|
|
61
|
-
el.textContent = existing + text;
|
|
62
|
-
}
|
|
63
|
-
},
|
|
64
|
-
onStatus: function(status) {
|
|
65
|
-
var micBtn = document.getElementById('voiceMicBtn');
|
|
66
|
-
if (!micBtn) return;
|
|
67
|
-
if (status === 'recording') {
|
|
68
|
-
micBtn.classList.add('recording');
|
|
69
|
-
} else {
|
|
70
|
-
micBtn.classList.remove('recording');
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
});
|
|
74
|
-
var origInit = stt.init.bind(stt);
|
|
75
|
-
var initPromise = new Promise(function(resolve, reject) {
|
|
76
|
-
origInit().then(resolve).catch(reject);
|
|
77
|
-
if (stt.worker) {
|
|
78
|
-
var origHandler = stt.worker.onmessage;
|
|
79
|
-
stt.worker.onmessage = function(e) {
|
|
80
|
-
var msg = e.data;
|
|
81
|
-
if (msg && msg.status) {
|
|
82
|
-
if (msg.status === 'progress' || msg.status === 'download') {
|
|
83
|
-
if (sttLoadPhase !== 'downloading') {
|
|
84
|
-
sttLoadPhase = 'downloading';
|
|
85
|
-
updateMicState();
|
|
86
|
-
}
|
|
87
|
-
} else if (msg.status === 'done' && msg.file && msg.file.endsWith('.onnx')) {
|
|
88
|
-
sttLoadPhase = 'compiling';
|
|
89
|
-
updateMicState();
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
if (origHandler) origHandler.call(stt.worker, e);
|
|
93
|
-
};
|
|
94
|
-
}
|
|
95
|
-
});
|
|
96
|
-
await initPromise;
|
|
97
|
-
sttReady = true;
|
|
98
|
-
updateMicState();
|
|
99
|
-
} catch (e) {
|
|
100
|
-
console.warn('STT init failed:', e.message);
|
|
101
|
-
sttLoadPhase = 'failed';
|
|
102
|
-
updateMicState();
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
function updateMicState() {
|
|
107
|
-
var micBtn = document.getElementById('voiceMicBtn');
|
|
108
|
-
if (!micBtn) return;
|
|
109
|
-
if (sttReady) {
|
|
110
|
-
micBtn.removeAttribute('disabled');
|
|
111
|
-
micBtn.title = 'Click to record';
|
|
112
|
-
micBtn.classList.remove('loading');
|
|
113
|
-
} else if (sttLoadPhase === 'failed') {
|
|
114
|
-
micBtn.setAttribute('disabled', 'true');
|
|
115
|
-
micBtn.title = 'Speech recognition failed to load';
|
|
116
|
-
micBtn.classList.remove('loading');
|
|
117
|
-
} else {
|
|
118
|
-
micBtn.setAttribute('disabled', 'true');
|
|
119
|
-
micBtn.classList.add('loading');
|
|
120
|
-
if (sttLoadPhase === 'downloading') {
|
|
121
|
-
micBtn.title = 'Downloading speech models...';
|
|
122
|
-
} else if (sttLoadPhase === 'compiling') {
|
|
123
|
-
micBtn.title = 'Compiling speech models (may take a minute)...';
|
|
124
|
-
} else {
|
|
125
|
-
micBtn.title = 'Loading speech recognition...';
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
async function initTTS(retries) {
|
|
131
|
-
var maxRetries = retries || 3;
|
|
132
|
-
for (var attempt = 0; attempt < maxRetries; attempt++) {
|
|
133
|
-
try {
|
|
134
|
-
tts = new TTS({
|
|
135
|
-
basePath: BASE + '/webtalk',
|
|
136
|
-
apiBasePath: BASE,
|
|
137
|
-
onStatus: function() {},
|
|
138
|
-
onAudioReady: function(url) {
|
|
139
|
-
var audio = new Audio(url);
|
|
140
|
-
audio.onended = function() {
|
|
141
|
-
isSpeaking = false;
|
|
142
|
-
processQueue();
|
|
143
|
-
};
|
|
144
|
-
audio.onerror = function() {
|
|
145
|
-
isSpeaking = false;
|
|
146
|
-
processQueue();
|
|
147
|
-
};
|
|
148
|
-
audio.play().catch(function() {
|
|
149
|
-
isSpeaking = false;
|
|
150
|
-
processQueue();
|
|
151
|
-
});
|
|
152
|
-
}
|
|
153
|
-
});
|
|
154
|
-
await tts.init();
|
|
155
|
-
ttsReady = true;
|
|
156
|
-
return;
|
|
157
|
-
} catch (e) {
|
|
158
|
-
console.warn('TTS init attempt ' + (attempt + 1) + '/' + maxRetries + ' failed:', e.message);
|
|
159
|
-
tts = null;
|
|
160
|
-
if (attempt < maxRetries - 1) {
|
|
161
|
-
await new Promise(function(r) { setTimeout(r, 3000 * (attempt + 1)); });
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
22
|
}
|
|
166
23
|
|
|
167
24
|
function setupAgentSelector() {
|
|
@@ -203,6 +60,8 @@
|
|
|
203
60
|
function setupUI() {
|
|
204
61
|
var micBtn = document.getElementById('voiceMicBtn');
|
|
205
62
|
if (micBtn) {
|
|
63
|
+
micBtn.removeAttribute('disabled');
|
|
64
|
+
micBtn.title = 'Click to record';
|
|
206
65
|
micBtn.addEventListener('click', function(e) {
|
|
207
66
|
e.preventDefault();
|
|
208
67
|
if (!isRecording) {
|
|
@@ -216,43 +75,104 @@
|
|
|
216
75
|
if (sendBtn) {
|
|
217
76
|
sendBtn.addEventListener('click', sendVoiceMessage);
|
|
218
77
|
}
|
|
219
|
-
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
 * Resample a buffer of audio samples from one sample rate to another using
 * linear interpolation between the two nearest input samples.
 *
 * No low-pass filtering is performed, so downsampling can alias; this is a
 * lightweight conversion, not a high-fidelity one.
 *
 * @param {Float32Array} inputBuffer - Source samples (any indexable numeric
 *   buffer with a `length` works).
 * @param {number} fromRate - Sample rate of `inputBuffer`, in Hz.
 * @param {number} toRate - Desired sample rate, in Hz.
 * @returns {Float32Array} `inputBuffer` itself (not a copy) when the rates are
 *   equal; otherwise a new Float32Array holding
 *   `Math.round(inputBuffer.length * toRate / fromRate)` samples.
 * @throws {RangeError} If the rates differ and either one is not a positive,
 *   finite number.
 */
function resampleBuffer(inputBuffer, fromRate, toRate) {
  if (fromRate === toRate) return inputBuffer;

  // A zero, negative, NaN or infinite rate would otherwise yield an
  // Infinity/NaN ratio and either an opaque "Invalid typed array length"
  // error or a silently empty result.
  if (!Number.isFinite(fromRate) || !Number.isFinite(toRate) || fromRate <= 0 || toRate <= 0) {
    throw new RangeError(
      'resampleBuffer: sample rates must be positive finite numbers (got ' +
        fromRate + ' -> ' + toRate + ')'
    );
  }

  var ratio = fromRate / toRate;
  var newLen = Math.round(inputBuffer.length / ratio);
  var result = new Float32Array(newLen);
  var lastIdx = inputBuffer.length - 1;

  for (var i = 0; i < newLen; i++) {
    // Fractional position of output sample i within the input buffer.
    var srcIdx = i * ratio;
    var lo = Math.floor(srcIdx);
    // Clamp the upper neighbour so the final sample never reads past the end.
    var hi = Math.min(lo + 1, lastIdx);
    var frac = srcIdx - lo;
    result[i] = inputBuffer[lo] * (1 - frac) + inputBuffer[hi] * frac;
  }
  return result;
}
|
|
221
94
|
|
|
222
95
|
/**
 * Start capturing microphone audio.
 *
 * Clears the transcript element, requests microphone access, and wires a
 * ScriptProcessorNode that appends every 4096-frame mono buffer to
 * `recordedChunks`. On success the module-level `mediaStream`, `audioContext`
 * and `scriptNode` are populated and the mic button gets the `recording`
 * class.
 *
 * Does nothing when a recording is already active or starting. Never rejects:
 * failures are reported in the transcript element.
 *
 * @returns {Promise<void>}
 */
async function startRecording() {
  if (isRecording) return;
  var el = document.getElementById('voiceTranscript');
  if (el) {
    el.textContent = '';
    el.setAttribute('data-final', '');
  }

  // Claim the recording state BEFORE awaiting the permission prompt so a
  // second click while the prompt is open cannot start a parallel capture.
  isRecording = true;

  var stream = null;
  var ctx = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    if (!isRecording) {
      // The recording was stopped while the prompt was pending: release the
      // microphone right away instead of capturing audio nobody asked for.
      stream.getTracks().forEach(function(t) { t.stop(); });
      return;
    }

    ctx = new (window.AudioContext || window.webkitAudioContext)();
    var source = ctx.createMediaStreamSource(stream);
    var node = ctx.createScriptProcessor(4096, 1, 1);
    recordedChunks = [];
    node.onaudioprocess = function(e) {
      // Copy the samples: the event's buffer is only valid inside the handler.
      recordedChunks.push(new Float32Array(e.inputBuffer.getChannelData(0)));
    };
    source.connect(node);
    // The processor is connected to the destination so that it keeps
    // receiving audioprocess events.
    node.connect(ctx.destination);

    // Publish the shared state only once the whole graph is wired up.
    mediaStream = stream;
    audioContext = ctx;
    scriptNode = node;

    var micBtn = document.getElementById('voiceMicBtn');
    if (micBtn) micBtn.classList.add('recording');
  } catch (e) {
    isRecording = false;
    // Release whatever was acquired before the failure; otherwise the
    // microphone stays live with no way for the user to stop it.
    if (stream) stream.getTracks().forEach(function(t) { t.stop(); });
    if (ctx) ctx.close().catch(function() {});
    // The "Error" prefix keeps sendVoiceMessage from submitting this status
    // text as if it were a transcript.
    if (el) el.textContent = 'Error: Mic access denied or unavailable: ' + e.message;
  }
}
|
|
242
122
|
|
|
243
123
|
/**
 * Stop the active capture, upload the audio and show the transcription.
 *
 * Tears down the capture graph (processor node, microphone tracks, audio
 * context), concatenates the recorded chunks, resamples them to
 * TARGET_SAMPLE_RATE and POSTs the resulting Float32Array's bytes to
 * `BASE + '/api/stt'` as `application/octet-stream`. The JSON reply is
 * expected to carry either `text` or `error`.
 *
 * Does nothing when no recording is active. Never rejects on network or
 * server failures: they are reported in the transcript element.
 *
 * @returns {Promise<void>}
 */
async function stopRecording() {
  if (!isRecording) return;
  isRecording = false;

  var micBtn = document.getElementById('voiceMicBtn');
  if (micBtn) micBtn.classList.remove('recording');
  var el = document.getElementById('voiceTranscript');

  if (scriptNode) { scriptNode.disconnect(); scriptNode = null; }
  if (mediaStream) {
    // Stopping the tracks is what actually releases the microphone.
    mediaStream.getTracks().forEach(function(t) { t.stop(); });
    mediaStream = null;
  }
  // Read the rate before the context is closed and dropped.
  var sourceSampleRate = audioContext ? audioContext.sampleRate : 48000;
  if (audioContext) { audioContext.close().catch(function() {}); audioContext = null; }

  if (recordedChunks.length === 0) return;

  // Take ownership of the chunks and reset the shared list immediately.
  var chunks = recordedChunks;
  recordedChunks = [];

  var totalLen = 0;
  for (var i = 0; i < chunks.length; i++) totalLen += chunks[i].length;
  var merged = new Float32Array(totalLen);
  var offset = 0;
  for (var j = 0; j < chunks.length; j++) {
    merged.set(chunks[j], offset);
    offset += chunks[j].length;
  }

  var resampled = resampleBuffer(merged, sourceSampleRate, TARGET_SAMPLE_RATE);
  if (el) el.textContent = 'Transcribing...';

  try {
    var resp = await fetch(BASE + '/api/stt', {
      method: 'POST',
      headers: { 'Content-Type': 'application/octet-stream' },
      body: resampled.buffer
    });

    var data;
    try {
      data = (await resp.json()) || {};
    } catch (parseErr) {
      // Non-JSON body (e.g. an HTML error page): report the HTTP status
      // instead of a confusing JSON syntax error.
      throw new Error(resp.ok ? 'invalid response from server' : 'HTTP ' + resp.status);
    }

    if (data.text) {
      if (el) {
        el.textContent = data.text;
        el.setAttribute('data-final', data.text);
      }
    } else if (data.error) {
      if (el) el.textContent = 'Error: ' + data.error;
    } else if (!resp.ok) {
      if (el) el.textContent = 'Error: HTTP ' + resp.status;
    } else {
      // Successful reply without text: nothing was recognized.
      if (el) el.textContent = '';
    }
  } catch (e) {
    // The "Error" prefix keeps sendVoiceMessage from submitting this status
    // text as if it were a transcript.
    if (el) el.textContent = 'Error: Transcription failed: ' + e.message;
  }
}
|
|
250
170
|
|
|
251
171
|
function sendVoiceMessage() {
|
|
252
172
|
var el = document.getElementById('voiceTranscript');
|
|
253
173
|
if (!el) return;
|
|
254
174
|
var text = el.textContent.trim();
|
|
255
|
-
if (!text) return;
|
|
175
|
+
if (!text || text.startsWith('Transcribing') || text.startsWith('Error')) return;
|
|
256
176
|
addVoiceBlock(text, true);
|
|
257
177
|
el.textContent = '';
|
|
258
178
|
el.setAttribute('data-final', '');
|
|
@@ -266,7 +186,7 @@
|
|
|
266
186
|
}
|
|
267
187
|
|
|
268
188
|
function speak(text) {
|
|
269
|
-
if (!ttsEnabled
|
|
189
|
+
if (!ttsEnabled) return;
|
|
270
190
|
var clean = text.replace(/<[^>]*>/g, '').trim();
|
|
271
191
|
if (!clean) return;
|
|
272
192
|
speechQueue.push(clean);
|
|
@@ -277,7 +197,35 @@
|
|
|
277
197
|
if (isSpeaking || speechQueue.length === 0) return;
|
|
278
198
|
isSpeaking = true;
|
|
279
199
|
var text = speechQueue.shift();
|
|
280
|
-
|
|
200
|
+
fetch(BASE + '/api/tts', {
|
|
201
|
+
method: 'POST',
|
|
202
|
+
headers: { 'Content-Type': 'application/json' },
|
|
203
|
+
body: JSON.stringify({ text: text })
|
|
204
|
+
}).then(function(resp) {
|
|
205
|
+
if (!resp.ok) throw new Error('TTS failed');
|
|
206
|
+
return resp.blob();
|
|
207
|
+
}).then(function(blob) {
|
|
208
|
+
var url = URL.createObjectURL(blob);
|
|
209
|
+
currentAudio = new Audio(url);
|
|
210
|
+
currentAudio.onended = function() {
|
|
211
|
+
URL.revokeObjectURL(url);
|
|
212
|
+
currentAudio = null;
|
|
213
|
+
isSpeaking = false;
|
|
214
|
+
processQueue();
|
|
215
|
+
};
|
|
216
|
+
currentAudio.onerror = function() {
|
|
217
|
+
URL.revokeObjectURL(url);
|
|
218
|
+
currentAudio = null;
|
|
219
|
+
isSpeaking = false;
|
|
220
|
+
processQueue();
|
|
221
|
+
};
|
|
222
|
+
currentAudio.play().catch(function() {
|
|
223
|
+
URL.revokeObjectURL(url);
|
|
224
|
+
currentAudio = null;
|
|
225
|
+
isSpeaking = false;
|
|
226
|
+
processQueue();
|
|
227
|
+
});
|
|
228
|
+
}).catch(function() {
|
|
281
229
|
isSpeaking = false;
|
|
282
230
|
processQueue();
|
|
283
231
|
});
|
|
@@ -286,7 +234,10 @@
|
|
|
286
234
|
/**
 * Cancel text-to-speech playback: drop everything still queued and silence
 * the audio element that is currently playing, if any.
 *
 * @returns {void}
 */
function stopSpeaking() {
  speechQueue = [];
  isSpeaking = false;
  if (currentAudio) {
    var audio = currentAudio;
    currentAudio = null;
    // Detach the handlers first so a late ended/error event from this element
    // cannot reset the playback state of whatever is played next.
    audio.onended = null;
    audio.onerror = null;
    audio.pause();
    // A paused element never fires "ended", so its blob URL would otherwise
    // never be revoked and the audio data would stay in memory.
    if (audio.src) URL.revokeObjectURL(audio.src);
  }
}
|
|
291
242
|
|
|
292
243
|
function addVoiceBlock(text, isUser) {
|