agentgui 1.0.173 → 1.0.175
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/lib/speech.js +14 -2
- package/package.json +1 -1
- package/server.js +20 -6
- package/static/index.html +20 -20
- package/static/js/voice.js +35 -8
package/lib/speech.js
CHANGED
|
@@ -115,6 +115,8 @@ let speakerEmbeddingPipeline = null;
|
|
|
115
115
|
let sttLoading = false;
|
|
116
116
|
let ttsLoading = false;
|
|
117
117
|
let speakerEmbeddingLoading = false;
|
|
118
|
+
let ttsLoadError = null;
|
|
119
|
+
let sttLoadError = null;
|
|
118
120
|
const voiceEmbeddingsCache = new Map();
|
|
119
121
|
const SAMPLE_RATE_STT = 16000;
|
|
120
122
|
const SAMPLE_RATE_TTS = 16000;
|
|
@@ -261,8 +263,10 @@ async function generateEmbeddingFromCustomVoice(voiceId) {
|
|
|
261
263
|
|
|
262
264
|
async function getSTT() {
|
|
263
265
|
if (sttPipeline) return sttPipeline;
|
|
266
|
+
if (sttLoadError) throw sttLoadError;
|
|
264
267
|
if (sttLoading) {
|
|
265
268
|
while (sttLoading) await new Promise(r => setTimeout(r, 100));
|
|
269
|
+
if (sttLoadError) throw sttLoadError;
|
|
266
270
|
if (!sttPipeline) throw new Error('STT pipeline failed to load');
|
|
267
271
|
return sttPipeline;
|
|
268
272
|
}
|
|
@@ -278,10 +282,12 @@ async function getSTT() {
|
|
|
278
282
|
device: 'cpu',
|
|
279
283
|
local_files_only: isLocal,
|
|
280
284
|
});
|
|
285
|
+
sttLoadError = null;
|
|
281
286
|
return sttPipeline;
|
|
282
287
|
} catch (err) {
|
|
283
288
|
sttPipeline = null;
|
|
284
|
-
|
|
289
|
+
sttLoadError = new Error('STT model load failed: ' + err.message);
|
|
290
|
+
throw sttLoadError;
|
|
285
291
|
} finally {
|
|
286
292
|
sttLoading = false;
|
|
287
293
|
}
|
|
@@ -289,8 +295,10 @@ async function getSTT() {
|
|
|
289
295
|
|
|
290
296
|
async function getTTS() {
|
|
291
297
|
if (ttsPipeline) return ttsPipeline;
|
|
298
|
+
if (ttsLoadError) throw ttsLoadError;
|
|
292
299
|
if (ttsLoading) {
|
|
293
300
|
while (ttsLoading) await new Promise(r => setTimeout(r, 100));
|
|
301
|
+
if (ttsLoadError) throw ttsLoadError;
|
|
294
302
|
if (!ttsPipeline) throw new Error('TTS pipeline failed to load');
|
|
295
303
|
return ttsPipeline;
|
|
296
304
|
}
|
|
@@ -303,10 +311,12 @@ async function getTTS() {
|
|
|
303
311
|
dtype: 'fp32',
|
|
304
312
|
});
|
|
305
313
|
await ensureSpeakerEmbeddings();
|
|
314
|
+
ttsLoadError = null;
|
|
306
315
|
return ttsPipeline;
|
|
307
316
|
} catch (err) {
|
|
308
317
|
ttsPipeline = null;
|
|
309
|
-
|
|
318
|
+
ttsLoadError = new Error('TTS model load failed: ' + err.message);
|
|
319
|
+
throw ttsLoadError;
|
|
310
320
|
} finally {
|
|
311
321
|
ttsLoading = false;
|
|
312
322
|
}
|
|
@@ -482,6 +492,8 @@ function getStatus() {
|
|
|
482
492
|
ttsReady: !!ttsPipeline,
|
|
483
493
|
sttLoading,
|
|
484
494
|
ttsLoading,
|
|
495
|
+
sttError: sttLoadError ? sttLoadError.message : null,
|
|
496
|
+
ttsError: ttsLoadError ? ttsLoadError.message : null,
|
|
485
497
|
};
|
|
486
498
|
}
|
|
487
499
|
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -554,13 +554,20 @@ const server = http.createServer(async (req, res) => {
|
|
|
554
554
|
sendJSON(req, res, 400, { error: 'No text provided' });
|
|
555
555
|
return;
|
|
556
556
|
}
|
|
557
|
-
const
|
|
558
|
-
const
|
|
557
|
+
const speech = await getSpeech();
|
|
558
|
+
const status = speech.getStatus();
|
|
559
|
+
if (status.ttsError) {
|
|
560
|
+
sendJSON(req, res, 503, { error: status.ttsError, retryable: false });
|
|
561
|
+
return;
|
|
562
|
+
}
|
|
563
|
+
const wavBuffer = await speech.synthesize(text, voiceId);
|
|
559
564
|
res.writeHead(200, { 'Content-Type': 'audio/wav', 'Content-Length': wavBuffer.length });
|
|
560
565
|
res.end(wavBuffer);
|
|
561
566
|
} catch (err) {
|
|
562
567
|
debugLog('[TTS] Error: ' + err.message);
|
|
563
|
-
|
|
568
|
+
const isModelError = /model.*load|pipeline.*failed|failed to load/i.test(err.message);
|
|
569
|
+
const statusCode = isModelError ? 503 : 500;
|
|
570
|
+
if (!res.headersSent) sendJSON(req, res, statusCode, { error: err.message || 'TTS failed', retryable: !isModelError });
|
|
564
571
|
}
|
|
565
572
|
return;
|
|
566
573
|
}
|
|
@@ -574,14 +581,19 @@ const server = http.createServer(async (req, res) => {
|
|
|
574
581
|
sendJSON(req, res, 400, { error: 'No text provided' });
|
|
575
582
|
return;
|
|
576
583
|
}
|
|
577
|
-
const
|
|
584
|
+
const speech = await getSpeech();
|
|
585
|
+
const status = speech.getStatus();
|
|
586
|
+
if (status.ttsError) {
|
|
587
|
+
sendJSON(req, res, 503, { error: status.ttsError, retryable: false });
|
|
588
|
+
return;
|
|
589
|
+
}
|
|
578
590
|
res.writeHead(200, {
|
|
579
591
|
'Content-Type': 'application/octet-stream',
|
|
580
592
|
'Transfer-Encoding': 'chunked',
|
|
581
593
|
'X-Content-Type': 'audio/wav-stream',
|
|
582
594
|
'Cache-Control': 'no-cache'
|
|
583
595
|
});
|
|
584
|
-
for await (const wavChunk of synthesizeStream(text, voiceId)) {
|
|
596
|
+
for await (const wavChunk of speech.synthesizeStream(text, voiceId)) {
|
|
585
597
|
const lenBuf = Buffer.alloc(4);
|
|
586
598
|
lenBuf.writeUInt32BE(wavChunk.length, 0);
|
|
587
599
|
res.write(lenBuf);
|
|
@@ -590,7 +602,9 @@ const server = http.createServer(async (req, res) => {
|
|
|
590
602
|
res.end();
|
|
591
603
|
} catch (err) {
|
|
592
604
|
debugLog('[TTS-STREAM] Error: ' + err.message);
|
|
593
|
-
|
|
605
|
+
const isModelError = /model.*load|pipeline.*failed|failed to load/i.test(err.message);
|
|
606
|
+
const statusCode = isModelError ? 503 : 500;
|
|
607
|
+
if (!res.headersSent) sendJSON(req, res, statusCode, { error: err.message || 'TTS stream failed', retryable: !isModelError });
|
|
594
608
|
else res.end();
|
|
595
609
|
}
|
|
596
610
|
return;
|
package/static/index.html
CHANGED
|
@@ -415,7 +415,7 @@
|
|
|
415
415
|
|
|
416
416
|
/* --- Messages --- */
|
|
417
417
|
.message {
|
|
418
|
-
margin-bottom: 0
|
|
418
|
+
margin-bottom: 0;
|
|
419
419
|
padding: 0.5rem 0.75rem;
|
|
420
420
|
border-radius: 0.75rem;
|
|
421
421
|
max-width: 85%;
|
|
@@ -466,7 +466,7 @@
|
|
|
466
466
|
.message-blocks {
|
|
467
467
|
display: flex;
|
|
468
468
|
flex-direction: column;
|
|
469
|
-
gap: 0.
|
|
469
|
+
gap: 0.125rem;
|
|
470
470
|
}
|
|
471
471
|
|
|
472
472
|
.message-text {
|
|
@@ -503,7 +503,7 @@
|
|
|
503
503
|
/* --- Streaming block types --- */
|
|
504
504
|
.streaming-block-system {
|
|
505
505
|
padding: 0.375rem 0.75rem;
|
|
506
|
-
margin: 0
|
|
506
|
+
margin: 0;
|
|
507
507
|
background: rgba(59,130,246,0.08);
|
|
508
508
|
border-radius: 0.375rem;
|
|
509
509
|
font-size: 0.8rem;
|
|
@@ -558,7 +558,7 @@
|
|
|
558
558
|
html.dark .tool-input-pre { background: rgba(255,255,255,0.03); }
|
|
559
559
|
|
|
560
560
|
.streaming-block-tool-result {
|
|
561
|
-
margin: 0
|
|
561
|
+
margin: 0;
|
|
562
562
|
border-radius: 0.375rem;
|
|
563
563
|
background: var(--color-bg-code);
|
|
564
564
|
overflow: hidden;
|
|
@@ -601,7 +601,7 @@
|
|
|
601
601
|
|
|
602
602
|
.streaming-block-result {
|
|
603
603
|
padding: 0.375rem 0.75rem;
|
|
604
|
-
margin: 0
|
|
604
|
+
margin: 0;
|
|
605
605
|
border-radius: 0.375rem;
|
|
606
606
|
background: rgba(16,185,129,0.08);
|
|
607
607
|
font-size: 0.8rem;
|
|
@@ -1149,7 +1149,7 @@
|
|
|
1149
1149
|
|
|
1150
1150
|
/* ===== STREAMING BLOCK STYLES ===== */
|
|
1151
1151
|
.block-text {
|
|
1152
|
-
margin-bottom: 0
|
|
1152
|
+
margin-bottom: 0;
|
|
1153
1153
|
padding: 0.5rem 0.75rem;
|
|
1154
1154
|
background: var(--color-bg-primary);
|
|
1155
1155
|
border-radius: 0.5rem;
|
|
@@ -1171,7 +1171,7 @@
|
|
|
1171
1171
|
}
|
|
1172
1172
|
|
|
1173
1173
|
.block-code {
|
|
1174
|
-
margin-bottom: 0
|
|
1174
|
+
margin-bottom: 0;
|
|
1175
1175
|
border-radius: 0.5rem;
|
|
1176
1176
|
overflow: hidden;
|
|
1177
1177
|
}
|
|
@@ -1218,7 +1218,7 @@
|
|
|
1218
1218
|
}
|
|
1219
1219
|
|
|
1220
1220
|
.block-thinking {
|
|
1221
|
-
margin-bottom: 0
|
|
1221
|
+
margin-bottom: 0;
|
|
1222
1222
|
border-radius: 0.5rem;
|
|
1223
1223
|
background: #f5f3ff;
|
|
1224
1224
|
overflow: hidden;
|
|
@@ -1255,7 +1255,7 @@
|
|
|
1255
1255
|
|
|
1256
1256
|
/* --- Tool Use Block --- */
|
|
1257
1257
|
.block-tool-use {
|
|
1258
|
-
margin-bottom: 0
|
|
1258
|
+
margin-bottom: 0;
|
|
1259
1259
|
border-radius: 0.5rem;
|
|
1260
1260
|
background: #ecfeff;
|
|
1261
1261
|
overflow: hidden;
|
|
@@ -1497,7 +1497,7 @@
|
|
|
1497
1497
|
|
|
1498
1498
|
/* --- Folded Tool Use (compact success-style bar) --- */
|
|
1499
1499
|
.folded-tool {
|
|
1500
|
-
margin: 0
|
|
1500
|
+
margin: 0;
|
|
1501
1501
|
border-radius: 0.375rem;
|
|
1502
1502
|
overflow: hidden;
|
|
1503
1503
|
background: #f0fdf4;
|
|
@@ -1681,7 +1681,7 @@
|
|
|
1681
1681
|
|
|
1682
1682
|
/* --- Collapsible Code Summary --- */
|
|
1683
1683
|
.collapsible-code {
|
|
1684
|
-
margin: 0
|
|
1684
|
+
margin: 0;
|
|
1685
1685
|
border-radius: 0.375rem;
|
|
1686
1686
|
overflow: hidden;
|
|
1687
1687
|
background: #1e293b;
|
|
@@ -1736,7 +1736,7 @@
|
|
|
1736
1736
|
|
|
1737
1737
|
/* --- Tool Result Block --- */
|
|
1738
1738
|
.block-tool-result {
|
|
1739
|
-
margin-bottom: 0
|
|
1739
|
+
margin-bottom: 0;
|
|
1740
1740
|
border-radius: 0.5rem;
|
|
1741
1741
|
overflow: hidden;
|
|
1742
1742
|
}
|
|
@@ -1822,7 +1822,7 @@
|
|
|
1822
1822
|
|
|
1823
1823
|
/* --- Result Summary Block --- */
|
|
1824
1824
|
.block-result {
|
|
1825
|
-
margin-bottom: 0
|
|
1825
|
+
margin-bottom: 0;
|
|
1826
1826
|
border-radius: 0.5rem;
|
|
1827
1827
|
overflow: hidden;
|
|
1828
1828
|
}
|
|
@@ -1882,7 +1882,7 @@
|
|
|
1882
1882
|
|
|
1883
1883
|
/* --- System Block --- */
|
|
1884
1884
|
.block-system {
|
|
1885
|
-
margin-bottom: 0
|
|
1885
|
+
margin-bottom: 0;
|
|
1886
1886
|
border-radius: 0.5rem;
|
|
1887
1887
|
background: #eef2ff;
|
|
1888
1888
|
overflow: hidden;
|
|
@@ -1939,7 +1939,7 @@
|
|
|
1939
1939
|
|
|
1940
1940
|
/* --- Bash Block --- */
|
|
1941
1941
|
.block-bash {
|
|
1942
|
-
margin-bottom: 0
|
|
1942
|
+
margin-bottom: 0;
|
|
1943
1943
|
border-radius: 0.5rem;
|
|
1944
1944
|
overflow: hidden;
|
|
1945
1945
|
background: #111827;
|
|
@@ -1973,7 +1973,7 @@
|
|
|
1973
1973
|
|
|
1974
1974
|
/* --- Generic Block --- */
|
|
1975
1975
|
.block-generic {
|
|
1976
|
-
margin-bottom: 0
|
|
1976
|
+
margin-bottom: 0;
|
|
1977
1977
|
padding: 0.5rem 0.75rem;
|
|
1978
1978
|
border-radius: 0.5rem;
|
|
1979
1979
|
background: var(--color-bg-secondary);
|
|
@@ -2016,7 +2016,7 @@
|
|
|
2016
2016
|
|
|
2017
2017
|
/* --- Error Block --- */
|
|
2018
2018
|
.block-error {
|
|
2019
|
-
margin-bottom: 0
|
|
2019
|
+
margin-bottom: 0;
|
|
2020
2020
|
padding: 0.5rem 0.75rem;
|
|
2021
2021
|
border-radius: 0.5rem;
|
|
2022
2022
|
background: #fef2f2;
|
|
@@ -2026,7 +2026,7 @@
|
|
|
2026
2026
|
|
|
2027
2027
|
/* --- Image Block --- */
|
|
2028
2028
|
.block-image {
|
|
2029
|
-
margin-bottom: 0
|
|
2029
|
+
margin-bottom: 0;
|
|
2030
2030
|
border-radius: 0.5rem;
|
|
2031
2031
|
overflow: hidden;
|
|
2032
2032
|
}
|
|
@@ -2036,7 +2036,7 @@
|
|
|
2036
2036
|
|
|
2037
2037
|
/* ===== STREAMING EVENTS ===== */
|
|
2038
2038
|
.event-streaming-start {
|
|
2039
|
-
margin-bottom: 0
|
|
2039
|
+
margin-bottom: 0;
|
|
2040
2040
|
padding: 0.375rem 0.75rem;
|
|
2041
2041
|
background: #eff6ff;
|
|
2042
2042
|
border-radius: 0.5rem;
|
|
@@ -2048,7 +2048,7 @@
|
|
|
2048
2048
|
html.dark .event-streaming-start { background: #0c1a2e; }
|
|
2049
2049
|
|
|
2050
2050
|
.event-streaming-complete {
|
|
2051
|
-
margin-bottom: 0
|
|
2051
|
+
margin-bottom: 0;
|
|
2052
2052
|
padding: 0.375rem 0.75rem;
|
|
2053
2053
|
background: linear-gradient(135deg, #ecfdf5, #f0fdf4);
|
|
2054
2054
|
border-radius: 0.5rem;
|
package/static/js/voice.js
CHANGED
|
@@ -298,6 +298,9 @@
|
|
|
298
298
|
var audioChunkQueue = [];
|
|
299
299
|
var isPlayingChunk = false;
|
|
300
300
|
var streamDone = false;
|
|
301
|
+
var ttsConsecutiveFailures = 0;
|
|
302
|
+
var TTS_MAX_FAILURES = 3;
|
|
303
|
+
var ttsDisabledUntilReset = false;
|
|
301
304
|
|
|
302
305
|
function playNextChunk() {
|
|
303
306
|
if (audioChunkQueue.length === 0) {
|
|
@@ -331,19 +334,41 @@
|
|
|
331
334
|
|
|
332
335
|
function processQueue() {
|
|
333
336
|
if (isSpeaking || speechQueue.length === 0) return;
|
|
337
|
+
if (ttsDisabledUntilReset) {
|
|
338
|
+
speechQueue = [];
|
|
339
|
+
return;
|
|
340
|
+
}
|
|
334
341
|
isSpeaking = true;
|
|
335
342
|
streamDone = false;
|
|
336
343
|
var text = speechQueue.shift();
|
|
337
344
|
audioChunkQueue = [];
|
|
338
345
|
isPlayingChunk = false;
|
|
339
|
-
|
|
346
|
+
|
|
347
|
+
function onTtsSuccess() {
|
|
348
|
+
ttsConsecutiveFailures = 0;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
function onTtsFailed() {
|
|
352
|
+
ttsConsecutiveFailures++;
|
|
353
|
+
if (ttsConsecutiveFailures >= TTS_MAX_FAILURES) {
|
|
354
|
+
console.warn('[Voice] TTS failed ' + ttsConsecutiveFailures + ' times consecutively, disabling until reset');
|
|
355
|
+
ttsDisabledUntilReset = true;
|
|
356
|
+
speechQueue = [];
|
|
357
|
+
}
|
|
358
|
+
streamDone = true;
|
|
359
|
+
isSpeaking = false;
|
|
360
|
+
if (!ttsDisabledUntilReset) {
|
|
361
|
+
processQueue();
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
|
|
340
365
|
function tryStreaming() {
|
|
341
366
|
fetch(BASE + '/api/tts-stream', {
|
|
342
367
|
method: 'POST',
|
|
343
368
|
headers: { 'Content-Type': 'application/json' },
|
|
344
369
|
body: JSON.stringify({ text: text, voiceId: selectedVoiceId })
|
|
345
370
|
}).then(function(resp) {
|
|
346
|
-
if (!resp.ok) throw new Error('TTS stream failed');
|
|
371
|
+
if (!resp.ok) throw new Error('TTS stream failed: ' + resp.status);
|
|
347
372
|
var reader = resp.body.getReader();
|
|
348
373
|
var buffer = new Uint8Array(0);
|
|
349
374
|
|
|
@@ -357,6 +382,7 @@
|
|
|
357
382
|
function pump() {
|
|
358
383
|
return reader.read().then(function(result) {
|
|
359
384
|
if (result.done) {
|
|
385
|
+
onTtsSuccess();
|
|
360
386
|
streamDone = true;
|
|
361
387
|
if (!isPlayingChunk && audioChunkQueue.length === 0) {
|
|
362
388
|
isSpeaking = false;
|
|
@@ -384,16 +410,17 @@
|
|
|
384
410
|
tryNonStreaming(text);
|
|
385
411
|
});
|
|
386
412
|
}
|
|
387
|
-
|
|
413
|
+
|
|
388
414
|
function tryNonStreaming(txt) {
|
|
389
415
|
fetch(BASE + '/api/tts', {
|
|
390
416
|
method: 'POST',
|
|
391
417
|
headers: { 'Content-Type': 'application/json' },
|
|
392
418
|
body: JSON.stringify({ text: txt, voiceId: selectedVoiceId })
|
|
393
419
|
}).then(function(resp) {
|
|
394
|
-
if (!resp.ok) throw new Error('TTS failed');
|
|
420
|
+
if (!resp.ok) throw new Error('TTS failed: ' + resp.status);
|
|
395
421
|
return resp.arrayBuffer();
|
|
396
422
|
}).then(function(buf) {
|
|
423
|
+
onTtsSuccess();
|
|
397
424
|
var blob = new Blob([buf], { type: 'audio/wav' });
|
|
398
425
|
audioChunkQueue.push(blob);
|
|
399
426
|
if (!isPlayingChunk) playNextChunk();
|
|
@@ -401,12 +428,10 @@
|
|
|
401
428
|
isSpeaking = false;
|
|
402
429
|
processQueue();
|
|
403
430
|
}).catch(function() {
|
|
404
|
-
|
|
405
|
-
isSpeaking = false;
|
|
406
|
-
processQueue();
|
|
431
|
+
onTtsFailed();
|
|
407
432
|
});
|
|
408
433
|
}
|
|
409
|
-
|
|
434
|
+
|
|
410
435
|
tryStreaming();
|
|
411
436
|
}
|
|
412
437
|
|
|
@@ -415,6 +440,8 @@
|
|
|
415
440
|
audioChunkQueue = [];
|
|
416
441
|
isPlayingChunk = false;
|
|
417
442
|
isSpeaking = false;
|
|
443
|
+
ttsConsecutiveFailures = 0;
|
|
444
|
+
ttsDisabledUntilReset = false;
|
|
418
445
|
if (currentAudio) {
|
|
419
446
|
currentAudio.pause();
|
|
420
447
|
currentAudio = null;
|