agentgui 1.0.235 → 1.0.236
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/lib/claude-runner.js +55 -2
- package/lib/speech.js +1 -4
- package/package.json +1 -1
- package/server.js +8 -11
- package/static/js/voice.js +36 -14
package/lib/claude-runner.js
CHANGED
|
@@ -561,11 +561,33 @@ registry.register({
|
|
|
561
561
|
|
|
562
562
|
// Agent message chunk (text response)
|
|
563
563
|
if (update.sessionUpdate === 'agent_message_chunk' && update.content) {
|
|
564
|
+
let contentBlock;
|
|
565
|
+
|
|
566
|
+
// Handle different content formats
|
|
567
|
+
if (typeof update.content === 'string') {
|
|
568
|
+
contentBlock = { type: 'text', text: update.content };
|
|
569
|
+
} else if (update.content.type === 'text' && update.content.text) {
|
|
570
|
+
contentBlock = update.content;
|
|
571
|
+
} else if (update.content.text) {
|
|
572
|
+
contentBlock = { type: 'text', text: update.content.text };
|
|
573
|
+
} else if (update.content.content) {
|
|
574
|
+
const inner = update.content.content;
|
|
575
|
+
if (typeof inner === 'string') {
|
|
576
|
+
contentBlock = { type: 'text', text: inner };
|
|
577
|
+
} else if (inner.type === 'text' && inner.text) {
|
|
578
|
+
contentBlock = inner;
|
|
579
|
+
} else {
|
|
580
|
+
contentBlock = { type: 'text', text: JSON.stringify(inner) };
|
|
581
|
+
}
|
|
582
|
+
} else {
|
|
583
|
+
contentBlock = { type: 'text', text: JSON.stringify(update.content) };
|
|
584
|
+
}
|
|
585
|
+
|
|
564
586
|
return {
|
|
565
587
|
type: 'assistant',
|
|
566
588
|
message: {
|
|
567
589
|
role: 'assistant',
|
|
568
|
-
content: [
|
|
590
|
+
content: [contentBlock]
|
|
569
591
|
},
|
|
570
592
|
session_id: params.sessionId
|
|
571
593
|
};
|
|
@@ -705,11 +727,33 @@ function createACPProtocolHandler() {
|
|
|
705
727
|
|
|
706
728
|
// Agent message chunk (text response)
|
|
707
729
|
if (update.sessionUpdate === 'agent_message_chunk' && update.content) {
|
|
730
|
+
let contentBlock;
|
|
731
|
+
|
|
732
|
+
// Handle different content formats
|
|
733
|
+
if (typeof update.content === 'string') {
|
|
734
|
+
contentBlock = { type: 'text', text: update.content };
|
|
735
|
+
} else if (update.content.type === 'text' && update.content.text) {
|
|
736
|
+
contentBlock = update.content;
|
|
737
|
+
} else if (update.content.text) {
|
|
738
|
+
contentBlock = { type: 'text', text: update.content.text };
|
|
739
|
+
} else if (update.content.content) {
|
|
740
|
+
const inner = update.content.content;
|
|
741
|
+
if (typeof inner === 'string') {
|
|
742
|
+
contentBlock = { type: 'text', text: inner };
|
|
743
|
+
} else if (inner.type === 'text' && inner.text) {
|
|
744
|
+
contentBlock = inner;
|
|
745
|
+
} else {
|
|
746
|
+
contentBlock = { type: 'text', text: JSON.stringify(inner) };
|
|
747
|
+
}
|
|
748
|
+
} else {
|
|
749
|
+
contentBlock = { type: 'text', text: JSON.stringify(update.content) };
|
|
750
|
+
}
|
|
751
|
+
|
|
708
752
|
return {
|
|
709
753
|
type: 'assistant',
|
|
710
754
|
message: {
|
|
711
755
|
role: 'assistant',
|
|
712
|
-
content: [
|
|
756
|
+
content: [contentBlock]
|
|
713
757
|
},
|
|
714
758
|
session_id: params.sessionId
|
|
715
759
|
};
|
|
@@ -800,6 +844,15 @@ function createACPProtocolHandler() {
|
|
|
800
844
|
};
|
|
801
845
|
}
|
|
802
846
|
|
|
847
|
+
// Plan update
|
|
848
|
+
if (update.sessionUpdate === 'plan') {
|
|
849
|
+
return {
|
|
850
|
+
type: 'plan',
|
|
851
|
+
entries: update.entries || [],
|
|
852
|
+
session_id: params.sessionId
|
|
853
|
+
};
|
|
854
|
+
}
|
|
855
|
+
|
|
803
856
|
return null;
|
|
804
857
|
}
|
|
805
858
|
|
package/lib/speech.js
CHANGED
|
@@ -89,10 +89,7 @@ function synthesize(text, voiceId) {
|
|
|
89
89
|
function synthesizeStream(text, voiceId) {
|
|
90
90
|
if (needsPatch && voiceId && PREDEFINED_IDS.has(voiceId)) {
|
|
91
91
|
return (async function* () {
|
|
92
|
-
|
|
93
|
-
for (const sentence of sentences) {
|
|
94
|
-
yield await synthesizeDirect(sentence, voiceId);
|
|
95
|
-
}
|
|
92
|
+
yield await synthesizeDirect(text, voiceId);
|
|
96
93
|
})();
|
|
97
94
|
}
|
|
98
95
|
return serverTTS.synthesizeStream(text, voiceId, EXTRA_VOICE_DIRS);
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -120,19 +120,16 @@ function flushTTSaccumulator(key, conversationId, sessionId) {
|
|
|
120
120
|
}
|
|
121
121
|
}
|
|
122
122
|
if (voices.size === 0) return;
|
|
123
|
-
const
|
|
123
|
+
const cacheKey = speech.ttsCacheKey(text, vid);
|
|
124
124
|
for (const vid of voices) {
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
pushTTSAudio(cacheKey, cached, conversationId, sessionId, vid);
|
|
130
|
-
continue;
|
|
131
|
-
}
|
|
132
|
-
speech.synthesize(sentence, vid).then(wav => {
|
|
133
|
-
pushTTSAudio(cacheKey, wav, conversationId, sessionId, vid);
|
|
134
|
-
}).catch(() => {});
|
|
125
|
+
const cached = speech.ttsCacheGet(cacheKey);
|
|
126
|
+
if (cached) {
|
|
127
|
+
pushTTSAudio(cacheKey, cached, conversationId, sessionId, vid);
|
|
128
|
+
continue;
|
|
135
129
|
}
|
|
130
|
+
speech.synthesize(text, vid).then(wav => {
|
|
131
|
+
pushTTSAudio(cacheKey, wav, conversationId, sessionId, vid);
|
|
132
|
+
}).catch(() => {});
|
|
136
133
|
}
|
|
137
134
|
}).catch(() => {});
|
|
138
135
|
}
|
package/static/js/voice.js
CHANGED
|
@@ -317,9 +317,23 @@
|
|
|
317
317
|
}
|
|
318
318
|
|
|
319
319
|
function splitSentences(text) {
|
|
320
|
+
if (!text) return [text];
|
|
320
321
|
var raw = text.match(/[^.!?]+[.!?]+[\s]?|[^.!?]+$/g);
|
|
321
322
|
if (!raw) return [text];
|
|
322
|
-
|
|
323
|
+
var sentences = raw.map(function(s) { return s.trim(); }).filter(function(s) { return s.length > 0; });
|
|
324
|
+
var result = [];
|
|
325
|
+
for (var i = 0; i < sentences.length; i++) {
|
|
326
|
+
var s = sentences[i];
|
|
327
|
+
if (result.length > 0) {
|
|
328
|
+
var prev = result[result.length - 1];
|
|
329
|
+
if (s.match(/^(\d+[\.\)]|\d+\s)/) || prev.match(/\d+[\.\)]$/)) {
|
|
330
|
+
result[result.length - 1] = prev + ' ' + s;
|
|
331
|
+
continue;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
result.push(s);
|
|
335
|
+
}
|
|
336
|
+
return result;
|
|
323
337
|
}
|
|
324
338
|
|
|
325
339
|
var audioChunkQueue = [];
|
|
@@ -382,17 +396,9 @@
|
|
|
382
396
|
return;
|
|
383
397
|
}
|
|
384
398
|
|
|
385
|
-
var sentences =
|
|
399
|
+
var sentences = [text];
|
|
386
400
|
var cachedSentences = [];
|
|
387
|
-
var uncachedText = [];
|
|
388
|
-
for (var i = 0; i < sentences.length; i++) {
|
|
389
|
-
var blob = getCachedTTSBlob(sentences[i]);
|
|
390
|
-
if (blob) {
|
|
391
|
-
cachedSentences.push({ idx: i, blob: blob });
|
|
392
|
-
} else {
|
|
393
|
-
uncachedText.push(sentences[i]);
|
|
394
|
-
}
|
|
395
|
-
}
|
|
401
|
+
var uncachedText = [text];
|
|
396
402
|
|
|
397
403
|
if (cachedSentences.length === sentences.length) {
|
|
398
404
|
ttsConsecutiveFailures = 0;
|
|
@@ -530,16 +536,32 @@
|
|
|
530
536
|
if (!container) return;
|
|
531
537
|
var emptyMsg = container.querySelector('.voice-empty');
|
|
532
538
|
if (emptyMsg) emptyMsg.remove();
|
|
539
|
+
var lastChild = container.lastElementChild;
|
|
540
|
+
if (!isUser && lastChild && lastChild.classList.contains('voice-block') && !lastChild.classList.contains('voice-block-user')) {
|
|
541
|
+
var contentSpan = lastChild.querySelector('.voice-block-content');
|
|
542
|
+
if (contentSpan) {
|
|
543
|
+
contentSpan.textContent += '\n' + stripHtml(text);
|
|
544
|
+
lastChild._fullText = (lastChild._fullText || contentSpan.textContent) + '\n' + text;
|
|
545
|
+
scrollVoiceToBottom();
|
|
546
|
+
return lastChild;
|
|
547
|
+
}
|
|
548
|
+
}
|
|
533
549
|
var div = document.createElement('div');
|
|
534
550
|
div.className = 'voice-block' + (isUser ? ' voice-block-user' : '');
|
|
535
|
-
|
|
536
|
-
|
|
551
|
+
if (isUser) {
|
|
552
|
+
div.textContent = text;
|
|
553
|
+
} else {
|
|
554
|
+
var contentSpan = document.createElement('span');
|
|
555
|
+
contentSpan.className = 'voice-block-content';
|
|
556
|
+
contentSpan.textContent = stripHtml(text);
|
|
557
|
+
div.appendChild(contentSpan);
|
|
558
|
+
div._fullText = text;
|
|
537
559
|
var rereadBtn = document.createElement('button');
|
|
538
560
|
rereadBtn.className = 'voice-reread-btn';
|
|
539
561
|
rereadBtn.title = 'Re-read aloud';
|
|
540
562
|
rereadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="2"><polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/></svg>';
|
|
541
563
|
rereadBtn.addEventListener('click', function() {
|
|
542
|
-
speak(
|
|
564
|
+
speak(div._fullText || contentSpan.textContent);
|
|
543
565
|
});
|
|
544
566
|
div.appendChild(rereadBtn);
|
|
545
567
|
}
|