@inetafrica/open-claudia 2.6.49 → 2.6.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/runner.js +89 -15
- package/package.json +1 -1
package/core/runner.js
CHANGED
|
@@ -315,6 +315,15 @@ async function buildClaudeArgs(prompt, opts = {}) {
|
|
|
315
315
|
if (settings.permissionMode) args.push("--permission-mode", settings.permissionMode);
|
|
316
316
|
else args.push("--dangerously-skip-permissions");
|
|
317
317
|
if (settings.worktree) args.push("--worktree");
|
|
318
|
+
// Voice turns stream partial text so the spoken reply can start mid-generation
|
|
319
|
+
// (see the streaming-out handler in the runner). Strictly gated to the voice
|
|
320
|
+
// channel — zero behaviour change for Telegram/Kazee.
|
|
321
|
+
if (state.lastInputWasVoice) {
|
|
322
|
+
try {
|
|
323
|
+
const { currentTransport } = require("./context");
|
|
324
|
+
if (currentTransport() === "voice") args.push("--include-partial-messages");
|
|
325
|
+
} catch { /* context not ready — skip streaming flag */ }
|
|
326
|
+
}
|
|
318
327
|
// Dynamic state rides in the user prompt so the appended system prompt
|
|
319
328
|
// stays byte-stable across turns and the prompt-cache prefix survives.
|
|
320
329
|
args.push(await promptWithDynamicContext(prompt));
|
|
@@ -814,6 +823,50 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
|
|
|
814
823
|
state.statusMessageId = null;
|
|
815
824
|
state.streamBuffer = "";
|
|
816
825
|
let assistantText = "";
|
|
826
|
+
|
|
827
|
+
// Voice streaming-out: on voice turns we speak each finished sentence as it is
|
|
828
|
+
// generated (off the partial text_delta events) so the first audio plays while
|
|
829
|
+
// the rest of the reply is still being written — far lower time-to-first-sound
|
|
830
|
+
// than synthesizing one pass over the whole reply at the end. Reads the delta
|
|
831
|
+
// stream only; the text/transcript channel still reads whole-message events, so
|
|
832
|
+
// chat transports are completely unaffected.
|
|
833
|
+
let voiceStreaming = false;
|
|
834
|
+
try {
|
|
835
|
+
const { currentTransport } = require("./context");
|
|
836
|
+
voiceStreaming = !!state.lastInputWasVoice && currentTransport() === "voice";
|
|
837
|
+
} catch { voiceStreaming = false; }
|
|
838
|
+
let spokenBuf = ""; // text_delta accumulator awaiting a sentence boundary
|
|
839
|
+
let ttsChain = Promise.resolve(); // ordered send queue so clips play in order
|
|
840
|
+
let spokeAnyStreamed = false;
|
|
841
|
+
const SPOKEN_MIN_CHARS = 40; // don't fire TTS on tiny fragments ("Hi.")
|
|
842
|
+
/**
 * Queue one piece of reply text to be synthesized and sent as a voice clip.
 *
 * The text is passed through `redactSensitive` first; text that is empty or
 * whitespace-only after redaction is dropped. Synthesis starts right away so
 * it can overlap with earlier clips, while the actual send is appended to
 * `ttsChain` so clips go out in the order they were dispatched.
 *
 * `spokeAnyStreamed` is raised only once synthesis has produced a clip —
 * the same rule the non-streamed path applies to its own `spokeAny` flag —
 * so a turn in which every clip fails still reaches the caller's
 * "nothing was spoken" fallback after it awaits `ttsChain`.
 *
 * @param {string} text - Sentence (or trailing fragment) to speak.
 * @returns {void}
 */
function dispatchSpoken(text) {
  const clean = redactSensitive(text);
  if (!clean.trim()) return;

  // Start synth now (parallel). The rejection handler is attached
  // immediately: the ordered queue below may not reach this promise until
  // earlier clips have finished sending, and a rejection that lands before
  // any handler exists is reported as an unhandled rejection (fatal under
  // Node's default settings).
  const synthP = Promise.resolve(synthSentenceMp3(clean)).catch((e) => {
    console.error("voice stream clip failed:", e?.message ?? e);
    return null;
  });

  // ...but send strictly in order.
  ttsChain = ttsChain.then(async () => {
    try {
      const clip = await synthP;
      if (!clip) return;
      spokeAnyStreamed = true;
      await sendVoice(clip);
    } catch (e) {
      // Best-effort: one failed clip must not break the rest of the queue.
      console.error("voice stream clip failed:", e?.message ?? e);
    }
  });
}
|
|
852
|
+
/**
 * Drain complete sentences out of `spokenBuf` and hand them to
 * `dispatchSpoken`.
 *
 * A cut point is the end of the first run of `.`, `!` or `?` that is
 * followed by whitespace or by the current end of the buffer and that lies
 * at least SPOKEN_MIN_CHARS characters into the buffer. Note that "end of
 * the buffer" means the end of what has been received so far, so a
 * terminator that happens to be the last character of a partial delta is
 * treated as a boundary. Everything up to the cut is trimmed and
 * dispatched, the remainder (minus leading whitespace) stays buffered, and
 * the search repeats until no cut point is left.
 *
 * @param {boolean} flush - When truthy, whatever is still buffered after the
 *   sentence pass is trimmed, dispatched if non-empty, and the buffer is
 *   emptied.
 * @returns {void}
 */
function pumpSpoken(flush) {
  // Offset just past the first qualifying terminator run, or -1 if none.
  const findCut = () => {
    for (const hit of spokenBuf.matchAll(/[.!?]+(?=\s|$)/g)) {
      const boundary = hit.index + hit[0].length;
      if (boundary >= SPOKEN_MIN_CHARS) return boundary;
    }
    return -1;
  };

  for (let cut = findCut(); cut !== -1; cut = findCut()) {
    const head = spokenBuf.slice(0, cut).trim();
    spokenBuf = spokenBuf.slice(cut).replace(/^\s+/, "");
    if (head) dispatchSpoken(head);
  }

  if (!flush) return;

  const rest = spokenBuf.trim();
  spokenBuf = "";
  if (rest) dispatchSpoken(rest);
}
|
|
869
|
+
|
|
817
870
|
let toolUses = [];
|
|
818
871
|
let currentTool = null;
|
|
819
872
|
let currentToolDetail = "";
|
|
@@ -1020,6 +1073,15 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
|
|
|
1020
1073
|
const lastNewline = state.streamBuffer.lastIndexOf("\n");
|
|
1021
1074
|
state.streamBuffer = lastNewline >= 0 ? state.streamBuffer.slice(lastNewline + 1) : state.streamBuffer;
|
|
1022
1075
|
for (const evt of events) {
|
|
1076
|
+
// Voice streaming-out: speak finished sentences as the model writes them.
|
|
1077
|
+
// Only text_delta is spoken; thinking_delta and tool events are ignored.
|
|
1078
|
+
if (voiceStreaming && evt.type === "stream_event"
|
|
1079
|
+
&& evt.event?.type === "content_block_delta"
|
|
1080
|
+
&& evt.event.delta?.type === "text_delta"
|
|
1081
|
+
&& typeof evt.event.delta.text === "string") {
|
|
1082
|
+
spokenBuf += evt.event.delta.text;
|
|
1083
|
+
pumpSpoken(false);
|
|
1084
|
+
}
|
|
1023
1085
|
if (evt.type === "assistant" && evt.message?.usage) {
|
|
1024
1086
|
const callPrefix = usageParts(evt.message.usage, settings.backend || "claude").context;
|
|
1025
1087
|
if (callPrefix > peakContextTokens) peakContextTokens = callPrefix;
|
|
@@ -1212,25 +1274,37 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
|
|
|
1212
1274
|
|
|
1213
1275
|
if (state.lastInputWasVoice) {
|
|
1214
1276
|
state.lastInputWasVoice = false;
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
let spokeAny = false;
|
|
1225
|
-
for (const sentence of sentences) {
|
|
1226
|
-
const clip = await synthSentenceMp3(sentence);
|
|
1227
|
-
if (clip) { spokeAny = true; await sendVoice(clip); }
|
|
1228
|
-
}
|
|
1229
|
-
if (!spokeAny) {
|
|
1277
|
+
if (voiceStreaming) {
|
|
1278
|
+
// Sentences were already being spoken as the model wrote them. Flush
|
|
1279
|
+
// the trailing partial sentence, wait for the ordered send queue to
|
|
1280
|
+
// drain, then close the turn so the client re-arms the mic.
|
|
1281
|
+
pumpSpoken(true);
|
|
1282
|
+
await ttsChain;
|
|
1283
|
+
if (!spokeAnyStreamed) {
|
|
1284
|
+
// Tool-only / empty turn produced no spoken text — say the final
|
|
1285
|
+
// text once so the user still hears a reply.
|
|
1230
1286
|
const voicePath = await textToVoice(finalText);
|
|
1231
1287
|
if (voicePath) await sendVoice(voicePath);
|
|
1232
1288
|
}
|
|
1233
1289
|
await sendVoiceEnd();
|
|
1290
|
+
} else {
|
|
1291
|
+
// Non-streamed fallback. Spoken replies belong to the hands-free voice
|
|
1292
|
+
// channel; on chat transports (Telegram/Kazee) an auto voice note on
|
|
1293
|
+
// every voice input is unwanted noise, so gate it to the voice channel.
|
|
1294
|
+
const { currentTransport } = require("./context");
|
|
1295
|
+
if (currentTransport() === "voice") {
|
|
1296
|
+
const sentences = splitSentences(finalText);
|
|
1297
|
+
let spokeAny = false;
|
|
1298
|
+
for (const sentence of sentences) {
|
|
1299
|
+
const clip = await synthSentenceMp3(sentence);
|
|
1300
|
+
if (clip) { spokeAny = true; await sendVoice(clip); }
|
|
1301
|
+
}
|
|
1302
|
+
if (!spokeAny) {
|
|
1303
|
+
const voicePath = await textToVoice(finalText);
|
|
1304
|
+
if (voicePath) await sendVoice(voicePath);
|
|
1305
|
+
}
|
|
1306
|
+
await sendVoiceEnd();
|
|
1307
|
+
}
|
|
1234
1308
|
}
|
|
1235
1309
|
}
|
|
1236
1310
|
} catch (e) {
|
package/package.json
CHANGED