@inetafrica/open-claudia 2.6.49 → 2.6.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@
18
18
  const graph = require("./graph");
19
19
  const metrics = require("./metrics");
20
20
  const { spawnSubagent } = require("../subagent");
21
+ const warmWalker = require("./warm-walker");
21
22
 
22
23
  const WALKER_MODEL = process.env.RECALL_DISCOVERER_MODEL || "haiku";
23
24
  const WALKER_TIMEOUT_MS = Number(process.env.RECALL_DISCOVERER_TIMEOUT_MS || 25000);
@@ -78,6 +79,21 @@ const WALKER_SYSTEM = [
78
79
  'Reply with ONLY a JSON array: [{"id":"pack:foo","why":"shared lime theme governs this app"}]. Use [] if none.',
79
80
  ].join("\n");
80
81
 
82
/**
 * Run the walker model on `prompt` and return its raw text reply.
 *
 * Prefers the warm (reused) process for low latency. On any warm-path error it
 * falls back to a cold spawn with the identical model / system prompt /
 * timeout, so the reply contract is the same on both paths.
 *
 * @param {string} prompt - Fully rendered walker prompt.
 * @returns {Promise<string>} The model's text reply (expected to hold a JSON array).
 * @throws Whatever `spawnSubagent` rejects with when the cold path also fails.
 */
async function runWalker(prompt) {
  const opts = { model: WALKER_MODEL, systemPrompt: WALKER_SYSTEM, timeoutMs: WALKER_TIMEOUT_MS };
  if (warmWalker.isEnabled()) {
    try {
      return await warmWalker.walkWarm(prompt, opts);
    } catch (e) {
      // Still fail-open, but leave a trace: a warm path that is permanently
      // broken would otherwise silently add its timeout to every recall.
      const reason = e && e.message ? e.message : String(e);
      console.warn(`[recall] warm walker failed, falling back to cold spawn: ${reason}`);
    }
  }
  const { text } = await spawnSubagent(prompt, opts);
  return text;
}
96
+
81
97
  async function walk(userText, contextText, candidates) {
82
98
  if (!WALKER_ENABLED || candidates.length === 0) return null;
83
99
  const lines = candidates.map((c) => {
@@ -95,9 +111,7 @@ async function walk(userText, contextText, candidates) {
95
111
  'Reply ONLY with the JSON array of kept nodes and their why, e.g. [{"id":"' + candidates[0].id + '","why":"..."}].',
96
112
  ].join("\n");
97
113
  try {
98
- const { text } = await spawnSubagent(prompt, {
99
- model: WALKER_MODEL, systemPrompt: WALKER_SYSTEM, timeoutMs: WALKER_TIMEOUT_MS,
100
- });
114
+ const text = await runWalker(prompt);
101
115
  const match = String(text || "").match(/\[[\s\S]*\]/);
102
116
  if (!match) return null;
103
117
  const arr = JSON.parse(match[0]);
@@ -0,0 +1,151 @@
1
+ // Warm walker: one long-lived `claude` process the discoverer reuses across
2
+ // turns, instead of cold-spawning a fresh CLI for every recall.
3
+ //
4
+ // Why this exists: the Claude CLI initialises lazily on the FIRST message, so
5
+ // a cold spawn pays ~3-16s of boot+init every turn. Measured: on a persistent
6
+ // stream-json process the first message costs ~3.4s but every message after is
7
+ // ~1.4s — init is paid once and amortised. Same model, same system prompt,
8
+ // same JSON contract as the cold path, so recall quality is unchanged; only
9
+ // the transport differs.
10
+ //
11
+ // Statelessness: stream-json keeps one growing session, so we recycle the
12
+ // process on a small message/char budget to keep accumulated history tiny and
13
+ // each walk effectively independent.
14
+ //
15
+ // Fail-open: any error here throws, and the caller (discoverer.runWalker) falls
16
+ // back to the cold spawnSubagent path — never worse than before, and never a
17
+ // drop to the classic engine.
18
+
19
+ const { spawn } = require("child_process");
20
+ const { CLAUDE_PATH, botSubprocessEnv } = require("../config");
21
+ const { claudeSubprocessEnv } = require("../auth-flow");
22
+ const { redactSensitive } = require("../redact");
23
+
24
/**
 * Read a numeric setting from the environment.
 *
 * An unset or empty variable yields `fallback`. So does a value that does not
 * parse as a number: a NaN budget would make every `count >= budget`
 * comparison false and the warm process would never be recycled.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or unparsable.
 * @returns {number}
 */
function envNumber(name, fallback) {
  const parsed = Number(process.env[name] || fallback);
  return Number.isNaN(parsed) ? fallback : parsed;
}

// Recycle budget: the process is replaced once it has received this many
// messages or this many prompt characters.
const MAX_MSGS = envNumber("RECALL_WARM_WALKER_MAX_MSGS", 12);
const MAX_CHARS = envNumber("RECALL_WARM_WALKER_MAX_CHARS", 24000);
// Per-walk timeout used when the caller does not pass one.
const DEFAULT_TIMEOUT_MS = 25000;

let child = null;   // current process, or null when none is alive
let pending = null; // bookkeeping for the in-flight walk, or null when idle
let chain = Promise.resolve(); // serialises walks (one message in flight)
let msgCount = 0;   // messages sent to the current process
let charCount = 0;  // prompt chars sent to the current process
33
+
34
/**
 * Whether the warm walker should be used.
 *
 * Enabled by default. `RECALL_WARM_WALKER` disables it when set to `off`, or to
 * one of the other common "disabled" spellings (`0`, `false`, `no`).
 * Surrounding whitespace and letter case are ignored.
 *
 * @returns {boolean}
 */
function isEnabled() {
  const raw = String(process.env.RECALL_WARM_WALKER || "on").trim().toLowerCase();
  return !["off", "0", "false", "no"].includes(raw);
}
37
+
38
/**
 * Detach and terminate the current warm process, if there is one.
 *
 * The module-level `child` is cleared first, so handlers that compare against
 * it see the process as no longer current. Closing stdin and sending SIGTERM
 * are both best-effort: failures are ignored.
 */
function killChild() {
  const doomed = child;
  child = null;
  if (!doomed) return;
  try {
    doomed.stdin.end();
  } catch (e) {}
  try {
    doomed.kill("SIGTERM");
  } catch (e) {}
}
46
+
47
/**
 * Spawn the long-lived `claude` process in stream-json mode and make it the
 * current `child`, resetting the per-process message/char counters.
 *
 * The model and system prompt are fixed at spawn time, so later walks reuse
 * them until the process is recycled.
 *
 * Every handler is scoped to the process it was registered on:
 *  - stdout is ignored once the process is no longer the current child;
 *  - the in-flight walk is settled only when it was sent to THIS process, so a
 *    late `close`/`error` from a recycled or timed-out process cannot reject a
 *    walk that is running on its replacement.
 *
 * @param {{model?: string, systemPrompt?: string}} cfg
 * @returns {import("child_process").ChildProcess} The spawned process.
 */
function spawnChild(cfg) {
  const args = [
    "-p",
    "--input-format", "stream-json",
    "--output-format", "stream-json",
    "--verbose",
    "--model", cfg.model || "haiku",
    "--no-session-persistence",
    "--append-system-prompt", cfg.systemPrompt || "",
    "--dangerously-skip-permissions",
  ];
  const env = { ...botSubprocessEnv(), ...claudeSubprocessEnv() };
  const proc = spawn(CLAUDE_PATH, args, { cwd: process.cwd(), env, stdio: ["pipe", "pipe", "pipe"] });
  child = proc;
  msgCount = 0;
  charCount = 0;
  let buf = "";      // partial stdout line carried between chunks
  let asstText = ""; // assistant text seen since the last result event

  // Settle the in-flight walk, but only if it belongs to this process.
  const settle = (fn, arg) => {
    const p = pending;
    if (!p || p.proc !== proc) return;
    pending = null;
    clearTimeout(p.timer);
    if (fn === "resolve") p.resolve(arg);
    else p.reject(arg);
  };

  // Decode in the stream so a multi-byte character split across two chunks is
  // not corrupted by per-chunk toString().
  proc.stdout.setEncoding("utf8");
  proc.stdout.on("data", (d) => {
    if (child !== proc) return;
    buf += String(d);
    let nl;
    while ((nl = buf.indexOf("\n")) >= 0) {
      const line = buf.slice(0, nl).trim();
      buf = buf.slice(nl + 1);
      if (!line) continue;
      let evt;
      try { evt = JSON.parse(line); } catch (e) { continue; }
      if (evt.type === "assistant" && evt.message && Array.isArray(evt.message.content)) {
        for (const b of evt.message.content) {
          if (b && b.type === "text" && typeof b.text === "string") asstText += b.text;
        }
      } else if (evt.type === "result") {
        // Prefer the result payload; fall back to the accumulated assistant text.
        const text = (typeof evt.result === "string" && evt.result) ? evt.result : asstText;
        asstText = "";
        if (text && !evt.is_error) settle("resolve", redactSensitive(String(text).trim()));
        else settle("reject", new Error("warm walker: empty/error result"));
      }
    }
  });
  proc.stderr.on("data", () => {}); // drained only; failures surface via result/close
  // A write to a dead pipe reports EPIPE as an 'error' event on stdin. With no
  // listener that would be an uncaught exception, so treat it as a failed walk.
  proc.stdin.on("error", (e) => {
    if (child === proc) killChild();
    settle("reject", e);
  });
  proc.on("error", (e) => {
    if (child === proc) child = null;
    settle("reject", e);
  });
  proc.on("close", () => {
    if (child === proc) child = null;
    settle("reject", new Error("warm walker: process closed"));
  });
  return proc;
}

/**
 * Send one prompt to the warm process and resolve with its reply text.
 *
 * Recycles the process first when it is over its message/char budget, and
 * spawns one when none is alive. Callers must serialise invocations: only one
 * walk may be in flight at a time.
 *
 * @param {string} promptText - Prompt to send as a user message.
 * @param {{model?: string, systemPrompt?: string, timeoutMs?: number}} opts
 * @returns {Promise<string>} Rejects on timeout, write failure, process exit,
 *   or an empty/error result.
 */
function doWalk(promptText, opts) {
  // Recycle before sending if the current process is over its budget, so each
  // walk runs against a small, near-stateless context.
  if (child && (msgCount >= MAX_MSGS || charCount >= MAX_CHARS)) killChild();
  if (!child) spawnChild({ systemPrompt: opts.systemPrompt, model: opts.model });
  const proc = child;
  const timeoutMs = opts.timeoutMs || DEFAULT_TIMEOUT_MS;

  return new Promise((resolve, reject) => {
    // `proc` records which process owns this walk, so events from any other
    // process leave it alone.
    const entry = { resolve, reject, timer: null, proc };
    entry.timer = setTimeout(() => {
      if (pending === entry) pending = null;
      if (child === proc) killChild(); // process is wedged — drop it
      reject(new Error("warm walker: timeout"));
    }, timeoutMs);
    pending = entry;
    msgCount++;
    charCount += promptText.length;
    const payload = JSON.stringify({ type: "user", message: { role: "user", content: promptText } }) + "\n";
    try {
      proc.stdin.write(payload);
    } catch (e) {
      clearTimeout(entry.timer);
      if (pending === entry) pending = null;
      if (child === proc) killChild();
      reject(e);
    }
  });
}
136
+
137
/**
 * Queue a walk on the warm process and return a promise for its reply.
 *
 * Walks run strictly one at a time. The internal queue tail always settles
 * successfully, so one failed walk cannot block or fail the walks queued after
 * it, while each caller still observes the outcome of its own walk.
 *
 * @param {string} promptText - Prompt to send.
 * @param {object} [opts] - Options forwarded to the walk (model, systemPrompt, timeoutMs).
 * @returns {Promise<string>}
 */
function walkWarm(promptText, opts = {}) {
  const start = () => doWalk(promptText, opts);
  const outcome = chain.then(start, start);
  const ignore = () => {};
  chain = outcome.then(ignore, ignore);
  return outcome;
}
145
+
146
/** Terminate the warm process, if any. A later walk spawns a fresh one. */
function shutdown() {
  killChild();
}
147
/**
 * Snapshot of the warm walker's state.
 *
 * @returns {{alive: boolean, msgCount: number, charCount: number, MAX_MSGS: number, MAX_CHARS: number}}
 */
function stats() {
  const alive = Boolean(child);
  return { alive, msgCount, charCount, MAX_MSGS, MAX_CHARS };
}
148
+
149
+ process.on("exit", () => { try { killChild(); } catch (e) {} });
150
+
151
+ module.exports = { walkWarm, isEnabled, shutdown, stats };
package/core/router.js CHANGED
@@ -120,12 +120,17 @@ async function handleVoice(envelope) {
120
120
  return send(`Voice note too large (${Math.round(media.size / 1024 / 1024)}MB). Max: ${MAX_VOICE_SIZE / 1024 / 1024}MB`);
121
121
  }
122
122
  envelope.adapter.typing(envelope.channelId).catch(() => {});
123
+ const tStart = Date.now();
123
124
  const oggPath = await envelope.adapter.downloadMedia(media);
125
+ const tAfterDownload = Date.now();
124
126
  const transcript = transcribeAudio(oggPath);
127
+ const tAfterStt = Date.now();
125
128
  try { fs.unlinkSync(oggPath); } catch (e) {}
126
129
  if (!transcript) return send("Couldn't transcribe. Try typing it.");
127
130
  await send(`Heard: "${transcript}"`, { replyTo: envelope.messageId });
128
131
  state.lastInputWasVoice = true;
132
+ // Per-stage latency for the voice pipeline; the runner fills in the rest.
133
+ state.voiceLat = { startAt: tStart, downloadMs: tAfterDownload - tStart, sttMs: tAfterStt - tAfterDownload };
129
134
  await runClaude(transcript, state.currentSession.dir, envelope.messageId);
130
135
  } catch (err) { await send(`Voice failed: ${err.message}`); }
131
136
  }
package/core/runner.js CHANGED
@@ -315,6 +315,15 @@ async function buildClaudeArgs(prompt, opts = {}) {
315
315
  if (settings.permissionMode) args.push("--permission-mode", settings.permissionMode);
316
316
  else args.push("--dangerously-skip-permissions");
317
317
  if (settings.worktree) args.push("--worktree");
318
+ // Voice turns stream partial text so the spoken reply can start mid-generation
319
+ // (see the streaming-out handler in the runner). Strictly gated to the voice
320
+ // channel — zero behaviour change for Telegram/Kazee.
321
+ if (state.lastInputWasVoice) {
322
+ try {
323
+ const { currentTransport } = require("./context");
324
+ if (currentTransport() === "voice") args.push("--include-partial-messages");
325
+ } catch { /* context not ready — skip streaming flag */ }
326
+ }
318
327
  // Dynamic state rides in the user prompt so the appended system prompt
319
328
  // stays byte-stable across turns and the prompt-cache prefix survives.
320
329
  args.push(await promptWithDynamicContext(prompt));
@@ -814,6 +823,55 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
814
823
  state.statusMessageId = null;
815
824
  state.streamBuffer = "";
816
825
  let assistantText = "";
826
+
827
+ // Voice streaming-out: on voice turns we speak each finished sentence as it is
828
+ // generated (off the partial text_delta events) so the first audio plays while
829
+ // the rest of the reply is still being written — far lower time-to-first-sound
830
+ // than synthesizing one pass over the whole reply at the end. Reads the delta
831
+ // stream only; the text/transcript channel still reads whole-message events, so
832
+ // chat transports are completely unaffected.
833
+ let voiceStreaming = false;
834
+ try {
835
+ const { currentTransport } = require("./context");
836
+ voiceStreaming = !!state.lastInputWasVoice && currentTransport() === "voice";
837
+ } catch { voiceStreaming = false; }
838
+ let spokenBuf = ""; // text_delta accumulator awaiting a sentence boundary
839
+ let ttsChain = Promise.resolve(); // ordered send queue so clips play in order
840
+ let spokeAnyStreamed = false;
841
+ const SPOKEN_MIN_CHARS = 40; // don't fire TTS on tiny fragments ("Hi.")
842
+ // Voice latency probe (measurement only) — timestamps filled in as the turn runs.
843
+ // Spawn time is `startTime` (captured just after spawn below).
844
+ let vlFirstSysAt = null, vlFirstTokenAt = null, vlFirstAudioAt = null, vlResultAt = null;
845
/**
 * Queue one piece of reply text to be spoken.
 *
 * Synthesis starts right away, in parallel with clips still being sent, while
 * the send is appended to `ttsChain` so clips go out in generation order.
 *
 * @param {string} text - Sentence (or trailing fragment) to speak; it is
 *   redacted first and skipped when nothing but whitespace remains.
 */
function dispatchSpoken(text) {
  const clean = redactSensitive(text);
  if (!clean.trim()) return;
  // Attach the rejection handler at creation time. The ordered queue may not
  // reach this clip until earlier sends finish, and a synth failure before
  // then would otherwise be reported as an unhandled rejection.
  const synthP = Promise.resolve()
    .then(() => synthSentenceMp3(clean))
    .catch((e) => {
      console.error("voice stream clip failed:", e && e.message);
      return null;
    });
  ttsChain = ttsChain.then(async () => { // send strictly in order
    try {
      const clip = await synthP;
      if (!clip) return;
      // Count the turn as spoken only once a clip exists (as the non-streamed
      // path does), so a turn where every synth yields nothing still gets the
      // whole-reply fallback.
      spokeAnyStreamed = true;
      await sendVoice(clip);
      if (vlFirstAudioAt == null) vlFirstAudioAt = Date.now();
    } catch (e) { console.error("voice stream clip failed:", e.message); }
  });
}
857
/**
 * Move finished sentences out of `spokenBuf` and hand them to `dispatchSpoken`.
 *
 * Repeatedly cuts the smallest prefix that ends in a sentence terminator and
 * is at least SPOKEN_MIN_CHARS long. A terminator counts as a boundary only
 * when whitespace follows it: while text is still streaming, the end of the
 * buffer is just where the latest delta happened to stop ("3." may be the
 * first half of "3.14"), so it is never treated as a sentence end.
 *
 * @param {boolean} flush - When true, whatever remains after cutting (the
 *   final sentence or a trailing fragment) is dispatched and the buffer is
 *   emptied.
 */
function pumpSpoken(flush) {
  for (;;) {
    const boundary = /[.!?]+(?=\s)/g; // fresh literal per pass: no stale lastIndex
    let cut = -1;
    let hit;
    while ((hit = boundary.exec(spokenBuf)) !== null) {
      const end = hit.index + hit[0].length;
      if (end >= SPOKEN_MIN_CHARS) { cut = end; break; }
    }
    if (cut === -1) break;
    const sentence = spokenBuf.slice(0, cut).trim();
    spokenBuf = spokenBuf.slice(cut).replace(/^\s+/, "");
    if (sentence) dispatchSpoken(sentence);
  }
  if (flush) {
    const tail = spokenBuf.trim();
    spokenBuf = "";
    if (tail) dispatchSpoken(tail);
  }
}
874
+
817
875
  let toolUses = [];
818
876
  let currentTool = null;
819
877
  let currentToolDetail = "";
@@ -1020,6 +1078,20 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
1020
1078
  const lastNewline = state.streamBuffer.lastIndexOf("\n");
1021
1079
  state.streamBuffer = lastNewline >= 0 ? state.streamBuffer.slice(lastNewline + 1) : state.streamBuffer;
1022
1080
  for (const evt of events) {
1081
+ // Voice latency probe: first "system" event = CLI ready (cold-start done);
1082
+ // "result" = generation finished (before the TTS tail drains).
1083
+ if (voiceStreaming && vlFirstSysAt == null && evt.type === "system") vlFirstSysAt = Date.now();
1084
+ if (voiceStreaming && vlResultAt == null && evt.type === "result") vlResultAt = Date.now();
1085
+ // Voice streaming-out: speak finished sentences as the model writes them.
1086
+ // Only text_delta is spoken; thinking_delta and tool events are ignored.
1087
+ if (voiceStreaming && evt.type === "stream_event"
1088
+ && evt.event?.type === "content_block_delta"
1089
+ && evt.event.delta?.type === "text_delta"
1090
+ && typeof evt.event.delta.text === "string") {
1091
+ if (vlFirstTokenAt == null) vlFirstTokenAt = Date.now();
1092
+ spokenBuf += evt.event.delta.text;
1093
+ pumpSpoken(false);
1094
+ }
1023
1095
  if (evt.type === "assistant" && evt.message?.usage) {
1024
1096
  const callPrefix = usageParts(evt.message.usage, settings.backend || "claude").context;
1025
1097
  if (callPrefix > peakContextTokens) peakContextTokens = callPrefix;
@@ -1212,25 +1284,52 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
1212
1284
 
1213
1285
  if (state.lastInputWasVoice) {
1214
1286
  state.lastInputWasVoice = false;
1215
- // Spoken replies belong to the hands-free voice channel. On chat
1216
- // transports (Telegram/Kazee) an auto voice note on every voice
1217
- // input is unwanted noise, so gate it to the voice channel.
1218
- const { currentTransport } = require("./context");
1219
- if (currentTransport() === "voice") {
1220
- // Stream the spoken reply sentence-by-sentence so the first audio
1221
- // plays while the rest still synthesizes — far lower time-to-first-
1222
- // sound than waiting for one TTS pass over the whole reply.
1223
- const sentences = splitSentences(finalText);
1224
- let spokeAny = false;
1225
- for (const sentence of sentences) {
1226
- const clip = await synthSentenceMp3(sentence);
1227
- if (clip) { spokeAny = true; await sendVoice(clip); }
1228
- }
1229
- if (!spokeAny) {
1287
+ if (voiceStreaming) {
1288
+ // Sentences were already being spoken as the model wrote them. Flush
1289
+ // the trailing partial sentence, wait for the ordered send queue to
1290
+ // drain, then close the turn so the client re-arms the mic.
1291
+ pumpSpoken(true);
1292
+ await ttsChain;
1293
+ if (!spokeAnyStreamed) {
1294
+ // Tool-only / empty turn produced no spoken text — say the final
1295
+ // text once so the user still hears a reply.
1230
1296
  const voicePath = await textToVoice(finalText);
1231
1297
  if (voicePath) await sendVoice(voicePath);
1232
1298
  }
1233
1299
  await sendVoiceEnd();
1300
+ // Per-stage latency breakdown (measurement only — temporary debug footer).
1301
+ try {
1302
+ const vl = state.voiceLat || {};
1303
+ const endAt = Date.now();
1304
+ const sec = (n) => n != null ? (n / 1000).toFixed(1) + "s" : "?";
1305
+ const diff = (a, b) => (a != null && b != null) ? sec(b - a) : "?";
1306
+ const fromStart = (t) => diff(vl.startAt, t);
1307
+ const line = `⏱ dl ${sec(vl.downloadMs)} · stt ${sec(vl.sttMs)}`
1308
+ + ` · cli ${diff(startTime, vlFirstSysAt)} · ttft ${diff(vlFirstSysAt, vlFirstTokenAt)}`
1309
+ + ` · gen ${diff(vlFirstTokenAt, vlResultAt)} · 1st-audio ${fromStart(vlFirstAudioAt)}`
1310
+ + ` · total ${fromStart(endAt)}`;
1311
+ console.log(`[VOICE-LAT] ${line}`);
1312
+ await send(line);
1313
+ } catch (e) { /* metrics best-effort */ }
1314
+ state.voiceLat = null;
1315
+ } else {
1316
+ // Non-streamed fallback. Spoken replies belong to the hands-free voice
1317
+ // channel; on chat transports (Telegram/Kazee) an auto voice note on
1318
+ // every voice input is unwanted noise, so gate it to the voice channel.
1319
+ const { currentTransport } = require("./context");
1320
+ if (currentTransport() === "voice") {
1321
+ const sentences = splitSentences(finalText);
1322
+ let spokeAny = false;
1323
+ for (const sentence of sentences) {
1324
+ const clip = await synthSentenceMp3(sentence);
1325
+ if (clip) { spokeAny = true; await sendVoice(clip); }
1326
+ }
1327
+ if (!spokeAny) {
1328
+ const voicePath = await textToVoice(finalText);
1329
+ if (voicePath) await sendVoice(voicePath);
1330
+ }
1331
+ await sendVoiceEnd();
1332
+ }
1234
1333
  }
1235
1334
  }
1236
1335
  } catch (e) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inetafrica/open-claudia",
3
- "version": "2.6.49",
3
+ "version": "2.6.51",
4
4
  "description": "Your always-on AI coding assistant — Claude Code, Cursor Agent, and OpenAI Codex via Telegram or Kazee Chat",
5
5
  "main": "bot.js",
6
6
  "bin": {