@inetafrica/open-claudia 2.6.50 → 2.6.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/recall/discoverer.js +17 -3
- package/core/recall/warm-walker.js +151 -0
- package/core/router.js +5 -0
- package/core/runner.js +27 -2
- package/package.json +1 -1
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
const graph = require("./graph");
|
|
19
19
|
const metrics = require("./metrics");
|
|
20
20
|
const { spawnSubagent } = require("../subagent");
|
|
21
|
+
const warmWalker = require("./warm-walker");
|
|
21
22
|
|
|
22
23
|
const WALKER_MODEL = process.env.RECALL_DISCOVERER_MODEL || "haiku";
|
|
23
24
|
const WALKER_TIMEOUT_MS = Number(process.env.RECALL_DISCOVERER_TIMEOUT_MS || 25000);
|
|
@@ -78,6 +79,21 @@ const WALKER_SYSTEM = [
|
|
|
78
79
|
'Reply with ONLY a JSON array: [{"id":"pack:foo","why":"shared lime theme governs this app"}]. Use [] if none.',
|
|
79
80
|
].join("\n");
|
|
80
81
|
|
|
82
|
+
/**
 * Run the walker model on a prompt and return its raw text reply.
 *
 * The warm (reused) process is tried first for low latency. When the warm
 * path is disabled or throws, the same prompt goes through a cold
 * `spawnSubagent` call with the identical model / system prompt / timeout, so
 * a warm-path failure changes only the transport, not the contract.
 *
 * @param {string} prompt - Fully rendered walker prompt.
 * @returns {Promise<string>} Text produced by the walker model.
 * @throws Whatever `spawnSubagent` rejects with if the cold path also fails.
 */
async function runWalker(prompt) {
  const opts = { model: WALKER_MODEL, systemPrompt: WALKER_SYSTEM, timeoutMs: WALKER_TIMEOUT_MS };
  if (warmWalker.isEnabled()) {
    try {
      return await warmWalker.walkWarm(prompt, opts);
    } catch (e) {
      // Deliberate fail-open: fall through to the cold spawn below. Leave a
      // trace though — otherwise a warm path that fails on every turn is
      // indistinguishable from one that was never enabled.
      console.warn("recall walker: warm path failed, falling back to cold spawn:", e && e.message);
    }
  }
  const { text } = await spawnSubagent(prompt, opts);
  return text;
}
|
|
96
|
+
|
|
81
97
|
async function walk(userText, contextText, candidates) {
|
|
82
98
|
if (!WALKER_ENABLED || candidates.length === 0) return null;
|
|
83
99
|
const lines = candidates.map((c) => {
|
|
@@ -95,9 +111,7 @@ async function walk(userText, contextText, candidates) {
|
|
|
95
111
|
'Reply ONLY with the JSON array of kept nodes and their why, e.g. [{"id":"' + candidates[0].id + '","why":"..."}].',
|
|
96
112
|
].join("\n");
|
|
97
113
|
try {
|
|
98
|
-
const
|
|
99
|
-
model: WALKER_MODEL, systemPrompt: WALKER_SYSTEM, timeoutMs: WALKER_TIMEOUT_MS,
|
|
100
|
-
});
|
|
114
|
+
const text = await runWalker(prompt);
|
|
101
115
|
const match = String(text || "").match(/\[[\s\S]*\]/);
|
|
102
116
|
if (!match) return null;
|
|
103
117
|
const arr = JSON.parse(match[0]);
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
// Warm walker: one long-lived `claude` process the discoverer reuses across
|
|
2
|
+
// turns, instead of cold-spawning a fresh CLI for every recall.
|
|
3
|
+
//
|
|
4
|
+
// Why this exists: the Claude CLI initialises lazily on the FIRST message, so
|
|
5
|
+
// a cold spawn pays ~3-16s of boot+init every turn. Measured: on a persistent
|
|
6
|
+
// stream-json process the first message costs ~3.4s but every message after is
|
|
7
|
+
// ~1.4s — init is paid once and amortised. Same model, same system prompt,
|
|
8
|
+
// same JSON contract as the cold path, so recall quality is unchanged; only
|
|
9
|
+
// the transport differs.
|
|
10
|
+
//
|
|
11
|
+
// Statelessness: stream-json keeps one growing session, so we recycle the
|
|
12
|
+
// process on a small message/char budget to keep accumulated history tiny and
|
|
13
|
+
// each walk effectively independent.
|
|
14
|
+
//
|
|
15
|
+
// Fail-open: any error here throws, and the caller (discoverer.runWalker) falls
|
|
16
|
+
// back to the cold spawnSubagent path — never worse than before, and never a
|
|
17
|
+
// drop to the classic engine.
|
|
18
|
+
|
|
19
|
+
const { spawn } = require("child_process");
|
|
20
|
+
const { CLAUDE_PATH, botSubprocessEnv } = require("../config");
|
|
21
|
+
const { claudeSubprocessEnv } = require("../auth-flow");
|
|
22
|
+
const { redactSensitive } = require("../redact");
|
|
23
|
+
|
|
24
|
+
// Read a numeric tuning knob from the environment. An unset or empty variable
// yields `fallback`, and so does a value that does not parse to a finite
// number: a NaN budget would make every `count >= budget` comparison false and
// the warm process would never be recycled.
function envNumber(name, fallback) {
  const parsed = Number(process.env[name] || fallback);
  return Number.isFinite(parsed) ? parsed : fallback;
}

// Recycle budget: the warm process is replaced once it has been sent this many
// messages or this many prompt characters.
const MAX_MSGS = envNumber("RECALL_WARM_WALKER_MAX_MSGS", 12);
const MAX_CHARS = envNumber("RECALL_WARM_WALKER_MAX_CHARS", 24000);
// Per-walk timeout used when the caller does not pass `timeoutMs`.
const DEFAULT_TIMEOUT_MS = 25000;
|
|
27
|
+
|
|
28
|
+
let child = null; // current process, or null when none is alive
|
|
29
|
+
let pending = null; // { resolve, reject, timer } for the in-flight walk
|
|
30
|
+
let chain = Promise.resolve(); // serialises walks (one message in flight)
|
|
31
|
+
let msgCount = 0; // messages sent to the current process
|
|
32
|
+
let charCount = 0; // prompt chars sent to the current process
|
|
33
|
+
|
|
34
|
+
/**
 * Report whether the warm walker should be used.
 *
 * Controlled by the RECALL_WARM_WALKER environment variable: the feature is on
 * unless the value is "off" (case-insensitive, surrounding whitespace ignored
 * so a padded value such as "off " still disables it). Unset or empty means on.
 *
 * @returns {boolean} true when the warm path is enabled.
 */
function isEnabled() {
  const raw = String(process.env.RECALL_WARM_WALKER || "on");
  return raw.trim().toLowerCase() !== "off";
}
|
|
37
|
+
|
|
38
|
+
/**
 * Drop the current warm process, if any.
 *
 * The module-level `child` reference is cleared first, then the process's
 * stdin is ended and it is sent SIGTERM. Both teardown steps are best-effort:
 * an exception from either is ignored.
 */
function killChild() {
  const doomed = child;
  child = null;
  if (!doomed) return;

  try {
    doomed.stdin.end();
  } catch (e) {}

  try {
    doomed.kill("SIGTERM");
  } catch (e) {}
}
|
|
46
|
+
|
|
47
|
+
/**
 * Spawn a fresh `claude` process in stream-json mode and install it as the
 * current warm child, resetting the per-process message/char counters.
 *
 * The process's stdout is parsed as newline-delimited JSON. Text blocks from
 * "assistant" events are accumulated, and a "result" event settles the
 * in-flight walk (module-level `pending`): resolved with the redacted, trimmed
 * result text, or rejected when the result is empty or flagged `is_error`.
 *
 * Only the CURRENT process is allowed to settle `pending`. `doWalk` kills an
 * over-budget process and immediately spawns a replacement and registers a new
 * `pending`; the retired process's "close" event arrives after that, so an
 * unguarded handler would reject the walk that belongs to the new process.
 *
 * @param {{model?: string, systemPrompt?: string}} cfg - Model name (defaults
 *   to "haiku") and system prompt appended to the CLI's own.
 * @returns {import("child_process").ChildProcess} The spawned process.
 */
function spawnChild(cfg) {
  const args = [
    "-p",
    "--input-format", "stream-json",
    "--output-format", "stream-json",
    "--verbose",
    "--model", cfg.model || "haiku",
    "--no-session-persistence",
    "--append-system-prompt", cfg.systemPrompt || "",
    "--dangerously-skip-permissions",
  ];
  const env = { ...botSubprocessEnv(), ...claudeSubprocessEnv() };
  const proc = spawn(CLAUDE_PATH, args, { cwd: process.cwd(), env, stdio: ["pipe", "pipe", "pipe"] });
  child = proc;
  msgCount = 0;
  charCount = 0;
  let buf = "";       // partial stdout line carried between chunks
  let asstText = "";  // assistant text seen since the last "result" event

  // Settle the in-flight walk exactly once and cancel its timeout.
  const settle = (fn, arg) => {
    const p = pending;
    pending = null;
    if (!p) return;
    clearTimeout(p.timer);
    fn === "resolve" ? p.resolve(arg) : p.reject(arg);
  };

  // The process died or failed. Reject the in-flight walk only if this is
  // still the current process; a retired one has no walk left to fail.
  const retire = (err) => {
    if (child !== proc) return;
    child = null;
    settle("reject", err);
  };

  proc.stdout.on("data", (d) => {
    if (child !== proc) return; // late output from a retired process
    buf += d.toString();
    let nl;
    while ((nl = buf.indexOf("\n")) >= 0) {
      const line = buf.slice(0, nl).trim();
      buf = buf.slice(nl + 1);
      if (!line) continue;
      let evt;
      try { evt = JSON.parse(line); } catch (e) { continue; } // skip non-JSON lines
      if (evt.type === "assistant" && evt.message && Array.isArray(evt.message.content)) {
        for (const b of evt.message.content) {
          if (b && b.type === "text" && typeof b.text === "string") asstText += b.text;
        }
      } else if (evt.type === "result") {
        // Prefer the result's own text; fall back to the accumulated blocks.
        const text = (typeof evt.result === "string" && evt.result) ? evt.result : asstText;
        asstText = "";
        if (text && !evt.is_error) settle("resolve", redactSensitive(String(text).trim()));
        else settle("reject", new Error("warm walker: empty/error result"));
      }
    }
  });
  proc.stderr.on("data", () => {}); // surfaced via result/close, not needed here

  // A write to a pipe whose reader has gone away is reported asynchronously as
  // an "error" event on the stdin stream, and an "error" event with no
  // listener is thrown as an uncaught exception. Turn it into a normal walk
  // failure so the caller can fall back.
  proc.stdin.on("error", (e) => {
    if (child !== proc) return;
    killChild();
    settle("reject", e);
  });

  proc.on("error", (e) => retire(e));
  proc.on("close", () => retire(new Error("warm walker: process closed")));
  return proc;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Send one prompt to the warm process and return a promise for its reply.
 *
 * Before sending, an over-budget process (message or char count at or past its
 * limit) is killed, and a process is spawned if none is alive, so each walk
 * runs against a small accumulated context. The promise is settled by the
 * process's stdout handling through the module-level `pending` slot, or
 * rejected here on timeout or when the write to stdin throws.
 *
 * @param {string} promptText - Prompt to send as a single user message.
 * @param {{systemPrompt?: string, model?: string, timeoutMs?: number}} opts
 * @returns {Promise<string>} Resolves with the walker's reply text.
 */
function doWalk(promptText, opts) {
  const overBudget = msgCount >= MAX_MSGS || charCount >= MAX_CHARS;
  if (child && overBudget) {
    killChild();
  }
  if (!child) {
    spawnChild({ systemPrompt: opts.systemPrompt, model: opts.model });
  }
  const proc = child;
  const timeoutMs = opts.timeoutMs || DEFAULT_TIMEOUT_MS;

  return new Promise((resolve, reject) => {
    const onTimeout = () => {
      pending = null;
      // No reply in time: treat the process as wedged and drop it.
      if (child === proc) killChild();
      reject(new Error("warm walker: timeout"));
    };
    const timer = setTimeout(onTimeout, timeoutMs);

    pending = { resolve, reject, timer };
    msgCount += 1;
    charCount += promptText.length;

    const message = { type: "user", message: { role: "user", content: promptText } };
    const payload = `${JSON.stringify(message)}\n`;
    try {
      proc.stdin.write(payload);
    } catch (writeErr) {
      clearTimeout(timer);
      pending = null;
      killChild();
      reject(writeErr);
    }
  });
}
|
|
136
|
+
|
|
137
|
+
/**
 * Queue a walk on the warm process and return a promise for its reply.
 *
 * Walks are serialised through the module-level `chain` so only one message is
 * in flight at a time. The chain itself never rejects: it records only that
 * the previous walk finished, so one failed walk cannot block those queued
 * behind it, while each caller still receives its own result or error.
 *
 * @param {string} promptText - Prompt to send.
 * @param {object} [opts] - Passed through to `doWalk`.
 * @returns {Promise<string>} The walker's reply text.
 */
function walkWarm(promptText, opts = {}) {
  const start = () => doWalk(promptText, opts);
  const ignoreOutcome = () => {};

  // Start after the previous walk settles, whichever way it settled.
  const result = chain.then(start, start);
  chain = result.then(ignoreOutcome, ignoreOutcome);
  return result;
}
|
|
145
|
+
|
|
146
|
+
/** Kill the warm process, if one is alive. */
function shutdown() {
  killChild();
}
|
|
147
|
+
/**
 * Snapshot of the warm walker's state: whether a process is alive, how many
 * messages and prompt chars it has been sent, and the recycle budgets.
 */
function stats() {
  const alive = Boolean(child);
  return { alive, msgCount, charCount, MAX_MSGS, MAX_CHARS };
}
|
|
148
|
+
|
|
149
|
+
// When this Node process exits, take the warm child down with it so no
// long-lived `claude` process is left behind. Best-effort: any error from the
// teardown is ignored.
process.on("exit", () => { try { killChild(); } catch (e) {} });
|
|
150
|
+
|
|
151
|
+
module.exports = { walkWarm, isEnabled, shutdown, stats };
|
package/core/router.js
CHANGED
|
@@ -120,12 +120,17 @@ async function handleVoice(envelope) {
|
|
|
120
120
|
return send(`Voice note too large (${Math.round(media.size / 1024 / 1024)}MB). Max: ${MAX_VOICE_SIZE / 1024 / 1024}MB`);
|
|
121
121
|
}
|
|
122
122
|
envelope.adapter.typing(envelope.channelId).catch(() => {});
|
|
123
|
+
const tStart = Date.now();
|
|
123
124
|
const oggPath = await envelope.adapter.downloadMedia(media);
|
|
125
|
+
const tAfterDownload = Date.now();
|
|
124
126
|
const transcript = transcribeAudio(oggPath);
|
|
127
|
+
const tAfterStt = Date.now();
|
|
125
128
|
try { fs.unlinkSync(oggPath); } catch (e) {}
|
|
126
129
|
if (!transcript) return send("Couldn't transcribe. Try typing it.");
|
|
127
130
|
await send(`Heard: "${transcript}"`, { replyTo: envelope.messageId });
|
|
128
131
|
state.lastInputWasVoice = true;
|
|
132
|
+
// Per-stage latency for the voice pipeline; the runner fills in the rest.
|
|
133
|
+
state.voiceLat = { startAt: tStart, downloadMs: tAfterDownload - tStart, sttMs: tAfterStt - tAfterDownload };
|
|
129
134
|
await runClaude(transcript, state.currentSession.dir, envelope.messageId);
|
|
130
135
|
} catch (err) { await send(`Voice failed: ${err.message}`); }
|
|
131
136
|
}
|
package/core/runner.js
CHANGED
|
@@ -839,14 +839,19 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
|
|
|
839
839
|
let ttsChain = Promise.resolve(); // ordered send queue so clips play in order
|
|
840
840
|
let spokeAnyStreamed = false;
|
|
841
841
|
const SPOKEN_MIN_CHARS = 40; // don't fire TTS on tiny fragments ("Hi.")
|
|
842
|
+
// Voice latency probe (measurement only) — timestamps filled in as the turn runs.
|
|
843
|
+
// Spawn time is `startTime` (captured just after spawn below).
|
|
844
|
+
let vlFirstSysAt = null, vlFirstTokenAt = null, vlFirstAudioAt = null, vlResultAt = null;
|
|
842
845
|
// Queue one chunk of reply text to be spoken. Synthesis starts immediately so
// clips are produced in parallel, but clips are sent strictly in order through
// `ttsChain`. Text that is blank after redaction is skipped. The time of the
// first successfully sent clip is recorded for the voice latency probe.
function dispatchSpoken(text) {
  const clean = redactSensitive(text);
  if (!clean.trim()) return;
  spokeAnyStreamed = true;
  const synthP = Promise.resolve(synthSentenceMp3(clean)); // start synth now (parallel)
  // The real handler is only attached when the chain reaches this clip, which
  // can be well after synthesis has already failed. Mark the promise as handled
  // now so an early failure is not reported as an unhandled rejection; the
  // `await` below still sees the error and logs it.
  synthP.catch(() => {});
  ttsChain = ttsChain.then(async () => { // but send strictly in order
    try {
      const clip = await synthP;
      if (clip) {
        await sendVoice(clip);
        if (vlFirstAudioAt == null) vlFirstAudioAt = Date.now();
      }
    } catch (e) { console.error("voice stream clip failed:", e.message); }
  });
}
|
|
852
857
|
function pumpSpoken(flush) {
|
|
@@ -1073,12 +1078,17 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
|
|
|
1073
1078
|
const lastNewline = state.streamBuffer.lastIndexOf("\n");
|
|
1074
1079
|
state.streamBuffer = lastNewline >= 0 ? state.streamBuffer.slice(lastNewline + 1) : state.streamBuffer;
|
|
1075
1080
|
for (const evt of events) {
|
|
1081
|
+
// Voice latency probe: first "system" event = CLI ready (cold-start done);
|
|
1082
|
+
// "result" = generation finished (before the TTS tail drains).
|
|
1083
|
+
if (voiceStreaming && vlFirstSysAt == null && evt.type === "system") vlFirstSysAt = Date.now();
|
|
1084
|
+
if (voiceStreaming && vlResultAt == null && evt.type === "result") vlResultAt = Date.now();
|
|
1076
1085
|
// Voice streaming-out: speak finished sentences as the model writes them.
|
|
1077
1086
|
// Only text_delta is spoken; thinking_delta and tool events are ignored.
|
|
1078
1087
|
if (voiceStreaming && evt.type === "stream_event"
|
|
1079
1088
|
&& evt.event?.type === "content_block_delta"
|
|
1080
1089
|
&& evt.event.delta?.type === "text_delta"
|
|
1081
1090
|
&& typeof evt.event.delta.text === "string") {
|
|
1091
|
+
if (vlFirstTokenAt == null) vlFirstTokenAt = Date.now();
|
|
1082
1092
|
spokenBuf += evt.event.delta.text;
|
|
1083
1093
|
pumpSpoken(false);
|
|
1084
1094
|
}
|
|
@@ -1287,6 +1297,21 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
|
|
|
1287
1297
|
if (voicePath) await sendVoice(voicePath);
|
|
1288
1298
|
}
|
|
1289
1299
|
await sendVoiceEnd();
|
|
1300
|
+
// Per-stage latency breakdown (measurement only — temporary debug footer).
|
|
1301
|
+
try {
|
|
1302
|
+
const vl = state.voiceLat || {};
|
|
1303
|
+
const endAt = Date.now();
|
|
1304
|
+
const sec = (n) => n != null ? (n / 1000).toFixed(1) + "s" : "?";
|
|
1305
|
+
const diff = (a, b) => (a != null && b != null) ? sec(b - a) : "?";
|
|
1306
|
+
const fromStart = (t) => diff(vl.startAt, t);
|
|
1307
|
+
const line = `⏱ dl ${sec(vl.downloadMs)} · stt ${sec(vl.sttMs)}`
|
|
1308
|
+
+ ` · cli ${diff(startTime, vlFirstSysAt)} · ttft ${diff(vlFirstSysAt, vlFirstTokenAt)}`
|
|
1309
|
+
+ ` · gen ${diff(vlFirstTokenAt, vlResultAt)} · 1st-audio ${fromStart(vlFirstAudioAt)}`
|
|
1310
|
+
+ ` · total ${fromStart(endAt)}`;
|
|
1311
|
+
console.log(`[VOICE-LAT] ${line}`);
|
|
1312
|
+
await send(line);
|
|
1313
|
+
} catch (e) { /* metrics best-effort */ }
|
|
1314
|
+
state.voiceLat = null;
|
|
1290
1315
|
} else {
|
|
1291
1316
|
// Non-streamed fallback. Spoken replies belong to the hands-free voice
|
|
1292
1317
|
// channel; on chat transports (Telegram/Kazee) an auto voice note on
|
package/package.json
CHANGED