@kernel.chat/kbot 3.99.20 → 3.99.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/agent.js +23 -0
- package/dist/agents/producer.js +65 -23
- package/dist/auth.d.ts +2 -0
- package/dist/cli.js +7 -4
- package/dist/critic-gate.d.ts +26 -0
- package/dist/critic-gate.js +220 -0
- package/dist/critic-retrospect.d.ts +64 -0
- package/dist/critic-retrospect.js +279 -0
- package/dist/growth.d.ts +37 -0
- package/dist/growth.js +272 -0
- package/dist/integrations/ableton.d.ts +30 -0
- package/dist/integrations/ableton.js +66 -0
- package/dist/integrations/kbot-control-client.d.ts +66 -0
- package/dist/integrations/kbot-control-client.js +224 -0
- package/dist/observer.d.ts +13 -0
- package/dist/observer.js +5 -1
- package/dist/planner/hierarchical/persistence.d.ts +26 -0
- package/dist/planner/hierarchical/persistence.js +113 -0
- package/dist/planner/hierarchical/session-planner.d.ts +68 -0
- package/dist/planner/hierarchical/session-planner.js +141 -0
- package/dist/planner/hierarchical/types.d.ts +116 -0
- package/dist/planner/hierarchical/types.js +18 -0
- package/dist/tool-pipeline.d.ts +39 -1
- package/dist/tool-pipeline.js +109 -1
- package/dist/tools/ableton-listen.d.ts +2 -0
- package/dist/tools/ableton-listen.js +126 -0
- package/dist/tools/ableton.js +477 -12
- package/dist/tools/index.js +2 -0
- package/dist/tools/kbot-control.d.ts +2 -0
- package/dist/tools/kbot-control.js +63 -0
- package/package.json +1 -1
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Critic Retrospect — retroactive judgement of past session tool calls.
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.kbot/observer/session.jsonl, replays tool calls through
|
|
5
|
+
* gateToolResult (critic-gate.ts), and reports:
|
|
6
|
+
* - overall accept/reject ratio
|
|
7
|
+
* - tools with highest reject rate (args-validation candidates)
|
|
8
|
+
* - rejects that were later retried successfully (critic false positives)
|
|
9
|
+
* - sessions ranked by "retries saved" score
|
|
10
|
+
* - suggested strictness setting from precision/recall tradeoff
|
|
11
|
+
*
|
|
12
|
+
* NB: the observer only logs {ts, tool, args, session} — no results.
|
|
13
|
+
* We synthesize a *result proxy* from retry behaviour: a call whose exact
|
|
14
|
+
* (tool, args-hash) recurs inside the same session within RETRY_WINDOW_MS
|
|
15
|
+
* is treated as having implicitly failed the first time. The critic is
|
|
16
|
+
* passed this synthesized signal so it can judge on intent + shape.
|
|
17
|
+
*
|
|
18
|
+
* Cache: ~/.kbot/critic-cache.json — keyed by (tool, argsHash, resultHash, strictness).
|
|
19
|
+
*
|
|
20
|
+
* CLI wiring: cli.ts was modified in parallel; leaving subcommand wiring
|
|
21
|
+
* as a TODO. Invoke via `node -e "import('./dist/critic-retrospect.js').then(m => m.run())"`.
|
|
22
|
+
*/
|
|
23
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
|
|
24
|
+
import { homedir } from 'node:os';
|
|
25
|
+
import { join, dirname } from 'node:path';
|
|
26
|
+
import { createHash } from 'node:crypto';
|
|
27
|
+
import { gateToolResult } from './critic-gate.js';
|
|
28
|
+
const OBSERVER_PATH = join(homedir(), '.kbot', 'observer', 'session.jsonl');
|
|
29
|
+
const CACHE_PATH = join(homedir(), '.kbot', 'critic-cache.json');
|
|
30
|
+
const RETRY_WINDOW_MS = 2 * 60 * 1000; // 2 minutes
|
|
31
|
+
/**
 * Short content fingerprint: the first 16 hex characters of the SHA-256 digest of `s`.
 * @param {string|Buffer} s - Data to hash.
 * @returns {string} 16 lowercase hex characters.
 */
function sha(s) {
    const fullDigest = createHash('sha256').update(s).digest('hex');
    return fullDigest.substring(0, 16);
}
|
|
32
|
+
/**
 * Fingerprint a tool-call argument value.
 *
 * The JSON serialization is hashed when possible. If serializing or hashing
 * throws (e.g. circular structures, BigInt, or a value JSON.stringify maps to
 * `undefined`), the value's String() form is hashed instead.
 * @param {unknown} args - Value to fingerprint.
 * @returns {string} Short hash produced by sha().
 */
function hashArgs(args) {
    try {
        const serialized = JSON.stringify(args);
        // Hashing stays inside the try: sha(undefined) throws and must hit the fallback.
        return sha(serialized);
    }
    catch {
        const fallback = String(args);
        return sha(fallback);
    }
}
|
|
38
|
+
/**
 * Load the verdict cache from CACHE_PATH.
 *
 * Best-effort: a missing, unreadable, or malformed file yields an empty cache.
 * The file must contain a JSON object; any other valid JSON value (null,
 * array, number, string) is also treated as an empty cache, because callers
 * index the result by string key and assign new entries to it.
 * @returns {Record<string, unknown>} Parsed cache object, or `{}`.
 */
function loadCache() {
    if (!existsSync(CACHE_PATH))
        return {};
    try {
        const parsed = JSON.parse(readFileSync(CACHE_PATH, 'utf8'));
        const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        return isPlainObject ? parsed : {};
    }
    catch {
        return {};
    }
}
|
|
48
|
+
/**
 * Persist the verdict cache to CACHE_PATH as pretty-printed JSON.
 * Creates the parent directory if needed. Failures are deliberately ignored:
 * the cache is an optimisation, not required state.
 * @param {object} c - Cache object to write.
 */
function saveCache(c) {
    try {
        const cacheDir = dirname(CACHE_PATH);
        mkdirSync(cacheDir, { recursive: true });
        const payload = JSON.stringify(c, null, 2);
        writeFileSync(CACHE_PATH, payload);
    }
    catch {
        /* best-effort */
    }
}
|
|
55
|
+
/**
 * Read observer events from OBSERVER_PATH (one JSON document per line).
 *
 * Blank lines and lines that fail to parse are skipped. An event is kept only
 * when it has truthy `tool`, `session` and `ts` fields.
 * @returns {object[]} Usable events in file order; `[]` when the file is absent.
 */
function readEvents() {
    if (!existsSync(OBSERVER_PATH))
        return [];
    const parseLine = (text) => {
        try {
            return JSON.parse(text);
        }
        catch {
            return undefined; // malformed line: dropped by the usability filter below
        }
    };
    const isUsable = (e) => Boolean(e && e.tool && e.session && e.ts);
    return readFileSync(OBSERVER_PATH, 'utf8')
        .split('\n')
        .filter((text) => text.trim())
        .map(parseLine)
        .filter(isUsable);
}
|
|
72
|
+
/**
 * Group events by session and keep the `n` most recently active sessions.
 *
 * A session's recency is the LATEST parseable `ts` among its events, so the
 * ranking does not depend on events being logged in chronological order.
 * Unparseable timestamps count as 0.
 *
 * @param {Array<{session: string, ts: string}>} events - Observer events.
 * @param {number} n - Maximum number of sessions to keep. Fractions are
 *   floored; negative or NaN values keep nothing; `undefined` keeps all.
 * @returns {{picked: Map<string, object[]>, available: number}} `picked` maps
 *   session id to its events (most recent session first); `available` is the
 *   total number of distinct sessions seen.
 */
function pickLastNSessions(events, n) {
    const bySession = new Map();
    for (const e of events) {
        let group = bySession.get(e.session);
        if (!group) {
            group = [];
            bySession.set(e.session, group);
        }
        group.push(e);
    }
    const latestTs = (evs) => evs.reduce((max, e) => Math.max(max, Date.parse(e.ts) || 0), 0);
    const ordered = [...bySession]
        .map(([id, evs]) => ({ id, evs, lastTs: latestTs(evs) }))
        .sort((a, b) => b.lastTs - a.lastTs);
    // A negative slice end would silently drop sessions from the tail instead of limiting the head.
    const limit = n === undefined ? Infinity : Math.max(0, Math.floor(Number(n)) || 0);
    const picked = new Map();
    for (const { id, evs } of ordered.slice(0, limit))
        picked.set(id, evs);
    return { picked, available: bySession.size };
}
|
|
89
|
+
/**
 * Annotate each call of one session with retry information.
 *
 * Every returned call is a copy of the event plus:
 *  - `argsHash`: fingerprint of `{tool, args}`;
 *  - `retriedLater`: an identical (tool, argsHash) call follows within
 *    RETRY_WINDOW_MS. The forward scan stops at the first later call outside
 *    the window, so it relies on events being in chronological order;
 *  - `retrySucceeded`: for retried calls, true when the LAST identical call
 *    in the session was itself not retried.
 * Unparseable timestamps are treated as 0.
 * @param {object[]} sessionEvents - Events of a single session.
 * @returns {object[]} Annotated calls, same order as the input.
 */
function enrich(sessionEvents) {
    const calls = [];
    for (const ev of sessionEvents) {
        calls.push({
            ...ev,
            argsHash: hashArgs({ tool: ev.tool, args: ev.args }),
            retriedLater: false,
            retrySucceeded: false,
        });
    }
    const sameCall = (x, y) => x.tool === y.tool && x.argsHash === y.argsHash;
    const millis = (call) => Date.parse(call.ts) || 0;
    // Pass 1: flag calls that are repeated shortly afterwards.
    calls.forEach((first, idx) => {
        const startedAt = millis(first);
        let next = idx + 1;
        while (next < calls.length) {
            const later = calls[next];
            if (millis(later) - startedAt > RETRY_WINDOW_MS)
                return;
            if (sameCall(later, first)) {
                first.retriedLater = true;
                return;
            }
            next += 1;
        }
    });
    // Pass 2: a retried call "succeeded" when the final identical call was not itself retried.
    for (const [idx, call] of calls.entries()) {
        if (!call.retriedLater)
            continue;
        let last = calls.length - 1;
        while (last > idx && !sameCall(calls[last], call))
            last -= 1;
        if (last > idx)
            call.retrySucceeded = !calls[last].retriedLater;
    }
    return calls;
}
|
|
122
|
+
/**
 * Build the synthetic "result" text handed to the critic in place of a real
 * tool result (the observer log carries none).
 * @param {{retriedLater: boolean}} c - Enriched call.
 * @returns {string} Proxy text describing whether the call was retried in-session.
 */
function synthResult(c) {
    const windowSeconds = RETRY_WINDOW_MS / 1000;
    return c.retriedLater
        ? `[observer-proxy] no result captured; same (tool,args) was retried within ${windowSeconds}s — likely failed or unsatisfactory.`
        : `[observer-proxy] no result captured; call was not retried in-session — presumed accepted by the agent downstream.`;
}
|
|
128
|
+
/**
 * Obtain the critic's verdict for one enriched call, using the cache when possible.
 *
 * The cache key combines tool, args hash, the hash of the synthesized result
 * proxy, and the strictness formatted to two decimals. A cached entry is only
 * trusted when it actually carries a `verdict`; entries without one (e.g. a
 * hand-edited or truncated cache file) are re-judged and overwritten instead
 * of being returned as `undefined` to the caller.
 *
 * @param {object} c - Enriched call (`tool`, `args`, `argsHash`, `retriedLater`).
 * @param {number} strictness - Critic strictness passed to gateToolResult.
 * @param {object} cache - Mutable verdict cache; new verdicts are stored on it.
 * @param {unknown} llmClient - Forwarded to gateToolResult unchanged.
 * @returns {Promise<object>} The verdict returned by gateToolResult (or its cached copy).
 */
async function judge(c, strictness, cache, llmClient) {
    const resultProxy = synthResult(c);
    const resultHash = sha(resultProxy);
    const key = `${c.tool}:${c.argsHash}:${resultHash}:${strictness.toFixed(2)}`;
    const hit = cache[key];
    if (hit && hit.verdict)
        return hit.verdict;
    const verdict = await gateToolResult(c.tool, c.args, resultProxy, { strictness, llmClient });
    cache[key] = { verdict, cachedAt: new Date().toISOString() };
    return verdict;
}
|
|
139
|
+
/**
 * Precision/recall sweep used to suggest a critic strictness.
 *
 * "retriedLater" (`row.bad`) is treated as ground truth for "call was bad";
 * critic rejects are the positives:
 *   TP = reject && bad      (correct catch)
 *   FP = reject && !bad     (nagged a fine call)
 *   FN = !reject && bad     (missed a bad call)
 *   Precision = TP / (TP + FP), Recall = TP / (TP + FN)
 *
 * For each candidate strictness `s`, a reject only counts when its confidence
 * is at least `1 - s` (stricter => more rejects pass through). The threshold
 * is rounded to two decimals so that binary floating-point error in `1 - s`
 * (e.g. 1 - 0.7 === 0.30000000000000004) cannot exclude a confidence that
 * sits exactly on the boundary.
 *
 * The candidate with the highest F1 wins; ties keep the earlier candidate.
 * When no candidate achieves a positive F1 (no rows, or no true positives at
 * any threshold) the neutral default 0.5 is suggested with precision and
 * recall 0.
 *
 * @param {Array<{reject: boolean, bad: boolean, confidence: number}>} rows
 * @returns {{suggested: number, precision: number, recall: number}}
 */
function prCurve(rows) {
    const candidates = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
    let best = { s: 0.5, f1: 0, p: 0, r: 0 };
    for (const s of candidates) {
        const threshold = Math.round((1 - s) * 100) / 100;
        let tp = 0;
        let fp = 0;
        let fn = 0;
        for (const row of rows) {
            const gatedReject = row.reject && row.confidence >= threshold;
            if (gatedReject && row.bad)
                tp++;
            else if (gatedReject)
                fp++;
            else if (row.bad)
                fn++;
        }
        const p = tp + fp > 0 ? tp / (tp + fp) : 0;
        const rec = tp + fn > 0 ? tp / (tp + fn) : 0;
        const f1 = p + rec > 0 ? (2 * p * rec) / (p + rec) : 0;
        if (f1 > best.f1)
            best = { s, f1, p, r: rec };
    }
    return { suggested: best.s, precision: best.p, recall: best.r };
}
|
|
172
|
+
/**
 * Run the critic retrospective over recent observer sessions.
 *
 * Reads observer events, keeps the most recent sessions, replays up to
 * `maxCallsPerSession` calls per session through the critic (via judge), then
 * aggregates accept/reject counts, per-tool reject rates, likely false
 * positives, a per-session "retries saved" score and a suggested strictness.
 * The report is printed with renderReport and optionally written as JSON.
 *
 * The verdict cache is saved in a `finally` block so verdicts already obtained
 * are kept even when a later critic call throws; the error still propagates.
 *
 * @param {object} [opts]
 * @param {number} [opts.sessions=10] - Number of most recent sessions to scan.
 * @param {number} [opts.strictness=0.5] - Strictness passed to the critic.
 * @param {number} [opts.maxCallsPerSession=50] - Per-session cap on judged calls.
 * @param {unknown} [opts.llmClient] - Forwarded to the critic.
 * @param {string} [opts.jsonOut] - If set, path the JSON report is written to.
 * @returns {Promise<object>} The report object that was rendered.
 */
export async function run(opts = {}) {
    const nSessions = opts.sessions ?? 10;
    const strictness = opts.strictness ?? 0.5;
    const perSessionCap = opts.maxCallsPerSession ?? 50;
    const events = readEvents();
    const { picked, available } = pickLastNSessions(events, nSessions);
    const cache = loadCache();
    const byTool = {};
    const falsePositives = [];
    const sessionStats = [];
    const prRows = [];
    let accepts = 0;
    let rejects = 0;
    let totalCalls = 0;
    try {
        for (const [sid, evs] of picked) {
            const enriched = enrich(evs).slice(0, perSessionCap);
            let sessionRetriesSaved = 0;
            for (const c of enriched) {
                const v = await judge(c, strictness, cache, opts.llmClient);
                totalCalls++;
                const bucket = byTool[c.tool] ?? (byTool[c.tool] = { total: 0, accepts: 0, rejects: 0 });
                bucket.total++;
                if (v.accept) {
                    accepts++;
                    bucket.accepts++;
                }
                else {
                    rejects++;
                    bucket.rejects++;
                }
                prRows.push({ reject: !v.accept, bad: c.retriedLater, confidence: v.confidence });
                if (v.accept)
                    continue;
                if (c.retriedLater)
                    sessionRetriesSaved++;
                // Likely false positives: critic rejected, but the call was NOT retried (so downstream accepted it).
                if (!c.retriedLater) {
                    falsePositives.push({ tool: c.tool, session: sid, retryGap: 0, reason: v.reason });
                }
                // Also: rejected AND the retry later succeeded — still a FP if the agent had listened and skipped, it would have worked anyway.
                if (c.retrySucceeded) {
                    falsePositives.push({ tool: c.tool, session: sid, retryGap: RETRY_WINDOW_MS, reason: `retry later succeeded: ${v.reason ?? ''}` });
                }
            }
            sessionStats.push({ session: sid, calls: enriched.length, retriesSaved: sessionRetriesSaved, score: sessionRetriesSaved / Math.max(1, enriched.length) });
        }
    }
    finally {
        // Keep whatever verdicts were obtained, even if a critic call failed midway.
        saveCache(cache);
    }
    const topRejectRate = Object.entries(byTool)
        .filter(([, v]) => v.total >= 3)
        .map(([tool, v]) => ({ tool, total: v.total, rejectRate: v.rejects / v.total }))
        .sort((a, b) => b.rejectRate - a.rejectRate)
        .slice(0, 5);
    const sessionsRanked = [...sessionStats].sort((a, b) => b.score - a.score).slice(0, 10);
    const fpTop = falsePositives.slice(0, 5);
    const pr = prCurve(prRows);
    const report = {
        totalCalls, sessionsScanned: picked.size, sessionsAvailable: available,
        accepts, rejects, byTool, topRejectRate, likelyFalsePositives: fpTop,
        sessionsRanked, suggestedStrictness: pr.suggested, precision: pr.precision, recall: pr.recall,
    };
    renderReport(report);
    if (opts.jsonOut) {
        try {
            writeFileSync(opts.jsonOut, JSON.stringify(report, null, 2));
            console.log(`\nJSON written → ${opts.jsonOut}`);
        }
        catch (e) {
            console.error(`JSON export failed: ${e.message}`);
        }
    }
    return report;
}
|
|
240
|
+
/**
 * Print the retrospective report to stdout, one console.log call per line.
 *
 * Sections: session/call totals and accept rate, tools with the highest
 * reject rate, likely critic false positives, the top five sessions by
 * retries-saved score, and the precision/recall summary with the suggested
 * strictness.
 * @param {object} r - Report object as assembled by run().
 */
function renderReport(r) {
    const emit = (text = '') => console.log(text);
    const asPercent = (fraction) => (fraction * 100).toFixed(1);
    emit('\n=== Critic Retrospective ===');
    emit(`sessions scanned: ${r.sessionsScanned} / ${r.sessionsAvailable} available`);
    emit(`tool calls judged: ${r.totalCalls}`);
    let acceptRate = 0;
    if (r.totalCalls)
        acceptRate = r.accepts / r.totalCalls;
    emit(`accept/reject: ${r.accepts} / ${r.rejects} (accept-rate ${asPercent(acceptRate)}%)`);
    emit('\n-- top 5 reject rate (candidates for args validation) --');
    if (r.topRejectRate.length === 0)
        emit(' (no tool has >=3 calls)');
    r.topRejectRate.forEach((entry) => {
        emit(` ${entry.tool.padEnd(28)} ${asPercent(entry.rejectRate)}% rejected (${entry.total} calls)`);
    });
    emit('\n-- likely critic false positives (rejected but agent did not retry OR retry worked) --');
    if (r.likelyFalsePositives.length === 0)
        emit(' (none)');
    r.likelyFalsePositives.forEach((item) => {
        emit(` ${item.tool.padEnd(28)} session=${item.session.slice(0, 8)} ${item.reason ?? ''}`);
    });
    emit('\n-- sessions ranked by retries-saved score --');
    r.sessionsRanked.slice(0, 5).forEach((stat) => {
        emit(` ${stat.session.slice(0, 8)} calls=${stat.calls} saved=${stat.retriesSaved} score=${stat.score.toFixed(3)}`);
    });
    emit('\n-- precision / recall tradeoff --');
    emit(` precision = ${r.precision.toFixed(3)} recall = ${r.recall.toFixed(3)}`);
    emit(` suggested critic_strictness = ${r.suggestedStrictness.toFixed(2)}`);
    emit('');
}
|
|
265
|
+
// TODO(cli-wiring): register `kbot critic retrospect` subcommand in cli.ts once
|
|
266
|
+
// the parallel skills-subcommand edit lands. For now, invoke via:
|
|
267
|
+
// node -e "import('./dist/critic-retrospect.js').then(m => m.run({ sessions: 20 }))"
|
|
268
|
+
// Direct-execution entrypoint for `node dist/critic-retrospect.js`.
|
|
269
|
+
// Direct-execution guard: only act when this file is the script Node was started with.
const argv1 = process.argv[1] || '';
if (argv1.endsWith('critic-retrospect.js') || argv1.endsWith('critic-retrospect.ts')) {
    // Value of a `--name=value` flag, or undefined when the flag is absent.
    // Everything after the FIRST '=' is the value, so paths containing '=' survive intact.
    const flagValue = (name) => {
        const prefix = `--${name}=`;
        const arg = process.argv.find(a => a.startsWith(prefix));
        return arg === undefined ? undefined : arg.slice(prefix.length);
    };
    // Numeric flag; undefined when absent, empty, or not a finite number, so run() applies its default.
    const numericFlag = (name) => {
        const raw = flagValue(name);
        if (raw === undefined || raw.trim() === '')
            return undefined;
        const value = Number(raw);
        return Number.isFinite(value) ? value : undefined;
    };
    const sessions = numericFlag('sessions') ?? 10;
    const strictness = numericFlag('strictness');
    const jsonOut = flagValue('json') || undefined;
    run({ sessions, strictness, jsonOut }).catch(e => { console.error(e); process.exit(1); });
}
|
|
279
|
+
//# sourceMappingURL=critic-retrospect.js.map
|
package/dist/growth.d.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/** Headline numbers for the current reporting window. */
interface GrowthSummary {
    /** Relative change of the blended success/routing score versus the prior window, in percent. */
    betterPct: number;
    /** Length of each comparison window, in days. */
    days: number;
    /** Distinct session ids seen among observer events in the current window. */
    sessions: number;
    /** Number of observer events in the current window. */
    toolCalls: number;
    /** Fraction (0–1) of current-window events not flagged `error === true`. */
    successRate: number;
    /** Fraction (0–1) of current-window confidence entries with |predicted - actual| <= 0.2. */
    routingAccuracy: number;
    /** Skills attempted in the current window plus confidence domains not seen in the prior window. */
    newPatterns: number;
}
/** Full report produced by runGrowth. */
interface GrowthResult {
    summary: GrowthSummary;
    /** Current vs prior value for each headline metric; `delta` is current minus prior. */
    metrics: Array<{
        label: string;
        current: number;
        prior: number;
        delta: number;
    }>;
    /** Per-tool call counts, limited to the tools with the largest absolute change. */
    deltas: Array<{
        tool: string;
        current: number;
        prior: number;
        delta: number;
    }>;
    /** Routing accuracy per confidence domain for the current window (`agent` holds the domain name). */
    agents: Array<{
        agent: string;
        accuracy: number;
        samples: number;
    }>;
}
/**
 * Compute and print the growth report.
 * Returns null when no observer events, confidence entries or skills are available.
 */
export declare function runGrowth(opts?: {
    /** Print JSON instead of the formatted report. */
    json?: boolean;
    /** Window length in days (defaults to 7). */
    days?: number;
    /** Directory holding the learning artifacts (defaults to ~/.kbot). */
    dataDir?: string;
    /** End of the current window as epoch milliseconds (defaults to Date.now()). */
    now?: number;
}): GrowthResult | null;
|
|
36
|
+
export {};
|
|
37
|
+
//# sourceMappingURL=growth.d.ts.map
|
package/dist/growth.js
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
// kbot growth — Make kbot's learning visible. Reads local learning artifacts
|
|
2
|
+
// (~/.kbot/skill-profile.json, confidence.json, evolution-state.json,
|
|
3
|
+
// observer/session.jsonl) and shows a week-over-week improvement report.
|
|
4
|
+
import { readFileSync, existsSync } from 'node:fs';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
import { homedir } from 'node:os';
|
|
7
|
+
import chalk from 'chalk';
|
|
8
|
+
/**
 * Resolve the locations of kbot's learning artifacts inside a data directory.
 * @param {string} dataDir - Directory that holds the artifacts.
 * @returns {{skill: string, confidence: string, evolution: string, observer: string}}
 *   Paths of the skill profile, confidence log, evolution state and observer log.
 */
function pathsFor(dataDir) {
    const under = (...segments) => join(dataDir, ...segments);
    const skill = under('skill-profile.json');
    const confidence = under('confidence.json');
    const evolution = under('evolution-state.json');
    const observer = under('observer', 'session.jsonl');
    return { skill, confidence, evolution, observer };
}
|
|
16
|
+
/**
 * Read and parse a JSON file without ever throwing.
 * @param {string} path - File to read.
 * @returns {unknown|null} The parsed value, or null when the file is missing,
 *   unreadable, or not valid JSON.
 */
function readJsonSafe(path) {
    let parsed = null;
    try {
        if (existsSync(path)) {
            const text = readFileSync(path, 'utf8');
            parsed = JSON.parse(text);
        }
    }
    catch {
        parsed = null;
    }
    return parsed;
}
|
|
26
|
+
/**
 * Read observer events from a JSON-lines file.
 *
 * Blank and malformed lines are skipped. Lines that parse to something other
 * than a JSON object (null, numbers, strings, arrays) are skipped too, because
 * downstream code reads properties such as `ts` and `tool` from every event
 * and would throw on `null`.
 *
 * @param {string} observerPath - Path to the observer `session.jsonl` file.
 * @returns {object[]} Parsed event objects in file order; `[]` when the file
 *   is missing or cannot be read.
 */
function readObserver(observerPath) {
    if (!existsSync(observerPath))
        return [];
    let raw;
    try {
        raw = readFileSync(observerPath, 'utf8');
    }
    catch {
        return [];
    }
    const out = [];
    for (const line of raw.split('\n')) {
        if (!line.trim())
            continue;
        try {
            const parsed = JSON.parse(line);
            if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed))
                out.push(parsed);
        }
        catch {
            // skip malformed line
        }
    }
    return out;
}
|
|
48
|
+
/**
 * Test whether a timestamp string falls inside the half-open window [start, end).
 * @param {string} ts - Timestamp in a form Date.parse understands.
 * @param {number} start - Inclusive lower bound, epoch milliseconds.
 * @param {number} end - Exclusive upper bound, epoch milliseconds.
 * @returns {boolean} False when `ts` cannot be parsed or lies outside the window.
 */
function inWindow(ts, start, end) {
    const at = Date.parse(ts);
    return !Number.isNaN(at) && start <= at && at < end;
}
|
|
54
|
+
/**
 * Aggregate observer events and confidence entries that fall inside [start, end).
 *
 * Events contribute the tool-call count, the error count (`error === true`
 * only), the set of distinct sessions, per-tool counts and per-agent counts
 * (string `args.agent` values). Confidence entries contribute routing
 * accuracy: an entry is a hit when |predicted - actual| <= 0.2.
 * Both rates are 0 when their window holds no samples.
 *
 * @param {Iterable<object>} events - Observer events (`ts`, `tool`, `session`, `error`, `args`).
 * @param {Iterable<object>} confidence - Confidence entries (`timestamp`, `predicted`, `actual`).
 * @param {number} start - Inclusive window start, epoch milliseconds.
 * @param {number} end - Exclusive window end, epoch milliseconds.
 * @returns {{sessions: number, toolCalls: number, errors: number, successRate: number,
 *   routingAccuracy: number, toolCounts: Object<string, number>, agentCounts: Object<string, number>}}
 */
function computeWindow(events, confidence, start, end) {
    const bump = (counter, key) => {
        counter[key] = (counter[key] ?? 0) + 1;
    };
    const seenSessions = new Set();
    const toolCounts = {};
    const agentCounts = {};
    let toolCalls = 0;
    let errors = 0;
    for (const ev of events) {
        if (inWindow(ev.ts, start, end)) {
            toolCalls += 1;
            if (ev.error === true)
                errors += 1;
            if (ev.session)
                seenSessions.add(ev.session);
            bump(toolCounts, ev.tool);
            const agent = ev.args?.['agent'];
            if (typeof agent === 'string' && agent)
                bump(agentCounts, agent);
        }
    }
    // Routing accuracy: |predicted - actual| <= 0.2 counts as accurate
    let routingSamples = 0;
    let routingHits = 0;
    for (const entry of confidence) {
        if (inWindow(entry.timestamp, start, end)) {
            routingSamples += 1;
            if (Math.abs(entry.predicted - entry.actual) <= 0.2)
                routingHits += 1;
        }
    }
    const successRate = toolCalls > 0 ? (toolCalls - errors) / toolCalls : 0;
    const routingAccuracy = routingSamples > 0 ? routingHits / routingSamples : 0;
    return {
        sessions: seenSessions.size,
        toolCalls,
        errors,
        successRate,
        routingAccuracy,
        toolCounts,
        agentCounts,
    };
}
|
|
95
|
+
/**
 * Rank tools by how much their usage changed between two windows.
 * @param {Object<string, number>} current - Call counts per tool, current window.
 * @param {Object<string, number>} prior - Call counts per tool, prior window.
 * @param {number} [limit=5] - Maximum number of rows returned.
 * @returns {Array<{tool: string, current: number, prior: number, delta: number}>}
 *   Rows sorted by descending absolute delta; tools with zero usage in both
 *   windows are omitted.
 */
function topToolDeltas(current, prior, limit = 5) {
    const names = new Set(Object.keys(current).concat(Object.keys(prior)));
    const rows = [...names]
        .map((tool) => {
            const now = current[tool] ?? 0;
            const before = prior[tool] ?? 0;
            return { tool, current: now, prior: before, delta: now - before };
        })
        .filter((row) => row.current + row.prior !== 0);
    rows.sort((a, b) => Math.abs(b.delta) - Math.abs(a.delta));
    return rows.slice(0, limit);
}
|
|
108
|
+
/**
 * Routing accuracy per confidence domain inside [start, end).
 *
 * Entries without a domain are grouped under 'general'. An entry counts as a
 * hit when |predicted - actual| <= 0.2.
 * @param {Iterable<object>} confidence - Confidence entries (`timestamp`, `domain`, `predicted`, `actual`).
 * @param {number} start - Inclusive window start, epoch milliseconds.
 * @param {number} end - Exclusive window end, epoch milliseconds.
 * @returns {Array<{agent: string, accuracy: number, samples: number}>}
 *   At most eight rows, ordered by descending sample count.
 */
function perAgentRouting(confidence, start, end) {
    const tallies = {};
    for (const entry of confidence) {
        if (inWindow(entry.timestamp, start, end)) {
            const key = entry.domain || 'general';
            if (tallies[key] == null)
                tallies[key] = { hits: 0, total: 0 };
            const tally = tallies[key];
            tally.total += 1;
            if (Math.abs(entry.predicted - entry.actual) <= 0.2)
                tally.hits += 1;
        }
    }
    const rows = [];
    for (const [agent, tally] of Object.entries(tallies)) {
        const accuracy = tally.total > 0 ? tally.hits / tally.total : 0;
        rows.push({ agent, accuracy, samples: tally.total });
    }
    rows.sort((a, b) => b.samples - a.samples);
    return rows.slice(0, 8);
}
|
|
124
|
+
/**
 * Blend tool success and routing accuracy into one score.
 *
 * Weights are 60% tool success and 40% routing accuracy. A value of exactly 0
 * is treated as "no data" for that metric, in which case the other metric is
 * returned on its own; when both are 0 the score is 0.
 * @param {number} successRate - Tool success rate (0–1).
 * @param {number} routingAccuracy - Routing accuracy (0–1).
 * @returns {number} Blended score.
 */
function blendScore(successRate, routingAccuracy) {
    const hasSuccess = successRate !== 0;
    const hasRouting = routingAccuracy !== 0;
    if (hasSuccess && hasRouting)
        return successRate * 0.6 + routingAccuracy * 0.4;
    if (hasSuccess)
        return successRate;
    if (hasRouting)
        return routingAccuracy;
    return 0;
}
|
|
134
|
+
/** Format a fraction as a percentage with one decimal place, e.g. 0.5 -> "50.0%". */
const pct = (n) => {
    const scaled = n * 100;
    return scaled.toFixed(1) + '%';
};
|
|
135
|
+
/**
 * Render a fixed-width progress bar for a fraction.
 * @param {number} n - Fraction to display; the filled part is clamped to [0, width] cells.
 * @param {number} [width=20] - Total number of cells.
 * @returns {string} Filled cells in cyan followed by dimmed empty cells.
 */
function bar(n, width = 20) {
    const cells = Math.round(n * width);
    const filled = Math.max(0, Math.min(width, cells));
    const solid = '█'.repeat(filled);
    const blank = '░'.repeat(width - filled);
    return chalk.cyan(solid) + chalk.dim(blank);
}
|
|
139
|
+
/**
 * Build the message shown when there is nothing to report yet.
 * @returns {string} Multi-line text explaining how to seed the learning data.
 */
function renderNotEnoughData() {
    const out = [];
    out.push('');
    out.push(` ${chalk.bold('kbot growth')}`);
    out.push(` ${chalk.dim('─'.repeat(40))}`);
    out.push('');
    out.push(` ${chalk.yellow('Not enough data yet.')}`);
    out.push('');
    out.push(` kbot learns from your sessions. To seed it:`);
    out.push(` • Run ${chalk.bold('kbot')} on real work for a few days`);
    out.push(` • Let the observer log tool calls to ${chalk.dim('~/.kbot/observer/session.jsonl')}`);
    out.push(` • Re-run ${chalk.bold('kbot growth')} after ~3 sessions`);
    out.push('');
    return out.join('\n');
}
|
|
154
|
+
/**
 * Render the growth report as coloured, human-readable text.
 *
 * Sections: headline lift, core metrics (current, previous, delta), top tools
 * by usage delta, per-domain routing accuracy, and the new-patterns count.
 * Metrics whose label contains "rate" or "accuracy" are shown as percentages
 * with the delta in percentage points; all others are rounded to integers.
 *
 * The headline names the actual reporting period: "this week" only for the
 * default 7-day window, otherwise "over the last N day(s)".
 *
 * @param {{summary: object, metrics: object[], deltas: object[], agents: object[]}} result
 * @returns {string} Report text (lines joined with newlines, no trailing newline).
 */
function renderPretty(result) {
    const s = result.summary;
    const trend = (delta) => (delta > 0 ? chalk.green('▲') : delta < 0 ? chalk.red('▼') : chalk.dim('·'));
    const signed = (delta, text) => `${delta >= 0 ? '+' : ''}${text}`;
    const lines = [];
    lines.push('');
    lines.push(` ${chalk.bold('kbot growth')} ${chalk.dim(`— last ${s.days} days vs prior ${s.days}`)}`);
    lines.push(` ${chalk.dim('─'.repeat(60))}`);
    lines.push('');
    const headlineColor = s.betterPct >= 0 ? chalk.green : chalk.red;
    const period = s.days === 7 ? 'this week' : `over the last ${s.days} day${s.days === 1 ? '' : 's'}`;
    lines.push(` ${chalk.bold('kbot is')} ${headlineColor.bold(signed(s.betterPct, `${s.betterPct.toFixed(1)}%`))} ${chalk.bold(`better at your tasks ${period}`)}`);
    lines.push('');
    // Core metrics table
    lines.push(` ${chalk.bold('Metrics')}`);
    lines.push(` ${chalk.dim('─'.repeat(60))}`);
    for (const m of result.metrics) {
        const arrow = trend(m.delta);
        const isRate = m.label.includes('rate') || m.label.includes('accuracy');
        const cur = isRate ? pct(m.current) : String(Math.round(m.current));
        const prev = isRate ? pct(m.prior) : String(Math.round(m.prior));
        const deltaStr = isRate
            ? signed(m.delta, `${(m.delta * 100).toFixed(1)}pp`)
            : signed(m.delta, `${Math.round(m.delta)}`);
        lines.push(` ${arrow} ${m.label.padEnd(24)} ${cur.padStart(8)} ${chalk.dim(`prev ${prev}`)} ${chalk.bold(deltaStr)}`);
    }
    lines.push('');
    // Tool deltas
    if (result.deltas.length > 0) {
        lines.push(` ${chalk.bold('Top tools by usage delta')}`);
        lines.push(` ${chalk.dim('─'.repeat(60))}`);
        for (const d of result.deltas) {
            const arrow = trend(d.delta);
            const deltaStr = signed(d.delta, `${d.delta}`);
            lines.push(` ${arrow} ${d.tool.padEnd(30)} ${String(d.current).padStart(5)} ${chalk.dim(`prev ${d.prior}`)} ${chalk.bold(deltaStr)}`);
        }
        lines.push('');
    }
    // Per-domain routing
    if (result.agents.length > 0) {
        lines.push(` ${chalk.bold('Per-domain routing accuracy')}`);
        lines.push(` ${chalk.dim('─'.repeat(60))}`);
        for (const a of result.agents) {
            lines.push(` ${a.agent.padEnd(16)} ${bar(a.accuracy)} ${pct(a.accuracy).padStart(6)} ${chalk.dim(`n=${a.samples}`)}`);
        }
        lines.push('');
    }
    lines.push(` ${chalk.dim(`New patterns learned: ${s.newPatterns}`)}`);
    lines.push('');
    return lines.join('\n');
}
|
|
203
|
+
/**
 * Compute and print kbot's growth report: the last `days` days compared with
 * the `days` before them.
 *
 * Inputs are read from the data directory: the observer log, the confidence
 * log, the skill profile and the evolution state (the last is read but not yet
 * used). Malformed inputs degrade to "no data" instead of throwing: a
 * non-array `entries`, a non-object `skills`, and null entries are ignored.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.json] - Print JSON instead of the formatted report.
 * @param {number} [opts.days=7] - Window length in days; floored, minimum 1.
 *   Non-finite values fall back to 7.
 * @param {string} [opts.dataDir] - Data directory; defaults to ~/.kbot.
 * @param {number} [opts.now] - End of the current window, epoch ms; defaults to Date.now().
 * @returns {object|null} `{summary, metrics, deltas, agents}`, or null when
 *   there are no events, confidence entries or skills at all.
 */
export function runGrowth(opts = {}) {
    const isRecord = (value) => value !== null && typeof value === 'object';
    const requestedDays = Number(opts.days ?? 7);
    // A NaN window length would make every window comparison false and silently report zeros.
    const days = Number.isFinite(requestedDays) ? Math.max(1, Math.floor(requestedDays)) : 7;
    const now = opts.now ?? Date.now();
    const dayMs = 24 * 60 * 60 * 1000;
    const currentStart = now - days * dayMs;
    const priorStart = now - 2 * days * dayMs;
    const paths = pathsFor(opts.dataDir ?? join(homedir(), '.kbot'));
    const events = readObserver(paths.observer);
    const confidenceRaw = readJsonSafe(paths.confidence);
    const confidence = Array.isArray(confidenceRaw?.entries) ? confidenceRaw.entries.filter(isRecord) : [];
    const skillRaw = readJsonSafe(paths.skill);
    const skills = isRecord(skillRaw?.skills) ? skillRaw.skills : {};
    // evolution state is read to surface future signals; currently used only as a
    // signal that the file exists and kbot has evolved behaviours.
    const evolution = readJsonSafe(paths.evolution);
    if (events.length === 0 && confidence.length === 0 && Object.keys(skills).length === 0) {
        if (opts.json) {
            // Same top-level keys as the populated report so consumers can rely on the shape.
            process.stdout.write(JSON.stringify({ summary: null, metrics: [], deltas: [], agents: [] }, null, 2) + '\n');
        }
        else {
            process.stdout.write(renderNotEnoughData() + '\n');
        }
        return null;
    }
    const cur = computeWindow(events, confidence, currentStart, now);
    const prior = computeWindow(events, confidence, priorStart, currentStart);
    // "Better by N%": compare blended score now vs prior, as a relative lift.
    const curBlend = blendScore(cur.successRate, cur.routingAccuracy);
    const priBlend = blendScore(prior.successRate, prior.routingAccuracy);
    const betterPct = priBlend > 0 ? ((curBlend - priBlend) / priBlend) * 100 : curBlend > 0 ? 100 : 0;
    // New patterns learned: skills whose lastAttempt is in the current window.
    let newPatterns = 0;
    for (const entry of Object.values(skills)) {
        if (entry?.lastAttempt && inWindow(entry.lastAttempt, currentStart, now))
            newPatterns++;
    }
    // Plus unique new domains appearing in confidence in current window but not prior.
    const priorDomains = new Set(confidence.filter((c) => inWindow(c.timestamp, priorStart, currentStart)).map((c) => c.domain));
    const newDomains = new Set(confidence
        .filter((c) => inWindow(c.timestamp, currentStart, now))
        .map((c) => c.domain)
        .filter((d) => !priorDomains.has(d)));
    newPatterns += newDomains.size;
    void evolution; // evolution data reserved for future deltas
    const summary = {
        betterPct,
        days,
        sessions: cur.sessions,
        toolCalls: cur.toolCalls,
        successRate: cur.successRate,
        routingAccuracy: cur.routingAccuracy,
        newPatterns,
    };
    const metrics = [
        { label: 'sessions', current: cur.sessions, prior: prior.sessions, delta: cur.sessions - prior.sessions },
        { label: 'tool calls', current: cur.toolCalls, prior: prior.toolCalls, delta: cur.toolCalls - prior.toolCalls },
        { label: 'tool success rate', current: cur.successRate, prior: prior.successRate, delta: cur.successRate - prior.successRate },
        { label: 'routing accuracy', current: cur.routingAccuracy, prior: prior.routingAccuracy, delta: cur.routingAccuracy - prior.routingAccuracy },
    ];
    const deltas = topToolDeltas(cur.toolCounts, prior.toolCounts, 5);
    const agents = perAgentRouting(confidence, currentStart, now);
    const result = { summary, metrics, deltas, agents };
    const output = opts.json ? JSON.stringify(result, null, 2) : renderPretty(result);
    process.stdout.write(output + '\n');
    return result;
}
|
|
272
|
+
//# sourceMappingURL=growth.js.map
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ableton.ts — unified Ableton client helper.
|
|
3
|
+
*
|
|
4
|
+
* Single entry point for all kbot Ableton tools. Tries kbot-control.amxd
|
|
5
|
+
* (TCP:9000) first; falls back to AbletonOSC (UDP:11000) if the device
|
|
6
|
+
* isn't loaded. Over time, as kbot-control's dispatcher covers the full
|
|
7
|
+
* OSC surface, the OSC fallback goes away.
|
|
8
|
+
*
|
|
9
|
+
* Tool code should import from here, not from kbot-control-client.ts
|
|
10
|
+
* or ableton-osc.ts directly.
|
|
11
|
+
*/
|
|
12
|
+
import { ensureAbleton, type OscArg } from './ableton-osc.js';
|
|
13
|
+
/**
|
|
14
|
+
* Call a kbot-control method if the device is loaded. Returns undefined
|
|
15
|
+
* if unavailable — caller should fall back to OSC.
|
|
16
|
+
*/
|
|
17
|
+
export declare function tryKc<T = unknown>(method: string, params?: Record<string, unknown>): Promise<T | undefined>;
|
|
18
|
+
/**
|
|
19
|
+
* Route an OSC operation through kbot-control if possible, else AbletonOSC.
|
|
20
|
+
* The kbot-control function is tried first; the OSC function is only called when the first one resolves to undefined.
|
|
21
|
+
*
|
|
22
|
+
* Use when you have parallel implementations. Example:
|
|
23
|
+
* await routed(
|
|
24
|
+
* () => tryKc('song.tempo', { value: 120 }),
|
|
25
|
+
* async () => { (await ensureAbleton()).send('/live/song/set/tempo', 120); return 120 },
|
|
26
|
+
* )
|
|
27
|
+
*/
|
|
28
|
+
export declare function routed<T>(kc: () => Promise<T | undefined>, osc: () => Promise<T>): Promise<T>;
|
|
29
|
+
export { ensureAbleton, type OscArg };
|
|
30
|
+
//# sourceMappingURL=ableton.d.ts.map
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ableton.ts — unified Ableton client helper.
|
|
3
|
+
*
|
|
4
|
+
* Single entry point for all kbot Ableton tools. Tries kbot-control.amxd
|
|
5
|
+
* (TCP:9000) first; falls back to AbletonOSC (UDP:11000) if the device
|
|
6
|
+
* isn't loaded. Over time, as kbot-control's dispatcher covers the full
|
|
7
|
+
* OSC surface, the OSC fallback goes away.
|
|
8
|
+
*
|
|
9
|
+
* Tool code should import from here, not from kbot-control-client.ts
|
|
10
|
+
* or ableton-osc.ts directly.
|
|
11
|
+
*/
|
|
12
|
+
import { KbotControlClient } from './kbot-control-client.js';
|
|
13
|
+
import { ensureAbleton } from './ableton-osc.js';
|
|
14
|
+
// Result of the last probe (null = never probed) and when that probe started.
let kbotControlAvailable = null;
let lastProbeAt = 0;
// How long a probe result is reused before connecting again.
const PROBE_CACHE_MS = 5_000;
/**
 * Determine whether the kbot-control client can be reached.
 *
 * The result of the previous probe is reused for PROBE_CACHE_MS; after that a
 * new connect() is attempted. Any connection error counts as "unavailable".
 * @returns {Promise<boolean>} The client's `isConnected` value after the probe, or false on error.
 */
async function probeKbotControl() {
    const now = Date.now();
    const cacheIsFresh = kbotControlAvailable !== null && now - lastProbeAt < PROBE_CACHE_MS;
    if (!cacheIsFresh) {
        try {
            await KbotControlClient.get().connect();
            kbotControlAvailable = KbotControlClient.get().isConnected;
        }
        catch {
            kbotControlAvailable = false;
        }
        // Stamped with the time the probe began, not the time connect() settled.
        lastProbeAt = now;
    }
    return kbotControlAvailable;
}
|
|
32
|
+
/**
 * Invoke a kbot-control method when the client is reachable.
 *
 * Resolves to undefined — the signal for callers to fall back to OSC — when
 * the probe reports the client as unavailable or when the call itself throws
 * (for example because the dispatcher does not implement the method yet).
 * @param {string} method - Method name passed to the client's call().
 * @param {Record<string, unknown>} [params] - Parameters passed along unchanged.
 * @returns {Promise<unknown|undefined>} The call result, or undefined.
 */
export async function tryKc(method, params) {
    const available = await probeKbotControl();
    if (available) {
        try {
            return await KbotControlClient.get().call(method, params);
        }
        catch {
            // Method might not be implemented yet in the dispatcher;
            // let the caller fall through to OSC.
        }
    }
    return undefined;
}
|
|
48
|
+
/**
 * Run an operation through kbot-control when possible, otherwise through AbletonOSC.
 *
 * This is a sequential fallback, not a race: `kc` is awaited first and `osc`
 * is only invoked when `kc` resolves to exactly `undefined`. Any other value,
 * including null, 0 or '', is returned as-is. A rejection from `kc` propagates.
 *
 * Example:
 *   await routed(
 *     () => tryKc('song.tempo', { value: 120 }),
 *     async () => { (await ensureAbleton()).send('/live/song/set/tempo', 120); return 120 },
 *   )
 * @param {() => Promise<unknown|undefined>} kc - kbot-control implementation.
 * @param {() => Promise<unknown>} osc - AbletonOSC fallback implementation.
 * @returns {Promise<unknown>} The value from whichever implementation was used.
 */
export async function routed(kc, osc) {
    const viaKbotControl = await kc();
    return viaKbotControl === undefined ? osc() : viaKbotControl;
}
|
|
64
|
+
// Re-export the legacy OSC escape hatch for tools that haven't migrated yet.
|
|
65
|
+
export { ensureAbleton };
|
|
66
|
+
//# sourceMappingURL=ableton.js.map
|