@kernel.chat/kbot 3.99.19 → 3.99.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/agent.js +23 -0
- package/dist/agents/producer.js +65 -23
- package/dist/auth.d.ts +2 -0
- package/dist/cli.js +30 -4
- package/dist/critic-gate.d.ts +26 -0
- package/dist/critic-gate.js +220 -0
- package/dist/critic-retrospect.d.ts +64 -0
- package/dist/critic-retrospect.js +279 -0
- package/dist/growth.d.ts +37 -0
- package/dist/growth.js +272 -0
- package/dist/integrations/ableton.d.ts +30 -0
- package/dist/integrations/ableton.js +66 -0
- package/dist/integrations/kbot-control-client.d.ts +66 -0
- package/dist/integrations/kbot-control-client.js +224 -0
- package/dist/memory-prune.d.ts +19 -0
- package/dist/memory-prune.js +77 -0
- package/dist/observer.d.ts +13 -0
- package/dist/observer.js +5 -1
- package/dist/planner/hierarchical/persistence.d.ts +26 -0
- package/dist/planner/hierarchical/persistence.js +113 -0
- package/dist/planner/hierarchical/session-planner.d.ts +68 -0
- package/dist/planner/hierarchical/session-planner.js +141 -0
- package/dist/planner/hierarchical/types.d.ts +116 -0
- package/dist/planner/hierarchical/types.js +18 -0
- package/dist/tool-pipeline.d.ts +39 -1
- package/dist/tool-pipeline.js +109 -1
- package/dist/tools/ableton-listen.d.ts +2 -0
- package/dist/tools/ableton-listen.js +126 -0
- package/dist/tools/ableton.js +477 -12
- package/dist/tools/index.js +2 -0
- package/dist/tools/kbot-control.d.ts +2 -0
- package/dist/tools/kbot-control.js +63 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -126,6 +126,16 @@ kbot attack_surface_scan --domain x.com # Passive recon + security headers
|
|
|
126
126
|
kbot incident_response --type ransomware # Generate IR playbook
|
|
127
127
|
```
|
|
128
128
|
|
|
129
|
+
### Design From Your Terminal
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
kbot design "a minimal pitch deck cover for our product" --kind deck --pdf --open
|
|
133
|
+
kbot design "landing page hero with our brand colors" --kind page
|
|
134
|
+
kbot design "interactive prototype of a chat inbox" --kind prototype
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Local-first alternative to Anthropic's Claude Design. kbot reads your repo's CSS design tokens, typography, and component patterns, then generates a single complete HTML file — no external deps, mobile-first, a11y-clean — that matches your visual system. Optional Playwright-backed PDF export. Runs on your local model at $0, ships with `@kernel.chat/kbot`. No subscription, no upload, no cloud.
|
|
138
|
+
|
|
129
139
|
### Audit Any Repo in One Command
|
|
130
140
|
|
|
131
141
|
```
|
|
@@ -151,6 +161,7 @@ Checks security, documentation, code quality, CI/CD, community health, and DevOp
|
|
|
151
161
|
| Buddy companion | Yes (8 species) | No | No | No | No |
|
|
152
162
|
| iPhone control | Yes | No | No | No | No |
|
|
153
163
|
| Music production | Ableton Live | No | No | No | No |
|
|
164
|
+
| Visual design | `kbot design` (local, $0) | Separate Claude Design subscription | No | No | No |
|
|
154
165
|
| Financial analysis | Multi-agent | No | No | No | No |
|
|
155
166
|
| Threat intelligence | Yes | No | No | No | No |
|
|
156
167
|
| Buddy leaderboard | kernel.chat | No | No | No | No |
|
package/dist/agent.js
CHANGED
|
@@ -1084,6 +1084,8 @@ Always quote file paths that contain spaces. Never reference internal system nam
|
|
|
1084
1084
|
let toolCallCount = 0;
|
|
1085
1085
|
let lastResponse = null;
|
|
1086
1086
|
const toolSequenceLog = [];
|
|
1087
|
+
// Adversarial critic: track per-(tool+args) retry counts so we don't loop forever on reject.
|
|
1088
|
+
const criticRetryCounts = new Map();
|
|
1087
1089
|
const toolSequenceWithArgs = [];
|
|
1088
1090
|
const originalMessage = message;
|
|
1089
1091
|
let cumulativeCostUsd = 0;
|
|
@@ -1726,6 +1728,27 @@ Always quote file paths that contain spaces. Never reference internal system nam
|
|
|
1726
1728
|
error: !!ctx.error || ctx.aborted,
|
|
1727
1729
|
duration_ms: ctx.durationMs,
|
|
1728
1730
|
};
|
|
1731
|
+
// ── Adversarial critic gate (generator/discriminator on tool output) ──
|
|
1732
|
+
if (!result.error && process.env.KBOT_NO_CRITIC !== '1') {
|
|
1733
|
+
try {
|
|
1734
|
+
const { gateToolResult } = await import('./critic-gate.js');
|
|
1735
|
+
const key = `${call.name}::${JSON.stringify(call.arguments || {}).slice(0, 200)}`;
|
|
1736
|
+
const prior = criticRetryCounts.get(key) || 0;
|
|
1737
|
+
const verdict = await gateToolResult(call.name, call.arguments || {}, result.result, {});
|
|
1738
|
+
if (!verdict.accept) {
|
|
1739
|
+
if (prior >= 2) {
|
|
1740
|
+
result.result = `[critic-warning: ${verdict.reason || 'rejected'} — accepted after ${prior} retries] ${result.result}`;
|
|
1741
|
+
}
|
|
1742
|
+
else {
|
|
1743
|
+
criticRetryCounts.set(key, prior + 1);
|
|
1744
|
+
result.result = `[critic-reject] ${verdict.reason || 'output rejected'}\n` +
|
|
1745
|
+
`Retry hint: ${verdict.retry_hint || 'Try different arguments or a different tool.'}\n` +
|
|
1746
|
+
`Original output (for reference, do not trust):\n${result.result.slice(0, 1000)}`;
|
|
1747
|
+
}
|
|
1748
|
+
}
|
|
1749
|
+
}
|
|
1750
|
+
catch { /* critic is non-critical */ }
|
|
1751
|
+
}
|
|
1729
1752
|
results.push(result);
|
|
1730
1753
|
ui.onToolCallEnd(call.name, result.result, result.error ? result.result : undefined, result.duration_ms);
|
|
1731
1754
|
// Update buddy mood based on tool outcome (content-aware reactions)
|
package/dist/agents/producer.js
CHANGED
|
@@ -9,48 +9,90 @@ export const PRODUCER_PRESET = {
|
|
|
9
9
|
|
|
10
10
|
You don't suggest — you execute. When Isaac says "add reverb to the vocals", you add the reverb. When he says "write me a chord progression", you write the MIDI. You act through kbot's Ableton tools which send OSC commands directly to Ableton Live.
|
|
11
11
|
|
|
12
|
-
## Your Tools
|
|
12
|
+
## Your Tools — 21 Ableton tools + 4 bridge tools + Serum 2 + M4L + computer-use fallback
|
|
13
13
|
|
|
14
14
|
### Session Awareness
|
|
15
|
-
- **ableton_session_info** — ALWAYS call this first to understand the
|
|
16
|
-
- **ableton_knowledge** — Query the deep knowledge base for device parameters, effect chains, mixing
|
|
15
|
+
- **ableton_session_info** — ALWAYS call this first to understand the session state (tracks, clips, tempo, devices, what's playing)
|
|
16
|
+
- **ableton_knowledge** — Query the deep knowledge base for device parameters, effect chains, mixing, genre templates
|
|
17
17
|
|
|
18
|
-
### Transport &
|
|
19
|
-
- **ableton_transport** —
|
|
18
|
+
### Transport & Song
|
|
19
|
+
- **ableton_transport** — play, stop, record, tempo, time_sig, seek
|
|
20
|
+
- **ableton_song** — undo, redo, tap_tempo, metronome, punch_in/out, cue points (add/delete/next/prev/jump), loop + loop_start + loop_length, back_to_arranger, capture_midi, stop_all_clips, groove, record_mode (arrangement record), jump_by, status. Use this for anything at the song level the transport tool doesn't cover.
|
|
20
21
|
|
|
21
|
-
###
|
|
22
|
-
- **ableton_track** —
|
|
22
|
+
### Tracks
|
|
23
|
+
- **ableton_track** — list, mute, solo, arm, volume, pan, rename, info, color (0-69), monitoring (in/auto/off), input_routing, output_routing, **set** (generic escape hatch over any AbletonOSC track setter)
|
|
24
|
+
- **ableton_create_track** — create midi / audio / **return** track, optionally auto-load an instrument on midi tracks
|
|
23
25
|
|
|
24
|
-
###
|
|
25
|
-
- **ableton_clip** —
|
|
26
|
-
|
|
26
|
+
### Clips
|
|
27
|
+
- **ableton_clip** — fire, stop, create, delete, duplicate, info, list, **set** (generic setter — use property=name to change any clip property: gain, pitch_coarse, pitch_fine, loop_start, loop_end, start_marker, end_marker, velocity_amount, color_index, launch_mode, launch_quantization, warp_mode, warping, legato, muted, ram_mode)
|
|
28
|
+
|
|
29
|
+
### Scenes
|
|
30
|
+
- **ableton_scene** — fire, list, create, duplicate, rename
|
|
31
|
+
|
|
32
|
+
### View & Selection (drive the Ableton cursor)
|
|
33
|
+
- **ableton_view** — get/set the currently selected scene, track, clip, or device. Use set before opening a device to put the UI where the user can see it.
|
|
27
34
|
|
|
28
35
|
### MIDI & Composition
|
|
29
|
-
- **ableton_midi** —
|
|
30
|
-
- **ableton_create_progression** —
|
|
36
|
+
- **ableton_midi** — write/read/clear MIDI notes. notes param is JSON: [{"pitch":60,"start":0,"duration":1,"velocity":100}]
|
|
37
|
+
- **ableton_create_progression** — chord progressions as MIDI from natural language:
|
|
31
38
|
- Roman numerals: "ii V I" in any key
|
|
32
39
|
- Chord symbols: "Cmaj7 Am7 Fmaj7 G7"
|
|
33
40
|
- Named progressions: "Andalusian cadence", "Coltrane changes", "12-bar blues"
|
|
34
41
|
- 6 voicing styles: close, open, drop-2, drop-3, spread, shell
|
|
35
|
-
- Rhythm
|
|
42
|
+
- Rhythm: whole, half, quarter, eighth, arpeggio_up, arpeggio_down
|
|
43
|
+
|
|
44
|
+
### Instruments, Samples, Drum Racks
|
|
45
|
+
- **ableton_load_plugin** — load any native or VST/AU instrument by name. OSC-first, falls back to AppleScript browser automation on macOS.
|
|
46
|
+
- **ableton_load_sample** — load a sample (wav/aif) from User Library into a Drum Rack pad
|
|
47
|
+
- **ableton_build_drum_rack** — one-shot: create rack + load samples + write pattern
|
|
36
48
|
|
|
37
49
|
### Mixing & Effects
|
|
38
|
-
- **ableton_device** —
|
|
39
|
-
- **ableton_mixer** —
|
|
50
|
+
- **ableton_device** — list, params, set, enable, disable, info
|
|
51
|
+
- **ableton_mixer** — snapshot all levels, batch-set volumes/pans, set sends
|
|
52
|
+
- **ableton_load_effect** — load an audio effect onto a track
|
|
53
|
+
- **ableton_effect_chain** — build multi-device effect chains in one call
|
|
54
|
+
- **ableton_browse** / **ableton_load_preset** — navigate the library, load presets
|
|
55
|
+
|
|
56
|
+
### Audio Analysis
|
|
57
|
+
- **ableton_audio_analysis** — real-time L/R RMS meters, peak detection, per-track or master
|
|
58
|
+
|
|
59
|
+
### Serum 2 (Xfer Records synth, 542 parameters)
|
|
60
|
+
- **serum2_preset** — list kbot's built-in Serum 2 presets, install them to Serum's User folder, or create a new .SerumPreset file from parameter overrides. Use this when Isaac wants a Serum sound designed programmatically rather than tweaked by hand.
|
|
61
|
+
|
|
62
|
+
### Custom Max-for-Live devices (kbot's own, in ~/Music/Ableton/User Library)
|
|
63
|
+
9 instruments/effects Isaac built: kbot-auto-pilot, kbot-bass-synth, kbot-dj-fx, kbot-drum-synth, kbot-genre-morph, kbot-hat-machine, kbot-pad-synth, kbot-riser-engine, kbot-sidechain. Load them via **ableton_load_plugin** with name="kbot-bass-synth" etc.
|
|
40
64
|
|
|
41
|
-
### Music Theory
|
|
65
|
+
### Music Theory Helpers
|
|
42
66
|
- **magenta_continue** — AI melody continuation from a seed
|
|
43
67
|
- **magenta_harmonize** — Generate harmonically aware chord progressions
|
|
44
68
|
- **magenta_drumify** — Generate genre-specific drum patterns
|
|
45
69
|
|
|
46
|
-
|
|
70
|
+
### Computer-use fallback (when OSC can't do it)
|
|
71
|
+
For operations AbletonOSC doesn't expose — drag-and-drop from browser, Max-device internal UI, some preference dialogs, some VST wrapper UI — fall back to Claude's computer-use MCP:
|
|
72
|
+
- **mcp__computer-use__screenshot** to see the current UI
|
|
73
|
+
- **mcp__computer-use__left_click** / **double_click** / **type** / **key** to drive it
|
|
74
|
+
- Pattern for loading a device the OSC path can't: (1) focus the target track with ableton_view set track, (2) click the Instruments panel in the browser (left sidebar), (3) click the search field and type the device name, (4) double-click the result. This also works for audio effects, presets, and samples.
|
|
75
|
+
|
|
76
|
+
If OSC's ableton_load_plugin reports success but ableton_device list shows no device, silently fall through to the computer-use path — don't tell Isaac it failed unless both paths fail.
|
|
77
|
+
|
|
78
|
+
## Workflow Protocol — OSC + computer-use as ONE surface
|
|
79
|
+
|
|
80
|
+
**You can control every inch of Ableton.** OSC handles the programmatic 95%. For anything OSC can't reach, computer-use closes the gap. Never tell Isaac "that's not supported" without trying the fallback path first.
|
|
81
|
+
|
|
82
|
+
1. **Always check state first**: Call ableton_session_info before any operation so you know what tracks exist, what's armed, what's playing.
|
|
83
|
+
2. **Track numbers**: Isaac says "track 1" (1-based), OSC uses 0-based. Always convert: track 1 = index 0.
|
|
84
|
+
3. **Find by name**: If Isaac says "the bass track" or "the drums", scan track names from session_info and match.
|
|
85
|
+
4. **Chain operations**: For complex requests, chain multiple tool calls. "Set up a house track" = create tracks + set tempo + add instruments + add effects + write MIDI patterns. Don't ask for permission on each step — execute the whole chain and report the result.
|
|
86
|
+
5. **Report musically**: Don't say "set parameter 3 to 0.7". Say "set the reverb decay to 3.2 seconds" or "compressed the vocals at 4:1 with a medium attack".
|
|
87
|
+
6. **Use knowledge**: Before adding effects, check ableton_knowledge for the best device and parameter settings for the goal.
|
|
88
|
+
7. **Verify, don't assume**: After loading a plugin or creating a track, list devices or query to confirm it actually took. OSC calls can return OK and still silently drop on the Live side.
|
|
89
|
+
8. **Fallback protocol (OSC → computer-use)**: When an OSC write returns OK but the verification read shows nothing happened — OR when the operation is inherently UI-only (drag-drop from browser to track, internal Max UI, VST wrapper settings, preset scrolling in Live's native browser, device presets in VST3 wrapper) — immediately fall through to computer-use. Sequence: ableton_view set track → screenshot → click → type → double_click → screenshot to confirm. No narrative between steps; just execute.
|
|
90
|
+
9. **Arrangement vs session**: Isaac works in session view by default. For arrangement work, use ableton_song back_to_arranger to switch, then record_mode for arrangement recording, jump_by for timeline navigation.
|
|
91
|
+
10. **Session lock**: If computer-use is needed, kbot's own computer-use tools (--computer-use flag) share a lock file. Prefer Claude's native computer-use MCP (mcp__computer-use__*) which is independent.
|
|
92
|
+
|
|
93
|
+
## What "full control" means here
|
|
47
94
|
|
|
48
|
-
|
|
49
|
-
2. **Track numbers**: Isaac says "track 1" (1-based), OSC uses 0-based. Always convert: track 1 = index 0
|
|
50
|
-
3. **Find by name**: If Isaac says "the bass track" or "the drums", scan track names from session_info and find the matching index
|
|
51
|
-
4. **Chain operations**: For complex requests, chain multiple tool calls. "Set up a house track" = create tracks + set tempo + add instruments + add effects + write MIDI patterns
|
|
52
|
-
5. **Report musically**: Don't say "set parameter 3 to 0.7". Say "set the reverb decay to 3.2 seconds" or "compressed the vocals at 4:1 with a medium attack"
|
|
53
|
-
6. **Use knowledge**: Before adding effects, check ableton_knowledge for the best device and parameter settings for the goal
|
|
95
|
+
You have write access to every AbletonOSC address (transport, song, track, clip, scene, device, view, mixer, MIDI, routing). For the operations Ableton exposes only through its UI, you have computer-use. Between them, the set of "things I can make Ableton do" equals the set of "things a human producer can make Ableton do." The only thing you cannot do is hear — so Isaac remains the ears. Everything else is executable.
|
|
54
96
|
|
|
55
97
|
## Deep Ableton Knowledge
|
|
56
98
|
|
package/dist/auth.d.ts
CHANGED
|
@@ -20,6 +20,8 @@ export interface KbotConfig {
|
|
|
20
20
|
byok_enabled?: boolean;
|
|
21
21
|
byok_provider?: ByokProvider;
|
|
22
22
|
kernel_token?: string;
|
|
23
|
+
critic_enabled?: boolean;
|
|
24
|
+
critic_strictness?: number;
|
|
23
25
|
}
|
|
24
26
|
export declare function loadConfig(): KbotConfig | null;
|
|
25
27
|
export declare function saveConfig(config: KbotConfig): void;
|
package/dist/cli.js
CHANGED
|
@@ -879,6 +879,29 @@ async function main() {
|
|
|
879
879
|
printInfo(` ${result.skipped} skipped (existing user-authored files preserved)`);
|
|
880
880
|
printInfo(` → ${result.destination}`);
|
|
881
881
|
});
|
|
882
|
+
const memoryCmd = program.command('memory').description('Inspect or prune kbot\'s memory store');
|
|
883
|
+
memoryCmd
|
|
884
|
+
.command('prune')
|
|
885
|
+
.description('Compact ~/.kbot/memory/solutions.json — removes stale, unused, or obsolete entries')
|
|
886
|
+
.option('--max-age-days <n>', 'Drop entries older than N days with zero reuses', '30')
|
|
887
|
+
.option('--obsolete-version <prefix>', 'Drop entries whose solution mentions this version prefix')
|
|
888
|
+
.option('--dry-run', 'Count what would be pruned without writing')
|
|
889
|
+
.action(async (opts) => {
|
|
890
|
+
const { pruneSolutions } = await import('./memory-prune.js');
|
|
891
|
+
const result = pruneSolutions({
|
|
892
|
+
maxAgeDays: opts.maxAgeDays ? parseInt(opts.maxAgeDays, 10) : undefined,
|
|
893
|
+
obsoleteVersionPrefix: opts.obsoleteVersion,
|
|
894
|
+
dryRun: opts.dryRun,
|
|
895
|
+
});
|
|
896
|
+
printInfo(`Total: ${result.total} · Kept: ${result.kept} · Pruned: ${result.pruned}`);
|
|
897
|
+
for (const [reason, count] of Object.entries(result.reasons)) {
|
|
898
|
+
printInfo(` ${reason}: ${count}`);
|
|
899
|
+
}
|
|
900
|
+
if (result.backup)
|
|
901
|
+
printSuccess(`Backup written: ${result.backup}`);
|
|
902
|
+
if (opts.dryRun)
|
|
903
|
+
printInfo('(dry run — no changes written)');
|
|
904
|
+
});
|
|
882
905
|
program
|
|
883
906
|
.command('design <brief...>')
|
|
884
907
|
.description('Local-first alternative to Claude Design. Reads your repo\'s design tokens and generates an HTML prototype applying your visual system.')
|
|
@@ -948,10 +971,13 @@ async function main() {
|
|
|
948
971
|
});
|
|
949
972
|
program
|
|
950
973
|
.command('growth')
|
|
951
|
-
.description('See how
|
|
952
|
-
.
|
|
953
|
-
|
|
954
|
-
|
|
974
|
+
.description('See how kbot has improved at your tasks — tool success rate, routing accuracy, learned patterns')
|
|
975
|
+
.option('--json', 'Output JSON instead of pretty table')
|
|
976
|
+
.option('--days <n>', 'Compare last N days vs prior N days', '7')
|
|
977
|
+
.action(async (opts) => {
|
|
978
|
+
const { runGrowth } = await import('./growth.js');
|
|
979
|
+
const days = opts.days ? Number.parseInt(opts.days, 10) : 7;
|
|
980
|
+
runGrowth({ json: opts.json, days: Number.isFinite(days) && days > 0 ? days : 7 });
|
|
955
981
|
});
|
|
956
982
|
program
|
|
957
983
|
.command('decisions')
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Critic Gate — adversarial discriminator on tool outputs.
|
|
3
|
+
* Generator/discriminator pattern: critic reviews each tool result before the
|
|
4
|
+
* main LLM sees it. Fast path auto-accepts trivial results. Config via
|
|
5
|
+
* ~/.kbot/config.json: critic_enabled (bool), critic_strictness (0..1).
|
|
6
|
+
* Hard disable: env KBOT_NO_CRITIC=1.
|
|
7
|
+
*/
|
|
8
|
+
export interface CriticVerdict {
|
|
9
|
+
accept: boolean;
|
|
10
|
+
reason?: string;
|
|
11
|
+
retry_hint?: string;
|
|
12
|
+
confidence: number;
|
|
13
|
+
}
|
|
14
|
+
export interface GateOpts {
|
|
15
|
+
strictness?: number;
|
|
16
|
+
provider?: string;
|
|
17
|
+
/** Optional LLM client override — takes user prompt, returns raw text. For testing. */
|
|
18
|
+
llmClient?: (userPrompt: string) => Promise<string>;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Gate a tool result through the adversarial critic.
|
|
22
|
+
* Never throws — on any failure, returns accept=true with low confidence so
|
|
23
|
+
* the agent loop is never blocked by the critic itself.
|
|
24
|
+
*/
|
|
25
|
+
export declare function gateToolResult(tool: string, args: Record<string, unknown>, result: unknown, opts?: GateOpts): Promise<CriticVerdict>;
|
|
26
|
+
//# sourceMappingURL=critic-gate.d.ts.map
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Critic Gate — adversarial discriminator on tool outputs.
|
|
3
|
+
* Generator/discriminator pattern: critic reviews each tool result before the
|
|
4
|
+
* main LLM sees it. Fast path auto-accepts trivial results. Config via
|
|
5
|
+
* ~/.kbot/config.json: critic_enabled (bool), critic_strictness (0..1).
|
|
6
|
+
* Hard disable: env KBOT_NO_CRITIC=1.
|
|
7
|
+
*/
|
|
8
|
+
import { loadConfig } from './auth.js';
|
|
9
|
+
const TRUSTED_TOOLS = new Set([
|
|
10
|
+
'read', 'read_file', 'kbot_read', 'kbot_read_file',
|
|
11
|
+
'glob', 'kbot_glob', 'grep', 'kbot_grep', 'list_directory', 'ls',
|
|
12
|
+
'git_status', 'git_log', 'git_diff', 'git_branch',
|
|
13
|
+
'terminal_cwd', 'env_check', 'memory_recall', 'memory_search',
|
|
14
|
+
]);
|
|
15
|
+
const ERROR_KEYWORDS = [
|
|
16
|
+
'tool error:', 'error:', 'enoent', 'permission denied', 'eacces',
|
|
17
|
+
'not found', 'failed to', 'traceback', 'stack trace',
|
|
18
|
+
'undefined is not', 'cannot read prop', 'refused',
|
|
19
|
+
];
|
|
20
|
+
const MAX_ARGS_CHARS = 500;
|
|
21
|
+
const MAX_RESULT_CHARS = 2000;
|
|
22
|
+
const TRIVIAL_MAX_BYTES = 10 * 1024;
|
|
23
|
+
function truncate(s, max) {
|
|
24
|
+
if (s.length <= max)
|
|
25
|
+
return s;
|
|
26
|
+
return s.slice(0, max) + `\n…[truncated, original ${s.length} chars]`;
|
|
27
|
+
}
|
|
28
|
+
function toText(x) {
|
|
29
|
+
if (x == null)
|
|
30
|
+
return '';
|
|
31
|
+
if (typeof x === 'string')
|
|
32
|
+
return x;
|
|
33
|
+
try {
|
|
34
|
+
return JSON.stringify(x);
|
|
35
|
+
}
|
|
36
|
+
catch {
|
|
37
|
+
return String(x);
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
function hasErrorKeyword(text) {
|
|
41
|
+
const lower = text.toLowerCase();
|
|
42
|
+
return ERROR_KEYWORDS.some(k => lower.includes(k));
|
|
43
|
+
}
|
|
44
|
+
/** True only for tools in TRUSTED_TOOLS whose output is non-empty, at most TRIVIAL_MAX_BYTES characters, and free of error keywords — such results skip the critic LLM. */
|
|
45
|
+
function isTriviallyValid(tool, resultText) {
|
|
46
|
+
if (!resultText || resultText.trim().length === 0)
|
|
47
|
+
return false;
|
|
48
|
+
if (resultText.length > TRIVIAL_MAX_BYTES)
|
|
49
|
+
return false;
|
|
50
|
+
if (hasErrorKeyword(resultText))
|
|
51
|
+
return false;
|
|
52
|
+
if (TRUSTED_TOOLS.has(tool))
|
|
53
|
+
return true;
|
|
54
|
+
return false;
|
|
55
|
+
}
|
|
56
|
+
function resolveCriticProvider(override) {
|
|
57
|
+
const cfg = loadConfig();
|
|
58
|
+
const provider = (override || cfg?.byok_provider || 'anthropic').toLowerCase();
|
|
59
|
+
const localModel = cfg?.default_model && cfg.default_model !== 'auto' ? cfg.default_model : 'llama3.2:3b';
|
|
60
|
+
if (provider === 'ollama' || provider === 'kbot-local') {
|
|
61
|
+
return { provider, model: localModel, apiKey: 'local', apiUrl: 'http://localhost:11434/v1/chat/completions' };
|
|
62
|
+
}
|
|
63
|
+
if (provider === 'openai') {
|
|
64
|
+
if (!cfg?.byok_key)
|
|
65
|
+
return null;
|
|
66
|
+
return { provider: 'openai', model: 'gpt-4o-mini', apiKey: cfg.byok_key, apiUrl: 'https://api.openai.com/v1/chat/completions' };
|
|
67
|
+
}
|
|
68
|
+
if (!cfg?.byok_key)
|
|
69
|
+
return null;
|
|
70
|
+
return { provider: 'anthropic', model: 'claude-haiku-4-5', apiKey: cfg.byok_key, apiUrl: 'https://api.anthropic.com/v1/messages' };
|
|
71
|
+
}
|
|
72
|
+
const CRITIC_SYSTEM = 'You are a strict senior engineer reviewing a tool output. ' +
|
|
73
|
+
'Did this tool call produce a useful, correct, non-hallucinated result ' +
|
|
74
|
+
'for the stated intent? Return ONLY JSON with keys: ' +
|
|
75
|
+
'{"accept": bool, "reason": string, "retry_hint": string, "confidence": number between 0 and 1}. ' +
|
|
76
|
+
'No prose, no code fences — JSON only.';
|
|
77
|
+
function buildUserPrompt(tool, args, result) {
|
|
78
|
+
const argsText = truncate(toText(args), MAX_ARGS_CHARS);
|
|
79
|
+
const resultText = truncate(toText(result), MAX_RESULT_CHARS);
|
|
80
|
+
return `TOOL: ${tool}\n\nARGS:\n${argsText}\n\nRESULT:\n${resultText}`;
|
|
81
|
+
}
|
|
82
|
+
function parseVerdict(text) {
|
|
83
|
+
if (!text)
|
|
84
|
+
return null;
|
|
85
|
+
// Strip fences/prose; the greedy match spans from the first "{" to the last "}" in the text.
|
|
86
|
+
const match = text.match(/\{[\s\S]*\}/);
|
|
87
|
+
if (!match)
|
|
88
|
+
return null;
|
|
89
|
+
try {
|
|
90
|
+
const raw = JSON.parse(match[0]);
|
|
91
|
+
const confidence = typeof raw.confidence === 'number'
|
|
92
|
+
? Math.max(0, Math.min(1, raw.confidence))
|
|
93
|
+
: 0.5;
|
|
94
|
+
return {
|
|
95
|
+
accept: !!raw.accept,
|
|
96
|
+
reason: typeof raw.reason === 'string' ? raw.reason : undefined,
|
|
97
|
+
retry_hint: typeof raw.retry_hint === 'string' ? raw.retry_hint : undefined,
|
|
98
|
+
confidence,
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
catch {
|
|
102
|
+
return null;
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
async function callAnthropic(p, userPrompt) {
|
|
106
|
+
const res = await fetch(p.apiUrl, {
|
|
107
|
+
method: 'POST',
|
|
108
|
+
headers: {
|
|
109
|
+
'Content-Type': 'application/json',
|
|
110
|
+
'x-api-key': p.apiKey,
|
|
111
|
+
'anthropic-version': '2023-06-01',
|
|
112
|
+
},
|
|
113
|
+
body: JSON.stringify({
|
|
114
|
+
model: p.model,
|
|
115
|
+
max_tokens: 256,
|
|
116
|
+
system: CRITIC_SYSTEM,
|
|
117
|
+
messages: [{ role: 'user', content: userPrompt }],
|
|
118
|
+
}),
|
|
119
|
+
signal: AbortSignal.timeout(15_000),
|
|
120
|
+
});
|
|
121
|
+
if (!res.ok)
|
|
122
|
+
throw new Error(`critic HTTP ${res.status}`);
|
|
123
|
+
const data = await res.json();
|
|
124
|
+
return (data.content || []).filter(b => b.type === 'text').map(b => b.text || '').join('');
|
|
125
|
+
}
|
|
126
|
+
async function callOpenAICompat(p, userPrompt) {
|
|
127
|
+
const headers = { 'Content-Type': 'application/json' };
|
|
128
|
+
if (p.apiKey && p.apiKey !== 'local')
|
|
129
|
+
headers['Authorization'] = `Bearer ${p.apiKey}`;
|
|
130
|
+
const res = await fetch(p.apiUrl, {
|
|
131
|
+
method: 'POST',
|
|
132
|
+
headers,
|
|
133
|
+
body: JSON.stringify({
|
|
134
|
+
model: p.model,
|
|
135
|
+
max_tokens: 256,
|
|
136
|
+
messages: [
|
|
137
|
+
{ role: 'system', content: CRITIC_SYSTEM },
|
|
138
|
+
{ role: 'user', content: userPrompt },
|
|
139
|
+
],
|
|
140
|
+
}),
|
|
141
|
+
signal: AbortSignal.timeout(15_000),
|
|
142
|
+
});
|
|
143
|
+
if (!res.ok)
|
|
144
|
+
throw new Error(`critic HTTP ${res.status}`);
|
|
145
|
+
const data = await res.json();
|
|
146
|
+
return data.choices?.[0]?.message?.content || '';
|
|
147
|
+
}
|
|
148
|
+
/**
|
|
149
|
+
* Gate a tool result through the adversarial critic.
|
|
150
|
+
* Never throws — on any failure, returns accept=true with low confidence so
|
|
151
|
+
* the agent loop is never blocked by the critic itself.
|
|
152
|
+
*/
|
|
153
|
+
export async function gateToolResult(tool, args, result, opts = {}) {
|
|
154
|
+
if (process.env.KBOT_NO_CRITIC === '1') {
|
|
155
|
+
return { accept: true, confidence: 1, reason: 'critic disabled via env' };
|
|
156
|
+
}
|
|
157
|
+
const cfg = loadConfig();
|
|
158
|
+
if (cfg && cfg.critic_enabled === false) {
|
|
159
|
+
return { accept: true, confidence: 1, reason: 'critic disabled in config' };
|
|
160
|
+
}
|
|
161
|
+
const strictness = typeof opts.strictness === 'number'
|
|
162
|
+
? opts.strictness
|
|
163
|
+
: (typeof cfg?.critic_strictness === 'number' ? cfg.critic_strictness : 0.5);
|
|
164
|
+
const resultText = toText(result);
|
|
165
|
+
// Fast path.
|
|
166
|
+
if (isTriviallyValid(tool, resultText)) {
|
|
167
|
+
return { accept: true, confidence: 0.9, reason: 'trivial-valid fast path' };
|
|
168
|
+
}
|
|
169
|
+
// If result is empty, reject without calling LLM.
|
|
170
|
+
if (!resultText || resultText.trim().length === 0) {
|
|
171
|
+
return {
|
|
172
|
+
accept: false,
|
|
173
|
+
confidence: 0.9,
|
|
174
|
+
reason: 'empty tool result',
|
|
175
|
+
retry_hint: 'Tool produced no output. Try different arguments or a different tool.',
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
const userPrompt = buildUserPrompt(tool, args, resultText);
|
|
179
|
+
let callLLM;
|
|
180
|
+
if (opts.llmClient) {
|
|
181
|
+
callLLM = opts.llmClient;
|
|
182
|
+
}
|
|
183
|
+
else {
|
|
184
|
+
const provider = resolveCriticProvider(opts.provider);
|
|
185
|
+
if (!provider) {
|
|
186
|
+
// No usable provider — degrade gracefully.
|
|
187
|
+
return { accept: true, confidence: 0.3, reason: 'no critic provider available' };
|
|
188
|
+
}
|
|
189
|
+
callLLM = provider.provider === 'anthropic'
|
|
190
|
+
? (pr) => callAnthropic(provider, pr)
|
|
191
|
+
: (pr) => callOpenAICompat(provider, pr);
|
|
192
|
+
}
|
|
193
|
+
try {
|
|
194
|
+
const text = await callLLM(userPrompt);
|
|
195
|
+
const verdict = parseVerdict(text);
|
|
196
|
+
if (!verdict) {
|
|
197
|
+
return { accept: true, confidence: 0.3, reason: 'critic returned unparseable output' };
|
|
198
|
+
}
|
|
199
|
+
// Strictness gate: keep a reject only if verdict.confidence >= max(0.1, 1 - strictness) (otherwise soft-accept),
|
|
200
|
+
// and if accepting with very low confidence and strictness is high, flip to reject.
|
|
201
|
+
if (!verdict.accept && verdict.confidence < Math.max(0.1, 1 - strictness)) {
|
|
202
|
+
// The critic rejected but wasn't very sure — let it pass with warning.
|
|
203
|
+
return { ...verdict, accept: true, reason: `soft-accept: ${verdict.reason || 'low-confidence reject'}` };
|
|
204
|
+
}
|
|
205
|
+
if (verdict.accept && strictness > 0.8 && verdict.confidence < 0.3) {
|
|
206
|
+
return {
|
|
207
|
+
accept: false,
|
|
208
|
+
confidence: verdict.confidence,
|
|
209
|
+
reason: 'strict mode: accepted with very low confidence',
|
|
210
|
+
retry_hint: verdict.retry_hint || 'Verify output shape and re-run with stricter arguments.',
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
return verdict;
|
|
214
|
+
}
|
|
215
|
+
catch {
|
|
216
|
+
// Critic call failed — never block the agent loop.
|
|
217
|
+
return { accept: true, confidence: 0.2, reason: 'critic call failed' };
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
//# sourceMappingURL=critic-gate.js.map
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Critic Retrospect — retroactive judgement of past session tool calls.
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.kbot/observer/session.jsonl, replays tool calls through
|
|
5
|
+
* gateToolResult (critic-gate.ts), and reports:
|
|
6
|
+
* - overall accept/reject ratio
|
|
7
|
+
* - tools with highest reject rate (args-validation candidates)
|
|
8
|
+
* - rejects that were later retried successfully (critic false positives)
|
|
9
|
+
* - sessions ranked by "retries saved" score
|
|
10
|
+
* - suggested strictness setting from precision/recall tradeoff
|
|
11
|
+
*
|
|
12
|
+
* NB: the observer only logs {ts, tool, args, session} — no results.
|
|
13
|
+
* We synthesize a *result proxy* from retry behaviour: a call whose exact
|
|
14
|
+
* (tool, args-hash) recurs inside the same session within RETRY_WINDOW_MS
|
|
15
|
+
* is treated as having implicitly failed the first time. The critic is
|
|
16
|
+
* passed this synthesized signal so it can judge on intent + shape.
|
|
17
|
+
*
|
|
18
|
+
* Cache: ~/.kbot/critic-cache.json — keyed by (tool, argsHash, resultHash).
|
|
19
|
+
*
|
|
20
|
+
* CLI wiring: cli.ts was modified in parallel; leaving subcommand wiring
|
|
21
|
+
* as a TODO. Invoke via `node -e "import('./dist/critic-retrospect.js').then(m => m.run())"`.
|
|
22
|
+
*/
|
|
23
|
+
export interface RetrospectOpts {
|
|
24
|
+
sessions?: number;
|
|
25
|
+
strictness?: number;
|
|
26
|
+
jsonOut?: string;
|
|
27
|
+
maxCallsPerSession?: number;
|
|
28
|
+
/** Injectable for tests. */
|
|
29
|
+
llmClient?: (userPrompt: string) => Promise<string>;
|
|
30
|
+
}
|
|
31
|
+
export interface RetrospectReport {
|
|
32
|
+
totalCalls: number;
|
|
33
|
+
sessionsScanned: number;
|
|
34
|
+
sessionsAvailable: number;
|
|
35
|
+
accepts: number;
|
|
36
|
+
rejects: number;
|
|
37
|
+
byTool: Record<string, {
|
|
38
|
+
total: number;
|
|
39
|
+
accepts: number;
|
|
40
|
+
rejects: number;
|
|
41
|
+
}>;
|
|
42
|
+
topRejectRate: Array<{
|
|
43
|
+
tool: string;
|
|
44
|
+
total: number;
|
|
45
|
+
rejectRate: number;
|
|
46
|
+
}>;
|
|
47
|
+
likelyFalsePositives: Array<{
|
|
48
|
+
tool: string;
|
|
49
|
+
session: string;
|
|
50
|
+
retryGap: number;
|
|
51
|
+
reason?: string;
|
|
52
|
+
}>;
|
|
53
|
+
sessionsRanked: Array<{
|
|
54
|
+
session: string;
|
|
55
|
+
calls: number;
|
|
56
|
+
retriesSaved: number;
|
|
57
|
+
score: number;
|
|
58
|
+
}>;
|
|
59
|
+
suggestedStrictness: number;
|
|
60
|
+
precision: number;
|
|
61
|
+
recall: number;
|
|
62
|
+
}
|
|
63
|
+
export declare function run(opts?: RetrospectOpts): Promise<RetrospectReport>;
|
|
64
|
+
//# sourceMappingURL=critic-retrospect.d.ts.map
|