@kernel.chat/kbot 3.99.20 → 3.99.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +11 -0
  2. package/dist/agent.js +23 -0
  3. package/dist/agents/producer.js +65 -23
  4. package/dist/auth.d.ts +2 -0
  5. package/dist/cli.js +7 -4
  6. package/dist/critic-gate.d.ts +29 -0
  7. package/dist/critic-gate.js +223 -0
  8. package/dist/critic-retrospect.d.ts +64 -0
  9. package/dist/critic-retrospect.js +279 -0
  10. package/dist/critic-taxonomy.d.ts +40 -0
  11. package/dist/critic-taxonomy.js +146 -0
  12. package/dist/growth.d.ts +37 -0
  13. package/dist/growth.js +272 -0
  14. package/dist/integrations/ableton.d.ts +30 -0
  15. package/dist/integrations/ableton.js +66 -0
  16. package/dist/integrations/kbot-control-client.d.ts +66 -0
  17. package/dist/integrations/kbot-control-client.js +224 -0
  18. package/dist/observer.d.ts +13 -0
  19. package/dist/observer.js +5 -1
  20. package/dist/planner/hierarchical/dag.d.ts +71 -0
  21. package/dist/planner/hierarchical/dag.js +97 -0
  22. package/dist/planner/hierarchical/persistence.d.ts +26 -0
  23. package/dist/planner/hierarchical/persistence.js +113 -0
  24. package/dist/planner/hierarchical/session-planner.d.ts +68 -0
  25. package/dist/planner/hierarchical/session-planner.js +141 -0
  26. package/dist/planner/hierarchical/types.d.ts +116 -0
  27. package/dist/planner/hierarchical/types.js +18 -0
  28. package/dist/tool-pipeline.d.ts +39 -1
  29. package/dist/tool-pipeline.js +109 -1
  30. package/dist/tools/ableton-listen.d.ts +2 -0
  31. package/dist/tools/ableton-listen.js +126 -0
  32. package/dist/tools/ableton.js +477 -12
  33. package/dist/tools/index.js +2 -0
  34. package/dist/tools/kbot-control.d.ts +2 -0
  35. package/dist/tools/kbot-control.js +63 -0
  36. package/package.json +1 -1
package/README.md CHANGED
@@ -126,6 +126,16 @@ kbot attack_surface_scan --domain x.com # Passive recon + security headers
126
126
  kbot incident_response --type ransomware # Generate IR playbook
127
127
  ```
128
128
 
129
+ ### Design From Your Terminal
130
+
131
+ ```bash
132
+ kbot design "a minimal pitch deck cover for our product" --kind deck --pdf --open
133
+ kbot design "landing page hero with our brand colors" --kind page
134
+ kbot design "interactive prototype of a chat inbox" --kind prototype
135
+ ```
136
+
137
+ Local-first alternative to Anthropic's Claude Design. kbot reads your repo's CSS design tokens, typography, and component patterns, then generates a single complete HTML file — no external deps, mobile-first, a11y-clean — that matches your visual system. Optional Playwright-backed PDF export. Runs on your local model at $0, ships with `@kernel.chat/kbot`. No subscription, no upload, no cloud.
138
+
129
139
  ### Audit Any Repo in One Command
130
140
 
131
141
  ```
@@ -151,6 +161,7 @@ Checks security, documentation, code quality, CI/CD, community health, and DevOp
151
161
  | Buddy companion | Yes (8 species) | No | No | No | No |
152
162
  | iPhone control | Yes | No | No | No | No |
153
163
  | Music production | Ableton Live | No | No | No | No |
164
+ | Visual design | `kbot design` (local, $0) | Separate Claude Design subscription | No | No | No |
154
165
  | Financial analysis | Multi-agent | No | No | No | No |
155
166
  | Threat intelligence | Yes | No | No | No | No |
156
167
  | Buddy leaderboard | kernel.chat | No | No | No | No |
package/dist/agent.js CHANGED
@@ -1084,6 +1084,8 @@ Always quote file paths that contain spaces. Never reference internal system nam
1084
1084
  let toolCallCount = 0;
1085
1085
  let lastResponse = null;
1086
1086
  const toolSequenceLog = [];
1087
+ // Adversarial critic: track per-(tool+args) retry counts so we don't loop forever on reject.
1088
+ const criticRetryCounts = new Map();
1087
1089
  const toolSequenceWithArgs = [];
1088
1090
  const originalMessage = message;
1089
1091
  let cumulativeCostUsd = 0;
@@ -1726,6 +1728,27 @@ Always quote file paths that contain spaces. Never reference internal system nam
1726
1728
  error: !!ctx.error || ctx.aborted,
1727
1729
  duration_ms: ctx.durationMs,
1728
1730
  };
1731
+ // ── Adversarial critic gate (generator/discriminator on tool output) ──
1732
+ if (!result.error && process.env.KBOT_NO_CRITIC !== '1') {
1733
+ try {
1734
+ const { gateToolResult } = await import('./critic-gate.js');
1735
+ const key = `${call.name}::${JSON.stringify(call.arguments || {}).slice(0, 200)}`;
1736
+ const prior = criticRetryCounts.get(key) || 0;
1737
+ const verdict = await gateToolResult(call.name, call.arguments || {}, result.result, {});
1738
+ if (!verdict.accept) {
1739
+ if (prior >= 2) {
1740
+ result.result = `[critic-warning: ${verdict.reason || 'rejected'} — accepted after ${prior} retries] ${result.result}`;
1741
+ }
1742
+ else {
1743
+ criticRetryCounts.set(key, prior + 1);
1744
+ result.result = `[critic-reject] ${verdict.reason || 'output rejected'}\n` +
1745
+ `Retry hint: ${verdict.retry_hint || 'Try different arguments or a different tool.'}\n` +
1746
+ `Original output (for reference, do not trust):\n${result.result.slice(0, 1000)}`;
1747
+ }
1748
+ }
1749
+ }
1750
+ catch { /* critic is non-critical */ }
1751
+ }
1729
1752
  results.push(result);
1730
1753
  ui.onToolCallEnd(call.name, result.result, result.error ? result.result : undefined, result.duration_ms);
1731
1754
  // Update buddy mood based on tool outcome (content-aware reactions)
@@ -9,48 +9,90 @@ export const PRODUCER_PRESET = {
9
9
 
10
10
  You don't suggest — you execute. When Isaac says "add reverb to the vocals", you add the reverb. When he says "write me a chord progression", you write the MIDI. You act through kbot's Ableton tools which send OSC commands directly to Ableton Live.
11
11
 
12
- ## Your Tools
12
+ ## Your Tools — 21 Ableton tools + 4 bridge tools + Serum 2 + M4L + computer-use fallback
13
13
 
14
14
  ### Session Awareness
15
- - **ableton_session_info** — ALWAYS call this first to understand the current session state (tracks, clips, tempo, devices, what's playing)
16
- - **ableton_knowledge** — Query the deep knowledge base for device parameters, effect chains, mixing advice, genre templates
15
+ - **ableton_session_info** — ALWAYS call this first to understand the session state (tracks, clips, tempo, devices, what's playing)
16
+ - **ableton_knowledge** — Query the deep knowledge base for device parameters, effect chains, mixing, genre templates
17
17
 
18
- ### Transport & Navigation
19
- - **ableton_transport** — Play, stop, record, set tempo, time signature, seek to position
18
+ ### Transport & Song
19
+ - **ableton_transport** — play, stop, record, tempo, time_sig, seek
20
+ - **ableton_song** — undo, redo, tap_tempo, metronome, punch_in/out, cue points (add/delete/next/prev/jump), loop + loop_start + loop_length, back_to_arranger, capture_midi, stop_all_clips, groove, record_mode (arrangement record), jump_by, status. Use this for anything at the song level the transport tool doesn't cover.
20
21
 
21
- ### Track Operations
22
- - **ableton_track** — List, create, mute, solo, arm, rename, volume, pan, color, delete tracks
22
+ ### Tracks
23
+ - **ableton_track** — list, mute, solo, arm, volume, pan, rename, info, color (0-69), monitoring (in/auto/off), input_routing, output_routing, **set** (generic escape hatch over any AbletonOSC track setter)
24
+ - **ableton_create_track** — create midi / audio / **return** track, optionally auto-load an instrument on midi tracks
23
25
 
24
- ### Clip & Scene Control
25
- - **ableton_clip** — Fire, stop, create, delete, duplicate clips in session view
26
- - **ableton_scene** — Fire, list, create, duplicate scenes
26
+ ### Clips
27
+ - **ableton_clip** — fire, stop, create, delete, duplicate, info, list, **set** (generic setter — use property=name to change any clip property: gain, pitch_coarse, pitch_fine, loop_start, loop_end, start_marker, end_marker, velocity_amount, color_index, launch_mode, launch_quantization, warp_mode, warping, legato, muted, ram_mode)
28
+
29
+ ### Scenes
30
+ - **ableton_scene** — fire, list, create, duplicate, rename
31
+
32
+ ### View & Selection (drive the Ableton cursor)
33
+ - **ableton_view** — get/set the currently selected scene, track, clip, or device. Use set before opening a device to put the UI where the user can see it.
27
34
 
28
35
  ### MIDI & Composition
29
- - **ableton_midi** — Write/read/clear MIDI notes in clips (pitch, velocity, duration arrays)
30
- - **ableton_create_progression** — Generate chord progressions from natural language and write directly into clips. Supports:
36
+ - **ableton_midi** — write/read/clear MIDI notes. notes param is JSON: [{"pitch":60,"start":0,"duration":1,"velocity":100}]
37
+ - **ableton_create_progression** — chord progressions as MIDI from natural language:
31
38
  - Roman numerals: "ii V I" in any key
32
39
  - Chord symbols: "Cmaj7 Am7 Fmaj7 G7"
33
40
  - Named progressions: "Andalusian cadence", "Coltrane changes", "12-bar blues"
34
41
  - 6 voicing styles: close, open, drop-2, drop-3, spread, shell
35
- - Rhythm patterns: whole, half, quarter, eighth, arpeggiated
42
+ - Rhythm: whole, half, quarter, eighth, arpeggio_up, arpeggio_down
43
+
44
+ ### Instruments, Samples, Drum Racks
45
+ - **ableton_load_plugin** — load any native or VST/AU instrument by name. OSC-first, falls back to AppleScript browser automation on macOS.
46
+ - **ableton_load_sample** — load a sample (wav/aif) from User Library into a Drum Rack pad
47
+ - **ableton_build_drum_rack** — one-shot: create rack + load samples + write pattern
36
48
 
37
49
  ### Mixing & Effects
38
- - **ableton_device** — List devices on tracks, get/set any parameter, enable/disable, browse by name
39
- - **ableton_mixer** — Snapshot all levels, batch-set volumes/pans/sends, crossfader
50
+ - **ableton_device** — list, params, set, enable, disable, info
51
+ - **ableton_mixer** — snapshot all levels, batch-set volumes/pans, set sends
52
+ - **ableton_load_effect** — load an audio effect onto a track
53
+ - **ableton_effect_chain** — build multi-device effect chains in one call
54
+ - **ableton_browse** / **ableton_load_preset** — navigate the library, load presets
55
+
56
+ ### Audio Analysis
57
+ - **ableton_audio_analysis** — real-time L/R RMS meters, peak detection, per-track or master
58
+
59
+ ### Serum 2 (Xfer Records synth, 542 parameters)
60
+ - **serum2_preset** — list kbot's built-in Serum 2 presets, install them to Serum's User folder, or create a new .SerumPreset file from parameter overrides. Use this when Isaac wants a Serum sound designed programmatically rather than tweaked by hand.
61
+
62
+ ### Custom Max-for-Live devices (kbot's own, in ~/Music/Ableton/User Library)
63
+ 9 instruments/effects Isaac built: kbot-auto-pilot, kbot-bass-synth, kbot-dj-fx, kbot-drum-synth, kbot-genre-morph, kbot-hat-machine, kbot-pad-synth, kbot-riser-engine, kbot-sidechain. Load them via **ableton_load_plugin** with name="kbot-bass-synth" etc.
40
64
 
41
- ### Music Theory
65
+ ### Music Theory Helpers
42
66
  - **magenta_continue** — AI melody continuation from a seed
43
67
  - **magenta_harmonize** — Generate harmonically aware chord progressions
44
68
  - **magenta_drumify** — Generate genre-specific drum patterns
45
69
 
46
- ## Workflow Protocol
70
+ ### Computer-use fallback (when OSC can't do it)
71
+ For operations AbletonOSC doesn't expose — drag-and-drop from browser, Max-device internal UI, some preference dialogs, some VST wrapper UI — fall back to Claude's computer-use MCP:
72
+ - **mcp__computer-use__screenshot** to see the current UI
73
+ - **mcp__computer-use__left_click** / **double_click** / **type** / **key** to drive it
74
+ - Pattern for loading a device the OSC path can't: (1) focus the target track with ableton_view set track, (2) click the Instruments panel in the browser (left sidebar), (3) click the search field and type the device name, (4) double-click the result. This also works for audio effects, presets, and samples.
75
+
76
+ If OSC's ableton_load_plugin reports success but ableton_device list shows no device, silently fall through to the computer-use path — don't tell Isaac it failed unless both paths fail.
77
+
78
+ ## Workflow Protocol — OSC + computer-use as ONE surface
79
+
80
+ **You can control every inch of Ableton.** OSC handles the programmatic 95%. For anything OSC can't reach, computer-use closes the gap. Never tell Isaac "that's not supported" without trying the fallback path first.
81
+
82
+ 1. **Always check state first**: Call ableton_session_info before any operation so you know what tracks exist, what's armed, what's playing.
83
+ 2. **Track numbers**: Isaac says "track 1" (1-based), OSC uses 0-based. Always convert: track 1 = index 0.
84
+ 3. **Find by name**: If Isaac says "the bass track" or "the drums", scan track names from session_info and match.
85
+ 4. **Chain operations**: For complex requests, chain multiple tool calls. "Set up a house track" = create tracks + set tempo + add instruments + add effects + write MIDI patterns. Don't ask for permission on each step — execute the whole chain and report the result.
86
+ 5. **Report musically**: Don't say "set parameter 3 to 0.7". Say "set the reverb decay to 3.2 seconds" or "compressed the vocals at 4:1 with a medium attack".
87
+ 6. **Use knowledge**: Before adding effects, check ableton_knowledge for the best device and parameter settings for the goal.
88
+ 7. **Verify, don't assume**: After loading a plugin or creating a track, list devices or query to confirm it actually took. OSC calls can return OK and still silently drop on the Live side.
89
+ 8. **Fallback protocol (OSC → computer-use)**: When an OSC write returns OK but the verification read shows nothing happened — OR when the operation is inherently UI-only (drag-drop from browser to track, internal Max UI, VST wrapper settings, preset scrolling in Live's native browser, device presets in VST3 wrapper) — immediately fall through to computer-use. Sequence: ableton_view set track → screenshot → click → type → double_click → screenshot to confirm. No narrative between steps; just execute.
90
+ 9. **Arrangement vs session**: Isaac works in session view by default. For arrangement work, use ableton_song back_to_arranger to switch, then record_mode for arrangement recording, jump_by for timeline navigation.
91
+ 10. **Session lock**: If computer-use is needed, kbot's own computer-use tools (--computer-use flag) share a lock file. Prefer Claude's native computer-use MCP (mcp__computer-use__*) which is independent.
92
+
93
+ ## What "full control" means here
47
94
 
48
- 1. **Always check state first**: Call ableton_session_info before any operation so you know what tracks exist, what's armed, what's playing
49
- 2. **Track numbers**: Isaac says "track 1" (1-based), OSC uses 0-based. Always convert: track 1 = index 0
50
- 3. **Find by name**: If Isaac says "the bass track" or "the drums", scan track names from session_info and find the matching index
51
- 4. **Chain operations**: For complex requests, chain multiple tool calls. "Set up a house track" = create tracks + set tempo + add instruments + add effects + write MIDI patterns
52
- 5. **Report musically**: Don't say "set parameter 3 to 0.7". Say "set the reverb decay to 3.2 seconds" or "compressed the vocals at 4:1 with a medium attack"
53
- 6. **Use knowledge**: Before adding effects, check ableton_knowledge for the best device and parameter settings for the goal
95
+ You have write access to every AbletonOSC address (transport, song, track, clip, scene, device, view, mixer, MIDI, routing). For the operations Ableton exposes only through its UI, you have computer-use. Between them, the set of "things I can make Ableton do" equals the set of "things a human producer can make Ableton do." The only thing you cannot do is hear — so Isaac remains the ears. Everything else is executable.
54
96
 
55
97
  ## Deep Ableton Knowledge
56
98
 
package/dist/auth.d.ts CHANGED
@@ -20,6 +20,8 @@ export interface KbotConfig {
20
20
  byok_enabled?: boolean;
21
21
  byok_provider?: ByokProvider;
22
22
  kernel_token?: string;
23
+ critic_enabled?: boolean;
24
+ critic_strictness?: number;
23
25
  }
24
26
  export declare function loadConfig(): KbotConfig | null;
25
27
  export declare function saveConfig(config: KbotConfig): void;
package/dist/cli.js CHANGED
@@ -971,10 +971,13 @@ async function main() {
971
971
  });
972
972
  program
973
973
  .command('growth')
974
- .description('See how you\'ve evolved as a buildermilestones, efficiency gains, knowledge arc')
975
- .action(async () => {
976
- const { generateGrowthReport } = await import('./introspection.js');
977
- process.stderr.write(generateGrowthReport());
974
+ .description('See how kbot has improved at your tasks tool success rate, routing accuracy, learned patterns')
975
+ .option('--json', 'Output JSON instead of pretty table')
976
+ .option('--days <n>', 'Compare last N days vs prior N days', '7')
977
+ .action(async (opts) => {
978
+ const { runGrowth } = await import('./growth.js');
979
+ const days = opts.days ? Number.parseInt(opts.days, 10) : 7;
980
+ runGrowth({ json: opts.json, days: Number.isFinite(days) && days > 0 ? days : 7 });
978
981
  });
979
982
  program
980
983
  .command('decisions')
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Critic Gate — adversarial discriminator on tool outputs.
3
+ * Generator/discriminator pattern: critic reviews each tool result before the
4
+ * main LLM sees it. Fast path auto-accepts trivial results. Config via
5
+ * ~/.kbot/config.json: critic_enabled (bool), critic_strictness (0..1).
6
+ * Hard disable: env KBOT_NO_CRITIC=1.
7
+ */
8
+ import { type RFClass } from './critic-taxonomy.js';
9
+ export interface CriticVerdict {
10
+ accept: boolean;
11
+ reason?: string;
12
+ retry_hint?: string;
13
+ confidence: number;
14
+ /** RF taxonomy class when a rule-based classifier fired (arXiv:2601.22208). */
15
+ failure_class?: RFClass;
16
+ }
17
+ export interface GateOpts {
18
+ strictness?: number;
19
+ provider?: string;
20
+ /** Optional LLM client override — takes user prompt, returns raw text. For testing. */
21
+ llmClient?: (userPrompt: string) => Promise<string>;
22
+ }
23
+ /**
24
+ * Gate a tool result through the adversarial critic.
25
+ * Never throws — on any failure, returns accept=true with low confidence so
26
+ * the agent loop is never blocked by the critic itself.
27
+ */
28
+ export declare function gateToolResult(tool: string, args: Record<string, unknown>, result: unknown, opts?: GateOpts): Promise<CriticVerdict>;
29
+ //# sourceMappingURL=critic-gate.d.ts.map
@@ -0,0 +1,223 @@
1
+ /**
2
+ * Critic Gate — adversarial discriminator on tool outputs.
3
+ * Generator/discriminator pattern: critic reviews each tool result before the
4
+ * main LLM sees it. Fast path auto-accepts trivial results. Config via
5
+ * ~/.kbot/config.json: critic_enabled (bool), critic_strictness (0..1).
6
+ * Hard disable: env KBOT_NO_CRITIC=1.
7
+ */
8
+ import { loadConfig } from './auth.js';
9
+ import { classifyToolResult } from './critic-taxonomy.js';
10
+ const TRUSTED_TOOLS = new Set([
11
+ 'read', 'read_file', 'kbot_read', 'kbot_read_file',
12
+ 'glob', 'kbot_glob', 'grep', 'kbot_grep', 'list_directory', 'ls',
13
+ 'git_status', 'git_log', 'git_diff', 'git_branch',
14
+ 'terminal_cwd', 'env_check', 'memory_recall', 'memory_search',
15
+ ]);
16
+ const ERROR_KEYWORDS = [
17
+ 'tool error:', 'error:', 'enoent', 'permission denied', 'eacces',
18
+ 'not found', 'failed to', 'traceback', 'stack trace',
19
+ 'undefined is not', 'cannot read prop', 'refused',
20
+ ];
21
+ const MAX_ARGS_CHARS = 500;
22
+ const MAX_RESULT_CHARS = 2000;
23
+ const TRIVIAL_MAX_BYTES = 10 * 1024;
24
+ function truncate(s, max) {
25
+ if (s.length <= max)
26
+ return s;
27
+ return s.slice(0, max) + `\n…[truncated, original ${s.length} chars]`;
28
+ }
29
+ function toText(x) {
30
+ if (x == null)
31
+ return '';
32
+ if (typeof x === 'string')
33
+ return x;
34
+ try {
35
+ return JSON.stringify(x);
36
+ }
37
+ catch {
38
+ return String(x);
39
+ }
40
+ }
41
+ function hasErrorKeyword(text) {
42
+ const lower = text.toLowerCase();
43
+ return ERROR_KEYWORDS.some(k => lower.includes(k));
44
+ }
45
+ /** True if the result is plausibly fine without calling a critic LLM. */
46
+ function isTriviallyValid(tool, resultText) {
47
+ if (!resultText || resultText.trim().length === 0)
48
+ return false;
49
+ if (resultText.length > TRIVIAL_MAX_BYTES)
50
+ return false;
51
+ if (hasErrorKeyword(resultText))
52
+ return false;
53
+ if (TRUSTED_TOOLS.has(tool))
54
+ return true;
55
+ return false;
56
+ }
57
+ function resolveCriticProvider(override) {
58
+ const cfg = loadConfig();
59
+ const provider = (override || cfg?.byok_provider || 'anthropic').toLowerCase();
60
+ const localModel = cfg?.default_model && cfg.default_model !== 'auto' ? cfg.default_model : 'llama3.2:3b';
61
+ if (provider === 'ollama' || provider === 'kbot-local') {
62
+ return { provider, model: localModel, apiKey: 'local', apiUrl: 'http://localhost:11434/v1/chat/completions' };
63
+ }
64
+ if (provider === 'openai') {
65
+ if (!cfg?.byok_key)
66
+ return null;
67
+ return { provider: 'openai', model: 'gpt-4o-mini', apiKey: cfg.byok_key, apiUrl: 'https://api.openai.com/v1/chat/completions' };
68
+ }
69
+ if (!cfg?.byok_key)
70
+ return null;
71
+ return { provider: 'anthropic', model: 'claude-haiku-4-5', apiKey: cfg.byok_key, apiUrl: 'https://api.anthropic.com/v1/messages' };
72
+ }
73
+ const CRITIC_SYSTEM = 'You are a strict senior engineer reviewing a tool output. ' +
74
+ 'Did this tool call produce a useful, correct, non-hallucinated result ' +
75
+ 'for the stated intent? Return ONLY JSON with keys: ' +
76
+ '{"accept": bool, "reason": string, "retry_hint": string, "confidence": number between 0 and 1}. ' +
77
+ 'No prose, no code fences — JSON only.';
78
+ function buildUserPrompt(tool, args, result) {
79
+ const argsText = truncate(toText(args), MAX_ARGS_CHARS);
80
+ const resultText = truncate(toText(result), MAX_RESULT_CHARS);
81
+ return `TOOL: ${tool}\n\nARGS:\n${argsText}\n\nRESULT:\n${resultText}`;
82
+ }
83
+ function parseVerdict(text) {
84
+ if (!text)
85
+ return null;
86
+ // Strip fences/prose: greedy match spanning the first '{' through the last '}'.
87
+ const match = text.match(/\{[\s\S]*\}/);
88
+ if (!match)
89
+ return null;
90
+ try {
91
+ const raw = JSON.parse(match[0]);
92
+ const confidence = typeof raw.confidence === 'number'
93
+ ? Math.max(0, Math.min(1, raw.confidence))
94
+ : 0.5;
95
+ return {
96
+ accept: !!raw.accept,
97
+ reason: typeof raw.reason === 'string' ? raw.reason : undefined,
98
+ retry_hint: typeof raw.retry_hint === 'string' ? raw.retry_hint : undefined,
99
+ confidence,
100
+ };
101
+ }
102
+ catch {
103
+ return null;
104
+ }
105
+ }
106
+ async function callAnthropic(p, userPrompt) {
107
+ const res = await fetch(p.apiUrl, {
108
+ method: 'POST',
109
+ headers: {
110
+ 'Content-Type': 'application/json',
111
+ 'x-api-key': p.apiKey,
112
+ 'anthropic-version': '2023-06-01',
113
+ },
114
+ body: JSON.stringify({
115
+ model: p.model,
116
+ max_tokens: 256,
117
+ system: CRITIC_SYSTEM,
118
+ messages: [{ role: 'user', content: userPrompt }],
119
+ }),
120
+ signal: AbortSignal.timeout(15_000),
121
+ });
122
+ if (!res.ok)
123
+ throw new Error(`critic HTTP ${res.status}`);
124
+ const data = await res.json();
125
+ return (data.content || []).filter(b => b.type === 'text').map(b => b.text || '').join('');
126
+ }
127
+ async function callOpenAICompat(p, userPrompt) {
128
+ const headers = { 'Content-Type': 'application/json' };
129
+ if (p.apiKey && p.apiKey !== 'local')
130
+ headers['Authorization'] = `Bearer ${p.apiKey}`;
131
+ const res = await fetch(p.apiUrl, {
132
+ method: 'POST',
133
+ headers,
134
+ body: JSON.stringify({
135
+ model: p.model,
136
+ max_tokens: 256,
137
+ messages: [
138
+ { role: 'system', content: CRITIC_SYSTEM },
139
+ { role: 'user', content: userPrompt },
140
+ ],
141
+ }),
142
+ signal: AbortSignal.timeout(15_000),
143
+ });
144
+ if (!res.ok)
145
+ throw new Error(`critic HTTP ${res.status}`);
146
+ const data = await res.json();
147
+ return data.choices?.[0]?.message?.content || '';
148
+ }
149
+ /**
150
+ * Gate a tool result through the adversarial critic.
151
+ * Never throws — on any failure, returns accept=true with low confidence so
152
+ * the agent loop is never blocked by the critic itself.
153
+ */
154
+ export async function gateToolResult(tool, args, result, opts = {}) {
155
+ if (process.env.KBOT_NO_CRITIC === '1') {
156
+ return { accept: true, confidence: 1, reason: 'critic disabled via env' };
157
+ }
158
+ const cfg = loadConfig();
159
+ if (cfg && cfg.critic_enabled === false) {
160
+ return { accept: true, confidence: 1, reason: 'critic disabled in config' };
161
+ }
162
+ const strictness = typeof opts.strictness === 'number'
163
+ ? opts.strictness
164
+ : (typeof cfg?.critic_strictness === 'number' ? cfg.critic_strictness : 0.5);
165
+ const resultText = toText(result);
166
+ // Fast path.
167
+ if (isTriviallyValid(tool, resultText)) {
168
+ return { accept: true, confidence: 0.9, reason: 'trivial-valid fast path' };
169
+ }
170
+ // Rule-based RF classifier — cheap, no LLM. High-confidence hits short-circuit.
171
+ const rf = classifyToolResult(resultText);
172
+ if (rf && rf.confidence >= 0.8) {
173
+ return {
174
+ accept: false,
175
+ confidence: rf.confidence,
176
+ reason: `${rf.class}: ${rf.evidence}`,
177
+ retry_hint: 'Taxonomy match — try different arguments or a different tool.',
178
+ failure_class: rf.class,
179
+ };
180
+ }
181
+ const userPrompt = buildUserPrompt(tool, args, resultText);
182
+ let callLLM;
183
+ if (opts.llmClient) {
184
+ callLLM = opts.llmClient;
185
+ }
186
+ else {
187
+ const provider = resolveCriticProvider(opts.provider);
188
+ if (!provider) {
189
+ // No usable provider — degrade gracefully.
190
+ return { accept: true, confidence: 0.3, reason: 'no critic provider available' };
191
+ }
192
+ callLLM = provider.provider === 'anthropic'
193
+ ? (pr) => callAnthropic(provider, pr)
194
+ : (pr) => callOpenAICompat(provider, pr);
195
+ }
196
+ try {
197
+ const text = await callLLM(userPrompt);
198
+ const verdict = parseVerdict(text);
199
+ if (!verdict) {
200
+ return { accept: true, confidence: 0.3, reason: 'critic returned unparseable output' };
201
+ }
202
+ // Strictness gate: a reject stands only when verdict.confidence >= max(0.1, 1 - strictness);
203
+ // an accept with confidence < 0.3 is flipped to a reject when strictness > 0.8.
204
+ if (!verdict.accept && verdict.confidence < Math.max(0.1, 1 - strictness)) {
205
+ // The critic rejected but wasn't very sure — let it pass with warning.
206
+ return { ...verdict, accept: true, reason: `soft-accept: ${verdict.reason || 'low-confidence reject'}` };
207
+ }
208
+ if (verdict.accept && strictness > 0.8 && verdict.confidence < 0.3) {
209
+ return {
210
+ accept: false,
211
+ confidence: verdict.confidence,
212
+ reason: 'strict mode: accepted with very low confidence',
213
+ retry_hint: verdict.retry_hint || 'Verify output shape and re-run with stricter arguments.',
214
+ };
215
+ }
216
+ return verdict;
217
+ }
218
+ catch {
219
+ // Critic call failed — never block the agent loop.
220
+ return { accept: true, confidence: 0.2, reason: 'critic call failed' };
221
+ }
222
+ }
223
+ //# sourceMappingURL=critic-gate.js.map
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Critic Retrospect — retroactive judgement of past session tool calls.
3
+ *
4
+ * Reads ~/.kbot/observer/session.jsonl, replays tool calls through
5
+ * gateToolResult (critic-gate.ts), and reports:
6
+ * - overall accept/reject ratio
7
+ * - tools with highest reject rate (args-validation candidates)
8
+ * - rejects that were later retried successfully (critic false positives)
9
+ * - sessions ranked by "retries saved" score
10
+ * - suggested strictness setting from precision/recall tradeoff
11
+ *
12
+ * NB: the observer only logs {ts, tool, args, session} — no results.
13
+ * We synthesize a *result proxy* from retry behaviour: a call whose exact
14
+ * (tool, args-hash) recurs inside the same session within RETRY_WINDOW_MS
15
+ * is treated as having implicitly failed the first time. The critic is
16
+ * passed this synthesized signal so it can judge on intent + shape.
17
+ *
18
+ * Cache: ~/.kbot/critic-cache.json — keyed by (tool, argsHash, resultHash).
19
+ *
20
+ * CLI wiring: cli.ts was modified in parallel; leaving subcommand wiring
21
+ * as a TODO. Invoke via `node -e "import('./dist/critic-retrospect.js').then(m => m.run())"`.
22
+ */
23
+ export interface RetrospectOpts {
24
+ sessions?: number;
25
+ strictness?: number;
26
+ jsonOut?: string;
27
+ maxCallsPerSession?: number;
28
+ /** Injectable for tests. */
29
+ llmClient?: (userPrompt: string) => Promise<string>;
30
+ }
31
+ export interface RetrospectReport {
32
+ totalCalls: number;
33
+ sessionsScanned: number;
34
+ sessionsAvailable: number;
35
+ accepts: number;
36
+ rejects: number;
37
+ byTool: Record<string, {
38
+ total: number;
39
+ accepts: number;
40
+ rejects: number;
41
+ }>;
42
+ topRejectRate: Array<{
43
+ tool: string;
44
+ total: number;
45
+ rejectRate: number;
46
+ }>;
47
+ likelyFalsePositives: Array<{
48
+ tool: string;
49
+ session: string;
50
+ retryGap: number;
51
+ reason?: string;
52
+ }>;
53
+ sessionsRanked: Array<{
54
+ session: string;
55
+ calls: number;
56
+ retriesSaved: number;
57
+ score: number;
58
+ }>;
59
+ suggestedStrictness: number;
60
+ precision: number;
61
+ recall: number;
62
+ }
63
+ export declare function run(opts?: RetrospectOpts): Promise<RetrospectReport>;
64
+ //# sourceMappingURL=critic-retrospect.d.ts.map