@thispointon/kondi-chat 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +556 -0
- package/bin/kondi-chat +56 -0
- package/bin/kondi-chat.js +72 -0
- package/package.json +55 -0
- package/scripts/demo.tape +49 -0
- package/scripts/postinstall.cjs +103 -0
- package/src/audit/analytics.ts +261 -0
- package/src/audit/ledger.ts +253 -0
- package/src/audit/telemetry.ts +165 -0
- package/src/cli/backend.ts +675 -0
- package/src/cli/commands.ts +419 -0
- package/src/cli/help.ts +182 -0
- package/src/cli/submit-helpers.ts +159 -0
- package/src/cli/submit.ts +539 -0
- package/src/cli/wizard.ts +121 -0
- package/src/context/bootstrap.ts +138 -0
- package/src/context/budget.ts +100 -0
- package/src/context/manager.ts +666 -0
- package/src/context/memory.ts +160 -0
- package/src/context/preflight.ts +176 -0
- package/src/context/project-brain.ts +101 -0
- package/src/context/receipts.ts +108 -0
- package/src/context/skills.ts +154 -0
- package/src/context/symbol-index.ts +240 -0
- package/src/council/profiles.ts +137 -0
- package/src/council/tool.ts +138 -0
- package/src/council-engine/cli/council-artifacts.ts +230 -0
- package/src/council-engine/cli/council-config.ts +178 -0
- package/src/council-engine/cli/council-session-export.ts +116 -0
- package/src/council-engine/cli/kondi.ts +98 -0
- package/src/council-engine/cli/llm-caller.ts +229 -0
- package/src/council-engine/cli/localStorage-shim.ts +119 -0
- package/src/council-engine/cli/node-platform.ts +68 -0
- package/src/council-engine/cli/run-council.ts +481 -0
- package/src/council-engine/cli/run-pipeline.ts +772 -0
- package/src/council-engine/cli/session-export.ts +153 -0
- package/src/council-engine/configs/councils/analysis.json +101 -0
- package/src/council-engine/configs/councils/code-planning.json +86 -0
- package/src/council-engine/configs/councils/coding.json +89 -0
- package/src/council-engine/configs/councils/debate.json +97 -0
- package/src/council-engine/configs/councils/solo-claude.json +34 -0
- package/src/council-engine/configs/councils/solo-gpt.json +34 -0
- package/src/council-engine/council/coding-orchestrator.ts +1205 -0
- package/src/council-engine/council/context-bootstrap.ts +147 -0
- package/src/council-engine/council/context-inspection.ts +42 -0
- package/src/council-engine/council/context-store.ts +763 -0
- package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
- package/src/council-engine/council/factory.ts +164 -0
- package/src/council-engine/council/index.ts +201 -0
- package/src/council-engine/council/ledger-store.ts +438 -0
- package/src/council-engine/council/prompts.ts +1689 -0
- package/src/council-engine/council/storage-cleanup.ts +164 -0
- package/src/council-engine/council/store.ts +1110 -0
- package/src/council-engine/council/synthesis.ts +291 -0
- package/src/council-engine/council/types.ts +845 -0
- package/src/council-engine/council/validation.ts +613 -0
- package/src/council-engine/pipeline/build-detect.ts +73 -0
- package/src/council-engine/pipeline/executor.ts +1048 -0
- package/src/council-engine/pipeline/index.ts +9 -0
- package/src/council-engine/pipeline/install-detect.ts +84 -0
- package/src/council-engine/pipeline/memory-store.ts +182 -0
- package/src/council-engine/pipeline/output-parsers.ts +146 -0
- package/src/council-engine/pipeline/run-output.ts +149 -0
- package/src/council-engine/pipeline/session-import.ts +177 -0
- package/src/council-engine/pipeline/store.ts +753 -0
- package/src/council-engine/pipeline/test-detect.ts +82 -0
- package/src/council-engine/pipeline/types.ts +401 -0
- package/src/council-engine/services/deliberationSummary.ts +114 -0
- package/src/council-engine/tsconfig.json +16 -0
- package/src/council-engine/types/mcp.ts +122 -0
- package/src/council-engine/utils/filterTools.ts +73 -0
- package/src/engine/apply.ts +238 -0
- package/src/engine/checkpoints.ts +237 -0
- package/src/engine/consultants.ts +347 -0
- package/src/engine/diff.ts +171 -0
- package/src/engine/errors.ts +102 -0
- package/src/engine/git-tools.ts +246 -0
- package/src/engine/hooks.ts +181 -0
- package/src/engine/loop-guard.ts +155 -0
- package/src/engine/permissions.ts +293 -0
- package/src/engine/pipeline.ts +376 -0
- package/src/engine/sub-agents.ts +133 -0
- package/src/engine/task-card.ts +185 -0
- package/src/engine/task-router.ts +256 -0
- package/src/engine/task-store.ts +86 -0
- package/src/engine/tools.ts +783 -0
- package/src/engine/verify.ts +111 -0
- package/src/mcp/client.ts +225 -0
- package/src/mcp/config.ts +120 -0
- package/src/mcp/tool-manager.ts +192 -0
- package/src/mcp/types.ts +61 -0
- package/src/providers/llm-caller.ts +943 -0
- package/src/providers/rate-limiter.ts +238 -0
- package/src/router/NOTES.md +28 -0
- package/src/router/collector.ts +474 -0
- package/src/router/embeddings.ts +286 -0
- package/src/router/index.ts +299 -0
- package/src/router/intent-router.ts +225 -0
- package/src/router/nn-router.ts +205 -0
- package/src/router/profiles.ts +309 -0
- package/src/router/registry.ts +565 -0
- package/src/router/rules.ts +274 -0
- package/src/router/train.py +408 -0
- package/src/session/store.ts +211 -0
- package/src/test-utils/mock-llm.ts +39 -0
- package/src/types.ts +322 -0
- package/src/web/manager.ts +311 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Permission System — safety gate in front of every tool execution.
|
|
3
|
+
*
|
|
4
|
+
* Tiers:
|
|
5
|
+
* - auto-approve : execute immediately
|
|
6
|
+
* - confirm : ask the user once; may be escalated to session-approve
|
|
7
|
+
* - always-confirm : ask every time, cannot be auto-approved from config
|
|
8
|
+
*
|
|
9
|
+
* The backend calls `check()` to classify, then `requestPermission()` to
|
|
10
|
+
* emit a `permission_request` to the TUI and await a response. Responses
|
|
11
|
+
* come back through `handleResponse()` from the TUI's `permission_response`.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
|
|
15
|
+
import { dirname, join } from 'node:path';
|
|
16
|
+
import { homedir } from 'node:os';
|
|
17
|
+
import { createHash } from 'node:crypto';
|
|
18
|
+
|
|
19
|
+
/**
 * How strictly a tool call is gated before it runs:
 *  - `auto-approve`   : runs without asking
 *  - `confirm`        : the user is asked; the answer may be remembered
 *  - `always-confirm` : the user is asked on every call
 */
export type PermissionTier =
  | 'auto-approve'
  | 'confirm'
  | 'always-confirm';

/**
 * Outcome of a permission request. `approved-session` additionally remembers
 * the exact call for the rest of the session; `approved-turn` additionally
 * switches on approve-all for the remainder of the current turn.
 */
export type PermissionDecision =
  | 'approved'
  | 'denied'
  | 'approved-session'
  | 'approved-turn';
|
|
21
|
+
|
|
22
|
+
/** Shape of a permissions.json file and of the merged in-memory config. */
export interface PermissionConfig {
  /** Tier used for any tool that has no entry in `tools`. */
  defaultTier: PermissionTier;
  /** Per-tool tier, keyed by tool name (e.g. `run_command`). */
  tools: Record<string, PermissionTier>;
  /**
   * Regular-expression sources. A `run_command` whose command matches any
   * of them is classified `always-confirm`.
   */
  alwaysConfirmPatterns: string[];
  /** Per-tool tiers that take precedence over `tools` when classifying. */
  sessionOverrides?: Record<string, PermissionTier>;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Built-in tier for every known tool. Read-only tools run without asking;
 * anything that writes, executes, dispatches work or fetches a URL asks first.
 */
const DEFAULT_TOOL_TIERS: Record<string, PermissionTier> = {
  // Read-only workspace inspection and plan bookkeeping
  read_file: 'auto-approve',
  list_files: 'auto-approve',
  search_code: 'auto-approve',
  update_plan: 'auto-approve',

  // Workspace mutation and command execution
  write_file: 'confirm',
  edit_file: 'confirm',
  run_command: 'confirm',
  create_task: 'confirm',
  update_memory: 'confirm',

  // Git: queries are free, anything that changes history or remotes asks
  git_status: 'auto-approve',
  git_diff: 'auto-approve',
  git_log: 'auto-approve',
  git_commit: 'confirm',
  git_branch: 'confirm',
  git_create_pr: 'confirm',

  // Sub-agents and network access
  spawn_agent: 'confirm',
  web_search: 'auto-approve',
  web_fetch: 'confirm',
};
|
|
49
|
+
|
|
50
|
+
/**
 * Regular-expression sources for shell commands that must be confirmed on
 * every call. Each entry is compiled with `new RegExp(source)` and tested
 * against the whitespace-normalized command string.
 *
 * Command names are anchored with `\b` so they only match as whole words:
 * without it `dd\s+` fires on `git add .`, `sudo(\s|$)` on `pseudo `, and
 * `rm\s+` on any word ending in "rm" followed by a flag.
 */
const DEFAULT_ALWAYS_CONFIRM_PATTERNS: string[] = [
  // Recursive / forced delete
  '\\brm\\s+(-[rfR]+\\s+|--recursive)',
  // Force push — the flag may appear anywhere after `push`, e.g.
  // `git push origin feature --force`, not only directly after it.
  'git\\s+push\\s+(.*\\s)?(-f|--force|--force-with-lease)',
  // Any push that names main/master
  'git\\s+push\\s+.*\\b(main|master)\\b',
  'git\\s+reset\\s+--hard',
  'chmod\\s+(777|000)',
  '\\bsudo(\\s|$)',
  // Download piped straight into a shell
  'curl.*\\|\\s*(sh|bash)',
  'wget.*\\|\\s*(sh|bash)',
  // Raw block copy
  '\\bdd\\s+',
  '>\\s*/dev/',
  // Write/redirect to system dirs
  '>\\s*(/etc|/usr|/bin|/sbin|/boot|/root|~)',
  // Crypto/secret exfil vectors
  '(ssh-keygen|openssl)\\s+.*\\bprivate\\b',
];
|
|
66
|
+
|
|
67
|
+
/**
 * Shell compound/chaining operators that let a caller append arbitrary
 * follow-up commands. When `run_command` is classified as `auto-approve`
 * and the command string contains any of these, we force an upgrade to
 * `confirm` so a human sees the chain before it runs. This closes the
 * "auto-approve `npm test` then `&& rm -rf ~`" gap.
 *
 * Covered: `&&`, a lone `&` (background-then-continue, e.g.
 * `npm test & rm -rf ~`), `||`, `;`, `|`, backticks, `$(`, process
 * substitution `<(` / `>(`, `>>`, `xargs` and `eval`. An `&` directly
 * after `>` is NOT treated as chaining so the very common `2>&1`
 * redirection does not trigger a dialog.
 *
 * Detection is textual on purpose — anything short of a full shell AST
 * parse has edge cases (e.g. `echo "a && b"` contains `&&` inside a
 * quoted string). We accept the false-positive rate here: at worst the
 * user sees a confirm dialog for a command that was actually safe, and
 * can approve it. The alternative — shipping a production shell parser —
 * is a much bigger maintenance surface.
 */
const SHELL_CHAIN_OPERATORS: RegExp = /((?<!>)&|\|\||;|\||`|\$\(|[<>]\(|>>|\bxargs\b|\beval\b)/;
|
|
82
|
+
|
|
83
|
+
/**
 * Public predicate so wrappers that *force* a result to `auto-approve`
 * (e.g. the `--auto-approve run_command` CLI flag) can re-apply the
 * chain-operator gate themselves. Without this, a CLI allow-list would
 * silently bypass `check()`'s upgrade to `confirm` because the wrapper
 * overrides the resolved tier after `check()` returns.
 *
 * @param command Raw shell command string as it would be executed.
 * @returns `true` when the command contains a chaining operator or spans
 *          more than one line.
 */
export function hasShellChainOperator(command: string): boolean {
  // A newline separates commands exactly like `;`, but normalizeCommand()
  // folds every whitespace run (newlines included) into a single space, so
  // it has to be looked for on the raw text. Surrounding whitespace is
  // trimmed first so a trailing newline alone does not count as a chain.
  if (/[\r\n]/.test(command.trim())) return true;
  return SHELL_CHAIN_OPERATORS.test(normalizeCommand(command));
}
|
|
93
|
+
|
|
94
|
+
/**
 * Built-in configuration: unknown tools ask for confirmation, known tools
 * use the built-in tier table (copied so the table itself is never handed
 * out), and the built-in always-confirm patterns apply.
 */
const DEFAULT_CONFIG: PermissionConfig = {
  defaultTier: 'confirm',
  tools: Object.assign({}, DEFAULT_TOOL_TIERS),
  alwaysConfirmPatterns: DEFAULT_ALWAYS_CONFIRM_PATTERNS,
};
|
|
99
|
+
|
|
100
|
+
// How long a permission dialog may stay unanswered. The intent is "wait
// until the user responds", so the limit is a full day; a request that is
// still pending after that is resolved as denied.
const REQUEST_TIMEOUT_MS = 1000 * 60 * 60 * 24; // 24h (effectively forever)
|
|
103
|
+
|
|
104
|
+
/** Bookkeeping for one permission request that is waiting for an answer. */
interface Pending {
  /** Settles the promise handed back to the caller of the request. */
  resolve: (d: PermissionDecision) => void;
  /** Auto-deny timer; cleared as soon as a response arrives. */
  timeout: NodeJS.Timeout;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Derive a short, stable identifier for a tool call so that an identical
 * call can be recognised later (used as the session-approval key).
 *
 * Keys are sorted at EVERY nesting level before serialising, so two
 * argument objects that differ only in key order — including inside nested
 * objects or arrays of objects — produce the same fingerprint. For flat
 * argument objects the result is identical to hashing
 * `tool + '::' + JSON.stringify(argsWithSortedKeys)`.
 *
 * @param tool Tool name.
 * @param args Tool arguments.
 * @returns First 16 hex characters of the SHA-1 of the canonical form.
 */
function fingerprint(tool: string, args: Record<string, unknown>): string {
  // Copy `source` into a prototype-less object with keys in sorted order.
  const sortKeys = (source: Record<string, unknown>): Record<string, unknown> => {
    const out: Record<string, unknown> = Object.create(null);
    for (const k of Object.keys(source).sort()) out[k] = canonical(source[k]);
    return out;
  };

  // Recurse only into arrays and plain objects; anything else (primitives,
  // Dates and other values with their own JSON form) is left for
  // JSON.stringify to serialise as before.
  const canonical = (value: unknown): unknown => {
    if (Array.isArray(value)) return value.map(canonical);
    if (value !== null && typeof value === 'object') {
      const proto = Object.getPrototypeOf(value);
      if (proto === Object.prototype || proto === null) {
        return sortKeys(value as Record<string, unknown>);
      }
    }
    return value;
  };

  const s = tool + '::' + JSON.stringify(sortKeys(args));
  return createHash('sha1').update(s).digest('hex').slice(0, 16);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Classifies tool calls into permission tiers and brokers the
 * request/response round-trip with the TUI for calls that need approval.
 */
export class PermissionManager {
  private config: PermissionConfig;
  private skip: boolean;
  private patterns: RegExp[];
  /** Requests that have been emitted and are still awaiting a response. */
  private pending = new Map<string, Pending>();
  /** Session approvals: fingerprints of calls approved for the session. */
  private sessionApprovals = new Set<string>();
  /** Counter appended to request ids so ids created in the same ms differ. */
  private nextId = 0;
  /**
   * Yolo-for-this-turn: approve every confirm-tier tool call until the
   * backend declares the turn over via endTurn(). always-confirm tools
   * (rm -rf, sudo, force-push to main, …) are NEVER bypassed.
   */
  private turnApproveAll = false;

  /**
   * @param configPath       Project-level permissions.json path.
   * @param skipPermissions  When true every call is auto-approved.
   * @param userConfigPath   Override for the user-level permissions.json;
   *                         defaults to `~/.kondi-chat/permissions.json`.
   */
  constructor(configPath: string, skipPermissions = false, userConfigPath?: string) {
    this.skip = skipPermissions;
    // Load user-level permissions as the base, then merge any explicit
    // project-level overrides on top. Projects that don't have a
    // permissions.json get the user-level settings (auto-approve etc.)
    // without any hardcoded defaults overriding them. `userConfigPath` is
    // an injection point used by tests to keep the developer's actual
    // ~/.kondi-chat/permissions.json from leaking into the test config.
    const resolvedUserPath = userConfigPath ?? join(homedir(), '.kondi-chat', 'permissions.json');
    const userConfig = loadConfig(resolvedUserPath);
    const projectConfig = loadConfig(configPath);
    // Use DEFAULT_CONFIG as the ultimate fallback if neither user nor project has settings.
    this.config = {
      defaultTier: projectConfig.defaultTier || userConfig.defaultTier || DEFAULT_CONFIG.defaultTier,
      tools: { ...DEFAULT_TOOL_TIERS, ...userConfig.tools, ...projectConfig.tools },
      alwaysConfirmPatterns: projectConfig.alwaysConfirmPatterns.length > 0
        ? projectConfig.alwaysConfirmPatterns
        : userConfig.alwaysConfirmPatterns.length > 0
          ? userConfig.alwaysConfirmPatterns
          : DEFAULT_ALWAYS_CONFIRM_PATTERNS,
      sessionOverrides: projectConfig.sessionOverrides,
    };

    // Compile the always-confirm patterns. An entry that is not a valid
    // regular expression cannot be enforced; say so instead of dropping
    // the safety rail silently.
    this.patterns = [];
    for (const source of this.config.alwaysConfirmPatterns) {
      try {
        this.patterns.push(new RegExp(source));
      } catch (e) {
        process.stderr.write(
          `[permissions] Ignoring invalid alwaysConfirmPatterns entry ${JSON.stringify(source)}: ${(e as Error).message}\n`,
        );
      }
    }

    if (skipPermissions) {
      process.stderr.write('[permissions] --dangerously-skip-permissions active; all tools auto-approved\n');
    }
  }

  /** Classify a tool call without prompting. */
  check(tool: string, args: Record<string, unknown>): PermissionTier {
    if (this.skip) return 'auto-approve';

    // Start from session override → tool default → config default.
    const sessionTier = this.config.sessionOverrides?.[tool];
    const tier: PermissionTier = sessionTier
      || this.config.tools[tool]
      || this.config.defaultTier;

    // run_command-specific safety rails:
    //   1. always-confirm patterns (rm -rf, sudo, curl|sh, …) are bypass-
    //      proof — they always escalate to the strictest tier regardless
    //      of what the config or session override says.
    //   2. shell compound/chain operators (&&, ||, ;, |, $(), backtick,
    //      xargs, eval) and multi-line commands force-upgrade
    //      `auto-approve` → `confirm`. A human sees every chained command
    //      before it runs, but yolo-for-turn can still batch-approve
    //      them — they're "risky" not "forbidden".
    if (tool === 'run_command') {
      const raw = String(args.command ?? '');
      const cmd = normalizeCommand(raw);
      for (const re of this.patterns) {
        if (re.test(cmd)) return 'always-confirm';
      }
      // A newline separates commands like `;` does, but normalization
      // folds it into a space, so it is checked on the raw text (trimmed,
      // so a trailing newline alone does not count).
      const chained = SHELL_CHAIN_OPERATORS.test(cmd) || /[\r\n]/.test(raw.trim());
      if (tier === 'auto-approve' && chained) {
        return 'confirm';
      }
    }

    return tier;
  }

  /**
   * Request permission: if tier is auto-approve or session-approved, resolve
   * immediately; otherwise emit a permission_request and await a response.
   *
   * @param tool  Tool name.
   * @param args  Tool arguments.
   * @param emit  Sink for `permission_request` / `permission_timeout` events.
   * @returns The decision; `denied` if no response arrives before the
   *          request times out.
   */
  async requestPermission(
    tool: string,
    args: Record<string, unknown>,
    emit: (event: any) => void,
  ): Promise<PermissionDecision> {
    if (this.skip) return 'approved';
    const tier = this.check(tool, args);
    if (tier === 'auto-approve') return 'approved';

    // Yolo-for-this-turn: approve confirm-tier calls for the rest of this
    // turn. always-confirm calls still prompt — that tier is documented as
    // bypass-proof, so approve-all must not cover it. The flag resets at
    // endTurn().
    if (this.turnApproveAll && tier !== 'always-confirm') return 'approved';

    const fp = fingerprint(tool, args);
    if (tier !== 'always-confirm' && this.sessionApprovals.has(fp)) return 'approved';

    const id = `perm-${Date.now()}-${this.nextId++}`;
    emit({
      type: 'permission_request',
      id,
      tool,
      args: JSON.stringify(args).slice(0, 2000),
      summary: summarize(tool, args),
      tier,
    });

    return new Promise<PermissionDecision>((resolve) => {
      const timeout = setTimeout(() => {
        this.pending.delete(id);
        emit({ type: 'permission_timeout', id, tool });
        resolve('denied');
      }, REQUEST_TIMEOUT_MS);
      this.pending.set(id, { resolve, timeout });
    }).then(decision => {
      // Session approval is never recorded for always-confirm calls.
      if (decision === 'approved-session' && tier !== 'always-confirm') {
        this.sessionApprovals.add(fp);
      }
      if (decision === 'approved-turn') {
        this.turnApproveAll = true;
      }
      return decision;
    });
  }

  /** Handle a response from the TUI. Duplicate/unknown ids are ignored. */
  handleResponse(id: string, decision: PermissionDecision): void {
    const p = this.pending.get(id);
    if (!p) return;
    clearTimeout(p.timeout);
    this.pending.delete(id);
    p.resolve(decision);
  }

  /** Backend calls this when the assistant turn completes — clears yolo. */
  endTurn(): void {
    this.turnApproveAll = false;
  }
}
|
|
258
|
+
|
|
259
|
+
/**
 * Canonical single-line form of a command for pattern matching: leading and
 * trailing whitespace removed, every inner whitespace run (tabs and
 * newlines included) collapsed to one space.
 */
function normalizeCommand(cmd: string): string {
  const trimmed = cmd.trim();
  return trimmed.split(/\s+/).join(' ');
}
|
|
262
|
+
|
|
263
|
+
/**
 * One-line, human-readable description of a tool call for the permission
 * dialog. Well-known tools get a tailored sentence; anything else falls
 * back to `tool(<truncated JSON args>)`.
 */
function summarize(tool: string, args: Record<string, unknown>): string {
  // Missing/falsy argument values render as an empty string.
  const text = (value: unknown): string => String(value || '');

  if (tool === 'run_command') {
    return `Run shell command: ${text(args.command).slice(0, 200)}`;
  }
  if (tool === 'write_file') {
    return `Write file: ${text(args.path)}`;
  }
  if (tool === 'edit_file') {
    return `Edit file: ${text(args.path)}`;
  }
  if (tool === 'create_task') {
    return `Dispatch task: ${text(args.description).slice(0, 160)}`;
  }
  if (tool === 'update_memory') {
    return `Update ${text(args.scope)} memory (${text(args.operation)})`;
  }

  const serialized = JSON.stringify(args).slice(0, 160);
  return `${tool}(${serialized})`;
}
|
|
273
|
+
|
|
274
|
+
/**
 * Read one permissions.json file.
 *
 * The result contains ONLY what the file explicitly sets: a missing
 * `defaultTier` is `''`, missing `tools` is `{}`, and a missing (or
 * non-array) `alwaysConfirmPatterns` is `[]`. Built-in defaults are layered
 * in by the caller's merge. Filling them in here would make an existing
 * project file that says nothing about a tool still override the
 * user-level setting for it with the built-in value.
 *
 * @param configPath Path to the JSON file; it does not have to exist.
 * @returns The explicit settings; an empty config when the file is absent;
 *          the built-in defaults when the file cannot be read or parsed
 *          (a warning is written to stderr in that case).
 */
function loadConfig(configPath: string): PermissionConfig {
  if (!existsSync(configPath)) {
    // Don't write defaults to project level — let user-level handle it.
    // Only return an empty config so the merge in the constructor picks
    // up user-level settings without project-level overriding them.
    return { defaultTier: '' as PermissionTier, tools: {}, alwaysConfirmPatterns: [], sessionOverrides: undefined };
  }
  try {
    const raw = JSON.parse(readFileSync(configPath, 'utf-8'));
    return {
      defaultTier: raw.defaultTier || ('' as PermissionTier),
      tools: { ...(raw.tools || {}) },
      // Anything that is not an array cannot be compiled pattern by
      // pattern; treat it as "not set" rather than letting it crash later.
      alwaysConfirmPatterns: Array.isArray(raw.alwaysConfirmPatterns) ? raw.alwaysConfirmPatterns : [],
      sessionOverrides: raw.sessionOverrides,
    };
  } catch (e) {
    // A file that exists but is unreadable falls back to the built-in
    // defaults, so a corrupt config never loosens the gate.
    process.stderr.write(`[permissions] Failed to parse ${configPath}: ${(e as Error).message}; using defaults\n`);
    return { ...DEFAULT_CONFIG };
  }
}
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline — the Discuss → Commit → Dispatch → Execute → Verify → Reflect loop.
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates the flow between conversation model, worker model,
|
|
5
|
+
* and local verification tools. All calls are recorded in the audit ledger.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { join } from 'node:path';
|
|
9
|
+
import type {
|
|
10
|
+
Session, SessionState, TaskCard, RepoMap,
|
|
11
|
+
LLMResponse, VerificationResult, ProviderId,
|
|
12
|
+
} from '../types.ts';
|
|
13
|
+
import { callLLM } from '../providers/llm-caller.ts';
|
|
14
|
+
import { createTaskCard, executeTaskCard, readRelevantFiles } from './task-card.ts';
|
|
15
|
+
import { parseFileReplacements, applyChanges, formatApplyResult, type ApplyResult } from './apply.ts';
|
|
16
|
+
import { verify } from './verify.ts';
|
|
17
|
+
import { Ledger } from '../audit/ledger.ts';
|
|
18
|
+
import type { Router as UnifiedRouter } from '../router/index.ts';
|
|
19
|
+
import type { RoutingCollector } from '../router/collector.ts';
|
|
20
|
+
import { PipelineError } from './errors.ts';
|
|
21
|
+
import { TaskStore } from './task-store.ts';
|
|
22
|
+
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Pipeline configuration
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
/** Options that control a single `runPipeline` invocation. */
export interface PipelineConfig {
  /** Provider to call when no router is configured. */
  provider: ProviderId;
  /** Model to call when no router is configured. */
  model?: string;
  /** Unified router; when present it chooses provider and model per phase. */
  router?: UnifiedRouter;
  /** Collector that receives one routing-outcome record per model call. */
  collector?: RoutingCollector;
  /**
   * Failure budget: a task whose verification fails is retried with the
   * failure output added to its constraints only while its failure count
   * is below this value. Also handed to the router on every selection.
   */
  promotionThreshold: number;
  /** Directory the pipeline reads files from, writes to and verifies in. */
  workingDir: string;
  /** Whether to run local verification after the execute phase. */
  autoVerify: boolean;
  /** Store used to persist task cards across sessions. */
  taskStore?: TaskStore;
  /**
   * Optional event sink. When set, the pipeline pushes an `activity`
   * event for each phase while it runs ("pipeline: dispatch → …",
   * "pipeline: execute → …", "pipeline: verify → PASSED") so a UI can show
   * progress live instead of waiting on one opaque `create_task` call.
   * It is threaded in from `ToolContext.emit` by `toolCreateTask`.
   * Leave it undefined to keep the pipeline silent.
   */
  emit?: (event: Record<string, unknown>) => void;
}
|
|
54
|
+
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
// Pipeline result
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
/** Everything a pipeline run produced, for display in the conversation. */
export interface PipelineResult {
  /** Task card created in the dispatch phase and then executed. */
  task: TaskCard;
  /** Raw output of the worker model. */
  executionOutput: string;
  /** Outcome of writing the model's file changes to disk, if any. */
  applied?: ApplyResult;
  /** Local verification outcome (only when autoVerify is on). */
  verification?: VerificationResult;
  /** Frontier model's reflection on the results. */
  reflection: string;
  /** True if the task was promoted to frontier after cheap failures. */
  promoted: boolean;
}
|
|
73
|
+
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
// Pipeline execution
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Run the full pipeline for a user request that requires code execution.
|
|
80
|
+
*
|
|
81
|
+
* 1. Dispatch — create task card from user intent + session state
|
|
82
|
+
* 2. Execute — send task card to worker model
|
|
83
|
+
* 3. Verify — run local tests/lint/typecheck
|
|
84
|
+
* 4. Reflect — frontier summarizes what happened
|
|
85
|
+
*
|
|
86
|
+
* Returns the result for display in the conversation.
|
|
87
|
+
*/
|
|
88
|
+
export async function runPipeline(
|
|
89
|
+
userIntent: string,
|
|
90
|
+
session: Session,
|
|
91
|
+
ledger: Ledger,
|
|
92
|
+
config: PipelineConfig,
|
|
93
|
+
): Promise<PipelineResult> {
|
|
94
|
+
|
|
95
|
+
/** Track what happened in each pipeline phase so the router's intent
|
|
96
|
+
* classifier can make informed per-step decisions. */
|
|
97
|
+
const priorPhases: Array<{ phase: string; model: string; summary?: string; succeeded?: boolean }> = [];
|
|
98
|
+
|
|
99
|
+
/** Resolve provider/model from router or fallback, passing accumulated
|
|
100
|
+
* phase context so the intent classifier sees the full picture. */
|
|
101
|
+
const route = async (
|
|
102
|
+
phase: import('../types.ts').LedgerPhase,
|
|
103
|
+
promptText: string,
|
|
104
|
+
taskKind?: string,
|
|
105
|
+
failures = 0,
|
|
106
|
+
) => {
|
|
107
|
+
if (config.router) {
|
|
108
|
+
const decision = await config.router.select(
|
|
109
|
+
phase,
|
|
110
|
+
promptText,
|
|
111
|
+
taskKind,
|
|
112
|
+
failures,
|
|
113
|
+
config.promotionThreshold,
|
|
114
|
+
{ priorPhases: [...priorPhases], currentGoal: userIntent },
|
|
115
|
+
);
|
|
116
|
+
return { provider: decision.model.provider, model: decision.model.id, decision };
|
|
117
|
+
}
|
|
118
|
+
return { provider: config.provider, model: config.model, decision: undefined as any };
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
const emit = config.emit;
|
|
122
|
+
emit?.({ type: 'activity', text: `pipeline: starting — "${userIntent.slice(0, 80)}"`, activity_type: 'step' });
|
|
123
|
+
|
|
124
|
+
// -----------------------------------------------------------------------
|
|
125
|
+
// Step 1: Dispatch — create task card
|
|
126
|
+
// -----------------------------------------------------------------------
|
|
127
|
+
const dispatchRoute = await route('dispatch', userIntent);
|
|
128
|
+
emit?.({
|
|
129
|
+
type: 'activity',
|
|
130
|
+
text: `pipeline: dispatch → ${dispatchRoute.model || '(fallback)'} (${dispatchRoute.decision?.reason || 'fallback'})`,
|
|
131
|
+
activity_type: 'step',
|
|
132
|
+
});
|
|
133
|
+
let card, dispatchResponse;
|
|
134
|
+
try {
|
|
135
|
+
({ card, response: dispatchResponse } = await createTaskCard(
|
|
136
|
+
userIntent,
|
|
137
|
+
session.state,
|
|
138
|
+
session.repoMap,
|
|
139
|
+
dispatchRoute.provider,
|
|
140
|
+
dispatchRoute.model,
|
|
141
|
+
ledger,
|
|
142
|
+
));
|
|
143
|
+
} catch (e) {
|
|
144
|
+
throw new PipelineError(
|
|
145
|
+
`dispatch failed: ${e instanceof Error ? e.message : String(e)}`,
|
|
146
|
+
{ severity: 'fatal', stage: 'dispatch', cause: e },
|
|
147
|
+
);
|
|
148
|
+
}
|
|
149
|
+
// process.stderr.write(` │ │ model: ${dispatchResponse.model} ${dispatchResponse.inputTokens}in/${dispatchResponse.outputTokens}out\n`);
|
|
150
|
+
// process.stderr.write(` │ ╰─ task ${card.id} (${card.kind}): ${card.goal.slice(0, 60)}\n`);
|
|
151
|
+
|
|
152
|
+
// Record routing outcome
|
|
153
|
+
config.collector?.record({
|
|
154
|
+
timestamp: new Date().toISOString(),
|
|
155
|
+
phase: 'dispatch', taskKind: card.kind, promptLength: userIntent.length,
|
|
156
|
+
contextTokens: dispatchResponse.inputTokens, failures: 0, promoted: false,
|
|
157
|
+
modelId: dispatchResponse.model, provider: dispatchRoute.provider,
|
|
158
|
+
succeeded: true, inputTokens: dispatchResponse.inputTokens,
|
|
159
|
+
outputTokens: dispatchResponse.outputTokens,
|
|
160
|
+
costUsd: 0, latencyMs: dispatchResponse.latencyMs,
|
|
161
|
+
routeReason: dispatchRoute.decision?.reason || 'fallback',
|
|
162
|
+
routingTier: dispatchRoute.decision?.tier,
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
priorPhases.push({
|
|
166
|
+
phase: 'dispatch',
|
|
167
|
+
model: dispatchResponse.model,
|
|
168
|
+
summary: `task ${card.id} (${card.kind}): ${card.goal.slice(0, 80)}`,
|
|
169
|
+
succeeded: true,
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
card.status = 'executing';
|
|
173
|
+
session.tasks.push(card);
|
|
174
|
+
session.state.activeTaskId = card.id;
|
|
175
|
+
config.taskStore?.setCurrent(card);
|
|
176
|
+
|
|
177
|
+
// -----------------------------------------------------------------------
|
|
178
|
+
// Step 2: Execute — router picks the worker model
|
|
179
|
+
// -----------------------------------------------------------------------
|
|
180
|
+
const fileContents = config.workingDir
|
|
181
|
+
? readRelevantFiles(config.workingDir, card.relevantFiles)
|
|
182
|
+
: '';
|
|
183
|
+
|
|
184
|
+
const execRoute = await route('execute', card.goal, card.kind, card.failures);
|
|
185
|
+
emit?.({
|
|
186
|
+
type: 'activity',
|
|
187
|
+
text: `pipeline: execute → ${execRoute.model || '(fallback)'} (${execRoute.decision?.reason || 'fallback'})`,
|
|
188
|
+
activity_type: 'step',
|
|
189
|
+
});
|
|
190
|
+
let executionResponse;
|
|
191
|
+
try {
|
|
192
|
+
executionResponse = await executeTaskCard(
|
|
193
|
+
card,
|
|
194
|
+
session.repoMap,
|
|
195
|
+
fileContents,
|
|
196
|
+
execRoute.provider,
|
|
197
|
+
execRoute.model,
|
|
198
|
+
ledger,
|
|
199
|
+
);
|
|
200
|
+
} catch (e) {
|
|
201
|
+
throw new PipelineError(
|
|
202
|
+
`execute failed: ${e instanceof Error ? e.message : String(e)}`,
|
|
203
|
+
{ severity: 'recoverable', stage: 'execute', cause: e },
|
|
204
|
+
);
|
|
205
|
+
}
|
|
206
|
+
// process.stderr.write(` │ │ model: ${executionResponse.model} ${executionResponse.inputTokens}in/${executionResponse.outputTokens}out\n`);
|
|
207
|
+
// process.stderr.write(` │ ╰─ done\n`);
|
|
208
|
+
|
|
209
|
+
// -----------------------------------------------------------------------
|
|
210
|
+
// Step 2.5: Apply — write model output to disk
|
|
211
|
+
// -----------------------------------------------------------------------
|
|
212
|
+
priorPhases.push({
|
|
213
|
+
phase: 'execute',
|
|
214
|
+
model: executionResponse.model,
|
|
215
|
+
summary: `wrote ${executionResponse.outputTokens} output tokens`,
|
|
216
|
+
succeeded: true,
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
let applyResult: ApplyResult | undefined;
|
|
220
|
+
if (config.workingDir && card.outputMode !== 'text') {
|
|
221
|
+
const changes = parseFileReplacements(executionResponse.content);
|
|
222
|
+
if (changes.length > 0) {
|
|
223
|
+
const backupDir = join(config.workingDir, '.kondi-chat', 'backups', card.id);
|
|
224
|
+
applyResult = applyChanges(config.workingDir, changes, backupDir);
|
|
225
|
+
emit?.({
|
|
226
|
+
type: 'activity',
|
|
227
|
+
text: `pipeline: apply → ${applyResult.applied.length} file(s) written${applyResult.skipped.length > 0 ? `, ${applyResult.skipped.length} skipped` : ''}`,
|
|
228
|
+
activity_type: 'step',
|
|
229
|
+
});
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// -----------------------------------------------------------------------
|
|
234
|
+
// Step 3: Verify — run local tools
|
|
235
|
+
// -----------------------------------------------------------------------
|
|
236
|
+
let verification: VerificationResult | undefined;
|
|
237
|
+
|
|
238
|
+
if (config.autoVerify && config.workingDir) {
|
|
239
|
+
card.status = 'verifying';
|
|
240
|
+
emit?.({ type: 'activity', text: 'pipeline: verify → running tests/typecheck/lint', activity_type: 'step' });
|
|
241
|
+
verification = verify(config.workingDir, session.repoMap);
|
|
242
|
+
emit?.({
|
|
243
|
+
type: 'activity',
|
|
244
|
+
text: `pipeline: verify → ${verification.passed ? 'PASSED' : 'FAILED'}`,
|
|
245
|
+
activity_type: 'step',
|
|
246
|
+
});
|
|
247
|
+
priorPhases.push({
|
|
248
|
+
phase: 'verify',
|
|
249
|
+
model: 'local',
|
|
250
|
+
summary: verification.passed ? 'tests passed' : 'tests FAILED',
|
|
251
|
+
succeeded: verification.passed,
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
const verifyOutput = [
|
|
255
|
+
verification.testOutput ? `Tests: ${verification.passed ? 'PASS' : 'FAIL'}\n${verification.testOutput}` : '',
|
|
256
|
+
verification.typecheckOutput ? `Typecheck: ${verification.typecheckOutput}` : '',
|
|
257
|
+
verification.lintOutput ? `Lint: ${verification.lintOutput}` : '',
|
|
258
|
+
].filter(Boolean).join('\n\n');
|
|
259
|
+
|
|
260
|
+
// process.stderr.write(` │ ╰─ ${verification.passed ? 'PASSED' : 'FAILED'}\n`);
|
|
261
|
+
ledger.recordVerification(card.id, verification.passed, verifyOutput);
|
|
262
|
+
|
|
263
|
+
// Retry on failure — enrich prompt with error context so router can escalate
|
|
264
|
+
if (!verification.passed && card.failures < config.promotionThreshold) {
|
|
265
|
+
card.failures++;
|
|
266
|
+
session.state.recentFailures.push(
|
|
267
|
+
`Task ${card.id} failed (attempt ${card.failures}): ${verifyOutput.slice(0, 200)}`
|
|
268
|
+
);
|
|
269
|
+
|
|
270
|
+
// pipeline: retry (attempt N/M) — suppressed for TUI
|
|
271
|
+
|
|
272
|
+
// Retry — router may promote to a better model based on failure count
|
|
273
|
+
const retryRoute = await route('execute', card.goal, card.kind, card.failures);
|
|
274
|
+
const retryCard = { ...card, constraints: [...card.constraints, `Previous attempt failed with: ${verifyOutput.slice(0, 500)}`] };
|
|
275
|
+
// process.stderr.write(` │ │ ${retryRoute.decision?.promoted ? 'PROMOTED' : 'retrying'}${retryRoute.decision ? ` [${retryRoute.decision.reason}]` : ''}\n`);
|
|
276
|
+
executionResponse = await executeTaskCard(
|
|
277
|
+
retryCard,
|
|
278
|
+
session.repoMap,
|
|
279
|
+
fileContents,
|
|
280
|
+
retryRoute.provider,
|
|
281
|
+
retryRoute.model,
|
|
282
|
+
ledger,
|
|
283
|
+
);
|
|
284
|
+
// process.stderr.write(` │ │ model: ${executionResponse.model} ${executionResponse.inputTokens}in/${executionResponse.outputTokens}out\n`);
|
|
285
|
+
// process.stderr.write(` │ ╰─ retry done\n`);
|
|
286
|
+
|
|
287
|
+
// Re-verify
|
|
288
|
+
// process.stderr.write(` │ ╭─ verify (local)\n`);
|
|
289
|
+
verification = verify(config.workingDir, session.repoMap);
|
|
290
|
+
const retryVerifyOutput = [
|
|
291
|
+
verification.testOutput ? `Tests: ${verification.passed ? 'PASS' : 'FAIL'}\n${verification.testOutput}` : '',
|
|
292
|
+
verification.typecheckOutput ? `Typecheck: ${verification.typecheckOutput}` : '',
|
|
293
|
+
].filter(Boolean).join('\n\n');
|
|
294
|
+
// process.stderr.write(` │ ╰─ ${verification.passed ? 'PASSED' : 'FAILED'}\n`);
|
|
295
|
+
ledger.recordVerification(card.id, verification.passed, retryVerifyOutput);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
const promoted = card.failures >= config.promotionThreshold;
|
|
300
|
+
card.status = verification?.passed ? 'passed' : (promoted ? 'promoted' : 'failed');
|
|
301
|
+
card.completedAt = new Date().toISOString();
|
|
302
|
+
|
|
303
|
+
// Record execution outcome for router training
|
|
304
|
+
config.collector?.record({
|
|
305
|
+
timestamp: new Date().toISOString(),
|
|
306
|
+
phase: 'execute', taskKind: card.kind, promptLength: card.goal.length,
|
|
307
|
+
contextTokens: executionResponse.inputTokens, failures: card.failures, promoted,
|
|
308
|
+
modelId: executionResponse.model, provider: executionResponse.provider,
|
|
309
|
+
succeeded: verification?.passed ?? true,
|
|
310
|
+
verificationPassed: verification?.passed,
|
|
311
|
+
inputTokens: executionResponse.inputTokens,
|
|
312
|
+
outputTokens: executionResponse.outputTokens,
|
|
313
|
+
costUsd: 0, latencyMs: executionResponse.latencyMs,
|
|
314
|
+
routeReason: execRoute.decision?.reason || 'fallback',
|
|
315
|
+
routingTier: execRoute.decision?.tier,
|
|
316
|
+
});
|
|
317
|
+
|
|
318
|
+
// -----------------------------------------------------------------------
|
|
319
|
+
// Step 4: Reflect — frontier summarizes what happened
|
|
320
|
+
// -----------------------------------------------------------------------
|
|
321
|
+
const reflectRoute = await route('reflect', card.goal);
|
|
322
|
+
emit?.({
|
|
323
|
+
type: 'activity',
|
|
324
|
+
text: `pipeline: reflect → ${reflectRoute.model || '(fallback)'} (${reflectRoute.decision?.reason || 'fallback'})`,
|
|
325
|
+
activity_type: 'step',
|
|
326
|
+
});
|
|
327
|
+
let reflectionResponse: LLMResponse;
|
|
328
|
+
try {
|
|
329
|
+
reflectionResponse = await callLLM({
|
|
330
|
+
provider: reflectRoute.provider,
|
|
331
|
+
model: reflectRoute.model,
|
|
332
|
+
systemPrompt: 'You are summarizing the results of a coding task for the user. Be concise. Report what was done, whether it passed verification, and what to do next.',
|
|
333
|
+
userMessage: `Task: ${card.goal}
|
|
334
|
+
Kind: ${card.kind}
|
|
335
|
+
Status: ${card.status}
|
|
336
|
+
|
|
337
|
+
Worker output (summary):
|
|
338
|
+
${executionResponse.content.slice(0, 3000)}
|
|
339
|
+
|
|
340
|
+
${verification ? `Verification: ${verification.passed ? 'PASSED' : 'FAILED'}
|
|
341
|
+
${verification.testOutput ? `Test output: ${verification.testOutput.slice(0, 500)}` : ''}
|
|
342
|
+
${verification.typecheckOutput ? `Typecheck: ${verification.typecheckOutput.slice(0, 500)}` : ''}` : 'Verification: skipped'}
|
|
343
|
+
|
|
344
|
+
Summarize the results for the user. If failed, suggest what to try next.`,
|
|
345
|
+
maxOutputTokens: 1500,
|
|
346
|
+
});
|
|
347
|
+
} catch (e) {
|
|
348
|
+
// Reflection is non-essential — we already executed and verified. If
|
|
349
|
+
// the reflection call fails, degrade gracefully with a synthetic
|
|
350
|
+
// summary instead of nuking the whole pipeline result.
|
|
351
|
+
reflectionResponse = {
|
|
352
|
+
content: `(reflection failed: ${e instanceof Error ? e.message : String(e)})`,
|
|
353
|
+
model: reflectRoute.model || 'unknown',
|
|
354
|
+
provider: reflectRoute.provider,
|
|
355
|
+
inputTokens: 0, outputTokens: 0, latencyMs: 0,
|
|
356
|
+
};
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
ledger.record('reflect', reflectionResponse, `Reflect on task ${card.id}`, { taskId: card.id });
|
|
360
|
+
|
|
361
|
+
// Clean up state + persist completed task to history
|
|
362
|
+
session.state.activeTaskId = undefined;
|
|
363
|
+
if (card.status === 'passed') {
|
|
364
|
+
session.state.recentFailures = session.state.recentFailures.filter(f => !f.includes(card.id));
|
|
365
|
+
}
|
|
366
|
+
config.taskStore?.complete();
|
|
367
|
+
|
|
368
|
+
return {
|
|
369
|
+
task: card,
|
|
370
|
+
executionOutput: executionResponse.content,
|
|
371
|
+
applied: applyResult,
|
|
372
|
+
verification,
|
|
373
|
+
reflection: reflectionResponse.content,
|
|
374
|
+
promoted,
|
|
375
|
+
};
|
|
376
|
+
}
|