@thispointon/kondi-chat 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +556 -0
  3. package/bin/kondi-chat +56 -0
  4. package/bin/kondi-chat.js +72 -0
  5. package/package.json +55 -0
  6. package/scripts/demo.tape +49 -0
  7. package/scripts/postinstall.cjs +103 -0
  8. package/src/audit/analytics.ts +261 -0
  9. package/src/audit/ledger.ts +253 -0
  10. package/src/audit/telemetry.ts +165 -0
  11. package/src/cli/backend.ts +675 -0
  12. package/src/cli/commands.ts +419 -0
  13. package/src/cli/help.ts +182 -0
  14. package/src/cli/submit-helpers.ts +159 -0
  15. package/src/cli/submit.ts +539 -0
  16. package/src/cli/wizard.ts +121 -0
  17. package/src/context/bootstrap.ts +138 -0
  18. package/src/context/budget.ts +100 -0
  19. package/src/context/manager.ts +666 -0
  20. package/src/context/memory.ts +160 -0
  21. package/src/context/preflight.ts +176 -0
  22. package/src/context/project-brain.ts +101 -0
  23. package/src/context/receipts.ts +108 -0
  24. package/src/context/skills.ts +154 -0
  25. package/src/context/symbol-index.ts +240 -0
  26. package/src/council/profiles.ts +137 -0
  27. package/src/council/tool.ts +138 -0
  28. package/src/council-engine/cli/council-artifacts.ts +230 -0
  29. package/src/council-engine/cli/council-config.ts +178 -0
  30. package/src/council-engine/cli/council-session-export.ts +116 -0
  31. package/src/council-engine/cli/kondi.ts +98 -0
  32. package/src/council-engine/cli/llm-caller.ts +229 -0
  33. package/src/council-engine/cli/localStorage-shim.ts +119 -0
  34. package/src/council-engine/cli/node-platform.ts +68 -0
  35. package/src/council-engine/cli/run-council.ts +481 -0
  36. package/src/council-engine/cli/run-pipeline.ts +772 -0
  37. package/src/council-engine/cli/session-export.ts +153 -0
  38. package/src/council-engine/configs/councils/analysis.json +101 -0
  39. package/src/council-engine/configs/councils/code-planning.json +86 -0
  40. package/src/council-engine/configs/councils/coding.json +89 -0
  41. package/src/council-engine/configs/councils/debate.json +97 -0
  42. package/src/council-engine/configs/councils/solo-claude.json +34 -0
  43. package/src/council-engine/configs/councils/solo-gpt.json +34 -0
  44. package/src/council-engine/council/coding-orchestrator.ts +1205 -0
  45. package/src/council-engine/council/context-bootstrap.ts +147 -0
  46. package/src/council-engine/council/context-inspection.ts +42 -0
  47. package/src/council-engine/council/context-store.ts +763 -0
  48. package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
  49. package/src/council-engine/council/factory.ts +164 -0
  50. package/src/council-engine/council/index.ts +201 -0
  51. package/src/council-engine/council/ledger-store.ts +438 -0
  52. package/src/council-engine/council/prompts.ts +1689 -0
  53. package/src/council-engine/council/storage-cleanup.ts +164 -0
  54. package/src/council-engine/council/store.ts +1110 -0
  55. package/src/council-engine/council/synthesis.ts +291 -0
  56. package/src/council-engine/council/types.ts +845 -0
  57. package/src/council-engine/council/validation.ts +613 -0
  58. package/src/council-engine/pipeline/build-detect.ts +73 -0
  59. package/src/council-engine/pipeline/executor.ts +1048 -0
  60. package/src/council-engine/pipeline/index.ts +9 -0
  61. package/src/council-engine/pipeline/install-detect.ts +84 -0
  62. package/src/council-engine/pipeline/memory-store.ts +182 -0
  63. package/src/council-engine/pipeline/output-parsers.ts +146 -0
  64. package/src/council-engine/pipeline/run-output.ts +149 -0
  65. package/src/council-engine/pipeline/session-import.ts +177 -0
  66. package/src/council-engine/pipeline/store.ts +753 -0
  67. package/src/council-engine/pipeline/test-detect.ts +82 -0
  68. package/src/council-engine/pipeline/types.ts +401 -0
  69. package/src/council-engine/services/deliberationSummary.ts +114 -0
  70. package/src/council-engine/tsconfig.json +16 -0
  71. package/src/council-engine/types/mcp.ts +122 -0
  72. package/src/council-engine/utils/filterTools.ts +73 -0
  73. package/src/engine/apply.ts +238 -0
  74. package/src/engine/checkpoints.ts +237 -0
  75. package/src/engine/consultants.ts +347 -0
  76. package/src/engine/diff.ts +171 -0
  77. package/src/engine/errors.ts +102 -0
  78. package/src/engine/git-tools.ts +246 -0
  79. package/src/engine/hooks.ts +181 -0
  80. package/src/engine/loop-guard.ts +155 -0
  81. package/src/engine/permissions.ts +293 -0
  82. package/src/engine/pipeline.ts +376 -0
  83. package/src/engine/sub-agents.ts +133 -0
  84. package/src/engine/task-card.ts +185 -0
  85. package/src/engine/task-router.ts +256 -0
  86. package/src/engine/task-store.ts +86 -0
  87. package/src/engine/tools.ts +783 -0
  88. package/src/engine/verify.ts +111 -0
  89. package/src/mcp/client.ts +225 -0
  90. package/src/mcp/config.ts +120 -0
  91. package/src/mcp/tool-manager.ts +192 -0
  92. package/src/mcp/types.ts +61 -0
  93. package/src/providers/llm-caller.ts +943 -0
  94. package/src/providers/rate-limiter.ts +238 -0
  95. package/src/router/NOTES.md +28 -0
  96. package/src/router/collector.ts +474 -0
  97. package/src/router/embeddings.ts +286 -0
  98. package/src/router/index.ts +299 -0
  99. package/src/router/intent-router.ts +225 -0
  100. package/src/router/nn-router.ts +205 -0
  101. package/src/router/profiles.ts +309 -0
  102. package/src/router/registry.ts +565 -0
  103. package/src/router/rules.ts +274 -0
  104. package/src/router/train.py +408 -0
  105. package/src/session/store.ts +211 -0
  106. package/src/test-utils/mock-llm.ts +39 -0
  107. package/src/types.ts +322 -0
  108. package/src/web/manager.ts +311 -0
@@ -0,0 +1,293 @@
1
+ /**
2
+ * Permission System — safety gate in front of every tool execution.
3
+ *
4
+ * Tiers:
5
+ * - auto-approve : execute immediately
6
+ * - confirm : ask the user once; may be escalated to session-approve
7
+ * - always-confirm : ask every time, cannot be auto-approved from config
8
+ *
9
+ * The backend calls `check()` to classify, then `requestPermission()` to
10
+ * emit a `permission_request` to the TUI and await a response. Responses
11
+ * come back through `handleResponse()` from the TUI's `permission_response`.
12
+ */
13
+
14
+ import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
15
+ import { dirname, join } from 'node:path';
16
+ import { homedir } from 'node:os';
17
+ import { createHash } from 'node:crypto';
18
+
19
/** How much user involvement a tool call needs before it is allowed to run. */
export type PermissionTier = 'auto-approve' | 'confirm' | 'always-confirm';

/** Answer to a permission prompt, as delivered through `handleResponse()`. */
export type PermissionDecision = 'approved' | 'denied' | 'approved-session' | 'approved-turn';

/** Permission settings as held in memory after loading/merging config files. */
export interface PermissionConfig {
  /** Tier applied to any tool that has no entry in `tools`. */
  defaultTier: PermissionTier;
  /** Tier per tool, keyed by tool name. */
  tools: Record<string, PermissionTier>;
  /** Regex sources; a `run_command` string matching any of them is always-confirm. */
  alwaysConfirmPatterns: string[];
  /** Optional per-tool tiers that take precedence over `tools`. */
  sessionOverrides?: Record<string, PermissionTier>;
}

/** Built-in tier for each known tool; config files are layered on top of this. */
const DEFAULT_TOOL_TIERS: Record<string, PermissionTier> = {
  // Read-only inspection and planning
  read_file: 'auto-approve',
  list_files: 'auto-approve',
  search_code: 'auto-approve',
  update_plan: 'auto-approve',
  // Anything that writes, executes, or dispatches work
  write_file: 'confirm',
  edit_file: 'confirm',
  run_command: 'confirm',
  create_task: 'confirm',
  update_memory: 'confirm',
  // Git: reads run freely, mutations ask first
  git_status: 'auto-approve',
  git_diff: 'auto-approve',
  git_log: 'auto-approve',
  git_commit: 'confirm',
  git_branch: 'confirm',
  git_create_pr: 'confirm',
  // Sub-agents and network access
  spawn_agent: 'confirm',
  web_search: 'auto-approve',
  web_fetch: 'confirm',
};
49
+
50
/**
 * Regex sources (compiled with `new RegExp(source)`) for shell commands
 * that must always be confirmed by a human.
 *
 * `rm`, `sudo` and `dd` are anchored with `\b` so they only match as
 * command words. Without the boundary, `dd\s+` matched the tail of
 * `git add …` / `npm add …`, `sudo` matched `pseudo …`, and `rm\s+`
 * matched `confirm -r …`, escalating harmless commands to always-confirm.
 */
const DEFAULT_ALWAYS_CONFIRM_PATTERNS: string[] = [
  '\\brm\\s+(-[rfR]+\\s+|--recursive)',
  'git\\s+push\\s+(-f|--force|--force-with-lease)',
  'git\\s+push\\s+.*\\b(main|master)\\b',
  'git\\s+reset\\s+--hard',
  'chmod\\s+(777|000)',
  '\\bsudo(\\s|$)',
  'curl.*\\|\\s*(sh|bash)',
  'wget.*\\|\\s*(sh|bash)',
  '\\bdd\\s+',
  '>\\s*/dev/',
  // Write/redirect to system dirs
  '>\\s*(/etc|/usr|/bin|/sbin|/boot|/root|~)',
  // Crypto/secret exfil vectors
  '(ssh-keygen|openssl)\\s+.*\\bprivate\\b',
];
66
+
67
/**
 * Shell compound/chaining operators that let a caller append arbitrary
 * follow-up commands. When `run_command` is classified as `auto-approve`
 * and the command string contains any of these, we force an upgrade to
 * `confirm` so a human sees the chain before it runs. This closes the
 * "auto-approve `npm test` then `&& rm -rf ~`" gap.
 *
 * Covered: `&` (which also covers `&&`; a lone `&` backgrounds the first
 * command and lets a second one follow), `|` (which also covers `||`),
 * `;`, backtick, `$(`, `>>`, `xargs`, `eval`.
 *
 * Detection is textual on purpose — anything short of a full shell AST
 * parse has edge cases (e.g. `echo "a && b"` contains `&&` inside a
 * quoted string, and `2>&1` contains `&`). We accept the false-positive
 * rate here: at worst the user sees a confirm dialog for a command that
 * was actually safe, and can approve it. The alternative — shipping a
 * production shell parser — is a much bigger maintenance surface.
 */
const SHELL_CHAIN_OPERATORS: RegExp = /[&|;`]|\$\(|>>|\bxargs\b|\beval\b/;
82
+
83
/**
 * Public predicate so wrappers that *force* a result to `auto-approve`
 * (e.g. the `--auto-approve run_command` CLI flag) can re-apply the
 * chain-operator gate themselves. Without this, a CLI allow-list would
 * silently bypass `check()`'s upgrade to `confirm` because the wrapper
 * overrides the resolved tier after `check()` returns.
 *
 * An embedded newline also counts as a chain: the shell treats it as a
 * command separator, but `normalizeCommand()` collapses all whitespace to
 * single spaces, so it has to be looked for in the (trimmed) raw text
 * before normalization hides it.
 *
 * @param command Raw shell command string.
 * @returns true when the command appears to contain more than one command.
 */
export function hasShellChainOperator(command: string): boolean {
  if (/[\r\n]/.test(command.trim())) return true;
  return SHELL_CHAIN_OPERATORS.test(normalizeCommand(command));
}
93
+
94
/** Built-in configuration used when no config file supplies a value. */
const DEFAULT_CONFIG: PermissionConfig = {
  defaultTier: 'confirm',
  tools: Object.assign({}, DEFAULT_TOOL_TIERS),
  alwaysConfirmPatterns: DEFAULT_ALWAYS_CONFIRM_PATTERNS,
};

// Permission dialogs are meant to wait until the user responds. The timer
// below is only a very long safety net: if nothing answers within 24 hours
// the pending request is resolved as 'denied'.
const REQUEST_TIMEOUT_MS = 86_400_000; // 24h in milliseconds

/** Bookkeeping for one permission prompt that is still awaiting an answer. */
interface Pending {
  /** Settles the promise returned by `requestPermission()`. */
  resolve: (d: PermissionDecision) => void;
  /** Safety-net timer; cleared when a response arrives. */
  timeout: NodeJS.Timeout;
}
108
+
109
/**
 * Short, order-independent identifier for a tool call, used to remember
 * session approvals.
 *
 * Keys are sorted at every nesting level (plain objects only; arrays keep
 * their element order), so two calls whose arguments differ only in key
 * order — including inside nested objects — produce the same fingerprint.
 * For flat arguments the result is identical to sorting top-level keys only.
 *
 * @param tool Tool name.
 * @param args Tool arguments (expected to be JSON-like data).
 * @returns First 16 hex characters of the SHA-1 of `tool::<canonical JSON>`.
 */
function fingerprint(tool: string, args: Record<string, unknown>): string {
  const canonicalize = (value: unknown): unknown => {
    if (Array.isArray(value)) return value.map(canonicalize);
    if (value === null || typeof value !== 'object') return value;
    // Leave non-plain objects (Date, Map, …) to JSON.stringify's own handling.
    const proto = Object.getPrototypeOf(value);
    if (proto !== Object.prototype && proto !== null) return value;
    const source = value as Record<string, unknown>;
    const sorted: Record<string, unknown> = {};
    for (const key of Object.keys(source).sort()) {
      sorted[key] = canonicalize(source[key]);
    }
    return sorted;
  };
  const s = tool + '::' + JSON.stringify(canonicalize(args));
  return createHash('sha1').update(s).digest('hex').slice(0, 16);
}
117
+
118
/**
 * Safety gate in front of tool execution.
 *
 * `check()` classifies a call into a tier without prompting;
 * `requestPermission()` resolves immediately for approved calls or emits a
 * `permission_request` event and waits for `handleResponse()`.
 */
export class PermissionManager {
  /** Effective configuration after merging built-ins, user-level and project-level files. */
  private config: PermissionConfig;
  /** True when constructed with `skipPermissions`; everything is approved. */
  private skip: boolean;
  /** Compiled always-confirm patterns (sources that fail to compile are dropped with a warning). */
  private patterns: RegExp[];
  /** Prompts still awaiting a response, keyed by request id. */
  private pending = new Map<string, Pending>();
  /** Session approvals: fingerprint -> approved */
  private sessionApprovals = new Set<string>();
  /** Auto-generated sequential id */
  private nextId = 0;
  /**
   * Yolo-for-this-turn: approve every confirm-tier tool call until the
   * backend declares the turn over via endTurn(). always-confirm tools
   * (rm -rf, sudo, force-push to main, …) are NEVER bypassed.
   */
  private turnApproveAll = false;

  /**
   * @param configPath Project-level permissions file.
   * @param skipPermissions When true, every call is approved without prompting.
   * @param userConfigPath Optional override for the user-level permissions
   *   file; defaults to `~/.kondi-chat/permissions.json`.
   */
  constructor(configPath: string, skipPermissions = false, userConfigPath?: string) {
    this.skip = skipPermissions;
    // Load user-level permissions as the base, then merge any explicit
    // project-level overrides on top. `userConfigPath` is an injection
    // point used by tests to keep the developer's actual
    // ~/.kondi-chat/permissions.json from leaking into the test config.
    const resolvedUserPath = userConfigPath ?? join(homedir(), '.kondi-chat', 'permissions.json');
    const userConfig = loadConfig(resolvedUserPath);
    const projectConfig = loadConfig(configPath);

    // Project patterns win over user patterns; built-ins are the last resort.
    let patternSources = DEFAULT_ALWAYS_CONFIRM_PATTERNS;
    if (projectConfig.alwaysConfirmPatterns.length > 0) {
      patternSources = projectConfig.alwaysConfirmPatterns;
    } else if (userConfig.alwaysConfirmPatterns.length > 0) {
      patternSources = userConfig.alwaysConfirmPatterns;
    }

    // Use DEFAULT_CONFIG as the ultimate fallback if neither user nor project has settings.
    this.config = {
      defaultTier: projectConfig.defaultTier || userConfig.defaultTier || DEFAULT_CONFIG.defaultTier,
      tools: { ...DEFAULT_TOOL_TIERS, ...userConfig.tools, ...projectConfig.tools },
      alwaysConfirmPatterns: patternSources,
      sessionOverrides: projectConfig.sessionOverrides,
    };

    // A pattern that fails to compile cannot protect anything, so say so
    // instead of dropping it silently.
    this.patterns = [];
    for (const source of this.config.alwaysConfirmPatterns) {
      try {
        this.patterns.push(new RegExp(source));
      } catch (e) {
        process.stderr.write(
          `[permissions] Ignoring invalid alwaysConfirmPatterns entry ${JSON.stringify(source)}: ${(e as Error).message}\n`,
        );
      }
    }

    if (skipPermissions) {
      process.stderr.write('[permissions] --dangerously-skip-permissions active; all tools auto-approved\n');
    }
  }

  /**
   * Classify a tool call without prompting.
   *
   * @param tool Tool name.
   * @param args Tool arguments; only `args.command` is inspected, and only for `run_command`.
   * @returns The tier that applies to this specific call.
   */
  check(tool: string, args: Record<string, unknown>): PermissionTier {
    if (this.skip) return 'auto-approve';

    // Start from session override → tool default → config default.
    const tier: PermissionTier = this.config.sessionOverrides?.[tool]
      || this.config.tools[tool]
      || this.config.defaultTier;

    if (tool !== 'run_command') return tier;

    // run_command-specific safety rails:
    //   1. always-confirm patterns (rm -rf, sudo, curl|sh, …) escalate to
    //      the strictest tier regardless of the configured tier.
    //   2. shell compound/chain operators force-upgrade `auto-approve` →
    //      `confirm`. A human sees every chained command before it runs,
    //      but yolo-for-turn can still batch-approve them — they're
    //      "risky" not "forbidden".
    const raw = String(args.command ?? '');
    const cmd = normalizeCommand(raw);
    if (this.patterns.some((re) => re.test(cmd))) return 'always-confirm';

    // A newline separates commands just like `;`, but normalizeCommand()
    // turns it into a space — so look for it in the raw text.
    const chained = SHELL_CHAIN_OPERATORS.test(cmd) || /[\r\n]/.test(raw.trim());
    if (tier === 'auto-approve' && chained) return 'confirm';

    return tier;
  }

  /**
   * Request permission: if tier is auto-approve, turn-approved or
   * session-approved, resolve immediately; otherwise emit a
   * `permission_request` and await a response.
   *
   * always-confirm calls always prompt: neither yolo-for-this-turn nor a
   * stored session approval can skip them.
   *
   * @param tool Tool name.
   * @param args Tool arguments.
   * @param emit Sink for `permission_request` / `permission_timeout` events.
   * @returns The decision; resolves to 'denied' if the safety-net timer fires.
   */
  async requestPermission(
    tool: string,
    args: Record<string, unknown>,
    emit: (event: any) => void,
  ): Promise<PermissionDecision> {
    if (this.skip) return 'approved';
    const tier = this.check(tool, args);
    if (tier === 'auto-approve') return 'approved';

    const bypassable = tier !== 'always-confirm';

    // Yolo-for-this-turn covers confirm-tier calls only. The flag resets
    // at endTurn().
    if (bypassable && this.turnApproveAll) return 'approved';

    const fp = fingerprint(tool, args);
    if (bypassable && this.sessionApprovals.has(fp)) return 'approved';

    const id = `perm-${Date.now()}-${this.nextId++}`;
    const decision = await new Promise<PermissionDecision>((resolve, reject) => {
      const timeout = setTimeout(() => {
        this.pending.delete(id);
        emit({ type: 'permission_timeout', id, tool });
        resolve('denied');
      }, REQUEST_TIMEOUT_MS);
      // Register before emitting so a response delivered synchronously
      // from inside emit() is not dropped as an unknown id.
      this.pending.set(id, { resolve, timeout });
      try {
        emit({
          type: 'permission_request',
          id,
          tool,
          args: JSON.stringify(args).slice(0, 2000),
          summary: summarize(tool, args),
          tier,
        });
      } catch (e) {
        // Nobody will ever answer a request that was never delivered.
        clearTimeout(timeout);
        this.pending.delete(id);
        reject(e);
      }
    });

    if (decision === 'approved-session' && bypassable) {
      this.sessionApprovals.add(fp);
    }
    if (decision === 'approved-turn') {
      this.turnApproveAll = true;
    }
    return decision;
  }

  /** Handle a response from the TUI. Duplicate/unknown ids are ignored. */
  handleResponse(id: string, decision: PermissionDecision): void {
    const p = this.pending.get(id);
    if (!p) return;
    clearTimeout(p.timeout);
    this.pending.delete(id);
    p.resolve(decision);
  }

  /** Backend calls this when the assistant turn completes — clears yolo. */
  endTurn(): void {
    this.turnApproveAll = false;
  }
}
258
+
259
/**
 * Canonical single-line form of a command: leading/trailing whitespace
 * removed and every internal whitespace run reduced to one space.
 */
function normalizeCommand(cmd: string): string {
  const words = cmd.split(/\s+/).filter(Boolean);
  return words.join(' ');
}
262
+
263
/**
 * One-line human-readable description of a tool call, shown in the
 * permission prompt. Known tools get a tailored sentence; anything else
 * falls back to `tool(<truncated JSON args>)`.
 */
function summarize(tool: string, args: Record<string, unknown>): string {
  // Missing/falsy arguments render as an empty string.
  const text = (key: string): string => String(args[key] || '');

  if (tool === 'run_command') {
    return `Run shell command: ${text('command').slice(0, 200)}`;
  }
  if (tool === 'write_file') {
    return `Write file: ${text('path')}`;
  }
  if (tool === 'edit_file') {
    return `Edit file: ${text('path')}`;
  }
  if (tool === 'create_task') {
    return `Dispatch task: ${text('description').slice(0, 160)}`;
  }
  if (tool === 'update_memory') {
    return `Update ${text('scope')} memory (${text('operation')})`;
  }
  const preview = JSON.stringify(args).slice(0, 160);
  return `${tool}(${preview})`;
}
273
+
274
/**
 * Read one permissions file and return ONLY what that file explicitly sets.
 *
 * Built-in defaults are deliberately not mixed in here: the constructor
 * layers built-ins → user-level → project-level, and a file that silently
 * carried a full copy of the defaults would overwrite explicit settings
 * from the layer beneath it. A missing, unreadable or malformed file
 * therefore yields an "empty" config (falsy `defaultTier`, no tools, no
 * patterns) so the other layers apply.
 *
 * Values of the wrong shape are dropped with a warning rather than passed
 * through: tier values must be one of the three known tiers, `tools` and
 * `sessionOverrides` must be objects, and `alwaysConfirmPatterns` must be
 * an array of strings.
 *
 * @param configPath Path of the JSON file to read.
 * @returns The explicit settings found in the file.
 */
function loadConfig(configPath: string): PermissionConfig {
  const emptyConfig = (): PermissionConfig => ({
    defaultTier: '' as PermissionTier,
    tools: {},
    alwaysConfirmPatterns: [],
    sessionOverrides: undefined,
  });

  if (!existsSync(configPath)) {
    // Don't write defaults to project level — let user-level handle it.
    return emptyConfig();
  }

  const isTier = (value: unknown): value is PermissionTier =>
    value === 'auto-approve' || value === 'confirm' || value === 'always-confirm';

  const isPlainRecord = (value: unknown): value is Record<string, unknown> =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  // Keep only entries whose value is a valid tier.
  const tierMap = (value: unknown, field: string): Record<string, PermissionTier> => {
    const result: Record<string, PermissionTier> = {};
    if (!isPlainRecord(value)) return result;
    for (const [tool, tier] of Object.entries(value)) {
      if (isTier(tier)) {
        result[tool] = tier;
      } else {
        process.stderr.write(
          `[permissions] ${configPath}: ignoring ${field}.${tool} = ${JSON.stringify(tier)} (not a valid tier)\n`,
        );
      }
    }
    return result;
  };

  try {
    const raw: unknown = JSON.parse(readFileSync(configPath, 'utf-8'));
    if (!isPlainRecord(raw)) {
      throw new Error('top-level value must be a JSON object');
    }
    const patterns = Array.isArray(raw.alwaysConfirmPatterns)
      ? raw.alwaysConfirmPatterns.filter((p: unknown): p is string => typeof p === 'string')
      : [];
    return {
      defaultTier: isTier(raw.defaultTier) ? raw.defaultTier : ('' as PermissionTier),
      tools: tierMap(raw.tools, 'tools'),
      alwaysConfirmPatterns: patterns,
      sessionOverrides: raw.sessionOverrides === undefined
        ? undefined
        : tierMap(raw.sessionOverrides, 'sessionOverrides'),
    };
  } catch (e) {
    process.stderr.write(`[permissions] Failed to parse ${configPath}: ${(e as Error).message}; ignoring this file\n`);
    return emptyConfig();
  }
}
@@ -0,0 +1,376 @@
1
+ /**
2
+ * Pipeline — the Discuss → Commit → Dispatch → Execute → Verify → Reflect loop.
3
+ *
4
+ * Orchestrates the flow between conversation model, worker model,
5
+ * and local verification tools. All calls are recorded in the audit ledger.
6
+ */
7
+
8
+ import { join } from 'node:path';
9
+ import type {
10
+ Session, SessionState, TaskCard, RepoMap,
11
+ LLMResponse, VerificationResult, ProviderId,
12
+ } from '../types.ts';
13
+ import { callLLM } from '../providers/llm-caller.ts';
14
+ import { createTaskCard, executeTaskCard, readRelevantFiles } from './task-card.ts';
15
+ import { parseFileReplacements, applyChanges, formatApplyResult, type ApplyResult } from './apply.ts';
16
+ import { verify } from './verify.ts';
17
+ import { Ledger } from '../audit/ledger.ts';
18
+ import type { Router as UnifiedRouter } from '../router/index.ts';
19
+ import type { RoutingCollector } from '../router/collector.ts';
20
+ import { PipelineError } from './errors.ts';
21
+ import { TaskStore } from './task-store.ts';
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Pipeline configuration
25
+ // ---------------------------------------------------------------------------
26
+
27
/** Settings that control one `runPipeline()` invocation. */
export interface PipelineConfig {
  /** Provider used for every phase when no `router` is supplied. */
  provider: ProviderId;
  /** Model used together with `provider` in the no-router fallback. */
  model?: string;
  /** Unified router; when present it picks provider and model per phase. */
  router?: UnifiedRouter;
  /** Collector that receives one routing-outcome record per recorded phase. */
  collector?: RoutingCollector;
  /** Failure count below which a failed verification triggers a retry. */
  promotionThreshold: number;
  /** Working directory files are read from, written to and verified in. */
  workingDir: string;
  /** Whether to run verification after the worker output is applied. */
  autoVerify: boolean;
  /** Task store for persisting task cards across sessions. */
  taskStore?: TaskStore;
  /**
   * Optional event sink. When set, the pipeline reports an `activity`
   * event for each phase as it happens (e.g. "pipeline: dispatch → …",
   * "pipeline: execute → …", "pipeline: verify → PASSED"), so a UI can
   * show progress instead of waiting on one opaque call. When omitted the
   * pipeline runs silently.
   */
  emit?: (event: Record<string, unknown>) => void;
}

// ---------------------------------------------------------------------------
// Pipeline result
// ---------------------------------------------------------------------------

/** Everything `runPipeline()` hands back for display in the conversation. */
export interface PipelineResult {
  /** The task card that was created and run. */
  task: TaskCard;
  /** Raw text returned by the worker model. */
  executionOutput: string;
  /** Outcome of writing file changes to disk, when any were written. */
  applied?: ApplyResult;
  /** Verification outcome; present only when verification ran. */
  verification?: VerificationResult;
  /** Summary text produced by the reflect phase. */
  reflection: string;
  /** True when the task's failure count reached the promotion threshold. */
  promoted: boolean;
}
73
+
74
+ // ---------------------------------------------------------------------------
75
+ // Pipeline execution
76
+ // ---------------------------------------------------------------------------
77
+
78
/**
 * Join the non-empty sections of a verification result (tests, typecheck,
 * lint) into the single report that is stored in the ledger and fed back
 * to the worker on retry.
 */
function formatVerificationOutput(verification: VerificationResult): string {
  return [
    verification.testOutput ? `Tests: ${verification.passed ? 'PASS' : 'FAIL'}\n${verification.testOutput}` : '',
    verification.typecheckOutput ? `Typecheck: ${verification.typecheckOutput}` : '',
    verification.lintOutput ? `Lint: ${verification.lintOutput}` : '',
  ].filter(Boolean).join('\n\n');
}

/**
 * Parse file replacements out of worker output and write them to disk.
 *
 * @param backupName Directory name under `.kondi-chat/backups` that is
 *   passed to `applyChanges` as the backup location.
 * @returns The apply result, or undefined when there is no working
 *   directory, the card is text-only, or the output contained no changes.
 */
function applyWorkerOutput(
  config: PipelineConfig,
  card: TaskCard,
  content: string,
  backupName: string,
): ApplyResult | undefined {
  if (!config.workingDir || card.outputMode === 'text') return undefined;
  const changes = parseFileReplacements(content);
  if (changes.length === 0) return undefined;

  const backupDir = join(config.workingDir, '.kondi-chat', 'backups', backupName);
  const result = applyChanges(config.workingDir, changes, backupDir);
  config.emit?.({
    type: 'activity',
    text: `pipeline: apply → ${result.applied.length} file(s) written${result.skipped.length > 0 ? `, ${result.skipped.length} skipped` : ''}`,
    activity_type: 'step',
  });
  return result;
}

/**
 * Run the full pipeline for a user request that requires code execution.
 *
 * 1. Dispatch — create task card from user intent + session state
 * 2. Execute — send task card to worker model, apply its output to disk
 * 3. Verify — run local tests/lint/typecheck; on failure (and while the
 *    failure count is below `promotionThreshold`) retry once with the
 *    failure output added as a constraint, apply the retry's output, and
 *    verify again
 * 4. Reflect — frontier summarizes what happened
 *
 * @returns The result for display in the conversation.
 * @throws PipelineError with stage 'dispatch' (fatal) or 'execute'
 *   (recoverable) when the respective model call fails. A failing
 *   reflection call does not throw; a placeholder summary is returned.
 */
export async function runPipeline(
  userIntent: string,
  session: Session,
  ledger: Ledger,
  config: PipelineConfig,
): Promise<PipelineResult> {

  /** Track what happened in each pipeline phase so the router's intent
   * classifier can make informed per-step decisions. */
  const priorPhases: Array<{ phase: string; model: string; summary?: string; succeeded?: boolean }> = [];

  /** Resolve provider/model from router or fallback, passing accumulated
   * phase context so the intent classifier sees the full picture. */
  const route = async (
    phase: import('../types.ts').LedgerPhase,
    promptText: string,
    taskKind?: string,
    failures = 0,
  ) => {
    if (config.router) {
      const decision = await config.router.select(
        phase,
        promptText,
        taskKind,
        failures,
        config.promotionThreshold,
        { priorPhases: [...priorPhases], currentGoal: userIntent },
      );
      return { provider: decision.model.provider, model: decision.model.id, decision };
    }
    return { provider: config.provider, model: config.model, decision: undefined as any };
  };

  const emit = config.emit;
  emit?.({ type: 'activity', text: `pipeline: starting — "${userIntent.slice(0, 80)}"`, activity_type: 'step' });

  // -----------------------------------------------------------------------
  // Step 1: Dispatch — create task card
  // -----------------------------------------------------------------------
  const dispatchRoute = await route('dispatch', userIntent);
  emit?.({
    type: 'activity',
    text: `pipeline: dispatch → ${dispatchRoute.model || '(fallback)'} (${dispatchRoute.decision?.reason || 'fallback'})`,
    activity_type: 'step',
  });
  let card;
  let dispatchResponse;
  try {
    ({ card, response: dispatchResponse } = await createTaskCard(
      userIntent,
      session.state,
      session.repoMap,
      dispatchRoute.provider,
      dispatchRoute.model,
      ledger,
    ));
  } catch (e) {
    throw new PipelineError(
      `dispatch failed: ${e instanceof Error ? e.message : String(e)}`,
      { severity: 'fatal', stage: 'dispatch', cause: e },
    );
  }

  // Record routing outcome
  config.collector?.record({
    timestamp: new Date().toISOString(),
    phase: 'dispatch', taskKind: card.kind, promptLength: userIntent.length,
    contextTokens: dispatchResponse.inputTokens, failures: 0, promoted: false,
    modelId: dispatchResponse.model, provider: dispatchRoute.provider,
    succeeded: true, inputTokens: dispatchResponse.inputTokens,
    outputTokens: dispatchResponse.outputTokens,
    costUsd: 0, latencyMs: dispatchResponse.latencyMs,
    routeReason: dispatchRoute.decision?.reason || 'fallback',
    routingTier: dispatchRoute.decision?.tier,
  });

  priorPhases.push({
    phase: 'dispatch',
    model: dispatchResponse.model,
    summary: `task ${card.id} (${card.kind}): ${card.goal.slice(0, 80)}`,
    succeeded: true,
  });

  card.status = 'executing';
  session.tasks.push(card);
  session.state.activeTaskId = card.id;
  config.taskStore?.setCurrent(card);

  // -----------------------------------------------------------------------
  // Step 2: Execute — router picks the worker model
  // -----------------------------------------------------------------------
  const fileContents = config.workingDir
    ? readRelevantFiles(config.workingDir, card.relevantFiles)
    : '';

  const execRoute = await route('execute', card.goal, card.kind, card.failures);
  // The route that produced the current `executionResponse`; replaced by
  // the retry route if a retry runs, so the collector record is accurate.
  let finalExecRoute = execRoute;
  emit?.({
    type: 'activity',
    text: `pipeline: execute → ${execRoute.model || '(fallback)'} (${execRoute.decision?.reason || 'fallback'})`,
    activity_type: 'step',
  });
  let executionResponse;
  try {
    executionResponse = await executeTaskCard(
      card,
      session.repoMap,
      fileContents,
      execRoute.provider,
      execRoute.model,
      ledger,
    );
  } catch (e) {
    throw new PipelineError(
      `execute failed: ${e instanceof Error ? e.message : String(e)}`,
      { severity: 'recoverable', stage: 'execute', cause: e },
    );
  }

  priorPhases.push({
    phase: 'execute',
    model: executionResponse.model,
    summary: `wrote ${executionResponse.outputTokens} output tokens`,
    succeeded: true,
  });

  // -----------------------------------------------------------------------
  // Step 2.5: Apply — write model output to disk
  // -----------------------------------------------------------------------
  let applyResult: ApplyResult | undefined = applyWorkerOutput(
    config, card, executionResponse.content, card.id,
  );

  // -----------------------------------------------------------------------
  // Step 3: Verify — run local tools
  // -----------------------------------------------------------------------
  let verification: VerificationResult | undefined;

  if (config.autoVerify && config.workingDir) {
    card.status = 'verifying';
    emit?.({ type: 'activity', text: 'pipeline: verify → running tests/typecheck/lint', activity_type: 'step' });
    verification = verify(config.workingDir, session.repoMap);
    emit?.({
      type: 'activity',
      text: `pipeline: verify → ${verification.passed ? 'PASSED' : 'FAILED'}`,
      activity_type: 'step',
    });
    priorPhases.push({
      phase: 'verify',
      model: 'local',
      summary: verification.passed ? 'tests passed' : 'tests FAILED',
      succeeded: verification.passed,
    });

    const verifyOutput = formatVerificationOutput(verification);
    ledger.recordVerification(card.id, verification.passed, verifyOutput);

    // Retry on failure — enrich prompt with error context so router can escalate
    if (!verification.passed && card.failures < config.promotionThreshold) {
      card.failures++;
      session.state.recentFailures.push(
        `Task ${card.id} failed (attempt ${card.failures}): ${verifyOutput.slice(0, 200)}`
      );

      // Retry — router may promote to a better model based on failure count
      const retryRoute = await route('execute', card.goal, card.kind, card.failures);
      const retryCard = { ...card, constraints: [...card.constraints, `Previous attempt failed with: ${verifyOutput.slice(0, 500)}`] };
      // The first attempt may have rewritten the relevant files, and the
      // failure output refers to what is on disk now — so read them again.
      const retryFileContents = readRelevantFiles(config.workingDir, card.relevantFiles);
      try {
        executionResponse = await executeTaskCard(
          retryCard,
          session.repoMap,
          retryFileContents,
          retryRoute.provider,
          retryRoute.model,
          ledger,
        );
      } catch (e) {
        throw new PipelineError(
          `execute retry failed: ${e instanceof Error ? e.message : String(e)}`,
          { severity: 'recoverable', stage: 'execute', cause: e },
        );
      }
      finalExecRoute = retryRoute;
      priorPhases.push({
        phase: 'execute',
        model: executionResponse.model,
        summary: `retry ${card.failures}: wrote ${executionResponse.outputTokens} output tokens`,
        succeeded: true,
      });

      // Write the retry's output before re-verifying; otherwise the second
      // verification just re-tests the files from the failed attempt. A
      // separate backup directory keeps the first attempt's backups intact.
      const retryApplied = applyWorkerOutput(
        config, card, executionResponse.content, `${card.id}-retry-${card.failures}`,
      );
      if (retryApplied) applyResult = retryApplied;

      // Re-verify
      verification = verify(config.workingDir, session.repoMap);
      priorPhases.push({
        phase: 'verify',
        model: 'local',
        summary: verification.passed ? 'tests passed' : 'tests FAILED',
        succeeded: verification.passed,
      });
      ledger.recordVerification(card.id, verification.passed, formatVerificationOutput(verification));
    }
  }

  const promoted = card.failures >= config.promotionThreshold;
  card.status = verification?.passed ? 'passed' : (promoted ? 'promoted' : 'failed');
  card.completedAt = new Date().toISOString();

  // Record execution outcome for router training
  config.collector?.record({
    timestamp: new Date().toISOString(),
    phase: 'execute', taskKind: card.kind, promptLength: card.goal.length,
    contextTokens: executionResponse.inputTokens, failures: card.failures, promoted,
    modelId: executionResponse.model, provider: executionResponse.provider,
    succeeded: verification?.passed ?? true,
    verificationPassed: verification?.passed,
    inputTokens: executionResponse.inputTokens,
    outputTokens: executionResponse.outputTokens,
    costUsd: 0, latencyMs: executionResponse.latencyMs,
    routeReason: finalExecRoute.decision?.reason || 'fallback',
    routingTier: finalExecRoute.decision?.tier,
  });

  // -----------------------------------------------------------------------
  // Step 4: Reflect — frontier summarizes what happened
  // -----------------------------------------------------------------------
  const reflectRoute = await route('reflect', card.goal);
  emit?.({
    type: 'activity',
    text: `pipeline: reflect → ${reflectRoute.model || '(fallback)'} (${reflectRoute.decision?.reason || 'fallback'})`,
    activity_type: 'step',
  });

  const verificationSummary = verification
    ? [
      `Verification: ${verification.passed ? 'PASSED' : 'FAILED'}`,
      verification.testOutput ? `Test output: ${verification.testOutput.slice(0, 500)}` : '',
      verification.typecheckOutput ? `Typecheck: ${verification.typecheckOutput.slice(0, 500)}` : '',
    ].join('\n')
    : 'Verification: skipped';
  const reflectPrompt = [
    `Task: ${card.goal}`,
    `Kind: ${card.kind}`,
    `Status: ${card.status}`,
    '',
    'Worker output (summary):',
    executionResponse.content.slice(0, 3000),
    '',
    verificationSummary,
    '',
    'Summarize the results for the user. If failed, suggest what to try next.',
  ].join('\n');

  let reflectionResponse: LLMResponse;
  try {
    reflectionResponse = await callLLM({
      provider: reflectRoute.provider,
      model: reflectRoute.model,
      systemPrompt: 'You are summarizing the results of a coding task for the user. Be concise. Report what was done, whether it passed verification, and what to do next.',
      userMessage: reflectPrompt,
      maxOutputTokens: 1500,
    });
  } catch (e) {
    // Reflection is non-essential — we already executed and verified. If
    // the reflection call fails, degrade gracefully with a synthetic
    // summary instead of nuking the whole pipeline result.
    reflectionResponse = {
      content: `(reflection failed: ${e instanceof Error ? e.message : String(e)})`,
      model: reflectRoute.model || 'unknown',
      provider: reflectRoute.provider,
      inputTokens: 0, outputTokens: 0, latencyMs: 0,
    };
  }

  ledger.record('reflect', reflectionResponse, `Reflect on task ${card.id}`, { taskId: card.id });

  // Clean up state + persist completed task to history
  session.state.activeTaskId = undefined;
  if (card.status === 'passed') {
    session.state.recentFailures = session.state.recentFailures.filter(f => !f.includes(card.id));
  }
  config.taskStore?.complete();

  return {
    task: card,
    executionOutput: executionResponse.content,
    applied: applyResult,
    verification,
    reflection: reflectionResponse.content,
    promoted,
  };
}