@thispointon/kondi-chat 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +556 -0
  3. package/bin/kondi-chat +56 -0
  4. package/bin/kondi-chat.js +72 -0
  5. package/package.json +55 -0
  6. package/scripts/demo.tape +49 -0
  7. package/scripts/postinstall.cjs +103 -0
  8. package/src/audit/analytics.ts +261 -0
  9. package/src/audit/ledger.ts +253 -0
  10. package/src/audit/telemetry.ts +165 -0
  11. package/src/cli/backend.ts +675 -0
  12. package/src/cli/commands.ts +419 -0
  13. package/src/cli/help.ts +182 -0
  14. package/src/cli/submit-helpers.ts +159 -0
  15. package/src/cli/submit.ts +539 -0
  16. package/src/cli/wizard.ts +121 -0
  17. package/src/context/bootstrap.ts +138 -0
  18. package/src/context/budget.ts +100 -0
  19. package/src/context/manager.ts +666 -0
  20. package/src/context/memory.ts +160 -0
  21. package/src/context/preflight.ts +176 -0
  22. package/src/context/project-brain.ts +101 -0
  23. package/src/context/receipts.ts +108 -0
  24. package/src/context/skills.ts +154 -0
  25. package/src/context/symbol-index.ts +240 -0
  26. package/src/council/profiles.ts +137 -0
  27. package/src/council/tool.ts +138 -0
  28. package/src/council-engine/cli/council-artifacts.ts +230 -0
  29. package/src/council-engine/cli/council-config.ts +178 -0
  30. package/src/council-engine/cli/council-session-export.ts +116 -0
  31. package/src/council-engine/cli/kondi.ts +98 -0
  32. package/src/council-engine/cli/llm-caller.ts +229 -0
  33. package/src/council-engine/cli/localStorage-shim.ts +119 -0
  34. package/src/council-engine/cli/node-platform.ts +68 -0
  35. package/src/council-engine/cli/run-council.ts +481 -0
  36. package/src/council-engine/cli/run-pipeline.ts +772 -0
  37. package/src/council-engine/cli/session-export.ts +153 -0
  38. package/src/council-engine/configs/councils/analysis.json +101 -0
  39. package/src/council-engine/configs/councils/code-planning.json +86 -0
  40. package/src/council-engine/configs/councils/coding.json +89 -0
  41. package/src/council-engine/configs/councils/debate.json +97 -0
  42. package/src/council-engine/configs/councils/solo-claude.json +34 -0
  43. package/src/council-engine/configs/councils/solo-gpt.json +34 -0
  44. package/src/council-engine/council/coding-orchestrator.ts +1205 -0
  45. package/src/council-engine/council/context-bootstrap.ts +147 -0
  46. package/src/council-engine/council/context-inspection.ts +42 -0
  47. package/src/council-engine/council/context-store.ts +763 -0
  48. package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
  49. package/src/council-engine/council/factory.ts +164 -0
  50. package/src/council-engine/council/index.ts +201 -0
  51. package/src/council-engine/council/ledger-store.ts +438 -0
  52. package/src/council-engine/council/prompts.ts +1689 -0
  53. package/src/council-engine/council/storage-cleanup.ts +164 -0
  54. package/src/council-engine/council/store.ts +1110 -0
  55. package/src/council-engine/council/synthesis.ts +291 -0
  56. package/src/council-engine/council/types.ts +845 -0
  57. package/src/council-engine/council/validation.ts +613 -0
  58. package/src/council-engine/pipeline/build-detect.ts +73 -0
  59. package/src/council-engine/pipeline/executor.ts +1048 -0
  60. package/src/council-engine/pipeline/index.ts +9 -0
  61. package/src/council-engine/pipeline/install-detect.ts +84 -0
  62. package/src/council-engine/pipeline/memory-store.ts +182 -0
  63. package/src/council-engine/pipeline/output-parsers.ts +146 -0
  64. package/src/council-engine/pipeline/run-output.ts +149 -0
  65. package/src/council-engine/pipeline/session-import.ts +177 -0
  66. package/src/council-engine/pipeline/store.ts +753 -0
  67. package/src/council-engine/pipeline/test-detect.ts +82 -0
  68. package/src/council-engine/pipeline/types.ts +401 -0
  69. package/src/council-engine/services/deliberationSummary.ts +114 -0
  70. package/src/council-engine/tsconfig.json +16 -0
  71. package/src/council-engine/types/mcp.ts +122 -0
  72. package/src/council-engine/utils/filterTools.ts +73 -0
  73. package/src/engine/apply.ts +238 -0
  74. package/src/engine/checkpoints.ts +237 -0
  75. package/src/engine/consultants.ts +347 -0
  76. package/src/engine/diff.ts +171 -0
  77. package/src/engine/errors.ts +102 -0
  78. package/src/engine/git-tools.ts +246 -0
  79. package/src/engine/hooks.ts +181 -0
  80. package/src/engine/loop-guard.ts +155 -0
  81. package/src/engine/permissions.ts +293 -0
  82. package/src/engine/pipeline.ts +376 -0
  83. package/src/engine/sub-agents.ts +133 -0
  84. package/src/engine/task-card.ts +185 -0
  85. package/src/engine/task-router.ts +256 -0
  86. package/src/engine/task-store.ts +86 -0
  87. package/src/engine/tools.ts +783 -0
  88. package/src/engine/verify.ts +111 -0
  89. package/src/mcp/client.ts +225 -0
  90. package/src/mcp/config.ts +120 -0
  91. package/src/mcp/tool-manager.ts +192 -0
  92. package/src/mcp/types.ts +61 -0
  93. package/src/providers/llm-caller.ts +943 -0
  94. package/src/providers/rate-limiter.ts +238 -0
  95. package/src/router/NOTES.md +28 -0
  96. package/src/router/collector.ts +474 -0
  97. package/src/router/embeddings.ts +286 -0
  98. package/src/router/index.ts +299 -0
  99. package/src/router/intent-router.ts +225 -0
  100. package/src/router/nn-router.ts +205 -0
  101. package/src/router/profiles.ts +309 -0
  102. package/src/router/registry.ts +565 -0
  103. package/src/router/rules.ts +274 -0
  104. package/src/router/train.py +408 -0
  105. package/src/session/store.ts +211 -0
  106. package/src/test-utils/mock-llm.ts +39 -0
  107. package/src/types.ts +322 -0
  108. package/src/web/manager.ts +311 -0
@@ -0,0 +1,539 @@
1
+ /**
2
+ * Agent submit handler — runs a user message through the loop.
3
+ *
4
+ * Extracted from backend.ts to shrink the god-object. The function has
5
+ * one closure-captured dependency (`emit` for pushing TUI events) which
6
+ * is now a named field on `SubmitDeps`. Everything else is passed in
7
+ * explicitly so this module can be tested without starting the full
8
+ * backend.
9
+ *
10
+ * Two entry paths:
11
+ * 1. @mention prefix ("@gpt write X") → single-shot call to a pinned
12
+ * model, no agent loop.
13
+ * 2. Everything else → full agent loop with tool calls, compaction,
14
+ * checkpoints, loop-guard-enforced caps, and optional autonomous
15
+ * continuation when `opts.loop` is true (the /loop command).
16
+ */
17
+
18
+ import { join } from 'node:path';
19
+ import type { Session, LLMMessage, ProviderId, ToolCall } from '../types.ts';
20
+ import type { ContextManager } from '../context/manager.ts';
21
+ import type { Ledger } from '../audit/ledger.ts';
22
+ import { estimateCost } from '../audit/ledger.ts';
23
+ import type { Router as UnifiedRouter } from '../router/index.ts';
24
+ import type { RoutingCollector } from '../router/collector.ts';
25
+ import type { ToolContext } from '../engine/tools.ts';
26
+ import type { ToolManager } from '../mcp/tool-manager.ts';
27
+ import type { ProfileManager } from '../router/profiles.ts';
28
+ import type { CheckpointManager } from '../engine/checkpoints.ts';
29
+ import { callLLM } from '../providers/llm-caller.ts';
30
+ import { LoopGuard } from '../engine/loop-guard.ts';
31
+ import { isMutatingToolCall, predictedMutations } from '../engine/checkpoints.ts';
32
+ import { detectCommands } from '../engine/verify.ts';
33
+ import { compactInLoop, classifyPhase } from './submit-helpers.ts';
34
+ import { classifyTaskLocal, frameProblem, type TaskClassification } from '../engine/task-router.ts';
35
+ import { ReceiptStore, buildReceipt } from '../context/receipts.ts';
36
+ import { assembleBrainContext } from '../context/project-brain.ts';
37
+
/**
 * Everything `handleSubmit` needs from the running backend, passed in
 * explicitly so the handler can be exercised without starting the backend.
 */
export interface SubmitDeps {
  /** Current session; its messages, id, working directory and repo map are read. */
  session: Session;
  /** Owns the conversation history and assembles the prompt for each call. */
  contextManager: ContextManager;
  /** Audit ledger that receives one record per model response. */
  ledger: Ledger;
  /** Chooses the model for each phase and exposes the model registry. */
  router: UnifiedRouter;
  /** Routing-data collector (carried with the deps; not read by `handleSubmit` itself). */
  collector: RoutingCollector;
  /** Shared tool context; the per-turn loop guard is attached to it. */
  toolCtx: ToolContext;
  /** Supplies the tool list for a call and executes tool calls. */
  toolManager: ToolManager;
  /** Source of the active profile (loop bounds, context budget, role pinning). */
  profiles: ProfileManager;
  /** Creates a checkpoint before the first mutating tool call of a turn. */
  checkpointManager: CheckpointManager;
  /** Push a live event back to the TUI. */
  emit: (event: Record<string, unknown>) => void;
}
51
+
/** Optional per-call switches for `handleSubmit`. */
export interface SubmitOptions {
  /**
   * Autonomous-loop mode: keep iterating after "no tool calls" responses
   * until the model answers DONE/STUCK or the loop caps are hit.
   */
  loop?: boolean;
  /**
   * Goal text shown to the model during /loop continuation prompts.
   * When omitted, the raw user input is used instead.
   */
  loopGoal?: string;
}
58
+
/**
 * Short, human-readable tool-arg summary shown in TUI tool-call previews.
 *
 * Well-known tools get a tailored one-line summary (path, pattern, command,
 * description or goal); every other tool falls back to the JSON form of its
 * arguments. Free-text summaries are truncated (60 characters, 40 for the
 * plan goal) so a preview stays on one line.
 *
 * @param tc Tool call as produced by the model.
 * @returns A display string; never throws for missing or empty arguments.
 */
function formatToolArgs(tc: ToolCall): string {
  // Tool calls originate from model output, so `arguments` can be absent.
  // Fall back to an empty object instead of crashing the preview.
  const args = (tc.arguments ?? {}) as Record<string, unknown>;
  switch (tc.name) {
    case 'read_file': return String(args.path || '');
    case 'list_files': return String(args.path || '.');
    // A missing pattern renders as "" rather than the literal text "undefined".
    case 'search_code': return `"${String(args.pattern ?? '')}"`;
    case 'run_command': return String(args.command || '').slice(0, 60);
    case 'create_task': return String(args.description || '').slice(0, 60);
    case 'update_plan': return args.goal ? `goal="${String(args.goal).slice(0, 40)}"` : '...';
    // JSON.stringify can return undefined for non-serializable input; guard the slice.
    default: return (JSON.stringify(args) ?? '').slice(0, 60);
  }
}
72
+
/**
 * Handle one submitted user message and stream the outcome to the TUI
 * through `deps.emit`.
 *
 * Two paths:
 *  1. Input of the form `@alias text` is sent as a single streamed call to
 *     the model registered under that alias — no tools, no loop.
 *  2. Anything else is classified locally, optionally framed, and then run
 *     through the tool-calling loop. The loop is bounded by a `LoopGuard`
 *     built from the active profile; when the guard trips, the model gets one
 *     final tool-less call to summarize progress.
 *
 * After the loop the assembled answer is stored in the context manager, a
 * receipt is recorded, and compaction / session-state updates are awaited.
 *
 * Errors thrown by the router or by the main model calls are not caught
 * here and propagate to the caller. Framing, checkpoint creation,
 * auto-verify and the cap-reached summary call are best-effort and are
 * reported through `emit` or silently degraded instead.
 *
 * @param input Raw text typed by the user.
 * @param deps  Backend collaborators (see `SubmitDeps`).
 * @param opts  Optional switches; `loop` enables autonomous continuation.
 * @returns Resolves once the turn has been fully processed.
 */
export async function handleSubmit(
  input: string,
  deps: SubmitDeps,
  opts?: SubmitOptions,
): Promise<void> {
  const {
    session, contextManager, ledger, router, toolCtx, toolManager,
    profiles, checkpointManager, emit,
  } = deps;

  // Turn number is computed before this turn's user message is added.
  const turnNumber = session.messages.filter(m => m.role === 'user').length + 1;
  let checkpointCreated = false;
  // Spec 08 — profile-driven bounds replace the old MAX_TOOL_ITERATIONS=20.
  const loopGuard = new LoopGuard(profiles.getActive());
  toolCtx.loopGuard = loopGuard;

  // ── @mention path ───────────────────────────────────────────────────
  const mentionMatch = input.match(/^@(\S+)\s+([\s\S]+)/);
  if (mentionMatch) {
    const alias = mentionMatch[1];
    const message = mentionMatch[2];
    const targetModel = router.registry.getByAlias(alias);
    if (!targetModel) {
      const candidates = router.registry.findAliasCandidates(alias);
      const hint = candidates.length > 1
        ? ` — ambiguous, could be: ${candidates.map(a => `@${a}`).join(', ')}`
        : candidates.length === 0
          ? ` — available: ${router.registry.getAliases().map(a => `@${a}`).join(', ')}`
          : '';
      emit({ type: 'error', message: `Unknown model: @${alias}${hint}` });
      return;
    }

    contextManager.addUserMessage(input);
    const { systemPrompt, userMessage, cacheablePrefix } = contextManager.assemblePrompt();
    const msgId = `msg-${Date.now()}`;
    emit({ type: 'message', id: msgId, role: 'assistant', content: '', model_label: targetModel.alias || targetModel.name });
    emit({ type: 'status', text: `@${alias} ...` });

    let streamedContent = '';
    const response = await callLLM({
      provider: targetModel.provider,
      model: targetModel.id,
      systemPrompt, userMessage,
      maxOutputTokens: 8192, cacheablePrefix,
      stream: true,
      onToken: (token: string) => {
        streamedContent += token;
        emit({ type: 'message_update', id: msgId, content: streamedContent });
      },
    });

    const cost = estimateCost(response.model, response.inputTokens, response.outputTokens);
    contextManager.addAssistantMessage(response);
    ledger.record('discuss', response, message.slice(0, 200));

    emit({
      type: 'message', id: msgId, role: 'assistant',
      content: response.content,
      model_label: targetModel.alias || targetModel.name,
      reasoning_content: response.reasoningContent,
    });
    emit({
      type: 'message_update', id: msgId, stats: {
        input_tokens: response.inputTokens, output_tokens: response.outputTokens,
        cost_usd: cost, models: [response.model], provider: targetModel.provider,
        // Fall back to the alias the user typed so a model without its own
        // alias never renders as "@undefined".
        route_reason: `@${targetModel.alias || alias}`, iterations: 1,
      },
    });
    return;
  }

  // ── Task classification — fast local heuristic, no LLM call ────────
  //
  // The local classifier handles 95% of inputs instantly (regex + word
  // count). Only genuinely ambiguous multi-sentence requests with broad
  // verbs like "redesign" or "overhaul" fall through to the LLM classifier.
  // This eliminates 2-5 seconds of latency on every single message.
  const recentMessages = session.messages.slice(-4).map(m => `${m.role}: ${(m.content || '').slice(0, 200)}`).join('\n');
  const taskClass = classifyTaskLocal(input, recentMessages);

  // Handle frame_then_execute — frame the problem, show the frame, then
  // run the agent loop against the framed goal instead of the raw input.
  // Only triggered for broad multi-sentence requests with words like
  // "redesign", "overhaul", etc. — very rare in practice.
  let effectiveInput = input;
  if (taskClass.mode === 'frame_then_execute') {
    emit({ type: 'activity', text: `task-router: framing problem (${taskClass.reason})`, activity_type: 'step' });
    try {
      // Resolve cheapest model only when framing is actually needed.
      const classifier = router.getClassifier();
      let cheapProvider: ProviderId = classifier?.provider || 'anthropic';
      let cheapModel: string | undefined = classifier?.model;
      if (!classifier) {
        const pinning = profiles.getActive().rolePinning;
        if (pinning) {
          // Cheapest (by input cost) available model among the profile's pinned ones.
          const pinIds = new Set(Object.values(pinning));
          const candidates = router.registry.getAvailable().filter(m => pinIds.has(m.id));
          candidates.sort((a, b) => a.inputCostPer1M - b.inputCostPer1M);
          cheapProvider = candidates[0]?.provider || 'anthropic';
          cheapModel = candidates[0]?.id;
        }
      }
      const frame = await frameProblem(input, '', cheapProvider, cheapModel);
      emit({ type: 'activity', text: `frame: ${frame.interpretedGoal}`, activity_type: 'step' });
      if (frame.successCriteria.length > 0) {
        emit({ type: 'activity', text: `success: ${frame.successCriteria.join('; ')}`, activity_type: 'step' });
      }
      if (frame.proposedPlan.length > 0) {
        emit({ type: 'activity', text: `plan: ${frame.proposedPlan.join(' → ')}`, activity_type: 'step' });
      }
      // Use the framed goal as the effective input for the agent loop.
      effectiveInput = `${frame.interpretedGoal}\n\nSuccess criteria: ${frame.successCriteria.join('; ')}\n\nPlan: ${frame.proposedPlan.join('; ')}\n\nOriginal request: ${input}`;
    } catch {
      // If framing fails, proceed with the original input.
      emit({ type: 'activity', text: 'task-router: framing failed, proceeding with original request', activity_type: 'step' });
    }
  } else {
    emit({ type: 'activity', text: `task-router: ${taskClass.mode} (${taskClass.reason})`, activity_type: 'step' });
  }

  const workingDir = session.workingDirectory || process.cwd();
  const storageDir = join(workingDir, '.kondi-chat');
  const receipts = new ReceiptStore(storageDir, session.id);

  // ── Project Brain: assemble all context (memory, receipts, skills, preflight) ──
  // Skip preflight for short messages and follow-ups — the model has tools
  // to read files when it needs them. Preflight only helps on substantive
  // first-turn requests where it can save 2-3 tool calls.
  const isFollowUp = turnNumber > 1 || effectiveInput.split(/\s+/).length < 10;
  const brain = assembleBrainContext(workingDir, session, effectiveInput, { skipPreflight: isFollowUp });
  if (brain.preflightFiles.length > 0) {
    emit({ type: 'activity', text: `preflight: loaded ${brain.preflightFiles.join(', ')}`, activity_type: 'step' });
  }
  if (brain.skillsUsed.length > 0) {
    emit({ type: 'activity', text: `skills: ${brain.skillsUsed.join(', ')}`, activity_type: 'step' });
  }

  // ── Regular agent loop ──────────────────────────────────────────────
  contextManager.addUserMessage(effectiveInput);
  const { systemPrompt: rawSystemPrompt, userMessage, cacheablePrefix } = contextManager.assemblePrompt();
  // Inject brain context (memory + receipts + skills + preflight files).
  const systemPrompt = brain.fullContext
    ? `${rawSystemPrompt}\n\n${brain.fullContext}`
    : rawSystemPrompt;
  const messages: LLMMessage[] = [{ role: 'user', content: userMessage }];

  // Resolve the post-edit typecheck command once per turn. Prefer the
  // session-level repoMap (set by bootstrap), fall back to detecting from
  // project files (handles TS/Python/Rust/Go). `null` means "no typecheck
  // applies here, skip auto-verify silently" — much better than blindly
  // running `tsc --noEmit` against a Python or Rust repo.
  const autoVerifyCmd: string | null = (() => {
    const fromRepoMap = session.repoMap?.commands?.typecheck;
    if (fromRepoMap) return fromRepoMap;
    const detected = detectCommands(workingDir).typecheck;
    return detected ?? null;
  })();

  let totalInputTokens = 0, totalOutputTokens = 0, totalCost = 0;
  let finalContent = '';
  let respondingModel = '';
  // Provider of the most recently selected model; assigned on every loop pass.
  let respondingProvider: ProviderId | undefined;
  let respondingReason = '';
  const allToolCalls: Array<{ name: string; args: string; result: string; is_error: boolean; diff?: string }> = [];
  const modelsUsed = new Set<string>();
  const reasoningChunks: string[] = [];

  const msgId = `msg-${Date.now()}`;
  emit({ type: 'message', id: msgId, role: 'assistant', content: '', model_label: '...' });

  // Dynamic phase: reclassified per iteration based on what the model
  // is doing. Investigation (read/search) → dispatch (planning) →
  // execute (write/edit) → reflect (review). The router selects a
  // different model for each phase from the profile's rolePinning.
  // This is how the agent switches models mid-turn automatically.
  let currentPhase: import('../types.ts').LedgerPhase = classifyPhase(effectiveInput);
  emit({
    type: 'activity',
    text: `router: phase=${currentPhase} (${currentPhase === 'execute' ? 'coding intent detected' : 'discussion / reasoning'})`,
    activity_type: 'step',
  });

  while (true) {
    const iteration = loopGuard.check().iteration;
    const decision = await router.select(currentPhase, userMessage, undefined, iteration);
    respondingModel = decision.model.alias || decision.model.name;
    respondingProvider = decision.model.provider;
    respondingReason = decision.reason;
    emit({ type: 'status', text: `${respondingModel} thinking${iteration > 0 ? ` (step ${iteration + 1})` : ''}...` });
    emit({
      type: 'activity',
      text: `→ ${respondingModel} (${decision.tier}: ${decision.reason})`,
      activity_type: 'step',
    });
    emit({ type: 'message_update', id: msgId, model_label: respondingModel });

    // Before each model call, enforce the profile's contextBudget by
    // stubbing old tool results in place. No LLM calls — zero cost.
    const budget = profiles.getActive().contextBudget;
    const compaction = compactInLoop(messages, budget);
    if (compaction.savedBytes > 0) {
      emit({
        type: 'activity',
        text: `context: ${compaction.before.toLocaleString()} → ${compaction.after.toLocaleString()} tokens (${compaction.savedBytes.toLocaleString()} chars pruned)`,
        activity_type: 'step',
      });
    }

    let iterContent = '';
    const response = await callLLM({
      provider: decision.model.provider,
      model: decision.model.id,
      systemPrompt, messages,
      tools: toolManager.getTools('discuss'),
      maxOutputTokens: 8192, cacheablePrefix,
      stream: true,
      onToken: (token: string) => {
        iterContent += token;
        // Show accumulated content across ALL iterations, not just
        // the current one. This prevents earlier text ("Step 3: ...")
        // from vanishing when the model calls tools and starts a new
        // iteration.
        emit({ type: 'message_update', id: msgId, content: finalContent + iterContent });
      },
    });

    // If the response came from a fallback model, update the label so the
    // user sees which model actually responded, not just which was requested.
    if (response.wasFallback) {
      const fallbackAlias = router.registry.getById(response.model)?.alias || response.model;
      respondingModel = `${respondingModel}→${fallbackAlias}`;
      emit({ type: 'activity', text: `fallback: ${response.requestedModel || 'unknown'} failed, used ${response.model}`, activity_type: 'step' });
      emit({ type: 'message_update', id: msgId, model_label: respondingModel });
    }

    const iterCost = estimateCost(response.model, response.inputTokens, response.outputTokens);
    totalInputTokens += response.inputTokens;
    totalOutputTokens += response.outputTokens;
    totalCost += iterCost;
    modelsUsed.add(response.model);

    if (response.reasoningContent) {
      const header = reasoningChunks.length === 0
        ? `── ${response.model} ──`
        : `\n── ${response.model} (step ${reasoningChunks.length + 1}) ──`;
      reasoningChunks.push(`${header}\n${response.reasoningContent}`);
    }

    ledger.record('discuss', response, messages[messages.length - 1]?.content?.slice(0, 200) || '');

    // Accumulate this iteration's text so it persists across iterations.
    // Without this, "Step 3: ..." vanishes when the model calls tools.
    if (response.content) {
      finalContent += (finalContent ? '\n\n' : '') + response.content;
    }

    if (!response.toolCalls || response.toolCalls.length === 0) {
      // Autonomous-loop mode: when the model stops calling tools but the
      // goal isn't explicitly marked done, synthesize a "continue" prompt
      // and keep iterating. LoopGuard still enforces hard caps.
      if (opts?.loop) {
        // Count this text-only iteration against the guard. Without this,
        // a model that never calls tools and never answers DONE/STUCK
        // would keep looping with the iteration and cost counters frozen.
        loopGuard.recordIteration(iterCost, undefined);
        const body = (response.content || '').trim();
        const terminated = /^DONE\b/mi.test(body) || /^STUCK\b/mi.test(body);
        if (!terminated && !loopGuard.check().shouldStop) {
          messages.push({ role: 'assistant', content: response.content || '(progress)' });
          messages.push({
            role: 'user',
            content:
              `Continue working on the goal: "${opts.loopGoal || input}".\n` +
              `If the goal is fully accomplished, respond with DONE on its own line followed by a brief summary.\n` +
              `If you are blocked and cannot proceed, respond with STUCK: <reason>.\n` +
              `Otherwise keep going — call the tools you need.`,
          });
          emit({ type: 'activity', text: 'loop: continuing — no terminal marker', activity_type: 'step' });
          continue;
        }
      }
      // finalContent already accumulated above
      break;
    }

    // Narrowed by the guard above: at least one tool call is present.
    const toolCalls = response.toolCalls;

    messages.push({
      role: 'assistant',
      content: response.content || undefined,
      toolCalls,
      reasoningContent: response.reasoningContent,
    });

    const toolResults = [];
    for (const tc of toolCalls) {
      const toolArgs = formatToolArgs(tc);
      emit({ type: 'tool_call', name: tc.name, args: toolArgs, is_error: false });
      emit({ type: 'activity', text: `${tc.name}(${toolArgs})`, activity_type: 'tool' });

      // Spec 05 — create a checkpoint before the first mutating tool in this turn.
      if (!checkpointCreated && isMutatingToolCall(tc.name, tc.arguments)) {
        try {
          const predicted = new Set([
            ...(toolCtx.mutatedFiles ?? []),
            ...predictedMutations(tc.name, tc.arguments),
          ]);
          checkpointManager.create(
            `Turn ${turnNumber}: ${input.slice(0, 60)}`,
            input,
            turnNumber,
            totalCost,
            predicted,
          );
          checkpointCreated = true;
        } catch (e) {
          // A failed checkpoint is reported but does not block the tool call.
          emit({ type: 'error', message: `Checkpoint failed: ${(e as Error).message}` });
        }
      }

      const result = await toolManager.execute(tc.name, tc.arguments, toolCtx);

      // Post-edit verification policy: after any file mutation, run the
      // detected typecheck so the model sees compile errors immediately
      // without needing to call run_command itself. Skipped entirely when
      // the project has no typecheck (plain JS, Bash repo, etc.) — running
      // `tsc` against a non-TS project produces noise and 30s waits.
      if (autoVerifyCmd && isMutatingToolCall(tc.name, tc.arguments) && !result.isError) {
        try {
          const { execSync: execSyncVerify } = await import('node:child_process');
          const verifyResult = execSyncVerify(autoVerifyCmd, {
            cwd: workingDir,
            encoding: 'utf-8',
            timeout: 30_000,
            stdio: ['pipe', 'pipe', 'pipe'],
          }).trim();
          if (verifyResult) {
            result.content += `\n\n[auto-verify: ${verifyResult.includes('error') ? 'ISSUES FOUND' : 'clean'}]\n${verifyResult.slice(0, 500)}`;
          }
        } catch (verifyErr: unknown) {
          // execSync throws on non-zero exit or timeout; the thrown value
          // carries the captured stdout/stderr. Surface whichever is present.
          const failure = (verifyErr ?? {}) as { stdout?: unknown; stderr?: unknown; message?: unknown };
          const output = String(failure.stdout || failure.stderr || failure.message || '').trim();
          if (output) {
            result.content += `\n\n[auto-verify: FAILED]\n${output.slice(0, 500)}`;
          }
        }
      }

      // Cap what goes back to the model (3000 chars) and what the TUI shows (300).
      const capped = result.content.length > 3000 ? result.content.slice(0, 3000) + '...' : result.content;

      allToolCalls.push({
        name: tc.name,
        args: toolArgs,
        result: capped.slice(0, 300),
        is_error: result.isError || false,
        diff: result.diff,
      });
      // Send the text accumulated across all iterations (it already includes
      // this iteration's content) so earlier text is not wiped from the
      // message when a tool result arrives.
      emit({ type: 'message_update', id: msgId, content: finalContent, tool_calls: [...allToolCalls] });

      toolResults.push({ toolCallId: tc.id, content: capped, isError: result.isError, diff: result.diff });
    }

    messages.push({ role: 'tool', toolResults });

    // Dynamic phase reclassification: based on what tools just ran,
    // determine what the NEXT iteration should be. This is how the
    // agent switches between models mid-turn — investigation uses the
    // cheap/discuss model, coding uses the execute model, review uses
    // the reflect model.
    const toolsThisRound = new Set(toolCalls.map(tc => tc.name));
    const prevPhase: import('../types.ts').LedgerPhase = currentPhase;
    if (toolsThisRound.has('write_file') || toolsThisRound.has('edit_file')) {
      // Just wrote code → next iteration should review/reflect
      currentPhase = 'reflect';
    } else if (toolsThisRound.has('update_plan') || toolsThisRound.has('create_task')) {
      // Just planned → next iteration should execute
      currentPhase = 'execute';
    } else if (toolsThisRound.has('read_file') || toolsThisRound.has('search_code') ||
      toolsThisRound.has('find_symbol') || toolsThisRound.has('related_files') ||
      toolsThisRound.has('repo_map')) {
      // Just investigated → next iteration should plan/dispatch
      currentPhase = 'dispatch';
    } else if (toolsThisRound.has('run_command')) {
      // Just ran a command (test/build) → reflect on results
      currentPhase = 'reflect';
    }
    // else: keep current phase (e.g. for web_search, consult, etc.)

    if (currentPhase !== prevPhase) {
      emit({ type: 'activity', text: `phase: ${prevPhase} → ${currentPhase}`, activity_type: 'step' });
    }

    // Spec 08 — drive the loop with LoopGuard. Feed the first tool error so
    // stuck detection works on ordinary turns.
    const firstError = toolResults.find(r => r.isError)?.content;
    loopGuard.recordIteration(iterCost, firstError);
    const guard = loopGuard.check();
    if (guard.shouldStop) {
      // Give the model one final no-tools iteration to summarize what it
      // found. This is the difference between "Loop stopped: iteration
      // limit" with zero useful output and a real summary of progress.
      try {
        emit({ type: 'status', text: `${respondingModel} summarizing (cap reached)...` });
        const finalResponse = await callLLM({
          provider: decision.model.provider,
          model: decision.model.id,
          systemPrompt,
          messages: [
            ...messages,
            { role: 'user', content: `You have reached the iteration limit (${guard.stopReason || 'bounds reached'}). Do not call any more tools. Summarize what you found, what you produced, and what remains to be done, in 10 lines or fewer.` },
          ],
          maxOutputTokens: 2048,
          cacheablePrefix,
        });
        totalInputTokens += finalResponse.inputTokens;
        totalOutputTokens += finalResponse.outputTokens;
        totalCost += estimateCost(finalResponse.model, finalResponse.inputTokens, finalResponse.outputTokens);
        // Keep the audit trail consistent with the totals above: the summary
        // call is a billed model call like any other iteration.
        modelsUsed.add(finalResponse.model);
        ledger.record('discuss', finalResponse, 'loop cap reached: summary request');
        finalContent = (finalResponse.content || response.content || '').trim()
          + `\n\n_(loop stopped: ${guard.stopReason || 'bounds reached'})_`;
      } catch {
        // Summary call failed — fall back to the last iteration's text.
        finalContent = (response.content || `(no final output)`) + `\n\n_(loop stopped: ${guard.stopReason || 'bounds reached'})_`;
      }
      break;
    }
  }

  // Append file modification summary
  const filesModified = allToolCalls
    .filter(tc => ['write_file', 'edit_file', 'create_task'].includes(tc.name) && !tc.is_error)
    .map(tc => {
      if (tc.name === 'create_task') return `  ✦ task: ${tc.args}`;
      return `  ${tc.name === 'write_file' ? '+' : '~'} ${tc.args}`;
    });
  if (filesModified.length > 0) {
    finalContent += '\n\nFiles:\n' + filesModified.join('\n');
  }

  contextManager.addAssistantMessage({
    content: finalContent, model: respondingModel,
    // Store the provider that was actually selected. The 'openai' fallback
    // only applies if no model was ever selected, which the loop above
    // does not allow; it preserves the previous default for that case.
    provider: respondingProvider ?? ('openai' as ProviderId),
    inputTokens: totalInputTokens, outputTokens: totalOutputTokens, latencyMs: 0,
  });

  emit({
    type: 'message_update', id: msgId,
    content: finalContent,
    model_label: respondingModel,
    tool_calls: allToolCalls.length > 0 ? allToolCalls : null,
    reasoning_content: reasoningChunks.length > 0 ? reasoningChunks.join('\n') : undefined,
    stats: {
      input_tokens: totalInputTokens, output_tokens: totalOutputTokens,
      cost_usd: totalCost, models: [...modelsUsed],
      provider: respondingProvider ?? '',
      route_reason: respondingReason,
      iterations: messages.filter(m => m.role === 'assistant').length || 1,
    },
  });

  emit({ type: 'status', text: '' });
  toolCtx.permissionManager?.endTurn();

  // Record a context receipt for cross-turn continuity.
  receipts.record(buildReceipt(
    turnNumber,
    input.slice(0, 200),
    respondingModel,
    allToolCalls,
    finalContent,
  ));

  await contextManager.maybeCompact();
  await contextManager.updateSessionState();
}
@@ -0,0 +1,121 @@
1
+ /**
2
+ * First-run setup wizard.
3
+ *
4
+ * Called from the non-interactive path (or manually via `/wizard`). Writes a
5
+ * minimal `.kondi-chat/config.json` if one does not already exist, and notes
6
+ * which providers are likely configured based on environment variables.
7
+ *
8
+ * The wizard is non-interactive by default: it inspects the environment and
9
+ * writes sensible defaults without blocking. An interactive stdin path can be
10
+ * added later by a thin caller around this module.
11
+ */
12
+
13
import { existsSync, writeFileSync, mkdirSync, readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { dirname, join } from 'node:path';
15
+
16
/** Outcome reported by {@link runFirstRunWizard}. */
export interface WizardResult {
  /** Location of the `config.json` file the wizard checked for or wrote. */
  configPath: string;
  /** `true` when this call wrote a new config file, `false` when one already existed. */
  created: boolean;
  /** Ids of the providers whose API-key environment variable was set. */
  providersDetected: string[];
  /** Profile name chosen for a new config, or read back from an existing one. */
  defaultProfile: string;
}
22
+
23
/**
 * Provider id → name of the environment variable expected to hold that
 * provider's API key. The wizard treats a provider as detected when its
 * variable is set to a non-empty value.
 */
const PROVIDER_ENV: Record<string, string> = {
  anthropic: 'ANTHROPIC_API_KEY',
  openai: 'OPENAI_API_KEY',
  deepseek: 'DEEPSEEK_API_KEY',
  google: 'GOOGLE_API_KEY',
  xai: 'XAI_API_KEY',
};
30
+
31
/**
 * Run the first-run setup wizard against `storageDir`.
 *
 * Providers are detected by checking which API-key environment variables
 * (see `PROVIDER_ENV`) are set. If `config.json` already exists it is left
 * untouched and the profile stored in it is reported; otherwise a minimal
 * config is written.
 *
 * @param storageDir - Directory that holds (or will hold) `config.json`.
 * @param opts - When `interactive` is true, a short summary is printed to
 *   stderr after a new config is written.
 * @returns The config path, whether this call created it, the detected
 *   providers, and the effective default profile.
 */
export function runFirstRunWizard(storageDir: string, opts: { interactive?: boolean } = {}): WizardResult {
  const configPath = join(storageDir, 'config.json');
  const providersDetected = Object.entries(PROVIDER_ENV)
    .filter(([, envVar]) => !!process.env[envVar])
    .map(([id]) => id);

  // Never overwrite an existing config; just report what it contains.
  if (existsSync(configPath)) {
    return { configPath, created: false, providersDetected, defaultProfile: readProfile(configPath) };
  }

  // No API key at all -> 'cheap'; any detected provider -> 'balanced'.
  // (The previous provider-specific branch returned 'balanced' on both arms.)
  const defaultProfile = providersDetected.length === 0 ? 'cheap' : 'balanced';

  const config = {
    defaultProfile,
    providers: providersDetected,
    createdAt: new Date().toISOString(),
  };
  mkdirSync(dirname(configPath), { recursive: true });
  writeFileSync(configPath, JSON.stringify(config, null, 2));

  if (opts.interactive) {
    process.stderr.write(`[wizard] wrote ${configPath}\n`);
    process.stderr.write(`[wizard] detected providers: ${providersDetected.join(', ') || '(none — set an API key)'}\n`);
  }

  return { configPath, created: true, providersDetected, defaultProfile };
}
60
+
61
/**
 * Read the `defaultProfile` field from the JSON config file at `path`.
 *
 * @param path - Full path of the config file.
 * @returns The stored profile name when it is a non-empty string; otherwise
 *   `'balanced'` (file missing, unreadable, malformed, not a JSON object, or
 *   the field is absent/empty/non-string).
 */
function readProfile(path: string): string {
  try {
    // Treat the parsed document as unknown and narrow it explicitly rather
    // than relying on property access on `any` throwing for null.
    const parsed: unknown = JSON.parse(readFileSync(path, 'utf-8'));
    if (typeof parsed === 'object' && parsed !== null) {
      const profile = (parsed as { defaultProfile?: unknown }).defaultProfile;
      if (typeof profile === 'string' && profile.length > 0) {
        return profile;
      }
    }
  } catch { /* best effort: fall through to the default */ }
  return 'balanced';
}
70
+
71
/**
 * Persist the active profile name to `config.json`, preserving other fields.
 *
 * An existing config is reused only when it parses to a plain JSON object.
 * A missing, unreadable, malformed, or non-object document (`null`, array,
 * string, number) is replaced by a fresh object, so the profile is always
 * written instead of throwing or being silently dropped.
 *
 * @param storageDir - Directory that holds (or will hold) `config.json`;
 *   created recursively if needed.
 * @param name - Profile name to store under `defaultProfile`.
 */
export function writeActiveProfile(storageDir: string, name: string): void {
  const configPath = join(storageDir, 'config.json');
  let config: Record<string, unknown> = {};
  try {
    if (existsSync(configPath)) {
      const parsed: unknown = JSON.parse(readFileSync(configPath, 'utf-8'));
      // Only a plain object can carry the extra fields we want to keep.
      // Arrays would lose `defaultProfile` on stringify; null/primitives
      // would throw on the assignment below.
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        config = parsed as Record<string, unknown>;
      }
    }
  } catch { /* start fresh on read or parse error */ }
  config.defaultProfile = name;
  mkdirSync(dirname(configPath), { recursive: true });
  writeFileSync(configPath, JSON.stringify(config, null, 2));
}
84
+
85
/**
 * Look up the active profile stored in `<storageDir>/config.json`.
 * Yields 'balanced' when no usable profile is stored there.
 */
export function readActiveProfile(storageDir: string): string {
  const configPath = join(storageDir, 'config.json');
  return readProfile(configPath);
}
89
+
90
/**
 * Update-available banner. Fetches the latest release tag from GitHub and
 * caches the result for 24 hours under `~/.kondi-chat/.update-check`.
 *
 * Every failure (network error, timeout, non-OK response, unreadable or
 * corrupt cache, unwritable home directory) resolves to `null`; nothing is
 * thrown. Set `KONDI_NO_UPDATE_CHECK=1` to disable the check entirely.
 *
 * @param currentVersion - Version of the running build, without a leading `v`.
 * @returns A one-line banner when the known latest version is non-empty and
 *   differs from `currentVersion`; otherwise `null`.
 */
export async function checkForUpdate(currentVersion: string): Promise<string | null> {
  if (process.env.KONDI_NO_UPDATE_CHECK === '1') return null;
  try {
    const cacheTtlMs = 24 * 60 * 60 * 1000;
    // os.homedir() also resolves on platforms where HOME is unset, so the
    // cache never lands in the current working directory.
    const cachePath = join(homedir(), '.kondi-chat', '.update-check');

    const cachedLatest = readFreshCachedLatest(cachePath, cacheTtlMs);
    if (cachedLatest !== undefined) {
      // An empty cached value means the last lookup found no tag: no banner.
      return cachedLatest && cachedLatest !== currentVersion ? banner(cachedLatest) : null;
    }

    const resp = await fetch('https://api.github.com/repos/kondi/kondi-chat/releases/latest', {
      signal: AbortSignal.timeout(3000),
      headers: { 'Accept': 'application/vnd.github+json' },
    });
    if (!resp.ok) return null;
    const data = await resp.json() as { tag_name?: unknown } | null;
    const tag = data?.tag_name;
    const latest = typeof tag === 'string' ? tag.replace(/^v/, '') : '';
    mkdirSync(dirname(cachePath), { recursive: true });
    writeFileSync(cachePath, JSON.stringify({ checkedAt: Date.now(), latest }));
    return latest && latest !== currentVersion ? banner(latest) : null;
  } catch {
    return null;
  }
}

/**
 * Return the cached `latest` version when the cache file exists, is well
 * formed, and is younger than `ttlMs`; otherwise `undefined`.
 *
 * A corrupt or unreadable cache yields `undefined` so the caller falls through
 * to a fresh lookup (which rewrites the file) instead of failing every run.
 */
function readFreshCachedLatest(cachePath: string, ttlMs: number): string | undefined {
  try {
    if (!existsSync(cachePath)) return undefined;
    const cached: unknown = JSON.parse(readFileSync(cachePath, 'utf-8'));
    if (typeof cached !== 'object' || cached === null) return undefined;
    const { checkedAt, latest } = cached as { checkedAt?: unknown; latest?: unknown };
    if (typeof checkedAt !== 'number' || typeof latest !== 'string') return undefined;
    return Date.now() - checkedAt < ttlMs ? latest : undefined;
  } catch {
    return undefined;
  }
}

/**
 * Format the user-facing update hint for version `latest`.
 *
 * NOTE(review): the registry metadata for this package shows the scoped name
 * `@thispointon/kondi-chat`; confirm the unscoped name in the npm hint below
 * is intended.
 */
function banner(latest: string): string {
  return `Update available: kondi-chat ${latest} — run \`npm install -g kondi-chat@latest\` or \`brew upgrade kondi-chat\``;
}