kc-beta 0.3.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/package.json +1 -1
  2. package/src/agent/confidence-scorer.js +8 -0
  3. package/src/agent/context-window.js +7 -2
  4. package/src/agent/context.js +25 -0
  5. package/src/agent/corner-case-registry.js +5 -0
  6. package/src/agent/engine.js +564 -76
  7. package/src/agent/event-log.js +15 -2
  8. package/src/agent/history.js +91 -23
  9. package/src/agent/pipelines/initializer.js +3 -6
  10. package/src/agent/retry.js +9 -1
  11. package/src/agent/rule-catalog-normalize.js +37 -0
  12. package/src/agent/scheduler.js +276 -0
  13. package/src/agent/session-state.js +11 -2
  14. package/src/agent/task-manager.js +5 -0
  15. package/src/agent/tools/agent-tool.js +57 -14
  16. package/src/agent/tools/archive-file.js +94 -0
  17. package/src/agent/tools/copy-to-workspace.js +140 -0
  18. package/src/agent/tools/phase-advance.js +60 -0
  19. package/src/agent/tools/release.js +323 -0
  20. package/src/agent/tools/rule-catalog.js +56 -4
  21. package/src/agent/tools/schedule-fetch.js +118 -0
  22. package/src/agent/tools/snapshot.js +101 -0
  23. package/src/agent/tools/workspace-file.js +10 -7
  24. package/src/agent/version-manager.js +29 -120
  25. package/src/agent/workspace.js +127 -4
  26. package/src/cli/components.js +68 -12
  27. package/src/cli/index.js +147 -15
  28. package/src/config.js +10 -1
  29. package/src/model-tiers.json +5 -5
  30. package/template/release-runtime/README.md.tmpl +84 -0
  31. package/template/release-runtime/kc_runtime/__init__.py +2 -0
  32. package/template/release-runtime/kc_runtime/confidence.py +93 -0
  33. package/template/release-runtime/kc_runtime/dashboard.py +208 -0
  34. package/template/release-runtime/render_dashboard.py +49 -0
  35. package/template/release-runtime/run.py +230 -0
  36. package/template/release-runtime/serve.sh +15 -0
  37. package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +11 -0
  38. package/template/skills/en/meta-meta/quality-control/SKILL.md +13 -1
  39. package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +8 -0
  40. package/template/skills/en/meta-meta/task-decomposition/SKILL.md +13 -0
  41. package/template/skills/en/meta-meta/version-control/SKILL.md +13 -0
  42. package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +11 -0
  43. package/template/skills/zh/meta-meta/quality-control/SKILL.md +12 -0
  44. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +8 -0
  45. package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +16 -0
  46. package/template/skills/zh/meta-meta/version-control/SKILL.md +13 -0
  47. package/template/workspace.gitignore +22 -0
@@ -4,12 +4,19 @@ import { AgentEvent } from "./events.js";
4
4
  import { ContextAssembler } from "./context.js";
5
5
  import { ConversationHistory } from "./history.js";
6
6
  import { Workspace } from "./workspace.js";
7
+ import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
7
8
  import { VersionManager } from "./version-manager.js";
8
9
  import { CornerCaseRegistry } from "./corner-case-registry.js";
9
10
  import { ConfidenceScorer } from "./confidence-scorer.js";
10
11
  import { ToolRegistry } from "./tools/registry.js";
11
12
  import { SandboxExecTool } from "./tools/sandbox-exec.js";
12
13
  import { WorkspaceFileTool } from "./tools/workspace-file.js";
14
+ import { CopyToWorkspaceTool } from "./tools/copy-to-workspace.js";
15
+ import { SnapshotTool } from "./tools/snapshot.js";
16
+ import { ArchiveFileTool } from "./tools/archive-file.js";
17
+ import { ScheduleFetchTool } from "./tools/schedule-fetch.js";
18
+ import { ReleaseTool } from "./tools/release.js";
19
+ import { PhaseAdvanceTool } from "./tools/phase-advance.js";
13
20
  import { DocumentParseTool } from "./tools/document-parse.js";
14
21
  import { DocumentSearchTool } from "./tools/document-search.js";
15
22
  import { WorkerLLMCallTool } from "./tools/worker-llm-call.js";
@@ -23,6 +30,7 @@ import { AgentTool } from "./tools/agent-tool.js";
23
30
  import { WebSearchTool } from "./tools/web-search.js";
24
31
  import { SkillLoader } from "./skill-loader.js";
25
32
  import { TaskManager } from "./task-manager.js";
33
+ import { Scheduler } from "./scheduler.js";
26
34
  import { Phase } from "./pipelines/index.js";
27
35
  import { ProjectInitializer } from "./pipelines/initializer.js";
28
36
  import { RuleExtractionPipeline } from "./pipelines/extraction.js";
@@ -35,9 +43,25 @@ import { ContextWindow } from "./context-window.js";
35
43
  import { SessionState } from "./session-state.js";
36
44
  import { estimateTokens, estimateMessagesTokens } from "./token-counter.js";
37
45
 
46
+ // Default max output tokens for the conductor LLM. SOTA models (GLM-5,
47
+ // Claude Sonnet 4) handle this comfortably. Override via KC_MAX_TOKENS env
48
+ // or kc_max_tokens in the global config.
49
+ const DEFAULT_KC_MAX_TOKENS = 65536;
50
+
38
51
  // Phases where worker LLM tools are available (DISTILL mode)
39
52
  const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
40
53
 
54
+ // Linear phase order — used by auto-advance (Bug 4). Last phase has no successor.
55
+ // Exported so the TUI's /phase slash command (src/cli/index.js) can call
56
+ // _advancePhase with the right successor without re-declaring the map.
57
+ export const NEXT_PHASE = {
58
+ [Phase.BOOTSTRAP]: Phase.EXTRACTION,
59
+ [Phase.EXTRACTION]: Phase.SKILL_AUTHORING,
60
+ [Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
61
+ [Phase.SKILL_TESTING]: Phase.DISTILLATION,
62
+ [Phase.DISTILLATION]: Phase.PRODUCTION_QC,
63
+ };
64
+
41
65
  /**
42
66
  * The KC Agent conversation engine.
43
67
  *
@@ -52,40 +76,85 @@ export class AgentEngine {
52
76
  * @param {import('./llm-client.js').LLMClient} opts.client
53
77
  * @param {object} opts.config - Settings from loadSettings()
54
78
  * @param {string} [opts.sessionId]
79
+ * @param {string} [opts.subagentScope] - When set, persistence is isolated to
80
+ * sub_agents/<scope>/ inside the workspace. Used by `agent_tool` to spawn
81
+ * children that share workspace files but don't trash parent's history /
82
+ * tasks / session-state. (Bug 2)
83
+ * @param {string} [opts.initialPhase] - When set, the engine starts in this phase
84
+ * instead of BOOTSTRAP. Used by sub-agents to inherit parent's phase so they
85
+ * get the right tools registered. (Bug 2)
55
86
  */
56
- constructor({ client, config, sessionId }) {
87
+ constructor({ client, config, sessionId, subagentScope, initialPhase }) {
57
88
  this.client = client;
58
89
  this.config = config;
59
90
  this.context = new ContextAssembler();
91
+ this._isSubagent = !!subagentScope;
92
+ this._subagentScope = subagentScope || null;
60
93
 
61
94
  // Workspace + structural components
62
- this.workspace = new Workspace(config.kcWorkspaceRoot, sessionId, config.projectDir);
63
- this.history = new ConversationHistory(this.workspace.cwd);
95
+ this.workspace = new Workspace(
96
+ config.kcWorkspaceRoot,
97
+ sessionId,
98
+ config.projectDir,
99
+ { gitAutoCommit: config.gitAutoCommit !== false },
100
+ );
101
+
102
+ // For sub-agents, persistence (history/events/state) lives under
103
+ // sub_agents/<scope>/ instead of the workspace root. Workspace files
104
+ // (rules/, rule_skills/, workflows/) stay shared.
105
+ let conversationDir, logDir, statePath;
106
+ if (this._isSubagent) {
107
+ // Defense-in-depth: even though agent_tool sanitizes task_id against
108
+ // VALID_TASK_ID, an attacker reaching engine construction through
109
+ // another path (e.g. future callers) must not escape the workspace.
110
+ const scopeRoot = path.resolve(this.workspace.cwd, "sub_agents", subagentScope);
111
+ const wsRoot = path.resolve(this.workspace.cwd);
112
+ if (scopeRoot !== wsRoot && !scopeRoot.startsWith(wsRoot + path.sep)) {
113
+ throw new Error(`sub-agent scope escapes workspace: ${subagentScope}`);
114
+ }
115
+ // Also reject the scopeRoot being the workspace root itself, since that
116
+ // would defeat isolation.
117
+ if (scopeRoot === wsRoot || scopeRoot === path.resolve(wsRoot, "sub_agents")) {
118
+ throw new Error(`sub-agent scope must be a unique subfolder, got: ${subagentScope}`);
119
+ }
120
+ fs.mkdirSync(scopeRoot, { recursive: true });
121
+ conversationDir = path.join(scopeRoot, "conversation");
122
+ logDir = path.join(scopeRoot, "logs");
123
+ statePath = path.join(scopeRoot, "session-state.json");
124
+ }
125
+
126
+ const initialPhaseValue = initialPhase || Phase.BOOTSTRAP;
127
+ this.workspace.setPhase(initialPhaseValue);
128
+ this.history = new ConversationHistory(this.workspace.cwd, {
129
+ conversationDir,
130
+ maxMessageTokens: this.config.maxMessageTokens,
131
+ });
64
132
  this.versionManager = new VersionManager(this.workspace.cwd);
65
133
  this.cornerCases = new CornerCaseRegistry(this.workspace.cwd);
66
134
  this.confidence = new ConfidenceScorer(this.workspace.cwd, this.cornerCases);
67
135
 
68
136
  // Event log (append-only JSONL, source of truth)
69
- this.eventLog = new EventLog(this.workspace.cwd);
137
+ this.eventLog = new EventLog(this.workspace.cwd, { logDir });
70
138
 
71
139
  // Context windowing
72
140
  this.contextWindow = new ContextWindow({
73
141
  contextLimit: config.kcContextLimit || 200000,
74
- reserveForResponse: config.kcMaxTokens || 65536,
142
+ reserveForResponse: config.kcMaxTokens || DEFAULT_KC_MAX_TOKENS,
75
143
  });
76
144
 
77
145
  // Session state persistence
78
- this.sessionState = new SessionState(this.workspace.cwd);
146
+ this.sessionState = new SessionState(this.workspace.cwd, { statePath });
79
147
 
80
- // Task manager (ralph-loop)
81
- this.taskManager = new TaskManager(this.workspace.cwd);
148
+ // Task manager (ralph-loop) — sub-agents don't queue further sub-tasks,
149
+ // so they don't get a TaskManager.
150
+ this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
82
151
 
83
152
  // Build all tool instances (but register phase-appropriate ones)
84
153
  this._buildTools = this._createAllTools();
85
154
  this._phaseSummaries = [];
86
155
 
87
156
  // Pipeline system (meta-meta skills as code)
88
- this.currentPhase = Phase.BOOTSTRAP;
157
+ this.currentPhase = initialPhaseValue;
89
158
  this.pipelines = {
90
159
  [Phase.BOOTSTRAP]: new ProjectInitializer(this.workspace),
91
160
  [Phase.EXTRACTION]: new RuleExtractionPipeline(this.workspace),
@@ -101,6 +170,17 @@ export class AgentEngine {
101
170
  // Register tools for initial phase
102
171
  this.toolRegistry = new ToolRegistry();
103
172
  this._registerToolsForPhase(this.currentPhase);
173
+
174
+ // Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
175
+ // phase so the first real false→true flip inside onToolResult triggers an
176
+ // advance — even when the user launches from a pre-populated workspace
177
+ // whose exit criteria already happen to be met at boot.
178
+ // resume() re-primes this from the restored pipeline state (after importState),
179
+ // which is the correct behaviour there: resumed sessions that were already
180
+ // past this phase shouldn't re-fire.
181
+ this._lastReady = Object.fromEntries(
182
+ Object.keys(this.pipelines).map((p) => [p, false]),
183
+ );
104
184
  }
105
185
 
106
186
  /**
@@ -127,6 +207,14 @@ export class AgentEngine {
127
207
  core: [
128
208
  new SandboxExecTool(this.workspace, this.config.kcExecTimeout),
129
209
  new WorkspaceFileTool(this.workspace, this.versionManager),
210
+ new CopyToWorkspaceTool(this.workspace, {
211
+ largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
212
+ }),
213
+ new SnapshotTool(this.workspace),
214
+ new ArchiveFileTool(this.workspace),
215
+ new ScheduleFetchTool(this.workspace),
216
+ new ReleaseTool(this.workspace, { kcVersion: "0.5.2" }),
217
+ new PhaseAdvanceTool((to, reason, opts) => this._advancePhase(to, reason, opts)),
130
218
  new DocumentParseTool(this.workspace, {
131
219
  mineruApiUrl: this.config.mineruApiUrl,
132
220
  mineruApiKey: this.config.mineruApiKey,
@@ -138,9 +226,14 @@ export class AgentEngine {
138
226
  new RuleCatalogTool(this.workspace),
139
227
  new EvolutionCycleTool(this.workspace, this.cornerCases),
140
228
  new DashboardRenderTool(this.workspace),
141
- new AgentTool(this.workspace, (sid) => new AgentEngine({
142
- client: this.client, config: this.config, sessionId: sid,
143
- })),
229
+ new AgentTool(
230
+ this.workspace,
231
+ ({ sessionId, subagentScope, initialPhase }) => new AgentEngine({
232
+ client: this.client, config: this.config,
233
+ sessionId, subagentScope, initialPhase,
234
+ }),
235
+ () => this.currentPhase,
236
+ ),
144
237
  new WebSearchTool(this.config.tavilyApiKey),
145
238
  ],
146
239
  // Distillation+ only (DISTILL mode)
@@ -204,9 +297,11 @@ export class AgentEngine {
204
297
  );
205
298
  }
206
299
 
207
- // Task progress (ralph-loop)
208
- const taskContext = this.taskManager.describeForContext();
209
- if (taskContext) lines.push("", taskContext);
300
+ // Task progress (ralph-loop) — skipped for sub-agents (no taskManager)
301
+ if (this.taskManager) {
302
+ const taskContext = this.taskManager.describeForContext();
303
+ if (taskContext) lines.push("", taskContext);
304
+ }
210
305
 
211
306
  return lines.join("\n");
212
307
  }
@@ -233,9 +328,126 @@ export class AgentEngine {
233
328
  };
234
329
  }
235
330
 
331
+ /**
332
+ * Run the windowing check immediately after a tool result appends to
333
+ * history. Called from runTurn() so that a large tool result can't sit in
334
+ * history past the threshold until the next LLM-loop iteration, where a
335
+ * stream-abort could then trap the context in a bloated state.
336
+ *
337
+ * Safe to call frequently — contextWindow.window() fast-paths when under
338
+ * the trigger fraction.
339
+ */
340
+ _maybeWindowAfterToolResult() {
341
+ if (!this.contextWindow) return;
342
+ const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
343
+ if (windowed.wasWindowed) {
344
+ this.history.messages = windowed.messages;
345
+ this.eventLog.append("context_windowed", {
346
+ removed: windowed.removedCount,
347
+ trigger: "post_tool_result",
348
+ });
349
+ }
350
+
351
+ // Heap-pressure diagnostic. The TUI has its own virtualization + tool-
352
+ // output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
353
+ // still see high heap usage, something else is leaking — log it once per
354
+ // pressure-crossing so operators can investigate without flooding logs.
355
+ try {
356
+ const mem = process.memoryUsage();
357
+ const frac = mem.heapUsed / (mem.heapTotal || 1);
358
+ if (frac > 0.80 && !this._memPressureLogged) {
359
+ this._memPressureLogged = true;
360
+ this.eventLog.append("memory_pressure", {
361
+ heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
362
+ heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
363
+ rssMB: Math.round(mem.rss / 1024 / 1024),
364
+ historyLength: this.history.messages.length,
365
+ });
366
+ } else if (frac < 0.60 && this._memPressureLogged) {
367
+ this._memPressureLogged = false; // re-arm for next crossing
368
+ }
369
+ } catch { /* process.memoryUsage failures are non-fatal */ }
370
+ }
371
+
372
+ /**
373
+ * Pre-flight hard ceiling (Bug 1). After windowing, if the message
374
+ * array's total token count still exceeds the model's input budget,
375
+ * drop oldest user-bounded blocks until under budget.
376
+ *
377
+ * Drops in BLOCK units — a block is `user(N) + everything until the
378
+ * next user`. This guarantees the head after a drop is always either a
379
+ * user message or empty, satisfying Anthropic's "first message must use
380
+ * the user role" requirement and OpenAI's tool-call adjacency rules.
381
+ *
382
+ * Treats the compaction summary pair (user with `[Previous conversation
383
+ * summary]` or `[Context Summary` marker, followed by assistant ack) as
384
+ * sticky — it represents prior LLM-summarized work and should outlive
385
+ * any normal turn.
386
+ */
387
+ _enforceTokenBudget(messages) {
388
+ const limit = this.config.kcContextLimit || 200000;
389
+ const reserve = this.config.kcMaxTokens || DEFAULT_KC_MAX_TOKENS;
390
+ const budget = limit - reserve;
391
+ let totalTokens = estimateMessagesTokens(messages);
392
+ if (totalTokens <= budget) return messages;
393
+
394
+ // Sticky region: system + (optional summary user + ack assistant)
395
+ let stickyEnd = messages[0]?.role === "system" ? 1 : 0;
396
+ const sumMarkers = ["[Previous conversation summary]", "[Context Summary"];
397
+ const hasSummaryAt = (i) =>
398
+ messages[i]?.role === "user" &&
399
+ typeof messages[i].content === "string" &&
400
+ sumMarkers.some((m) => messages[i].content.startsWith(m));
401
+ if (hasSummaryAt(stickyEnd)) {
402
+ stickyEnd++;
403
+ if (messages[stickyEnd]?.role === "assistant") stickyEnd++;
404
+ }
405
+
406
+ let droppedCount = 0;
407
+ let droppedTokens = 0;
408
+
409
+ // Drop user-bounded blocks. A block starts at messages[stickyEnd]
410
+ // (expected to be a user message in normal flow) and runs up to (not
411
+ // including) the next user message — or to the end of array.
412
+ while (totalTokens > budget && messages.length > stickyEnd) {
413
+ const blockStart = stickyEnd;
414
+ let blockEnd = blockStart + 1;
415
+ while (blockEnd < messages.length && messages[blockEnd].role !== "user") blockEnd++;
416
+ // If this block goes to end-of-array, there's no following user to anchor
417
+ // the head — dropping it would leave just [system, (summary)?]. Stop and
418
+ // let the LLM call attempt; the API will surface a clear error if even
419
+ // sticky alone is over budget.
420
+ if (blockEnd === messages.length) break;
421
+ const removed = messages.splice(blockStart, blockEnd - blockStart);
422
+ droppedCount += removed.length;
423
+ droppedTokens += removed.reduce((a, m) => a + estimateTokens(JSON.stringify(m)), 0);
424
+ totalTokens = estimateMessagesTokens(messages);
425
+ }
426
+
427
+ // Defensive postcondition: head after sticky must be a user message or
428
+ // the array must end at sticky. Block-drop should make this trivially true,
429
+ // but if the input was malformed (e.g., already started with a non-user),
430
+ // clean up here so we never send an Anthropic-invalid sequence.
431
+ while (messages.length > stickyEnd && messages[stickyEnd].role !== "user") {
432
+ messages.splice(stickyEnd, 1);
433
+ droppedCount++;
434
+ }
435
+
436
+ if (droppedCount > 0) {
437
+ this.eventLog.append("context_truncated", {
438
+ droppedCount,
439
+ droppedTokens,
440
+ finalTokens: totalTokens,
441
+ budget,
442
+ });
443
+ }
444
+ return messages;
445
+ }
446
+
236
447
  /**
237
448
  * Compact conversation history by summarizing older messages via LLM.
238
- * Keeps the most recent messages intact.
449
+ * Keeps the most recent messages intact. (Bug 1: now chunked — never sends
450
+ * a single oversized prompt to the summarizer LLM.)
239
451
  * @param {object} [opts]
240
452
  * @param {number} [opts.recentCount=20] - Number of recent messages to keep
241
453
  * @returns {Promise<{removedCount: number, retainedCount: number, summaryTokens: number}|null>}
@@ -246,46 +458,20 @@ export class AgentEngine {
246
458
  const olderMessages = this.history.messages.slice(0, -recentCount);
247
459
  const recentMessages = this.history.messages.slice(-recentCount);
248
460
 
249
- let summary;
250
- try {
251
- const summaryResp = await this.client.chat({
252
- model: this.config.kcModel,
253
- messages: [
254
- {
255
- role: "system",
256
- content:
257
- "You are a conversation summarizer. Produce a concise summary of the following conversation. " +
258
- "Focus on: decisions made, files created or modified, current state of work, key findings, " +
259
- "unresolved questions. Be specific about file paths, rule IDs, and results. Keep under 2000 tokens.",
260
- },
261
- {
262
- role: "user",
263
- content: `Summarize this conversation:\n\n${JSON.stringify(olderMessages)}`,
264
- },
265
- ],
266
- maxTokens: 2048,
267
- });
268
- summary = summaryResp.choices?.[0]?.message?.content || null;
269
- } catch {
270
- // LLM summary failed — do mechanical fallback
271
- summary = null;
272
- }
461
+ const CHUNK_BUDGET = 30000; // tokens per summarization request
462
+ const chunks = this._chunkMessages(olderMessages, CHUNK_BUDGET);
273
463
 
274
- if (!summary) {
275
- // Mechanical fallback: extract tool names and outcomes
276
- const lines = ["Previous conversation summary (mechanical):"];
277
- for (const msg of olderMessages) {
278
- if (msg.role === "user") {
279
- lines.push(`- User: ${(msg.content || "").slice(0, 100)}`);
280
- } else if (msg.role === "assistant" && msg.tool_calls) {
281
- for (const tc of msg.tool_calls) {
282
- lines.push(`- Tool call: ${tc.function?.name}`);
283
- }
284
- }
285
- }
286
- summary = lines.join("\n");
464
+ const partials = [];
465
+ for (let i = 0; i < chunks.length; i++) {
466
+ const chunk = chunks[i];
467
+ const partial = await this._summarizeChunk(chunk, i, chunks.length);
468
+ partials.push(partial);
287
469
  }
288
470
 
471
+ const summary = partials.length === 1
472
+ ? partials[0]
473
+ : "## Compacted history (multi-part)\n\n" + partials.map((p, i) => `### Part ${i + 1}\n${p}`).join("\n\n");
474
+
289
475
  // Replace history
290
476
  this.history._messages = [
291
477
  { role: "user", content: `[Previous conversation summary]\n${summary}` },
@@ -298,6 +484,7 @@ export class AgentEngine {
298
484
  this.eventLog.append("compact", {
299
485
  removedCount: olderMessages.length,
300
486
  retainedCount: recentMessages.length,
487
+ chunkCount: chunks.length,
301
488
  summary,
302
489
  });
303
490
 
@@ -308,6 +495,81 @@ export class AgentEngine {
308
495
  };
309
496
  }
310
497
 
498
+ /**
499
+ * Split a flat message list into chunks where each chunk's serialized JSON
500
+ * fits within tokenBudget. Messages are packed greedily in order, so chunk
501
+ * boundaries always fall between messages but are not aligned to turns. A
502
+ * single message that alone exceeds the budget gets its own oversized chunk;
503
+ * the summarizer LLM call may then fail → mechanical fallback fires.
504
+ */
505
+ _chunkMessages(messages, tokenBudget) {
506
+ const chunks = [];
507
+ let current = [];
508
+ let currentTokens = 0;
509
+ for (const msg of messages) {
510
+ const mTokens = estimateTokens(JSON.stringify(msg));
511
+ if (current.length > 0 && currentTokens + mTokens > tokenBudget) {
512
+ chunks.push(current);
513
+ current = [];
514
+ currentTokens = 0;
515
+ }
516
+ current.push(msg);
517
+ currentTokens += mTokens;
518
+ }
519
+ if (current.length > 0) chunks.push(current);
520
+ return chunks;
521
+ }
522
+
523
+ /**
524
+ * Summarize one chunk via the conductor LLM. On failure (incl. context-length
525
+ * errors that the chunked split should usually prevent), fall back to a
526
+ * mechanical summary so we always produce *something*.
527
+ */
528
+ async _summarizeChunk(chunk, idx, total) {
529
+ const partLabel = total > 1 ? ` (part ${idx + 1}/${total})` : "";
530
+ try {
531
+ const resp = await this.client.chat({
532
+ model: this.config.kcModel,
533
+ messages: [
534
+ {
535
+ role: "system",
536
+ content:
537
+ "You are a conversation summarizer. Produce a concise summary of the following conversation excerpt. " +
538
+ "Focus on: decisions made, files created or modified, current state of work, key findings, " +
539
+ "unresolved questions. Be specific about file paths, rule IDs, and results. Keep under 1500 tokens.",
540
+ },
541
+ {
542
+ role: "user",
543
+ content: `Summarize this conversation excerpt${partLabel}:\n\n${JSON.stringify(chunk)}`,
544
+ },
545
+ ],
546
+ maxTokens: 1800,
547
+ });
548
+ const text = resp.choices?.[0]?.message?.content;
549
+ if (text) return text;
550
+ } catch {
551
+ // fall through to mechanical
552
+ }
553
+ return this._mechanicalSummary(chunk, partLabel);
554
+ }
555
+
556
+ _mechanicalSummary(chunk, partLabel) {
557
+ const lines = [`Mechanical summary${partLabel}:`];
558
+ for (const msg of chunk) {
559
+ if (msg.role === "user" && typeof msg.content === "string") {
560
+ lines.push(`- User: ${msg.content.slice(0, 120).replace(/\s+/g, " ")}`);
561
+ } else if (msg.role === "assistant") {
562
+ if (typeof msg.content === "string" && msg.content) {
563
+ lines.push(`- Assistant: ${msg.content.slice(0, 120).replace(/\s+/g, " ")}`);
564
+ }
565
+ for (const tc of msg.tool_calls || []) {
566
+ lines.push(`- Tool call: ${tc.function?.name || "?"}`);
567
+ }
568
+ }
569
+ }
570
+ return lines.join("\n");
571
+ }
572
+
311
573
  /**
312
574
  * Restore an engine from a persisted session.
313
575
  * @param {object} opts
@@ -325,6 +587,7 @@ export class AgentEngine {
325
587
  engine.currentPhase = data.currentPhase || Phase.BOOTSTRAP;
326
588
  engine._phaseSummaries = data.phaseSummaries || [];
327
589
  engine._registerToolsForPhase(engine.currentPhase);
590
+ engine.workspace.setPhase(engine.currentPhase);
328
591
 
329
592
  // Restore project directory from saved state
330
593
  if (data.projectDir) {
@@ -342,6 +605,17 @@ export class AgentEngine {
342
605
  }
343
606
  }
344
607
 
608
+ // Re-prime _lastReady AFTER importState so it reflects the restored
609
+ // pipeline milestones, not the empty defaults from constructor.
610
+ // (Bug 5 fix — without this, resume reignites auto-advance.)
611
+ for (const phase of Object.keys(engine.pipelines)) {
612
+ try {
613
+ engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
614
+ } catch {
615
+ engine._lastReady[phase] = false;
616
+ }
617
+ }
618
+
345
619
  engine.eventLog.append("session_resume", {
346
620
  resumedPhase: engine.currentPhase,
347
621
  resumedFromSeq: data.lastEventSeq,
@@ -358,6 +632,56 @@ export class AgentEngine {
358
632
  this.sessionState.save(this);
359
633
  }
360
634
 
635
+ /**
636
+ * Rename the workspace folder and cascade the new path to every persistence
637
+ * subsystem that captured `workspace.cwd` at construction time (Bug 3).
638
+ * Without this cascade, subsystems keep writing to the OLD path even
639
+ * though the directory has moved on disk — the user sees the renamed dir
640
+ * "die" while the old dir keeps growing.
641
+ *
642
+ * Also regenerates Block 9 cron wrapper scripts which bake in absolute
643
+ * paths to the workspace. Returns information for the TUI to surface
644
+ * (incl. whether the user needs to re-install crontab lines).
645
+ *
646
+ * @param {string} newName
647
+ * @returns {{ sessionId: string, oldCwd: string, newCwd: string,
648
+ * scheduleWrappersRegenerated: string[],
649
+ * scheduleWrappersDisabled: Array, scheduleWrappersFailed: Array }}
650
+ */
651
+ renameSession(newName) {
652
+ const r = this.workspace.rename(newName);
653
+ if (r.changed) {
654
+ // Cascade to every subsystem that captured workspace.cwd
655
+ this.history._setWorkspacePath?.(r.newCwd);
656
+ this.eventLog._setWorkspacePath?.(r.newCwd);
657
+ this.sessionState._setWorkspacePath?.(r.newCwd);
658
+ this.taskManager?._setWorkspacePath?.(r.newCwd);
659
+ this.confidence._setWorkspacePath?.(r.newCwd);
660
+ this.cornerCases._setWorkspacePath?.(r.newCwd);
661
+ }
662
+
663
+ // Regenerate cron wrapper scripts — they bake absolute paths to WORKSPACE,
664
+ // INPUT_DIR, LOG_FILE, so rename invalidates them. The Scheduler is
665
+ // workspace-bound (created on demand inside the schedule_fetch tool), so
666
+ // construct a fresh one against the renamed workspace.
667
+ let scheduleResult = { regenerated: [], disabled: [], failed: [] };
668
+ try {
669
+ const sched = new Scheduler(this.workspace);
670
+ scheduleResult = sched.regenerateAllWrappers();
671
+ } catch {
672
+ // Best effort — never let scheduler issues block the rename
673
+ }
674
+
675
+ return {
676
+ sessionId: r.sessionId,
677
+ oldCwd: r.oldCwd,
678
+ newCwd: r.newCwd,
679
+ scheduleWrappersRegenerated: scheduleResult.regenerated,
680
+ scheduleWrappersDisabled: scheduleResult.disabled,
681
+ scheduleWrappersFailed: scheduleResult.failed,
682
+ };
683
+ }
684
+
361
685
  /**
362
686
  * Run one conversation turn. Yields AgentEvent objects.
363
687
  * Loops: LLM call -> tool execution -> LLM call ... until no tool calls.
@@ -383,7 +707,7 @@ export class AgentEngine {
383
707
  while (true) {
384
708
  // Apply context windowing before sending to LLM
385
709
  const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
386
- const messages = [{ role: "system", content: systemPrompt }, ...windowed.messages];
710
+ let messages = [{ role: "system", content: systemPrompt }, ...windowed.messages];
387
711
 
388
712
  if (windowed.wasWindowed) {
389
713
  this.eventLog.append("context_windowed", {
@@ -392,6 +716,12 @@ export class AgentEngine {
392
716
  });
393
717
  }
394
718
 
719
+ // Pre-flight hard ceiling (Bug 1 P0). Even after windowing, if the
720
+ // request still exceeds the model's input budget (e.g., recent messages
721
+ // alone are too big), drop the oldest user-bounded blocks (the system
722
+ // prompt and any summary pair are kept). Better to lose some history than crash with HTTP 400.
723
+ messages = this._enforceTokenBudget(messages);
724
+
395
725
  this.eventLog.append("llm_start", {
396
726
  model: this.config.kcModel,
397
727
  messageCount: messages.length,
@@ -448,6 +778,12 @@ export class AgentEngine {
448
778
  });
449
779
 
450
780
  if (toolCallsAcc.size === 0) {
781
+ // Bug 4 trigger (2): re-check phase criteria at end of every turn —
782
+ // KC may have advanced state via conversation alone, without any
783
+ // tool that the pipeline narrowly watches.
784
+ const advancedEv = this._maybeAutoAdvance();
785
+ if (advancedEv) yield advancedEv;
786
+
451
787
  this.eventLog.append("turn_complete", {});
452
788
  this.saveState();
453
789
  yield new AgentEvent({ type: "turn_complete" });
@@ -466,52 +802,57 @@ export class AgentEngine {
466
802
 
467
803
  const result = await this.toolRegistry.execute(tc.name, inputData);
468
804
 
805
+ // Tool-call offloading: large outputs go to logs/tool_results/<traceId>.txt;
806
+ // history holds head + tail with a pointer. Event log keeps the full output
807
+ // (it's append-only and the source of truth).
808
+ const offload = this._maybeOffload(tc.name, result);
809
+ const historyContent = offload ? offload.digest : (result.content || "");
810
+
469
811
  this.eventLog.append("tool_result", {
470
812
  name: tc.name,
471
- output: result.content?.slice(0, 5000) || "",
813
+ output: result.content || "",
472
814
  isError: result.isError,
815
+ traceId: offload?.traceId || null,
473
816
  });
474
817
  yield new AgentEvent({
475
818
  type: "tool_result",
476
819
  name: tc.name,
477
- output: result.content,
820
+ output: historyContent,
478
821
  isError: result.isError,
479
822
  });
480
823
 
481
824
  this.history.addRaw({
482
825
  role: "tool",
483
826
  tool_call_id: tc.id,
484
- content: result.content,
827
+ content: historyContent,
485
828
  });
486
829
 
830
+ // Post-tool-result safety net: check for context pressure RIGHT NOW
831
+ // rather than waiting for the next LLM-loop iteration. A large tool
832
+ // result that tips history over the threshold used to sit there
833
+ // until the next turn, and if the stream aborted in between the
834
+ // user saw "CTX: 210% / stream terminated" with no recovery.
835
+ this._maybeWindowAfterToolResult();
836
+
487
837
  // Pipeline controller: update state and re-register tools on phase change
488
838
  if (pipeline?.onToolResult) {
489
839
  const pEvent = pipeline.onToolResult(tc.name, inputData, result);
490
840
  if (pEvent) {
491
841
  if (pEvent.type === "phase_ready" && pEvent.nextPhase) {
492
- const phaseSummary = `[${this.currentPhase.toUpperCase()} completed]: ${pEvent.message || ""}`;
493
- this._phaseSummaries.push(phaseSummary);
494
- this.eventLog.append("phase_transition", {
495
- from: this.currentPhase,
496
- to: pEvent.nextPhase,
497
- summary: phaseSummary,
498
- });
499
- this.currentPhase = pEvent.nextPhase;
500
- this._registerToolsForPhase(this.currentPhase);
501
-
502
- // Ralph-loop: create per-rule tasks for the new phase
503
- this._createTasksForPhase(this.currentPhase);
504
-
505
- this.saveState();
842
+ this._advancePhase(pEvent.nextPhase, pEvent.message || "exit criteria met");
506
843
  }
507
- yield new AgentEvent({
508
- type: "pipeline_event",
509
- data: pEvent,
510
- });
844
+ yield new AgentEvent({ type: "pipeline_event", data: pEvent });
511
845
  }
512
846
  }
513
847
  }
514
848
 
849
+ // Bug 4 fix: re-check exit criteria after every tool-result loop, not
850
+ // just from pipeline.onToolResult. The pipeline's describeState() (called
851
+ // on every turn) already re-scans, so exitCriteriaMet() is accurate; we
852
+ // just need to act on it eagerly.
853
+ const ev = this._maybeAutoAdvance();
854
+ if (ev) yield ev;
855
+
515
856
  } catch (err) {
516
857
  this.eventLog.append("error", { message: err.message });
517
858
  yield new AgentEvent({ type: "error", message: err.message });
@@ -520,17 +861,123 @@ export class AgentEngine {
520
861
  }
521
862
  }
522
863
 
864
+ /**
865
+ * Centralized phase transition (Bug 4). All three triggers route through here:
866
+ * (1) pipeline.onToolResult returning phase_ready
867
+ * (2) post-turn auto-check via _maybeAutoAdvance
868
+ * (3) explicit user request via the phase_advance tool
869
+ *
870
+ * Reachability: by default only forward-by-one transitions per NEXT_PHASE.
871
+ * Set `force: true` to allow non-adjacent or backward transitions (e.g. user
872
+ * explicitly requests a regression for testing). The refusal is logged.
873
+ *
874
+ * Idempotent — calling with the current phase is a no-op.
875
+ */
876
+ _advancePhase(nextPhase, reason = "", { force = false } = {}) {
877
+ if (!nextPhase || nextPhase === this.currentPhase) return false;
878
+
879
+ const expected = NEXT_PHASE[this.currentPhase];
880
+ if (!force && nextPhase !== expected) {
881
+ this.eventLog.append("phase_advance_refused", {
882
+ from: this.currentPhase, to: nextPhase, reason,
883
+ hint: expected ? `expected next phase is '${expected}' — pass force:true to override`
884
+ : `${this.currentPhase} is the terminal phase`,
885
+ });
886
+ return false;
887
+ }
888
+
889
+ const phaseSummary = `[${this.currentPhase.toUpperCase()} → ${nextPhase.toUpperCase()}]: ${reason}${force && nextPhase !== expected ? " (forced)" : ""}`;
890
+ this._phaseSummaries.push(phaseSummary);
891
+ this.eventLog.append("phase_transition", {
892
+ from: this.currentPhase,
893
+ to: nextPhase,
894
+ reason,
895
+ forced: force && nextPhase !== expected,
896
+ });
897
+ this.currentPhase = nextPhase;
898
+ this._registerToolsForPhase(this.currentPhase);
899
+ this.workspace.setPhase(this.currentPhase);
900
+ this._createTasksForPhase(this.currentPhase);
901
+ this.saveState();
902
+ return true;
903
+ }
904
+
905
+ /**
906
+ * Bug 4 trigger (1) auto-detect, edge-triggered (Bug 5): only fires on a
907
+ * fresh false → true flip in `exitCriteriaMet()`. Sessions resumed in an
908
+ * already-met state do nothing; users iterating in a phase whose criteria
909
+ * have been met for a while do nothing. Real new evidence is required.
910
+ */
911
+ _maybeAutoAdvance() {
912
+ const phase = this.currentPhase;
913
+ const pipeline = this.pipelines[phase];
914
+ let nowReady = false;
915
+ try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
916
+
917
+ if (!nowReady) {
918
+ this._lastReady[phase] = false;
919
+ return null;
920
+ }
921
+ // Edge-trigger: nowReady && !wasReady
922
+ if (this._lastReady[phase]) return null;
923
+ this._lastReady[phase] = true;
924
+
925
+ const next = NEXT_PHASE[phase];
926
+ if (!next) return null;
927
+ const advanced = this._advancePhase(next, "exit criteria flipped to met");
928
+ if (!advanced) return null;
929
+ return new AgentEvent({
930
+ type: "pipeline_event",
931
+ data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
932
+ });
933
+ }
934
+
935
+ /**
936
+ * Tool-call offloading. If the tool's content exceeds the threshold,
937
+ * write the full content to logs/tool_results/<traceId>.txt and return a
938
+ * digest (head + tail) with a pointer. Otherwise return null (caller uses
939
+ * full content).
940
+ */
941
+ _maybeOffload(toolName, result) {
942
+ const content = result.content || "";
943
+ if (!content) return null;
944
+ const threshold = result.isError
945
+ ? (this.config.toolOutputOffloadErrorTokens ?? 500)
946
+ : (this.config.toolOutputOffloadTokens ?? 2000);
947
+ const tokens = estimateTokens(content);
948
+ if (tokens <= threshold) return null;
949
+
950
+ const safeToolName = String(toolName || "tool").replace(/[^A-Za-z0-9_-]/g, "_");
951
+ const traceId = this.versionManager.generateTraceId(safeToolName, "result");
952
+ const offloadDir = path.join(this.workspace.cwd, "logs", "tool_results");
953
+ try {
954
+ fs.mkdirSync(offloadDir, { recursive: true });
955
+ fs.writeFileSync(path.join(offloadDir, `${traceId}.txt`), content, "utf-8");
956
+ } catch {
957
+ // If we can't write the offload file, fall back to keeping full content in context.
958
+ return null;
959
+ }
960
+
961
+ const HEAD = 800, TAIL = 800;
962
+ const truncatedNote = `\n\n[…truncated, ${tokens} tokens; full at logs/tool_results/${traceId}.txt — read with workspace_file if needed…]\n\n`;
963
+ const digest = content.length > HEAD + TAIL
964
+ ? content.slice(0, HEAD) + truncatedNote + content.slice(-TAIL)
965
+ : content + truncatedNote;
966
+ return { traceId, digest };
967
+ }
968
+
523
969
  /**
524
970
  * Create per-rule tasks when entering a new phase.
525
971
  * Reads the rule catalog and creates one task per rule for the given phase.
526
972
  */
527
973
  _createTasksForPhase(phase) {
974
+ if (!this.taskManager) return; // Sub-agents don't manage tasks
528
975
  const catalogPath = path.join(this.workspace.cwd, "rules", "catalog.json");
529
976
  if (!fs.existsSync(catalogPath)) return;
530
977
 
531
978
  try {
532
979
  const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
533
- const rules = Array.isArray(catalog) ? catalog : [];
980
+ const rules = normalizeRuleCatalog(catalog);
534
981
  if (rules.length > 0) {
535
982
  this.taskManager.createRuleTasks(rules, phase);
536
983
  }
@@ -546,6 +993,12 @@ export class AgentEngine {
546
993
  * @yields {AgentEvent}
547
994
  */
548
995
  async *runTaskLoop(userMessage) {
996
+ // Sub-agents don't run task loops — they execute one task and exit
997
+ if (!this.taskManager) {
998
+ yield* this.runTurn(userMessage);
999
+ return;
1000
+ }
1001
+
549
1002
  // Run the initial turn (user's request)
550
1003
  yield* this.runTurn(userMessage);
551
1004
 
@@ -593,6 +1046,41 @@ export class AgentEngine {
593
1046
  progress: this.taskManager.progress,
594
1047
  },
595
1048
  });
1049
+
1050
+ // Bug 4 trigger (2): auto-advance when all phase tasks are done AND
1051
+ // the pipeline's exit criteria are also met (Bug 5 fix — task state
1052
+ // alone is a ralph-loop convenience, not authoritative phase signal;
1053
+ // tasks could be marked skipped manually or by an editor).
1054
+ if (this._allCurrentPhaseTasksComplete()) {
1055
+ const pipeline = this.pipelines[this.currentPhase];
1056
+ let exitMet = false;
1057
+ try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
1058
+ if (exitMet) {
1059
+ const next = NEXT_PHASE[this.currentPhase];
1060
+ if (next) {
1061
+ const advanced = this._advancePhase(next, "all current-phase tasks completed + exit criteria met");
1062
+ if (advanced) {
1063
+ yield new AgentEvent({
1064
+ type: "pipeline_event",
1065
+ data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
1066
+ });
1067
+ }
1068
+ }
1069
+ }
1070
+ }
596
1071
  }
597
1072
  }
1073
+
1074
+ /**
1075
+ * True when every task tagged with the current phase is in a terminal state
1076
+ * (completed | failed | skipped) and at least one such task exists. Used by
1077
+ * runTaskLoop's auto-advance trigger.
1078
+ */
1079
+ _allCurrentPhaseTasksComplete() {
1080
+ if (!this.taskManager) return false;
1081
+ const phase = this.currentPhase;
1082
+ const phaseTasks = this.taskManager.getAllTasks().filter((t) => t.phase === phase);
1083
+ if (phaseTasks.length === 0) return false;
1084
+ return phaseTasks.every((t) => t.status === "completed" || t.status === "failed" || t.status === "skipped");
1085
+ }
598
1086
  }