kc-beta 0.3.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/package.json +1 -1
  2. package/src/agent/confidence-scorer.js +8 -0
  3. package/src/agent/context.js +25 -0
  4. package/src/agent/corner-case-registry.js +5 -0
  5. package/src/agent/engine.js +514 -75
  6. package/src/agent/event-log.js +15 -2
  7. package/src/agent/history.js +91 -23
  8. package/src/agent/pipelines/initializer.js +3 -6
  9. package/src/agent/retry.js +9 -1
  10. package/src/agent/scheduler.js +276 -0
  11. package/src/agent/session-state.js +11 -2
  12. package/src/agent/task-manager.js +5 -0
  13. package/src/agent/tools/agent-tool.js +57 -14
  14. package/src/agent/tools/archive-file.js +94 -0
  15. package/src/agent/tools/copy-to-workspace.js +140 -0
  16. package/src/agent/tools/phase-advance.js +60 -0
  17. package/src/agent/tools/release.js +322 -0
  18. package/src/agent/tools/schedule-fetch.js +118 -0
  19. package/src/agent/tools/snapshot.js +101 -0
  20. package/src/agent/tools/workspace-file.js +10 -7
  21. package/src/agent/version-manager.js +29 -120
  22. package/src/agent/workspace.js +127 -4
  23. package/src/cli/components.js +4 -1
  24. package/src/cli/index.js +57 -4
  25. package/src/config.js +10 -1
  26. package/template/release-runtime/README.md.tmpl +84 -0
  27. package/template/release-runtime/kc_runtime/__init__.py +2 -0
  28. package/template/release-runtime/kc_runtime/confidence.py +93 -0
  29. package/template/release-runtime/kc_runtime/dashboard.py +208 -0
  30. package/template/release-runtime/render_dashboard.py +49 -0
  31. package/template/release-runtime/run.py +230 -0
  32. package/template/release-runtime/serve.sh +15 -0
  33. package/template/skills/en/meta/entity-extraction/SKILL.md +6 -0
  34. package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +11 -0
  35. package/template/skills/en/meta-meta/quality-control/SKILL.md +13 -1
  36. package/template/skills/en/meta-meta/rule-extraction/SKILL.md +35 -0
  37. package/template/skills/en/meta-meta/rule-graph/SKILL.md +16 -0
  38. package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +8 -0
  39. package/template/skills/en/meta-meta/task-decomposition/SKILL.md +13 -0
  40. package/template/skills/en/meta-meta/version-control/SKILL.md +13 -0
  41. package/template/skills/zh/meta/entity-extraction/SKILL.md +6 -0
  42. package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +11 -0
  43. package/template/skills/zh/meta-meta/quality-control/SKILL.md +12 -0
  44. package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +35 -0
  45. package/template/skills/zh/meta-meta/rule-graph/SKILL.md +16 -0
  46. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +8 -0
  47. package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +16 -0
  48. package/template/skills/zh/meta-meta/version-control/SKILL.md +13 -0
  49. package/template/workspace.gitignore +22 -0
@@ -10,6 +10,12 @@ import { ConfidenceScorer } from "./confidence-scorer.js";
10
10
  import { ToolRegistry } from "./tools/registry.js";
11
11
  import { SandboxExecTool } from "./tools/sandbox-exec.js";
12
12
  import { WorkspaceFileTool } from "./tools/workspace-file.js";
13
+ import { CopyToWorkspaceTool } from "./tools/copy-to-workspace.js";
14
+ import { SnapshotTool } from "./tools/snapshot.js";
15
+ import { ArchiveFileTool } from "./tools/archive-file.js";
16
+ import { ScheduleFetchTool } from "./tools/schedule-fetch.js";
17
+ import { ReleaseTool } from "./tools/release.js";
18
+ import { PhaseAdvanceTool } from "./tools/phase-advance.js";
13
19
  import { DocumentParseTool } from "./tools/document-parse.js";
14
20
  import { DocumentSearchTool } from "./tools/document-search.js";
15
21
  import { WorkerLLMCallTool } from "./tools/worker-llm-call.js";
@@ -23,6 +29,7 @@ import { AgentTool } from "./tools/agent-tool.js";
23
29
  import { WebSearchTool } from "./tools/web-search.js";
24
30
  import { SkillLoader } from "./skill-loader.js";
25
31
  import { TaskManager } from "./task-manager.js";
32
+ import { Scheduler } from "./scheduler.js";
26
33
  import { Phase } from "./pipelines/index.js";
27
34
  import { ProjectInitializer } from "./pipelines/initializer.js";
28
35
  import { RuleExtractionPipeline } from "./pipelines/extraction.js";
@@ -35,9 +42,23 @@ import { ContextWindow } from "./context-window.js";
35
42
  import { SessionState } from "./session-state.js";
36
43
  import { estimateTokens, estimateMessagesTokens } from "./token-counter.js";
37
44
 
45
+ // Default max output tokens for the conductor LLM. SOTA models (GLM-5,
46
+ // Claude Sonnet 4) handle this comfortably. Override via KC_MAX_TOKENS env
47
+ // or kc_max_tokens in the global config.
48
+ const DEFAULT_KC_MAX_TOKENS = 65536;
49
+
38
50
  // Phases where worker LLM tools are available (DISTILL mode)
39
51
  const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
40
52
 
53
// Linear phase order — maps each phase to its successor (Bug 4).
// PRODUCTION_QC has no entry, so looking up the last phase yields undefined.
// Frozen because this table is shared, read-only module state.
const NEXT_PHASE = Object.freeze({
  [Phase.BOOTSTRAP]: Phase.EXTRACTION,
  [Phase.EXTRACTION]: Phase.SKILL_AUTHORING,
  [Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
  [Phase.SKILL_TESTING]: Phase.DISTILLATION,
  [Phase.DISTILLATION]: Phase.PRODUCTION_QC,
});
61
+
41
62
  /**
42
63
  * The KC Agent conversation engine.
43
64
  *
@@ -52,40 +73,85 @@ export class AgentEngine {
52
73
  * @param {import('./llm-client.js').LLMClient} opts.client
53
74
  * @param {object} opts.config - Settings from loadSettings()
54
75
  * @param {string} [opts.sessionId]
76
+ * @param {string} [opts.subagentScope] - When set, persistence is isolated to
77
+ * sub_agents/<scope>/ inside the workspace. Used by `agent_tool` to spawn
78
+ * children that share workspace files but don't trash parent's history /
79
+ * tasks / session-state. (Bug 2)
80
+ * @param {string} [opts.initialPhase] - When set, the engine starts in this phase
81
+ * instead of BOOTSTRAP. Used by sub-agents to inherit parent's phase so they
82
+ * get the right tools registered. (Bug 2)
55
83
  */
56
- constructor({ client, config, sessionId }) {
84
+ constructor({ client, config, sessionId, subagentScope, initialPhase }) {
57
85
  this.client = client;
58
86
  this.config = config;
59
87
  this.context = new ContextAssembler();
88
+ this._isSubagent = !!subagentScope;
89
+ this._subagentScope = subagentScope || null;
60
90
 
61
91
  // Workspace + structural components
62
- this.workspace = new Workspace(config.kcWorkspaceRoot, sessionId, config.projectDir);
63
- this.history = new ConversationHistory(this.workspace.cwd);
92
+ this.workspace = new Workspace(
93
+ config.kcWorkspaceRoot,
94
+ sessionId,
95
+ config.projectDir,
96
+ { gitAutoCommit: config.gitAutoCommit !== false },
97
+ );
98
+
99
+ // For sub-agents, persistence (history/events/state) lives under
100
+ // sub_agents/<scope>/ instead of the workspace root. Workspace files
101
+ // (rules/, rule_skills/, workflows/) stay shared.
102
+ let conversationDir, logDir, statePath;
103
+ if (this._isSubagent) {
104
+ // Defense-in-depth: even though agent_tool sanitizes task_id against
105
+ // VALID_TASK_ID, an attacker reaching engine construction through
106
+ // another path (e.g. future callers) must not escape the workspace.
107
+ const scopeRoot = path.resolve(this.workspace.cwd, "sub_agents", subagentScope);
108
+ const wsRoot = path.resolve(this.workspace.cwd);
109
+ if (scopeRoot !== wsRoot && !scopeRoot.startsWith(wsRoot + path.sep)) {
110
+ throw new Error(`sub-agent scope escapes workspace: ${subagentScope}`);
111
+ }
112
+ // Also reject the scopeRoot being the workspace root itself, since that
113
+ // would defeat isolation.
114
+ if (scopeRoot === wsRoot || scopeRoot === path.resolve(wsRoot, "sub_agents")) {
115
+ throw new Error(`sub-agent scope must be a unique subfolder, got: ${subagentScope}`);
116
+ }
117
+ fs.mkdirSync(scopeRoot, { recursive: true });
118
+ conversationDir = path.join(scopeRoot, "conversation");
119
+ logDir = path.join(scopeRoot, "logs");
120
+ statePath = path.join(scopeRoot, "session-state.json");
121
+ }
122
+
123
+ const initialPhaseValue = initialPhase || Phase.BOOTSTRAP;
124
+ this.workspace.setPhase(initialPhaseValue);
125
+ this.history = new ConversationHistory(this.workspace.cwd, {
126
+ conversationDir,
127
+ maxMessageTokens: this.config.maxMessageTokens,
128
+ });
64
129
  this.versionManager = new VersionManager(this.workspace.cwd);
65
130
  this.cornerCases = new CornerCaseRegistry(this.workspace.cwd);
66
131
  this.confidence = new ConfidenceScorer(this.workspace.cwd, this.cornerCases);
67
132
 
68
133
  // Event log (append-only JSONL, source of truth)
69
- this.eventLog = new EventLog(this.workspace.cwd);
134
+ this.eventLog = new EventLog(this.workspace.cwd, { logDir });
70
135
 
71
136
  // Context windowing
72
137
  this.contextWindow = new ContextWindow({
73
138
  contextLimit: config.kcContextLimit || 200000,
74
- reserveForResponse: config.kcMaxTokens || 65536,
139
+ reserveForResponse: config.kcMaxTokens || DEFAULT_KC_MAX_TOKENS,
75
140
  });
76
141
 
77
142
  // Session state persistence
78
- this.sessionState = new SessionState(this.workspace.cwd);
143
+ this.sessionState = new SessionState(this.workspace.cwd, { statePath });
79
144
 
80
- // Task manager (ralph-loop)
81
- this.taskManager = new TaskManager(this.workspace.cwd);
145
+ // Task manager (ralph-loop) — sub-agents don't queue further sub-tasks,
146
+ // so they don't get a TaskManager.
147
+ this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
82
148
 
83
149
  // Build all tool instances (but register phase-appropriate ones)
84
150
  this._buildTools = this._createAllTools();
85
151
  this._phaseSummaries = [];
86
152
 
87
153
  // Pipeline system (meta-meta skills as code)
88
- this.currentPhase = Phase.BOOTSTRAP;
154
+ this.currentPhase = initialPhaseValue;
89
155
  this.pipelines = {
90
156
  [Phase.BOOTSTRAP]: new ProjectInitializer(this.workspace),
91
157
  [Phase.EXTRACTION]: new RuleExtractionPipeline(this.workspace),
@@ -101,6 +167,19 @@ export class AgentEngine {
101
167
  // Register tools for initial phase
102
168
  this.toolRegistry = new ToolRegistry();
103
169
  this._registerToolsForPhase(this.currentPhase);
170
+
171
+ // Edge-trigger state for _maybeAutoAdvance (Bug 5). Primed at construction
172
+ // (and at resume) so a session that's already exit-criteria-met when it
173
+ // boots doesn't auto-advance on the first user turn — only on a fresh
174
+ // false→true flip.
175
+ this._lastReady = {};
176
+ for (const phase of Object.keys(this.pipelines)) {
177
+ try {
178
+ this._lastReady[phase] = !!this.pipelines[phase].exitCriteriaMet?.();
179
+ } catch {
180
+ this._lastReady[phase] = false;
181
+ }
182
+ }
104
183
  }
105
184
 
106
185
  /**
@@ -127,6 +206,14 @@ export class AgentEngine {
127
206
  core: [
128
207
  new SandboxExecTool(this.workspace, this.config.kcExecTimeout),
129
208
  new WorkspaceFileTool(this.workspace, this.versionManager),
209
+ new CopyToWorkspaceTool(this.workspace, {
210
+ largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
211
+ }),
212
+ new SnapshotTool(this.workspace),
213
+ new ArchiveFileTool(this.workspace),
214
+ new ScheduleFetchTool(this.workspace),
215
+ new ReleaseTool(this.workspace, { kcVersion: "0.5.2" }),
216
+ new PhaseAdvanceTool((to, reason, opts) => this._advancePhase(to, reason, opts)),
130
217
  new DocumentParseTool(this.workspace, {
131
218
  mineruApiUrl: this.config.mineruApiUrl,
132
219
  mineruApiKey: this.config.mineruApiKey,
@@ -138,9 +225,14 @@ export class AgentEngine {
138
225
  new RuleCatalogTool(this.workspace),
139
226
  new EvolutionCycleTool(this.workspace, this.cornerCases),
140
227
  new DashboardRenderTool(this.workspace),
141
- new AgentTool(this.workspace, (sid) => new AgentEngine({
142
- client: this.client, config: this.config, sessionId: sid,
143
- })),
228
+ new AgentTool(
229
+ this.workspace,
230
+ ({ sessionId, subagentScope, initialPhase }) => new AgentEngine({
231
+ client: this.client, config: this.config,
232
+ sessionId, subagentScope, initialPhase,
233
+ }),
234
+ () => this.currentPhase,
235
+ ),
144
236
  new WebSearchTool(this.config.tavilyApiKey),
145
237
  ],
146
238
  // Distillation+ only (DISTILL mode)
@@ -204,9 +296,11 @@ export class AgentEngine {
204
296
  );
205
297
  }
206
298
 
207
- // Task progress (ralph-loop)
208
- const taskContext = this.taskManager.describeForContext();
209
- if (taskContext) lines.push("", taskContext);
299
+ // Task progress (ralph-loop) — skipped for sub-agents (no taskManager)
300
+ if (this.taskManager) {
301
+ const taskContext = this.taskManager.describeForContext();
302
+ if (taskContext) lines.push("", taskContext);
303
+ }
210
304
 
211
305
  return lines.join("\n");
212
306
  }
@@ -233,9 +327,85 @@ export class AgentEngine {
233
327
  };
234
328
  }
235
329
 
330
+ /**
331
+ * Pre-flight hard ceiling (Bug 1). After windowing, if the message
332
+ * array's total token count still exceeds the model's input budget,
333
+ * drop oldest user-bounded blocks until under budget.
334
+ *
335
+ * Drops in BLOCK units — a block is `user(N) + everything until the
336
+ * next user`. This guarantees the head after a drop is always either a
337
+ * user message or empty, satisfying Anthropic's "first message must use
338
+ * the user role" requirement and OpenAI's tool-call adjacency rules.
339
+ *
340
+ * Treats the compaction summary pair (user with `[Previous conversation
341
+ * summary]` or `[Context Summary` marker, followed by assistant ack) as
342
+ * sticky — it represents prior LLM-summarized work and should outlive
343
+ * any normal turn.
344
+ */
345
+ _enforceTokenBudget(messages) {
346
+ const limit = this.config.kcContextLimit || 200000;
347
+ const reserve = this.config.kcMaxTokens || DEFAULT_KC_MAX_TOKENS;
348
+ const budget = limit - reserve;
349
+ let totalTokens = estimateMessagesTokens(messages);
350
+ if (totalTokens <= budget) return messages;
351
+
352
+ // Sticky region: system + (optional summary user + ack assistant)
353
+ let stickyEnd = messages[0]?.role === "system" ? 1 : 0;
354
+ const sumMarkers = ["[Previous conversation summary]", "[Context Summary"];
355
+ const hasSummaryAt = (i) =>
356
+ messages[i]?.role === "user" &&
357
+ typeof messages[i].content === "string" &&
358
+ sumMarkers.some((m) => messages[i].content.startsWith(m));
359
+ if (hasSummaryAt(stickyEnd)) {
360
+ stickyEnd++;
361
+ if (messages[stickyEnd]?.role === "assistant") stickyEnd++;
362
+ }
363
+
364
+ let droppedCount = 0;
365
+ let droppedTokens = 0;
366
+
367
+ // Drop user-bounded blocks. A block starts at messages[stickyEnd]
368
+ // (expected to be a user message in normal flow) and runs up to (not
369
+ // including) the next user message — or to the end of array.
370
+ while (totalTokens > budget && messages.length > stickyEnd) {
371
+ const blockStart = stickyEnd;
372
+ let blockEnd = blockStart + 1;
373
+ while (blockEnd < messages.length && messages[blockEnd].role !== "user") blockEnd++;
374
+ // If this block goes to end-of-array, there's no following user to anchor
375
+ // the head — dropping it would leave just [system, (summary)?]. Stop and
376
+ // let the LLM call attempt; the API will surface a clear error if even
377
+ // sticky alone is over budget.
378
+ if (blockEnd === messages.length) break;
379
+ const removed = messages.splice(blockStart, blockEnd - blockStart);
380
+ droppedCount += removed.length;
381
+ droppedTokens += removed.reduce((a, m) => a + estimateTokens(JSON.stringify(m)), 0);
382
+ totalTokens = estimateMessagesTokens(messages);
383
+ }
384
+
385
+ // Defensive postcondition: head after sticky must be a user message or
386
+ // the array must end at sticky. Block-drop should make this trivially true,
387
+ // but if the input was malformed (e.g., already started with a non-user),
388
+ // clean up here so we never send an Anthropic-invalid sequence.
389
+ while (messages.length > stickyEnd && messages[stickyEnd].role !== "user") {
390
+ messages.splice(stickyEnd, 1);
391
+ droppedCount++;
392
+ }
393
+
394
+ if (droppedCount > 0) {
395
+ this.eventLog.append("context_truncated", {
396
+ droppedCount,
397
+ droppedTokens,
398
+ finalTokens: totalTokens,
399
+ budget,
400
+ });
401
+ }
402
+ return messages;
403
+ }
404
+
236
405
  /**
237
406
  * Compact conversation history by summarizing older messages via LLM.
238
- * Keeps the most recent messages intact.
407
+ * Keeps the most recent messages intact. (Bug 1: now chunked — never sends
408
+ * a single oversized prompt to the summarizer LLM.)
239
409
  * @param {object} [opts]
240
410
  * @param {number} [opts.recentCount=20] - Number of recent messages to keep
241
411
  * @returns {Promise<{removedCount: number, retainedCount: number, summaryTokens: number}|null>}
@@ -246,46 +416,20 @@ export class AgentEngine {
246
416
  const olderMessages = this.history.messages.slice(0, -recentCount);
247
417
  const recentMessages = this.history.messages.slice(-recentCount);
248
418
 
249
- let summary;
250
- try {
251
- const summaryResp = await this.client.chat({
252
- model: this.config.kcModel,
253
- messages: [
254
- {
255
- role: "system",
256
- content:
257
- "You are a conversation summarizer. Produce a concise summary of the following conversation. " +
258
- "Focus on: decisions made, files created or modified, current state of work, key findings, " +
259
- "unresolved questions. Be specific about file paths, rule IDs, and results. Keep under 2000 tokens.",
260
- },
261
- {
262
- role: "user",
263
- content: `Summarize this conversation:\n\n${JSON.stringify(olderMessages)}`,
264
- },
265
- ],
266
- maxTokens: 2048,
267
- });
268
- summary = summaryResp.choices?.[0]?.message?.content || null;
269
- } catch {
270
- // LLM summary failed — do mechanical fallback
271
- summary = null;
272
- }
419
+ const CHUNK_BUDGET = 30000; // tokens per summarization request
420
+ const chunks = this._chunkMessages(olderMessages, CHUNK_BUDGET);
273
421
 
274
- if (!summary) {
275
- // Mechanical fallback: extract tool names and outcomes
276
- const lines = ["Previous conversation summary (mechanical):"];
277
- for (const msg of olderMessages) {
278
- if (msg.role === "user") {
279
- lines.push(`- User: ${(msg.content || "").slice(0, 100)}`);
280
- } else if (msg.role === "assistant" && msg.tool_calls) {
281
- for (const tc of msg.tool_calls) {
282
- lines.push(`- Tool call: ${tc.function?.name}`);
283
- }
284
- }
285
- }
286
- summary = lines.join("\n");
422
+ const partials = [];
423
+ for (let i = 0; i < chunks.length; i++) {
424
+ const chunk = chunks[i];
425
+ const partial = await this._summarizeChunk(chunk, i, chunks.length);
426
+ partials.push(partial);
287
427
  }
288
428
 
429
+ const summary = partials.length === 1
430
+ ? partials[0]
431
+ : "## Compacted history (multi-part)\n\n" + partials.map((p, i) => `### Part ${i + 1}\n${p}`).join("\n\n");
432
+
289
433
  // Replace history
290
434
  this.history._messages = [
291
435
  { role: "user", content: `[Previous conversation summary]\n${summary}` },
@@ -298,6 +442,7 @@ export class AgentEngine {
298
442
  this.eventLog.append("compact", {
299
443
  removedCount: olderMessages.length,
300
444
  retainedCount: recentMessages.length,
445
+ chunkCount: chunks.length,
301
446
  summary,
302
447
  });
303
448
 
@@ -308,6 +453,81 @@ export class AgentEngine {
308
453
  };
309
454
  }
310
455
 
456
+ /**
457
+ * Split a flat message list into chunks where each chunk's serialized JSON
458
+ * fits within tokenBudget. Chunks are turn-aligned where possible (a single
459
+ * user→assistant→tool sequence won't be split mid-turn unless that single
460
+ * turn alone exceeds the budget; in that case it gets its own oversized
461
+ * chunk and the LLM call may fail → mechanical fallback fires).
462
+ */
463
+ _chunkMessages(messages, tokenBudget) {
464
+ const chunks = [];
465
+ let current = [];
466
+ let currentTokens = 0;
467
+ for (const msg of messages) {
468
+ const mTokens = estimateTokens(JSON.stringify(msg));
469
+ if (current.length > 0 && currentTokens + mTokens > tokenBudget) {
470
+ chunks.push(current);
471
+ current = [];
472
+ currentTokens = 0;
473
+ }
474
+ current.push(msg);
475
+ currentTokens += mTokens;
476
+ }
477
+ if (current.length > 0) chunks.push(current);
478
+ return chunks;
479
+ }
480
+
481
+ /**
482
+ * Summarize one chunk via the conductor LLM. On failure (incl. context-length
483
+ * errors that the chunked split should usually prevent), fall back to a
484
+ * mechanical summary so we always produce *something*.
485
+ */
486
+ async _summarizeChunk(chunk, idx, total) {
487
+ const partLabel = total > 1 ? ` (part ${idx + 1}/${total})` : "";
488
+ try {
489
+ const resp = await this.client.chat({
490
+ model: this.config.kcModel,
491
+ messages: [
492
+ {
493
+ role: "system",
494
+ content:
495
+ "You are a conversation summarizer. Produce a concise summary of the following conversation excerpt. " +
496
+ "Focus on: decisions made, files created or modified, current state of work, key findings, " +
497
+ "unresolved questions. Be specific about file paths, rule IDs, and results. Keep under 1500 tokens.",
498
+ },
499
+ {
500
+ role: "user",
501
+ content: `Summarize this conversation excerpt${partLabel}:\n\n${JSON.stringify(chunk)}`,
502
+ },
503
+ ],
504
+ maxTokens: 1800,
505
+ });
506
+ const text = resp.choices?.[0]?.message?.content;
507
+ if (text) return text;
508
+ } catch {
509
+ // fall through to mechanical
510
+ }
511
+ return this._mechanicalSummary(chunk, partLabel);
512
+ }
513
+
514
+ _mechanicalSummary(chunk, partLabel) {
515
+ const lines = [`Mechanical summary${partLabel}:`];
516
+ for (const msg of chunk) {
517
+ if (msg.role === "user" && typeof msg.content === "string") {
518
+ lines.push(`- User: ${msg.content.slice(0, 120).replace(/\s+/g, " ")}`);
519
+ } else if (msg.role === "assistant") {
520
+ if (typeof msg.content === "string" && msg.content) {
521
+ lines.push(`- Assistant: ${msg.content.slice(0, 120).replace(/\s+/g, " ")}`);
522
+ }
523
+ for (const tc of msg.tool_calls || []) {
524
+ lines.push(`- Tool call: ${tc.function?.name || "?"}`);
525
+ }
526
+ }
527
+ }
528
+ return lines.join("\n");
529
+ }
530
+
311
531
  /**
312
532
  * Restore an engine from a persisted session.
313
533
  * @param {object} opts
@@ -325,6 +545,7 @@ export class AgentEngine {
325
545
  engine.currentPhase = data.currentPhase || Phase.BOOTSTRAP;
326
546
  engine._phaseSummaries = data.phaseSummaries || [];
327
547
  engine._registerToolsForPhase(engine.currentPhase);
548
+ engine.workspace.setPhase(engine.currentPhase);
328
549
 
329
550
  // Restore project directory from saved state
330
551
  if (data.projectDir) {
@@ -342,6 +563,17 @@ export class AgentEngine {
342
563
  }
343
564
  }
344
565
 
566
+ // Re-prime _lastReady AFTER importState so it reflects the restored
567
+ // pipeline milestones, not the empty defaults from constructor.
568
+ // (Bug 5 fix — without this, resume reignites auto-advance.)
569
+ for (const phase of Object.keys(engine.pipelines)) {
570
+ try {
571
+ engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
572
+ } catch {
573
+ engine._lastReady[phase] = false;
574
+ }
575
+ }
576
+
345
577
  engine.eventLog.append("session_resume", {
346
578
  resumedPhase: engine.currentPhase,
347
579
  resumedFromSeq: data.lastEventSeq,
@@ -358,6 +590,56 @@ export class AgentEngine {
358
590
  this.sessionState.save(this);
359
591
  }
360
592
 
593
+ /**
594
+ * Rename the workspace folder and cascade the new path to every persistence
595
+ * subsystem that captured `workspace.cwd` at construction time (Bug 3).
596
+ * Without this cascade, subsystems keep writing to the OLD path even
597
+ * though the directory has moved on disk — the user sees the renamed dir
598
+ * "die" while the old dir keeps growing.
599
+ *
600
+ * Also regenerates Block 9 cron wrapper scripts which bake in absolute
601
+ * paths to the workspace. Returns information for the TUI to surface
602
+ * (incl. whether the user needs to re-install crontab lines).
603
+ *
604
+ * @param {string} newName
605
+ * @returns {{ sessionId: string, oldCwd: string, newCwd: string,
606
+ * scheduleWrappersRegenerated: string[],
607
+ * scheduleWrappersSkipped: string[] }}
608
+ */
609
+ renameSession(newName) {
610
+ const r = this.workspace.rename(newName);
611
+ if (r.changed) {
612
+ // Cascade to every subsystem that captured workspace.cwd
613
+ this.history._setWorkspacePath?.(r.newCwd);
614
+ this.eventLog._setWorkspacePath?.(r.newCwd);
615
+ this.sessionState._setWorkspacePath?.(r.newCwd);
616
+ this.taskManager?._setWorkspacePath?.(r.newCwd);
617
+ this.confidence._setWorkspacePath?.(r.newCwd);
618
+ this.cornerCases._setWorkspacePath?.(r.newCwd);
619
+ }
620
+
621
+ // Regenerate cron wrapper scripts — they bake absolute paths to WORKSPACE,
622
+ // INPUT_DIR, LOG_FILE, so rename invalidates them. The Scheduler is
623
+ // workspace-bound (created on demand inside the schedule_fetch tool), so
624
+ // construct a fresh one against the renamed workspace.
625
+ let scheduleResult = { regenerated: [], disabled: [], failed: [] };
626
+ try {
627
+ const sched = new Scheduler(this.workspace);
628
+ scheduleResult = sched.regenerateAllWrappers();
629
+ } catch {
630
+ // Best effort — never let scheduler issues block the rename
631
+ }
632
+
633
+ return {
634
+ sessionId: r.sessionId,
635
+ oldCwd: r.oldCwd,
636
+ newCwd: r.newCwd,
637
+ scheduleWrappersRegenerated: scheduleResult.regenerated,
638
+ scheduleWrappersDisabled: scheduleResult.disabled,
639
+ scheduleWrappersFailed: scheduleResult.failed,
640
+ };
641
+ }
642
+
361
643
  /**
362
644
  * Run one conversation turn. Yields AgentEvent objects.
363
645
  * Loops: LLM call -> tool execution -> LLM call ... until no tool calls.
@@ -383,7 +665,7 @@ export class AgentEngine {
383
665
  while (true) {
384
666
  // Apply context windowing before sending to LLM
385
667
  const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
386
- const messages = [{ role: "system", content: systemPrompt }, ...windowed.messages];
668
+ let messages = [{ role: "system", content: systemPrompt }, ...windowed.messages];
387
669
 
388
670
  if (windowed.wasWindowed) {
389
671
  this.eventLog.append("context_windowed", {
@@ -392,6 +674,12 @@ export class AgentEngine {
392
674
  });
393
675
  }
394
676
 
677
+ // Pre-flight hard ceiling (Bug 1 P0). Even after windowing, if the
678
+ // request still exceeds the model's input budget (e.g., recent messages
679
+ // alone are too big), drop the oldest non-sticky message blocks until under
680
+ // budget. Better to lose some history than crash with HTTP 400.
681
+ messages = this._enforceTokenBudget(messages);
682
+
395
683
  this.eventLog.append("llm_start", {
396
684
  model: this.config.kcModel,
397
685
  messageCount: messages.length,
@@ -448,6 +736,12 @@ export class AgentEngine {
448
736
  });
449
737
 
450
738
  if (toolCallsAcc.size === 0) {
739
+ // Bug 4 trigger (2): re-check phase criteria at end of every turn —
740
+ // KC may have advanced state via conversation alone, without any
741
+ // tool that the pipeline narrowly watches.
742
+ const advancedEv = this._maybeAutoAdvance();
743
+ if (advancedEv) yield advancedEv;
744
+
451
745
  this.eventLog.append("turn_complete", {});
452
746
  this.saveState();
453
747
  yield new AgentEvent({ type: "turn_complete" });
@@ -466,22 +760,29 @@ export class AgentEngine {
466
760
 
467
761
  const result = await this.toolRegistry.execute(tc.name, inputData);
468
762
 
763
+ // Tool-call offloading: large outputs go to logs/tool_results/<traceId>.txt;
764
+ // history holds head + tail with a pointer. Event log keeps the full output
765
+ // (it's append-only and the source of truth).
766
+ const offload = this._maybeOffload(tc.name, result);
767
+ const historyContent = offload ? offload.digest : (result.content || "");
768
+
469
769
  this.eventLog.append("tool_result", {
470
770
  name: tc.name,
471
- output: result.content?.slice(0, 5000) || "",
771
+ output: result.content || "",
472
772
  isError: result.isError,
773
+ traceId: offload?.traceId || null,
473
774
  });
474
775
  yield new AgentEvent({
475
776
  type: "tool_result",
476
777
  name: tc.name,
477
- output: result.content,
778
+ output: historyContent,
478
779
  isError: result.isError,
479
780
  });
480
781
 
481
782
  this.history.addRaw({
482
783
  role: "tool",
483
784
  tool_call_id: tc.id,
484
- content: result.content,
785
+ content: historyContent,
485
786
  });
486
787
 
487
788
  // Pipeline controller: update state and re-register tools on phase change
@@ -489,29 +790,20 @@ export class AgentEngine {
489
790
  const pEvent = pipeline.onToolResult(tc.name, inputData, result);
490
791
  if (pEvent) {
491
792
  if (pEvent.type === "phase_ready" && pEvent.nextPhase) {
492
- const phaseSummary = `[${this.currentPhase.toUpperCase()} completed]: ${pEvent.message || ""}`;
493
- this._phaseSummaries.push(phaseSummary);
494
- this.eventLog.append("phase_transition", {
495
- from: this.currentPhase,
496
- to: pEvent.nextPhase,
497
- summary: phaseSummary,
498
- });
499
- this.currentPhase = pEvent.nextPhase;
500
- this._registerToolsForPhase(this.currentPhase);
501
-
502
- // Ralph-loop: create per-rule tasks for the new phase
503
- this._createTasksForPhase(this.currentPhase);
504
-
505
- this.saveState();
793
+ this._advancePhase(pEvent.nextPhase, pEvent.message || "exit criteria met");
506
794
  }
507
- yield new AgentEvent({
508
- type: "pipeline_event",
509
- data: pEvent,
510
- });
795
+ yield new AgentEvent({ type: "pipeline_event", data: pEvent });
511
796
  }
512
797
  }
513
798
  }
514
799
 
800
+ // Bug 4 fix: re-check exit criteria after every tool-result loop, not
801
+ // just from pipeline.onToolResult. The pipeline's describeState() (called
802
+ // on every turn) already re-scans, so exitCriteriaMet() is accurate; we
803
+ // just need to act on it eagerly.
804
+ const ev = this._maybeAutoAdvance();
805
+ if (ev) yield ev;
806
+
515
807
  } catch (err) {
516
808
  this.eventLog.append("error", { message: err.message });
517
809
  yield new AgentEvent({ type: "error", message: err.message });
@@ -520,11 +812,117 @@ export class AgentEngine {
520
812
  }
521
813
  }
522
814
 
815
+ /**
816
+ * Centralized phase transition (Bug 4). All three triggers route through here:
817
+ * (1) pipeline.onToolResult returning phase_ready
818
+ * (2) post-turn auto-check via _maybeAutoAdvance
819
+ * (3) explicit user request via the phase_advance tool
820
+ *
821
+ * Reachability: by default only forward-by-one transitions per NEXT_PHASE.
822
+ * Set `force: true` to allow non-adjacent or backward transitions (e.g. user
823
+ * explicitly requests a regression for testing). The refusal is logged.
824
+ *
825
+ * Idempotent — calling with the current phase is a no-op.
826
+ */
827
+ _advancePhase(nextPhase, reason = "", { force = false } = {}) {
828
+ if (!nextPhase || nextPhase === this.currentPhase) return false;
829
+
830
+ const expected = NEXT_PHASE[this.currentPhase];
831
+ if (!force && nextPhase !== expected) {
832
+ this.eventLog.append("phase_advance_refused", {
833
+ from: this.currentPhase, to: nextPhase, reason,
834
+ hint: expected ? `expected next phase is '${expected}' — pass force:true to override`
835
+ : `${this.currentPhase} is the terminal phase`,
836
+ });
837
+ return false;
838
+ }
839
+
840
+ const phaseSummary = `[${this.currentPhase.toUpperCase()} → ${nextPhase.toUpperCase()}]: ${reason}${force && nextPhase !== expected ? " (forced)" : ""}`;
841
+ this._phaseSummaries.push(phaseSummary);
842
+ this.eventLog.append("phase_transition", {
843
+ from: this.currentPhase,
844
+ to: nextPhase,
845
+ reason,
846
+ forced: force && nextPhase !== expected,
847
+ });
848
+ this.currentPhase = nextPhase;
849
+ this._registerToolsForPhase(this.currentPhase);
850
+ this.workspace.setPhase(this.currentPhase);
851
+ this._createTasksForPhase(this.currentPhase);
852
+ this.saveState();
853
+ return true;
854
+ }
855
+
856
+ /**
857
+ * Bug 4 trigger (1) auto-detect, edge-triggered (Bug 5): only fires on a
858
+ * fresh false → true flip in `exitCriteriaMet()`. Sessions resumed in an
859
+ * already-met state do nothing; users iterating in a phase whose criteria
860
+ * have been met for a while do nothing. Real new evidence is required.
861
+ */
862
+ _maybeAutoAdvance() {
863
+ const phase = this.currentPhase;
864
+ const pipeline = this.pipelines[phase];
865
+ let nowReady = false;
866
+ try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
867
+
868
+ if (!nowReady) {
869
+ this._lastReady[phase] = false;
870
+ return null;
871
+ }
872
+ // Edge-trigger: nowReady && !wasReady
873
+ if (this._lastReady[phase]) return null;
874
+ this._lastReady[phase] = true;
875
+
876
+ const next = NEXT_PHASE[phase];
877
+ if (!next) return null;
878
+ const advanced = this._advancePhase(next, "exit criteria flipped to met");
879
+ if (!advanced) return null;
880
+ return new AgentEvent({
881
+ type: "pipeline_event",
882
+ data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
883
+ });
884
+ }
885
+
886
+ /**
887
+ * Tool-call offloading. If the tool's content exceeds the threshold,
888
+ * write the full content to logs/tool_results/<traceId>.txt and return a
889
+ * digest (head + tail) with a pointer. Otherwise return null (caller uses
890
+ * full content).
891
+ */
892
+ _maybeOffload(toolName, result) {
893
+ const content = result.content || "";
894
+ if (!content) return null;
895
+ const threshold = result.isError
896
+ ? (this.config.toolOutputOffloadErrorTokens ?? 500)
897
+ : (this.config.toolOutputOffloadTokens ?? 2000);
898
+ const tokens = estimateTokens(content);
899
+ if (tokens <= threshold) return null;
900
+
901
+ const safeToolName = String(toolName || "tool").replace(/[^A-Za-z0-9_-]/g, "_");
902
+ const traceId = this.versionManager.generateTraceId(safeToolName, "result");
903
+ const offloadDir = path.join(this.workspace.cwd, "logs", "tool_results");
904
+ try {
905
+ fs.mkdirSync(offloadDir, { recursive: true });
906
+ fs.writeFileSync(path.join(offloadDir, `${traceId}.txt`), content, "utf-8");
907
+ } catch {
908
+ // If we can't write the offload file, fall back to keeping full content in context.
909
+ return null;
910
+ }
911
+
912
+ const HEAD = 800, TAIL = 800;
913
+ const truncatedNote = `\n\n[…truncated, ${tokens} tokens; full at logs/tool_results/${traceId}.txt — read with workspace_file if needed…]\n\n`;
914
+ const digest = content.length > HEAD + TAIL
915
+ ? content.slice(0, HEAD) + truncatedNote + content.slice(-TAIL)
916
+ : content + truncatedNote;
917
+ return { traceId, digest };
918
+ }
919
+
523
920
  /**
524
921
  * Create per-rule tasks when entering a new phase.
525
922
  * Reads the rule catalog and creates one task per rule for the given phase.
526
923
  */
527
924
  _createTasksForPhase(phase) {
925
+ if (!this.taskManager) return; // Sub-agents don't manage tasks
528
926
  const catalogPath = path.join(this.workspace.cwd, "rules", "catalog.json");
529
927
  if (!fs.existsSync(catalogPath)) return;
530
928
 
@@ -546,6 +944,12 @@ export class AgentEngine {
546
944
  * @yields {AgentEvent}
547
945
  */
548
946
  async *runTaskLoop(userMessage) {
947
+ // Sub-agents don't run task loops — they execute one task and exit
948
+ if (!this.taskManager) {
949
+ yield* this.runTurn(userMessage);
950
+ return;
951
+ }
952
+
549
953
  // Run the initial turn (user's request)
550
954
  yield* this.runTurn(userMessage);
551
955
 
@@ -593,6 +997,41 @@ export class AgentEngine {
593
997
  progress: this.taskManager.progress,
594
998
  },
595
999
  });
1000
+
1001
+ // Bug 4 trigger (2): auto-advance when all phase tasks are done AND
1002
+ // the pipeline's exit criteria are also met (Bug 5 fix — task state
1003
+ // alone is a ralph-loop convenience, not authoritative phase signal;
1004
+ // tasks could be marked skipped manually or by an editor).
1005
+ if (this._allCurrentPhaseTasksComplete()) {
1006
+ const pipeline = this.pipelines[this.currentPhase];
1007
+ let exitMet = false;
1008
+ try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
1009
+ if (exitMet) {
1010
+ const next = NEXT_PHASE[this.currentPhase];
1011
+ if (next) {
1012
+ const advanced = this._advancePhase(next, "all current-phase tasks completed + exit criteria met");
1013
+ if (advanced) {
1014
+ yield new AgentEvent({
1015
+ type: "pipeline_event",
1016
+ data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
1017
+ });
1018
+ }
1019
+ }
1020
+ }
1021
+ }
596
1022
  }
597
1023
  }
1024
+
1025
+ /**
1026
+ * True when every task tagged with the current phase is in a terminal state
1027
+ * (completed | failed | skipped) and at least one such task exists. Used by
1028
+ * runTaskLoop's auto-advance trigger.
1029
+ */
1030
+ _allCurrentPhaseTasksComplete() {
1031
+ if (!this.taskManager) return false;
1032
+ const phase = this.currentPhase;
1033
+ const phaseTasks = this.taskManager.getAllTasks().filter((t) => t.phase === phase);
1034
+ if (phaseTasks.length === 0) return false;
1035
+ return phaseTasks.every((t) => t.status === "completed" || t.status === "failed" || t.status === "skipped");
1036
+ }
598
1037
  }