npm - kc-beta - Versions diffs - 0.3.2 → 0.5.4 - Mend

kc-beta 0.3.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/package.json +1 -1
package/src/agent/confidence-scorer.js +8 -0
package/src/agent/context-window.js +7 -2
package/src/agent/context.js +25 -0
package/src/agent/corner-case-registry.js +5 -0
package/src/agent/engine.js +564 -76
package/src/agent/event-log.js +15 -2
package/src/agent/history.js +91 -23
package/src/agent/pipelines/initializer.js +3 -6
package/src/agent/retry.js +9 -1
package/src/agent/rule-catalog-normalize.js +37 -0
package/src/agent/scheduler.js +276 -0
package/src/agent/session-state.js +11 -2
package/src/agent/task-manager.js +5 -0
package/src/agent/tools/agent-tool.js +57 -14
package/src/agent/tools/archive-file.js +94 -0
package/src/agent/tools/copy-to-workspace.js +140 -0
package/src/agent/tools/phase-advance.js +60 -0
package/src/agent/tools/release.js +323 -0
package/src/agent/tools/rule-catalog.js +56 -4
package/src/agent/tools/schedule-fetch.js +118 -0
package/src/agent/tools/snapshot.js +101 -0
package/src/agent/tools/workspace-file.js +10 -7
package/src/agent/version-manager.js +29 -120
package/src/agent/workspace.js +127 -4
package/src/cli/components.js +68 -12
package/src/cli/index.js +147 -15
package/src/config.js +10 -1
package/src/model-tiers.json +5 -5
package/template/release-runtime/README.md.tmpl +84 -0
package/template/release-runtime/kc_runtime/__init__.py +2 -0
package/template/release-runtime/kc_runtime/confidence.py +93 -0
package/template/release-runtime/kc_runtime/dashboard.py +208 -0
package/template/release-runtime/render_dashboard.py +49 -0
package/template/release-runtime/run.py +230 -0
package/template/release-runtime/serve.sh +15 -0
package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +11 -0
package/template/skills/en/meta-meta/quality-control/SKILL.md +13 -1
package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +8 -0
package/template/skills/en/meta-meta/task-decomposition/SKILL.md +13 -0
package/template/skills/en/meta-meta/version-control/SKILL.md +13 -0
package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +11 -0
package/template/skills/zh/meta-meta/quality-control/SKILL.md +12 -0
package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +8 -0
package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +16 -0
package/template/skills/zh/meta-meta/version-control/SKILL.md +13 -0
package/template/workspace.gitignore +22 -0

package/src/agent/engine.js CHANGED Viewed

@@ -4,12 +4,19 @@ import { AgentEvent } from "./events.js";
 import { ContextAssembler } from "./context.js";
 import { ConversationHistory } from "./history.js";
 import { Workspace } from "./workspace.js";
+import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
 import { VersionManager } from "./version-manager.js";
 import { CornerCaseRegistry } from "./corner-case-registry.js";
 import { ConfidenceScorer } from "./confidence-scorer.js";
 import { ToolRegistry } from "./tools/registry.js";
 import { SandboxExecTool } from "./tools/sandbox-exec.js";
 import { WorkspaceFileTool } from "./tools/workspace-file.js";
+import { CopyToWorkspaceTool } from "./tools/copy-to-workspace.js";
+import { SnapshotTool } from "./tools/snapshot.js";
+import { ArchiveFileTool } from "./tools/archive-file.js";
+import { ScheduleFetchTool } from "./tools/schedule-fetch.js";
+import { ReleaseTool } from "./tools/release.js";
+import { PhaseAdvanceTool } from "./tools/phase-advance.js";
 import { DocumentParseTool } from "./tools/document-parse.js";
 import { DocumentSearchTool } from "./tools/document-search.js";
 import { WorkerLLMCallTool } from "./tools/worker-llm-call.js";
@@ -23,6 +30,7 @@ import { AgentTool } from "./tools/agent-tool.js";
 import { WebSearchTool } from "./tools/web-search.js";
 import { SkillLoader } from "./skill-loader.js";
 import { TaskManager } from "./task-manager.js";
+import { Scheduler } from "./scheduler.js";
 import { Phase } from "./pipelines/index.js";
 import { ProjectInitializer } from "./pipelines/initializer.js";
 import { RuleExtractionPipeline } from "./pipelines/extraction.js";
@@ -35,9 +43,25 @@ import { ContextWindow } from "./context-window.js";
 import { SessionState } from "./session-state.js";
 import { estimateTokens, estimateMessagesTokens } from "./token-counter.js";
+// Default max output tokens for the conductor LLM. SOTA models (GLM-5,
+// Claude Sonnet 4) handle this comfortably. Override via KC_MAX_TOKENS env
+// or kc_max_tokens in the global config.
+const DEFAULT_KC_MAX_TOKENS = 65536;
 // Phases where worker LLM tools are available (DISTILL mode)
 const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
+// Linear phase order — used by auto-advance (Bug 4). Last phase has no successor.
+// Exported so the TUI's /phase slash command (src/cli/index.js) can call
+// _advancePhase with the right successor without re-declaring the map.
+export const NEXT_PHASE = {
+  [Phase.BOOTSTRAP]: Phase.EXTRACTION,
+  [Phase.EXTRACTION]: Phase.SKILL_AUTHORING,
+  [Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
+  [Phase.SKILL_TESTING]: Phase.DISTILLATION,
+  [Phase.DISTILLATION]: Phase.PRODUCTION_QC,
+};
 /**
  * The KC Agent conversation engine.
  *
@@ -52,40 +76,85 @@ export class AgentEngine {
    * @param {import('./llm-client.js').LLMClient} opts.client
    * @param {object} opts.config - Settings from loadSettings()
    * @param {string} [opts.sessionId]
+   * @param {string} [opts.subagentScope] - When set, persistence is isolated to
+   *   sub_agents/<scope>/ inside the workspace. Used by `agent_tool` to spawn
+   *   children that share workspace files but don't trash parent's history /
+   *   tasks / session-state. (Bug 2)
+   * @param {string} [opts.initialPhase] - When set, the engine starts in this phase
+   *   instead of BOOTSTRAP. Used by sub-agents to inherit parent's phase so they
+   *   get the right tools registered. (Bug 2)
    */
-  constructor({ client, config, sessionId }) {
+  constructor({ client, config, sessionId, subagentScope, initialPhase }) {
     this.client = client;
     this.config = config;
     this.context = new ContextAssembler();
+    this._isSubagent = !!subagentScope;
+    this._subagentScope = subagentScope || null;
     // Workspace + structural components
-    this.workspace = new Workspace(config.kcWorkspaceRoot, sessionId, config.projectDir);
-    this.history = new ConversationHistory(this.workspace.cwd);
+    this.workspace = new Workspace(
+      config.kcWorkspaceRoot,
+      sessionId,
+      config.projectDir,
+      { gitAutoCommit: config.gitAutoCommit !== false },
+    );
+    // For sub-agents, persistence (history/events/state) lives under
+    // sub_agents/<scope>/ instead of the workspace root. Workspace files
+    // (rules/, rule_skills/, workflows/) stay shared.
+    let conversationDir, logDir, statePath;
+    if (this._isSubagent) {
+      // Defense-in-depth: even though agent_tool sanitizes task_id against
+      // VALID_TASK_ID, an attacker reaching engine construction through
+      // another path (e.g. future callers) must not escape the workspace.
+      const scopeRoot = path.resolve(this.workspace.cwd, "sub_agents", subagentScope);
+      const wsRoot = path.resolve(this.workspace.cwd);
+      if (scopeRoot !== wsRoot && !scopeRoot.startsWith(wsRoot + path.sep)) {
+        throw new Error(`sub-agent scope escapes workspace: ${subagentScope}`);
+      }
+      // Also reject the scopeRoot being the workspace root itself, since that
+      // would defeat isolation.
+      if (scopeRoot === wsRoot || scopeRoot === path.resolve(wsRoot, "sub_agents")) {
+        throw new Error(`sub-agent scope must be a unique subfolder, got: ${subagentScope}`);
+      }
+      fs.mkdirSync(scopeRoot, { recursive: true });
+      conversationDir = path.join(scopeRoot, "conversation");
+      logDir = path.join(scopeRoot, "logs");
+      statePath = path.join(scopeRoot, "session-state.json");
+    }
+    const initialPhaseValue = initialPhase || Phase.BOOTSTRAP;
+    this.workspace.setPhase(initialPhaseValue);
+    this.history = new ConversationHistory(this.workspace.cwd, {
+      conversationDir,
+      maxMessageTokens: this.config.maxMessageTokens,
+    });
     this.versionManager = new VersionManager(this.workspace.cwd);
     this.cornerCases = new CornerCaseRegistry(this.workspace.cwd);
     this.confidence = new ConfidenceScorer(this.workspace.cwd, this.cornerCases);
     // Event log (append-only JSONL, source of truth)
-    this.eventLog = new EventLog(this.workspace.cwd);
+    this.eventLog = new EventLog(this.workspace.cwd, { logDir });
     // Context windowing
     this.contextWindow = new ContextWindow({
       contextLimit: config.kcContextLimit || 200000,
-      reserveForResponse: config.kcMaxTokens || 65536,
+      reserveForResponse: config.kcMaxTokens || DEFAULT_KC_MAX_TOKENS,
     });
     // Session state persistence
-    this.sessionState = new SessionState(this.workspace.cwd);
+    this.sessionState = new SessionState(this.workspace.cwd, { statePath });
-    // Task manager (ralph-loop)
-    this.taskManager = new TaskManager(this.workspace.cwd);
+    // Task manager (ralph-loop) — sub-agents don't queue further sub-tasks,
+    // so they don't get a TaskManager.
+    this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
     // Build all tool instances (but register phase-appropriate ones)
     this._buildTools = this._createAllTools();
     this._phaseSummaries = [];
     // Pipeline system (meta-meta skills as code)
-    this.currentPhase = Phase.BOOTSTRAP;
+    this.currentPhase = initialPhaseValue;
     this.pipelines = {
       [Phase.BOOTSTRAP]: new ProjectInitializer(this.workspace),
       [Phase.EXTRACTION]: new RuleExtractionPipeline(this.workspace),
@@ -101,6 +170,17 @@ export class AgentEngine {
     // Register tools for initial phase
     this.toolRegistry = new ToolRegistry();
     this._registerToolsForPhase(this.currentPhase);
+    // Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
+    // phase so the first real false→true flip inside onToolResult triggers an
+    // advance — even when the user launches from a pre-populated workspace
+    // whose exit criteria already happen to be met at boot.
+    // resume() re-primes this from the restored pipeline state (after importState),
+    // which is the correct behaviour there: resumed sessions that were already
+    // past this phase shouldn't re-fire.
+    this._lastReady = Object.fromEntries(
+      Object.keys(this.pipelines).map((p) => [p, false]),
+    );
   }
   /**
@@ -127,6 +207,14 @@ export class AgentEngine {
       core: [
         new SandboxExecTool(this.workspace, this.config.kcExecTimeout),
         new WorkspaceFileTool(this.workspace, this.versionManager),
+        new CopyToWorkspaceTool(this.workspace, {
+          largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
+        }),
+        new SnapshotTool(this.workspace),
+        new ArchiveFileTool(this.workspace),
+        new ScheduleFetchTool(this.workspace),
+        new ReleaseTool(this.workspace, { kcVersion: "0.5.2" }),
+        new PhaseAdvanceTool((to, reason, opts) => this._advancePhase(to, reason, opts)),
         new DocumentParseTool(this.workspace, {
           mineruApiUrl: this.config.mineruApiUrl,
           mineruApiKey: this.config.mineruApiKey,
@@ -138,9 +226,14 @@ export class AgentEngine {
         new RuleCatalogTool(this.workspace),
         new EvolutionCycleTool(this.workspace, this.cornerCases),
         new DashboardRenderTool(this.workspace),
-        new AgentTool(this.workspace, (sid) => new AgentEngine({
-          client: this.client, config: this.config, sessionId: sid,
-        })),
+        new AgentTool(
+          this.workspace,
+          ({ sessionId, subagentScope, initialPhase }) => new AgentEngine({
+            client: this.client, config: this.config,
+            sessionId, subagentScope, initialPhase,
+          }),
+          () => this.currentPhase,
+        ),
         new WebSearchTool(this.config.tavilyApiKey),
       ],
       // Distillation+ only (DISTILL mode)
@@ -204,9 +297,11 @@ export class AgentEngine {
       );
     }
-    // Task progress (ralph-loop)
-    const taskContext = this.taskManager.describeForContext();
-    if (taskContext) lines.push("", taskContext);
+    // Task progress (ralph-loop) — skipped for sub-agents (no taskManager)
+    if (this.taskManager) {
+      const taskContext = this.taskManager.describeForContext();
+      if (taskContext) lines.push("", taskContext);
+    }
     return lines.join("\n");
   }
@@ -233,9 +328,126 @@ export class AgentEngine {
     };
   }
+  /**
+   * Run the windowing check immediately after a tool result appends to
+   * history. Called from runTurn() so that a large tool result can't sit in
+   * history past the threshold until the next LLM-loop iteration, where a
+   * stream-abort could then trap the context in a bloated state.
+   *
+   * Safe to call frequently — contextWindow.window() fast-paths when under
+   * the trigger fraction.
+   */
+  _maybeWindowAfterToolResult() {
+    if (!this.contextWindow) return;
+    const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
+    if (windowed.wasWindowed) {
+      this.history.messages = windowed.messages;
+      this.eventLog.append("context_windowed", {
+        removed: windowed.removedCount,
+        trigger: "post_tool_result",
+      });
+    }
+    // Heap-pressure diagnostic. The TUI has its own virtualization + tool-
+    // output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
+    // still see high heap usage, something else is leaking — log it once per
+    // pressure-crossing so operators can investigate without flooding logs.
+    try {
+      const mem = process.memoryUsage();
+      const frac = mem.heapUsed / (mem.heapTotal || 1);
+      if (frac > 0.80 && !this._memPressureLogged) {
+        this._memPressureLogged = true;
+        this.eventLog.append("memory_pressure", {
+          heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
+          heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
+          rssMB: Math.round(mem.rss / 1024 / 1024),
+          historyLength: this.history.messages.length,
+        });
+      } else if (frac < 0.60 && this._memPressureLogged) {
+        this._memPressureLogged = false;  // re-arm for next crossing
+      }
+    } catch { /* process.memoryUsage failures are non-fatal */ }
+  }
+  /**
+   * Pre-flight hard ceiling (Bug 1). After windowing, if the message
+   * array's total token count still exceeds the model's input budget,
+   * drop oldest user-bounded blocks until under budget.
+   *
+   * Drops in BLOCK units — a block is `user(N) + everything until the
+   * next user`. This guarantees the head after a drop is always either a
+   * user message or empty, satisfying Anthropic's "first message must use
+   * the user role" requirement and OpenAI's tool-call adjacency rules.
+   *
+   * Treats the compaction summary pair (user with `[Previous conversation
+   * summary]` or `[Context Summary` marker, followed by assistant ack) as
+   * sticky — it represents prior LLM-summarized work and should outlive
+   * any normal turn.
+   */
+  _enforceTokenBudget(messages) {
+    const limit = this.config.kcContextLimit || 200000;
+    const reserve = this.config.kcMaxTokens || DEFAULT_KC_MAX_TOKENS;
+    const budget = limit - reserve;
+    let totalTokens = estimateMessagesTokens(messages);
+    if (totalTokens <= budget) return messages;
+    // Sticky region: system + (optional summary user + ack assistant)
+    let stickyEnd = messages[0]?.role === "system" ? 1 : 0;
+    const sumMarkers = ["[Previous conversation summary]", "[Context Summary"];
+    const hasSummaryAt = (i) =>
+      messages[i]?.role === "user" &&
+      typeof messages[i].content === "string" &&
+      sumMarkers.some((m) => messages[i].content.startsWith(m));
+    if (hasSummaryAt(stickyEnd)) {
+      stickyEnd++;
+      if (messages[stickyEnd]?.role === "assistant") stickyEnd++;
+    }
+    let droppedCount = 0;
+    let droppedTokens = 0;
+    // Drop user-bounded blocks. A block starts at messages[stickyEnd]
+    // (expected to be a user message in normal flow) and runs up to (not
+    // including) the next user message — or to the end of array.
+    while (totalTokens > budget && messages.length > stickyEnd) {
+      const blockStart = stickyEnd;
+      let blockEnd = blockStart + 1;
+      while (blockEnd < messages.length && messages[blockEnd].role !== "user") blockEnd++;
+      // If this block goes to end-of-array, there's no following user to anchor
+      // the head — dropping it would leave just [system, (summary)?]. Stop and
+      // let the LLM call attempt; the API will surface a clear error if even
+      // sticky alone is over budget.
+      if (blockEnd === messages.length) break;
+      const removed = messages.splice(blockStart, blockEnd - blockStart);
+      droppedCount += removed.length;
+      droppedTokens += removed.reduce((a, m) => a + estimateTokens(JSON.stringify(m)), 0);
+      totalTokens = estimateMessagesTokens(messages);
+    }
+    // Defensive postcondition: head after sticky must be a user message or
+    // the array must end at sticky. Block-drop should make this trivially true,
+    // but if the input was malformed (e.g., already started with a non-user),
+    // clean up here so we never send an Anthropic-invalid sequence.
+    while (messages.length > stickyEnd && messages[stickyEnd].role !== "user") {
+      messages.splice(stickyEnd, 1);
+      droppedCount++;
+    }
+    if (droppedCount > 0) {
+      this.eventLog.append("context_truncated", {
+        droppedCount,
+        droppedTokens,
+        finalTokens: totalTokens,
+        budget,
+      });
+    }
+    return messages;
+  }
   /**
    * Compact conversation history by summarizing older messages via LLM.
-   * Keeps the most recent messages intact.
+   * Keeps the most recent messages intact. (Bug 1: now chunked — never sends
+   * a single oversized prompt to the summarizer LLM.)
    * @param {object} [opts]
    * @param {number} [opts.recentCount=20] - Number of recent messages to keep
    * @returns {Promise<{removedCount: number, retainedCount: number, summaryTokens: number}|null>}
@@ -246,46 +458,20 @@ export class AgentEngine {
     const olderMessages = this.history.messages.slice(0, -recentCount);
     const recentMessages = this.history.messages.slice(-recentCount);
-    let summary;
-    try {
-      const summaryResp = await this.client.chat({
-        model: this.config.kcModel,
-        messages: [
-          {
-            role: "system",
-            content:
-              "You are a conversation summarizer. Produce a concise summary of the following conversation. " +
-              "Focus on: decisions made, files created or modified, current state of work, key findings, " +
-              "unresolved questions. Be specific about file paths, rule IDs, and results. Keep under 2000 tokens.",
-          },
-          {
-            role: "user",
-            content: `Summarize this conversation:\n\n${JSON.stringify(olderMessages)}`,
-          },
-        ],
-        maxTokens: 2048,
-      });
-      summary = summaryResp.choices?.[0]?.message?.content || null;
-    } catch {
-      // LLM summary failed — do mechanical fallback
-      summary = null;
-    }
+    const CHUNK_BUDGET = 30000; // tokens per summarization request
+    const chunks = this._chunkMessages(olderMessages, CHUNK_BUDGET);
-    if (!summary) {
-      // Mechanical fallback: extract tool names and outcomes
-      const lines = ["Previous conversation summary (mechanical):"];
-      for (const msg of olderMessages) {
-        if (msg.role === "user") {
-          lines.push(`- User: ${(msg.content || "").slice(0, 100)}`);
-        } else if (msg.role === "assistant" && msg.tool_calls) {
-          for (const tc of msg.tool_calls) {
-            lines.push(`- Tool call: ${tc.function?.name}`);
-          }
-        }
-      }
-      summary = lines.join("\n");
+    const partials = [];
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      const partial = await this._summarizeChunk(chunk, i, chunks.length);
+      partials.push(partial);
     }
+    const summary = partials.length === 1
+      ? partials[0]
+      : "## Compacted history (multi-part)\n\n" + partials.map((p, i) => `### Part ${i + 1}\n${p}`).join("\n\n");
     // Replace history
     this.history._messages = [
       { role: "user", content: `[Previous conversation summary]\n${summary}` },
@@ -298,6 +484,7 @@ export class AgentEngine {
     this.eventLog.append("compact", {
       removedCount: olderMessages.length,
       retainedCount: recentMessages.length,
+      chunkCount: chunks.length,
       summary,
     });
@@ -308,6 +495,81 @@ export class AgentEngine {
     };
   }
+  /**
+   * Split a flat message list into chunks whose estimated token count (summed
+   * per message over its serialized JSON) stays within tokenBudget. Splits
+   * happen only between messages and are not turn-aligned, so a single
+   * user→assistant→tool sequence may straddle two chunks. A message that
+   * alone exceeds the budget gets its own oversized chunk, in which case the
+   * LLM call may fail → mechanical fallback fires.
+   */
+  _chunkMessages(messages, tokenBudget) {
+    const chunks = [];
+    let current = [];
+    let currentTokens = 0;
+    for (const msg of messages) {
+      const mTokens = estimateTokens(JSON.stringify(msg));
+      if (current.length > 0 && currentTokens + mTokens > tokenBudget) {
+        chunks.push(current);
+        current = [];
+        currentTokens = 0;
+      }
+      current.push(msg);
+      currentTokens += mTokens;
+    }
+    if (current.length > 0) chunks.push(current);
+    return chunks;
+  }
+  /**
+   * Summarize one chunk via the conductor LLM. On failure (incl. context-length
+   * errors that the chunked split should usually prevent), fall back to a
+   * mechanical summary so we always produce *something*.
+   */
+  async _summarizeChunk(chunk, idx, total) {
+    const partLabel = total > 1 ? ` (part ${idx + 1}/${total})` : "";
+    try {
+      const resp = await this.client.chat({
+        model: this.config.kcModel,
+        messages: [
+          {
+            role: "system",
+            content:
+              "You are a conversation summarizer. Produce a concise summary of the following conversation excerpt. " +
+              "Focus on: decisions made, files created or modified, current state of work, key findings, " +
+              "unresolved questions. Be specific about file paths, rule IDs, and results. Keep under 1500 tokens.",
+          },
+          {
+            role: "user",
+            content: `Summarize this conversation excerpt${partLabel}:\n\n${JSON.stringify(chunk)}`,
+          },
+        ],
+        maxTokens: 1800,
+      });
+      const text = resp.choices?.[0]?.message?.content;
+      if (text) return text;
+    } catch {
+      // fall through to mechanical
+    }
+    return this._mechanicalSummary(chunk, partLabel);
+  }
+  _mechanicalSummary(chunk, partLabel) {
+    const lines = [`Mechanical summary${partLabel}:`];
+    for (const msg of chunk) {
+      if (msg.role === "user" && typeof msg.content === "string") {
+        lines.push(`- User: ${msg.content.slice(0, 120).replace(/\s+/g, " ")}`);
+      } else if (msg.role === "assistant") {
+        if (typeof msg.content === "string" && msg.content) {
+          lines.push(`- Assistant: ${msg.content.slice(0, 120).replace(/\s+/g, " ")}`);
+        }
+        for (const tc of msg.tool_calls || []) {
+          lines.push(`- Tool call: ${tc.function?.name || "?"}`);
+        }
+      }
+    }
+    return lines.join("\n");
+  }
   /**
    * Restore an engine from a persisted session.
    * @param {object} opts
@@ -325,6 +587,7 @@ export class AgentEngine {
       engine.currentPhase = data.currentPhase || Phase.BOOTSTRAP;
       engine._phaseSummaries = data.phaseSummaries || [];
       engine._registerToolsForPhase(engine.currentPhase);
+      engine.workspace.setPhase(engine.currentPhase);
       // Restore project directory from saved state
       if (data.projectDir) {
@@ -342,6 +605,17 @@ export class AgentEngine {
         }
       }
+      // Re-prime _lastReady AFTER importState so it reflects the restored
+      // pipeline milestones, not the empty defaults from constructor.
+      // (Bug 5 fix — without this, resume reignites auto-advance.)
+      for (const phase of Object.keys(engine.pipelines)) {
+        try {
+          engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
+        } catch {
+          engine._lastReady[phase] = false;
+        }
+      }
       engine.eventLog.append("session_resume", {
         resumedPhase: engine.currentPhase,
         resumedFromSeq: data.lastEventSeq,
@@ -358,6 +632,56 @@ export class AgentEngine {
     this.sessionState.save(this);
   }
+  /**
+   * Rename the workspace folder and cascade the new path to every persistence
+   * subsystem that captured `workspace.cwd` at construction time (Bug 3).
+   * Without this cascade, subsystems keep writing to the OLD path even
+   * though the directory has moved on disk — the user sees the renamed dir
+   * "die" while the old dir keeps growing.
+   *
+   * Also regenerates Block 9 cron wrapper scripts which bake in absolute
+   * paths to the workspace. Returns information for the TUI to surface
+   * (incl. whether the user needs to re-install crontab lines).
+   *
+   * @param {string} newName
+   * @returns {{ sessionId: string, oldCwd: string, newCwd: string,
+   *             scheduleWrappersRegenerated: string[],
+   *             scheduleWrappersDisabled: Array,
+   *             scheduleWrappersFailed: Array }}
+   */
+  renameSession(newName) {
+    const r = this.workspace.rename(newName);
+    if (r.changed) {
+      // Cascade to every subsystem that captured workspace.cwd
+      this.history._setWorkspacePath?.(r.newCwd);
+      this.eventLog._setWorkspacePath?.(r.newCwd);
+      this.sessionState._setWorkspacePath?.(r.newCwd);
+      this.taskManager?._setWorkspacePath?.(r.newCwd);
+      this.confidence._setWorkspacePath?.(r.newCwd);
+      this.cornerCases._setWorkspacePath?.(r.newCwd);
+    }
+    // Regenerate cron wrapper scripts — they bake absolute paths to WORKSPACE,
+    // INPUT_DIR, LOG_FILE, so rename invalidates them. The Scheduler is
+    // workspace-bound (created on demand inside the schedule_fetch tool), so
+    // construct a fresh one against the renamed workspace.
+    let scheduleResult = { regenerated: [], disabled: [], failed: [] };
+    try {
+      const sched = new Scheduler(this.workspace);
+      scheduleResult = sched.regenerateAllWrappers();
+    } catch {
+      // Best effort — never let scheduler issues block the rename
+    }
+    return {
+      sessionId: r.sessionId,
+      oldCwd: r.oldCwd,
+      newCwd: r.newCwd,
+      scheduleWrappersRegenerated: scheduleResult.regenerated,
+      scheduleWrappersDisabled: scheduleResult.disabled,
+      scheduleWrappersFailed: scheduleResult.failed,
+    };
+  }
   /**
    * Run one conversation turn. Yields AgentEvent objects.
    * Loops: LLM call -> tool execution -> LLM call ... until no tool calls.
@@ -383,7 +707,7 @@ export class AgentEngine {
     while (true) {
       // Apply context windowing before sending to LLM
       const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
-      const messages = [{ role: "system", content: systemPrompt }, ...windowed.messages];
+      let messages = [{ role: "system", content: systemPrompt }, ...windowed.messages];
       if (windowed.wasWindowed) {
         this.eventLog.append("context_windowed", {
@@ -392,6 +716,12 @@ export class AgentEngine {
         });
       }
+      // Pre-flight hard ceiling (Bug 1 P0). Even after windowing, if the
+      // request still exceeds the model's input budget (e.g., recent messages
+      // alone are too big), drop the oldest user-bounded blocks (the system
+      // prompt and any compaction summary are kept) until under budget or only
+      // one block remains. Better to lose some history than crash with HTTP 400.
+      messages = this._enforceTokenBudget(messages);
       this.eventLog.append("llm_start", {
         model: this.config.kcModel,
         messageCount: messages.length,
@@ -448,6 +778,12 @@ export class AgentEngine {
         });
         if (toolCallsAcc.size === 0) {
+          // Bug 4 post-turn auto-check trigger: re-check phase criteria at end of every turn —
+          // KC may have advanced state via conversation alone, without any
+          // tool that the pipeline narrowly watches.
+          const advancedEv = this._maybeAutoAdvance();
+          if (advancedEv) yield advancedEv;
           this.eventLog.append("turn_complete", {});
           this.saveState();
           yield new AgentEvent({ type: "turn_complete" });
@@ -466,52 +802,57 @@ export class AgentEngine {
           const result = await this.toolRegistry.execute(tc.name, inputData);
+          // Tool-call offloading: large outputs go to logs/tool_results/<traceId>.txt;
+          // history holds head + tail with a pointer. Event log keeps the full output
+          // (it's append-only and the source of truth).
+          const offload = this._maybeOffload(tc.name, result);
+          const historyContent = offload ? offload.digest : (result.content || "");
           this.eventLog.append("tool_result", {
             name: tc.name,
-            output: result.content?.slice(0, 5000) || "",
+            output: result.content || "",
             isError: result.isError,
+            traceId: offload?.traceId || null,
           });
           yield new AgentEvent({
             type: "tool_result",
             name: tc.name,
-            output: result.content,
+            output: historyContent,
             isError: result.isError,
           });
           this.history.addRaw({
             role: "tool",
             tool_call_id: tc.id,
-            content: result.content,
+            content: historyContent,
           });
+          // Post-tool-result safety net: check for context pressure RIGHT NOW
+          // rather than waiting for the next LLM-loop iteration. A large tool
+          // result that tips history over the threshold used to sit there
+          // until the next turn, and if the stream aborted in between the
+          // user saw "CTX: 210% / stream terminated" with no recovery.
+          this._maybeWindowAfterToolResult();
           // Pipeline controller: update state and re-register tools on phase change
           if (pipeline?.onToolResult) {
             const pEvent = pipeline.onToolResult(tc.name, inputData, result);
             if (pEvent) {
               if (pEvent.type === "phase_ready" && pEvent.nextPhase) {
-                const phaseSummary = `[${this.currentPhase.toUpperCase()} completed]: ${pEvent.message || ""}`;
-                this._phaseSummaries.push(phaseSummary);
-                this.eventLog.append("phase_transition", {
-                  from: this.currentPhase,
-                  to: pEvent.nextPhase,
-                  summary: phaseSummary,
-                });
-                this.currentPhase = pEvent.nextPhase;
-                this._registerToolsForPhase(this.currentPhase);
-                // Ralph-loop: create per-rule tasks for the new phase
-                this._createTasksForPhase(this.currentPhase);
-                this.saveState();
+                this._advancePhase(pEvent.nextPhase, pEvent.message || "exit criteria met");
               }
-              yield new AgentEvent({
-                type: "pipeline_event",
-                data: pEvent,
-              });
+              yield new AgentEvent({ type: "pipeline_event", data: pEvent });
             }
           }
         }
+        // Bug 4 fix: re-check exit criteria after every tool-result loop, not
+        // just from pipeline.onToolResult. The pipeline's describeState() (called
+        // on every turn) already re-scans, so exitCriteriaMet() is accurate; we
+        // just need to act on it eagerly.
+        const ev = this._maybeAutoAdvance();
+        if (ev) yield ev;
       } catch (err) {
         this.eventLog.append("error", { message: err.message });
         yield new AgentEvent({ type: "error", message: err.message });
@@ -520,17 +861,123 @@ export class AgentEngine {
     }
   }
+  /**
+   * Centralized phase transition (Bug 4). All three triggers route through here:
+   * (1) pipeline.onToolResult returning phase_ready
+   * (2) post-turn auto-check via _maybeAutoAdvance
+   * (3) explicit user request via the phase_advance tool
+   *
+   * Reachability: by default only forward-by-one transitions per NEXT_PHASE.
+   * Pass `{ force: true }` as the third argument to allow non-adjacent or
+   * backward transitions (e.g. user explicitly requests a regression for
+   * testing). Otherwise the call is refused: a `phase_advance_refused` event
+   * is logged and false is returned.
+   *
+   * Idempotent — calling with the current phase is a no-op.
+   */
+  _advancePhase(nextPhase, reason = "", { force = false } = {}) {
+    if (!nextPhase || nextPhase === this.currentPhase) return false;
+    const expected = NEXT_PHASE[this.currentPhase];
+    if (!force && nextPhase !== expected) {
+      this.eventLog.append("phase_advance_refused", {
+        from: this.currentPhase, to: nextPhase, reason,
+        hint: expected ? `expected next phase is '${expected}' — pass force:true to override`
+                       : `${this.currentPhase} is the terminal phase`,
+      });
+      return false;
+    }
+    const phaseSummary = `[${this.currentPhase.toUpperCase()} → ${nextPhase.toUpperCase()}]: ${reason}${force && nextPhase !== expected ? " (forced)" : ""}`;
+    this._phaseSummaries.push(phaseSummary);
+    this.eventLog.append("phase_transition", {
+      from: this.currentPhase,
+      to: nextPhase,
+      reason,
+      forced: force && nextPhase !== expected,
+    });
+    this.currentPhase = nextPhase;
+    this._registerToolsForPhase(this.currentPhase);
+    this.workspace.setPhase(this.currentPhase);
+    this._createTasksForPhase(this.currentPhase);
+    this.saveState();
+    return true;
+  }
+  /**
+   * Bug 4 trigger (2) auto-detect, edge-triggered (Bug 5): only fires on a
+   * fresh false → true flip in `exitCriteriaMet()`. Sessions resumed in an
+   * already-met state do nothing; users iterating in a phase whose criteria
+   * have been met for a while do nothing. Real new evidence is required.
+   */
+  _maybeAutoAdvance() {
+    const phase = this.currentPhase;
+    const pipeline = this.pipelines[phase];
+    let nowReady = false;
+    try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
+    if (!nowReady) {
+      this._lastReady[phase] = false;
+      return null;
+    }
+    // Edge-trigger: nowReady && !wasReady
+    if (this._lastReady[phase]) return null;
+    this._lastReady[phase] = true;
+    const next = NEXT_PHASE[phase];
+    if (!next) return null;
+    const advanced = this._advancePhase(next, "exit criteria flipped to met");
+    if (!advanced) return null;
+    return new AgentEvent({
+      type: "pipeline_event",
+      data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
+    });
+  }
+  /**
+   * Tool-call offloading. If the tool's content exceeds the threshold,
+   * write the full content to logs/tool_results/<traceId>.txt and return a
+   * digest (head + tail) with a pointer. Otherwise return null (caller uses
+   * full content).
+   */
+  _maybeOffload(toolName, result) {
+    const content = result.content || "";
+    if (!content) return null;
+    const threshold = result.isError
+      ? (this.config.toolOutputOffloadErrorTokens ?? 500)
+      : (this.config.toolOutputOffloadTokens ?? 2000);
+    const tokens = estimateTokens(content);
+    if (tokens <= threshold) return null;
+    const safeToolName = String(toolName || "tool").replace(/[^A-Za-z0-9_-]/g, "_");
+    const traceId = this.versionManager.generateTraceId(safeToolName, "result");
+    const offloadDir = path.join(this.workspace.cwd, "logs", "tool_results");
+    try {
+      fs.mkdirSync(offloadDir, { recursive: true });
+      fs.writeFileSync(path.join(offloadDir, `${traceId}.txt`), content, "utf-8");
+    } catch {
+      // If we can't write the offload file, fall back to keeping full content in context.
+      return null;
+    }
+    const HEAD = 800, TAIL = 800;
+    const truncatedNote = `\n\n[…truncated, ${tokens} tokens; full at logs/tool_results/${traceId}.txt — read with workspace_file if needed…]\n\n`;
+    const digest = content.length > HEAD + TAIL
+      ? content.slice(0, HEAD) + truncatedNote + content.slice(-TAIL)
+      : content + truncatedNote;
+    return { traceId, digest };
+  }
   /**
    * Create per-rule tasks when entering a new phase.
    * Reads the rule catalog and creates one task per rule for the given phase.
    */
   _createTasksForPhase(phase) {
+    if (!this.taskManager) return; // Sub-agents don't manage tasks
     const catalogPath = path.join(this.workspace.cwd, "rules", "catalog.json");
     if (!fs.existsSync(catalogPath)) return;
     try {
       const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
-      const rules = Array.isArray(catalog) ? catalog : [];
+      const rules = normalizeRuleCatalog(catalog);
       if (rules.length > 0) {
         this.taskManager.createRuleTasks(rules, phase);
       }
@@ -546,6 +993,12 @@ export class AgentEngine {
    * @yields {AgentEvent}
    */
   async *runTaskLoop(userMessage) {
+    // Sub-agents don't run task loops — they execute one task and exit
+    if (!this.taskManager) {
+      yield* this.runTurn(userMessage);
+      return;
+    }
     // Run the initial turn (user's request)
     yield* this.runTurn(userMessage);
@@ -593,6 +1046,41 @@ export class AgentEngine {
           progress: this.taskManager.progress,
         },
       });
+      // Bug 4 post-turn trigger (task-loop variant): auto-advance when all phase
+      // tasks are done AND the pipeline's exit criteria are also met (Bug 5 fix —
+      // task state alone is a ralph-loop convenience, not an authoritative phase
+      // signal; tasks could be marked skipped manually or by an editor).
+      if (this._allCurrentPhaseTasksComplete()) {
+        const pipeline = this.pipelines[this.currentPhase];
+        let exitMet = false;
+        try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
+        if (exitMet) {
+          const next = NEXT_PHASE[this.currentPhase];
+          if (next) {
+            const advanced = this._advancePhase(next, "all current-phase tasks completed + exit criteria met");
+            if (advanced) {
+              yield new AgentEvent({
+                type: "pipeline_event",
+                data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
+              });
+            }
+          }
+        }
+      }
     }
   }
+  /**
+   * True when every task tagged with the current phase is in a terminal state
+   * (completed | failed | skipped) and at least one such task exists. Used by
+   * runTaskLoop's auto-advance trigger.
+   */
+  _allCurrentPhaseTasksComplete() {
+    if (!this.taskManager) return false;
+    const phase = this.currentPhase;
+    const phaseTasks = this.taskManager.getAllTasks().filter((t) => t.phase === phase);
+    if (phaseTasks.length === 0) return false;
+    return phaseTasks.every((t) => t.status === "completed" || t.status === "failed" || t.status === "skipped");
+  }
 }