kc-beta 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/QUICKSTART.md +17 -4
  2. package/README.md +58 -11
  3. package/bin/kc-beta.js +35 -1
  4. package/package.json +1 -1
  5. package/src/agent/bundle-tree.js +553 -0
  6. package/src/agent/context.js +40 -1
  7. package/src/agent/engine.js +644 -28
  8. package/src/agent/llm-client.js +67 -18
  9. package/src/agent/pipelines/finalization.js +186 -0
  10. package/src/agent/pipelines/index.js +8 -0
  11. package/src/agent/pipelines/initializer.js +40 -0
  12. package/src/agent/pipelines/skill-authoring.js +100 -6
  13. package/src/agent/skill-loader.js +54 -4
  14. package/src/agent/task-manager.js +66 -3
  15. package/src/agent/tools/agent-tool.js +283 -35
  16. package/src/agent/tools/bundle-search.js +146 -0
  17. package/src/agent/tools/document-chunk.js +246 -0
  18. package/src/agent/tools/document-classify.js +311 -0
  19. package/src/agent/tools/document-parse.js +8 -1
  20. package/src/agent/tools/phase-advance.js +30 -7
  21. package/src/agent/tools/registry.js +10 -0
  22. package/src/agent/tools/rule-catalog.js +17 -3
  23. package/src/agent/tools/sandbox-exec.js +30 -0
  24. package/src/agent/workspace.js +168 -14
  25. package/src/cli/components.js +165 -17
  26. package/src/cli/index.js +166 -19
  27. package/src/cli/meme.js +58 -0
  28. package/src/config.js +39 -2
  29. package/src/model-tiers.json +3 -2
  30. package/src/providers.js +34 -1
  31. package/template/skills/en/meta-meta/evolution-loop/SKILL.md +13 -1
  32. package/template/skills/en/meta-meta/rule-extraction/SKILL.md +74 -0
  33. package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +7 -1
  34. package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +73 -0
@@ -19,6 +19,9 @@ import { ReleaseTool } from "./tools/release.js";
19
19
  import { PhaseAdvanceTool } from "./tools/phase-advance.js";
20
20
  import { DocumentParseTool } from "./tools/document-parse.js";
21
21
  import { DocumentSearchTool } from "./tools/document-search.js";
22
+ import { DocumentChunkTool } from "./tools/document-chunk.js";
23
+ import { BundleSearchTool } from "./tools/bundle-search.js";
24
+ import { DocumentClassifyTool } from "./tools/document-classify.js";
22
25
  import { WorkerLLMCallTool } from "./tools/worker-llm-call.js";
23
26
  import { WorkflowRunTool } from "./tools/workflow-run.js";
24
27
  import { RuleCatalogTool } from "./tools/rule-catalog.js";
@@ -38,6 +41,7 @@ import { SkillAuthoringPipeline } from "./pipelines/skill-authoring.js";
38
41
  import { SkillTestingPipeline } from "./pipelines/skill-testing.js";
39
42
  import { DistillationEngine as DistillationPipeline } from "./pipelines/distillation.js";
40
43
  import { ProductionQCPipeline } from "./pipelines/production-qc.js";
44
+ import { FinalizationPipeline } from "./pipelines/finalization.js";
41
45
  import { EventLog } from "./event-log.js";
42
46
  import { ContextWindow } from "./context-window.js";
43
47
  import { SessionState } from "./session-state.js";
@@ -48,8 +52,10 @@ import { estimateTokens, estimateMessagesTokens } from "./token-counter.js";
48
52
  // or kc_max_tokens in the global config.
49
53
  const DEFAULT_KC_MAX_TOKENS = 65536;
50
54
 
51
- // Phases where worker LLM tools are available (DISTILL mode)
52
- const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
55
+ // Phases where worker LLM tools are available (DISTILL mode).
56
+ // E1: FINALIZATION inherits worker-LLM access so one-last-pass validation
57
+ // runs + dashboard_render + workflow_run stay usable during packaging.
58
+ const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC, Phase.FINALIZATION]);
53
59
 
54
60
  // Linear phase order — used by auto-advance (Bug 4). Last phase has no successor.
55
61
  // Exported so the TUI's /phase slash command (src/cli/index.js) can call
@@ -60,6 +66,7 @@ export const NEXT_PHASE = {
60
66
  [Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
61
67
  [Phase.SKILL_TESTING]: Phase.DISTILLATION,
62
68
  [Phase.DISTILLATION]: Phase.PRODUCTION_QC,
69
+ [Phase.PRODUCTION_QC]: Phase.FINALIZATION, // E1: new 7th phase
63
70
  };
64
71
 
65
72
  /**
@@ -162,6 +169,7 @@ export class AgentEngine {
162
169
  [Phase.SKILL_TESTING]: new SkillTestingPipeline(this.workspace),
163
170
  [Phase.DISTILLATION]: new DistillationPipeline(this.workspace),
164
171
  [Phase.PRODUCTION_QC]: new ProductionQCPipeline(this.workspace),
172
+ [Phase.FINALIZATION]: new FinalizationPipeline(this.workspace), // E1
165
173
  };
166
174
 
167
175
  // Skill discovery (Claude Code pattern: index in context, full content on demand)
@@ -181,6 +189,61 @@ export class AgentEngine {
181
189
  this._lastReady = Object.fromEntries(
182
190
  Object.keys(this.pipelines).map((p) => [p, false]),
183
191
  );
192
+
193
+ // B0.1: Heap sampler. Parent engines only — sub-agents share a process
194
+ // with the parent and would double-log. Writes a single JSONL line
195
+ // per minute to <workspace>/logs/heap.jsonl with the numbers needed
196
+ // to diagnose RSS creep (heapUsed/heapTotal/external/rss/arrayBuffers,
197
+ // plus active task count and history length). Always on, ~60 bytes
198
+ // per minute to disk.
199
+ this._heapSamplerStop = this._isSubagent ? null : this._startHeapSampler();
200
+ }
201
+
202
+ /**
203
+ * Start sampling process.memoryUsage() every 60 s into logs/heap.jsonl.
204
+ * Returns a stop fn. Timer is .unref()'d so it never keeps the process
205
+ * alive by itself. Failures are silently suppressed — this is a
206
+ * diagnostic, not a correctness feature.
207
+ */
208
+ _startHeapSampler() {
209
+ const logDir = path.join(this.workspace.cwd, "logs");
210
+ const logPath = path.join(logDir, "heap.jsonl");
211
+ const sample = () => {
212
+ try {
213
+ const mem = process.memoryUsage();
214
+ const row = {
215
+ t: new Date().toISOString(),
216
+ seq: this.eventLog?.currentSeq ?? 0,
217
+ phase: this.currentPhase,
218
+ rssMB: Math.round(mem.rss / 1024 / 1024),
219
+ heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
220
+ heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
221
+ externalMB: Math.round((mem.external || 0) / 1024 / 1024),
222
+ arrayBuffersMB: Math.round((mem.arrayBuffers || 0) / 1024 / 1024),
223
+ historyLen: this.history?.messages?.length ?? 0,
224
+ tasksPending: this.taskManager?.progress?.pending ?? 0,
225
+ tasksInProgress: this.taskManager?.progress?.inProgress ?? 0,
226
+ };
227
+ fs.mkdirSync(logDir, { recursive: true });
228
+ fs.appendFileSync(logPath, JSON.stringify(row) + "\n", "utf-8");
229
+ } catch { /* never fatal */ }
230
+ };
231
+ // Record one sample at startup so we have a baseline even on short runs.
232
+ sample();
233
+ const timer = setInterval(sample, 60_000);
234
+ timer.unref?.();
235
+ return () => {
236
+ try {
237
+ clearInterval(timer);
238
+ sample(); // one final sample on shutdown
239
+ } catch { /* ignore */ }
240
+ };
241
+ }
242
+
243
+ /** Stop background diagnostics. Call on graceful shutdown. */
244
+ stop() {
245
+ try { this._heapSamplerStop?.(); } catch { /* ignore */ }
246
+ this._heapSamplerStop = null;
184
247
  }
185
248
 
186
249
  /**
@@ -214,7 +277,10 @@ export class AgentEngine {
214
277
  new ArchiveFileTool(this.workspace),
215
278
  new ScheduleFetchTool(this.workspace),
216
279
  new ReleaseTool(this.workspace, { kcVersion: "0.5.2" }),
217
- new PhaseAdvanceTool((to, reason, opts) => this._advancePhase(to, reason, opts)),
280
+ new PhaseAdvanceTool(
281
+ (to, reason, opts) => this._advancePhase(to, reason, opts),
282
+ () => this.currentPhase, // H1: tool reads phase BEFORE its own call
283
+ ),
218
284
  new DocumentParseTool(this.workspace, {
219
285
  mineruApiUrl: this.config.mineruApiUrl,
220
286
  mineruApiKey: this.config.mineruApiKey,
@@ -223,6 +289,12 @@ export class AgentEngine {
223
289
  ocrModel: vlmModel,
224
290
  }),
225
291
  new DocumentSearchTool(this.workspace),
292
+ // Group C — chunker/RAG infrastructure ported from AMC app. Core
293
+ // tools (not phase-gated): useful from BOOTSTRAP through FINALIZATION
294
+ // for any doc-heavy project, not just rule extraction.
295
+ new DocumentChunkTool(this.workspace),
296
+ new BundleSearchTool(this.workspace),
297
+ new DocumentClassifyTool(this.workspace, this.config),
226
298
  new RuleCatalogTool(this.workspace),
227
299
  new EvolutionCycleTool(this.workspace, this.cornerCases),
228
300
  new DashboardRenderTool(this.workspace),
@@ -313,7 +385,7 @@ export class AgentEngine {
313
385
  getContextStats() {
314
386
  const systemPrompt = this.context.build({
315
387
  agentMd: this._readAgentMd(),
316
- skillIndex: this._skillLoader.formatForContext(),
388
+ skillIndex: this._skillLoader.formatForContext(this.currentPhase),
317
389
  pipelineState: this.pipelines[this.currentPhase]?.describeState?.() || null,
318
390
  workspaceState: this._buildWorkspaceState(),
319
391
  });
@@ -353,21 +425,37 @@ export class AgentEngine {
353
425
 
354
426
  // Heap-pressure diagnostic. The TUI has its own virtualization + tool-
355
427
  // output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
356
- // still see high heap usage, something else is leaking — log it once per
357
- // pressure-crossing so operators can investigate without flooding logs.
428
+ // still see high heap usage, something else is leaking.
429
+ //
430
+ // A9: Original design logged once per pressure-crossing (edge-triggered),
431
+ // which went silent for 17h during E2E #3 while RSS climbed to 3.8GB.
432
+ // Now: still edge-trigger on entry (noisy otherwise), but ALSO re-emit
433
+ // every 15min while we're still above the threshold, so an operator
434
+ // watching logs after hour 4 still sees the signal. Drops to silent on
435
+ // recovery below 0.60.
358
436
  try {
359
437
  const mem = process.memoryUsage();
360
438
  const frac = mem.heapUsed / (mem.heapTotal || 1);
361
- if (frac > 0.80 && !this._memPressureLogged) {
362
- this._memPressureLogged = true;
363
- this.eventLog.append("memory_pressure", {
364
- heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
365
- heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
366
- rssMB: Math.round(mem.rss / 1024 / 1024),
367
- historyLength: this.history.messages.length,
368
- });
439
+ const now = Date.now();
440
+ const REPRESS_INTERVAL_MS = 15 * 60 * 1000;
441
+ if (frac > 0.80) {
442
+ const firstCrossing = !this._memPressureLogged;
443
+ const dueForRepress = this._memPressureLastEmittedAt &&
444
+ (now - this._memPressureLastEmittedAt) >= REPRESS_INTERVAL_MS;
445
+ if (firstCrossing || dueForRepress) {
446
+ this._memPressureLogged = true;
447
+ this._memPressureLastEmittedAt = now;
448
+ this.eventLog.append("memory_pressure", {
449
+ heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
450
+ heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
451
+ rssMB: Math.round(mem.rss / 1024 / 1024),
452
+ historyLength: this.history.messages.length,
453
+ kind: firstCrossing ? "crossing" : "sustained",
454
+ });
455
+ }
369
456
  } else if (frac < 0.60 && this._memPressureLogged) {
370
457
  this._memPressureLogged = false; // re-arm for next crossing
458
+ this._memPressureLastEmittedAt = null;
371
459
  }
372
460
  } catch { /* process.memoryUsage failures are non-fatal */ }
373
461
  }
@@ -701,7 +789,7 @@ export class AgentEngine {
701
789
 
702
790
  const systemPrompt = this.context.build({
703
791
  agentMd: this._readAgentMd(),
704
- skillIndex: this._skillLoader.formatForContext(),
792
+ skillIndex: this._skillLoader.formatForContext(this.currentPhase),
705
793
  pipelineState,
706
794
  workspaceState: this._buildWorkspaceState(),
707
795
  });
@@ -781,6 +869,30 @@ export class AgentEngine {
781
869
  });
782
870
 
783
871
  if (toolCallsAcc.size === 0) {
872
+ // A3: Empty-response guard. If the LLM returned no content AND no
873
+ // tool calls, count it. Two in a row almost always means the
874
+ // provider is silently failing (context exceeded, rate-limit
875
+ // corruption, auth expired) and continuing wastes tokens + time.
876
+ // Reset on any non-empty turn. Reason-tagged so /status can
877
+ // surface the running rate.
878
+ if (!collectedText || !collectedText.trim()) {
879
+ this._consecutiveEmptyResponses = (this._consecutiveEmptyResponses || 0) + 1;
880
+ this._totalEmptyResponses = (this._totalEmptyResponses || 0) + 1;
881
+ if (this._consecutiveEmptyResponses >= 2) {
882
+ const message =
883
+ `LLM returned empty response ${this._consecutiveEmptyResponses}× in a row — ` +
884
+ `likely context-length exceeded or provider-side silent failure. ` +
885
+ `Stopping this turn to prevent runaway API spend.`;
886
+ this.eventLog.append("error", { message, kind: "empty_response_streak" });
887
+ yield new AgentEvent({ type: "error", message });
888
+ this._consecutiveEmptyResponses = 0; // reset so next /run isn't blocked
889
+ return;
890
+ }
891
+ } else {
892
+ this._consecutiveEmptyResponses = 0;
893
+ }
894
+ this._totalTurns = (this._totalTurns || 0) + 1;
895
+
784
896
  // Bug 4 trigger (1): re-check phase criteria at end of every turn —
785
897
  // KC may have advanced state via conversation alone, without any
786
898
  // tool that the pipeline narrowly watches.
@@ -793,6 +905,10 @@ export class AgentEngine {
793
905
  return;
794
906
  }
795
907
 
908
+ // A3: A turn with tool_calls or content is not empty — reset streak.
909
+ this._consecutiveEmptyResponses = 0;
910
+ this._totalTurns = (this._totalTurns || 0) + 1;
911
+
796
912
  // Tool execution loop
797
913
  for (const tc of toolCallsAcc.values()) {
798
914
  let inputData = {};
@@ -803,6 +919,12 @@ export class AgentEngine {
803
919
  this.eventLog.append("tool_start", { name: tc.name, input: inputData });
804
920
  yield new AgentEvent({ type: "tool_start", name: tc.name, input: inputData });
805
921
 
922
+ // A1: Capture phase BEFORE tool execution. Some tools — notably
923
+ // phase_advance — mutate this.currentPhase via a callback without
924
+ // yielding any AgentEvent, so the TUI's status bar never gets the
925
+ // signal. We diff after execute() and emit a synthetic
926
+ // pipeline_event so subscribers can sync.
927
+ const beforePhase = this.currentPhase;
806
928
  const result = await this.toolRegistry.execute(tc.name, inputData);
807
929
 
808
930
  // Tool-call offloading: large outputs go to logs/tool_results/<traceId>.txt;
@@ -817,6 +939,29 @@ export class AgentEngine {
817
939
  isError: result.isError,
818
940
  traceId: offload?.traceId || null,
819
941
  });
942
+
943
+ // D3a: trace skill invocations. When the agent reads a SKILL.md via
944
+ // workspace_file (the canonical way KC "uses" a skill, since skills
945
+ // are progressively-disclosed markdown), emit a skill_invoked event.
946
+ // Makes "which skills did KC actually consult?" answerable in post-run
947
+ // analysis — before this, skills were opaque to the event log.
948
+ try {
949
+ if (
950
+ !result.isError &&
951
+ (tc.name === "workspace_file" || tc.name === "sandbox_exec")
952
+ ) {
953
+ const p = String(inputData?.path || inputData?.command || "");
954
+ const skillMatch = p.match(/(?:template\/)?skills\/[a-z-]+\/(?:meta-meta|meta|skill-creator)\/([a-zA-Z0-9_-]+)(?:\/SKILL\.md|\/)?|\bSKILL\.md\b/);
955
+ if (skillMatch) {
956
+ const skillName = skillMatch[1] || "(unknown)";
957
+ this.eventLog.append("skill_invoked", {
958
+ skill: skillName,
959
+ via_tool: tc.name,
960
+ phase: this.currentPhase,
961
+ });
962
+ }
963
+ }
964
+ } catch { /* never let tracing break a tool call */ }
820
965
  yield new AgentEvent({
821
966
  type: "tool_result",
822
967
  name: tc.name,
@@ -837,6 +982,22 @@ export class AgentEngine {
837
982
  // user saw "CTX: 210% / stream terminated" with no recovery.
838
983
  this._maybeWindowAfterToolResult();
839
984
 
985
+ // A1: If the tool mutated the phase (e.g. phase_advance), emit the
986
+ // signal the TUI and pipelines need to re-sync state. Runs BEFORE
987
+ // pipeline.onToolResult so the fresh phase is active if the pipeline
988
+ // itself wants to react to the transition.
989
+ if (this.currentPhase !== beforePhase) {
990
+ yield new AgentEvent({
991
+ type: "pipeline_event",
992
+ data: {
993
+ type: "phase_changed",
994
+ from: beforePhase,
995
+ nextPhase: this.currentPhase,
996
+ reason: `via ${tc.name}`,
997
+ },
998
+ });
999
+ }
1000
+
840
1001
  // Pipeline controller: update state and re-register tools on phase change
841
1002
  if (pipeline?.onToolResult) {
842
1003
  const pEvent = pipeline.onToolResult(tc.name, inputData, result);
@@ -857,8 +1018,15 @@ export class AgentEngine {
857
1018
  if (ev) yield ev;
858
1019
 
859
1020
  } catch (err) {
860
- this.eventLog.append("error", { message: err.message });
861
- yield new AgentEvent({ type: "error", message: err.message });
1021
+ // A8: If the LLM client tagged the stream termination reason, pass
1022
+ // it through. Upstream log consumers + the TUI can then distinguish
1023
+ // "provider returned 429" from "socket died mid-token" from "SSE
1024
+ // buffer exploded" — today they're all just "Error: ...".
1025
+ const payload = { message: err.message };
1026
+ if (err.streamTermination) payload.kind = err.streamTermination;
1027
+ if (err.status) payload.status = err.status;
1028
+ this.eventLog.append("error", payload);
1029
+ yield new AgentEvent({ type: "error", message: err.message, ...payload });
862
1030
  return;
863
1031
  }
864
1032
  }
@@ -897,11 +1065,31 @@ export class AgentEngine {
897
1065
  reason,
898
1066
  forced: force && nextPhase !== expected,
899
1067
  });
1068
+ const fromPhase = this.currentPhase;
900
1069
  this.currentPhase = nextPhase;
901
1070
  this._registerToolsForPhase(this.currentPhase);
902
1071
  this.workspace.setPhase(this.currentPhase);
903
1072
  this._createTasksForPhase(this.currentPhase);
904
1073
  this.saveState();
1074
+
1075
+ // B8: Soft signal — surface any sub-agents left running from the prior
1076
+ // phase so the main agent's next turn can decide whether to kill them.
1077
+ // NOT automated: phase_advance can fire from _maybeAutoAdvance on a
1078
+ // criteria-flip, and auto-killing would couple lifecycle with blast
1079
+ // radius. This just informs.
1080
+ try {
1081
+ const agentTool = this._buildTools?.core?.find((t) => t?.name === "agent_tool");
1082
+ const runningIds = agentTool?.getRunningTaskIds?.() || [];
1083
+ if (runningIds.length > 0) {
1084
+ this.eventLog.append("stale_subagents", {
1085
+ from_phase: fromPhase,
1086
+ to_phase: nextPhase,
1087
+ running_task_ids: runningIds,
1088
+ hint: "These sub-agents were dispatched during the prior phase. Consider operation=poll to check status, or operation=kill to abort if stale.",
1089
+ });
1090
+ }
1091
+ } catch { /* never let signal emission break phase advance */ }
1092
+
905
1093
  return true;
906
1094
  }
907
1095
 
@@ -972,6 +1160,16 @@ export class AgentEngine {
972
1160
  /**
973
1161
  * Create per-rule tasks when entering a new phase.
974
1162
  * Reads the rule catalog and creates one task per rule for the given phase.
1163
+ *
1164
+ * D6: For skill_authoring / skill_testing, filter rules via the bundle
1165
+ * classification cache (`cache/bundles/<hash>.classification.json`,
1166
+ * written by document_classify). Rules whose `applicable_product_types`
1167
+ * or `report_types` don't overlap with the bundle's classification get
1168
+ * SKIPPED at task-creation time — we don't mutate catalog.json to mark
1169
+ * them not_applicable, we just keep them out of the task queue. The
1170
+ * finalization phase (Group E) will report them in the coverage
1171
+ * artifact as "not applicable to this bundle." Conservative default:
1172
+ * if no classification exists, include all rules (pre-B9 behavior).
975
1173
  */
976
1174
  _createTasksForPhase(phase) {
977
1175
  if (!this.taskManager) return; // Sub-agents don't manage tasks
@@ -980,28 +1178,258 @@ export class AgentEngine {
980
1178
 
981
1179
  try {
982
1180
  const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
983
- const rules = normalizeRuleCatalog(catalog);
984
- if (rules.length > 0) {
985
- this.taskManager.createRuleTasks(rules, phase);
1181
+ let rules = normalizeRuleCatalog(catalog);
1182
+ if (rules.length === 0) return;
1183
+
1184
+ // D6: applicability pre-filter (skill phases only — bootstrap/extraction
1185
+ // have no task creation here per A6).
1186
+ if (phase === "skill_authoring" || phase === "skill_testing") {
1187
+ const classification = this._loadBundleClassification();
1188
+ if (classification) {
1189
+ const before = rules.length;
1190
+ rules = rules.filter((r) => this._ruleAppliesToBundle(r, classification));
1191
+ if (rules.length < before) {
1192
+ this.eventLog.append("applicability_prefilter", {
1193
+ phase,
1194
+ classification: {
1195
+ product_type: classification.product_type,
1196
+ report_type: classification.report_type,
1197
+ source: classification.source,
1198
+ },
1199
+ rules_before: before,
1200
+ rules_after: rules.length,
1201
+ skipped: before - rules.length,
1202
+ });
1203
+ }
1204
+ }
986
1205
  }
1206
+ this.taskManager.createRuleTasks(rules, phase);
987
1207
  } catch { /* skip if catalog can't be read */ }
988
1208
  }
989
1209
 
1210
+ /**
1211
+ * D6: Load the most recent bundle classification cache, if one exists.
1212
+ * Written by the `document_classify` tool. Returns null if no cache or
1213
+ * unreadable — callers must treat null as "all rules apply."
1214
+ */
1215
+ _loadBundleClassification() {
1216
+ const cacheDir = path.join(this.workspace.cwd, "cache", "bundles");
1217
+ if (!fs.existsSync(cacheDir)) return null;
1218
+ let entries;
1219
+ try { entries = fs.readdirSync(cacheDir); }
1220
+ catch { return null; }
1221
+ const files = entries
1222
+ .filter((n) => n.endsWith(".classification.json"))
1223
+ .map((n) => {
1224
+ const p = path.join(cacheDir, n);
1225
+ try { return { path: p, mtime: fs.statSync(p).mtimeMs }; }
1226
+ catch { return null; }
1227
+ })
1228
+ .filter(Boolean)
1229
+ .sort((a, b) => b.mtime - a.mtime);
1230
+ if (files.length === 0) return null;
1231
+ try { return JSON.parse(fs.readFileSync(files[0].path, "utf-8")); }
1232
+ catch { return null; }
1233
+ }
1234
+
1235
+ /**
1236
+ * D6: Rule-applicability check mirroring the AMC app's `applies_to`.
1237
+ * Conservative: returns true when we don't have enough info to
1238
+ * confidently skip (missing fields on rule, or classification with
1239
+ * empty product/report).
1240
+ */
1241
+ _ruleAppliesToBundle(rule, classification) {
1242
+ const docProduct = classification?.product_type || "";
1243
+ const docReport = classification?.report_type || "";
1244
+ const ruleProducts = rule.applicable_product_types || rule.applicable_sections || [];
1245
+ const ruleReports = rule.report_types || [];
1246
+
1247
+ const allProducts = ruleProducts.length === 0 ||
1248
+ ruleProducts.some((x) => x === "全部" || x === "all" || x === "");
1249
+ const allReports = ruleReports.length === 0 ||
1250
+ ruleReports.some((x) => x === "全部" || x === "all" || x === "");
1251
+ if (allProducts && allReports) return true;
1252
+
1253
+ const productOk = allProducts || (
1254
+ docProduct && ruleProducts.some((rp) => rp.includes(docProduct) || docProduct.includes(rp))
1255
+ );
1256
+ const reportOk = allReports || (
1257
+ docReport && ruleReports.some((rr) => rr.includes(docReport) || docReport.includes(rr))
1258
+ );
1259
+
1260
+ // Unknown classification → don't prefilter, let the agent judge.
1261
+ if (!docProduct && !docReport) return true;
1262
+ return productOk && reportOk;
1263
+ }
1264
+
1265
+ /**
1266
+ * D1: Enrich a skill_authoring / skill_testing task prompt with the
1267
+ * rule's source context — reads `source_chunk_ids` back-refs from
1268
+ * catalog.json (populated by extraction) and fetches chunk text from
1269
+ * the most recent BundleTree cache. Falls back to the minimal prompt
1270
+ * when catalog / cache aren't available.
1271
+ *
1272
+ * Previously the task prompt was ONE line — "Continue with next task:
1273
+ * ${title}" — leaving the skill-author agent to re-read the rule and
1274
+ * re-find its evidence per task. Auto-attach saves the LLM turn
1275
+ * needed for document_search on every task, and ensures the author
1276
+ * sees the exact regulation text the extractor used to justify the
1277
+ * rule.
1278
+ *
1279
+ * @param {{id: string, title: string, ruleId?: string, phase: string}} task
1280
+ * @returns {string}
1281
+ */
1282
+ _buildEnrichedTaskPrompt(task) {
1283
+ const fallback = `Continue with next task: ${task.title}` +
1284
+ (task.ruleId ? ` (rule: ${task.ruleId})` : "");
1285
+
1286
+ // Only enrich for rule-anchored phases
1287
+ if (task.phase !== "skill_authoring" && task.phase !== "skill_testing") {
1288
+ return fallback;
1289
+ }
1290
+ if (!task.ruleId) return fallback;
1291
+
1292
+ // Find the rule in catalog.json
1293
+ const catalogPath = path.join(this.workspace.cwd, "rules", "catalog.json");
1294
+ if (!fs.existsSync(catalogPath)) return fallback;
1295
+ let rules;
1296
+ try {
1297
+ rules = normalizeRuleCatalog(JSON.parse(fs.readFileSync(catalogPath, "utf-8")));
1298
+ } catch { return fallback; }
1299
+ const rule = rules.find((r) => r.id === task.ruleId);
1300
+ if (!rule) return fallback;
1301
+
1302
+ // Assemble the enriched brief. Every section is optional — when a
1303
+ // back-ref or cache is missing, just skip that section rather than
1304
+ // failing back to the minimal prompt.
1305
+ const lines = [];
1306
+ lines.push(`# Task: ${task.title}`);
1307
+ lines.push("");
1308
+ lines.push(`## Rule ${rule.id}`);
1309
+ if (rule.source_ref) lines.push(`Source: ${rule.source_ref}`);
1310
+ if (rule.severity) lines.push(`Severity: ${rule.severity}`);
1311
+ if (rule.description) lines.push(`\n${rule.description}`);
1312
+ if (rule.falsifiability_statement) lines.push(`\n**Falsifiability**: ${rule.falsifiability_statement}`);
1313
+ if (rule.test_case_stub) lines.push(`**Test stub**: ${rule.test_case_stub}`);
1314
+
1315
+ // D1: if rule has source_chunk_ids AND a BundleTree cache exists,
1316
+ // pull chunk text inline so the author doesn't need to call
1317
+ // bundle_search manually. Bounded to ~3000 tokens total to avoid
1318
+ // blowing the author's context budget.
1319
+ const chunkIds = Array.isArray(rule.source_chunk_ids) ? rule.source_chunk_ids : [];
1320
+ if (chunkIds.length > 0) {
1321
+ const chunks = this._loadChunksFromBundleCache(chunkIds);
1322
+ if (chunks.length > 0) {
1323
+ lines.push("");
1324
+ lines.push("## Source context");
1325
+ let totalChars = 0;
1326
+ const MAX_CHARS = 7500; // ~3000 CJK tokens
1327
+ for (const ch of chunks) {
1328
+ const header = `### ${ch.title || ch.chunk_id} · ${ch.source_file} p.${(ch.page_range || [1, 1]).join("-")}`;
1329
+ const body = (ch.content || "").trim();
1330
+ const block = `${header}\n${body}\n`;
1331
+ if (totalChars + block.length > MAX_CHARS) {
1332
+ lines.push(`\n[…${chunks.length - chunks.indexOf(ch)} more source chunks truncated; use bundle_search to retrieve them…]`);
1333
+ break;
1334
+ }
1335
+ lines.push("");
1336
+ lines.push(block);
1337
+ totalChars += block.length;
1338
+ }
1339
+ }
1340
+ }
1341
+
1342
+ // Sibling rules (same source_ref prefix) — helps the author see the
1343
+ // surrounding catalog and avoid re-implementing cross-referenced logic.
1344
+ const siblings = this._findSiblingRuleIds(rule, rules);
1345
+ if (siblings.length > 0) {
1346
+ lines.push("");
1347
+ lines.push(`## Sibling rules (same regulation section)`);
1348
+ lines.push(siblings.map((id) => `- ${id}`).join("\n"));
1349
+ }
1350
+
1351
+ lines.push("");
1352
+ lines.push("Write the skill to `rule_skills/<rule_id>/SKILL.md` + detect script. Prefer 1 rule = 1 skill dir (use `check_rNNN_rMMM.py` naming ONLY when rules share evidence and fail together).");
1353
+
1354
+ return lines.join("\n");
1355
+ }
1356
+
1357
+ /** D1: Load chunk text from the most recent BundleTree cache. */
1358
+ _loadChunksFromBundleCache(chunkIds) {
1359
+ const cacheDir = path.join(this.workspace.cwd, "cache", "bundles");
1360
+ if (!fs.existsSync(cacheDir)) return [];
1361
+ let entries;
1362
+ try { entries = fs.readdirSync(cacheDir); }
1363
+ catch { return []; }
1364
+ const candidates = entries
1365
+ .filter((n) => n.endsWith(".json") && !n.endsWith(".classification.json"))
1366
+ .map((n) => {
1367
+ const p = path.join(cacheDir, n);
1368
+ try { return { path: p, mtime: fs.statSync(p).mtimeMs }; }
1369
+ catch { return null; }
1370
+ })
1371
+ .filter(Boolean)
1372
+ .sort((a, b) => b.mtime - a.mtime);
1373
+ if (candidates.length === 0) return [];
1374
+ let tree;
1375
+ try { tree = JSON.parse(fs.readFileSync(candidates[0].path, "utf-8")); }
1376
+ catch { return []; }
1377
+ const out = [];
1378
+ for (const cid of chunkIds) {
1379
+ const ch = tree.chunks?.[cid];
1380
+ if (ch) out.push(ch);
1381
+ }
1382
+ return out;
1383
+ }
1384
+
1385
+ /** D1: Rules that share the same regulation article (naive: source_ref prefix). */
1386
+ _findSiblingRuleIds(rule, allRules) {
1387
+ if (!rule.source_ref) return [];
1388
+ const prefix = rule.source_ref.split(/[第条款项]/)[0].trim();
1389
+ if (!prefix) return [];
1390
+ return allRules
1391
+ .filter((r) => r.id !== rule.id && (r.source_ref || "").startsWith(prefix))
1392
+ .slice(0, 8)
1393
+ .map((r) => r.id);
1394
+ }
1395
+
990
1396
  /**
991
1397
  * Ralph-loop: run a turn, then auto-continue through pending tasks.
992
1398
  * Compacts context aggressively between tasks to prevent context blowup.
993
1399
  * If no tasks exist, behaves identically to runTurn().
994
1400
  *
995
1401
  * @param {string} userMessage
1402
+ * @param {{parallelism?: number}} [opts] — B1: optional parallel mode.
1403
+ * N > 1 dispatches tasks through N concurrent subagents (using the
1404
+ * agent_tool infrastructure from B8). Clamped to `effectiveParallelism`
1405
+ * from config.js — which silently downgrades to 1 unless
1406
+ * KC_PARALLELISM_VERIFIED=1 is set AND heap.jsonl shows flat RSS
1407
+ * (B0.6 guard; prevents accidental $100+ runaway runs).
996
1408
  * @yields {AgentEvent}
997
1409
  */
998
- async *runTaskLoop(userMessage) {
1410
+ async *runTaskLoop(userMessage, opts = {}) {
999
1411
  // Sub-agents don't run task loops — they execute one task and exit
1000
1412
  if (!this.taskManager) {
1001
1413
  yield* this.runTurn(userMessage);
1002
1414
  return;
1003
1415
  }
1004
1416
 
1417
+ // B1: resolve effective parallelism. Caller opts override config.
1418
+ const requested = Number.isFinite(opts.parallelism)
1419
+ ? Math.max(1, Math.min(8, opts.parallelism))
1420
+ : (this.config.effectiveParallelism?.() ?? 1);
1421
+
1422
+ if (requested > 1) {
1423
+ yield* this._runTaskLoopParallel(userMessage, requested);
1424
+ return;
1425
+ }
1426
+
1427
+ yield* this._runTaskLoopSerial(userMessage);
1428
+ }
1429
+
1430
+ /** B1: original serial ralph-loop path — one task at a time, shared
1431
+ * conversation history. Unchanged from pre-v0.6.0 behavior. */
1432
+ async *_runTaskLoopSerial(userMessage) {
1005
1433
  // Run the initial turn (user's request)
1006
1434
  yield* this.runTurn(userMessage);
1007
1435
 
@@ -1015,8 +1443,11 @@ export class AgentEngine {
1015
1443
  await this.compact({ recentCount: 8 });
1016
1444
  }
1017
1445
 
1018
- const task = this.taskManager.getNextPending();
1019
- this.taskManager.updateTask(task.id, { status: "in_progress" });
1446
+ // B2: atomic claim — for serial we could use getNextPending, but
1447
+ // using claimNextPending gives us consistent state fields (worker
1448
+ // label, startedAt) whether in serial or parallel mode.
1449
+ const task = this.taskManager.claimNextPending("serial");
1450
+ if (!task) break;
1020
1451
 
1021
1452
  // Yield task progress event for TUI
1022
1453
  yield new AgentEvent({
@@ -1030,14 +1461,15 @@ export class AgentEngine {
1030
1461
  },
1031
1462
  });
1032
1463
 
1033
- // Synthesize a task-focused prompt
1034
- const taskPrompt = `Continue with next task: ${task.title}` +
1035
- (task.ruleId ? ` (rule: ${task.ruleId})` : "");
1464
+ // D1: synthesize a task-focused prompt, enriched with rule source
1465
+ // context (rule NL + source_ref + chunk text + sibling ids) when
1466
+ // the catalog + BundleTree cache are available. Falls back to the
1467
+ // minimal "Continue with next task" line otherwise.
1468
+ const taskPrompt = this._buildEnrichedTaskPrompt(task);
1036
1469
 
1037
1470
  yield* this.runTurn(taskPrompt);
1038
1471
 
1039
- this.taskManager.updateTask(task.id, { status: "completed" });
1040
- this.taskManager.save();
1472
+ this.taskManager.markDone(task.id);
1041
1473
  this.saveState();
1042
1474
 
1043
1475
  yield new AgentEvent({
@@ -1074,6 +1506,190 @@ export class AgentEngine {
1074
1506
  }
1075
1507
  }
1076
1508
 
1509
  /**
   * B1: Parallel ralph-loop — N concurrent subagents each executing one
   * task at a time, claimed atomically from TaskManager.
   *
   * Implementation: leverages B8's agent_tool infrastructure. Each worker
   * slot is a sub-engine with its own heap-isolated history; workspace
   * writes are serialized through B9's file locks. The main engine acts
   * as dispatcher — it claims tasks and spawns subagents, then waits.
   *
   * Chosen over in-process history-forking because: (a) sub-engines are
   * already heap-isolated (good under B0's RSS-safety regime); (b)
   * kill authority from B8 applies uniformly; (c) no runTurn refactor
   * needed — the engine's conversation-state assumptions stay intact.
   * Trade-off: each task pays a cold-start cost (re-read AGENT.md,
   * skill index, pipeline state). For 100+ task sessions this is
   * amortized against the 2-4× wall-clock speedup.
   *
   * @param {string} userMessage - the user's initial request; drives the
   *   first (serial) turn in which the main agent creates the task list.
   * @param {number} parallelism - max concurrent workers (>1 by the time
   *   this is called; runTaskLoop routes parallelism===1 to the serial path).
   * @yields {AgentEvent} task_progress / error / pipeline_event events,
   *   forwarded from concurrent workers through an in-memory queue.
   */
  async *_runTaskLoopParallel(userMessage, parallelism) {
    // Initial turn: main agent reads user request, creates tasks.
    yield* this.runTurn(userMessage);

    // NOTE(review): `_buildTools.core` is accessed as a property, not called —
    // presumably a pre-built tool registry; confirm it is not a method here.
    const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
    if (!agentTool) {
      // Shouldn't happen (agent_tool is core), but fall back safely.
      yield new AgentEvent({
        type: "error",
        message: "agent_tool not registered; parallel mode requires it. Falling back to serial.",
      });
      // Empty prompt: tasks were already created by the initial turn above.
      yield* this._runTaskLoopSerial("");
      return;
    }

    // Event queue so concurrent workers can yield progress through a
    // single async-generator consumer. Push-style with a one-shot notifier:
    // enq() wakes at most one pending await, then clears the notifier so a
    // burst of events only resolves the current eventArrival promise once.
    const eventQueue = [];
    let notify = null;
    const enq = (ev) => {
      eventQueue.push(ev);
      if (notify) { const n = notify; notify = null; n(); }
    };

    // In-flight workers: subId → { task, workerLabel, promise }.
    const inFlight = new Map();

    // Claim-and-spawn until the pool is full or no pending tasks remain.
    // Runs only from this generator's body (primed below, refilled after
    // each completion), so it never executes concurrently with itself.
    const dispatch = async () => {
      while (inFlight.size < parallelism) {
        const task = this.taskManager.claimNextPending(`pool${inFlight.size}`);
        if (!task) return;

        // NOTE(review): this recomputes the same value passed to
        // claimNextPending above ([...inFlight.keys()].length === inFlight.size);
        // labels are slot indices at claim time, not stable worker identities.
        const workerLabel = `pool${[...inFlight.keys()].length}`;
        const subId = `pool_${task.id}`.replace(/[^A-Za-z0-9_-]/g, "_").slice(0, 60);

        // D1: build the enriched brief with source context. Parallel workers
        // are subagents — each with zero conversation history, so the brief
        // must carry everything they need. Even more important to have
        // source context inline vs. expecting them to call document_search.
        const enriched = this._buildEnrichedTaskPrompt(task);
        const brief =
          enriched +
          `\n\nNOTE (parallel worker): write outputs via workspace_file or ` +
          `rule_catalog — do NOT write to shared coordination files ` +
          `(rules/catalog.json, rules/manifest.json) via sandbox_exec; they're ` +
          `lock-protected and bypassing the lock will race with other workers.`;

        enq(new AgentEvent({
          type: "task_progress",
          data: {
            taskId: task.id, title: task.title, ruleId: task.ruleId,
            status: "in_progress", worker: workerLabel,
            progress: this.taskManager.progress,
          },
        }));

        // Spawn via the tool's public API. agent_tool writes status.txt,
        // abort controller, etc. We read _runningTasks to get a promise
        // handle we can await.
        const spawnRes = await agentTool.execute({
          operation: "spawn",
          task_description: brief,
          task_id: subId,
        });

        if (spawnRes.isError) {
          this.taskManager.markFailed(task.id, `spawn failed: ${spawnRes.content}`);
          enq(new AgentEvent({
            type: "task_progress",
            data: { taskId: task.id, status: "failed", worker: workerLabel },
          }));
          continue;
        }

        // NOTE(review): reaches into agent_tool's private _runningTasks map —
        // an internal-coupling point to keep in sync with agent-tool.js.
        const entry = agentTool._runningTasks.get(subId);
        if (!entry) {
          // Sub-agent completed synchronously (no events) — mark done.
          this.taskManager.markDone(task.id);
          enq(new AgentEvent({
            type: "task_progress",
            data: { taskId: task.id, status: "completed", worker: workerLabel },
          }));
          continue;
        }

        // Convert the sub-agent promise into a never-rejecting result record
        // so Promise.race below can't throw out of the drain loop.
        const trackedPromise = entry.promise.then(
          () => ({ taskId: task.id, subId, ok: true }),
          (e) => ({ taskId: task.id, subId, ok: false, error: e?.message || String(e) }),
        );
        inFlight.set(subId, { task, workerLabel, promise: trackedPromise });
      }
    };

    // Prime the pool
    await dispatch();

    // Drain events + replenish until queue is empty and all in-flight done.
    // Ordering matters: drain the queue FIRST each iteration so events
    // emitted by enq() while we were awaiting are never starved.
    while (inFlight.size > 0 || eventQueue.length > 0) {
      // Drain all queued events first
      while (eventQueue.length > 0) yield eventQueue.shift();

      if (inFlight.size === 0) break;

      // Wait for either the next event OR a worker to complete.
      // NOTE(review): a fresh eventArrival promise is created per iteration;
      // if the worker branch wins, the previous promise is abandoned
      // unresolved and `notify` may fire a stale resolver — harmless, since
      // the queued event is picked up by the drain at the top of the loop.
      const workerCompletion = Promise.race([...inFlight.values()].map((v) => v.promise));
      const eventArrival = new Promise((resolve) => { notify = () => resolve("event"); });
      const winner = await Promise.race([
        workerCompletion.then((done) => ({ kind: "worker", done })),
        eventArrival.then(() => ({ kind: "event" })),
      ]);

      if (winner.kind === "worker") {
        // Only one completion is processed per iteration; if several workers
        // finished together, their already-settled promises win the race
        // again on subsequent iterations.
        const { taskId, subId, ok, error } = winner.done;
        const entry = inFlight.get(subId);
        inFlight.delete(subId);

        if (ok) {
          this.taskManager.markDone(taskId);
          enq(new AgentEvent({
            type: "task_progress",
            data: {
              taskId, status: "completed",
              worker: entry?.workerLabel,
              progress: this.taskManager.progress,
            },
          }));
        } else {
          this.taskManager.markFailed(taskId, error);
          enq(new AgentEvent({
            type: "task_progress",
            data: {
              taskId, status: "failed",
              worker: entry?.workerLabel,
              error,
              progress: this.taskManager.progress,
            },
          }));
        }

        // Refill the pool. If no pending tasks left, in-flight drains naturally.
        await dispatch();
      }
      // event winner: loop re-iterates and drains eventQueue
    }

    this.saveState();

    // After all workers done, check for phase auto-advance (same as serial path).
    if (this._allCurrentPhaseTasksComplete()) {
      const pipeline = this.pipelines[this.currentPhase];
      let exitMet = false;
      // Best-effort probe: a throwing/absent exitCriteriaMet means "not met".
      try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
      if (exitMet) {
        const next = NEXT_PHASE[this.currentPhase];
        if (next) {
          const advanced = this._advancePhase(next, "all parallel tasks completed + exit criteria met");
          if (advanced) {
            yield new AgentEvent({
              type: "pipeline_event",
              data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
            });
          }
        }
      }
    }
  }
1692
+
1077
1693
  /**
1078
1694
  * True when every task tagged with the current phase is in a terminal state
1079
1695
  * (completed | failed | skipped) and at least one such task exists. Used by