npm - kc-beta - Versions diffs - 0.6.2 → 0.7.0 - Mend

kc-beta 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/LICENSE +81 -0
package/LICENSE-COMMERCIAL.md +125 -0
package/README.md +21 -3
package/package.json +14 -5
package/src/agent/context-window.js +9 -12
package/src/agent/context.js +14 -1
package/src/agent/document-parser.js +169 -0
package/src/agent/engine.js +367 -18
package/src/agent/history/event-history.js +222 -0
package/src/agent/llm-client.js +55 -0
package/src/agent/message-utils.js +63 -0
package/src/agent/pipelines/_milestone-derive.js +511 -0
package/src/agent/pipelines/base.js +21 -0
package/src/agent/pipelines/distillation.js +28 -15
package/src/agent/pipelines/extraction.js +103 -36
package/src/agent/pipelines/finalization.js +178 -11
package/src/agent/pipelines/index.js +6 -1
package/src/agent/pipelines/initializer.js +74 -8
package/src/agent/pipelines/production-qc.js +31 -44
package/src/agent/pipelines/skill-authoring.js +97 -80
package/src/agent/pipelines/skill-testing.js +67 -23
package/src/agent/retry.js +10 -2
package/src/agent/scheduler.js +14 -2
package/src/agent/session-state.js +18 -1
package/src/agent/skill-loader.js +13 -7
package/src/agent/skill-validator.js +19 -5
package/src/agent/task-manager.js +61 -5
package/src/agent/tools/document-chunk.js +21 -9
package/src/agent/tools/phase-advance.js +18 -3
package/src/agent/tools/release.js +51 -9
package/src/agent/tools/rule-catalog.js +11 -1
package/src/agent/tools/workspace-file.js +32 -0
package/src/agent/workspace.js +39 -1
package/src/cli/components.js +64 -14
package/src/cli/index.js +62 -3
package/src/cli/meme.js +26 -25
package/src/config.js +65 -22
package/src/model-tiers.json +24 -8
package/src/providers.js +42 -0
package/template/release/v1/README.md.tmpl +108 -0
package/template/release/v1/catalog.json.tmpl +4 -0
package/template/release/v1/kc_runtime/__init__.py +11 -0
package/template/release/v1/kc_runtime/confidence.py +63 -0
package/template/release/v1/kc_runtime/doc_parser.py +127 -0
package/template/release/v1/manifest.json.tmpl +11 -0
package/template/release/v1/render_dashboard.py +117 -0
package/template/release/v1/run.py +212 -0
package/template/release/v1/serve.sh +17 -0
package/template/skills/en/meta-meta/work-decomposition/SKILL.md +266 -0
package/template/skills/en/skill-creator/SKILL.md +1 -1
package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +264 -0
package/template/skills/zh/skill-creator/SKILL.md +1 -1

package/src/agent/pipelines/extraction.js CHANGED Viewed

@@ -2,6 +2,7 @@ import fs from "node:fs";
 import path from "node:path";
 import { Phase, PipelineEvent } from "./index.js";
 import { Pipeline } from "./base.js";
+import { deriveRuleExtractionMilestones, deriveSkillAuthoringMilestones } from "./_milestone-derive.js";
 export class RuleExtractionPipeline extends Pipeline {
   constructor(workspace) {
@@ -20,62 +21,91 @@ export class RuleExtractionPipeline extends Pipeline {
   }
   _scanWorkspace() {
+    // v0.7.0 A1: route through filesystem-derived milestone helper.
+    // Existing instance state (rulesExtracted, rulesWithChunkRefs,
+    // coverageAudited) becomes a cache of disk facts rather than a
+    // running record of which tools fired. Tool-wrapper recorders can
+    // still bump these via engine._recordMilestone but disk wins on
+    // any rescan.
+    const m = deriveRuleExtractionMilestones(this._workspace);
+    this.rulesExtracted = [...m.rulesExtracted];
+    this.rulesWithChunkRefs = [...m.rulesWithChunkRefs];
+    this.coverageAudited = m.coverageAudited;
+    // regulationsScanned: presence of any non-JSON file in rules/. Kept
+    // local to this pipeline (not in the helper) because "did the agent
+    // copy regs into the workspace" is a cheap heuristic specific to
+    // this phase.
     const rulesDir = path.join(this._workspace.cwd, "rules");
     if (fs.existsSync(rulesDir)) {
-      const regFiles = fs.readdirSync(rulesDir).filter((f) => !f.endsWith(".json") && fs.statSync(path.join(rulesDir, f)).isFile());
-      this.regulationsScanned = regFiles.length > 0;
-    }
-    this._scanRules();
-    this._scanTests();
-    this.coverageAudited = fs.existsSync(path.join(this._workspace.cwd, "rules", "coverage_audit.md")) ||
-                           fs.existsSync(path.join(this._workspace.cwd, "rules", "coverage_audit.json"));
-  }
-  _scanRules() {
-    this.rulesExtracted = [];
-    this.rulesWithChunkRefs = [];
-    const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
-    if (fs.existsSync(catalogPath)) {
       try {
-        const data = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
-        if (Array.isArray(data)) {
-          this.rulesExtracted = data.map((r, i) => r.id || `rule_${i}`);
-          // A1: collect ids whose entry has non-empty source_chunk_ids
-          for (const r of data) {
-            const ids = r?.source_chunk_ids;
-            if (Array.isArray(ids) && ids.length > 0 && r?.id) {
-              this.rulesWithChunkRefs.push(r.id);
-            }
-          }
-        }
+        const regFiles = fs.readdirSync(rulesDir).filter(
+          (f) => !f.endsWith(".json") && fs.statSync(path.join(rulesDir, f)).isFile(),
+        );
+        this.regulationsScanned = regFiles.length > 0;
       } catch { /* skip */ }
     }
-    const skillsDir = path.join(this._workspace.cwd, "rule_skills");
-    if (fs.existsSync(skillsDir)) {
-      for (const e of fs.readdirSync(skillsDir, { withFileTypes: true })) {
-        if (e.isDirectory() && !e.name.startsWith("__") && !this.rulesExtracted.includes(e.name)) {
-          this.rulesExtracted.push(e.name);
-        }
+    // Union with rule_skills/ dirs — sometimes agents create skill dirs
+    // before adding to catalog.json (XM E2E #5 stranded-catalog case).
+    // Pulled from the skill-authoring helper so we share the canonical
+    // skill dir scan.
+    const sa = deriveSkillAuthoringMilestones(this._workspace);
+    for (const dirName of sa.skillsAuthored) {
+      if (!this.rulesExtracted.includes(dirName)) {
+        this.rulesExtracted.push(dirName);
       }
     }
+    this._scanTests();
   }
   _scanTests() {
+    // v0.7.0 A1: rulesWithTests now accepts multiple test shapes (was
+    // form-prescriptive on test_cases/ only — none of E2E #5's three
+    // alive contestants used that exact path; the gate refused all).
+    // Now: a rule is "tested" iff it has ANY of:
+    //   rule_skills/<id>/test_cases/   (canonical, original)
+    //   rule_skills/<id>/tests/        (alt spelling)
+    //   rule_skills/<id>/check*.py     (check IS the test for many rules)
+    //   rule_skills/<id>/scripts/check*.py (XM-style nested scripts)
+    //   rule_skills/<id>/assets/test_cases.json
+    // Spirit of the gate is "did the agent leave test artifacts behind"
+    // not "did they use this exact directory name."
     this.rulesWithTests = [];
     const skillsDir = path.join(this._workspace.cwd, "rule_skills");
     if (!fs.existsSync(skillsDir)) return;
     for (const e of fs.readdirSync(skillsDir, { withFileTypes: true })) {
       if (!e.isDirectory()) continue;
-      const testDir = path.join(skillsDir, e.name, "test_cases");
-      if (fs.existsSync(testDir) && fs.readdirSync(testDir).length > 0) {
-        this.rulesWithTests.push(e.name);
+      const skillPath = path.join(skillsDir, e.name);
+      const testDirA = path.join(skillPath, "test_cases");
+      const testDirB = path.join(skillPath, "tests");
+      const assetsTests = path.join(skillPath, "assets", "test_cases.json");
+      let hasTest = false;
+      if (fs.existsSync(testDirA) && fs.readdirSync(testDirA).length > 0) hasTest = true;
+      if (!hasTest && fs.existsSync(testDirB) && fs.readdirSync(testDirB).length > 0) hasTest = true;
+      if (!hasTest && fs.existsSync(assetsTests)) hasTest = true;
+      // Check files: any check*.py at root or under scripts/
+      if (!hasTest) {
+        try {
+          const files = fs.readdirSync(skillPath);
+          if (files.some((f) => /^check.*\.py$/i.test(f))) hasTest = true;
+          else if (files.includes("scripts")) {
+            const scriptsDir = path.join(skillPath, "scripts");
+            try {
+              if (fs.readdirSync(scriptsDir).some((f) => /^check.*\.py$/i.test(f))) hasTest = true;
+            } catch { /* skip */ }
+          }
+        } catch { /* skip */ }
       }
+      if (hasTest) this.rulesWithTests.push(e.name);
     }
   }
   describeState() {
     this._scanWorkspace();
-    const parts = ["## Phase: EXTRACTION\nRead and decompose regulation documents into atomic, testable verification rules. This is BUILD mode — do the analysis directly."];
+    const parts = ["## Phase: RULE_EXTRACTION\nRead and decompose regulation documents into atomic, testable verification rules. This is BUILD mode — do the analysis directly. (Distinct from data/entity extraction work that skills perform internally.)"];
     parts.push(`### Progress\n- Regulations scanned: ${this.regulationsScanned ? "yes" : "no"}\n- Rules extracted: ${this.rulesExtracted.length}\n- Rules with test stubs: ${this.rulesWithTests.length}\n- Coverage audit: ${this.coverageAudited ? "done" : "pending"}`);
     if (this.exitCriteriaMet()) {
@@ -132,7 +162,13 @@ export class RuleExtractionPipeline extends Pipeline {
   }
   exitCriteriaMet() {
-    return this.regulationsScanned && this.rulesExtracted.length > 0 &&
+    // v0.7.0 A1: dropped explicit `regulationsScanned` gate — rulesExtracted
+    // > 0 already implies the agent read regulations from somewhere
+    // (catalog.json wouldn't exist otherwise). The old criterion measured
+    // "did the agent copy regs into workspace/rules/" — ceremonial work
+    // none of E2E #5's three contestants did because they read directly
+    // from projectDir/rules/.
+    return this.rulesExtracted.length > 0 &&
       this.rulesWithTests.length >= Math.max(this.rulesExtracted.length * 0.8, 1) &&
       this.coverageAudited &&
       // v0.6.1 A1: hard tracking — D1 source-context auto-attach requires
@@ -141,6 +177,37 @@ export class RuleExtractionPipeline extends Pipeline {
       this._chunkRefsCriterionMet();
   }
+  /**
+   * v0.6.3 (#74): RULE_EXTRACTION should produce rules/catalog.json + per-rule
+   * markdown extraction notes, not python check scripts or workflows.
+   */
+  phaseMisfitHint(toolName, toolInput, result) {
+    if (result?.isError) return null;
+    const exitText = this.exitCriteriaMet()
+      ? "Extraction exit criteria are MET — call phase_advance(to=\"skill_authoring\") to switch phases before continuing."
+      : "Extraction exit criteria NOT yet met. Either finish extraction first, or use force:true on phase_advance.";
+    if (toolName === "workspace_file" && toolInput?.operation === "write") {
+      const p = toolInput.path || "";
+      // Writing the actual python check is unambiguous skill-authoring work.
+      if (/^rule_skills\/[^/]+\/check_r\d+\.py$/.test(p) || p.endsWith("/SKILL.md") && p.startsWith("rule_skills/")) {
+        return `Writing "${p}" is SKILL_AUTHORING-phase work, but engine is in RULE_EXTRACTION. ${exitText}`;
+      }
+      if (p.startsWith("workflows/")) {
+        return `Writing under workflows/ is DISTILLATION-phase work, but engine is in RULE_EXTRACTION. ${exitText}`;
+      }
+      if (p.startsWith("output/results/")) {
+        return `Writing under output/results/ is PRODUCTION_QC-phase work, but engine is in RULE_EXTRACTION. ${exitText}`;
+      }
+    }
+    if (toolName === "workflow_run") {
+      return `workflow_run is SKILL_TESTING/PRODUCTION_QC-phase work, but engine is in RULE_EXTRACTION. ${exitText}`;
+    }
+    return null;
+  }
   exportState() {
     return {
       regulationsScanned: this.regulationsScanned,

package/src/agent/pipelines/finalization.js CHANGED Viewed

@@ -1,8 +1,15 @@
 import fs from "node:fs";
 import path from "node:path";
+import { fileURLToPath } from "node:url";
 import { PipelineEvent } from "./index.js";
 import { Pipeline } from "./base.js";
 import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
+import { deriveFinalizationMilestones } from "./_milestone-derive.js";
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+// v0.7.0 N: ship template/release/v1/ from the npm package; copy into
+// the workspace at finalization phase entry.
+const RELEASE_TEMPLATE_DIR = path.resolve(__dirname, "../../../template/release/v1");
 /**
  * E1: FINALIZATION — the 7th phase. Runs after PRODUCTION_QC has shown
@@ -41,17 +48,21 @@ export class FinalizationPipeline extends Pipeline {
   }
   _scanWorkspace() {
-    const cwd = this._workspace.cwd;
-    this.readmeWritten = fs.existsSync(path.join(cwd, "rule_skills", "README.md"));
-    this.coverageReportWritten = fs.existsSync(path.join(cwd, "rule_skills", "coverage_report.md"));
-    this.finalDashboardWritten = fs.existsSync(path.join(cwd, "output", "final_dashboard.html"));
+    // v0.7.0 A1: route through filesystem-derived helper. The helper
+    // accepts multiple shipping locations (output/releases/v#/README.md,
+    // rule_skills/README.md, workspace-root README.md) and enforces a
+    // ≥500-byte threshold to defeat empty stub files. Dashboard check
+    // requires sha256-distinct HTMLs in dashboards/ (Group C dedup).
+    const m = deriveFinalizationMilestones(this._workspace);
+    this.readmeWritten = m.readmeWritten;
+    this.coverageReportWritten = m.coverageReportWritten;
+    this.finalDashboardWritten = m.finalDashboardWritten;
+    this._dashboardDuplicatesDetected = m.dashboardDuplicatesDetected;
     // Canonical layout: every rule_id in the catalog has a dedicated
-    // directory OR a thin-link stub under rule_skills/<rule_id>/. When
-    // skills are already per-rule (every rule has its own dir) this is
-    // trivially true. When skills are grouped, the agent creates
-    // per-rule stub dirs that reference the grouped file. We approximate
-    // "canonical" by checking: does every catalog rule_id have a
-    // matching directory under rule_skills/?
+    // directory OR a thin-link stub under rule_skills/<rule_id>/. Kept
+    // here (not in helper) because it requires reading catalog.json
+    // and matching against existing dirs — pipeline-specific logic.
     this.canonicalLayoutDone = this._checkCanonicalLayout();
   }
@@ -165,7 +176,163 @@ export class FinalizationPipeline extends Pipeline {
     return this.readmeWritten &&
       this.coverageReportWritten &&
       this.finalDashboardWritten &&
-      this.canonicalLayoutDone;
+      this.canonicalLayoutDone &&
+      // v0.7.0 N (#94): pre-flight — every required file run.py loads
+      // must exist. Without this, finalization can declare "done" with
+      // a release dir that bombs on first invocation (E2E #5 DS shipped
+      // run.py requiring manifest.json which didn't exist).
+      this._releaseBundlePreflightOk();
+  }
+  /**
+   * v0.7.0 N (#94): copy `template/release/v1/` into
+   * `output/releases/v1/` at phase entry so the agent has a runnable
+   * skeleton to fill in. Skips whenever the release dir already exists,
+   * regardless of its contents (resume case — preserve agent edits).
+   *
+   * Called from engine._advancePhase after the phase transitions to
+   * finalization.
+   */
+  onPhaseEnter({ fromPhase, workspace } = {}) {
+    if (!fs.existsSync(RELEASE_TEMPLATE_DIR)) return; // template not bundled (dev edge case)
+    const releaseRoot = path.join((workspace || this._workspace).cwd, "output", "releases", "v1");
+    if (fs.existsSync(releaseRoot)) {
+      // Don't overwrite existing release dir (resume / repeat phase entry).
+      // Re-running the populator on existing files is safe but the agent
+      // may have hand-edited; leave alone.
+      return;
+    }
+    try {
+      this._copyTemplateRecursive(RELEASE_TEMPLATE_DIR, releaseRoot);
+      // Populate .tmpl files from session-state where we can.
+      this._populateRelease(releaseRoot);
+    } catch (e) {
+      // Defensive: never let template setup break phase transition.
+      // The agent can re-run via /phase finalization or recover manually.
+      // eslint-disable-next-line no-console
+      console.warn(`[finalization] release template copy failed: ${e?.message || e}`);
+    }
+  }
+  _copyTemplateRecursive(srcDir, destDir) {
+    fs.mkdirSync(destDir, { recursive: true });
+    for (const entry of fs.readdirSync(srcDir, { withFileTypes: true })) {
+      const src = path.join(srcDir, entry.name);
+      const dst = path.join(destDir, entry.name);
+      if (entry.isDirectory()) {
+        this._copyTemplateRecursive(src, dst);
+      } else if (entry.isFile()) {
+        fs.copyFileSync(src, dst);
+        // Ensure shipped .py/.sh scripts are executable (mode forced to 0o755)
+        if (/\.(py|sh)$/.test(entry.name)) {
+          try { fs.chmodSync(dst, 0o755); } catch { /* not critical */ }
+        }
+      }
+    }
+  }
+  _populateRelease(releaseRoot) {
+    // Best-effort populator — fills the .tmpl placeholders with what
+    // session-state currently knows. Agent can re-edit afterwards.
+    const cwd = this._workspace.cwd;
+    const sessionId = path.basename(cwd);
+    const generatedAt = new Date().toISOString();
+    // catalog.json: copy from rules/catalog.json if present
+    const catalogSrc = path.join(cwd, "rules", "catalog.json");
+    if (fs.existsSync(catalogSrc)) {
+      try {
+        fs.copyFileSync(catalogSrc, path.join(releaseRoot, "catalog.json"));
+      } catch { /* ignore */ }
+    }
+    // manifest.json: scan workflows/ for rule -> file mappings
+    const workflowsRoot = path.join(cwd, "workflows");
+    const workflows = {};
+    let ruleCount = 0;
+    let workflowCount = 0;
+    if (fs.existsSync(workflowsRoot)) {
+      for (const entry of fs.readdirSync(workflowsRoot, { withFileTypes: true })) {
+        if (entry.isDirectory()) {
+          const subFiles = fs.readdirSync(path.join(workflowsRoot, entry.name));
+          const py = subFiles.find((f) => /workflow.*\.py$/i.test(f) || /^check.*\.py$/i.test(f));
+          if (py) {
+            workflows[entry.name] = `workflows/${entry.name}/${py}`;
+            workflowCount++;
+          }
+        } else if (entry.isFile()) {
+          const m = entry.name.match(/^(.+)_workflow\.py$/i);
+          if (m) {
+            workflows[m[1]] = `workflows/${entry.name}`;
+            workflowCount++;
+          }
+        }
+      }
+    }
+    try {
+      const catalog = fs.existsSync(catalogSrc)
+        ? JSON.parse(fs.readFileSync(catalogSrc, "utf-8"))
+        : [];
+      ruleCount = Array.isArray(catalog) ? catalog.length : (catalog?.rules?.length || 0);
+    } catch { /* ignore */ }
+    const manifest = {
+      release_version: "v1",
+      kc_version: this._readKcVersion(),
+      generated_at: generatedAt,
+      session_id: sessionId,
+      rules_count: ruleCount,
+      workflows_count: workflowCount,
+      workflows,
+      calibration_source: "confidence_calibration.json",
+      documentation: "README.md",
+    };
+    fs.writeFileSync(
+      path.join(releaseRoot, "manifest.json"),
+      JSON.stringify(manifest, null, 2),
+      "utf-8",
+    );
+    // README.md: substitute placeholders in README.md.tmpl
+    const readmeTmplPath = path.join(releaseRoot, "README.md.tmpl");
+    if (fs.existsSync(readmeTmplPath)) {
+      let readme = fs.readFileSync(readmeTmplPath, "utf-8");
+      readme = readme
+        .replaceAll("{{kc_version}}", this._readKcVersion())
+        .replaceAll("{{session_id}}", sessionId)
+        .replaceAll("{{generated_at}}", generatedAt)
+        .replaceAll("{{rule_count}}", String(ruleCount))
+        .replaceAll("{{workflow_count}}", String(workflowCount))
+        .replaceAll("{{project_description}}", "(Agent: replace with project-specific description.)")
+        .replaceAll("{{known_limitations}}", "(Agent: replace with known limitations from this run.)");
+      fs.writeFileSync(path.join(releaseRoot, "README.md"), readme, "utf-8");
+    }
+  }
+  _readKcVersion() {
+    try {
+      const pkg = JSON.parse(fs.readFileSync(
+        path.resolve(__dirname, "../../../package.json"), "utf-8",
+      ));
+      return pkg.version || "unknown";
+    } catch { return "unknown"; }
+  }
+  /**
+   * v0.7.0 N (#94): pre-flight — confirm every file `run.py` loads via
+   * `_load_json(..., required=True)` exists in the bundle. Without this
+   * the agent can declare finalization done with a bundle that bombs
+   * at runtime.
+   */
+  _releaseBundlePreflightOk() {
+    const releaseRoot = path.join(this._workspace.cwd, "output", "releases", "v1");
+    if (!fs.existsSync(releaseRoot)) return false;
+    const required = ["run.py", "manifest.json", "README.md", "kc_runtime/doc_parser.py", "kc_runtime/confidence.py"];
+    for (const rel of required) {
+      const p = path.join(releaseRoot, rel);
+      if (!fs.existsSync(p)) return false;
+    }
+    return true;
   }
   exportState() {

package/src/agent/pipelines/index.js CHANGED Viewed

@@ -10,7 +10,12 @@
  */
 export const Phase = Object.freeze({
   BOOTSTRAP: "bootstrap",
-  EXTRACTION: "extraction",
+  // v0.6.3: phase value renamed from "extraction" → "rule_extraction" to
+  // disambiguate from the data/entity extraction that skills/workflows do
+  // internally. The JS const name (Phase.EXTRACTION) is unchanged so call
+  // sites don't shift; only the string value persisted to session-state.json
+  // and shown in /status changes.
+  EXTRACTION: "rule_extraction",
   SKILL_AUTHORING: "skill_authoring",
   SKILL_TESTING: "skill_testing",
   DISTILLATION: "distillation",

package/src/agent/pipelines/initializer.js CHANGED Viewed

@@ -4,6 +4,7 @@ import os from "node:os";
 import { fileURLToPath } from "node:url";
 import { Phase, PipelineEvent } from "./index.js";
 import { Pipeline } from "./base.js";
+import { deriveBootstrapMilestones } from "./_milestone-derive.js";
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const AGENT_MD_TEMPLATE = path.resolve(__dirname, "../../../template/AGENT.md");
@@ -107,12 +108,14 @@ export class ProjectInitializer extends Pipeline {
   }
   _checkSamples() {
-    // Check workspace samples/
-    const dir = path.join(this._workspace.cwd, "samples");
-    if (fs.existsSync(dir) && fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile())) {
-      this.hasSamples = true; return;
-    }
-    // Check project dir samples/ (case-insensitive)
+    // v0.7.0 A1: route workspace check through filesystem-derived helper.
+    // Helper walks recursively (catches E2E #5 GLM's samples/samples/
+    // nested layout that the previous top-level-only check missed) and
+    // counts files at any depth. Project-dir fallback kept for the
+    // "user has samples but hasn't ingested them yet" path.
+    const m = deriveBootstrapMilestones(this._workspace);
+    if (m.hasSamples) { this.hasSamples = true; return; }
     if (this._workspace.projectDir) {
       for (const name of ["samples", "Samples", "SAMPLES", "sample", "Sample"]) {
         const pdir = path.join(this._workspace.projectDir, name);
@@ -199,7 +202,7 @@ export class ProjectInitializer extends Pipeline {
     }
     if (this.exitCriteriaMet()) {
-      parts.push("### Exit\nBootstrap requirements met. Proceed to EXTRACTION.");
+      parts.push("### Exit\nBootstrap requirements met. Proceed to RULE_EXTRACTION.");
     }
     return parts.join("\n\n");
   }
@@ -228,7 +231,7 @@ export class ProjectInitializer extends Pipeline {
     }
     if (!wasReady && this.exitCriteriaMet()) {
-      return new PipelineEvent({ type: "phase_ready", message: "Bootstrap complete. Ready for EXTRACTION.", nextPhase: Phase.EXTRACTION });
+      return new PipelineEvent({ type: "phase_ready", message: "Bootstrap complete. Ready for RULE_EXTRACTION.", nextPhase: Phase.EXTRACTION });
     }
     return null;
   }
@@ -237,6 +240,69 @@ export class ProjectInitializer extends Pipeline {
     return this.workspaceCreated && this.configReady && this.hasRegulations && this.hasSamples;
   }
+  /**
+   * v0.6.3 (#74): nudge the agent when it does work that belongs to a later
+   * phase. Bootstrap is setup — reading rules/samples, configuring keys,
+   * orienting. Writing skill code, running workflows, or spawning extraction
+   * subagents from BOOTSTRAP means the milestones get tagged "bootstrap"
+   * instead of the right phase, breaking later exit-criteria checks.
+   */
+  phaseMisfitHint(toolName, toolInput, result) {
+    if (result?.isError) return null;
+    const exitText = this.exitCriteriaMet()
+      ? "Bootstrap exit criteria are MET — call phase_advance(to=\"rule_extraction\") now to record this work under the right phase."
+      : "Bootstrap exit criteria NOT yet met (see describeState). Either complete bootstrap setup first, or use force:true on phase_advance if you've decided to skip ahead.";
+    if (toolName === "workspace_file" && toolInput?.operation === "write") {
+      const p = toolInput.path || "";
+      if (p.startsWith("rule_skills/")) {
+        return `Writing under rule_skills/ is SKILL_AUTHORING-phase work, but engine is in BOOTSTRAP. ${exitText}`;
+      }
+      if (p.startsWith("workflows/")) {
+        return `Writing under workflows/ is DISTILLATION-phase work, but engine is in BOOTSTRAP. ${exitText}`;
+      }
+      if (p.startsWith("output/results/")) {
+        return `Writing under output/results/ is PRODUCTION_QC-phase work, but engine is in BOOTSTRAP. ${exitText}`;
+      }
+    }
+    if (toolName === "workflow_run") {
+      return `workflow_run is SKILL_TESTING/PRODUCTION_QC-phase work, but engine is in BOOTSTRAP. Workflow results recorded now will be milestone-tagged "bootstrap" and won't count toward later exit criteria. ${exitText}`;
+    }
+    // v0.6.3.1 patch: rule_catalog is the most direct signature of
+    // RULE_EXTRACTION work. Creating/updating rules from BOOTSTRAP means the
+    // rule_extraction pipeline's milestone tracker stays at zero (its
+    // onToolResult only fires when engine.currentPhase matches), so the
+    // exit gate will refuse later. Caught Tencent hy3-preview after it
+    // created 22 rules silently in the wrong phase. Same risk for any
+    // model that skips sample-inventory and jumps to rule decomposition.
+    if (toolName === "rule_catalog" &&
+        ["create", "update", "delete"].includes(toolInput?.operation)) {
+      return `rule_catalog ${toolInput.operation} is RULE_EXTRACTION-phase work, but engine is in BOOTSTRAP. Rules created now WILL be persisted in rules/catalog.json (the tool writes regardless of phase), but the rule_extraction pipeline's milestone tracker won't pick them up until you're in that phase, and the v0.6.3 exit gate will refuse to advance from BOOTSTRAP unless its own criteria are met. ${exitText}`;
+    }
+    if (toolName === "agent_tool" && toolInput?.operation === "spawn") {
+      const taskId = (toolInput.task_id || "").toLowerCase();
+      // Heuristic: task_ids hinting at extraction/skill/workflow work are
+      // out-of-phase from bootstrap. Doc-parsing or setup-shaped task names
+      // are fine.
+      if (/extract|rule|skill|workflow|verify|qc|distill/.test(taskId)) {
+        return `Spawning subagent "${toolInput.task_id}" looks like ${this._guessSubagentPhase(taskId).toUpperCase()}-phase work, but engine is in BOOTSTRAP. Milestones the subagent emits will be tagged "bootstrap", causing the target phase's exit criteria to start at zero later. ${exitText}`;
+      }
+    }
+    return null;
+  }
+  _guessSubagentPhase(taskId) {
+    if (/extract|rule/.test(taskId)) return "rule_extraction";
+    if (/skill/.test(taskId)) return "skill_authoring";
+    if (/workflow|distill/.test(taskId)) return "distillation";
+    if (/verify|qc/.test(taskId)) return "production_qc";
+    return "later";
+  }
   exportState() {
     return {
       workspaceCreated: this.workspaceCreated,

package/src/agent/pipelines/production-qc.js CHANGED Viewed

@@ -2,6 +2,7 @@ import fs from "node:fs";
 import path from "node:path";
 import { PipelineEvent } from "./index.js";
 import { Pipeline } from "./base.js";
+import { deriveProductionQcMilestones } from "./_milestone-derive.js";
 const FREQUENCY_MAP = { high: 1.0, mid: 0.5, low: 0.2 };
@@ -36,27 +37,31 @@ export class ProductionQCPipeline extends Pipeline {
   }
   _scanQcResults() {
-    // v0.6.1 A5/A6: don't reset documentsReviewed if engine emission has
-    // bumped it since last scan — workflow_run hooks call _recordMilestone
-    // and the increment lives in this same field. Other counters (batches,
-    // accuracy, issues) come solely from filesystem scan and reset cleanly.
+    // v0.7.0 A1: route through filesystem-derived helper. The helper
+    // recognizes both DS-style results (object with `results` keyed by
+    // rule_id, doc-paths in nested keys) AND GLM-style array-of-verdicts
+    // (one entry per doc with .verdict/.file/.path) — neither matched
+    // the v0.6.1 A5 heuristic alone, so E2E #5 saw batchesProcessed=0
+    // even with 1,951 verdicts on disk.
     const engineDocsReviewed = this.documentsReviewed;
-    this.batchesProcessed = 0;
+    const m = deriveProductionQcMilestones(this._workspace);
+    this.batchesProcessed = m.batchesProcessed;
+    this.documentsReviewed = m.documentsReviewed;
+    // Layered: still extract accuracyByRule / confidence / issues from
+    // canonical output/qc/*.json batches when present. The helper
+    // doesn't try to reconstruct accuracy semantics (too schema-specific),
+    // but if the agent followed canonical schema, we surface it.
     this.totalDocuments = 0;
-    this.documentsReviewed = 0;
     this.accuracyByRule = {};
     this.confidenceDistribution = { low: 0, medium: 0, high: 0 };
     this.issuesFound = [];
-    // Existing canonical path: output/qc/*.json (formal QC batch reports)
     const qcDir = path.join(this._workspace.cwd, "output", "qc");
     if (fs.existsSync(qcDir)) {
       for (const f of fs.readdirSync(qcDir).filter((f) => f.endsWith(".json")).sort()) {
         try {
           const data = JSON.parse(fs.readFileSync(path.join(qcDir, f), "utf-8"));
-          this.batchesProcessed++;
           this.totalDocuments += typeof data.documents === "number" ? data.documents : (data.total || 0);
-          this.documentsReviewed += data.reviewed || 0;
           if (data.accuracy_by_rule) Object.assign(this.accuracyByRule, data.accuracy_by_rule);
           if (data.confidence) {
             for (const band of ["low", "medium", "high"]) this.confidenceDistribution[band] += data.confidence[band] || 0;
@@ -66,44 +71,26 @@ export class ProductionQCPipeline extends Pipeline {
       }
     }
-    // v0.6.1 A5: also pick up batch-style results in output/results/. E2E #4
-    // showed agents writing batch QC outputs to output/results/qc_*.json
-    // (e.g. unified_qc.py) instead of output/qc/, so the formal scanner
-    // missed them. Heuristic match: filename starts with "qc_" or contains
-    // "_batch_". Each match counts as one batch; total_checks → totalDocuments.
-    const resultsDir = path.join(this._workspace.cwd, "output", "results");
-    if (fs.existsSync(resultsDir)) {
-      const seen = new Set();
-      for (const f of fs.readdirSync(resultsDir).filter((f) => f.endsWith(".json"))) {
-        const lower = f.toLowerCase();
-        if (!(lower.startsWith("qc_") || lower.includes("_batch_"))) continue;
-        // Dedupe near-duplicate filenames that differ only by timestamp
-        // suffix (qc_full_batch_20260424_141642.json vs _141921.json
-        // — both are real batches, keep both. But qc_pt_x.json and
-        // qc_pt_x_<ts>.json are usually the same batch saved twice; key
-        // on the prefix before any 8-digit date.)
-        const key = f.replace(/_\d{8}_\d{6}/g, "").replace(/\.json$/, "");
-        if (seen.has(key)) continue;
-        seen.add(key);
-        this.batchesProcessed++;
-        try {
-          const data = JSON.parse(fs.readFileSync(path.join(resultsDir, f), "utf-8"));
-          // Best-effort metric extraction; tolerate missing keys
-          this.totalDocuments += typeof data.sample_count === "number" ? data.sample_count
-            : typeof data.documents === "number" ? data.documents
-            : typeof data.total === "number" ? data.total : 0;
-        } catch { /* skip */ }
-      }
-    }
-    // Restore engine-emitted documentsReviewed if filesystem reported less
+    // Restore engine-emitted documentsReviewed if disk-derived is lower
+    // (engine increment may know about reviews not yet flushed to disk)
     if (engineDocsReviewed > this.documentsReviewed) this.documentsReviewed = engineDocsReviewed;
-    // Determine monitoring phase
+    // Determine monitoring phase. v0.7.0 H5 fix: empty accuracyByRule
+    // no longer flips to "stable" via vacuous truth — require at least
+    // one rule with an accuracy reading before claiming stability.
     if (this.batchesProcessed < 3) this.monitoringPhase = "initial";
     else if (this.issuesFound.length > 0) this.monitoringPhase = "active";
-    else if (Object.values(this.accuracyByRule).every((a) => a >= this._accuracyThreshold)) this.monitoringPhase = "stable";
-    else this.monitoringPhase = "active";
+    else {
+      const accuracies = Object.values(this.accuracyByRule);
+      if (accuracies.length > 0 && accuracies.every((a) => a >= this._accuracyThreshold)) {
+        this.monitoringPhase = "stable";
+      } else {
+        // Helper-derived batches with no accuracy data: agent ran QC but
+        // didn't surface accuracy schema. Treat as `active` (work
+        // happened, but engine can't auto-bless stability).
+        this.monitoringPhase = "active";
+      }
+    }
   }
   describeState() {