npm - kc-beta - Versions diffs - 0.7.5 → 0.8.3 - Mend

kc-beta 0.7.5 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

package/README.md +47 -0
package/package.json +3 -2
package/src/agent/context.js +17 -1
package/src/agent/engine.js +467 -100
package/src/agent/llm-client.js +24 -1
package/src/agent/pipelines/_advance-hints.js +92 -0
package/src/agent/pipelines/_milestone-derive.js +325 -20
package/src/agent/pipelines/skill-authoring.js +49 -3
package/src/agent/tools/agent-tool.js +2 -2
package/src/agent/tools/consult-skill.js +15 -0
package/src/agent/tools/dashboard-render.js +48 -1
package/src/agent/tools/document-parse.js +31 -2
package/src/agent/tools/phase-advance.js +17 -13
package/src/agent/tools/release.js +343 -7
package/src/agent/tools/sandbox-exec.js +65 -8
package/src/agent/tools/worker-llm-call.js +95 -15
package/src/agent/workspace.js +25 -4
package/src/cli/components.js +4 -1
package/src/cli/index.js +125 -8
package/src/config.js +19 -2
package/src/marathon/driver.js +217 -0
package/src/marathon/prompts.js +93 -0
package/template/.env.template +17 -1
package/template/AGENT.md +2 -2
package/template/skills/en/auto-model-selection/SKILL.md +55 -35
package/template/skills/en/bootstrap-workspace/SKILL.md +27 -0
package/template/skills/en/compliance-judgment/SKILL.md +14 -0
package/template/skills/en/confidence-system/SKILL.md +30 -8
package/template/skills/en/corner-case-management/SKILL.md +53 -33
package/template/skills/en/cross-document-verification/SKILL.md +88 -83
package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
package/template/skills/en/data-sensibility/SKILL.md +19 -12
package/template/skills/en/document-chunking/SKILL.md +99 -15
package/template/skills/en/entity-extraction/SKILL.md +14 -4
package/template/skills/en/quality-control/SKILL.md +23 -0
package/template/skills/en/rule-extraction/SKILL.md +92 -94
package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
package/template/skills/en/skill-authoring/SKILL.md +85 -2
package/template/skills/en/skill-creator/SKILL.md +25 -3
package/template/skills/en/skill-to-workflow/SKILL.md +73 -1
package/template/skills/en/task-decomposition/SKILL.md +1 -1
package/template/skills/en/tree-processing/SKILL.md +1 -1
package/template/skills/en/version-control/SKILL.md +15 -0
package/template/skills/en/work-decomposition/SKILL.md +52 -32
package/template/skills/phase_skills.yaml +5 -0
package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
package/template/skills/zh/bootstrap-workspace/SKILL.md +27 -0
package/template/skills/zh/compliance-judgment/SKILL.md +51 -37
package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
package/template/skills/zh/confidence-system/SKILL.md +34 -9
package/template/skills/zh/corner-case-management/SKILL.md +71 -104
package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
package/template/skills/zh/data-sensibility/SKILL.md +13 -0
package/template/skills/zh/document-chunking/SKILL.md +101 -18
package/template/skills/zh/document-parsing/SKILL.md +65 -65
package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
package/template/skills/zh/entity-extraction/SKILL.md +78 -68
package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
package/template/skills/zh/quality-control/SKILL.md +23 -0
package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
package/template/skills/zh/rule-extraction/SKILL.md +199 -188
package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
package/template/skills/zh/skill-authoring/SKILL.md +136 -58
package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
package/template/skills/zh/skill-creator/SKILL.md +215 -201
package/template/skills/zh/skill-creator/references/schemas.md +60 -60
package/template/skills/zh/skill-to-workflow/SKILL.md +73 -1
package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
package/template/skills/zh/task-decomposition/SKILL.md +1 -1
package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
package/template/skills/zh/tree-processing/SKILL.md +67 -63
package/template/skills/zh/version-control/SKILL.md +15 -0
package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
package/template/skills/zh/work-decomposition/SKILL.md +52 -30
package/template/workflows/common/llm_client.py +168 -0
package/template/workflows/common/utils.py +132 -0

package/src/agent/tools/agent-tool.js CHANGED

@@ -382,8 +382,8 @@ export class AgentTool extends BaseTool {
    * B8: List currently-running sub-agents. Called by engine's phase-advance
    * path to emit a `stale_subagents` pipeline event — the main agent's next
    * turn sees the list and decides whether to kill each. Soft signal, not
-   * an automated kill, because phase_advance can fire from _maybeAutoAdvance
-   * unexpectedly and coupling the lifecycle would amplify blast radius.
+   * an automated kill: coupling the subagent lifecycle to phase advance
+   * would amplify blast radius if a transition happened unexpectedly.
    */
   getRunningTaskIds() {
     return Array.from(this._runningTasks.keys());

package/src/agent/tools/consult-skill.js CHANGED

@@ -63,6 +63,21 @@ export class ConsultSkillTool extends BaseTool {
     const name = (input?.name || "").trim();
     if (!name) return new ToolResult("name required (e.g. consult_skill({name: 'work-decomposition'}))", true);
+    // v0.8 P0-A: defensive null-check. v0.7.5 shipped with an init-order bug
+    // where ConsultSkillTool received undefined skillLoader and threw
+    // "Cannot read properties of undefined (reading 'getPhaseSkillSet')"
+    // on every invocation (资管 audit § 9.1, 5/5 failure rate). The init-order
+    // fix is in engine.js:238; this guard prevents an uncaught exception if
+    // the bug recurs from any future constructor reorder.
+    if (!this._skillLoader || typeof this._skillLoader.getPhaseSkillSet !== "function") {
+      return new ToolResult(
+        "consult_skill is misconfigured: skillLoader unavailable. This is an engine-side bug — " +
+        "surface to the developer user. The agent should fall back to reading skill bodies " +
+        "directly from <workspace>/skills/<name>/SKILL.md or the system prompt's always-loaded section.",
+        true,
+      );
+    }
     const phase = this._getCurrentPhase ? this._getCurrentPhase() : null;
     const { alwaysLoaded, available } = this._skillLoader.getPhaseSkillSet(phase);

package/src/agent/tools/dashboard-render.js CHANGED

@@ -81,11 +81,57 @@ export class DashboardRenderTool extends BaseTool {
       metrics.evolution_iterations = fs.readdirSync(evoDir).filter((f) => f.endsWith(".json")).length;
     }
+    // v0.8 P1-G: QC counter now reads from multiple known agent-write
+    // locations + counts per-doc reviews. Pre-v0.8 read only output/qc/*.json
+    // top-level; 资管 v0.7.5 wrote output/results/production_qc_results.json
+    // so the dashboard showed `QC Batches: 0` despite 126 pairs of data.
+    let qcBatches = 0;
+    let qcDocsReviewed = 0;
+    // (a) Top-level batch files in output/qc/ (贷款 v0.7.5 shape)
     const qcDir = path.join(ws, "output", "qc");
     if (fs.existsSync(qcDir)) {
-      metrics.qc_batches = fs.readdirSync(qcDir).filter((f) => f.endsWith(".json")).length;
+      for (const f of fs.readdirSync(qcDir).filter((f) => f.endsWith(".json"))) {
+        qcBatches++;
+        try {
+          const data = JSON.parse(fs.readFileSync(path.join(qcDir, f), "utf-8"));
+          const n = Number(data?.documents_reviewed);
+          if (Number.isFinite(n) && n > qcDocsReviewed) qcDocsReviewed = n;
+        } catch { /* skip malformed */ }
+      }
     }
+    // (b) Per-doc reviews at output/qc/reviews/ (贷款 detail shape)
+    const reviewsDir = path.join(ws, "output", "qc", "reviews");
+    if (fs.existsSync(reviewsDir)) {
+      const reviewFiles = fs.readdirSync(reviewsDir).filter((f) => f.endsWith(".json"));
+      qcDocsReviewed = Math.max(qcDocsReviewed, reviewFiles.length);
+    }
+    // (c) production_qc_results.json shape (资管 v0.7.5)
+    const productionQc = path.join(ws, "output", "results", "production_qc_results.json");
+    if (fs.existsSync(productionQc)) {
+      qcBatches++;
+      try {
+        const data = JSON.parse(fs.readFileSync(productionQc, "utf-8"));
+        const totalDocs = Number(data?.total_docs);
+        if (Number.isFinite(totalDocs)) qcDocsReviewed = Math.max(qcDocsReviewed, totalDocs);
+        // Otherwise, dedup doc keys from nested results
+        if (!Number.isFinite(totalDocs) && data?.results && typeof data.results === "object") {
+          const docSet = new Set();
+          for (const docs of Object.values(data.results)) {
+            if (docs && typeof docs === "object") {
+              for (const k of Object.keys(docs)) docSet.add(k);
+            }
+          }
+          if (docSet.size > 0) qcDocsReviewed = Math.max(qcDocsReviewed, docSet.size);
+        }
+      } catch { /* skip */ }
+    }
+    metrics.qc_batches = qcBatches;
+    metrics.qc_docs_reviewed = qcDocsReviewed;
     return metrics;
   }
@@ -126,6 +172,7 @@ th { color: #737373; font-size: 0.85em; }
 <div class="metric"><span class="value">${total}</span><br><span class="label">Results</span></div>
 <div class="metric"><span class="value">${metrics.evolution_iterations}</span><br><span class="label">Evolution Cycles</span></div>
 <div class="metric"><span class="value">${metrics.qc_batches}</span><br><span class="label">QC Batches</span></div>
+<div class="metric"><span class="value">${metrics.qc_docs_reviewed || 0}</span><br><span class="label">Docs Reviewed</span></div>
 </div>
 <h2>Confidence Distribution</h2>
 <div class="card">

package/src/agent/tools/document-parse.js CHANGED

@@ -12,14 +12,43 @@ const MIN_CHARS_PER_PAGE = 50;
  * Level 3: OCR models via SiliconFlow — fallback via vision models
  */
 export class DocumentParseTool extends BaseTool {
-  constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, ocrModel } = {}) {
+  /**
+   * @param {object} workspace
+   * @param {object} opts
+   * @param {string} [opts.mineruApiUrl]
+   * @param {string} [opts.mineruApiKey]
+   * @param {string} [opts.llmApiKey]
+   * @param {string} [opts.llmBaseUrl]
+   * @param {string} [opts.ocrModel] — static fallback (legacy)
+   * @param {() => string} [opts.getOcrModel] — v0.8.1 P9-B: live-read
+   *   callback. If provided, takes precedence over `ocrModel`. The
+   *   constructor used to capture vlmTier1 once at engine startup, but
+   *   workspace_env_overlay (P1-B) fires AFTER tool construction in
+   *   some flows (e.g. agent edits .env mid-run, OR overlay applies on
+   *   a subagent's engine but parent already cached the gc default).
+   *   E2E #11 资管 v0.8 audit found document_parse errors quoting
+   *   Qwen3-VL-235B-A22B-Instruct (gc default) even though .env set
+   *   OCR_MODEL_TIER1=zai-org/GLM-4.6V — the overlay applied 5 min
+   *   after first failed call. Live-read fixes the race.
+   */
+  constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, ocrModel, getOcrModel } = {}) {
     super();
     this._workspace = workspace;
     this._mineruApiUrl = mineruApiUrl || "";
     this._mineruApiKey = mineruApiKey || "";
     this._vlmApiKey = llmApiKey || "";
     this._vlmBaseUrl = (llmBaseUrl || "").replace(/\/+$/, "");
-    this._ocrModel = ocrModel || "";
+    this._ocrModelStatic = ocrModel || "";
+    this._getOcrModel = typeof getOcrModel === "function" ? getOcrModel : null;
+  }
+  /** Read ocrModel live (P9-B) or fall back to the static value captured at construction. */
+  get _ocrModel() {
+    if (this._getOcrModel) {
+      try { return this._getOcrModel() || this._ocrModelStatic; }
+      catch { return this._ocrModelStatic; }
+    }
+    return this._ocrModelStatic;
   }
   get name() { return "document_parse"; }

package/src/agent/tools/phase-advance.js CHANGED

@@ -1,5 +1,6 @@
 import { BaseTool, ToolResult } from "./base.js";
 import { Phase } from "../pipelines/index.js";
+import { getPrescriptiveHint } from "../pipelines/_advance-hints.js";
 const VALID_PHASES = new Set(Object.values(Phase));
@@ -72,12 +73,12 @@ export class PhaseAdvanceTool extends BaseTool {
     const beforePhase = this._getCurrentPhase();
     // H1: short-circuit the "already in target" case with an informational
-    // message — the agent was trying to advance correctly, engine just
-    // auto-advanced ahead of it (common when _maybeAutoAdvance fires on a
-    // criteria flip). Treat as success, not refusal.
+    // message — agent was trying to advance correctly, engine was already
+    // there (from a prior pipeline_event-driven advance or an earlier
+    // explicit call). Treat as success, not refusal.
     if (beforePhase && beforePhase === to) {
       return new ToolResult(
-        `Already in phase ${to} (engine auto-advanced earlier via criteria flip or prior explicit call). Proceed with phase-appropriate work.`,
+        `Already in phase ${to} (engine was already there from a prior advance). Proceed with phase-appropriate work.`,
       );
     }
@@ -126,18 +127,21 @@ export class PhaseAdvanceTool extends BaseTool {
     // exactly which milestones the gate is reading and can satisfy them.
     // E2E #6 v070 showed the generic "check /status" hint wasn't concrete
     // enough — agents forced through. Naming the gap inline reduces that.
-    const engineCountsLine = advanceResult?.engineCounts
-      ? `\nEngine telemetry: ${advanceResult.engineCounts}`
-      : "";
+    // v0.8 P0-E: prescriptive refusal hint — name the artifacts the agent
+    // needs to produce, derived from the same paths _milestone-derive.js
+    // walks. Replaces the v0.7.x descriptive "check /status" message that
+    // 资管 + 贷款 v0.7.5 audits showed agents force-bypassing.
+    const prescriptive = getPrescriptiveHint(
+      beforePhase,
+      advanceResult?.engineCounts,
+      advanceResult?.engineCounts || "",
+    );
     return new ToolResult(
       `Did not advance to ${to} (currently in ${beforePhase || "?"}). ` +
-      `Likely cause: source-phase exit criteria not met.${engineCountsLine}\n\n` +
-      `Run /status (or read the phase describeState block in this turn's system reminder) ` +
-      `to see which milestones are missing, then produce the disk artifacts that satisfy them — ` +
-      `the engine derives milestones from filesystem facts (rule_skills/<id>/SKILL.md, check.py, ` +
-      `workflows/<id>/*.py, output/results/*.json, etc.). ` +
-      `If the transition is non-adjacent or this phase truly is done despite the gate, ` +
+      `Likely cause: source-phase exit criteria not met.\n\n` +
+      prescriptive +
+      `\n\nIf the transition is non-adjacent or this phase truly is done despite the gate, ` +
       `re-call with the documented schema flag. The engine logged the precise reason in ` +
       `events.jsonl as 'phase_advance_refused'.`,
       false,

package/src/agent/tools/release.js CHANGED

@@ -85,13 +85,19 @@ export class ReleaseTool extends BaseTool {
       return new ToolResult(`release template missing at ${TEMPLATE_DIR}`, true);
     }
-    // 1. Snapshot first — locks in commit + tag, regardless of whether bundle build succeeds
-    const snapResult = await this._snapshot.execute({
-      label: `release-${slug}`,
-      notes: `Release ${label} bundle source`,
-    });
-    if (snapResult.isError) return new ToolResult(`snapshot failed: ${snapResult.content}`, true);
-    const { tag: snapshotTag, commit: snapshotCommit } = this._readSnapshotMeta(`release-${slug}`);
+    // v0.8.1 P9-C: defer the snapshot (git tag) until AFTER the bundle
+    // is written + verified. v0.8.0 ordered snapshot-first to "lock in
+    // commit + tag regardless of bundle outcome," but E2E #11 资管 v0.8
+    // audit found `release-v1` tags with no corresponding bundle dir —
+    // tag without bundle confuses downstream consumers. New order:
+    //   1. Build bundle (catalog read, copy template, write fixtures, manifest, README)
+    //   2. Verify bundle (manifest.json + README.md exist + non-empty)
+    //   3. ONLY THEN snapshot (creates the git tag) + back-fill manifest
+    //      with snapshot tag/commit
+    // If verification fails, a `.failed_release` marker is written into
+    // the bundle dir and NO tag is created.
+    let snapshotTag = null;
+    let snapshotCommit = null;
     // 2. Read catalog and filter
     const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
@@ -294,6 +300,77 @@ export class ReleaseTool extends BaseTool {
       }
     }
+    // v0.8.1 P9-C: bundle verification + transactional snapshot.
+    // The manifest + README were written above. Verify they exist with
+    // substance (≥200 bytes README, valid JSON manifest with `slug` field).
+    // If verification fails, write `.failed_release` marker and skip
+    // the git-tag step — no tag-without-bundle.
+    const manifestPath = path.join(bundleAbs, "manifest.json");
+    const readmePath = path.join(bundleAbs, "README.md");
+    let verifyError = null;
+    try {
+      const mStat = fs.statSync(manifestPath);
+      const rStat = fs.statSync(readmePath);
+      if (!mStat.isFile() || mStat.size < 50) verifyError = "manifest.json missing or too small";
+      else if (!rStat.isFile() || rStat.size < 200) verifyError = "README.md missing or too small";
+      else {
+        const m = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
+        if (m.slug !== slug) verifyError = `manifest.slug=${m.slug} doesn't match expected ${slug}`;
+      }
+    } catch (e) {
+      verifyError = `bundle verification threw: ${e.message}`;
+    }
+    if (verifyError) {
+      try {
+        fs.writeFileSync(
+          path.join(bundleAbs, ".failed_release"),
+          JSON.stringify({
+            failed_at: new Date().toISOString(),
+            reason: verifyError,
+            label,
+            slug,
+          }, null, 2),
+        );
+      } catch { /* best-effort */ }
+      return new ToolResult(
+        `Release bundle verification failed (${verifyError}). NO git tag created. ` +
+        `See .failed_release marker in ${bundleRel}/ for details. Fix the bundle issue and re-run.`,
+        true,
+      );
+    }
+    // Bundle verified. NOW snapshot — creates the durable git tag.
+    const snapResult = await this._snapshot.execute({
+      label: `release-${slug}`,
+      notes: `Release ${label} bundle source`,
+    });
+    if (snapResult.isError) {
+      // Bundle exists but tagging failed. Surface but don't roll back —
+      // the bundle is still usable; the user can manually tag later.
+      return new ToolResult(
+        `Release '${label}' bundled at ${bundleRel} but snapshot tag FAILED: ${snapResult.content}. ` +
+        `Bundle is valid; create the snapshot tag manually if needed.`,
+      );
+    }
+    const meta = this._readSnapshotMeta(`release-${slug}`);
+    snapshotTag = meta.tag;
+    snapshotCommit = meta.commit;
+    // Back-fill the manifest with the now-known snapshot tag/commit.
+    try {
+      const m = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
+      m.snapshot_tag = snapshotTag;
+      m.snapshot_commit = snapshotCommit;
+      fs.writeFileSync(manifestPath, JSON.stringify(m, null, 2) + "\n");
+      // Also back-fill the README's snapshot placeholders if still placeholder.
+      const readme = fs.readFileSync(readmePath, "utf-8");
+      const updated = readme
+        .replace(/\(no tag — git unavailable\)/g, snapshotTag || "")
+        .replace(/\(unknown\)/g, snapshotCommit || "(unknown)");
+      if (updated !== readme) fs.writeFileSync(readmePath, updated);
+    } catch { /* best-effort back-fill */ }
     // Bundle dir is in output/ (gitignored). Snapshot manifest in snapshots/ IS tracked.
     const lines = [
       `Release '${label}' bundled at ${bundleRel}`,
@@ -576,10 +653,268 @@ export class ReleaseTool extends BaseTool {
       }
     }
+    // 3) v0.8 P0-C: production_qc_results.json + qc_results_v*.json shapes
+    // (资管 + 贷款 v0.7.5 audits both shipped empty historical_accuracy
+    // because the v0.7.2 aggregator only recognized rule_stats / full_test_results).
+    if (tally.size === 0) {
+      const qcFiles = files
+        .filter((f) =>
+          /^production_qc(?:_results)?(?:_v\d+)?\.json$/i.test(f.name) ||
+          /^qc_results(?:_v\d+)?\.json$/i.test(f.name)
+        )
+        .sort((a, b) => a.name.localeCompare(b.name));
+      for (const f of qcFiles.slice(0, 5)) {
+        try {
+          const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
+          const results = d.results;
+          if (!results) continue;
+          // Shape 3a (资管): nested rule-keyed map
+          //   {results: {<rid>: {<doc_id>: {verdict, ...}}}}
+          if (typeof results === "object" && !Array.isArray(results)) {
+            for (const [rid, docs] of Object.entries(results)) {
+              if (!isRuleId(rid) || !docs || typeof docs !== "object") continue;
+              for (const r of Object.values(docs)) {
+                if (!r || typeof r !== "object") continue;
+                const verdict = (r.verdict || "").toString().toUpperCase();
+                if (verdict === "PASS") bump(rid, "pass");
+                else if (verdict === "FAIL") bump(rid, "fail");
+                else if (verdict === "NOT_APPLICABLE" || verdict === "NA" || verdict === "WARNING") bump(rid, "na");
+              }
+            }
+            if (tally.size > 0) sourceFiles.push(path.relative(this._workspace.cwd, f.path));
+          }
+          // Shape 3b (贷款): per-doc rollup list with failed_rules
+          //   {results: [{filename, actual, correct, failed_rules: [...]}], total_tested: N}
+          // For each rule: failures counted from failed_rules union; passes
+          // inferred as (total_tested - failures) for rules that appear in the catalog.
+          else if (Array.isArray(results)) {
+            const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
+            let catalogRules = [];
+            try {
+              const cat = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
+              const list = Array.isArray(cat) ? cat : Array.isArray(cat?.rules) ? cat.rules : [];
+              catalogRules = list.map((r) => r?.id || r?.rule_id).filter((x) => isRuleId(x));
+            } catch { /* catalog optional */ }
+            const failCountByRule = new Map();
+            let docCount = 0;
+            for (const row of results) {
+              if (!row || typeof row !== "object") continue;
+              docCount += 1;
+              const failed = Array.isArray(row.failed_rules) ? row.failed_rules : [];
+              for (const rid of failed) {
+                if (!isRuleId(rid)) continue;
+                failCountByRule.set(rid, (failCountByRule.get(rid) || 0) + 1);
+              }
+            }
+            if (docCount > 0) {
+              const ruleSet = new Set([...catalogRules, ...failCountByRule.keys()]);
+              for (const rid of ruleSet) {
+                const fails = failCountByRule.get(rid) || 0;
+                const passes = Math.max(0, docCount - fails);
+                const t = tally.get(rid) || { pass: 0, fail: 0, na: 0, n: 0 };
+                t.pass += passes; t.fail += fails; t.n += docCount;
+                tally.set(rid, t);
+              }
+              if (tally.size > 0) sourceFiles.push(path.relative(this._workspace.cwd, f.path));
+            }
+          }
+        } catch { /* try next file */ }
+        if (tally.size > 0) break;
+      }
+    }
+    // 4) v0.8.1 P9-A: top-level fail_by_rule + pass_by_rule maps (贷款
+    // v0.8 production_qc_report.json shape). Direct per-rule counts —
+    // no per-doc rollup, no verdict literals to scan.
+    //   {accuracy, total_checks, fail_by_rule: {<rid>: N}, pass_by_rule: {<rid>: N}}
+    if (tally.size === 0) {
+      for (const f of files) {
+        if (!/qc|prod|report|result/i.test(f.name)) continue;
+        try {
+          const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
+          const failMap = d?.fail_by_rule;
+          const passMap = d?.pass_by_rule;
+          if (
+            failMap && typeof failMap === "object" && !Array.isArray(failMap) &&
+            passMap && typeof passMap === "object" && !Array.isArray(passMap)
+          ) {
+            const allRules = new Set([...Object.keys(failMap), ...Object.keys(passMap)]);
+            let matched = false;
+            for (const rid of allRules) {
+              if (!isRuleId(rid)) continue;
+              const fails = Number(failMap[rid]) || 0;
+              const passes = Number(passMap[rid]) || 0;
+              if (fails + passes === 0) continue;
+              const t = tally.get(rid) || { pass: 0, fail: 0, na: 0, n: 0 };
+              t.pass += passes;
+              t.fail += fails;
+              t.n += passes + fails;
+              tally.set(rid, t);
+              matched = true;
+            }
+            if (matched) {
+              sourceFiles.push(path.relative(this._workspace.cwd, f.path));
+              break;
+            }
+          }
+        } catch { /* skip non-JSON */ }
+      }
+    }
+    // 5) v0.8.2 P13-A: doc-keyed → rules-keyed nested shape.
+    // 贷款 v0.8.1 wrote skill_test_v*_results.json + v2_hybrid_results.json
+    // + run_all_checks.json all with this shape:
+    //   {
+    //     "<doc_filename>": {
+    //       "channel": "...", "expected": "PASS"|"FAIL",
+    //       "rules": {
+    //         "R01": {"rule_id": "R01", "verdict": "PASS", "confidence": 0.95, "method": "regex"},
+    //         "R02": {...}
+    //       }
+    //     },
+    //     ...
+    //   }
+    // The optional outer "results" wrapper from v2_full_regression.json
+    // (which nests this further) is unwrapped via d.results || d.
+    if (tally.size === 0) {
+      for (const f of files) {
+        if (!/qc|verdict|result|test/i.test(f.name)) continue;
+        try {
+          const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
+          const root = d?.results || d;
+          if (!root || typeof root !== "object" || Array.isArray(root)) continue;
+          let matched = false;
+          for (const docKey of Object.keys(root)) {
+            const docEntry = root[docKey];
+            if (!docEntry || typeof docEntry !== "object") continue;
+            const rulesMap = docEntry.rules;
+            if (!rulesMap || typeof rulesMap !== "object" || Array.isArray(rulesMap)) continue;
+            for (const rid of Object.keys(rulesMap)) {
+              if (!isRuleId(rid)) continue;
+              const r = rulesMap[rid];
+              if (!r || typeof r !== "object") continue;
+              const verdict = (r.verdict || r.result_type || r.status || "").toString().toUpperCase();
+              if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
+              else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
+              else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
+            }
+          }
+          if (matched) {
+            sourceFiles.push(path.relative(this._workspace.cwd, f.path));
+            break;
+          }
+        } catch { /* skip non-JSON */ }
+      }
+    }
+    // 6) v0.8.3 P22-B6: top-level array of {doc_id, results: [{rule_id, status}]}.
+    // 资管 v0.8.2 wrote `output/skill_test_v*.json` + `workflow_v*_results.json`
+    // + `evolution_round*.json` all with this shape:
+    //   [
+    //     {
+    //       "doc_id": "<doc-filename>",
+    //       "results": [
+    //         {"rule_id": "R01-01", "status": "WARNING", "found_fields": {...}},
+    //         {"rule_id": "R01-02", "status": "PASS", ...},
+    //         ...
+    //       ]
+    //     },
+    //     ...
+    //   ]
+    // Distinct from Shape 5: top-level is an ARRAY (not object), and the
+    // per-rule data lives in `results: [...]` (an array of rule outcomes)
+    // rather than `rules: {<rule>: ...}` (object keyed by rule).
+    if (tally.size === 0) {
+      for (const f of files) {
+        if (!/qc|verdict|result|test|evolution|workflow/i.test(f.name)) continue;
+        try {
+          const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
+          if (!Array.isArray(d)) continue;
+          let matched = false;
+          for (const docEntry of d) {
+            if (!docEntry || typeof docEntry !== "object") continue;
+            const results = docEntry.results;
+            if (!Array.isArray(results)) continue;
+            for (const r of results) {
+              if (!r || typeof r !== "object") continue;
+              const rid = r.rule_id || r.ruleId || r.id;
+              if (!isRuleId(rid)) continue;
+              const verdict = (r.status || r.verdict || r.result_type || "").toString().toUpperCase();
+              if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
+              else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
+              else if (verdict === "WARNING") { bump(rid, "pass"); matched = true; } // WARNING counts as pass (per existing shape conventions)
+              else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
+            }
+          }
+          if (matched) {
+            sourceFiles.push(path.relative(this._workspace.cwd, f.path));
+            break;
+          }
+        } catch { /* skip non-JSON */ }
+      }
+    }
+    // 7) Fallback (belt-and-suspenders per v0.8 plan Risk #7):
+    // walk any output/*.json with a top-level rule_id-keyed shape that has
+    // verdict-like leaf objects. Catches future schema drift before the
+    // next audit cycle.
+    if (tally.size === 0) {
+      for (const f of files) {
+        if (!/qc|verdict|result/i.test(f.name)) continue;
+        try {
+          const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
+          const root = d?.results || d;
+          if (!root || typeof root !== "object" || Array.isArray(root)) continue;
+          let matched = false;
+          for (const [rid, val] of Object.entries(root)) {
+            if (!isRuleId(rid) || !val || typeof val !== "object") continue;
+            // val might be {verdict, ...} OR {<doc>: {verdict, ...}}
+            const probe = val.verdict ? [val] : Object.values(val);
+            for (const r of probe) {
+              if (!r || typeof r !== "object") continue;
+              const verdict = (r.verdict || "").toString().toUpperCase();
+              if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
+              else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
+              else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
+            }
+          }
+          if (matched) {
+            sourceFiles.push(path.relative(this._workspace.cwd, f.path) + " (fallback shape)");
+            break;
+          }
+        } catch { /* skip non-JSON */ }
+      }
+    }
     if (tally.size === 0) return null;
+    // v0.8.1 P9-D: filter tally to rule_ids in the current catalog.
+    // E2E #11 资管 v0.8 audit: confidence_calibration aggregated from
+    // an abandoned 39-rule pipeline included only 2 of 4 final samples.
+    // Filtering to catalog.json keeps the calibration scoped to the
+    // rules that actually ship in the release.
+    let catalogRuleIds = null;
+    try {
+      const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
+      if (fs.existsSync(catalogPath)) {
+        const cat = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
+        const list = Array.isArray(cat) ? cat : Array.isArray(cat?.rules) ? cat.rules : [];
+        catalogRuleIds = new Set(
+          list.map((r) => r?.id || r?.rule_id).filter((x) => isRuleId(x))
+        );
+        if (catalogRuleIds.size === 0) catalogRuleIds = null;
+      }
+    } catch { /* skip filter if catalog missing/malformed */ }
     const historical_accuracy = {};
+    const droppedRules = [];
     for (const [rid, t] of tally.entries()) {
+      if (catalogRuleIds && !catalogRuleIds.has(rid)) {
+        droppedRules.push(rid);
+        continue;
+      }
       const fired = t.pass + t.fail;
       historical_accuracy[rid] = {
         pass_rate: fired > 0 ? +(t.pass / fired).toFixed(4) : null,
@@ -593,6 +928,7 @@ export class ReleaseTool extends BaseTool {
       historical_accuracy,
       computed_at: new Date().toISOString(),
       source_files: sourceFiles,
+      ...(droppedRules.length > 0 ? { dropped_off_catalog: droppedRules } : {}),
     };
   }