npm - kc-beta - Versions diffs - 0.7.3 → 0.8.1 - Mend

kc-beta 0.7.3 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

package/src/agent/tools/consult-skill.js ADDED Viewed

@@ -0,0 +1,127 @@
+import { BaseTool, ToolResult } from "./base.js";
+/**
+ * v0.7.5: load a methodology skill's body into the agent's conversation
+ * history as a tool result. Pairs with the always-loaded body injection
+ * in SkillLoader.formatForContext — that handles the 1-2 architecturally-
+ * required skills per phase; consult_skill handles the rest on demand.
+ *
+ * Validation:
+ * - Skill name must be in the current phase's available set (per
+ *   template/skills/phase_skills.yaml).
+ * - Already-always-loaded skills return a hint pointing the agent at the
+ *   system prompt (don't double-load).
+ * - Missing bodies return an error result.
+ *
+ * Emits `skill_invoked` event with proper skill name on success — replaces
+ * the older path-matching regex at engine.js:1297-1313 that produced
+ * "(unknown)" spam from rule_skills/<id>/SKILL.md writes.
+ */
+export class ConsultSkillTool extends BaseTool {
+  /**
+   * @param {import('../workspace.js').Workspace} workspace
+   * @param {import('../skill-loader.js').SkillLoader} skillLoader
+   * @param {() => string} getCurrentPhase — returns the engine's current phase
+   * @param {import('../event-log.js').EventLog} [eventLog] — for skill_invoked emission
+   */
+  constructor(workspace, skillLoader, getCurrentPhase, eventLog) {
+    super();
+    this._workspace = workspace;
+    this._skillLoader = skillLoader;
+    this._getCurrentPhase = getCurrentPhase;
+    this._eventLog = eventLog;
+  }
+  get name() { return "consult_skill"; }
+  get description() {
+    return (
+      "Load the full body of a methodology skill into your context for the " +
+      "current turn. Use when the description tease in the system prompt's " +
+      "'Available Methodology Skills' section isn't enough detail to proceed. " +
+      "The body lands in your conversation history; subsequent turns can " +
+      "reference it via context, or you can re-consult if it ages out. " +
+      "Skills already in the 'Loaded Into Your Context' section don't need " +
+      "consulting — they're already in your prompt."
+    );
+  }
+  get inputSchema() {
+    return {
+      type: "object",
+      properties: {
+        name: {
+          type: "string",
+          description: "Skill name as listed in the system prompt (e.g., 'work-decomposition', 'evolution-loop').",
+        },
+      },
+      required: ["name"],
+    };
+  }
+  async execute(input) {
+    const name = (input?.name || "").trim();
+    if (!name) return new ToolResult("name required (e.g. consult_skill({name: 'work-decomposition'}))", true);
+    // v0.8 P0-A: defensive null-check. v0.7.5 shipped with an init-order bug
+    // where ConsultSkillTool received undefined skillLoader and threw
+    // "Cannot read properties of undefined (reading 'getPhaseSkillSet')"
+    // on every invocation (资管 audit § 9.1, 5/5 failure rate). The init-order
+    // fix is in engine.js:238; this guard prevents an uncaught exception if
+    // the bug recurs from any future constructor reorder.
+    if (!this._skillLoader || typeof this._skillLoader.getPhaseSkillSet !== "function") {
+      return new ToolResult(
+        "consult_skill is misconfigured: skillLoader unavailable. This is an engine-side bug — " +
+        "surface to the developer user. The agent should fall back to reading skill bodies " +
+        "directly from <workspace>/skills/<name>/SKILL.md or the system prompt's always-loaded section.",
+        true,
+      );
+    }
+    const phase = this._getCurrentPhase ? this._getCurrentPhase() : null;
+    const { alwaysLoaded, available } = this._skillLoader.getPhaseSkillSet(phase);
+    const alwaysSet = new Set(alwaysLoaded);
+    const availableSet = new Set(available);
+    if (alwaysSet.has(name)) {
+      return new ToolResult(
+        `Skill '${name}' is already always-loaded in your system prompt for phase '${phase}'. ` +
+        `Re-read the system prompt's 'Methodology Skills — Loaded Into Your Context' section ` +
+        `— the body is there. No separate consult needed.`,
+      );
+    }
+    if (!availableSet.has(name)) {
+      const sorted = [...availableSet].sort();
+      return new ToolResult(
+        `Skill '${name}' is not available in phase '${phase}'. ` +
+        `Available for this phase: ${sorted.join(", ")}. ` +
+        `If you genuinely need this skill, either advance/retreat to a phase ` +
+        `where it's available, or check the spelling.`,
+        true,
+      );
+    }
+    const body = this._skillLoader.loadSkillBody(name);
+    if (!body) {
+      return new ToolResult(
+        `Skill '${name}' is declared available for phase '${phase}' but its body could not be loaded. ` +
+        `This is an engine/template inconsistency — surface to the developer user.`,
+        true,
+      );
+    }
+    // Emit skill_invoked event with the real skill name (replaces the
+    // old path-matching regex that produced "(unknown)" spam).
+    try {
+      this._eventLog?.append?.("skill_invoked", {
+        skill: name,
+        via_tool: "consult_skill",
+        phase,
+      });
+    } catch { /* event logging is best-effort */ }
+    return new ToolResult(body);
+  }
+}

package/src/agent/tools/copy-to-workspace.js CHANGED Viewed

@@ -114,9 +114,10 @@ export class CopyToWorkspaceTool extends BaseTool {
   }
   async _appendManifest(entry) {
-    // v0.7.3: refs/manifest.json is a shared coordination path — wrap the
-    // whole read-modify-write under the workspace lock so two parallel
-    // copy_to_workspace calls (main agent + subagent) don't lose entries.
+    // v0.7.4 (re-applied from v0.7.3 G1a): refs/manifest.json is a
+    // shared coordination path — wrap the whole read-modify-write
+    // under the workspace lock so two parallel copy_to_workspace
+    // calls (main agent + subagent) don't lose entries.
     return await this._workspace.withSharedLockIfApplicable(MANIFEST_REL, () => {
       const manifestAbs = this._workspace.resolvePath(MANIFEST_REL);
       fs.mkdirSync(path.dirname(manifestAbs), { recursive: true });

package/src/agent/tools/dashboard-render.js CHANGED Viewed

@@ -81,11 +81,57 @@ export class DashboardRenderTool extends BaseTool {
       metrics.evolution_iterations = fs.readdirSync(evoDir).filter((f) => f.endsWith(".json")).length;
     }
+    // v0.8 P1-G: QC counter now reads from multiple known agent-write
+    // locations + counts per-doc reviews. Pre-v0.8 read only output/qc/*.json
+    // top-level; 资管 v0.7.5 wrote output/results/production_qc_results.json
+    // so the dashboard showed `QC Batches: 0` despite 126 pairs of data.
+    let qcBatches = 0;
+    let qcDocsReviewed = 0;
+    // (a) Top-level batch files in output/qc/ (贷款 v0.7.5 shape)
     const qcDir = path.join(ws, "output", "qc");
     if (fs.existsSync(qcDir)) {
-      metrics.qc_batches = fs.readdirSync(qcDir).filter((f) => f.endsWith(".json")).length;
+      for (const f of fs.readdirSync(qcDir).filter((f) => f.endsWith(".json"))) {
+        qcBatches++;
+        try {
+          const data = JSON.parse(fs.readFileSync(path.join(qcDir, f), "utf-8"));
+          const n = Number(data?.documents_reviewed);
+          if (Number.isFinite(n) && n > qcDocsReviewed) qcDocsReviewed = n;
+        } catch { /* skip malformed */ }
+      }
     }
+    // (b) Per-doc reviews at output/qc/reviews/ (贷款 detail shape)
+    const reviewsDir = path.join(ws, "output", "qc", "reviews");
+    if (fs.existsSync(reviewsDir)) {
+      const reviewFiles = fs.readdirSync(reviewsDir).filter((f) => f.endsWith(".json"));
+      qcDocsReviewed = Math.max(qcDocsReviewed, reviewFiles.length);
+    }
+    // (c) production_qc_results.json shape (资管 v0.7.5)
+    const productionQc = path.join(ws, "output", "results", "production_qc_results.json");
+    if (fs.existsSync(productionQc)) {
+      qcBatches++;
+      try {
+        const data = JSON.parse(fs.readFileSync(productionQc, "utf-8"));
+        const totalDocs = Number(data?.total_docs);
+        if (Number.isFinite(totalDocs)) qcDocsReviewed = Math.max(qcDocsReviewed, totalDocs);
+        // Otherwise, dedup doc keys from nested results
+        if (!Number.isFinite(totalDocs) && data?.results && typeof data.results === "object") {
+          const docSet = new Set();
+          for (const docs of Object.values(data.results)) {
+            if (docs && typeof docs === "object") {
+              for (const k of Object.keys(docs)) docSet.add(k);
+            }
+          }
+          if (docSet.size > 0) qcDocsReviewed = Math.max(qcDocsReviewed, docSet.size);
+        }
+      } catch { /* skip */ }
+    }
+    metrics.qc_batches = qcBatches;
+    metrics.qc_docs_reviewed = qcDocsReviewed;
     return metrics;
   }
@@ -126,6 +172,7 @@ th { color: #737373; font-size: 0.85em; }
 <div class="metric"><span class="value">${total}</span><br><span class="label">Results</span></div>
 <div class="metric"><span class="value">${metrics.evolution_iterations}</span><br><span class="label">Evolution Cycles</span></div>
 <div class="metric"><span class="value">${metrics.qc_batches}</span><br><span class="label">QC Batches</span></div>
+<div class="metric"><span class="value">${metrics.qc_docs_reviewed || 0}</span><br><span class="label">Docs Reviewed</span></div>
 </div>
 <h2>Confidence Distribution</h2>
 <div class="card">

package/src/agent/tools/document-parse.js CHANGED Viewed

@@ -12,14 +12,43 @@ const MIN_CHARS_PER_PAGE = 50;
  * Level 3: OCR models via SiliconFlow — fallback via vision models
  */
 export class DocumentParseTool extends BaseTool {
-  constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, ocrModel } = {}) {
+  /**
+   * @param {object} workspace
+   * @param {object} opts
+   * @param {string} [opts.mineruApiUrl]
+   * @param {string} [opts.mineruApiKey]
+   * @param {string} [opts.llmApiKey]
+   * @param {string} [opts.llmBaseUrl]
+   * @param {string} [opts.ocrModel] — static fallback (legacy)
+   * @param {() => string} [opts.getOcrModel] — v0.8.1 P9-B: live-read
+   *   callback. If provided, takes precedence over `ocrModel`. The
+   *   constructor used to capture vlmTier1 once at engine startup, but
+   *   workspace_env_overlay (P1-B) fires AFTER tool construction in
+   *   some flows (e.g. agent edits .env mid-run, OR overlay applies on
+   *   a subagent's engine but parent already cached the gc default).
+   *   E2E #11 资管 v0.8 audit found document_parse errors quoting
+   *   Qwen3-VL-235B-A22B-Instruct (gc default) even though .env set
+   *   OCR_MODEL_TIER1=zai-org/GLM-4.6V — the overlay applied 5 min
+   *   after first failed call. Live-read fixes the race.
+   */
+  constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, ocrModel, getOcrModel } = {}) {
     super();
     this._workspace = workspace;
     this._mineruApiUrl = mineruApiUrl || "";
     this._mineruApiKey = mineruApiKey || "";
     this._vlmApiKey = llmApiKey || "";
     this._vlmBaseUrl = (llmBaseUrl || "").replace(/\/+$/, "");
-    this._ocrModel = ocrModel || "";
+    this._ocrModelStatic = ocrModel || "";
+    this._getOcrModel = typeof getOcrModel === "function" ? getOcrModel : null;
+  }
+  /** Read ocrModel live (P9-B) or fall back to the static value captured at construction. */
+  get _ocrModel() {
+    if (this._getOcrModel) {
+      try { return this._getOcrModel() || this._ocrModelStatic; }
+      catch { return this._ocrModelStatic; }
+    }
+    return this._ocrModelStatic;
   }
   get name() { return "document_parse"; }

package/src/agent/tools/phase-advance.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { BaseTool, ToolResult } from "./base.js";
 import { Phase } from "../pipelines/index.js";
+import { getPrescriptiveHint } from "../pipelines/_advance-hints.js";
 const VALID_PHASES = new Set(Object.values(Phase));
@@ -72,12 +73,12 @@ export class PhaseAdvanceTool extends BaseTool {
     const beforePhase = this._getCurrentPhase();
     // H1: short-circuit the "already in target" case with an informational
-    // message — the agent was trying to advance correctly, engine just
-    // auto-advanced ahead of it (common when _maybeAutoAdvance fires on a
-    // criteria flip). Treat as success, not refusal.
+    // message — agent was trying to advance correctly, engine was already
+    // there (from a prior pipeline_event-driven advance or an earlier
+    // explicit call). Treat as success, not refusal.
     if (beforePhase && beforePhase === to) {
       return new ToolResult(
-        `Already in phase ${to} (engine auto-advanced earlier via criteria flip or prior explicit call). Proceed with phase-appropriate work.`,
+        `Already in phase ${to} (engine was already there from a prior advance). Proceed with phase-appropriate work.`,
       );
     }
@@ -126,18 +127,21 @@ export class PhaseAdvanceTool extends BaseTool {
     // exactly which milestones the gate is reading and can satisfy them.
     // E2E #6 v070 showed the generic "check /status" hint wasn't concrete
     // enough — agents forced through. Naming the gap inline reduces that.
-    const engineCountsLine = advanceResult?.engineCounts
-      ? `\nEngine telemetry: ${advanceResult.engineCounts}`
-      : "";
+    // v0.8 P0-E: prescriptive refusal hint — name the artifacts the agent
+    // needs to produce, derived from the same paths _milestone-derive.js
+    // walks. Replaces the v0.7.x descriptive "check /status" message that
+    // 资管 + 贷款 v0.7.5 audits showed agents force-bypassing.
+    const prescriptive = getPrescriptiveHint(
+      beforePhase,
+      advanceResult?.engineCounts,
+      advanceResult?.engineCounts || "",
+    );
     return new ToolResult(
       `Did not advance to ${to} (currently in ${beforePhase || "?"}). ` +
-      `Likely cause: source-phase exit criteria not met.${engineCountsLine}\n\n` +
-      `Run /status (or read the phase describeState block in this turn's system reminder) ` +
-      `to see which milestones are missing, then produce the disk artifacts that satisfy them — ` +
-      `the engine derives milestones from filesystem facts (rule_skills/<id>/SKILL.md, check.py, ` +
-      `workflows/<id>/*.py, output/results/*.json, etc.). ` +
-      `If the transition is non-adjacent or this phase truly is done despite the gate, ` +
+      `Likely cause: source-phase exit criteria not met.\n\n` +
+      prescriptive +
+      `\n\nIf the transition is non-adjacent or this phase truly is done despite the gate, ` +
       `re-call with the documented schema flag. The engine logged the precise reason in ` +
       `events.jsonl as 'phase_advance_refused'.`,
       false,