npm - kc-beta - Versions diffs - 0.5.6 → 0.6.1 - Mend

kc-beta 0.5.6 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/QUICKSTART.md +17 -4
package/README.md +58 -11
package/bin/kc-beta.js +35 -1
package/package.json +1 -1
package/src/agent/bundle-tree.js +553 -0
package/src/agent/context.js +40 -1
package/src/agent/engine.js +828 -31
package/src/agent/llm-client.js +67 -18
package/src/agent/pipelines/distillation.js +15 -0
package/src/agent/pipelines/extraction.js +60 -3
package/src/agent/pipelines/finalization.js +186 -0
package/src/agent/pipelines/index.js +8 -0
package/src/agent/pipelines/initializer.js +40 -0
package/src/agent/pipelines/production-qc.js +63 -13
package/src/agent/pipelines/skill-authoring.js +136 -7
package/src/agent/skill-loader.js +54 -4
package/src/agent/task-manager.js +81 -3
package/src/agent/tools/agent-tool.js +283 -35
package/src/agent/tools/bundle-search.js +146 -0
package/src/agent/tools/document-chunk.js +246 -0
package/src/agent/tools/document-classify.js +311 -0
package/src/agent/tools/document-parse.js +8 -1
package/src/agent/tools/phase-advance.js +30 -7
package/src/agent/tools/registry.js +10 -0
package/src/agent/tools/rule-catalog.js +17 -3
package/src/agent/tools/sandbox-exec.js +30 -0
package/src/agent/tools/workflow-run.js +34 -1
package/src/agent/workspace.js +168 -14
package/src/cli/components.js +165 -17
package/src/cli/index.js +166 -19
package/src/cli/meme.js +58 -0
package/src/config.js +39 -2
package/src/providers.js +26 -0
package/template/skills/en/meta-meta/evolution-loop/SKILL.md +13 -1
package/template/skills/en/meta-meta/rule-extraction/SKILL.md +74 -0
package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +7 -1
package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +73 -0

package/src/agent/pipelines/skill-authoring.js CHANGED

@@ -4,9 +4,18 @@ import { Phase, PipelineEvent } from "./index.js";
 import { Pipeline } from "./base.js";
 export class SkillAuthoringPipeline extends Pipeline {
-  constructor(workspace) {
+  /**
+   * @param {Workspace} workspace
+   * @param {TaskManager|null} [taskManager] - v0.6.1 A2: pass the engine's
+   *   TaskManager so exitCriteriaMet can require task-completion parity in
+   *   addition to D2 filename coverage. Subagents pass null (no taskManager
+   *   in subagent scope), in which case the gate falls back to D2-only
+   *   behaviour.
+   */
+  constructor(workspace, taskManager = null) {
     super();
     this._workspace = workspace;
+    this._taskManager = taskManager;
     this.totalRules = [];
     this.skillsAuthored = [];
     this.skillsWithScripts = [];
@@ -34,6 +43,14 @@ export class SkillAuthoringPipeline extends Pipeline {
   _scanSkills() {
     this.skillsAuthored = [];
     this.skillsWithScripts = [];
+    // D2: rule_ids that are covered by some authored skill — whether that
+    // skill is single-rule (rule_skills/R014/) or grouped
+    // (rule_skills/SK02/check_r002_r007.py). Populated by _walkForRuleIds
+    // below so the exit criterion counts DISTINCT rule coverage rather
+    // than skill-directory count, which over-counts when skills are
+    // grouped (session 6304673afaa0's rule_skills/ had 289 rules packed
+    // into 23 skill files).
+    this.ruleIdsCovered = new Set();
     const dir = path.join(this._workspace.cwd, "rule_skills");
     if (!fs.existsSync(dir)) return;
     for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
@@ -46,19 +63,109 @@ export class SkillAuthoringPipeline extends Pipeline {
       if (fs.existsSync(scriptsDir) && fs.readdirSync(scriptsDir).length > 0) {
         this.skillsWithScripts.push(e.name);
       }
+      this._walkForRuleIds(skillPath);
     }
   }
+  /**
+   * D2: Find rule_ids referenced by any file under the skill directory.
+   * Recognizes three naming patterns from actual sessions:
+   *   - Directory name matches a rule: rule_skills/R014/
+   *   - Single-rule script: check_r014.py
+   *   - Grouped script: check_r002_r007.py → covers R002 through R007
+   */
+  _walkForRuleIds(skillDir) {
+    const dirName = path.basename(skillDir);
+    const dirMatch = dirName.match(/^R0*(\d+)$/i);
+    if (dirMatch) this.ruleIdsCovered.add(`R${String(parseInt(dirMatch[1], 10)).padStart(3, "0")}`);
+    const walk = (d) => {
+      let entries;
+      try { entries = fs.readdirSync(d, { withFileTypes: true }); }
+      catch { return; }
+      for (const e of entries) {
+        if (e.name.startsWith(".")) continue;
+        const p = path.join(d, e.name);
+        if (e.isDirectory()) { walk(p); continue; }
+        // Per-rule: check_r014.py
+        const single = e.name.match(/check_r0*(\d+)\.py$/i);
+        if (single) {
+          this.ruleIdsCovered.add(`R${String(parseInt(single[1], 10)).padStart(3, "0")}`);
+          continue;
+        }
+        // Grouped: check_r002_r007.py, check_r002-r007.py, check_r59_r77.py
+        const grouped = e.name.match(/check_r0*(\d+)[_-]+r0*(\d+)\.py$/i);
+        if (grouped) {
+          const lo = parseInt(grouped[1], 10);
+          const hi = parseInt(grouped[2], 10);
+          for (let n = lo; n <= hi; n++) {
+            this.ruleIdsCovered.add(`R${String(n).padStart(3, "0")}`);
+          }
+          continue;
+        }
+        // Directory names that encode ranges: R078_R128/
+        // handled by caller passing skillDir
+      }
+    };
+    // Also handle dirs named like R078_R128/
+    const rangeDir = dirName.match(/^R0*(\d+)[_-]R0*(\d+)$/i);
+    if (rangeDir) {
+      const lo = parseInt(rangeDir[1], 10);
+      const hi = parseInt(rangeDir[2], 10);
+      for (let n = lo; n <= hi; n++) {
+        this.ruleIdsCovered.add(`R${String(n).padStart(3, "0")}`);
+      }
+    }
+    walk(skillDir);
+  }
   describeState() {
     this._scanWorkspace();
     const total = this.totalRules.length;
-    const authored = this.skillsAuthored.length;
-    const remaining = this.totalRules.filter((r) => !this.skillsAuthored.includes(r));
-    const parts = ["## Phase: SKILL_AUTHORING\nWrite verification skills for each extracted rule. Skills are first-class deliverables — they may serve as the production solution when worker LLM workflows are insufficient. Follow Anthropic skill-creator format. This is BUILD mode."];
-    parts.push(`### Progress\n- Rules: ${total}\n- Skills authored: ${authored}\n- Skills with scripts/: ${this.skillsWithScripts.length}${remaining.length > 0 ? `\n- Remaining: ${remaining.slice(0, 10).join(", ")}` : ""}`);
+    const covered = this.ruleIdsCovered.size;
+    const uncovered = this.totalRules.filter((r) => !this.ruleIdsCovered.has(r));
+    const parts = [
+      "## Phase: SKILL_AUTHORING\n" +
+      "Write verification skills for each extracted rule. Skills are first-class " +
+      "deliverables — they may serve as the production solution when worker LLM " +
+      "workflows are insufficient. Follow Anthropic skill-creator format. This is " +
+      "BUILD mode.\n\n" +
+      // D2: soft granularity nudge
+      "**Granularity preference:** 1 rule = 1 skill directory. Group rules into " +
+      "the same file ONLY when they share evidence and fail together (e.g. " +
+      "siblings from the same required-fields table). When grouping, name the " +
+      "file with the range: `check_r002_r007.py`. Downstream consumers " +
+      "(workflow-run, dashboards) count rule coverage by parsing these names, " +
+      "so the file-naming matters.\n\n" +
+      "**Do not write to rules/catalog.json via sandbox_exec.** Use the " +
+      "`rule_catalog` tool for any catalog edits — sandbox_exec bypasses the " +
+      "workspace file lock and races with parallel workers."
+    ];
+    // v0.6.1 A2: surface task-completion parity so the agent sees the gate
+    let taskLine = "";
+    if (this._taskManager) {
+      const totalT = this._taskManager.countByPhase("skill_authoring");
+      const doneT = this._taskManager.countByPhase("skill_authoring", "completed");
+      const failedT = this._taskManager.countByPhase("skill_authoring", "failed");
+      if (totalT > 0) {
+        taskLine = `\n- Per-rule tasks completed: ${doneT}/${totalT}` +
+          (failedT > 0 ? ` (+${failedT} failed)` : "");
+      }
+    }
+    parts.push(
+      `### Progress (rule-id coverage, D2)\n` +
+      `- Total rules in catalog: ${total}\n` +
+      `- Rule ids covered by some skill: ${covered}\n` +
+      `- Skill directories authored: ${this.skillsAuthored.length}\n` +
+      `- Skills with scripts/: ${this.skillsWithScripts.length}` +
+      taskLine +
+      (uncovered.length > 0
+        ? `\n- Missing coverage (${uncovered.length}): ${uncovered.slice(0, 15).join(", ")}${uncovered.length > 15 ? "…" : ""}`
+        : ""),
+    );
     if (this.exitCriteriaMet()) {
-      parts.push("### Exit\nAll rules have skills. Proceed to SKILL_TESTING.");
+      parts.push("### Exit\nAll rule ids are covered by some skill. Proceed to SKILL_TESTING.");
     }
     return parts.join("\n\n");
   }
@@ -75,7 +182,29 @@ export class SkillAuthoringPipeline extends Pipeline {
   exitCriteriaMet() {
     if (!this.totalRules.length) return false;
-    return this.skillsAuthored.length >= this.totalRules.length && this.skillsWithScripts.length >= this.skillsAuthored.length * 0.5;
+    // D2: exit requires distinct rule-id coverage, not skill-dir count.
+    // Original heuristic (skillsAuthored >= totalRules) passed the phase
+    // even when KC grouped many rules into one file — a false signal when
+    // the user wants per-rule verification. Now every rule id in the
+    // catalog must appear in some skill name. The scripts/ heuristic is
+    // preserved as a secondary gate on skill depth.
+    const allCovered = this.totalRules.every((r) => this.ruleIdsCovered.has(r));
+    if (!allCovered) return false;
+    // v0.6.1 A2: tasks-parity gate. The 17-minute skill_authoring transition
+    // in E2E #4 happened because D2 fired on 20 skeleton SK01-SK20 dirs
+    // covering all 110 rule_ids by filename, while only ~5 of 110 per-rule
+    // skill_authoring tasks had actually been worked on. Now require every
+    // per-rule task in TaskManager to be in a terminal state (completed or
+    // failed). Subagents (no taskManager) skip this gate.
+    if (this._taskManager) {
+      const total = this._taskManager.countByPhase("skill_authoring");
+      if (total > 0) {
+        const completed = this._taskManager.countByPhase("skill_authoring", "completed");
+        const failed = this._taskManager.countByPhase("skill_authoring", "failed");
+        if (completed + failed < total) return false;
+      }
+    }
+    return this.skillsWithScripts.length >= Math.max(1, this.skillsAuthored.length * 0.5);
   }
   exportState() {

package/src/agent/skill-loader.js CHANGED

@@ -5,6 +5,46 @@ import { fileURLToPath } from "node:url";
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const BUNDLED_SKILLS_DIR = path.resolve(__dirname, "../../template/skills");
+// D3b: Phase-relevance map. Skills not listed here are always visible
+// (safe default for future additions). Skills listed here are only
+// included in the context index for the named phases — unrelated
+// phases save the system-prompt budget. This is a soft filter: the
+// agent can still `workspace_file` read any skill on-demand.
+//
+// Keep this close to the skill set it describes — one hardcoded table
+// per release, not spread across files. When adding a skill to
+// template/skills/, add it here if phase-specific, or leave it out
+// to default to always-visible.
+const PHASE_RELEVANT_SKILLS = {
+  "bootstrap-workspace": ["bootstrap"],
+  "rule-extraction":     ["bootstrap", "extraction"],
+  "rule-graph":          ["extraction", "skill_authoring"],
+  "task-decomposition":  ["extraction", "skill_authoring", "distillation"],
+  "skill-authoring":     ["skill_authoring", "skill_testing"],
+  "skill-to-workflow":   ["distillation"],
+  "evolution-loop":      ["skill_testing", "distillation", "production_qc"],
+  "version-control":     ["bootstrap", "extraction", "skill_authoring", "skill_testing", "distillation", "production_qc", "finalization"],
+  "quality-control":     ["production_qc", "finalization"],
+  "confidence-system":   ["distillation", "production_qc"],
+  "dashboard-reporting": ["production_qc", "finalization"],
+  "cross-document-verification": ["production_qc"],
+  "corner-case-management": ["skill_testing", "distillation", "production_qc"],
+  "data-sensibility":    ["extraction", "skill_authoring"],
+  "entity-extraction":   ["skill_authoring", "distillation"],
+  "document-parsing":    ["bootstrap", "extraction", "skill_authoring"],
+  "document-chunking":   ["bootstrap", "extraction"],
+  "tree-processing":     ["skill_authoring", "skill_testing"],
+  "compliance-judgment": ["skill_authoring", "skill_testing", "production_qc"],
+  "skill-creator":       ["skill_authoring"],
+};
+function isSkillRelevantToPhase(skillName, phase) {
+  const relevantPhases = PHASE_RELEVANT_SKILLS[skillName];
+  if (!relevantPhases) return true; // unknown skill → always visible
+  if (!phase) return true; // caller didn't pass phase → always visible
+  return relevantPhases.includes(phase);
+}
 /**
  * Discover and index meta skills from template/skills/.
  * Follows Claude Code's pattern: skills are NOT dumped into the system prompt.
@@ -79,15 +119,25 @@ export class SkillLoader {
   /**
    * Format the skill index for injection into agent context.
    * Brief listing — agent reads full content on demand.
+   *
+   * D3b: when `phase` is provided, filter out skills that aren't relevant
+   * to the phase (per PHASE_RELEVANT_SKILLS). Unknown skills stay visible
+   * so new additions to template/skills/ aren't accidentally hidden.
+   *
+   * @param {string} [phase] - Current engine phase for filtering
    * @returns {string}
    */
-  formatForContext() {
+  formatForContext(phase) {
     const index = this.getIndex();
     if (index.length === 0) return "";
-    const metaMeta = index.filter((s) => s.category === "meta-meta");
-    const meta = index.filter((s) => s.category === "meta");
-    const other = index.filter((s) => s.category !== "meta-meta" && s.category !== "meta");
+    const visible = phase
+      ? index.filter((s) => isSkillRelevantToPhase(s.name, phase))
+      : index;
+    const metaMeta = visible.filter((s) => s.category === "meta-meta");
+    const meta = visible.filter((s) => s.category === "meta");
+    const other = visible.filter((s) => s.category !== "meta-meta" && s.category !== "meta");
     const lines = ["## Available Methodology Skills",
       "Read full skill content from the skills/ directory when needed.\n"];

package/src/agent/task-manager.js CHANGED

@@ -62,13 +62,65 @@ export class TaskManager {
   }
   /**
-   * Get the next pending task.
+   * Get the next pending task (read-only). For serial-mode callers.
+   * Parallel workers MUST use claimNextPending() to avoid racing.
    * @returns {object|null}
    */
   getNextPending() {
     return this._tasks.find((t) => t.status === "pending") || null;
   }
+  /**
+   * B2: Atomically claim the next pending task — flips status to
+   * "in_progress" and records the worker. Single-threaded JavaScript
+   * means this is race-free WITHOUT a filesystem lock as long as neither
+   * the find nor the status mutation awaits, because the event loop
+   * won't interleave another worker's call between them. If we ever
+   * move TaskManager to share state across processes (unlikely; each
+   * session has its own file), wrap with workspace.withFileLock.
+   *
+   * @param {string} [workerLabel] - optional identifier for the claimer,
+   *   stored on the task for debugging + the TUI taskboard.
+   * @returns {object|null} The claimed task, or null if none pending.
+   */
+  claimNextPending(workerLabel) {
+    const task = this._tasks.find((t) => t.status === "pending");
+    if (!task) return null;
+    task.status = "in_progress";
+    task.startedAt = new Date().toISOString();
+    if (workerLabel) task.worker = String(workerLabel);
+    this.save();
+    return task;
+  }
+  /**
+   * B2: Mark a previously-claimed task as done. Pass an optional
+   * summary for the taskboard / display. Worker label is cleared since
+   * the task has left in_progress state.
+   */
+  markDone(id, summary) {
+    const task = this._tasks.find((t) => t.id === id);
+    if (!task) return;
+    task.status = "completed";
+    task.completedAt = new Date().toISOString();
+    if (summary !== undefined) task.summary = summary;
+    delete task.worker;
+    this.save();
+  }
+  /**
+   * B2: Mark a claimed task as failed. Preserves the worker label so
+   * post-mortems can trace which slot crashed.
+   */
+  markFailed(id, errorMessage) {
+    const task = this._tasks.find((t) => t.id === id);
+    if (!task) return;
+    task.status = "failed";
+    task.completedAt = new Date().toISOString();
+    if (errorMessage) task.summary = String(errorMessage).slice(0, 500);
+    this.save();
+  }
   /**
    * Get all tasks.
    * @returns {Array}
@@ -87,12 +139,23 @@ export class TaskManager {
   // --- Bulk creation from rule catalog ---
   /**
-   * Create one task per rule for a given phase.
-   * Reads rules from the provided array (typically from rules/catalog.json).
+   * Phases where one-task-per-rule is the natural unit of work.
+   * For BOOTSTRAP / EXTRACTION the unit is a regulation (one PDF → many rules);
+   * ralph-loop shouldn't drive per-rule there because the rules don't exist yet
+   * (or are the *output*, not the input) — see E2E #3 coverage check.
+   */
+  static PER_RULE_PHASES = new Set(["skill_authoring", "skill_testing"]);
+  /**
+   * Create one task per rule for a given phase — but only if the phase's unit
+   * of work is actually a rule. For other phases this is a no-op, and any
+   * per-regulation tasks are created separately at session init.
+   *
    * @param {Array<{id: string, title?: string, description?: string}>} rules
    * @param {string} phase - The phase these tasks belong to
    */
   createRuleTasks(rules, phase) {
+    if (!TaskManager.PER_RULE_PHASES.has(phase)) return;
     for (const rule of rules) {
       const ruleId = rule.id || rule.rule_id;
       const title = rule.title || rule.description || ruleId;
@@ -119,6 +182,21 @@ export class TaskManager {
     return { total, completed, inProgress, pending, failed };
   }
+  /**
+   * v0.6.1 A2: Phase-scoped task count. Used by SkillAuthoringPipeline's
+   * exitCriteriaMet to gate phase advance on TaskManager parity, not just
+   * filename-regex coverage. Pass a status to filter; omit for total.
+   *
+   * @param {string} phase - Phase name (e.g., "skill_authoring")
+   * @param {string|null} [status] - Optional status filter ("completed", "pending", etc.)
+   * @returns {number}
+   */
+  countByPhase(phase, status = null) {
+    return this._tasks.filter(
+      (t) => t.phase === phase && (status == null || t.status === status),
+    ).length;
+  }
   /**
    * Format task list for injection into system prompt context.
    * Compact checklist — not conversation history.