kc-beta 0.5.6 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/QUICKSTART.md +17 -4
  2. package/README.md +58 -11
  3. package/bin/kc-beta.js +35 -1
  4. package/package.json +1 -1
  5. package/src/agent/bundle-tree.js +553 -0
  6. package/src/agent/context.js +40 -1
  7. package/src/agent/engine.js +828 -31
  8. package/src/agent/llm-client.js +67 -18
  9. package/src/agent/pipelines/distillation.js +15 -0
  10. package/src/agent/pipelines/extraction.js +60 -3
  11. package/src/agent/pipelines/finalization.js +186 -0
  12. package/src/agent/pipelines/index.js +8 -0
  13. package/src/agent/pipelines/initializer.js +40 -0
  14. package/src/agent/pipelines/production-qc.js +63 -13
  15. package/src/agent/pipelines/skill-authoring.js +136 -7
  16. package/src/agent/skill-loader.js +54 -4
  17. package/src/agent/task-manager.js +81 -3
  18. package/src/agent/tools/agent-tool.js +283 -35
  19. package/src/agent/tools/bundle-search.js +146 -0
  20. package/src/agent/tools/document-chunk.js +246 -0
  21. package/src/agent/tools/document-classify.js +311 -0
  22. package/src/agent/tools/document-parse.js +8 -1
  23. package/src/agent/tools/phase-advance.js +30 -7
  24. package/src/agent/tools/registry.js +10 -0
  25. package/src/agent/tools/rule-catalog.js +17 -3
  26. package/src/agent/tools/sandbox-exec.js +30 -0
  27. package/src/agent/tools/workflow-run.js +34 -1
  28. package/src/agent/workspace.js +168 -14
  29. package/src/cli/components.js +165 -17
  30. package/src/cli/index.js +166 -19
  31. package/src/cli/meme.js +58 -0
  32. package/src/config.js +39 -2
  33. package/src/providers.js +26 -0
  34. package/template/skills/en/meta-meta/evolution-loop/SKILL.md +13 -1
  35. package/template/skills/en/meta-meta/rule-extraction/SKILL.md +74 -0
  36. package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +7 -1
  37. package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +73 -0
@@ -4,9 +4,18 @@ import { Phase, PipelineEvent } from "./index.js";
4
4
  import { Pipeline } from "./base.js";
5
5
 
6
6
  export class SkillAuthoringPipeline extends Pipeline {
7
- constructor(workspace) {
7
+ /**
8
+ * @param {Workspace} workspace
9
+ * @param {TaskManager|null} [taskManager] - v0.6.1 A2: pass the engine's
10
+ * TaskManager so exitCriteriaMet can require task-completion parity in
11
+ * addition to D2 filename coverage. Subagents pass null (no taskManager
12
+ * in subagent scope), in which case the gate falls back to D2-only
13
+ * behaviour.
14
+ */
15
+ constructor(workspace, taskManager = null) {
8
16
  super();
9
17
  this._workspace = workspace;
18
+ this._taskManager = taskManager;
10
19
  this.totalRules = [];
11
20
  this.skillsAuthored = [];
12
21
  this.skillsWithScripts = [];
@@ -34,6 +43,14 @@ export class SkillAuthoringPipeline extends Pipeline {
34
43
  _scanSkills() {
35
44
  this.skillsAuthored = [];
36
45
  this.skillsWithScripts = [];
46
+ // D2: rule_ids that are covered by some authored skill — whether that
47
+ // skill is single-rule (rule_skills/R014/) or grouped
48
+ // (rule_skills/SK02/check_r002_r007.py). Populated by _walkForRuleIds
49
+ // below so the exit criterion counts DISTINCT rule coverage rather
50
+ // than skill-directory count, which over-counts when skills are
51
+ // grouped (session 6304673afaa0's rule_skills/ had 289 rules packed
52
+ // into 23 skill files).
53
+ this.ruleIdsCovered = new Set();
37
54
  const dir = path.join(this._workspace.cwd, "rule_skills");
38
55
  if (!fs.existsSync(dir)) return;
39
56
  for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
@@ -46,19 +63,109 @@ export class SkillAuthoringPipeline extends Pipeline {
46
63
  if (fs.existsSync(scriptsDir) && fs.readdirSync(scriptsDir).length > 0) {
47
64
  this.skillsWithScripts.push(e.name);
48
65
  }
66
+ this._walkForRuleIds(skillPath);
49
67
  }
50
68
  }
51
69
 
70
+ /**
71
+ * D2: Find rule_ids referenced by any file under the skill directory.
72
+ * Recognizes three naming patterns from actual sessions:
73
+ * - Directory name matches a rule: rule_skills/R014/
74
+ * - Single-rule script: check_r014.py
75
+ * - Grouped script: check_r002_r007.py → covers R002 through R007
76
+ */
77
+ _walkForRuleIds(skillDir) {
78
+ const dirName = path.basename(skillDir);
79
+ const dirMatch = dirName.match(/^R0*(\d+)$/i);
80
+ if (dirMatch) this.ruleIdsCovered.add(`R${String(parseInt(dirMatch[1], 10)).padStart(3, "0")}`);
81
+
82
+ const walk = (d) => {
83
+ let entries;
84
+ try { entries = fs.readdirSync(d, { withFileTypes: true }); }
85
+ catch { return; }
86
+ for (const e of entries) {
87
+ if (e.name.startsWith(".")) continue;
88
+ const p = path.join(d, e.name);
89
+ if (e.isDirectory()) { walk(p); continue; }
90
+ // Per-rule: check_r014.py
91
+ const single = e.name.match(/check_r0*(\d+)\.py$/i);
92
+ if (single) {
93
+ this.ruleIdsCovered.add(`R${String(parseInt(single[1], 10)).padStart(3, "0")}`);
94
+ continue;
95
+ }
96
+ // Grouped: check_r002_r007.py, check_r002-r007.py, check_r59_r77.py
97
+ const grouped = e.name.match(/check_r0*(\d+)[_-]+r0*(\d+)\.py$/i);
98
+ if (grouped) {
99
+ const lo = parseInt(grouped[1], 10);
100
+ const hi = parseInt(grouped[2], 10);
101
+ for (let n = lo; n <= hi; n++) {
102
+ this.ruleIdsCovered.add(`R${String(n).padStart(3, "0")}`);
103
+ }
104
+ continue;
105
+ }
106
+ // Directory names that encode ranges: R078_R128/
107
+ // handled by caller passing skillDir
108
+ }
109
+ };
110
+ // Also handle dirs named like R078_R128/
111
+ const rangeDir = dirName.match(/^R0*(\d+)[_-]R0*(\d+)$/i);
112
+ if (rangeDir) {
113
+ const lo = parseInt(rangeDir[1], 10);
114
+ const hi = parseInt(rangeDir[2], 10);
115
+ for (let n = lo; n <= hi; n++) {
116
+ this.ruleIdsCovered.add(`R${String(n).padStart(3, "0")}`);
117
+ }
118
+ }
119
+ walk(skillDir);
120
+ }
121
+
52
122
  describeState() {
53
123
  this._scanWorkspace();
54
124
  const total = this.totalRules.length;
55
- const authored = this.skillsAuthored.length;
56
- const remaining = this.totalRules.filter((r) => !this.skillsAuthored.includes(r));
57
- const parts = ["## Phase: SKILL_AUTHORING\nWrite verification skills for each extracted rule. Skills are first-class deliverables — they may serve as the production solution when worker LLM workflows are insufficient. Follow Anthropic skill-creator format. This is BUILD mode."];
58
- parts.push(`### Progress\n- Rules: ${total}\n- Skills authored: ${authored}\n- Skills with scripts/: ${this.skillsWithScripts.length}${remaining.length > 0 ? `\n- Remaining: ${remaining.slice(0, 10).join(", ")}` : ""}`);
125
+ const covered = this.ruleIdsCovered.size;
126
+ const uncovered = this.totalRules.filter((r) => !this.ruleIdsCovered.has(r));
127
+ const parts = [
128
+ "## Phase: SKILL_AUTHORING\n" +
129
+ "Write verification skills for each extracted rule. Skills are first-class " +
130
+ "deliverables — they may serve as the production solution when worker LLM " +
131
+ "workflows are insufficient. Follow Anthropic skill-creator format. This is " +
132
+ "BUILD mode.\n\n" +
133
+ // D2: soft granularity nudge
134
+ "**Granularity preference:** 1 rule = 1 skill directory. Group rules into " +
135
+ "the same file ONLY when they share evidence and fail together (e.g. " +
136
+ "siblings from the same required-fields table). When grouping, name the " +
137
+ "file with the range: `check_r002_r007.py`. Downstream consumers " +
138
+ "(workflow-run, dashboards) count rule coverage by parsing these names, " +
139
+ "so the file-naming matters.\n\n" +
140
+ "**Do not write to rules/catalog.json via sandbox_exec.** Use the " +
141
+ "`rule_catalog` tool for any catalog edits — sandbox_exec bypasses the " +
142
+ "workspace file lock and races with parallel workers."
143
+ ];
144
+ // v0.6.1 A2: surface task-completion parity so the agent sees the gate
145
+ let taskLine = "";
146
+ if (this._taskManager) {
147
+ const totalT = this._taskManager.countByPhase("skill_authoring");
148
+ const doneT = this._taskManager.countByPhase("skill_authoring", "completed");
149
+ const failedT = this._taskManager.countByPhase("skill_authoring", "failed");
150
+ if (totalT > 0) {
151
+ taskLine = `\n- Per-rule tasks completed: ${doneT}/${totalT}` +
152
+ (failedT > 0 ? ` (+${failedT} failed)` : "");
153
+ }
154
+ }
155
+ parts.push(
156
+ `### Progress (rule-id coverage, D2)\n` +
157
+ `- Total rules in catalog: ${total}\n` +
158
+ `- Rule ids covered by some skill: ${covered}\n` +
159
+ `- Skill directories authored: ${this.skillsAuthored.length}\n` +
160
+ `- Skills with scripts/: ${this.skillsWithScripts.length}` +
161
+ taskLine +
162
+ (uncovered.length > 0
163
+ ? `\n- Missing coverage (${uncovered.length}): ${uncovered.slice(0, 15).join(", ")}${uncovered.length > 15 ? "…" : ""}`
164
+ : ""),
165
+ );
59
166
 
60
167
  if (this.exitCriteriaMet()) {
61
- parts.push("### Exit\nAll rules have skills. Proceed to SKILL_TESTING.");
168
+ parts.push("### Exit\nAll rule ids are covered by some skill. Proceed to SKILL_TESTING.");
62
169
  }
63
170
  return parts.join("\n\n");
64
171
  }
@@ -75,7 +182,29 @@ export class SkillAuthoringPipeline extends Pipeline {
75
182
 
76
183
  exitCriteriaMet() {
77
184
  if (!this.totalRules.length) return false;
78
- return this.skillsAuthored.length >= this.totalRules.length && this.skillsWithScripts.length >= this.skillsAuthored.length * 0.5;
185
+ // D2: exit requires distinct rule-id coverage, not skill-dir count.
186
+ // Original heuristic (skillsAuthored >= totalRules) passed the phase
187
+ // even when KC grouped many rules into one file — a false signal when
188
+ // the user wants per-rule verification. Now every rule id in the
189
+ // catalog must appear in some skill name. The scripts/ heuristic is
190
+ // preserved as a secondary gate on skill depth.
191
+ const allCovered = this.totalRules.every((r) => this.ruleIdsCovered.has(r));
192
+ if (!allCovered) return false;
193
+ // v0.6.1 A2: tasks-parity gate. The 17-minute skill_authoring transition
194
+ // in E2E #4 happened because D2 fired on 20 skeleton SK01-SK20 dirs
195
+ // covering all 110 rule_ids by filename, while only ~5 of 110 per-rule
196
+ // skill_authoring tasks had actually been worked on. Now require every
197
+ // per-rule task in TaskManager to be in a terminal state (completed or
198
+ // failed). Subagents (no taskManager) skip this gate.
199
+ if (this._taskManager) {
200
+ const total = this._taskManager.countByPhase("skill_authoring");
201
+ if (total > 0) {
202
+ const completed = this._taskManager.countByPhase("skill_authoring", "completed");
203
+ const failed = this._taskManager.countByPhase("skill_authoring", "failed");
204
+ if (completed + failed < total) return false;
205
+ }
206
+ }
207
+ return this.skillsWithScripts.length >= Math.max(1, this.skillsAuthored.length * 0.5);
79
208
  }
80
209
 
81
210
  exportState() {
@@ -5,6 +5,46 @@ import { fileURLToPath } from "node:url";
5
5
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
6
6
  const BUNDLED_SKILLS_DIR = path.resolve(__dirname, "../../template/skills");
7
7
 
8
// D3b: Phase-relevance map. Skills not listed here are always visible
// (safe default for future additions). Skills listed here are only
// included in the context index for the named phases, so unrelated
// phases save system-prompt budget. This is a soft filter: the agent
// can still `workspace_file` read any skill on demand.
//
// Keep this close to the skill set it describes: one hardcoded table
// per release, not spread across files. When adding a skill to
// template/skills/, add it here if phase-specific, or leave it out
// to default to always-visible.
const PHASE_RELEVANT_SKILLS = {
  "bootstrap-workspace": ["bootstrap"],
  "rule-extraction": ["bootstrap", "extraction"],
  "rule-graph": ["extraction", "skill_authoring"],
  "task-decomposition": ["extraction", "skill_authoring", "distillation"],
  "skill-authoring": ["skill_authoring", "skill_testing"],
  "skill-to-workflow": ["distillation"],
  "evolution-loop": ["skill_testing", "distillation", "production_qc"],
  "version-control": ["bootstrap", "extraction", "skill_authoring", "skill_testing", "distillation", "production_qc", "finalization"],
  "quality-control": ["production_qc", "finalization"],
  "confidence-system": ["distillation", "production_qc"],
  "dashboard-reporting": ["production_qc", "finalization"],
  "cross-document-verification": ["production_qc"],
  "corner-case-management": ["skill_testing", "distillation", "production_qc"],
  "data-sensibility": ["extraction", "skill_authoring"],
  "entity-extraction": ["skill_authoring", "distillation"],
  "document-parsing": ["bootstrap", "extraction", "skill_authoring"],
  "document-chunking": ["bootstrap", "extraction"],
  "tree-processing": ["skill_authoring", "skill_testing"],
  "compliance-judgment": ["skill_authoring", "skill_testing", "production_qc"],
  "skill-creator": ["skill_authoring"],
};

/**
 * Decide whether a skill should be listed in the context index for a phase.
 *
 * @param {string} skillName - Skill name as it appears in the index.
 * @param {string} [phase] - Current phase; when falsy nothing is filtered.
 * @returns {boolean} False only when the skill is listed in
 *   PHASE_RELEVANT_SKILLS and `phase` is not one of its phases.
 */
function isSkillRelevantToPhase(skillName, phase) {
  // Own-property check: a bare `table[skillName]` lookup would resolve
  // inherited keys such as "constructor", "toString" or "__proto__" to
  // non-array values and then throw on `.includes`. Such names count as
  // unknown skills, which are always visible.
  if (!Object.prototype.hasOwnProperty.call(PHASE_RELEVANT_SKILLS, skillName)) return true;
  if (!phase) return true; // caller didn't pass a phase, so always visible
  return PHASE_RELEVANT_SKILLS[skillName].includes(phase);
}
47
+
8
48
  /**
9
49
  * Discover and index meta skills from template/skills/.
10
50
  * Follows Claude Code's pattern: skills are NOT dumped into the system prompt.
@@ -79,15 +119,25 @@ export class SkillLoader {
79
119
  /**
80
120
  * Format the skill index for injection into agent context.
81
121
  * Brief listing — agent reads full content on demand.
122
+ *
123
+ * D3b: when `phase` is provided, filter out skills that aren't relevant
124
+ * to the phase (per PHASE_RELEVANT_SKILLS). Unknown skills stay visible
125
+ * so new additions to template/skills/ aren't accidentally hidden.
126
+ *
127
+ * @param {string} [phase] - Current engine phase for filtering
82
128
  * @returns {string}
83
129
  */
84
- formatForContext() {
130
+ formatForContext(phase) {
85
131
  const index = this.getIndex();
86
132
  if (index.length === 0) return "";
87
133
 
88
- const metaMeta = index.filter((s) => s.category === "meta-meta");
89
- const meta = index.filter((s) => s.category === "meta");
90
- const other = index.filter((s) => s.category !== "meta-meta" && s.category !== "meta");
134
+ const visible = phase
135
+ ? index.filter((s) => isSkillRelevantToPhase(s.name, phase))
136
+ : index;
137
+
138
+ const metaMeta = visible.filter((s) => s.category === "meta-meta");
139
+ const meta = visible.filter((s) => s.category === "meta");
140
+ const other = visible.filter((s) => s.category !== "meta-meta" && s.category !== "meta");
91
141
 
92
142
  const lines = ["## Available Methodology Skills",
93
143
  "Read full skill content from the skills/ directory when needed.\n"];
@@ -62,13 +62,65 @@ export class TaskManager {
62
62
  }
63
63
 
64
64
  /**
65
- * Get the next pending task.
65
+ * Get the next pending task (read-only). For serial-mode callers.
66
+ * Parallel workers MUST use claimNextPending() to avoid racing.
66
67
  * @returns {object|null}
67
68
  */
68
69
  getNextPending() {
69
70
  return this._tasks.find((t) => t.status === "pending") || null;
70
71
  }
71
72
 
73
+ /**
74
+ * B2: Atomically claim the next pending task — flips status to
75
+ * "in_progress" and records the worker. Single-threaded JavaScript
76
+ * means this is race-free WITHOUT a filesystem lock as long as neither
77
+ * the find nor the status mutation awaits, because the event loop
78
+ * won't interleave another worker's call between them. If we ever
79
+ * move TaskManager to share state across processes (unlikely; each
80
+ * session has its own file), wrap with workspace.withFileLock.
81
+ *
82
+ * @param {string} [workerLabel] - optional identifier for the claimer,
83
+ * stored on the task for debugging + the TUI taskboard.
84
+ * @returns {object|null} The claimed task, or null if none pending.
85
+ */
86
+ claimNextPending(workerLabel) {
87
+ const task = this._tasks.find((t) => t.status === "pending");
88
+ if (!task) return null;
89
+ task.status = "in_progress";
90
+ task.startedAt = new Date().toISOString();
91
+ if (workerLabel) task.worker = String(workerLabel);
92
+ this.save();
93
+ return task;
94
+ }
95
+
96
+ /**
97
+ * B2: Mark a previously-claimed task as done. Pass an optional
98
+ * summary for the taskboard / display. Worker label is cleared since
99
+ * the task has left in_progress state.
100
+ */
101
+ markDone(id, summary) {
102
+ const task = this._tasks.find((t) => t.id === id);
103
+ if (!task) return;
104
+ task.status = "completed";
105
+ task.completedAt = new Date().toISOString();
106
+ if (summary !== undefined) task.summary = summary;
107
+ delete task.worker;
108
+ this.save();
109
+ }
110
+
111
+ /**
112
+ * B2: Mark a claimed task as failed. Preserves the worker label so
113
+ * post-mortems can trace which slot crashed.
114
+ */
115
+ markFailed(id, errorMessage) {
116
+ const task = this._tasks.find((t) => t.id === id);
117
+ if (!task) return;
118
+ task.status = "failed";
119
+ task.completedAt = new Date().toISOString();
120
+ if (errorMessage) task.summary = String(errorMessage).slice(0, 500);
121
+ this.save();
122
+ }
123
+
72
124
  /**
73
125
  * Get all tasks.
74
126
  * @returns {Array}
@@ -87,12 +139,23 @@ export class TaskManager {
87
139
  // --- Bulk creation from rule catalog ---
88
140
 
89
141
  /**
90
- * Create one task per rule for a given phase.
91
- * Reads rules from the provided array (typically from rules/catalog.json).
142
+ * Phases where one-task-per-rule is the natural unit of work.
143
+ * For BOOTSTRAP / EXTRACTION the unit is a regulation (one PDF → many rules);
144
+ * ralph-loop shouldn't drive per-rule there because the rules don't exist yet
145
+ * (or are the *output*, not the input) — see E2E #3 coverage check.
146
+ */
147
+ static PER_RULE_PHASES = new Set(["skill_authoring", "skill_testing"]);
148
+
149
+ /**
150
+ * Create one task per rule for a given phase — but only if the phase's unit
151
+ * of work is actually a rule. For other phases this is a no-op, and any
152
+ * per-regulation tasks are created separately at session init.
153
+ *
92
154
  * @param {Array<{id: string, title?: string, description?: string}>} rules
93
155
  * @param {string} phase - The phase these tasks belong to
94
156
  */
95
157
  createRuleTasks(rules, phase) {
158
+ if (!TaskManager.PER_RULE_PHASES.has(phase)) return;
96
159
  for (const rule of rules) {
97
160
  const ruleId = rule.id || rule.rule_id;
98
161
  const title = rule.title || rule.description || ruleId;
@@ -119,6 +182,21 @@ export class TaskManager {
119
182
  return { total, completed, inProgress, pending, failed };
120
183
  }
121
184
 
185
+ /**
186
+ * v0.6.1 A2: Phase-scoped task count. Used by SkillAuthoringPipeline's
187
+ * exitCriteriaMet to gate phase advance on TaskManager parity, not just
188
+ * filename-regex coverage. Pass a status to filter; omit for total.
189
+ *
190
+ * @param {string} phase - Phase name (e.g., "skill_authoring")
191
+ * @param {string|null} [status] - Optional status filter ("completed", "pending", etc.)
192
+ * @returns {number}
193
+ */
194
+ countByPhase(phase, status = null) {
195
+ return this._tasks.filter(
196
+ (t) => t.phase === phase && (status == null || t.status === status),
197
+ ).length;
198
+ }
199
+
122
200
  /**
123
201
  * Format task list for injection into system prompt context.
124
202
  * Compact checklist — not conversation history.