kc-beta 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import os from "node:os";
4
+ import { Phase, PipelineEvent } from "./index.js";
5
+ import { Pipeline } from "./base.js";
6
+
7
/**
 * Sub-directories created under the workspace root during bootstrap.
 * Frozen so the shared list cannot be mutated by accident.
 */
const REQUIRED_DIRS = Object.freeze(["rules", "samples", "input", "output", "logs", "workflows", "rule_skills"]);

/**
 * Template for the workspace `.env` file, written only when no `.env` exists yet.
 * Individual `KEY=value` lines may be overridden from the user's global config
 * before the file is written, so every setting must stay on its own line.
 */
const DEFAULT_ENV = `# === KC Agent Project Configuration ===

# Language: en | zh
LANGUAGE=en

# === Worker LLM API (SiliconFlow) ===
SILICONFLOW_API_KEY=
SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1

# === Worker LLM Tiers (highest capability to lowest) ===
TIER1=Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5
TIER2=Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5, Qwen/Qwen3.5-397B-A17B
TIER3=Qwen/Qwen3.5-122B-A10B
TIER4=Qwen/Qwen3.5-35B-A3B

# === OCR Model Tiers ===
OCR_MODEL_TIER1=zai-org/GLM-4.6V

# === Quality Thresholds ===
SKILL_ACCURACY=0.9
WORKFLOW_ACCURACY=0.9
MONITOR_FREQUENCY=mid

# === Evolution Control ===
MAX_ITERATIONS=20
`;
35
+
36
/**
 * BOOTSTRAP-phase pipeline.
 *
 * On construction it creates the workspace directory skeleton, writes a
 * default `.env` (seeded from the user's global config when available) and a
 * `versions.json` manifest, then records which bootstrap prerequisites are
 * already satisfied. The phase is complete once the workspace exists, an API
 * key is configured, and both `rules/` and `samples/` contain at least one file.
 */
export class ProjectInitializer extends Pipeline {
  /**
   * @param {{cwd: string}} workspace - Object whose `cwd` is the workspace root directory.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.workspaceCreated = false;
    this.configReady = false;
    this.hasRegulations = false;
    this.hasSamples = false;
    this._setupWorkspace();
  }

  /**
   * Creates missing directories and files, then refreshes the readiness flags.
   * Existing `.env` and `versions.json` files are never overwritten.
   */
  _setupWorkspace() {
    const root = this._workspace.cwd;
    for (const d of REQUIRED_DIRS) {
      fs.mkdirSync(path.join(root, d), { recursive: true });
    }

    const envPath = path.join(root, ".env");
    if (!fs.existsSync(envPath)) {
      fs.writeFileSync(envPath, this._buildEnvContent(this._loadGlobalConfig()), "utf-8");
    }

    const manifestPath = path.join(root, "versions.json");
    if (!fs.existsSync(manifestPath)) {
      fs.writeFileSync(manifestPath, JSON.stringify({ version: "0.1.0", entries: [] }, null, 2), "utf-8");
    }

    this.workspaceCreated = true;
    this._checkRegulations();
    this._checkSamples();
    this._checkConfig();
  }

  /**
   * Returns DEFAULT_ENV with selected `KEY=value` lines overridden from the
   * global config.
   *
   * Values are substituted line by line instead of through
   * `String.prototype.replace` with a replacement string, because `replace`
   * interprets `$&`, `$$`, `$'` etc. in the replacement and would corrupt
   * keys or URLs that contain a dollar sign.
   *
   * @param {object} gc - Global config as returned by `_loadGlobalConfig()`.
   * @returns {string} Content for the new `.env` file.
   */
  _buildEnvContent(gc) {
    const overrides = new Map();
    if (gc.api_key) overrides.set("SILICONFLOW_API_KEY", gc.api_key);
    if (gc.base_url) overrides.set("SILICONFLOW_BASE_URL", gc.base_url);
    if (gc.accuracy_threshold) {
      // One global threshold seeds both the skill and the workflow setting.
      overrides.set("SKILL_ACCURACY", gc.accuracy_threshold);
      overrides.set("WORKFLOW_ACCURACY", gc.accuracy_threshold);
    }
    const tiers = gc.tiers || {};
    for (const tk of ["tier1", "tier2", "tier3", "tier4"]) {
      if (tiers[tk]) overrides.set(tk.toUpperCase(), tiers[tk]);
    }
    if (overrides.size === 0) return DEFAULT_ENV;

    return DEFAULT_ENV.split("\n")
      .map((line) => {
        const eq = line.indexOf("=");
        if (eq <= 0) return line;
        // Exact key match, so e.g. OCR_MODEL_TIER1 is not mistaken for TIER1.
        const key = line.slice(0, eq);
        return overrides.has(key) ? `${key}=${overrides.get(key)}` : line;
      })
      .join("\n");
  }

  /**
   * Reports whether a workspace sub-directory exists and directly contains
   * at least one regular file (sub-directories do not count).
   *
   * @param {string} name - Directory name relative to the workspace root.
   * @returns {boolean}
   */
  _dirHasFiles(name) {
    const dir = path.join(this._workspace.cwd, name);
    if (!fs.existsSync(dir)) return false;
    return fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile());
  }

  /** Sets `hasRegulations` from the current contents of `rules/`. */
  _checkRegulations() {
    this.hasRegulations = this._dirHasFiles("rules");
  }

  /** Sets `hasSamples` from the current contents of `samples/`. */
  _checkSamples() {
    this.hasSamples = this._dirHasFiles("samples");
  }

  /**
   * Sets `configReady`: true when the workspace `.env` has a non-empty
   * SILICONFLOW_API_KEY, otherwise true only if the global config has an `api_key`.
   */
  _checkConfig() {
    const envPath = path.join(this._workspace.cwd, ".env");
    if (fs.existsSync(envPath)) {
      const prefix = "SILICONFLOW_API_KEY=";
      const keyIsSet = fs
        .readFileSync(envPath, "utf-8")
        .split("\n")
        .some((line) => line.startsWith(prefix) && line.slice(prefix.length).trim() !== "");
      if (keyIsSet) {
        this.configReady = true;
        return;
      }
    }
    this.configReady = Boolean(this._loadGlobalConfig().api_key);
  }

  /**
   * Reads `~/.kc_agent/config.json`.
   *
   * @returns {object} The parsed config, or `{}` when the file is missing,
   *   unreadable, not valid JSON, or valid JSON that is not a plain object
   *   (e.g. `null` or an array), so callers can always read properties from it.
   */
  _loadGlobalConfig() {
    const p = path.join(os.homedir(), ".kc_agent", "config.json");
    try {
      const parsed = JSON.parse(fs.readFileSync(p, "utf-8"));
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) return parsed;
    } catch {
      /* best effort: a missing or broken global config means "no global config" */
    }
    return {};
  }

  /**
   * Renders the bootstrap checklist as markdown from the current flags.
   *
   * @returns {string} Markdown describing completed and pending items.
   */
  describeState() {
    const completed = [];
    const pending = [];
    const tick = (done, doneText, pendingText) => (done ? completed : pending).push(done ? doneText : pendingText);
    tick(this.workspaceCreated, "Workspace structure created", "Create workspace structure");
    tick(this.configReady, "Configuration ready (API keys set)", "Configure .env (API key needed)");
    tick(this.hasRegulations, "Regulation documents available in rules/", "Regulation documents needed in rules/");
    tick(this.hasSamples, "Sample documents available in samples/", "Sample documents needed in samples/");

    const parts = ["## Current Phase: BOOTSTRAP"];
    if (completed.length) parts.push("### Completed\n" + completed.map((c) => `- [x] ${c}`).join("\n"));
    if (pending.length) parts.push("### Pending\n" + pending.map((p) => `- [ ] ${p}`).join("\n"));

    if (this.exitCriteriaMet()) {
      parts.push("### Ready\nAll bootstrap requirements met. Proceed to EXTRACTION phase.");
    } else {
      parts.push(
        "### What to do now\nTalk to the developer user about their verification scenario:\n" +
          "- What documents do they verify?\n- What regulations apply?\n" +
          "- Ask them to provide regulation documents (save to rules/) and sample documents (save to samples/)."
      );
    }
    return parts.join("\n\n");
  }

  /**
   * Updates the readiness flags after a tool call.
   *
   * Only successful `workspace_file` calls are considered: a `write` under
   * `rules/` or `samples/` marks that prerequisite as satisfied, a write to
   * `.env` re-checks the API key, and a `list` re-scans both directories.
   *
   * @param {string} toolName - Name of the tool that ran.
   * @param {{operation?: string, path?: string}} toolInput - Input passed to the tool.
   * @param {{isError?: boolean}} result - Tool result.
   * @returns {PipelineEvent|null} A `phase_ready` event on the call that makes
   *   the exit criteria become true, otherwise null.
   */
  onToolResult(toolName, toolInput, result) {
    if (result?.isError) return null;
    const wasReady = this.exitCriteriaMet();

    if (toolName === "workspace_file") {
      const op = toolInput?.operation || "";
      // Treat "./rules/x" the same as "rules/x".
      const p = (toolInput?.path || "").replace(/^\.\//, "");
      if (op === "write") {
        if (p.startsWith("rules/")) this.hasRegulations = true;
        else if (p.startsWith("samples/")) this.hasSamples = true;
        else if (p === ".env") this._checkConfig();
      } else if (op === "list") {
        this._checkRegulations();
        this._checkSamples();
      }
    }

    if (!wasReady && this.exitCriteriaMet()) {
      return new PipelineEvent({ type: "phase_ready", message: "Bootstrap complete. Ready for EXTRACTION.", nextPhase: Phase.EXTRACTION });
    }
    return null;
  }

  /**
   * @returns {boolean} True when all four bootstrap prerequisites are satisfied.
   */
  exitCriteriaMet() {
    return this.workspaceCreated && this.configReady && this.hasRegulations && this.hasSamples;
  }
}
@@ -0,0 +1,99 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { PipelineEvent } from "./index.js";
4
+ import { Pipeline } from "./base.js";
5
+
6
/**
 * Maps the MONITOR_FREQUENCY setting (`high` | `mid` | `low`) to a sampling rate.
 * Frozen so the shared table cannot be mutated by accident.
 */
const FREQUENCY_MAP = Object.freeze({ high: 1.0, mid: 0.5, low: 0.2 });
7
+
8
/**
 * PRODUCTION_QC-phase pipeline.
 *
 * Aggregates the QC batch reports found in `<workspace>/output/qc/*.json` and
 * derives a monitoring phase from them:
 *  - "initial": fewer than three batches have been processed;
 *  - "active":  any issue was reported, or some rule is below the threshold;
 *  - "stable":  three or more batches, no issues, every rule at or above the threshold.
 */
export class ProductionQCPipeline extends Pipeline {
  /**
   * @param {{cwd: string}} workspace - Object whose `cwd` is the workspace root directory.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.batchesProcessed = 0;
    this.totalDocuments = 0;
    this.documentsReviewed = 0;
    this.accuracyByRule = {};
    this.confidenceDistribution = { low: 0, medium: 0, high: 0 };
    this.issuesFound = [];
    this.monitoringPhase = "initial";
    this._samplingRate = 0.5;
    this._accuracyThreshold = 0.9;
    this._scanWorkspace();
  }

  /** Reloads configuration, then re-aggregates the QC reports. */
  _scanWorkspace() {
    this._loadConfig();
    this._scanQcResults();
  }

  /**
   * Reads MONITOR_FREQUENCY and WORKFLOW_ACCURACY from the workspace `.env`.
   * Unknown frequency names fall back to 0.5; an unparsable accuracy leaves
   * the current threshold untouched.
   */
  _loadConfig() {
    const envPath = path.join(this._workspace.cwd, ".env");
    if (!fs.existsSync(envPath)) return;
    for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
      if (line.startsWith("MONITOR_FREQUENCY=")) {
        const key = line.split("=")[1].trim().toLowerCase();
        // Own-property check: a value such as "constructor" must not resolve
        // to something inherited from Object.prototype.
        const known = Object.prototype.hasOwnProperty.call(FREQUENCY_MAP, key);
        this._samplingRate = known ? FREQUENCY_MAP[key] : 0.5;
      }
      if (line.startsWith("WORKFLOW_ACCURACY=")) {
        // parseFloat never throws; it returns NaN, which would make every
        // ">= threshold" comparison false and block the "stable" phase forever.
        const parsed = Number.parseFloat(line.split("=")[1]);
        if (Number.isFinite(parsed)) this._accuracyThreshold = parsed;
      }
    }
  }

  /**
   * Rebuilds all counters from `output/qc/*.json` (processed in sorted file
   * name order) and recomputes `monitoringPhase`. Files that cannot be read,
   * are not valid JSON, or do not contain a JSON object are skipped entirely.
   */
  _scanQcResults() {
    this.batchesProcessed = 0;
    this.totalDocuments = 0;
    this.documentsReviewed = 0;
    this.accuracyByRule = {};
    this.confidenceDistribution = { low: 0, medium: 0, high: 0 };
    this.issuesFound = [];

    const qcDir = path.join(this._workspace.cwd, "output", "qc");
    if (!fs.existsSync(qcDir)) return;

    // Non-numeric counters are ignored instead of turning "+=" into string concatenation.
    const asCount = (v) => (typeof v === "number" && Number.isFinite(v) ? v : 0);

    for (const f of fs.readdirSync(qcDir).filter((name) => name.endsWith(".json")).sort()) {
      let data;
      try {
        data = JSON.parse(fs.readFileSync(path.join(qcDir, f), "utf-8"));
      } catch {
        continue; // best effort: unreadable or malformed reports are skipped
      }
      if (data === null || typeof data !== "object") continue;

      this.batchesProcessed++;
      this.totalDocuments += typeof data.documents === "number" ? asCount(data.documents) : asCount(data.total);
      this.documentsReviewed += asCount(data.reviewed);
      // Later files win when the same rule appears in several reports.
      if (data.accuracy_by_rule && typeof data.accuracy_by_rule === "object") {
        Object.assign(this.accuracyByRule, data.accuracy_by_rule);
      }
      if (data.confidence) {
        for (const band of ["low", "medium", "high"]) {
          this.confidenceDistribution[band] += asCount(data.confidence[band]);
        }
      }
      if (Array.isArray(data.issues)) this.issuesFound.push(...data.issues);
    }

    // NOTE(review): with no accuracy data at all, every() is vacuously true, so
    // three issue-free batches are enough to reach "stable" - confirm this is intended.
    const allRulesPass = Object.values(this.accuracyByRule).every((a) => a >= this._accuracyThreshold);
    if (this.batchesProcessed < 3) this.monitoringPhase = "initial";
    else if (this.issuesFound.length > 0) this.monitoringPhase = "active";
    else this.monitoringPhase = allRulesPass ? "stable" : "active";
  }

  /**
   * Re-scans the workspace and renders the QC progress as markdown.
   *
   * @returns {string} Markdown status report.
   */
  describeState() {
    this._scanWorkspace();
    const parts = ["## Current Phase: PRODUCTION_QC"];
    parts.push(`### Progress\n- Batches: ${this.batchesProcessed}\n- Documents: ${this.totalDocuments}\n- Reviewed: ${this.documentsReviewed}\n- Monitoring: ${this.monitoringPhase}\n- Sampling rate: ${(this._samplingRate * 100).toFixed(0)}%`);

    if (Object.keys(this.accuracyByRule).length) {
      const lines = Object.entries(this.accuracyByRule).map(([r, a]) => `- ${r}: ${a}`);
      parts.push("### Accuracy by rule\n" + lines.join("\n"));
    }

    if (this.monitoringPhase === "initial") {
      parts.push("### What to do now\nRun workflows on input/ documents. Save results to output/. Review and save QC to output/qc/.");
    } else if (this.monitoringPhase === "stable") {
      parts.push("### Status: Stable\nWorkflows running reliably. Spot-check only.");
    }
    return parts.join("\n\n");
  }

  /**
   * Re-aggregates QC reports after a successful `workspace_file` call whose
   * path contains `output/`.
   *
   * @param {string} toolName - Name of the tool that ran.
   * @param {{path?: string}} toolInput - Input passed to the tool.
   * @param {{isError?: boolean}} result - Tool result.
   * @returns {PipelineEvent|null} A `milestone` event on the call that moves
   *   monitoring into the "stable" phase, otherwise null.
   */
  onToolResult(toolName, toolInput, result) {
    if (result?.isError) return null;
    const wasStable = this.monitoringPhase === "stable";
    if (toolName === "workspace_file" && (toolInput?.path || "").includes("output/")) this._scanQcResults();
    if (!wasStable && this.monitoringPhase === "stable") {
      return new PipelineEvent({ type: "milestone", message: "Production QC reached stable monitoring phase." });
    }
    return null;
  }

  /** @returns {boolean} True once monitoring has reached the "stable" phase. */
  exitCriteriaMet() {
    return this.monitoringPhase === "stable";
  }
}
@@ -0,0 +1,83 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { Phase, PipelineEvent } from "./index.js";
4
+ import { Pipeline } from "./base.js";
5
+
6
/**
 * SKILL_AUTHORING-phase pipeline.
 *
 * Compares the rule ids found in `<workspace>/rules/*.json` with the skill
 * directories under `<workspace>/rule_skills/` and reports how many rules
 * still need a skill.
 */
export class SkillAuthoringPipeline extends Pipeline {
  /**
   * @param {{cwd: string}} workspace - Object whose `cwd` is the workspace root directory.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.totalRules = [];
    this.skillsAuthored = [];
    this.skillsWithScripts = [];
    this._scanWorkspace();
  }

  /** Reloads the rule ids, then re-scans the skill directories. */
  _scanWorkspace() {
    this._loadRules();
    this._scanSkills();
  }

  /**
   * Collects the distinct rule ids from every `rules/*.json` file into
   * `totalRules`. A file may hold either an array of rules or an object with
   * a `rules` array. Unreadable or malformed files are skipped, and entries
   * without an `id` (including `null` entries) are ignored without discarding
   * the rest of the file.
   */
  _loadRules() {
    this.totalRules = [];
    const rulesDir = path.join(this._workspace.cwd, "rules");
    if (!fs.existsSync(rulesDir)) return;

    // A Set keeps an id that appears in several files from being counted twice.
    const seen = new Set();
    for (const f of fs.readdirSync(rulesDir).filter((name) => name.endsWith(".json"))) {
      let data;
      try {
        data = JSON.parse(fs.readFileSync(path.join(rulesDir, f), "utf-8"));
      } catch {
        continue; // best effort: skip files that are not valid JSON
      }
      const rules = Array.isArray(data) ? data : data?.rules;
      if (!Array.isArray(rules)) continue;
      for (const r of rules) {
        const id = r?.id;
        if (id && !seen.has(id)) {
          seen.add(id);
          this.totalRules.push(id);
        }
      }
    }
  }

  /**
   * Scans `rule_skills/` (ignoring entries whose name starts with `__`).
   * A directory counts as an authored skill when it contains `SKILL.md` or
   * any `.py` file, and as a skill with scripts when its `scripts/`
   * sub-directory is non-empty.
   */
  _scanSkills() {
    this.skillsAuthored = [];
    this.skillsWithScripts = [];
    const dir = path.join(this._workspace.cwd, "rule_skills");
    if (!fs.existsSync(dir)) return;
    for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
      if (!e.isDirectory() || e.name.startsWith("__")) continue;
      const skillPath = path.join(dir, e.name);
      const hasSkillDoc = fs.existsSync(path.join(skillPath, "SKILL.md"));
      if (hasSkillDoc || fs.readdirSync(skillPath).some((f) => f.endsWith(".py"))) {
        this.skillsAuthored.push(e.name);
      }
      const scriptsDir = path.join(skillPath, "scripts");
      if (fs.existsSync(scriptsDir) && fs.readdirSync(scriptsDir).length > 0) {
        this.skillsWithScripts.push(e.name);
      }
    }
  }

  /**
   * Re-scans the workspace and renders authoring progress as markdown.
   *
   * @returns {string} Markdown status report.
   */
  describeState() {
    this._scanWorkspace();
    const total = this.totalRules.length;
    const parts = ["## Current Phase: SKILL_AUTHORING"];
    parts.push(`### Progress\n- Rules from extraction: ${total}\n- Skills authored: ${this.skillsAuthored.length}\n- Skills with scripts/: ${this.skillsWithScripts.length}`);

    if (this.exitCriteriaMet()) {
      parts.push("### Ready\nAll rules have skills. Proceed to SKILL_TESTING.");
    } else if (this.skillsAuthored.length === 0) {
      parts.push("### What to do now\nWrite a SKILL.md for each rule in rule_skills/{rule_id}/.\nDescribe: what to check, where to look, what to extract, how to judge.");
    } else {
      const authored = new Set(this.skillsAuthored);
      const remaining = this.totalRules.filter((r) => !authored.has(r));
      // Count the listed ids, so the number matches the list and can never go
      // negative when there are more skill directories than rules.
      parts.push(`### What to do now\n${remaining.length} rules still need skills. Remaining: ${remaining.slice(0, 10).join(", ")}`);
    }
    return parts.join("\n\n");
  }

  /**
   * Re-scans the skill directories after a successful `workspace_file` call
   * whose path contains `rule_skills/`.
   *
   * @param {string} toolName - Name of the tool that ran.
   * @param {{path?: string}} toolInput - Input passed to the tool.
   * @param {{isError?: boolean}} result - Tool result.
   * @returns {PipelineEvent|null} A `phase_ready` event on the call that makes
   *   the exit criteria become true, otherwise null.
   */
  onToolResult(toolName, toolInput, result) {
    if (result?.isError) return null;
    const wasReady = this.exitCriteriaMet();
    if (toolName === "workspace_file" && (toolInput?.path || "").includes("rule_skills/")) this._scanSkills();
    if (!wasReady && this.exitCriteriaMet()) {
      return new PipelineEvent({ type: "phase_ready", message: "Skill authoring complete. Ready for SKILL_TESTING.", nextPhase: Phase.SKILL_TESTING });
    }
    return null;
  }

  /**
   * The phase is complete when there is at least one rule, there are at least
   * as many authored skills as rules, and at least half of the authored
   * skills have a non-empty `scripts/` directory.
   *
   * NOTE(review): this compares counts, not ids, so skill directories that do
   * not correspond to any rule still count - confirm this is intended.
   *
   * @returns {boolean}
   */
  exitCriteriaMet() {
    if (!this.totalRules.length) return false;
    return this.skillsAuthored.length >= this.totalRules.length && this.skillsWithScripts.length >= this.skillsAuthored.length * 0.5;
  }
}
@@ -0,0 +1,111 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { Phase, PipelineEvent } from "./index.js";
4
+ import { Pipeline } from "./base.js";
5
+
6
/**
 * SKILL_TESTING-phase pipeline.
 *
 * Tracks which skills under `<workspace>/rule_skills/` have test results in
 * `<workspace>/output/*.json`, which of them meet the accuracy threshold, and
 * how many evolution iterations have been logged in `logs/evolution/`.
 */
export class SkillTestingPipeline extends Pipeline {
  /**
   * @param {{cwd: string}} workspace - Object whose `cwd` is the workspace root directory.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.skillsToTest = [];
    this.skillsTested = {};
    this.skillsPassing = [];
    this.iterationCount = 0;
    this._accuracyThreshold = 0.9;
    this._maxIterations = 20;
    this._scanWorkspace();
  }

  /** Reloads configuration, skills, test results and the evolution log count. */
  _scanWorkspace() {
    this._loadConfig();
    this._loadSkills();
    this._loadTestResults();
    this._loadEvolutionLog();
  }

  /**
   * Reads SKILL_ACCURACY and MAX_ITERATIONS from the workspace `.env`.
   * Values that do not parse to a finite number leave the current setting
   * untouched (parseFloat/parseInt return NaN rather than throwing).
   */
  _loadConfig() {
    const envPath = path.join(this._workspace.cwd, ".env");
    if (!fs.existsSync(envPath)) return;
    for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
      if (line.startsWith("SKILL_ACCURACY=")) {
        const threshold = Number.parseFloat(line.split("=")[1]);
        if (Number.isFinite(threshold)) this._accuracyThreshold = threshold;
      }
      if (line.startsWith("MAX_ITERATIONS=")) {
        const limit = Number.parseInt(line.split("=")[1], 10);
        if (Number.isFinite(limit)) this._maxIterations = limit;
      }
    }
  }

  /**
   * Lists the skills to test: directories under `rule_skills/` (ignoring
   * names starting with `__`) that contain `SKILL.md` or any `.py` file.
   */
  _loadSkills() {
    this.skillsToTest = [];
    const dir = path.join(this._workspace.cwd, "rule_skills");
    if (!fs.existsSync(dir)) return;
    for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
      if (!e.isDirectory() || e.name.startsWith("__")) continue;
      const skillPath = path.join(dir, e.name);
      if (fs.existsSync(path.join(skillPath, "SKILL.md")) || fs.readdirSync(skillPath).some((f) => f.endsWith(".py"))) {
        this.skillsToTest.push(e.name);
      }
    }
  }

  /**
   * Rebuilds `skillsTested` (best accuracy seen per rule id) and
   * `skillsPassing` from `output/*.json`. A result is keyed by its `rule_id`,
   * or by the file name without extension when `rule_id` is absent.
   * Malformed files and results whose `accuracy` is not numeric are skipped,
   * so such a skill is reported as untested instead of being stored as NaN.
   */
  _loadTestResults() {
    this.skillsTested = {};
    this.skillsPassing = [];
    const outDir = path.join(this._workspace.cwd, "output");
    if (!fs.existsSync(outDir)) return;
    for (const f of fs.readdirSync(outDir).filter((name) => name.endsWith(".json"))) {
      let data;
      try {
        data = JSON.parse(fs.readFileSync(path.join(outDir, f), "utf-8"));
      } catch {
        continue; // best effort: skip unreadable or malformed result files
      }
      if (data === null || typeof data !== "object" || data.accuracy == null) continue;
      const acc = Number.parseFloat(data.accuracy);
      if (Number.isNaN(acc)) continue;
      const ruleId = data.rule_id || path.parse(f).name;
      this.skillsTested[ruleId] = Math.max(this.skillsTested[ruleId] || 0, acc);
    }
    this.skillsPassing = Object.entries(this.skillsTested)
      .filter(([, acc]) => acc >= this._accuracyThreshold)
      .map(([id]) => id);
  }

  /** Sets `iterationCount` to the number of `.json` files in `logs/evolution/`. */
  _loadEvolutionLog() {
    const logDir = path.join(this._workspace.cwd, "logs", "evolution");
    if (!fs.existsSync(logDir)) {
      this.iterationCount = 0;
      return;
    }
    this.iterationCount = fs.readdirSync(logDir).filter((f) => f.endsWith(".json")).length;
  }

  /**
   * Re-scans the workspace and renders testing progress as markdown.
   *
   * @returns {string} Markdown status report.
   */
  describeState() {
    this._scanWorkspace();
    const total = this.skillsToTest.length;
    const tested = Object.keys(this.skillsTested).length;
    const passing = this.skillsPassing.length;
    const failing = Object.entries(this.skillsTested).filter(([, acc]) => acc < this._accuracyThreshold);
    const untested = this.skillsToTest.filter((s) => !(s in this.skillsTested));

    const parts = ["## Current Phase: SKILL_TESTING"];
    parts.push(`### Progress\n- Skills to test: ${total}\n- Tested: ${tested}\n- Passing (>=${this._accuracyThreshold}): ${passing}\n- Evolution iterations: ${this.iterationCount}/${this._maxIterations}`);

    if (this.exitCriteriaMet()) {
      parts.push("### Ready\nAll skills passing. Proceed to DISTILLATION.");
    } else if (untested.length) {
      parts.push(`### What to do now\nTest these skills: ${untested.slice(0, 10).join(", ")}`);
    } else if (failing.length) {
      parts.push("### What to do now — Evolution Cycle\nFailing skills:\n" +
        failing.map(([id, acc]) => `- ${id}: ${acc.toFixed(2)}`).join("\n") +
        "\n\nFollow: diagnose -> classify -> fix -> retest -> log");
    }
    return parts.join("\n\n");
  }

  /**
   * Re-scans the workspace after a successful `workspace_file` or
   * `evolution_cycle` call.
   *
   * @param {string} toolName - Name of the tool that ran.
   * @param {object} toolInput - Input passed to the tool (not inspected).
   * @param {{isError?: boolean}} result - Tool result.
   * @returns {PipelineEvent|null} A `phase_ready` event on the call that makes
   *   the exit criteria become true, otherwise null.
   */
  onToolResult(toolName, toolInput, result) {
    if (result?.isError) return null;
    const wasReady = this.exitCriteriaMet();
    if (toolName === "workspace_file" || toolName === "evolution_cycle") this._scanWorkspace();
    if (!wasReady && this.exitCriteriaMet()) {
      return new PipelineEvent({ type: "phase_ready", message: "Skill testing complete. Ready for DISTILLATION.", nextPhase: Phase.DISTILLATION });
    }
    return null;
  }

  /**
   * The phase is complete when there is at least one skill, at least as many
   * test results as skills, and the number of passing results is at least
   * `total * accuracyThreshold`.
   *
   * NOTE(review): the accuracy threshold doubles as the required fraction of
   * passing skills, and results are counted rather than matched to skill
   * names - confirm both are intended.
   *
   * @returns {boolean}
   */
  exitCriteriaMet() {
    const total = this.skillsToTest.length;
    if (!total) return false;
    return Object.keys(this.skillsTested).length >= total && this.skillsPassing.length >= total * this._accuracyThreshold;
  }
}
@@ -75,20 +75,16 @@ export class DistillationEngine extends Pipeline {
75
75
  const total = this.skillsToDistill.length;
76
76
  const created = Object.keys(this.workflowsCreated).length;
77
77
  const passing = this.workflowsPassing.length;
78
- const parts = ["## Current Phase: DISTILLATION"];
78
+ const notCreated = this.skillsToDistill.filter((s) => !(s in this.workflowsCreated));
79
+ const notPassing = Object.keys(this.workflowsCreated).filter((s) => !this.workflowsPassing.includes(s));
80
+
81
+ const parts = ["## Phase: DISTILLATION\nConvert proven skills into worker LLM workflows that run cheaply at scale. Skill results from the testing phase are the accuracy baseline — workflow results must match them. Worker LLM tools (worker_llm_call, tier_downgrade, workflow_run) are now available."];
79
82
  parts.push(`### Progress\n- Skills to distill: ${total}\n- Workflows created: ${created}\n- Workflows passing (>=${this._workflowAccuracy}): ${passing}`);
83
+ if (notCreated.length) parts.push(`- Need workflows: ${notCreated.slice(0, 10).join(", ")}`);
84
+ if (notPassing.length) parts.push(`- Below threshold: ${notPassing.slice(0, 10).join(", ")}`);
80
85
 
81
86
  if (this.exitCriteriaMet()) {
82
- parts.push("### Ready\nAll workflows passing. Proceed to PRODUCTION_QC.");
83
- } else if (created === 0) {
84
- parts.push("### What to do now\nConvert proven skills into worker LLM workflows.\nFor each skill: write workflow script, write prompts, test vs ground truth, tier-downgrade test.");
85
- } else {
86
- const notCreated = this.skillsToDistill.filter((s) => !(s in this.workflowsCreated));
87
- const notPassing = Object.keys(this.workflowsCreated).filter((s) => !this.workflowsPassing.includes(s));
88
- let guidance = "### What to do now\n";
89
- if (notCreated.length) guidance += `Create workflows for: ${notCreated.slice(0, 10).join(", ")}\n`;
90
- if (notPassing.length) guidance += `Improve accuracy for: ${notPassing.slice(0, 10).join(", ")}\n`;
91
- parts.push(guidance);
87
+ parts.push("### Exit\nAll workflows passing. Proceed to PRODUCTION_QC.");
92
88
  }
93
89
  return parts.join("\n\n");
94
90
  }
@@ -60,16 +60,14 @@ export class RuleExtractionPipeline extends Pipeline {
60
60
 
61
61
  describeState() {
62
62
  this._scanWorkspace();
63
- const parts = ["## Current Phase: EXTRACTION"];
64
- parts.push(`### Progress\n- Regulations scanned: ${this.regulationsScanned ? "yes" : "no"}\n- Rules extracted: ${this.rulesExtracted.length}\n- Rules with tests: ${this.rulesWithTests.length}\n- Coverage audit: ${this.coverageAudited ? "done" : "not yet"}`);
63
+ const parts = ["## Phase: EXTRACTION\nRead and decompose regulation documents into atomic, testable verification rules. This is BUILD mode — do the analysis directly."];
64
+ parts.push(`### Progress\n- Regulations scanned: ${this.regulationsScanned ? "yes" : "no"}\n- Rules extracted: ${this.rulesExtracted.length}\n- Rules with test stubs: ${this.rulesWithTests.length}\n- Coverage audit: ${this.coverageAudited ? "done" : "pending"}`);
65
65
 
66
66
  if (this.exitCriteriaMet()) {
67
- parts.push("### Ready\nExtraction complete. Proceed to SKILL_AUTHORING phase.");
68
- } else if (this.rulesExtracted.length === 0) {
69
- parts.push("### What to do now\nDecompose regulations into atomic, testable rules.\n- One rule = one pass/fail outcome\n- Work top-down: major areas → chapters → sections → atomic rules\n- Save rules to rules/catalog.json via rule_catalog tool");
70
- } else if (!this.coverageAudited) {
71
- parts.push("### What to do now\nRun a coverage audit: which regulation sections are NOT covered? Save to rules/coverage_audit.md");
67
+ parts.push("### Exit\nExtraction complete. Proceed to SKILL_AUTHORING.");
72
68
  }
69
+
70
+ parts.push(`### Exit criteria\n- [${this.regulationsScanned ? "x" : " "}] All regulations read\n- [${this.rulesExtracted.length > 0 ? "x" : " "}] Rules decomposed into atomic units\n- [${this.rulesWithTests.length >= Math.max(this.rulesExtracted.length * 0.8, 1) ? "x" : " "}] >=80% of rules have test stubs\n- [${this.coverageAudited ? "x" : " "}] Coverage audit completed`);
73
71
  return parts.join("\n\n");
74
72
  }
75
73
 
@@ -113,23 +113,17 @@ export class ProjectInitializer extends Pipeline {
113
113
 
114
114
  describeState() {
115
115
  const completed = [], pending = [];
116
- if (this.workspaceCreated) completed.push("Workspace structure created"); else pending.push("Create workspace structure");
117
- if (this.configReady) completed.push("Configuration ready (API keys set)"); else pending.push("Configure .env (API key needed)");
118
- if (this.hasRegulations) completed.push("Regulation documents available in rules/"); else pending.push("Regulation documents needed in rules/");
119
- if (this.hasSamples) completed.push("Sample documents available in samples/"); else pending.push("Sample documents needed in samples/");
116
+ if (this.workspaceCreated) completed.push("Workspace structure created"); else pending.push("Workspace structure");
117
+ if (this.configReady) completed.push("API keys configured"); else pending.push("API keys (check .env)");
118
+ if (this.hasRegulations) completed.push("Regulation documents in rules/"); else pending.push("Regulation documents in rules/");
119
+ if (this.hasSamples) completed.push("Sample documents in samples/"); else pending.push("Sample documents in samples/");
120
120
 
121
- const parts = ["## Current Phase: BOOTSTRAP"];
122
- if (completed.length) parts.push("### Completed\n" + completed.map((c) => `- [x] ${c}`).join("\n"));
123
- if (pending.length) parts.push("### Pending\n" + pending.map((p) => `- [ ] ${p}`).join("\n"));
121
+ const parts = ["## Phase: BOOTSTRAP\nSet up the workspace and understand the developer user's verification scenario. Bundled methodology skills are available in the workspace skills/ directory."];
122
+ if (completed.length) parts.push("### Done\n" + completed.map((c) => `- [x] ${c}`).join("\n"));
123
+ if (pending.length) parts.push("### Needed\n" + pending.map((p) => `- [ ] ${p}`).join("\n"));
124
124
 
125
125
  if (this.exitCriteriaMet()) {
126
- parts.push("### Ready\nAll bootstrap requirements met. Proceed to EXTRACTION phase.");
127
- } else {
128
- parts.push(
129
- "### What to do now\nTalk to the developer user about their verification scenario:\n" +
130
- "- What documents do they verify?\n- What regulations apply?\n" +
131
- "- Ask them to provide regulation documents (save to rules/) and sample documents (save to samples/)."
132
- );
126
+ parts.push("### Exit\nBootstrap requirements met. Proceed to EXTRACTION.");
133
127
  }
134
128
  return parts.join("\n\n");
135
129
  }
@@ -69,7 +69,7 @@ export class ProductionQCPipeline extends Pipeline {
69
69
 
70
70
  describeState() {
71
71
  this._scanWorkspace();
72
- const parts = ["## Current Phase: PRODUCTION_QC"];
72
+ const parts = ["## Phase: PRODUCTION_QC\nRun workflows on production documents from input/, monitor quality via confidence-based sampling. This phase transitions from active review to stable spot-checking as accuracy stabilizes."];
73
73
  parts.push(`### Progress\n- Batches: ${this.batchesProcessed}\n- Documents: ${this.totalDocuments}\n- Reviewed: ${this.documentsReviewed}\n- Monitoring: ${this.monitoringPhase}\n- Sampling rate: ${(this._samplingRate * 100).toFixed(0)}%`);
74
74
 
75
75
  if (Object.keys(this.accuracyByRule).length) {
@@ -77,10 +77,8 @@ export class ProductionQCPipeline extends Pipeline {
77
77
  parts.push("### Accuracy by rule\n" + lines.join("\n"));
78
78
  }
79
79
 
80
- if (this.monitoringPhase === "initial") {
81
- parts.push("### What to do now\nRun workflows on input/ documents. Save results to output/. Review and save QC to output/qc/.");
82
- } else if (this.monitoringPhase === "stable") {
83
- parts.push("### Status: Stable\nWorkflows running reliably. Spot-check only.");
80
+ if (this.monitoringPhase === "stable") {
81
+ parts.push("### Status: Stable monitoring. Spot-check only.");
84
82
  }
85
83
  return parts.join("\n\n");
86
84
  }
@@ -52,16 +52,13 @@ export class SkillAuthoringPipeline extends Pipeline {
52
52
  describeState() {
53
53
  this._scanWorkspace();
54
54
  const total = this.totalRules.length;
55
- const parts = ["## Current Phase: SKILL_AUTHORING"];
56
- parts.push(`### Progress\n- Rules from extraction: ${total}\n- Skills authored: ${this.skillsAuthored.length}\n- Skills with scripts/: ${this.skillsWithScripts.length}`);
55
+ const authored = this.skillsAuthored.length;
56
+ const remaining = this.totalRules.filter((r) => !this.skillsAuthored.includes(r));
57
+ const parts = ["## Phase: SKILL_AUTHORING\nWrite verification skills for each extracted rule. Skills are first-class deliverables — they may serve as the production solution when worker LLM workflows are insufficient. Follow Anthropic skill-creator format. This is BUILD mode."];
58
+ parts.push(`### Progress\n- Rules: ${total}\n- Skills authored: ${authored}\n- Skills with scripts/: ${this.skillsWithScripts.length}${remaining.length > 0 ? `\n- Remaining: ${remaining.slice(0, 10).join(", ")}` : ""}`);
57
59
 
58
60
  if (this.exitCriteriaMet()) {
59
- parts.push("### Ready\nAll rules have skills. Proceed to SKILL_TESTING.");
60
- } else if (this.skillsAuthored.length === 0) {
61
- parts.push("### What to do now\nWrite a SKILL.md for each rule in rule_skills/{rule_id}/.\nDescribe: what to check, where to look, what to extract, how to judge.");
62
- } else {
63
- const remaining = this.totalRules.filter((r) => !this.skillsAuthored.includes(r));
64
- parts.push(`### What to do now\n${total - this.skillsAuthored.length} rules still need skills. Remaining: ${remaining.slice(0, 10).join(", ")}`);
61
+ parts.push("### Exit\nAll rules have skills. Proceed to SKILL_TESTING.");
65
62
  }
66
63
  return parts.join("\n\n");
67
64
  }
@@ -78,17 +78,15 @@ export class SkillTestingPipeline extends Pipeline {
78
78
  const failing = Object.entries(this.skillsTested).filter(([, acc]) => acc < this._accuracyThreshold);
79
79
  const untested = this.skillsToTest.filter((s) => !(s in this.skillsTested));
80
80
 
81
- const parts = ["## Current Phase: SKILL_TESTING"];
81
+ const parts = ["## Phase: SKILL_TESTING\nTest skills against sample documents, iterate via evolution loop until accuracy threshold is met. This is BUILD mode — the results established here become the accuracy baseline for distillation."];
82
82
  parts.push(`### Progress\n- Skills to test: ${total}\n- Tested: ${tested}\n- Passing (>=${this._accuracyThreshold}): ${passing}\n- Evolution iterations: ${this.iterationCount}/${this._maxIterations}`);
83
+ if (untested.length) parts.push(`- Untested: ${untested.slice(0, 10).join(", ")}`);
84
+ if (failing.length) parts.push(`- Below threshold:\n${failing.map(([id, acc]) => ` - ${id}: ${acc.toFixed(2)}`).join("\n")}`);
83
85
 
84
86
  if (this.exitCriteriaMet()) {
85
- parts.push("### Ready\nAll skills passing. Proceed to DISTILLATION.");
86
- } else if (untested.length) {
87
- parts.push(`### What to do now\nTest these skills: ${untested.slice(0, 10).join(", ")}`);
88
- } else if (failing.length) {
89
- parts.push("### What to do now — Evolution Cycle\nFailing skills:\n" +
90
- failing.map(([id, acc]) => `- ${id}: ${acc.toFixed(2)}`).join("\n") +
91
- "\n\nFollow: diagnose -> classify -> fix -> retest -> log");
87
+ parts.push("### Exit\nAll skills passing. Proceed to DISTILLATION.");
88
+ } else if (this.iterationCount >= this._maxIterations) {
89
+ parts.push(`### Max iterations (${this._maxIterations}) reached. Discuss remaining failures with the developer user.`);
92
90
  }
93
91
  return parts.join("\n\n");
94
92
  }