kc-beta 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/kc-beta.js +16 -0
- package/package.json +32 -0
- package/src/agent/confidence-scorer.js +120 -0
- package/src/agent/context.js +124 -0
- package/src/agent/corner-case-registry.js +119 -0
- package/src/agent/engine.js +224 -0
- package/src/agent/events.js +27 -0
- package/src/agent/history.js +101 -0
- package/src/agent/llm-client.js +131 -0
- package/src/agent/pipelines/base.js +14 -0
- package/src/agent/pipelines/distillation.js +113 -0
- package/src/agent/pipelines/extraction.js +92 -0
- package/src/agent/pipelines/index.js +23 -0
- package/src/agent/pipelines/initializer.js +163 -0
- package/src/agent/pipelines/production-qc.js +99 -0
- package/src/agent/pipelines/skill-authoring.js +83 -0
- package/src/agent/pipelines/skill-testing.js +111 -0
- package/src/agent/tools/agent-tool.js +100 -0
- package/src/agent/tools/base.js +35 -0
- package/src/agent/tools/dashboard-render.js +146 -0
- package/src/agent/tools/document-parse.js +184 -0
- package/src/agent/tools/document-search.js +111 -0
- package/src/agent/tools/evolution-cycle.js +150 -0
- package/src/agent/tools/qc-sample.js +94 -0
- package/src/agent/tools/registry.js +55 -0
- package/src/agent/tools/rule-catalog.js +113 -0
- package/src/agent/tools/sandbox-exec.js +106 -0
- package/src/agent/tools/tier-downgrade.js +114 -0
- package/src/agent/tools/worker-llm-call.js +109 -0
- package/src/agent/tools/workflow-run.js +138 -0
- package/src/agent/tools/workspace-file.js +122 -0
- package/src/agent/version-manager.js +130 -0
- package/src/agent/workspace.js +82 -0
- package/src/cli/components.js +164 -0
- package/src/cli/index.js +329 -0
- package/src/cli/init.js +80 -0
- package/src/cli/onboard.js +182 -0
- package/src/cli/terminal.js +143 -0
- package/src/config.js +93 -0
- package/template/.env.template +31 -0
- package/template/CLAUDE.md +137 -0
- package/template/Input/.gitkeep +0 -0
- package/template/Output/.gitkeep +0 -0
- package/template/Rules/.gitkeep +0 -0
- package/template/Samples/.gitkeep +0 -0
- package/template/skills/en/meta/compliance-judgment/SKILL.md +114 -0
- package/template/skills/en/meta/compliance-judgment/references/output-format.md +151 -0
- package/template/skills/en/meta/confidence-system/SKILL.md +117 -0
- package/template/skills/en/meta/corner-case-management/SKILL.md +111 -0
- package/template/skills/en/meta/cross-document-verification/SKILL.md +131 -0
- package/template/skills/en/meta/cross-document-verification/references/contradiction-taxonomy.md +73 -0
- package/template/skills/en/meta/data-sensibility/SKILL.md +115 -0
- package/template/skills/en/meta/document-parsing/SKILL.md +108 -0
- package/template/skills/en/meta/document-parsing/references/parser-catalog.md +40 -0
- package/template/skills/en/meta/entity-extraction/SKILL.md +129 -0
- package/template/skills/en/meta/tree-processing/SKILL.md +103 -0
- package/template/skills/en/meta-meta/bootstrap-workspace/SKILL.md +70 -0
- package/template/skills/en/meta-meta/dashboard-reporting/SKILL.md +106 -0
- package/template/skills/en/meta-meta/dashboard-reporting/scripts/generate_dashboard.py +178 -0
- package/template/skills/en/meta-meta/evolution-loop/SKILL.md +210 -0
- package/template/skills/en/meta-meta/evolution-loop/references/convergence-guide.md +62 -0
- package/template/skills/en/meta-meta/quality-control/SKILL.md +138 -0
- package/template/skills/en/meta-meta/quality-control/references/qa-layers.md +92 -0
- package/template/skills/en/meta-meta/quality-control/references/sampling-strategies.md +76 -0
- package/template/skills/en/meta-meta/rule-extraction/SKILL.md +100 -0
- package/template/skills/en/meta-meta/rule-extraction/references/chunking-strategies.md +80 -0
- package/template/skills/en/meta-meta/rule-graph/SKILL.md +118 -0
- package/template/skills/en/meta-meta/skill-authoring/SKILL.md +108 -0
- package/template/skills/en/meta-meta/skill-authoring/references/skill-format-spec.md +78 -0
- package/template/skills/en/meta-meta/skill-to-workflow/SKILL.md +150 -0
- package/template/skills/en/meta-meta/skill-to-workflow/references/worker-llm-catalog.md +50 -0
- package/template/skills/en/meta-meta/task-decomposition/SKILL.md +129 -0
- package/template/skills/en/meta-meta/task-decomposition/references/decision-matrix.md +81 -0
- package/template/skills/en/meta-meta/version-control/SKILL.md +152 -0
- package/template/skills/en/meta-meta/version-control/references/trace-id-spec.md +79 -0
- package/template/skills/en/skill-creator/LICENSE.txt +202 -0
- package/template/skills/en/skill-creator/SKILL.md +479 -0
- package/template/skills/en/skill-creator/agents/analyzer.md +274 -0
- package/template/skills/en/skill-creator/agents/comparator.md +202 -0
- package/template/skills/en/skill-creator/agents/grader.md +223 -0
- package/template/skills/en/skill-creator/assets/eval_review.html +146 -0
- package/template/skills/en/skill-creator/eval-viewer/generate_review.py +471 -0
- package/template/skills/en/skill-creator/eval-viewer/viewer.html +1325 -0
- package/template/skills/en/skill-creator/references/schemas.md +430 -0
- package/template/skills/en/skill-creator/scripts/__init__.py +0 -0
- package/template/skills/en/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/template/skills/en/skill-creator/scripts/generate_report.py +326 -0
- package/template/skills/en/skill-creator/scripts/improve_description.py +248 -0
- package/template/skills/en/skill-creator/scripts/package_skill.py +136 -0
- package/template/skills/en/skill-creator/scripts/quick_validate.py +103 -0
- package/template/skills/en/skill-creator/scripts/run_eval.py +310 -0
- package/template/skills/en/skill-creator/scripts/run_loop.py +332 -0
- package/template/skills/en/skill-creator/scripts/utils.py +47 -0
- package/template/skills/zh/meta/compliance-judgment/SKILL.md +303 -0
- package/template/skills/zh/meta/compliance-judgment/references/output-format.md +151 -0
- package/template/skills/zh/meta/confidence-system/SKILL.md +228 -0
- package/template/skills/zh/meta/corner-case-management/SKILL.md +235 -0
- package/template/skills/zh/meta/cross-document-verification/SKILL.md +241 -0
- package/template/skills/zh/meta/cross-document-verification/references/contradiction-taxonomy.md +73 -0
- package/template/skills/zh/meta/data-sensibility/SKILL.md +235 -0
- package/template/skills/zh/meta/document-parsing/SKILL.md +168 -0
- package/template/skills/zh/meta/document-parsing/references/parser-catalog.md +40 -0
- package/template/skills/zh/meta/entity-extraction/SKILL.md +276 -0
- package/template/skills/zh/meta/tree-processing/SKILL.md +233 -0
- package/template/skills/zh/meta-meta/bootstrap-workspace/SKILL.md +147 -0
- package/template/skills/zh/meta-meta/dashboard-reporting/SKILL.md +281 -0
- package/template/skills/zh/meta-meta/dashboard-reporting/scripts/generate_dashboard.py +178 -0
- package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +302 -0
- package/template/skills/zh/meta-meta/evolution-loop/references/convergence-guide.md +62 -0
- package/template/skills/zh/meta-meta/quality-control/SKILL.md +269 -0
- package/template/skills/zh/meta-meta/quality-control/references/qa-layers.md +92 -0
- package/template/skills/zh/meta-meta/quality-control/references/sampling-strategies.md +76 -0
- package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +208 -0
- package/template/skills/zh/meta-meta/rule-extraction/references/chunking-strategies.md +80 -0
- package/template/skills/zh/meta-meta/rule-graph/SKILL.md +203 -0
- package/template/skills/zh/meta-meta/skill-authoring/SKILL.md +235 -0
- package/template/skills/zh/meta-meta/skill-authoring/references/skill-format-spec.md +78 -0
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +275 -0
- package/template/skills/zh/meta-meta/skill-to-workflow/references/worker-llm-catalog.md +50 -0
- package/template/skills/zh/meta-meta/task-decomposition/SKILL.md +224 -0
- package/template/skills/zh/meta-meta/task-decomposition/references/decision-matrix.md +81 -0
- package/template/skills/zh/meta-meta/version-control/SKILL.md +284 -0
- package/template/skills/zh/meta-meta/version-control/references/trace-id-spec.md +79 -0
- package/template/skills/zh/skill-creator/LICENSE.txt +202 -0
- package/template/skills/zh/skill-creator/SKILL.md +479 -0
- package/template/skills/zh/skill-creator/agents/analyzer.md +274 -0
- package/template/skills/zh/skill-creator/agents/comparator.md +202 -0
- package/template/skills/zh/skill-creator/agents/grader.md +223 -0
- package/template/skills/zh/skill-creator/assets/eval_review.html +146 -0
- package/template/skills/zh/skill-creator/eval-viewer/generate_review.py +471 -0
- package/template/skills/zh/skill-creator/eval-viewer/viewer.html +1325 -0
- package/template/skills/zh/skill-creator/references/schemas.md +430 -0
- package/template/skills/zh/skill-creator/scripts/__init__.py +0 -0
- package/template/skills/zh/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/template/skills/zh/skill-creator/scripts/generate_report.py +326 -0
- package/template/skills/zh/skill-creator/scripts/improve_description.py +248 -0
- package/template/skills/zh/skill-creator/scripts/package_skill.py +136 -0
- package/template/skills/zh/skill-creator/scripts/quick_validate.py +103 -0
- package/template/skills/zh/skill-creator/scripts/run_eval.py +310 -0
- package/template/skills/zh/skill-creator/scripts/run_loop.py +332 -0
- package/template/skills/zh/skill-creator/scripts/utils.py +47 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { PipelineEvent } from "./index.js";
|
|
4
|
+
import { Pipeline } from "./base.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Sampling rate selected by each MONITOR_FREQUENCY value read from `.env`.
 * Frozen so the shared lookup table cannot be mutated at runtime.
 */
const FREQUENCY_MAP = Object.freeze({ high: 1.0, mid: 0.5, low: 0.2 });
|
|
7
|
+
|
|
8
|
+
/**
 * Pipeline for the PRODUCTION_QC phase.
 *
 * Aggregates the QC batch files found in `<cwd>/output/qc/*.json` and derives a
 * monitoring phase ("initial", "active" or "stable") from them. Configuration
 * is read from `<cwd>/.env` (MONITOR_FREQUENCY, WORKFLOW_ACCURACY).
 */
export class ProductionQCPipeline extends Pipeline {
  /**
   * @param {object} workspace - Workspace handle; only `workspace.cwd` is read here.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.batchesProcessed = 0;
    this.totalDocuments = 0;
    this.documentsReviewed = 0;
    this.accuracyByRule = {};
    this.confidenceDistribution = { low: 0, medium: 0, high: 0 };
    this.issuesFound = [];
    this.monitoringPhase = "initial";
    this._samplingRate = 0.5;
    this._accuracyThreshold = 0.9;
    this._scanWorkspace();
  }

  /** Reload configuration and re-aggregate all QC batch files. */
  _scanWorkspace() {
    this._loadConfig();
    this._scanQcResults();
  }

  /**
   * Read MONITOR_FREQUENCY and WORKFLOW_ACCURACY from `.env`, if the file exists.
   * Unknown frequency names fall back to 0.5; an unparsable accuracy value is
   * ignored so the previous threshold stays in effect.
   */
  _loadConfig() {
    const envPath = path.join(this._workspace.cwd, ".env");
    if (!fs.existsSync(envPath)) return;
    for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
      if (line.startsWith("MONITOR_FREQUENCY=")) {
        const key = line.split("=")[1].trim().toLowerCase();
        // Own-property check: inherited keys such as "constructor" must not resolve.
        this._samplingRate = Object.prototype.hasOwnProperty.call(FREQUENCY_MAP, key)
          ? FREQUENCY_MAP[key]
          : 0.5;
      }
      if (line.startsWith("WORKFLOW_ACCURACY=")) {
        // parseFloat never throws; it returns NaN, and a NaN threshold would make
        // every `accuracy >= threshold` comparison false.
        const parsed = Number.parseFloat(line.split("=")[1]);
        if (Number.isFinite(parsed)) this._accuracyThreshold = parsed;
      }
    }
  }

  /**
   * Reset the aggregated counters, re-read every `*.json` file in `output/qc/`
   * (in sorted filename order) and recompute `monitoringPhase`.
   */
  _scanQcResults() {
    this.batchesProcessed = 0;
    this.totalDocuments = 0;
    this.documentsReviewed = 0;
    this.accuracyByRule = {};
    this.confidenceDistribution = { low: 0, medium: 0, high: 0 };
    this.issuesFound = [];

    const qcDir = path.join(this._workspace.cwd, "output", "qc");
    if (!fs.existsSync(qcDir)) return;

    for (const f of fs.readdirSync(qcDir).filter((f) => f.endsWith(".json")).sort()) {
      try {
        const data = JSON.parse(fs.readFileSync(path.join(qcDir, f), "utf-8"));
        // Only JSON objects carry QC data; `null` or a bare scalar is not a batch.
        if (data === null || typeof data !== "object") continue;
        this.batchesProcessed++;
        this.totalDocuments += typeof data.documents === "number" ? data.documents : (data.total || 0);
        this.documentsReviewed += data.reviewed || 0;
        // Later files (sorted order) overwrite earlier per-rule accuracy values.
        if (data.accuracy_by_rule) Object.assign(this.accuracyByRule, data.accuracy_by_rule);
        if (data.confidence) {
          for (const band of ["low", "medium", "high"]) this.confidenceDistribution[band] += data.confidence[band] || 0;
        }
        if (Array.isArray(data.issues)) this.issuesFound.push(...data.issues);
      } catch { /* unreadable or malformed batch file: skip it */ }
    }

    // Determine monitoring phase.
    // NOTE: `every` is true for an empty accuracy map, so three or more batches
    // with no issues and no per-rule accuracy data are reported as "stable".
    if (this.batchesProcessed < 3) this.monitoringPhase = "initial";
    else if (this.issuesFound.length > 0) this.monitoringPhase = "active";
    else if (Object.values(this.accuracyByRule).every((a) => a >= this._accuracyThreshold)) this.monitoringPhase = "stable";
    else this.monitoringPhase = "active";
  }

  /**
   * Rescan the workspace and build a markdown summary of the current state.
   * @returns {string} Markdown sections joined by blank lines.
   */
  describeState() {
    this._scanWorkspace();
    const parts = ["## Current Phase: PRODUCTION_QC"];
    parts.push(`### Progress\n- Batches: ${this.batchesProcessed}\n- Documents: ${this.totalDocuments}\n- Reviewed: ${this.documentsReviewed}\n- Monitoring: ${this.monitoringPhase}\n- Sampling rate: ${(this._samplingRate * 100).toFixed(0)}%`);

    if (Object.keys(this.accuracyByRule).length) {
      const lines = Object.entries(this.accuracyByRule).map(([r, a]) => `- ${r}: ${a}`);
      parts.push("### Accuracy by rule\n" + lines.join("\n"));
    }

    if (this.monitoringPhase === "initial") {
      parts.push("### What to do now\nRun workflows on input/ documents. Save results to output/. Review and save QC to output/qc/.");
    } else if (this.monitoringPhase === "stable") {
      parts.push("### Status: Stable\nWorkflows running reliably. Spot-check only.");
    }
    return parts.join("\n\n");
  }

  /**
   * React to a finished tool call. A successful `workspace_file` call whose path
   * contains "output/" triggers a QC rescan.
   * @param {string} toolName
   * @param {object} toolInput
   * @param {object} result - Tool result; `result.isError` short-circuits.
   * @returns {PipelineEvent|null} A "milestone" event when the phase just became stable, else null.
   */
  onToolResult(toolName, toolInput, result) {
    if (result.isError) return null;
    const wasStable = this.monitoringPhase === "stable";
    if (toolName === "workspace_file" && (toolInput.path || "").includes("output/")) this._scanQcResults();
    if (!wasStable && this.monitoringPhase === "stable") {
      return new PipelineEvent({ type: "milestone", message: "Production QC reached stable monitoring phase." });
    }
    return null;
  }

  /** @returns {boolean} True once the monitoring phase is "stable". */
  exitCriteriaMet() { return this.monitoringPhase === "stable"; }
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { Phase, PipelineEvent } from "./index.js";
|
|
4
|
+
import { Pipeline } from "./base.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Pipeline for the SKILL_AUTHORING phase.
 *
 * Tracks rule ids found in `<cwd>/rules/*.json` against skill directories found
 * in `<cwd>/rule_skills/`, and reports when authoring is complete.
 */
export class SkillAuthoringPipeline extends Pipeline {
  /**
   * @param {object} workspace - Workspace handle; only `workspace.cwd` is read here.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.totalRules = [];
    this.skillsAuthored = [];
    this.skillsWithScripts = [];
    this._scanWorkspace();
  }

  /** Reload the rule ids and rescan the skill directories. */
  _scanWorkspace() {
    this._loadRules();
    this._scanSkills();
  }

  /**
   * Collect the distinct rule ids from every `*.json` file in `rules/`.
   * A file may be either an array of rules or an object with a `rules` array.
   */
  _loadRules() {
    this.totalRules = [];
    const rulesDir = path.join(this._workspace.cwd, "rules");
    if (!fs.existsSync(rulesDir)) return;
    // The same id can appear in more than one file; counting it twice would
    // inflate the number of skills required by exitCriteriaMet().
    const seen = new Set();
    for (const f of fs.readdirSync(rulesDir).filter((f) => f.endsWith(".json"))) {
      try {
        const data = JSON.parse(fs.readFileSync(path.join(rulesDir, f), "utf-8"));
        const rules = Array.isArray(data) ? data : data?.rules;
        if (!Array.isArray(rules)) continue;
        for (const r of rules) {
          const id = r?.id;
          if (id && !seen.has(id)) {
            seen.add(id);
            this.totalRules.push(id);
          }
        }
      } catch { /* unreadable or malformed rules file: skip it */ }
    }
  }

  /**
   * Scan `rule_skills/` sub-directories (names starting with "__" are ignored).
   * A directory counts as an authored skill when it holds a SKILL.md or any
   * `.py` file, and as "with scripts" when its `scripts/` folder is non-empty.
   */
  _scanSkills() {
    this.skillsAuthored = [];
    this.skillsWithScripts = [];
    const dir = path.join(this._workspace.cwd, "rule_skills");
    if (!fs.existsSync(dir)) return;
    for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
      if (!e.isDirectory() || e.name.startsWith("__")) continue;
      const skillPath = path.join(dir, e.name);
      if (fs.existsSync(path.join(skillPath, "SKILL.md")) || fs.readdirSync(skillPath).some((f) => f.endsWith(".py"))) {
        this.skillsAuthored.push(e.name);
      }
      const scriptsDir = path.join(skillPath, "scripts");
      if (fs.existsSync(scriptsDir) && fs.readdirSync(scriptsDir).length > 0) {
        this.skillsWithScripts.push(e.name);
      }
    }
  }

  /**
   * Rescan the workspace and build a markdown summary with next-step guidance.
   * @returns {string} Markdown sections joined by blank lines.
   */
  describeState() {
    this._scanWorkspace();
    const total = this.totalRules.length;
    const parts = ["## Current Phase: SKILL_AUTHORING"];
    parts.push(`### Progress\n- Rules from extraction: ${total}\n- Skills authored: ${this.skillsAuthored.length}\n- Skills with scripts/: ${this.skillsWithScripts.length}`);

    if (this.exitCriteriaMet()) {
      parts.push("### Ready\nAll rules have skills. Proceed to SKILL_TESTING.");
    } else if (this.skillsAuthored.length === 0) {
      parts.push("### What to do now\nWrite a SKILL.md for each rule in rule_skills/{rule_id}/.\nDescribe: what to check, where to look, what to extract, how to judge.");
    } else if (total - this.skillsAuthored.length > 0) {
      const authored = new Set(this.skillsAuthored);
      const remaining = this.totalRules.filter((r) => !authored.has(r));
      parts.push(`### What to do now\n${total - this.skillsAuthored.length} rules still need skills. Remaining: ${remaining.slice(0, 10).join(", ")}`);
    } else {
      // Enough skills exist, so the unmet criterion is the scripts ratio (or no
      // rules were found). Say so instead of printing "0 rules still need skills".
      parts.push(`### What to do now\nOnly ${this.skillsWithScripts.length} of ${this.skillsAuthored.length} skills have a non-empty scripts/ directory. At least half of the authored skills need scripts before SKILL_TESTING.`);
    }
    return parts.join("\n\n");
  }

  /**
   * React to a finished tool call. A successful `workspace_file` call whose path
   * contains "rule_skills/" triggers a skill rescan.
   * @param {string} toolName
   * @param {object} toolInput
   * @param {object} result - Tool result; `result.isError` short-circuits.
   * @returns {PipelineEvent|null} A "phase_ready" event when the exit criteria just became true, else null.
   */
  onToolResult(toolName, toolInput, result) {
    if (result.isError) return null;
    const wasReady = this.exitCriteriaMet();
    if (toolName === "workspace_file" && (toolInput.path || "").includes("rule_skills/")) this._scanSkills();
    if (!wasReady && this.exitCriteriaMet()) {
      return new PipelineEvent({ type: "phase_ready", message: "Skill authoring complete. Ready for SKILL_TESTING.", nextPhase: Phase.SKILL_TESTING });
    }
    return null;
  }

  /**
   * @returns {boolean} True when at least one rule exists, there are at least as
   *   many authored skills as rules, and at least half of the authored skills
   *   have a non-empty `scripts/` directory. The comparison is count-based.
   */
  exitCriteriaMet() {
    if (!this.totalRules.length) return false;
    return this.skillsAuthored.length >= this.totalRules.length && this.skillsWithScripts.length >= this.skillsAuthored.length * 0.5;
  }
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { Phase, PipelineEvent } from "./index.js";
|
|
4
|
+
import { Pipeline } from "./base.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Pipeline for the SKILL_TESTING phase.
 *
 * Compares the skills found in `<cwd>/rule_skills/` with accuracy results found
 * in `<cwd>/output/*.json`, and counts evolution iterations from
 * `<cwd>/logs/evolution/*.json`. Thresholds come from `<cwd>/.env`
 * (SKILL_ACCURACY, MAX_ITERATIONS).
 */
export class SkillTestingPipeline extends Pipeline {
  /**
   * @param {object} workspace - Workspace handle; only `workspace.cwd` is read here.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
    this.skillsToTest = [];
    this.skillsTested = {};
    this.skillsPassing = [];
    this.iterationCount = 0;
    this._accuracyThreshold = 0.9;
    this._maxIterations = 20;
    this._scanWorkspace();
  }

  /** Reload configuration, skills, test results and the evolution log count. */
  _scanWorkspace() {
    this._loadConfig();
    this._loadSkills();
    this._loadTestResults();
    this._loadEvolutionLog();
  }

  /**
   * Read SKILL_ACCURACY and MAX_ITERATIONS from `.env`, if the file exists.
   * parseFloat/parseInt return NaN rather than throwing, so unparsable values
   * are ignored and the previous setting stays in effect.
   */
  _loadConfig() {
    const envPath = path.join(this._workspace.cwd, ".env");
    if (!fs.existsSync(envPath)) return;
    for (const line of fs.readFileSync(envPath, "utf-8").split("\n")) {
      if (line.startsWith("SKILL_ACCURACY=")) {
        const accuracy = Number.parseFloat(line.split("=")[1]);
        if (Number.isFinite(accuracy)) this._accuracyThreshold = accuracy;
      }
      if (line.startsWith("MAX_ITERATIONS=")) {
        const iterations = Number.parseInt(line.split("=")[1], 10);
        if (Number.isFinite(iterations)) this._maxIterations = iterations;
      }
    }
  }

  /**
   * List `rule_skills/` sub-directories (names starting with "__" are ignored)
   * that contain a SKILL.md or any `.py` file.
   */
  _loadSkills() {
    this.skillsToTest = [];
    const dir = path.join(this._workspace.cwd, "rule_skills");
    if (!fs.existsSync(dir)) return;
    for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
      if (e.isDirectory() && !e.name.startsWith("__")) {
        const p = path.join(dir, e.name);
        if (fs.existsSync(path.join(p, "SKILL.md")) || fs.readdirSync(p).some((f) => f.endsWith(".py"))) {
          this.skillsToTest.push(e.name);
        }
      }
    }
  }

  /**
   * Read every `output/*.json` file that has an `accuracy` field and keep the
   * best accuracy seen per id (`rule_id`, or the file name without extension).
   */
  _loadTestResults() {
    this.skillsTested = {};
    this.skillsPassing = [];
    const outDir = path.join(this._workspace.cwd, "output");
    if (!fs.existsSync(outDir)) return;
    for (const f of fs.readdirSync(outDir).filter((f) => f.endsWith(".json"))) {
      try {
        const data = JSON.parse(fs.readFileSync(path.join(outDir, f), "utf-8"));
        if (data.accuracy != null) {
          const acc = Number.parseFloat(data.accuracy);
          // A non-numeric accuracy would store NaN, which is neither passing nor failing.
          if (!Number.isFinite(acc)) continue;
          const ruleId = data.rule_id || path.parse(f).name;
          this.skillsTested[ruleId] = Math.max(this.skillsTested[ruleId] || 0, acc);
        }
      } catch { /* unreadable or malformed result file: skip it */ }
    }
    this.skillsPassing = Object.entries(this.skillsTested).filter(([, acc]) => acc >= this._accuracyThreshold).map(([id]) => id);
  }

  /** Set `iterationCount` to the number of `*.json` files in `logs/evolution/` (0 if absent). */
  _loadEvolutionLog() {
    const logDir = path.join(this._workspace.cwd, "logs", "evolution");
    if (!fs.existsSync(logDir)) { this.iterationCount = 0; return; }
    this.iterationCount = fs.readdirSync(logDir).filter((f) => f.endsWith(".json")).length;
  }

  /** @returns {boolean} True when a result has been recorded for the given skill id. */
  _hasResult(skillId) {
    return Object.prototype.hasOwnProperty.call(this.skillsTested, skillId);
  }

  /**
   * Rescan the workspace and build a markdown summary with next-step guidance.
   * @returns {string} Markdown sections joined by blank lines.
   */
  describeState() {
    this._scanWorkspace();
    const total = this.skillsToTest.length;
    const tested = Object.keys(this.skillsTested).length;
    const passing = this.skillsPassing.length;
    const failing = Object.entries(this.skillsTested).filter(([, acc]) => acc < this._accuracyThreshold);
    const untested = this.skillsToTest.filter((s) => !this._hasResult(s));

    const parts = ["## Current Phase: SKILL_TESTING"];
    parts.push(`### Progress\n- Skills to test: ${total}\n- Tested: ${tested}\n- Passing (>=${this._accuracyThreshold}): ${passing}\n- Evolution iterations: ${this.iterationCount}/${this._maxIterations}`);

    if (this.exitCriteriaMet()) {
      parts.push("### Ready\nAll skills passing. Proceed to DISTILLATION.");
    } else if (untested.length) {
      parts.push(`### What to do now\nTest these skills: ${untested.slice(0, 10).join(", ")}`);
    } else if (failing.length) {
      parts.push("### What to do now — Evolution Cycle\nFailing skills:\n" +
        failing.map(([id, acc]) => `- ${id}: ${acc.toFixed(2)}`).join("\n") +
        "\n\nFollow: diagnose -> classify -> fix -> retest -> log");
    }
    return parts.join("\n\n");
  }

  /**
   * React to a finished tool call. Any successful `workspace_file` or
   * `evolution_cycle` call triggers a full workspace rescan.
   * @param {string} toolName
   * @param {object} toolInput
   * @param {object} result - Tool result; `result.isError` short-circuits.
   * @returns {PipelineEvent|null} A "phase_ready" event when the exit criteria just became true, else null.
   */
  onToolResult(toolName, toolInput, result) {
    if (result.isError) return null;
    const wasReady = this.exitCriteriaMet();
    if (toolName === "workspace_file" || toolName === "evolution_cycle") this._scanWorkspace();
    if (!wasReady && this.exitCriteriaMet()) {
      return new PipelineEvent({ type: "phase_ready", message: "Skill testing complete. Ready for DISTILLATION.", nextPhase: Phase.DISTILLATION });
    }
    return null;
  }

  /**
   * @returns {boolean} True when there is at least one skill, every skill in
   *   `skillsToTest` has a recorded result, and the number of those skills at
   *   or above the accuracy threshold is at least `total * threshold`.
   *   Results recorded under ids that are not skills under test are ignored,
   *   so they cannot stand in for a skill that has not been tested.
   */
  exitCriteriaMet() {
    const total = this.skillsToTest.length;
    if (!total) return false;
    if (!this.skillsToTest.every((s) => this._hasResult(s))) return false;
    const passing = this.skillsToTest.filter((s) => this.skillsTested[s] >= this._accuracyThreshold).length;
    // NOTE(review): the accuracy threshold doubles as the required pass ratio here,
    // as in the original code — confirm that is intended.
    return passing >= total * this._accuracyThreshold;
  }
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import crypto from "node:crypto";
|
|
4
|
+
import { BaseTool, ToolResult } from "./base.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Spawn a sub-agent for parallel work.
 * Creates a child engine through the injected factory, sharing the workspace
 * filesystem. Results arrive via files under `sub_agents/<task_id>/`.
 */
export class AgentTool extends BaseTool {
  /**
   * @param {object} workspace - Workspace handle; `cwd` and `sessionId` are read.
   * @param {Function} engineFactory - Called with the session id; must return an
   *   engine exposing an async-iterable `runTurn(taskDescription)`.
   */
  constructor(workspace, engineFactory) {
    super();
    this._workspace = workspace;
    this._engineFactory = engineFactory;
    // task id -> promise of the sub-agent run still in flight
    this._runningTasks = new Map();
  }

  get name() { return "agent_tool"; }
  get description() {
    return (
      "Spawn a sub-agent for an independent task. Give it a complete, " +
      "self-contained task description. The sub-agent works in the same " +
      "workspace and writes results to files. Use this for parallel rule " +
      "processing, batch testing, or any work that can run independently."
    );
  }

  get inputSchema() {
    return {
      type: "object",
      properties: {
        task_description: {
          type: "string",
          description: "Complete task description for the sub-agent. Be specific — it has no conversation context.",
        },
        task_id: { type: "string", description: "Optional task identifier (auto-generated if omitted)" },
      },
      required: ["task_description"],
    };
  }

  /**
   * Start a sub-agent in the background and return immediately.
   *
   * Writes `task.md` up front; when the run ends it writes `result.json`,
   * `output.md` and `status.txt` ("completed" or "failed: <message>").
   *
   * @param {object} input
   * @param {string} input.task_description - Task text passed to the sub-agent.
   * @param {string} [input.task_id] - Directory name under `sub_agents/`.
   * @returns {Promise<ToolResult>} JSON describing the started task, or an error result.
   */
  async execute(input) {
    const taskDesc = input.task_description || "";
    const taskId = input.task_id || `task_${crypto.randomUUID().slice(0, 8)}`;

    if (!taskDesc) return new ToolResult("No task_description provided", true);

    // task_id becomes a directory name: reject anything that could escape sub_agents/.
    const isSafeId = typeof taskId === "string" && taskId !== "." && taskId !== ".." && !/[\\/\0]/.test(taskId);
    if (!isSafeId) return new ToolResult(`Invalid task_id: ${String(taskId)}`, true);

    // A second run under the same id would overwrite the files of the first.
    if (this._runningTasks.has(taskId)) {
      return new ToolResult(`Sub-agent task ${taskId} is already running`, true);
    }

    // Create sub-agent output directory
    const taskDir = path.join(this._workspace.cwd, "sub_agents", taskId);
    try {
      fs.mkdirSync(taskDir, { recursive: true });
      // Drop a status left by an earlier run with this id so it is not mistaken
      // for the outcome of the run being started now.
      fs.rmSync(path.join(taskDir, "status.txt"), { force: true });
      fs.writeFileSync(path.join(taskDir, "task.md"), taskDesc, "utf-8");
    } catch (e) {
      return new ToolResult(`Failed to prepare sub-agent directory: ${e.message}`, true);
    }

    // Create child engine sharing the same workspace
    let childEngine;
    try {
      childEngine = this._engineFactory(this._workspace.sessionId);
    } catch (e) {
      return new ToolResult(`Failed to create sub-agent: ${e.message}`, true);
    }

    // Run the sub-agent asynchronously (fire and forget)
    const taskPromise = (async () => {
      const resultEvents = [];
      try {
        for await (const event of childEngine.runTurn(taskDesc)) {
          resultEvents.push({
            type: event.type,
            text: event.text,
            name: event.name,
            output: event.output,
          });
        }

        fs.writeFileSync(
          path.join(taskDir, "result.json"),
          JSON.stringify(resultEvents, null, 2),
          "utf-8",
        );

        const textParts = resultEvents.filter((e) => e.type === "text_delta").map((e) => e.text || "");
        fs.writeFileSync(path.join(taskDir, "output.md"), textParts.join(""), "utf-8");
        fs.writeFileSync(path.join(taskDir, "status.txt"), "completed", "utf-8");
      } catch (e) {
        fs.writeFileSync(path.join(taskDir, "status.txt"), `failed: ${e.message}`, "utf-8");
      }
    })();

    this._runningTasks.set(taskId, taskPromise);
    // Swallow a failure to write the status file so it cannot surface as an
    // unhandled rejection, then forget the task.
    taskPromise.catch(() => {}).finally(() => this._runningTasks.delete(taskId));

    return new ToolResult(JSON.stringify({
      task_id: taskId,
      status: "started",
      output_dir: `sub_agents/${taskId}/`,
      message: `Sub-agent started. Check sub_agents/${taskId}/status.txt for completion, output.md for text.`,
    }, null, 2));
  }
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
 * Value object returned by every tool: the text a tool produced plus a flag
 * telling whether that text describes a failure.
 */
export class ToolResult {
  /**
   * @param {string} content - Text produced by the tool
   * @param {boolean} [isError] - True when the tool failed; defaults to false
   */
  constructor(content, isError = false) {
    Object.assign(this, { content, isError });
  }
}
|
|
14
|
+
|
|
15
|
+
/**
 * Contract every KC Agent tool fulfils.
 * Each member here throws "Not implemented"; a concrete tool overrides all four
 * and is then registered with ToolRegistry.
 */
export class BaseTool {
  /**
   * Identifier of the tool.
   * @returns {string}
   */
  get name() {
    throw new Error("Not implemented");
  }

  /**
   * Human-readable explanation of what the tool does.
   * @returns {string}
   */
  get description() {
    throw new Error("Not implemented");
  }

  /**
   * JSON Schema describing the accepted input object.
   * @returns {object}
   */
  get inputSchema() {
    throw new Error("Not implemented");
  }

  /**
   * Run the tool.
   * @param {object} input - Arguments matching `inputSchema`
   * @returns {Promise<ToolResult>} Rejects with "Not implemented" unless overridden
   */
  async execute(input) {
    throw new Error("Not implemented");
  }
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { BaseTool, ToolResult } from "./base.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Generate a self-contained HTML dashboard from project metrics.
 * Aggregates: rule catalog, confidence distribution, evolution iteration count,
 * and QC batch count.
 */
export class DashboardRenderTool extends BaseTool {
  /**
   * @param {object} workspace - Workspace handle; `cwd` and `resolvePath()` are used.
   */
  constructor(workspace) {
    super();
    this._workspace = workspace;
  }

  get name() { return "dashboard_render"; }
  get description() {
    return (
      "Generate a self-contained HTML dashboard from project metrics. " +
      "Aggregates accuracy, confidence distribution, evolution history, " +
      "and QC results. Saves to output/dashboards/."
    );
  }

  get inputSchema() {
    return {
      type: "object",
      properties: {
        output_path: { type: "string", description: "Output file path (default: output/dashboards/dashboard.html)" },
      },
    };
  }

  /**
   * Collect metrics, render the dashboard and write it to disk.
   * @param {object} input
   * @param {string} [input.output_path] - Destination, resolved via the workspace.
   * @returns {Promise<ToolResult>} Confirmation with the written size, or an error result.
   */
  async execute(input) {
    const outputPath = input.output_path || "output/dashboards/dashboard.html";

    try {
      const metrics = this._collectMetrics();
      const html = this._renderHtml(metrics);
      const resolved = this._workspace.resolvePath(outputPath);
      fs.mkdirSync(path.dirname(resolved), { recursive: true });
      fs.writeFileSync(resolved, html, "utf-8");
      // Report the encoded size: html.length counts UTF-16 code units, not bytes.
      return new ToolResult(`Dashboard saved to ${outputPath} (${Buffer.byteLength(html, "utf-8")} bytes)`);
    } catch (e) {
      return new ToolResult(e.message, true);
    }
  }

  /**
   * Gather dashboard metrics from the workspace. Missing directories and
   * malformed JSON files are skipped.
   * @returns {{generated_at: string, rules: Array<{id: *, description: string}>,
   *   confidence_distribution: {low: number, medium: number, high: number},
   *   evolution_iterations: number, qc_batches: number}}
   */
  _collectMetrics() {
    const ws = this._workspace.cwd;
    const metrics = {
      generated_at: new Date().toISOString(),
      rules: [],
      confidence_distribution: { low: 0, medium: 0, high: 0 },
      evolution_iterations: 0,
      qc_batches: 0,
    };

    const catalogPath = path.join(ws, "rules", "catalog.json");
    if (fs.existsSync(catalogPath)) {
      try {
        const parsed = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
        // Accept both a bare array and an object wrapping a `rules` array.
        const rules = Array.isArray(parsed) ? parsed : parsed?.rules;
        if (Array.isArray(rules)) {
          metrics.rules = rules.map((r) => ({
            id: r?.id || "?",
            // String() so a non-string description cannot throw and drop the whole catalog.
            description: String(r?.description || "").slice(0, 60),
          }));
        }
      } catch { /* unreadable or malformed catalog: leave rules empty */ }
    }

    const resultsDir = path.join(ws, "output", "results");
    if (fs.existsSync(resultsDir)) {
      for (const f of fs.readdirSync(resultsDir).filter((f) => f.endsWith(".json"))) {
        try {
          const data = JSON.parse(fs.readFileSync(path.join(resultsDir, f), "utf-8"));
          const band = data.confidence_band || "medium";
          // Own-property check: `in` would also match inherited keys such as "toString".
          if (Object.prototype.hasOwnProperty.call(metrics.confidence_distribution, band)) {
            metrics.confidence_distribution[band]++;
          }
        } catch { /* unreadable or malformed result file: skip it */ }
      }
    }

    const evoDir = path.join(ws, "logs", "evolution");
    if (fs.existsSync(evoDir)) {
      metrics.evolution_iterations = fs.readdirSync(evoDir).filter((f) => f.endsWith(".json")).length;
    }

    const qcDir = path.join(ws, "output", "qc");
    if (fs.existsSync(qcDir)) {
      metrics.qc_batches = fs.readdirSync(qcDir).filter((f) => f.endsWith(".json")).length;
    }

    return metrics;
  }

  /**
   * Escape a value for safe inclusion in HTML text or attribute content.
   * @param {*} value - Converted with String() before escaping.
   * @returns {string}
   */
  _escapeHtml(value) {
    return String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  /**
   * Render the metrics object as a complete HTML document.
   * Rule ids and descriptions come from workspace files, so they are escaped.
   * @param {object} metrics - Object shaped like the result of `_collectMetrics()`.
   * @returns {string} HTML source.
   */
  _renderHtml(metrics) {
    const rulesHtml = metrics.rules
      .map((r) => `<tr><td>${this._escapeHtml(r.id)}</td><td>${this._escapeHtml(r.description)}</td></tr>`)
      .join("\n");
    const conf = metrics.confidence_distribution;
    const total = conf.low + conf.medium + conf.high;

    return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>KC Agent Dashboard</title>
<style>
body { font-family: -apple-system, sans-serif; max-width: 900px; margin: 0 auto; padding: 20px; background: #0a0a0a; color: #e5e5e5; }
h1 { color: #a3a3a3; font-size: 1.5em; }
h2 { color: #737373; font-size: 1.1em; margin-top: 2em; }
.card { background: #171717; border: 1px solid #262626; border-radius: 8px; padding: 16px; margin: 12px 0; }
.metric { display: inline-block; margin-right: 32px; }
.metric .value { font-size: 2em; font-weight: bold; color: #22c55e; }
.metric .label { font-size: 0.85em; color: #737373; }
table { width: 100%; border-collapse: collapse; }
th, td { text-align: left; padding: 8px; border-bottom: 1px solid #262626; }
th { color: #737373; font-size: 0.85em; }
.bar { height: 20px; border-radius: 4px; display: inline-block; }
.bar-low { background: #ef4444; }
.bar-med { background: #eab308; }
.bar-high { background: #22c55e; }
.timestamp { color: #525252; font-size: 0.8em; }
</style>
</head>
<body>
<h1>KC Agent Dashboard</h1>
<p class="timestamp">Generated: ${metrics.generated_at}</p>
<div class="card">
<div class="metric"><span class="value">${metrics.rules.length}</span><br><span class="label">Rules</span></div>
<div class="metric"><span class="value">${total}</span><br><span class="label">Results</span></div>
<div class="metric"><span class="value">${metrics.evolution_iterations}</span><br><span class="label">Evolution Cycles</span></div>
<div class="metric"><span class="value">${metrics.qc_batches}</span><br><span class="label">QC Batches</span></div>
</div>
<h2>Confidence Distribution</h2>
<div class="card">
<div>Low: ${conf.low} <span class="bar bar-low" style="width:${conf.low * 5}px"></span></div>
<div>Medium: ${conf.medium} <span class="bar bar-med" style="width:${conf.medium * 5}px"></span></div>
<div>High: ${conf.high} <span class="bar bar-high" style="width:${conf.high * 5}px"></span></div>
</div>
<h2>Rules</h2>
<div class="card">
<table>
<tr><th>ID</th><th>Description</th></tr>
${rulesHtml || '<tr><td colspan="2">No rules in catalog yet</td></tr>'}
</table>
</div>
</body>
</html>`;
  }
}
|