kc-beta 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +81 -0
- package/LICENSE-COMMERCIAL.md +125 -0
- package/README.md +21 -3
- package/package.json +14 -5
- package/src/agent/context-window.js +9 -12
- package/src/agent/context.js +14 -1
- package/src/agent/document-parser.js +169 -0
- package/src/agent/engine.js +367 -18
- package/src/agent/history/event-history.js +222 -0
- package/src/agent/llm-client.js +55 -0
- package/src/agent/message-utils.js +63 -0
- package/src/agent/pipelines/_milestone-derive.js +511 -0
- package/src/agent/pipelines/base.js +21 -0
- package/src/agent/pipelines/distillation.js +28 -15
- package/src/agent/pipelines/extraction.js +103 -36
- package/src/agent/pipelines/finalization.js +178 -11
- package/src/agent/pipelines/index.js +6 -1
- package/src/agent/pipelines/initializer.js +74 -8
- package/src/agent/pipelines/production-qc.js +31 -44
- package/src/agent/pipelines/skill-authoring.js +97 -80
- package/src/agent/pipelines/skill-testing.js +67 -23
- package/src/agent/retry.js +10 -2
- package/src/agent/scheduler.js +14 -2
- package/src/agent/session-state.js +18 -1
- package/src/agent/skill-loader.js +13 -7
- package/src/agent/skill-validator.js +19 -5
- package/src/agent/task-manager.js +61 -5
- package/src/agent/tools/document-chunk.js +21 -9
- package/src/agent/tools/phase-advance.js +18 -3
- package/src/agent/tools/release.js +51 -9
- package/src/agent/tools/rule-catalog.js +11 -1
- package/src/agent/tools/workspace-file.js +32 -0
- package/src/agent/workspace.js +39 -1
- package/src/cli/components.js +64 -14
- package/src/cli/index.js +62 -3
- package/src/cli/meme.js +26 -25
- package/src/config.js +65 -22
- package/src/model-tiers.json +24 -8
- package/src/providers.js +42 -0
- package/template/release/v1/README.md.tmpl +108 -0
- package/template/release/v1/catalog.json.tmpl +4 -0
- package/template/release/v1/kc_runtime/__init__.py +11 -0
- package/template/release/v1/kc_runtime/confidence.py +63 -0
- package/template/release/v1/kc_runtime/doc_parser.py +127 -0
- package/template/release/v1/manifest.json.tmpl +11 -0
- package/template/release/v1/render_dashboard.py +117 -0
- package/template/release/v1/run.py +212 -0
- package/template/release/v1/serve.sh +17 -0
- package/template/skills/en/meta-meta/work-decomposition/SKILL.md +266 -0
- package/template/skills/en/skill-creator/SKILL.md +1 -1
- package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +264 -0
- package/template/skills/zh/skill-creator/SKILL.md +1 -1
|
@@ -3,6 +3,7 @@ import path from "node:path";
|
|
|
3
3
|
import { Phase, PipelineEvent } from "./index.js";
|
|
4
4
|
import { Pipeline } from "./base.js";
|
|
5
5
|
import { SkillValidator } from "../skill-validator.js";
|
|
6
|
+
import { deriveSkillAuthoringMilestones } from "./_milestone-derive.js";
|
|
6
7
|
|
|
7
8
|
export class SkillAuthoringPipeline extends Pipeline {
|
|
8
9
|
/**
|
|
@@ -49,83 +50,22 @@ export class SkillAuthoringPipeline extends Pipeline {
|
|
|
49
50
|
}
|
|
50
51
|
|
|
51
52
|
_scanSkills() {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
//
|
|
55
|
-
//
|
|
56
|
-
// (
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
this.ruleIdsCovered = new Set();
|
|
62
|
-
const dir = path.join(this._workspace.cwd, "rule_skills");
|
|
63
|
-
if (!fs.existsSync(dir)) return;
|
|
64
|
-
for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
|
|
65
|
-
if (!e.isDirectory() || e.name.startsWith("__")) continue;
|
|
66
|
-
const skillPath = path.join(dir, e.name);
|
|
67
|
-
if (fs.existsSync(path.join(skillPath, "SKILL.md")) || fs.readdirSync(skillPath).some((f) => f.endsWith(".py"))) {
|
|
68
|
-
this.skillsAuthored.push(e.name);
|
|
69
|
-
}
|
|
70
|
-
const scriptsDir = path.join(skillPath, "scripts");
|
|
71
|
-
if (fs.existsSync(scriptsDir) && fs.readdirSync(scriptsDir).length > 0) {
|
|
72
|
-
this.skillsWithScripts.push(e.name);
|
|
73
|
-
}
|
|
74
|
-
this._walkForRuleIds(skillPath);
|
|
75
|
-
}
|
|
53
|
+
// v0.7.0 A1: route through filesystem-derived milestone helper. The
|
|
54
|
+
// helper centralizes the ruleId extraction patterns (R### dirs,
|
|
55
|
+
// check_r###.py, range dirs R078_R128, grouped check_r###_r###.py)
|
|
56
|
+
// and recognizes both root-level check_*.py AND scripts/check*.py
|
|
57
|
+
// (per A6 — XM E2E #5 used scripts/ subdir).
|
|
58
|
+
const m = deriveSkillAuthoringMilestones(this._workspace);
|
|
59
|
+
this.skillsAuthored = [...m.skillsAuthored];
|
|
60
|
+
this.skillsWithScripts = [...m.skillsWithScripts];
|
|
61
|
+
this.ruleIdsCovered = new Set(m.ruleIdsCovered);
|
|
76
62
|
}
|
|
77
63
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
* - Grouped script: check_r002_r007.py → covers R002 through R007
|
|
84
|
-
*/
|
|
85
|
-
_walkForRuleIds(skillDir) {
|
|
86
|
-
const dirName = path.basename(skillDir);
|
|
87
|
-
const dirMatch = dirName.match(/^R0*(\d+)$/i);
|
|
88
|
-
if (dirMatch) this.ruleIdsCovered.add(`R${String(parseInt(dirMatch[1], 10)).padStart(3, "0")}`);
|
|
89
|
-
|
|
90
|
-
const walk = (d) => {
|
|
91
|
-
let entries;
|
|
92
|
-
try { entries = fs.readdirSync(d, { withFileTypes: true }); }
|
|
93
|
-
catch { return; }
|
|
94
|
-
for (const e of entries) {
|
|
95
|
-
if (e.name.startsWith(".")) continue;
|
|
96
|
-
const p = path.join(d, e.name);
|
|
97
|
-
if (e.isDirectory()) { walk(p); continue; }
|
|
98
|
-
// Per-rule: check_r014.py
|
|
99
|
-
const single = e.name.match(/check_r0*(\d+)\.py$/i);
|
|
100
|
-
if (single) {
|
|
101
|
-
this.ruleIdsCovered.add(`R${String(parseInt(single[1], 10)).padStart(3, "0")}`);
|
|
102
|
-
continue;
|
|
103
|
-
}
|
|
104
|
-
// Grouped: check_r002_r007.py, check_r002-r007.py, check_r59_r77.py
|
|
105
|
-
const grouped = e.name.match(/check_r0*(\d+)[_-]+r0*(\d+)\.py$/i);
|
|
106
|
-
if (grouped) {
|
|
107
|
-
const lo = parseInt(grouped[1], 10);
|
|
108
|
-
const hi = parseInt(grouped[2], 10);
|
|
109
|
-
for (let n = lo; n <= hi; n++) {
|
|
110
|
-
this.ruleIdsCovered.add(`R${String(n).padStart(3, "0")}`);
|
|
111
|
-
}
|
|
112
|
-
continue;
|
|
113
|
-
}
|
|
114
|
-
// Directory names that encode ranges: R078_R128/
|
|
115
|
-
// handled by caller passing skillDir
|
|
116
|
-
}
|
|
117
|
-
};
|
|
118
|
-
// Also handle dirs named like R078_R128/
|
|
119
|
-
const rangeDir = dirName.match(/^R0*(\d+)[_-]R0*(\d+)$/i);
|
|
120
|
-
if (rangeDir) {
|
|
121
|
-
const lo = parseInt(rangeDir[1], 10);
|
|
122
|
-
const hi = parseInt(rangeDir[2], 10);
|
|
123
|
-
for (let n = lo; n <= hi; n++) {
|
|
124
|
-
this.ruleIdsCovered.add(`R${String(n).padStart(3, "0")}`);
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
walk(skillDir);
|
|
128
|
-
}
|
|
64
|
+
// v0.7.0 A1: ruleId extraction moved to _milestone-derive.js
|
|
65
|
+
// (deriveSkillAuthoringMilestones). Pattern recognition is identical
|
|
66
|
+
// — single rule (R014, check_r014.py), grouped scripts
|
|
67
|
+
// (check_r002_r007.py), range dirs (R078_R128). Kept as a single
|
|
68
|
+
// canonical implementation rather than duplicating across pipelines.
|
|
129
69
|
|
|
130
70
|
describeState() {
|
|
131
71
|
this._scanWorkspace();
|
|
@@ -136,15 +76,37 @@ export class SkillAuthoringPipeline extends Pipeline {
|
|
|
136
76
|
"## Phase: SKILL_AUTHORING\n" +
|
|
137
77
|
"Write verification skills for each extracted rule. Skills are first-class " +
|
|
138
78
|
"deliverables — they may serve as the production solution when worker LLM " +
|
|
139
|
-
"workflows are insufficient. Follow
|
|
140
|
-
"BUILD mode.\n\n" +
|
|
79
|
+
"workflows are insufficient. Follow the canonical skill-folder layout " +
|
|
80
|
+
"(below). This is BUILD mode.\n\n" +
|
|
81
|
+
// v0.7.0 D1: inline the canonical folder structure spec so the
|
|
82
|
+
// agent sees it in every system prompt of this phase. E2E #5
|
|
83
|
+
// showed three of four contestants ignored the meta-meta spec
|
|
84
|
+
// because it required navigating to read the SKILL.md file
|
|
85
|
+
// separately. Inlining costs ~250 tokens and dramatically improves
|
|
86
|
+
// first-attempt structural compliance.
|
|
87
|
+
"### Canonical skill folder layout\n" +
|
|
88
|
+
"```\n" +
|
|
89
|
+
"rule_skills/\n" +
|
|
90
|
+
" R014/ # one dir per rule (or grouped range)\n" +
|
|
91
|
+
" SKILL.md # YAML frontmatter (name+description) + methodology\n" +
|
|
92
|
+
" check_r014.py # entry point: def check_rule|verify|check|evaluate(...)\n" +
|
|
93
|
+
" references/regulation.md # verbatim regulation text (optional)\n" +
|
|
94
|
+
" references/interpretation.md # edge-case notes (optional)\n" +
|
|
95
|
+
" assets/test_cases.json # annotated samples + expected verdicts (optional)\n" +
|
|
96
|
+
"```\n" +
|
|
97
|
+
"Validator-accepted alternatives: `scripts/check_r###.py` (under scripts/) " +
|
|
98
|
+
"instead of root-level. SKILL.md filename is case-insensitive (skill.md " +
|
|
99
|
+
"is also accepted). The check.py just needs a top-level `def` at module " +
|
|
100
|
+
"level — entry-point name does not have to match a strict pattern.\n\n" +
|
|
141
101
|
// D2: soft granularity nudge
|
|
142
102
|
"**Granularity preference:** 1 rule = 1 skill directory. Group rules into " +
|
|
143
103
|
"the same file ONLY when they share evidence and fail together (e.g. " +
|
|
144
104
|
"siblings from the same required-fields table). When grouping, name the " +
|
|
145
105
|
"file with the range: `check_r002_r007.py`. Downstream consumers " +
|
|
146
|
-
"(workflow-run, dashboards) count rule coverage by parsing
|
|
147
|
-
"so the file-naming matters
|
|
106
|
+
"(workflow-run, dashboards, release tool) count rule coverage by parsing " +
|
|
107
|
+
"these names, so the file-naming matters. (Read `meta-meta/work-decomposition` " +
|
|
108
|
+
"for the full grouping/ordering decision framework + PATTERNS.md memory " +
|
|
109
|
+
"discipline.)\n\n" +
|
|
148
110
|
"**Do not write to rules/catalog.json via sandbox_exec.** Use the " +
|
|
149
111
|
"`rule_catalog` tool for any catalog edits — sandbox_exec bypasses the " +
|
|
150
112
|
"workspace file lock and races with parallel workers."
|
|
@@ -194,7 +156,38 @@ export class SkillAuthoringPipeline extends Pipeline {
|
|
|
194
156
|
onToolResult(toolName, toolInput, result) {
|
|
195
157
|
if (result.isError) return null;
|
|
196
158
|
const wasReady = this.exitCriteriaMet();
|
|
197
|
-
|
|
159
|
+
const writeToSkill = toolName === "workspace_file" &&
|
|
160
|
+
toolInput?.operation === "write" &&
|
|
161
|
+
(toolInput.path || "").includes("rule_skills/");
|
|
162
|
+
if (writeToSkill) {
|
|
163
|
+
this._scanSkills();
|
|
164
|
+
// v0.7.0 A4: validate this specific file immediately if it looks
|
|
165
|
+
// like a check.py. Surfaces syntax/entry-point issues in the next
|
|
166
|
+
// describeState rather than waiting for the phase boundary —
|
|
167
|
+
// E2E #5 had skill_authoring force-bypassed before exitCriteriaMet
|
|
168
|
+
// ever fired, so the v0.6.2 boundary-only validator never ran in
|
|
169
|
+
// practice.
|
|
170
|
+
const p = toolInput.path || "";
|
|
171
|
+
if (/\/check[_a-zA-Z0-9-]*\.py$/i.test(p) && /^rule_skills\//.test(p)) {
|
|
172
|
+
const abs = path.join(this._workspace.cwd, p);
|
|
173
|
+
// Invalidate any stale mtime cache entry for this path then
|
|
174
|
+
// re-validate. Folds the result into _validationFailures so
|
|
175
|
+
// describeState picks it up.
|
|
176
|
+
this._validator.invalidate(abs);
|
|
177
|
+
const r = this._validator.validateFile(abs);
|
|
178
|
+
if (!r.ok) {
|
|
179
|
+
// Replace any prior failure record for this path
|
|
180
|
+
this._validationFailures = this._validationFailures.filter(
|
|
181
|
+
(f) => f.filePath !== abs,
|
|
182
|
+
);
|
|
183
|
+
this._validationFailures.push({ filePath: abs, error: r.error || "unknown" });
|
|
184
|
+
} else {
|
|
185
|
+
this._validationFailures = this._validationFailures.filter(
|
|
186
|
+
(f) => f.filePath !== abs,
|
|
187
|
+
);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
198
191
|
if (!wasReady && this.exitCriteriaMet()) {
|
|
199
192
|
return new PipelineEvent({ type: "phase_ready", message: "Skill authoring complete. Ready for SKILL_TESTING.", nextPhase: Phase.SKILL_TESTING });
|
|
200
193
|
}
|
|
@@ -242,6 +235,30 @@ export class SkillAuthoringPipeline extends Pipeline {
|
|
|
242
235
|
* v0.6.2 I2: gather every check_r###.py path under rule_skills/. Used by
|
|
243
236
|
* the skill validator. Walks one level into each skill directory.
|
|
244
237
|
*/
|
|
238
|
+
/**
|
|
239
|
+
* v0.6.3 (#74): SKILL_AUTHORING writes per-rule check scripts under
|
|
240
|
+
* rule_skills/. Workflow runs against production samples or distillation
|
|
241
|
+
* outputs are later-phase work.
|
|
242
|
+
*/
|
|
243
|
+
phaseMisfitHint(toolName, toolInput, result) {
|
|
244
|
+
if (result?.isError) return null;
|
|
245
|
+
const exitText = this.exitCriteriaMet()
|
|
246
|
+
? "Skill-authoring exit criteria are MET — call phase_advance(to=\"skill_testing\") to proceed."
|
|
247
|
+
: "Skill-authoring not yet complete (see describeState).";
|
|
248
|
+
|
|
249
|
+
if (toolName === "workspace_file" && toolInput?.operation === "write") {
|
|
250
|
+
const p = toolInput.path || "";
|
|
251
|
+
if (p.startsWith("workflows/")) {
|
|
252
|
+
return `Writing under workflows/ is DISTILLATION-phase work, but engine is in SKILL_AUTHORING. ${exitText}`;
|
|
253
|
+
}
|
|
254
|
+
if (p.startsWith("output/results/")) {
|
|
255
|
+
return `Writing under output/results/ is PRODUCTION_QC-phase work, but engine is in SKILL_AUTHORING. ${exitText}`;
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
return null;
|
|
260
|
+
}
|
|
261
|
+
|
|
245
262
|
_collectCheckScripts() {
|
|
246
263
|
const out = [];
|
|
247
264
|
const dir = path.join(this._workspace.cwd, "rule_skills");
|
|
@@ -2,6 +2,7 @@ import fs from "node:fs";
|
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { Phase, PipelineEvent } from "./index.js";
|
|
4
4
|
import { Pipeline } from "./base.js";
|
|
5
|
+
import { deriveSkillAuthoringMilestones, deriveSkillTestingMilestones } from "./_milestone-derive.js";
|
|
5
6
|
|
|
6
7
|
export class SkillTestingPipeline extends Pipeline {
|
|
7
8
|
constructor(workspace) {
|
|
@@ -33,35 +34,48 @@ export class SkillTestingPipeline extends Pipeline {
|
|
|
33
34
|
}
|
|
34
35
|
|
|
35
36
|
_loadSkills() {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if (e.isDirectory() && !e.name.startsWith("__")) {
|
|
41
|
-
const p = path.join(dir, e.name);
|
|
42
|
-
if (fs.existsSync(path.join(p, "SKILL.md")) || fs.readdirSync(p).some((f) => f.endsWith(".py"))) {
|
|
43
|
-
this.skillsToTest.push(e.name);
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
}
|
|
37
|
+
// v0.7.0 A1: route through filesystem-derived helper (skill_authoring's
|
|
38
|
+
// skillsAuthored is the canonical "what skills exist" view).
|
|
39
|
+
const m = deriveSkillAuthoringMilestones(this._workspace);
|
|
40
|
+
this.skillsToTest = [...m.skillsAuthored];
|
|
47
41
|
}
|
|
48
42
|
|
|
49
43
|
_loadTestResults() {
|
|
50
44
|
this.skillsTested = {};
|
|
51
45
|
this.skillsPassing = [];
|
|
46
|
+
|
|
47
|
+
// Layer 1 (canonical schema): output/<rule_id>.json with `accuracy` field.
|
|
48
|
+
// Carries the actual numeric threshold check.
|
|
52
49
|
const outDir = path.join(this._workspace.cwd, "output");
|
|
53
|
-
if (
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
50
|
+
if (fs.existsSync(outDir)) {
|
|
51
|
+
for (const f of fs.readdirSync(outDir).filter((f) => f.endsWith(".json"))) {
|
|
52
|
+
try {
|
|
53
|
+
const data = JSON.parse(fs.readFileSync(path.join(outDir, f), "utf-8"));
|
|
54
|
+
if (data.accuracy != null) {
|
|
55
|
+
const ruleId = data.rule_id || path.parse(f).name;
|
|
56
|
+
const acc = parseFloat(data.accuracy);
|
|
57
|
+
this.skillsTested[ruleId] = Math.max(this.skillsTested[ruleId] || 0, acc);
|
|
58
|
+
}
|
|
59
|
+
} catch { /* skip */ }
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Layer 2 (helper-derived floor): per-skill test_results/, tests/, or
|
|
64
|
+
// assets/test_cases.json count as "tested" even without an accuracy
|
|
65
|
+
// reading. Without this floor, agents who tested via sandbox_exec
|
|
66
|
+
// (no accuracy JSON written) showed skillsTested={} despite real
|
|
67
|
+
// testing — exactly the E2E #5 GLM case.
|
|
68
|
+
const m = deriveSkillTestingMilestones(this._workspace);
|
|
69
|
+
for (const id of m.skillsTested) {
|
|
70
|
+
// Test artifact present but no numeric accuracy → record as tested
|
|
71
|
+
// at threshold value (just-passing). The agent can revise via
|
|
72
|
+
// canonical-schema JSON if needed.
|
|
73
|
+
if (!(id in this.skillsTested)) this.skillsTested[id] = this._accuracyThreshold;
|
|
63
74
|
}
|
|
64
|
-
|
|
75
|
+
|
|
76
|
+
this.skillsPassing = Object.entries(this.skillsTested)
|
|
77
|
+
.filter(([, acc]) => acc >= this._accuracyThreshold)
|
|
78
|
+
.map(([id]) => id);
|
|
65
79
|
}
|
|
66
80
|
|
|
67
81
|
_loadEvolutionLog() {
|
|
@@ -104,7 +118,37 @@ export class SkillTestingPipeline extends Pipeline {
|
|
|
104
118
|
exitCriteriaMet() {
|
|
105
119
|
const total = this.skillsToTest.length;
|
|
106
120
|
if (!total) return false;
|
|
107
|
-
|
|
121
|
+
// v0.7.0 H/C2 fix: previous gate `skillsPassing.length >= total * threshold`
|
|
122
|
+
// was multiplying *count* by accuracy threshold (default 0.9), so a run
|
|
123
|
+
// with 1 of 10 skills failing could still pass the gate. The intent is "every
|
|
124
|
+
// skill passes its per-skill threshold" — count parity, not weighted.
|
|
125
|
+
// (Fraction-of-skills fallbacks belong in optional config, not the
|
|
126
|
+
// default exit criterion.)
|
|
127
|
+
return Object.keys(this.skillsTested).length >= total &&
|
|
128
|
+
this.skillsPassing.length >= total;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* v0.6.3 (#74): SKILL_TESTING runs check scripts against test samples and
|
|
133
|
+
* measures accuracy. Writing distillation outputs or production results
|
|
134
|
+
* here means phase boundaries got skipped.
|
|
135
|
+
*/
|
|
136
|
+
phaseMisfitHint(toolName, toolInput, result) {
|
|
137
|
+
if (result?.isError) return null;
|
|
138
|
+
const exitText = this.exitCriteriaMet()
|
|
139
|
+
? "Skill-testing exit criteria are MET — call phase_advance(to=\"distillation\")."
|
|
140
|
+
: "Skill-testing not yet complete.";
|
|
141
|
+
|
|
142
|
+
if (toolName === "workspace_file" && toolInput?.operation === "write") {
|
|
143
|
+
const p = toolInput.path || "";
|
|
144
|
+
if (p.startsWith("workflows/")) {
|
|
145
|
+
return `Writing under workflows/ is DISTILLATION-phase work, but engine is in SKILL_TESTING. ${exitText}`;
|
|
146
|
+
}
|
|
147
|
+
if (p.startsWith("output/results/")) {
|
|
148
|
+
return `Writing under output/results/ is PRODUCTION_QC-phase work, but engine is in SKILL_TESTING. ${exitText}`;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
return null;
|
|
108
152
|
}
|
|
109
153
|
|
|
110
154
|
exportState() {
|
package/src/agent/retry.js
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Retry wrapper with exponential backoff and jitter.
|
|
3
3
|
* Designed for LLM API calls — retries transient errors, fails fast on auth/validation errors.
|
|
4
|
+
*
|
|
5
|
+
* v0.6.3.1: KC_MAX_RETRIES env override. Default 10 attempts ≈ 5 min of
|
|
6
|
+
* exponential backoff (1+2+4+8+16+32+60+60+60+60s). E2E #5 surfaced a
|
|
7
|
+
* Tencent outage that lasted longer than the default; setting
|
|
8
|
+
* KC_MAX_RETRIES=20 buys ~10 more min (≈15 min total) before the engine gives up.
|
|
4
9
|
*/
|
|
5
|
-
|
|
6
|
-
const
|
|
10
|
+
const MAX_RETRIES = (() => {
|
|
11
|
+
const raw = parseInt(process.env.KC_MAX_RETRIES || "", 10);
|
|
12
|
+
if (Number.isFinite(raw) && raw >= 0 && raw <= 50) return raw;
|
|
13
|
+
return 10;
|
|
14
|
+
})();
|
|
7
15
|
const INITIAL_DELAY_MS = 1000;
|
|
8
16
|
const MAX_DELAY_MS = 60000;
|
|
9
17
|
const BACKOFF_MULTIPLIER = 2;
|
package/src/agent/scheduler.js
CHANGED
|
@@ -222,14 +222,26 @@ export class Scheduler {
|
|
|
222
222
|
}
|
|
223
223
|
|
|
224
224
|
/**
|
|
225
|
-
* Count of files directly under input/ (excluding subdirs like archived/
|
|
225
|
+
* Count of files directly under input/ (excluding subdirs like archived/
|
|
226
|
+
* and v0.7.0 F3 agent-scratch marker .kc-scratch/).
|
|
227
|
+
*
|
|
228
|
+
* Background: E2E #5 DS surfaced "📥 4 new file(s) pending in input/"
|
|
229
|
+
* when the agent's sandbox_exec had dropped 4 test fixtures into
|
|
230
|
+
* input/ during smoke-testing. The user assumed external arrivals.
|
|
231
|
+
* The scheduler never had a way to disambiguate.
|
|
232
|
+
*
|
|
233
|
+
* v0.7.0 F3: agent-side scratch writes go under input/.kc-scratch/
|
|
234
|
+
* (a sidecar dir, hidden by the standard "starts with ." filter).
|
|
235
|
+
* The banner counts only top-level non-hidden files, which is what
|
|
236
|
+
* external arrivals actually look like (schedule_fetch drops files
|
|
237
|
+
* directly into input/ root).
|
|
226
238
|
*/
|
|
227
239
|
pendingInputCount() {
|
|
228
240
|
const dir = path.join(this._workspace.cwd, "input");
|
|
229
241
|
if (!fs.existsSync(dir)) return 0;
|
|
230
242
|
try {
|
|
231
243
|
return fs.readdirSync(dir, { withFileTypes: true })
|
|
232
|
-
.filter((e) => e.isFile())
|
|
244
|
+
.filter((e) => e.isFile() && !e.name.startsWith("."))
|
|
233
245
|
.length;
|
|
234
246
|
} catch {
|
|
235
247
|
return 0;
|
|
@@ -70,7 +70,24 @@ export class SessionState {
|
|
|
70
70
|
* @returns {object} The persisted state
|
|
71
71
|
*/
|
|
72
72
|
load() {
|
|
73
|
-
|
|
73
|
+
const raw = this._loadRaw() || {};
|
|
74
|
+
// v0.6.3: phase value renamed "extraction" → "rule_extraction" to
|
|
75
|
+
// disambiguate from data/entity extraction inside skills. Migrate old
|
|
76
|
+
// session-state on read so resumed workspaces don't end up in a phase
|
|
77
|
+
// the engine doesn't recognize. Idempotent — already-renamed values
|
|
78
|
+
// pass through unchanged.
|
|
79
|
+
if (raw.currentPhase === "extraction") raw.currentPhase = "rule_extraction";
|
|
80
|
+
if (raw.pipelineMilestones?.extraction && !raw.pipelineMilestones.rule_extraction) {
|
|
81
|
+
raw.pipelineMilestones.rule_extraction = raw.pipelineMilestones.extraction;
|
|
82
|
+
delete raw.pipelineMilestones.extraction;
|
|
83
|
+
}
|
|
84
|
+
if (Array.isArray(raw.phaseSummaries)) {
|
|
85
|
+
for (const s of raw.phaseSummaries) {
|
|
86
|
+
if (s?.fromPhase === "extraction") s.fromPhase = "rule_extraction";
|
|
87
|
+
if (s?.toPhase === "extraction") s.toPhase = "rule_extraction";
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
return raw;
|
|
74
91
|
}
|
|
75
92
|
|
|
76
93
|
/**
|
|
@@ -17,22 +17,28 @@ const BUNDLED_SKILLS_DIR = path.resolve(__dirname, "../../template/skills");
|
|
|
17
17
|
// to default to always-visible.
|
|
18
18
|
const PHASE_RELEVANT_SKILLS = {
|
|
19
19
|
"bootstrap-workspace": ["bootstrap"],
|
|
20
|
-
"rule-extraction": ["bootstrap", "
|
|
21
|
-
"rule-graph": ["
|
|
22
|
-
"task-decomposition": ["
|
|
20
|
+
"rule-extraction": ["bootstrap", "rule_extraction"],
|
|
21
|
+
"rule-graph": ["rule_extraction", "skill_authoring"],
|
|
22
|
+
"task-decomposition": ["rule_extraction", "skill_authoring", "distillation"],
|
|
23
|
+
// v0.7.0 B1: work-decomposition teaches the system-level decomposition
|
|
24
|
+
// discipline (ordering, grouping, difficulty triage, PATTERNS.md memory).
|
|
25
|
+
// Distinct from task-decomposition (per-rule sub-tasks). Loaded on
|
|
26
|
+
// rule_extraction → skill_authoring transition where the agent owns
|
|
27
|
+
// the TaskBoard.
|
|
28
|
+
"work-decomposition": ["rule_extraction", "skill_authoring"],
|
|
23
29
|
"skill-authoring": ["skill_authoring", "skill_testing"],
|
|
24
30
|
"skill-to-workflow": ["distillation"],
|
|
25
31
|
"evolution-loop": ["skill_testing", "distillation", "production_qc"],
|
|
26
|
-
"version-control": ["bootstrap", "
|
|
32
|
+
"version-control": ["bootstrap", "rule_extraction", "skill_authoring", "skill_testing", "distillation", "production_qc", "finalization"],
|
|
27
33
|
"quality-control": ["production_qc", "finalization"],
|
|
28
34
|
"confidence-system": ["distillation", "production_qc"],
|
|
29
35
|
"dashboard-reporting": ["production_qc", "finalization"],
|
|
30
36
|
"cross-document-verification": ["production_qc"],
|
|
31
37
|
"corner-case-management": ["skill_testing", "distillation", "production_qc"],
|
|
32
|
-
"data-sensibility": ["
|
|
38
|
+
"data-sensibility": ["rule_extraction", "skill_authoring"],
|
|
33
39
|
"entity-extraction": ["skill_authoring", "distillation"],
|
|
34
|
-
"document-parsing": ["bootstrap", "
|
|
35
|
-
"document-chunking": ["bootstrap", "
|
|
40
|
+
"document-parsing": ["bootstrap", "rule_extraction", "skill_authoring"],
|
|
41
|
+
"document-chunking": ["bootstrap", "rule_extraction"],
|
|
36
42
|
"tree-processing": ["skill_authoring", "skill_testing"],
|
|
37
43
|
"compliance-judgment": ["skill_authoring", "skill_testing", "production_qc"],
|
|
38
44
|
"skill-creator": ["skill_authoring"],
|
|
@@ -16,12 +16,15 @@
|
|
|
16
16
|
* still bypasses. The validator's job is to refuse the auto-advance,
|
|
17
17
|
* not to trap the agent.
|
|
18
18
|
*
|
|
19
|
-
* Validation rules per `
|
|
19
|
+
* Validation rules per `check_*.py`:
|
|
20
20
|
* 1. File ≥ 100 bytes (smoke test for empty stubs).
|
|
21
21
|
* 2. Passes `python3 -c "import ast; ast.parse(open(F).read())"` (no
|
|
22
22
|
* syntax errors).
|
|
23
|
-
* 3. Defines a function reachable by
|
|
24
|
-
*
|
|
23
|
+
* 3. Defines at least one top-level function (any name); `check_rule`,
|
|
24
|
+
* `verify`, and `check_r<digits>` (e.g. `check_r014`, `check_r013_r017`) are conventions, not requirements.
|
|
25
|
+
* v0.7.0 A6 relaxed the name check after E2E #5 audit found
|
|
26
|
+
* three sessions independently chose `def check_r###` over the
|
|
27
|
+
* canonical names — the validator was too strict.
|
|
25
28
|
*
|
|
26
29
|
* Disable mechanism: if `python3` is not on PATH, validator silently
|
|
27
30
|
* passes everything and emits a one-time warning — we don't want the
|
|
@@ -32,7 +35,18 @@ import { execFileSync } from "node:child_process";
|
|
|
32
35
|
import fs from "node:fs";
|
|
33
36
|
import path from "node:path";
|
|
34
37
|
|
|
35
|
-
|
|
38
|
+
// v0.7.0 A6: entry-point check is a sanity probe, not a style enforcer.
|
|
39
|
+
// The validator's real signal comes from `≥ 100 bytes` + `ast.parse
|
|
40
|
+
// passes`. Restricting to specific verb names rejected 27/28 GLM
|
|
41
|
+
// scripts in E2E #5 — the cost outweighed the catch (every contestant
|
|
42
|
+
// converged on a different naming convention).
|
|
43
|
+
//
|
|
44
|
+
// New rule: any top-level `def \w+(...)` counts. Rejects pure-imports
|
|
45
|
+
// or comment-only stubs (which is what we actually wanted to catch),
|
|
46
|
+
// accepts anything with real logic. The check_*.py *filename* (matched
|
|
47
|
+
// by the path regex in `findCheckScripts`) carries the rule-id signal;
|
|
48
|
+
// the function name doesn't need to.
|
|
49
|
+
const ENTRY_POINT_REGEX = /^(?:async\s+)?def\s+\w+\s*\(/m;
|
|
36
50
|
const MIN_BYTES = 100;
|
|
37
51
|
|
|
38
52
|
export class SkillValidator {
|
|
@@ -141,7 +155,7 @@ export class SkillValidator {
|
|
|
141
155
|
try { content = fs.readFileSync(filePath, "utf-8"); }
|
|
142
156
|
catch { return { ok: false, error: "read failed after parse OK" }; }
|
|
143
157
|
if (!ENTRY_POINT_REGEX.test(content)) {
|
|
144
|
-
return { ok: false, error: "no
|
|
158
|
+
return { ok: false, error: "no callable defined: file has imports/comments only, no top-level `def`" };
|
|
145
159
|
}
|
|
146
160
|
|
|
147
161
|
return { ok: true };
|
|
@@ -139,12 +139,23 @@ export class TaskManager {
|
|
|
139
139
|
// --- Bulk creation from rule catalog ---
|
|
140
140
|
|
|
141
141
|
/**
|
|
142
|
-
* Phases where one-task-per-rule
|
|
143
|
-
*
|
|
144
|
-
*
|
|
145
|
-
*
|
|
142
|
+
* Phases where the engine auto-creates one-task-per-rule on phase entry.
|
|
143
|
+
*
|
|
144
|
+
* v0.7.0 B2: empty by default. Agent owns TaskBoard decisions per the
|
|
145
|
+
* work-decomposition meta-meta skill — engine no longer assumes per-rule
|
|
146
|
+
* granularity is right. The agent reads the rule list from describeState
|
|
147
|
+
* and calls TaskCreate with whatever shape (single, grouped, range,
|
|
148
|
+
* non-rule) makes sense for the corpus.
|
|
149
|
+
*
|
|
150
|
+
* Override `KC_AGENT_OWNS_TASKBOARD=0` to restore v0.6.x behavior
|
|
151
|
+
* (engine auto-populates per-rule for skill_authoring + skill_testing).
|
|
152
|
+
* The override is a staged-rollout safety valve, not a long-lived
|
|
153
|
+
* config — slated for removal in v0.8.0 after E2E #6 validates the
|
|
154
|
+
* agent-owned default.
|
|
146
155
|
*/
|
|
147
|
-
static PER_RULE_PHASES =
|
|
156
|
+
static PER_RULE_PHASES = (process.env.KC_AGENT_OWNS_TASKBOARD === "0")
|
|
157
|
+
? new Set(["skill_authoring", "skill_testing"])
|
|
158
|
+
: new Set();
|
|
148
159
|
|
|
149
160
|
/**
|
|
150
161
|
* Create one task per rule for a given phase — but only if the phase's unit
|
|
@@ -197,6 +208,51 @@ export class TaskManager {
|
|
|
197
208
|
).length;
|
|
198
209
|
}
|
|
199
210
|
|
|
211
|
+
/**
|
|
212
|
+
* v0.7.0 A5: Reconcile per-rule tasks against disk artifacts.
|
|
213
|
+
*
|
|
214
|
+
* Background: E2E #5 DS audit found tasks.json showing 70/70 completed
|
|
215
|
+
* while only ~56 dirs / 36 with check_*.py existed on disk. The agent
|
|
216
|
+
* called markDone() optimistically but the artifacts didn't materialize
|
|
217
|
+
* (or were deleted later). The engine's phase gate trusted the count.
|
|
218
|
+
*
|
|
219
|
+
* Reconcile walks every "completed" task in PER_RULE_PHASES and checks
|
|
220
|
+
* whether the expected disk artifacts exist via a caller-supplied
|
|
221
|
+
* `expectsFn(task) -> boolean` predicate. Tasks whose artifacts are
|
|
222
|
+
* missing are flipped back to `pending` with a `reconcile_failed`
|
|
223
|
+
* note so the agent can re-do the work, and the gate can refuse
|
|
224
|
+
* advance if the per-rule artifact set is incomplete.
|
|
225
|
+
*
|
|
226
|
+
* Called from engine `_advancePhase` before `exitCriteriaMet()`.
|
|
227
|
+
*
|
|
228
|
+
* @param {(task: object) => boolean} expectsFn
|
|
229
|
+
* @returns {{ reconciled: number, flippedBack: string[] }}
|
|
230
|
+
* Number of tasks inspected, plus the IDs of tasks flipped back to
|
|
231
|
+
* pending. Caller logs to events.jsonl.
|
|
232
|
+
*/
|
|
233
|
+
reconcileAgainstDisk(expectsFn) {
|
|
234
|
+
let reconciled = 0;
|
|
235
|
+
const flippedBack = [];
|
|
236
|
+
if (typeof expectsFn !== "function") return { reconciled, flippedBack };
|
|
237
|
+
for (const task of this._tasks) {
|
|
238
|
+
if (task.status !== "completed") continue;
|
|
239
|
+
if (!TaskManager.PER_RULE_PHASES.has(task.phase)) continue;
|
|
240
|
+
reconciled++;
|
|
241
|
+
let ok = false;
|
|
242
|
+
try { ok = !!expectsFn(task); }
|
|
243
|
+
catch { ok = false; }
|
|
244
|
+
if (!ok) {
|
|
245
|
+
task.status = "pending";
|
|
246
|
+
task.reconcile_failed = true;
|
|
247
|
+
task.summary = (task.summary ? task.summary + " | " : "") +
|
|
248
|
+
"v0.7.0 A5: artifacts missing on disk → flipped back to pending";
|
|
249
|
+
flippedBack.push(task.id);
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
if (flippedBack.length > 0) this.save();
|
|
253
|
+
return { reconciled, flippedBack };
|
|
254
|
+
}
|
|
255
|
+
|
|
200
256
|
/**
|
|
201
257
|
* Format task list for injection into system prompt context.
|
|
202
258
|
* Compact checklist — not conversation history.
|
|
@@ -194,20 +194,32 @@ export class DocumentChunkTool extends BaseTool {
|
|
|
194
194
|
};
|
|
195
195
|
}
|
|
196
196
|
|
|
197
|
-
//
|
|
198
|
-
//
|
|
199
|
-
//
|
|
197
|
+
// v0.7.0 G (#91): route .docx / .doc / others through native parser
|
|
198
|
+
// dispatcher (mammoth / word-extractor / LibreOffice fallback).
|
|
199
|
+
// Replaces the prior "read as UTF-8" stub which produced binary
|
|
200
|
+
// garbage on .docx and forced agents to call document_parse + chunk
|
|
201
|
+
// separately. extractText() returns clean text or a structured
|
|
202
|
+
// failure that downstream can surface to the agent.
|
|
200
203
|
try {
|
|
201
|
-
const
|
|
204
|
+
const { extractText } = await import("../document-parser.js");
|
|
205
|
+
const result = await extractText(absPath);
|
|
206
|
+
if (result.ok && result.text) {
|
|
207
|
+
return {
|
|
208
|
+
source_file: baseName,
|
|
209
|
+
total_pages: 1,
|
|
210
|
+
blocks: [{ page: 1, markdown: result.text }],
|
|
211
|
+
parse_via: result.via,
|
|
212
|
+
};
|
|
213
|
+
}
|
|
202
214
|
return {
|
|
203
|
-
source_file: baseName,
|
|
204
|
-
|
|
205
|
-
|
|
215
|
+
source_file: baseName, total_pages: 0, blocks: [],
|
|
216
|
+
parse_error: result.error ||
|
|
217
|
+
`Unsupported format '${suffix}'. Install mammoth / word-extractor or rely on LibreOffice fallback.`,
|
|
206
218
|
};
|
|
207
|
-
} catch {
|
|
219
|
+
} catch (e) {
|
|
208
220
|
return {
|
|
209
221
|
source_file: baseName, total_pages: 0, blocks: [],
|
|
210
|
-
parse_error: `
|
|
222
|
+
parse_error: `parse exception: ${e?.message || String(e)}`,
|
|
211
223
|
};
|
|
212
224
|
}
|
|
213
225
|
}
|