kc-beta 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/LICENSE +81 -0
  2. package/LICENSE-COMMERCIAL.md +125 -0
  3. package/README.md +21 -3
  4. package/package.json +14 -5
  5. package/src/agent/context-window.js +9 -12
  6. package/src/agent/context.js +14 -1
  7. package/src/agent/document-parser.js +169 -0
  8. package/src/agent/engine.js +367 -18
  9. package/src/agent/history/event-history.js +222 -0
  10. package/src/agent/llm-client.js +55 -0
  11. package/src/agent/message-utils.js +63 -0
  12. package/src/agent/pipelines/_milestone-derive.js +511 -0
  13. package/src/agent/pipelines/base.js +21 -0
  14. package/src/agent/pipelines/distillation.js +28 -15
  15. package/src/agent/pipelines/extraction.js +103 -36
  16. package/src/agent/pipelines/finalization.js +178 -11
  17. package/src/agent/pipelines/index.js +6 -1
  18. package/src/agent/pipelines/initializer.js +74 -8
  19. package/src/agent/pipelines/production-qc.js +31 -44
  20. package/src/agent/pipelines/skill-authoring.js +97 -80
  21. package/src/agent/pipelines/skill-testing.js +67 -23
  22. package/src/agent/retry.js +10 -2
  23. package/src/agent/scheduler.js +14 -2
  24. package/src/agent/session-state.js +18 -1
  25. package/src/agent/skill-loader.js +13 -7
  26. package/src/agent/skill-validator.js +19 -5
  27. package/src/agent/task-manager.js +61 -5
  28. package/src/agent/tools/document-chunk.js +21 -9
  29. package/src/agent/tools/phase-advance.js +18 -3
  30. package/src/agent/tools/release.js +51 -9
  31. package/src/agent/tools/rule-catalog.js +11 -1
  32. package/src/agent/tools/workspace-file.js +32 -0
  33. package/src/agent/workspace.js +39 -1
  34. package/src/cli/components.js +64 -14
  35. package/src/cli/index.js +62 -3
  36. package/src/cli/meme.js +26 -25
  37. package/src/config.js +65 -22
  38. package/src/model-tiers.json +24 -8
  39. package/src/providers.js +42 -0
  40. package/template/release/v1/README.md.tmpl +108 -0
  41. package/template/release/v1/catalog.json.tmpl +4 -0
  42. package/template/release/v1/kc_runtime/__init__.py +11 -0
  43. package/template/release/v1/kc_runtime/confidence.py +63 -0
  44. package/template/release/v1/kc_runtime/doc_parser.py +127 -0
  45. package/template/release/v1/manifest.json.tmpl +11 -0
  46. package/template/release/v1/render_dashboard.py +117 -0
  47. package/template/release/v1/run.py +212 -0
  48. package/template/release/v1/serve.sh +17 -0
  49. package/template/skills/en/meta-meta/work-decomposition/SKILL.md +266 -0
  50. package/template/skills/en/skill-creator/SKILL.md +1 -1
  51. package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +264 -0
  52. package/template/skills/zh/skill-creator/SKILL.md +1 -1
@@ -2,6 +2,7 @@ import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { Phase, PipelineEvent } from "./index.js";
4
4
  import { Pipeline } from "./base.js";
5
+ import { deriveRuleExtractionMilestones, deriveSkillAuthoringMilestones } from "./_milestone-derive.js";
5
6
 
6
7
  export class RuleExtractionPipeline extends Pipeline {
7
8
  constructor(workspace) {
@@ -20,62 +21,91 @@ export class RuleExtractionPipeline extends Pipeline {
20
21
  }
21
22
 
22
23
  _scanWorkspace() {
24
+ // v0.7.0 A1: route through filesystem-derived milestone helper.
25
+ // Existing instance state (rulesExtracted, rulesWithChunkRefs,
26
+ // coverageAudited) becomes a cache of disk facts rather than a
27
+ // running record of which tools fired. Tool-wrapper recorders can
28
+ // still bump these via engine._recordMilestone but disk wins on
29
+ // any rescan.
30
+ const m = deriveRuleExtractionMilestones(this._workspace);
31
+ this.rulesExtracted = [...m.rulesExtracted];
32
+ this.rulesWithChunkRefs = [...m.rulesWithChunkRefs];
33
+ this.coverageAudited = m.coverageAudited;
34
+
35
+ // regulationsScanned: presence of any non-JSON file in rules/. Kept
36
+ // local to this pipeline (not in the helper) because "did the agent
37
+ // copy regs into the workspace" is a cheap heuristic specific to
38
+ // this phase.
23
39
  const rulesDir = path.join(this._workspace.cwd, "rules");
24
40
  if (fs.existsSync(rulesDir)) {
25
- const regFiles = fs.readdirSync(rulesDir).filter((f) => !f.endsWith(".json") && fs.statSync(path.join(rulesDir, f)).isFile());
26
- this.regulationsScanned = regFiles.length > 0;
27
- }
28
- this._scanRules();
29
- this._scanTests();
30
- this.coverageAudited = fs.existsSync(path.join(this._workspace.cwd, "rules", "coverage_audit.md")) ||
31
- fs.existsSync(path.join(this._workspace.cwd, "rules", "coverage_audit.json"));
32
- }
33
-
34
- _scanRules() {
35
- this.rulesExtracted = [];
36
- this.rulesWithChunkRefs = [];
37
- const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
38
- if (fs.existsSync(catalogPath)) {
39
41
  try {
40
- const data = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
41
- if (Array.isArray(data)) {
42
- this.rulesExtracted = data.map((r, i) => r.id || `rule_${i}`);
43
- // A1: collect ids whose entry has non-empty source_chunk_ids
44
- for (const r of data) {
45
- const ids = r?.source_chunk_ids;
46
- if (Array.isArray(ids) && ids.length > 0 && r?.id) {
47
- this.rulesWithChunkRefs.push(r.id);
48
- }
49
- }
50
- }
42
+ const regFiles = fs.readdirSync(rulesDir).filter(
43
+ (f) => !f.endsWith(".json") && fs.statSync(path.join(rulesDir, f)).isFile(),
44
+ );
45
+ this.regulationsScanned = regFiles.length > 0;
51
46
  } catch { /* skip */ }
52
47
  }
53
- const skillsDir = path.join(this._workspace.cwd, "rule_skills");
54
- if (fs.existsSync(skillsDir)) {
55
- for (const e of fs.readdirSync(skillsDir, { withFileTypes: true })) {
56
- if (e.isDirectory() && !e.name.startsWith("__") && !this.rulesExtracted.includes(e.name)) {
57
- this.rulesExtracted.push(e.name);
58
- }
48
+
49
+ // Union with rule_skills/ dirs — sometimes agents create skill dirs
50
+ // before adding to catalog.json (XM E2E #5 stranded-catalog case).
51
+ // Pulled from the skill-authoring helper so we share the canonical
52
+ // skill dir scan.
53
+ const sa = deriveSkillAuthoringMilestones(this._workspace);
54
+ for (const dirName of sa.skillsAuthored) {
55
+ if (!this.rulesExtracted.includes(dirName)) {
56
+ this.rulesExtracted.push(dirName);
59
57
  }
60
58
  }
59
+
60
+ this._scanTests();
61
61
  }
62
62
 
63
63
  _scanTests() {
64
+ // v0.7.0 A1: rulesWithTests now accepts multiple test shapes (was
65
+ // form-prescriptive on test_cases/ only — none of E2E #5's three
66
+ // alive contestants used that exact path; the gate refused all).
67
+ // Now: a rule is "tested" iff it has ANY of:
68
+ // rule_skills/<id>/test_cases/ (canonical, original)
69
+ // rule_skills/<id>/tests/ (alt spelling)
70
+ // rule_skills/<id>/check*.py (check IS the test for many rules)
71
+ // rule_skills/<id>/scripts/check*.py (XM-style nested scripts)
72
+ // rule_skills/<id>/assets/test_cases.json
73
+ // Spirit of the gate is "did the agent leave test artifacts behind"
74
+ // not "did they use this exact directory name."
64
75
  this.rulesWithTests = [];
65
76
  const skillsDir = path.join(this._workspace.cwd, "rule_skills");
66
77
  if (!fs.existsSync(skillsDir)) return;
67
78
  for (const e of fs.readdirSync(skillsDir, { withFileTypes: true })) {
68
79
  if (!e.isDirectory()) continue;
69
- const testDir = path.join(skillsDir, e.name, "test_cases");
70
- if (fs.existsSync(testDir) && fs.readdirSync(testDir).length > 0) {
71
- this.rulesWithTests.push(e.name);
80
+ const skillPath = path.join(skillsDir, e.name);
81
+ const testDirA = path.join(skillPath, "test_cases");
82
+ const testDirB = path.join(skillPath, "tests");
83
+ const assetsTests = path.join(skillPath, "assets", "test_cases.json");
84
+
85
+ let hasTest = false;
86
+ if (fs.existsSync(testDirA) && fs.readdirSync(testDirA).length > 0) hasTest = true;
87
+ if (!hasTest && fs.existsSync(testDirB) && fs.readdirSync(testDirB).length > 0) hasTest = true;
88
+ if (!hasTest && fs.existsSync(assetsTests)) hasTest = true;
89
+ // Check files: any check*.py at root or under scripts/
90
+ if (!hasTest) {
91
+ try {
92
+ const files = fs.readdirSync(skillPath);
93
+ if (files.some((f) => /^check.*\.py$/i.test(f))) hasTest = true;
94
+ else if (files.includes("scripts")) {
95
+ const scriptsDir = path.join(skillPath, "scripts");
96
+ try {
97
+ if (fs.readdirSync(scriptsDir).some((f) => /^check.*\.py$/i.test(f))) hasTest = true;
98
+ } catch { /* skip */ }
99
+ }
100
+ } catch { /* skip */ }
72
101
  }
102
+ if (hasTest) this.rulesWithTests.push(e.name);
73
103
  }
74
104
  }
75
105
 
76
106
  describeState() {
77
107
  this._scanWorkspace();
78
- const parts = ["## Phase: EXTRACTION\nRead and decompose regulation documents into atomic, testable verification rules. This is BUILD mode — do the analysis directly."];
108
+ const parts = ["## Phase: RULE_EXTRACTION\nRead and decompose regulation documents into atomic, testable verification rules. This is BUILD mode — do the analysis directly. (Distinct from data/entity extraction work that skills perform internally.)"];
79
109
  parts.push(`### Progress\n- Regulations scanned: ${this.regulationsScanned ? "yes" : "no"}\n- Rules extracted: ${this.rulesExtracted.length}\n- Rules with test stubs: ${this.rulesWithTests.length}\n- Coverage audit: ${this.coverageAudited ? "done" : "pending"}`);
80
110
 
81
111
  if (this.exitCriteriaMet()) {
@@ -132,7 +162,13 @@ export class RuleExtractionPipeline extends Pipeline {
132
162
  }
133
163
 
134
164
  exitCriteriaMet() {
135
- return this.regulationsScanned && this.rulesExtracted.length > 0 &&
165
+ // v0.7.0 A1: dropped explicit `regulationsScanned` gate — rulesExtracted
166
+ // > 0 already implies the agent read regulations from somewhere
167
+ // (catalog.json wouldn't exist otherwise). The old criterion measured
168
+ // "did the agent copy regs into workspace/rules/" — ceremonial work
169
+ // none of E2E #5's three contestants did because they read directly
170
+ // from projectDir/rules/.
171
+ return this.rulesExtracted.length > 0 &&
136
172
  this.rulesWithTests.length >= Math.max(this.rulesExtracted.length * 0.8, 1) &&
137
173
  this.coverageAudited &&
138
174
  // v0.6.1 A1: hard tracking — D1 source-context auto-attach requires
@@ -141,6 +177,37 @@ export class RuleExtractionPipeline extends Pipeline {
141
177
  this._chunkRefsCriterionMet();
142
178
  }
143
179
 
180
+ /**
181
+ * v0.6.3 (#74): RULE_EXTRACTION should produce rules/catalog.json + per-rule
182
+ * markdown extraction notes, not python check scripts or workflows.
183
+ */
184
+ phaseMisfitHint(toolName, toolInput, result) {
185
+ if (result?.isError) return null;
186
+ const exitText = this.exitCriteriaMet()
187
+ ? "Extraction exit criteria are MET — call phase_advance(to=\"skill_authoring\") to switch phases before continuing."
188
+ : "Extraction exit criteria NOT yet met. Either finish extraction first, or use force:true on phase_advance.";
189
+
190
+ if (toolName === "workspace_file" && toolInput?.operation === "write") {
191
+ const p = toolInput.path || "";
192
+ // Writing the actual python check is unambiguous skill-authoring work.
193
+ if (/^rule_skills\/[^/]+\/check_r\d+\.py$/.test(p) || p.endsWith("/SKILL.md") && p.startsWith("rule_skills/")) {
194
+ return `Writing "${p}" is SKILL_AUTHORING-phase work, but engine is in RULE_EXTRACTION. ${exitText}`;
195
+ }
196
+ if (p.startsWith("workflows/")) {
197
+ return `Writing under workflows/ is DISTILLATION-phase work, but engine is in RULE_EXTRACTION. ${exitText}`;
198
+ }
199
+ if (p.startsWith("output/results/")) {
200
+ return `Writing under output/results/ is PRODUCTION_QC-phase work, but engine is in RULE_EXTRACTION. ${exitText}`;
201
+ }
202
+ }
203
+
204
+ if (toolName === "workflow_run") {
205
+ return `workflow_run is SKILL_TESTING/PRODUCTION_QC-phase work, but engine is in RULE_EXTRACTION. ${exitText}`;
206
+ }
207
+
208
+ return null;
209
+ }
210
+
144
211
  exportState() {
145
212
  return {
146
213
  regulationsScanned: this.regulationsScanned,
@@ -1,8 +1,15 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
+ import { fileURLToPath } from "node:url";
3
4
  import { PipelineEvent } from "./index.js";
4
5
  import { Pipeline } from "./base.js";
5
6
  import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
7
+ import { deriveFinalizationMilestones } from "./_milestone-derive.js";
8
+
9
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
+ // v0.7.0 N: ship template/release/v1/ from the npm package; copy into
11
+ // the workspace at finalization phase entry.
12
+ const RELEASE_TEMPLATE_DIR = path.resolve(__dirname, "../../../template/release/v1");
6
13
 
7
14
  /**
8
15
  * E1: FINALIZATION — the 7th phase. Runs after PRODUCTION_QC has shown
@@ -41,17 +48,21 @@ export class FinalizationPipeline extends Pipeline {
41
48
  }
42
49
 
43
50
  _scanWorkspace() {
44
- const cwd = this._workspace.cwd;
45
- this.readmeWritten = fs.existsSync(path.join(cwd, "rule_skills", "README.md"));
46
- this.coverageReportWritten = fs.existsSync(path.join(cwd, "rule_skills", "coverage_report.md"));
47
- this.finalDashboardWritten = fs.existsSync(path.join(cwd, "output", "final_dashboard.html"));
51
+ // v0.7.0 A1: route through filesystem-derived helper. The helper
52
+ // accepts multiple shipping locations (output/releases/v#/README.md,
53
+ // rule_skills/README.md, workspace-root README.md) and enforces a
54
+ // ≥500-byte threshold to defeat empty stub files. Dashboard check
55
+ // requires sha256-distinct HTMLs in dashboards/ (Group C dedup).
56
+ const m = deriveFinalizationMilestones(this._workspace);
57
+ this.readmeWritten = m.readmeWritten;
58
+ this.coverageReportWritten = m.coverageReportWritten;
59
+ this.finalDashboardWritten = m.finalDashboardWritten;
60
+ this._dashboardDuplicatesDetected = m.dashboardDuplicatesDetected;
61
+
48
62
  // Canonical layout: every rule_id in the catalog has a dedicated
49
- // directory OR a thin-link stub under rule_skills/<rule_id>/. When
50
- // skills are already per-rule (every rule has its own dir) this is
51
- // trivially true. When skills are grouped, the agent creates
52
- // per-rule stub dirs that reference the grouped file. We approximate
53
- // "canonical" by checking: does every catalog rule_id have a
54
- // matching directory under rule_skills/?
63
+ // directory OR a thin-link stub under rule_skills/<rule_id>/. Kept
64
+ // here (not in helper) because it requires reading catalog.json
65
+ // and matching against existing dirs — pipeline-specific logic.
55
66
  this.canonicalLayoutDone = this._checkCanonicalLayout();
56
67
  }
57
68
 
@@ -165,7 +176,163 @@ export class FinalizationPipeline extends Pipeline {
165
176
  return this.readmeWritten &&
166
177
  this.coverageReportWritten &&
167
178
  this.finalDashboardWritten &&
168
- this.canonicalLayoutDone;
179
+ this.canonicalLayoutDone &&
180
+ // v0.7.0 N (#94): pre-flight — every required file run.py loads
181
+ // must exist. Without this, finalization can declare "done" with
182
+ // a release dir that bombs on first invocation (E2E #5 DS shipped
183
+ // run.py requiring manifest.json which didn't exist).
184
+ this._releaseBundlePreflightOk();
185
+ }
186
+
187
+ /**
188
+ * v0.7.0 N (#94): copy `template/release/v1/` into
189
+ * `output/releases/v1/` at phase entry so the agent has a runnable
190
+ * skeleton to fill in. Skips if the release dir already exists with
191
+ * non-template content (resume case — preserve agent edits).
192
+ *
193
+ * Called from engine._advancePhase after the phase transitions to
194
+ * finalization.
195
+ */
196
+ onPhaseEnter({ fromPhase, workspace } = {}) {
197
+ if (!fs.existsSync(RELEASE_TEMPLATE_DIR)) return; // template not bundled (dev edge case)
198
+ const releaseRoot = path.join((workspace || this._workspace).cwd, "output", "releases", "v1");
199
+ if (fs.existsSync(releaseRoot)) {
200
+ // Don't overwrite existing release dir (resume / repeat phase entry).
201
+ // Re-running the populator on existing files is safe but the agent
202
+ // may have hand-edited; leave alone.
203
+ return;
204
+ }
205
+ try {
206
+ this._copyTemplateRecursive(RELEASE_TEMPLATE_DIR, releaseRoot);
207
+ // Populate .tmpl files from session-state where we can.
208
+ this._populateRelease(releaseRoot);
209
+ } catch (e) {
210
+ // Defensive: never let template setup break phase transition.
211
+ // The agent can re-run via /phase finalization or recover manually.
212
+ // eslint-disable-next-line no-console
213
+ console.warn(`[finalization] release template copy failed: ${e?.message || e}`);
214
+ }
215
+ }
216
+
217
+ _copyTemplateRecursive(srcDir, destDir) {
218
+ fs.mkdirSync(destDir, { recursive: true });
219
+ for (const entry of fs.readdirSync(srcDir, { withFileTypes: true })) {
220
+ const src = path.join(srcDir, entry.name);
221
+ const dst = path.join(destDir, entry.name);
222
+ if (entry.isDirectory()) {
223
+ this._copyTemplateRecursive(src, dst);
224
+ } else if (entry.isFile()) {
225
+ fs.copyFileSync(src, dst);
226
+ // Preserve executable bits on shipped scripts
227
+ if (/\.(py|sh)$/.test(entry.name)) {
228
+ try { fs.chmodSync(dst, 0o755); } catch { /* not critical */ }
229
+ }
230
+ }
231
+ }
232
+ }
233
+
234
+ _populateRelease(releaseRoot) {
235
+ // Best-effort populator — fills the .tmpl placeholders with what
236
+ // session-state currently knows. Agent can re-edit afterwards.
237
+ const cwd = this._workspace.cwd;
238
+ const sessionId = path.basename(cwd);
239
+ const generatedAt = new Date().toISOString();
240
+
241
+ // catalog.json: copy from rules/catalog.json if present
242
+ const catalogSrc = path.join(cwd, "rules", "catalog.json");
243
+ if (fs.existsSync(catalogSrc)) {
244
+ try {
245
+ fs.copyFileSync(catalogSrc, path.join(releaseRoot, "catalog.json"));
246
+ } catch { /* ignore */ }
247
+ }
248
+
249
+ // manifest.json: scan workflows/ for rule -> file mappings
250
+ const workflowsRoot = path.join(cwd, "workflows");
251
+ const workflows = {};
252
+ let ruleCount = 0;
253
+ let workflowCount = 0;
254
+ if (fs.existsSync(workflowsRoot)) {
255
+ for (const entry of fs.readdirSync(workflowsRoot, { withFileTypes: true })) {
256
+ if (entry.isDirectory()) {
257
+ const subFiles = fs.readdirSync(path.join(workflowsRoot, entry.name));
258
+ const py = subFiles.find((f) => /workflow.*\.py$/i.test(f) || /^check.*\.py$/i.test(f));
259
+ if (py) {
260
+ workflows[entry.name] = `workflows/${entry.name}/${py}`;
261
+ workflowCount++;
262
+ }
263
+ } else if (entry.isFile()) {
264
+ const m = entry.name.match(/^(.+)_workflow\.py$/i);
265
+ if (m) {
266
+ workflows[m[1]] = `workflows/${entry.name}`;
267
+ workflowCount++;
268
+ }
269
+ }
270
+ }
271
+ }
272
+ try {
273
+ const catalog = fs.existsSync(catalogSrc)
274
+ ? JSON.parse(fs.readFileSync(catalogSrc, "utf-8"))
275
+ : [];
276
+ ruleCount = Array.isArray(catalog) ? catalog.length : (catalog?.rules?.length || 0);
277
+ } catch { /* ignore */ }
278
+
279
+ const manifest = {
280
+ release_version: "v1",
281
+ kc_version: this._readKcVersion(),
282
+ generated_at: generatedAt,
283
+ session_id: sessionId,
284
+ rules_count: ruleCount,
285
+ workflows_count: workflowCount,
286
+ workflows,
287
+ calibration_source: "confidence_calibration.json",
288
+ documentation: "README.md",
289
+ };
290
+ fs.writeFileSync(
291
+ path.join(releaseRoot, "manifest.json"),
292
+ JSON.stringify(manifest, null, 2),
293
+ "utf-8",
294
+ );
295
+
296
+ // README.md: substitute placeholders in README.md.tmpl
297
+ const readmeTmplPath = path.join(releaseRoot, "README.md.tmpl");
298
+ if (fs.existsSync(readmeTmplPath)) {
299
+ let readme = fs.readFileSync(readmeTmplPath, "utf-8");
300
+ readme = readme
301
+ .replaceAll("{{kc_version}}", this._readKcVersion())
302
+ .replaceAll("{{session_id}}", sessionId)
303
+ .replaceAll("{{generated_at}}", generatedAt)
304
+ .replaceAll("{{rule_count}}", String(ruleCount))
305
+ .replaceAll("{{workflow_count}}", String(workflowCount))
306
+ .replaceAll("{{project_description}}", "(Agent: replace with project-specific description.)")
307
+ .replaceAll("{{known_limitations}}", "(Agent: replace with known limitations from this run.)");
308
+ fs.writeFileSync(path.join(releaseRoot, "README.md"), readme, "utf-8");
309
+ }
310
+ }
311
+
312
+ _readKcVersion() {
313
+ try {
314
+ const pkg = JSON.parse(fs.readFileSync(
315
+ path.resolve(__dirname, "../../../package.json"), "utf-8",
316
+ ));
317
+ return pkg.version || "unknown";
318
+ } catch { return "unknown"; }
319
+ }
320
+
321
+ /**
322
+ * v0.7.0 N (#94): pre-flight — confirm every file `run.py` loads via
323
+ * `_load_json(..., required=True)` exists in the bundle. Without this
324
+ * the agent can declare finalization done with a bundle that bombs
325
+ * at runtime.
326
+ */
327
+ _releaseBundlePreflightOk() {
328
+ const releaseRoot = path.join(this._workspace.cwd, "output", "releases", "v1");
329
+ if (!fs.existsSync(releaseRoot)) return false;
330
+ const required = ["run.py", "manifest.json", "README.md", "kc_runtime/doc_parser.py", "kc_runtime/confidence.py"];
331
+ for (const rel of required) {
332
+ const p = path.join(releaseRoot, rel);
333
+ if (!fs.existsSync(p)) return false;
334
+ }
335
+ return true;
169
336
  }
170
337
 
171
338
  exportState() {
@@ -10,7 +10,12 @@
10
10
  */
11
11
  export const Phase = Object.freeze({
12
12
  BOOTSTRAP: "bootstrap",
13
- EXTRACTION: "extraction",
13
+ // v0.6.3: phase value renamed from "extraction" → "rule_extraction" to
14
+ // disambiguate from the data/entity extraction that skills/workflows do
15
+ // internally. The JS const name (Phase.EXTRACTION) is unchanged so call
16
+ // sites don't shift; only the string value persisted to session-state.json
17
+ // and shown in /status changes.
18
+ EXTRACTION: "rule_extraction",
14
19
  SKILL_AUTHORING: "skill_authoring",
15
20
  SKILL_TESTING: "skill_testing",
16
21
  DISTILLATION: "distillation",
@@ -4,6 +4,7 @@ import os from "node:os";
4
4
  import { fileURLToPath } from "node:url";
5
5
  import { Phase, PipelineEvent } from "./index.js";
6
6
  import { Pipeline } from "./base.js";
7
+ import { deriveBootstrapMilestones } from "./_milestone-derive.js";
7
8
 
8
9
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
9
10
  const AGENT_MD_TEMPLATE = path.resolve(__dirname, "../../../template/AGENT.md");
@@ -107,12 +108,14 @@ export class ProjectInitializer extends Pipeline {
107
108
  }
108
109
 
109
110
  _checkSamples() {
110
- // Check workspace samples/
111
- const dir = path.join(this._workspace.cwd, "samples");
112
- if (fs.existsSync(dir) && fs.readdirSync(dir, { withFileTypes: true }).some((e) => e.isFile())) {
113
- this.hasSamples = true; return;
114
- }
115
- // Check project dir samples/ (case-insensitive)
111
+ // v0.7.0 A1: route workspace check through filesystem-derived helper.
112
+ // Helper walks recursively (catches E2E #5 GLM's samples/samples/
113
+ // nested layout that the previous top-level-only check missed) and
114
+ // counts files at any depth. Project-dir fallback kept for the
115
+ // "user has samples but hasn't ingested them yet" path.
116
+ const m = deriveBootstrapMilestones(this._workspace);
117
+ if (m.hasSamples) { this.hasSamples = true; return; }
118
+
116
119
  if (this._workspace.projectDir) {
117
120
  for (const name of ["samples", "Samples", "SAMPLES", "sample", "Sample"]) {
118
121
  const pdir = path.join(this._workspace.projectDir, name);
@@ -199,7 +202,7 @@ export class ProjectInitializer extends Pipeline {
199
202
  }
200
203
 
201
204
  if (this.exitCriteriaMet()) {
202
- parts.push("### Exit\nBootstrap requirements met. Proceed to EXTRACTION.");
205
+ parts.push("### Exit\nBootstrap requirements met. Proceed to RULE_EXTRACTION.");
203
206
  }
204
207
  return parts.join("\n\n");
205
208
  }
@@ -228,7 +231,7 @@ export class ProjectInitializer extends Pipeline {
228
231
  }
229
232
 
230
233
  if (!wasReady && this.exitCriteriaMet()) {
231
- return new PipelineEvent({ type: "phase_ready", message: "Bootstrap complete. Ready for EXTRACTION.", nextPhase: Phase.EXTRACTION });
234
+ return new PipelineEvent({ type: "phase_ready", message: "Bootstrap complete. Ready for RULE_EXTRACTION.", nextPhase: Phase.EXTRACTION });
232
235
  }
233
236
  return null;
234
237
  }
@@ -237,6 +240,69 @@ export class ProjectInitializer extends Pipeline {
237
240
  return this.workspaceCreated && this.configReady && this.hasRegulations && this.hasSamples;
238
241
  }
239
242
 
243
+ /**
244
+ * v0.6.3 (#74): nudge the agent when it does work that belongs to a later
245
+ * phase. Bootstrap is setup — reading rules/samples, configuring keys,
246
+ * orienting. Writing skill code, running workflows, or spawning extraction
247
+ * subagents from BOOTSTRAP means the milestones get tagged "bootstrap"
248
+ * instead of the right phase, breaking later exit-criteria checks.
249
+ */
250
+ phaseMisfitHint(toolName, toolInput, result) {
251
+ if (result?.isError) return null;
252
+ const exitText = this.exitCriteriaMet()
253
+ ? "Bootstrap exit criteria are MET — call phase_advance(to=\"rule_extraction\") now to record this work under the right phase."
254
+ : "Bootstrap exit criteria NOT yet met (see describeState). Either complete bootstrap setup first, or use force:true on phase_advance if you've decided to skip ahead.";
255
+
256
+ if (toolName === "workspace_file" && toolInput?.operation === "write") {
257
+ const p = toolInput.path || "";
258
+ if (p.startsWith("rule_skills/")) {
259
+ return `Writing under rule_skills/ is SKILL_AUTHORING-phase work, but engine is in BOOTSTRAP. ${exitText}`;
260
+ }
261
+ if (p.startsWith("workflows/")) {
262
+ return `Writing under workflows/ is DISTILLATION-phase work, but engine is in BOOTSTRAP. ${exitText}`;
263
+ }
264
+ if (p.startsWith("output/results/")) {
265
+ return `Writing under output/results/ is PRODUCTION_QC-phase work, but engine is in BOOTSTRAP. ${exitText}`;
266
+ }
267
+ }
268
+
269
+ if (toolName === "workflow_run") {
270
+ return `workflow_run is SKILL_TESTING/PRODUCTION_QC-phase work, but engine is in BOOTSTRAP. Workflow results recorded now will be milestone-tagged "bootstrap" and won't count toward later exit criteria. ${exitText}`;
271
+ }
272
+
273
+ // v0.6.3.1 patch: rule_catalog is the most direct signature of
274
+ // RULE_EXTRACTION work. Creating/updating rules from BOOTSTRAP means the
275
+ // rule_extraction pipeline's milestone tracker stays at zero (its
276
+ // onToolResult only fires when engine.currentPhase matches), so the
277
+ // exit gate will refuse later. Caught Tencent hy3-preview after it
278
+ // created 22 rules silently in the wrong phase. Same risk for any
279
+ // model that skips sample-inventory and jumps to rule decomposition.
280
+ if (toolName === "rule_catalog" &&
281
+ ["create", "update", "delete"].includes(toolInput?.operation)) {
282
+ return `rule_catalog ${toolInput.operation} is RULE_EXTRACTION-phase work, but engine is in BOOTSTRAP. Rules created now WILL be persisted in rules/catalog.json (the tool writes regardless of phase), but the rule_extraction pipeline's milestone tracker won't pick them up until you're in that phase, and the v0.6.3 exit gate will refuse to advance from BOOTSTRAP unless its own criteria are met. ${exitText}`;
283
+ }
284
+
285
+ if (toolName === "agent_tool" && toolInput?.operation === "spawn") {
286
+ const taskId = (toolInput.task_id || "").toLowerCase();
287
+ // Heuristic: task_ids hinting at extraction/skill/workflow work are
288
+ // out-of-phase from bootstrap. Doc-parsing or setup-shaped task names
289
+ // are fine.
290
+ if (/extract|rule|skill|workflow|verify|qc|distill/.test(taskId)) {
291
+ return `Spawning subagent "${toolInput.task_id}" looks like ${this._guessSubagentPhase(taskId).toUpperCase()}-phase work, but engine is in BOOTSTRAP. Milestones the subagent emits will be tagged "bootstrap", causing the target phase's exit criteria to start at zero later. ${exitText}`;
292
+ }
293
+ }
294
+
295
+ return null;
296
+ }
297
+
298
+ _guessSubagentPhase(taskId) {
299
+ if (/extract|rule/.test(taskId)) return "rule_extraction";
300
+ if (/skill/.test(taskId)) return "skill_authoring";
301
+ if (/workflow|distill/.test(taskId)) return "distillation";
302
+ if (/verify|qc/.test(taskId)) return "production_qc";
303
+ return "later";
304
+ }
305
+
240
306
  exportState() {
241
307
  return {
242
308
  workspaceCreated: this.workspaceCreated,
@@ -2,6 +2,7 @@ import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { PipelineEvent } from "./index.js";
4
4
  import { Pipeline } from "./base.js";
5
+ import { deriveProductionQcMilestones } from "./_milestone-derive.js";
5
6
 
6
7
  const FREQUENCY_MAP = { high: 1.0, mid: 0.5, low: 0.2 };
7
8
 
@@ -36,27 +37,31 @@ export class ProductionQCPipeline extends Pipeline {
36
37
  }
37
38
 
38
39
  _scanQcResults() {
39
- // v0.6.1 A5/A6: don't reset documentsReviewed if engine emission has
40
- // bumped it since last scan workflow_run hooks call _recordMilestone
41
- // and the increment lives in this same field. Other counters (batches,
42
- // accuracy, issues) come solely from filesystem scan and reset cleanly.
40
+ // v0.7.0 A1: route through filesystem-derived helper. The helper
41
+ // recognizes both DS-style results (object with `results` keyed by
42
+ // rule_id, doc-paths in nested keys) AND GLM-style array-of-verdicts
43
+ // (one entry per doc with .verdict/.file/.path) neither matched
44
+ // the v0.6.1 A5 heuristic alone, so E2E #5 saw batchesProcessed=0
45
+ // even with 1,951 verdicts on disk.
43
46
  const engineDocsReviewed = this.documentsReviewed;
44
- this.batchesProcessed = 0;
47
+ const m = deriveProductionQcMilestones(this._workspace);
48
+ this.batchesProcessed = m.batchesProcessed;
49
+ this.documentsReviewed = m.documentsReviewed;
50
+
51
+ // Layered: still extract accuracyByRule / confidence / issues from
52
+ // canonical output/qc/*.json batches when present. The helper
53
+ // doesn't try to reconstruct accuracy semantics (too schema-specific),
54
+ // but if the agent followed canonical schema, we surface it.
45
55
  this.totalDocuments = 0;
46
- this.documentsReviewed = 0;
47
56
  this.accuracyByRule = {};
48
57
  this.confidenceDistribution = { low: 0, medium: 0, high: 0 };
49
58
  this.issuesFound = [];
50
-
51
- // Existing canonical path: output/qc/*.json (formal QC batch reports)
52
59
  const qcDir = path.join(this._workspace.cwd, "output", "qc");
53
60
  if (fs.existsSync(qcDir)) {
54
61
  for (const f of fs.readdirSync(qcDir).filter((f) => f.endsWith(".json")).sort()) {
55
62
  try {
56
63
  const data = JSON.parse(fs.readFileSync(path.join(qcDir, f), "utf-8"));
57
- this.batchesProcessed++;
58
64
  this.totalDocuments += typeof data.documents === "number" ? data.documents : (data.total || 0);
59
- this.documentsReviewed += data.reviewed || 0;
60
65
  if (data.accuracy_by_rule) Object.assign(this.accuracyByRule, data.accuracy_by_rule);
61
66
  if (data.confidence) {
62
67
  for (const band of ["low", "medium", "high"]) this.confidenceDistribution[band] += data.confidence[band] || 0;
@@ -66,44 +71,26 @@ export class ProductionQCPipeline extends Pipeline {
66
71
  }
67
72
  }
68
73
 
69
- // v0.6.1 A5: also pick up batch-style results in output/results/. E2E #4
70
- // showed agents writing batch QC outputs to output/results/qc_*.json
71
- // (e.g. unified_qc.py) instead of output/qc/, so the formal scanner
72
- // missed them. Heuristic match: filename starts with "qc_" or contains
73
- // "_batch_". Each match counts as one batch; total_checks → totalDocuments.
74
- const resultsDir = path.join(this._workspace.cwd, "output", "results");
75
- if (fs.existsSync(resultsDir)) {
76
- const seen = new Set();
77
- for (const f of fs.readdirSync(resultsDir).filter((f) => f.endsWith(".json"))) {
78
- const lower = f.toLowerCase();
79
- if (!(lower.startsWith("qc_") || lower.includes("_batch_"))) continue;
80
- // Dedupe near-duplicate filenames that differ only by timestamp
81
- // suffix (qc_full_batch_20260424_141642.json vs _141921.json
82
- // — both are real batches, keep both. But qc_pt_x.json and
83
- // qc_pt_x_<ts>.json are usually the same batch saved twice; key
84
- // on the prefix before any 8-digit date.)
85
- const key = f.replace(/_\d{8}_\d{6}/g, "").replace(/\.json$/, "");
86
- if (seen.has(key)) continue;
87
- seen.add(key);
88
- this.batchesProcessed++;
89
- try {
90
- const data = JSON.parse(fs.readFileSync(path.join(resultsDir, f), "utf-8"));
91
- // Best-effort metric extraction; tolerate missing keys
92
- this.totalDocuments += typeof data.sample_count === "number" ? data.sample_count
93
- : typeof data.documents === "number" ? data.documents
94
- : typeof data.total === "number" ? data.total : 0;
95
- } catch { /* skip */ }
96
- }
97
- }
98
-
99
- // Restore engine-emitted documentsReviewed if filesystem reported less
74
+ // Restore engine-emitted documentsReviewed if disk-derived is lower
75
+ // (engine increment may know about reviews not yet flushed to disk)
100
76
  if (engineDocsReviewed > this.documentsReviewed) this.documentsReviewed = engineDocsReviewed;
101
77
 
102
- // Determine monitoring phase
78
+ // Determine monitoring phase. v0.7.0 H5 fix: empty accuracyByRule
79
+ // no longer flips to "stable" via vacuous truth — require at least
80
+ // one rule with an accuracy reading before claiming stability.
103
81
  if (this.batchesProcessed < 3) this.monitoringPhase = "initial";
104
82
  else if (this.issuesFound.length > 0) this.monitoringPhase = "active";
105
- else if (Object.values(this.accuracyByRule).every((a) => a >= this._accuracyThreshold)) this.monitoringPhase = "stable";
106
- else this.monitoringPhase = "active";
83
+ else {
84
+ const accuracies = Object.values(this.accuracyByRule);
85
+ if (accuracies.length > 0 && accuracies.every((a) => a >= this._accuracyThreshold)) {
86
+ this.monitoringPhase = "stable";
87
+ } else {
88
+ // Helper-derived batches with no accuracy data: agent ran QC but
89
+ // didn't surface accuracy schema. Treat as `active` (work
90
+ // happened, but engine can't auto-bless stability).
91
+ this.monitoringPhase = "active";
92
+ }
93
+ }
107
94
  }
108
95
 
109
96
  describeState() {