kc-beta 0.6.2 → 0.7.0

Files changed (52)
  1. package/LICENSE +81 -0
  2. package/LICENSE-COMMERCIAL.md +125 -0
  3. package/README.md +21 -3
  4. package/package.json +14 -5
  5. package/src/agent/context-window.js +9 -12
  6. package/src/agent/context.js +14 -1
  7. package/src/agent/document-parser.js +169 -0
  8. package/src/agent/engine.js +367 -18
  9. package/src/agent/history/event-history.js +222 -0
  10. package/src/agent/llm-client.js +55 -0
  11. package/src/agent/message-utils.js +63 -0
  12. package/src/agent/pipelines/_milestone-derive.js +511 -0
  13. package/src/agent/pipelines/base.js +21 -0
  14. package/src/agent/pipelines/distillation.js +28 -15
  15. package/src/agent/pipelines/extraction.js +103 -36
  16. package/src/agent/pipelines/finalization.js +178 -11
  17. package/src/agent/pipelines/index.js +6 -1
  18. package/src/agent/pipelines/initializer.js +74 -8
  19. package/src/agent/pipelines/production-qc.js +31 -44
  20. package/src/agent/pipelines/skill-authoring.js +97 -80
  21. package/src/agent/pipelines/skill-testing.js +67 -23
  22. package/src/agent/retry.js +10 -2
  23. package/src/agent/scheduler.js +14 -2
  24. package/src/agent/session-state.js +18 -1
  25. package/src/agent/skill-loader.js +13 -7
  26. package/src/agent/skill-validator.js +19 -5
  27. package/src/agent/task-manager.js +61 -5
  28. package/src/agent/tools/document-chunk.js +21 -9
  29. package/src/agent/tools/phase-advance.js +18 -3
  30. package/src/agent/tools/release.js +51 -9
  31. package/src/agent/tools/rule-catalog.js +11 -1
  32. package/src/agent/tools/workspace-file.js +32 -0
  33. package/src/agent/workspace.js +39 -1
  34. package/src/cli/components.js +64 -14
  35. package/src/cli/index.js +62 -3
  36. package/src/cli/meme.js +26 -25
  37. package/src/config.js +65 -22
  38. package/src/model-tiers.json +24 -8
  39. package/src/providers.js +42 -0
  40. package/template/release/v1/README.md.tmpl +108 -0
  41. package/template/release/v1/catalog.json.tmpl +4 -0
  42. package/template/release/v1/kc_runtime/__init__.py +11 -0
  43. package/template/release/v1/kc_runtime/confidence.py +63 -0
  44. package/template/release/v1/kc_runtime/doc_parser.py +127 -0
  45. package/template/release/v1/manifest.json.tmpl +11 -0
  46. package/template/release/v1/render_dashboard.py +117 -0
  47. package/template/release/v1/run.py +212 -0
  48. package/template/release/v1/serve.sh +17 -0
  49. package/template/skills/en/meta-meta/work-decomposition/SKILL.md +266 -0
  50. package/template/skills/en/skill-creator/SKILL.md +1 -1
  51. package/template/skills/zh/meta-meta/work-decomposition/SKILL.md +264 -0
  52. package/template/skills/zh/skill-creator/SKILL.md +1 -1
package/src/agent/pipelines/_milestone-derive.js
@@ -0,0 +1,511 @@
1
+ // v0.7.0 Group A1: filesystem-derived pipeline milestones.
2
+ //
3
+ // E2E #5 finding (DS + GLM audits): every phase gate got force-bypassed
4
+ // because the engine's pipelineMilestones were tracking *which tools the
5
+ // agent called*, not *what artifacts ended up on disk*. Both contestants
6
+ // produced real work (70 skill scripts, 28 workflows, 1951 verdicts) via
7
+ // Write/Bash/sandbox_exec, so the milestone-recording tool wrappers
8
+ // (workflow-run.js → engine._recordMilestone) never fired and the gate
9
+ // stayed empty.
10
+ //
11
+ // This module is the new canonical source. Each derive function reads
12
+ // the workspace filesystem and returns the milestone fields for that
13
+ // phase. Pipelines call these instead of (or in addition to) their
14
+ // previous tool-instrumented counters.
15
+ //
16
+ // Design: simple + correct over fast + complex. Each derive is bounded
17
+ // (~10-50 stat calls per phase, all on warm OS cache → microseconds).
18
+ // No cache layer in v0.7.0 — if profiling later shows it's hot, add it
19
+ // then. The functions are pure: same disk state in, same milestones out.
20
+ //
21
+ // Workspace param is a Workspace instance with a .cwd string. Functions
22
+ // also accept a plain workspaceCwd string for tests / one-off audits
23
+ // (e.g., re-deriving E2E #5 session-state from saved workspaces).
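A minimal sketch of the two calling conventions described above, e.g. from an offline audit script; the workspace paths are hypothetical.

import { deriveBootstrapMilestones } from "./_milestone-derive.js";

// Either a Workspace-like object with a .cwd or a bare path string works.
console.log(deriveBootstrapMilestones({ cwd: "/tmp/saved-workspaces/e2e5-ds" }));
console.log(deriveBootstrapMilestones("/tmp/saved-workspaces/e2e5-glm"));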
24
+
25
+ import fs from "node:fs";
26
+ import path from "node:path";
27
+ import crypto from "node:crypto";
28
+
29
+ function cwdOf(ws) {
30
+ return typeof ws === "string" ? ws : (ws?.cwd || ws?.path || "");
31
+ }
32
+
33
+ function dirExists(p) {
34
+ try { return fs.statSync(p).isDirectory(); } catch { return false; }
35
+ }
36
+
37
+ function fileExists(p) {
38
+ try { return fs.statSync(p).isFile(); } catch { return false; }
39
+ }
40
+
41
+ function readDirSafe(p) {
42
+ try { return fs.readdirSync(p, { withFileTypes: true }); } catch { return []; }
43
+ }
44
+
45
+ function listChildren(p) {
46
+ return readDirSafe(p).filter((e) => !e.name.startsWith("."));
47
+ }
48
+
49
+ function listChildDirs(p) {
50
+ return listChildren(p).filter((e) => e.isDirectory());
51
+ }
52
+
53
+ function listChildFiles(p) {
54
+ return listChildren(p).filter((e) => e.isFile());
55
+ }
56
+
57
+ // Walk a directory recursively, yielding every file path. Skips hidden
58
+ // dirs/files and __pycache__. Used by derive functions that need to
59
+ // match arbitrarily-nested artifacts (e.g., scripts/ subdirs).
60
+ function* walkFiles(root) {
61
+ if (!dirExists(root)) return;
62
+ const stack = [root];
63
+ while (stack.length) {
64
+ const dir = stack.pop();
65
+ for (const e of readDirSafe(dir)) {
66
+ if (e.name.startsWith(".") || e.name === "__pycache__") continue;
67
+ const p = path.join(dir, e.name);
68
+ if (e.isDirectory()) stack.push(p);
69
+ else if (e.isFile()) yield p;
70
+ }
71
+ }
72
+ }
73
+
74
+ function readJsonSafe(p) {
75
+ try { return JSON.parse(fs.readFileSync(p, "utf-8")); } catch { return null; }
76
+ }
77
+
78
+ function sha256OfFile(p) {
79
+ try {
80
+ const buf = fs.readFileSync(p);
81
+ return crypto.createHash("sha256").update(buf).digest("hex");
82
+ } catch { return null; }
83
+ }
84
+
85
+ // Normalize a rule id like "R14" / "r014" / "R0014" to canonical "R014".
86
+ // Returns null for non-matching strings (e.g., thematic skill names like
87
+ // "account_identity"); callers treat a null result as "not a rule id" and keep the name as-is.
88
+ function canonicalRuleId(s) {
89
+ if (typeof s !== "string") return null;
90
+ const m = s.match(/^R0*(\d+)$/i);
91
+ if (m) return `R${String(parseInt(m[1], 10)).padStart(3, "0")}`;
92
+ return null;
93
+ }
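Concretely, the normalization behaves like this (the examples come from the comment above):

console.assert(canonicalRuleId("R14") === "R014");
console.assert(canonicalRuleId("r014") === "R014");
console.assert(canonicalRuleId("R0014") === "R014");
console.assert(canonicalRuleId("account_identity") === null);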
94
+
95
+ // ───────────────────────────────────────────────────────────────────
96
+ // bootstrap
97
+ // ───────────────────────────────────────────────────────────────────
98
+
99
+ export function deriveBootstrapMilestones(workspace) {
100
+ const cwd = cwdOf(workspace);
101
+ const samplesDir = path.join(cwd, "samples");
102
+ let hasSamples = false;
103
+ let sampleCount = 0;
104
+ if (dirExists(samplesDir)) {
105
+ // Count any non-hidden file at any depth — agents may organize
106
+ // samples in subdirs (E2E #5 GLM had samples/samples/ recursion).
107
+ for (const _f of walkFiles(samplesDir)) { sampleCount++; hasSamples = true; }
108
+ }
109
+ return { hasSamples, sampleCount };
110
+ }
111
+
112
+ // ───────────────────────────────────────────────────────────────────
113
+ // rule_extraction
114
+ // ───────────────────────────────────────────────────────────────────
115
+
116
+ export function deriveRuleExtractionMilestones(workspace) {
117
+ const cwd = cwdOf(workspace);
118
+ const rulesDir = path.join(cwd, "rules");
119
+
120
+ // rulesExtracted: every rule object across every JSON file in rules/
121
+ // that has a non-empty `id` field. catalog.json is canonical but agents
122
+ // sometimes fan out to per-rule files (E2E #5 DS).
123
+ const rulesExtracted = [];
124
+ const rulesWithChunkRefs = [];
125
+ if (dirExists(rulesDir)) {
126
+ for (const e of listChildFiles(rulesDir)) {
127
+ if (!e.name.endsWith(".json")) continue;
128
+ const data = readJsonSafe(path.join(rulesDir, e.name));
129
+ if (!data) continue;
130
+ const items = Array.isArray(data) ? data : (data.rules || []);
131
+ for (const r of items) {
132
+ if (r && typeof r.id === "string" && r.id.length) {
133
+ rulesExtracted.push(r.id);
134
+ if (Array.isArray(r.source_chunk_ids) && r.source_chunk_ids.length > 0) {
135
+ rulesWithChunkRefs.push(r.id);
136
+ }
137
+ }
138
+ }
139
+ }
140
+ }
141
+
142
+ // coverageAudited: presence of rules/coverage_audit.{md,json} OR a
143
+ // rules/coverage_report.md / output/coverage_report.md. Loose criterion
144
+ // because agents pick different conventions; the spirit is "did the
145
+ // agent produce a coverage doc" not "did they put it in this exact file".
146
+ const coverageAudited =
147
+ fileExists(path.join(rulesDir, "coverage_audit.md")) ||
148
+ fileExists(path.join(rulesDir, "coverage_audit.json")) ||
149
+ fileExists(path.join(rulesDir, "coverage_report.md")) ||
150
+ fileExists(path.join(cwd, "output", "coverage_report.md"));
151
+
152
+ return {
153
+ rulesExtracted,
154
+ rulesWithChunkRefs,
155
+ coverageAudited,
156
+ };
157
+ }
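A hypothetical rules/ file this derive would accept, shown in the object form; a bare top-level array of the same rule objects works too.

// rules/catalog.json (illustrative contents):
//   { "rules": [ { "id": "R001", "source_chunk_ids": ["chunk_0003"] },
//                { "id": "R002" } ] }
// For that file, deriveRuleExtractionMilestones would report
//   rulesExtracted     -> ["R001", "R002"]
//   rulesWithChunkRefs -> ["R001"]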
158
+
159
+ // ───────────────────────────────────────────────────────────────────
160
+ // skill_authoring
161
+ // ───────────────────────────────────────────────────────────────────
162
+
163
+ // Recognized check-script paths inside a skill dir, per A6 spec:
164
+ // <skillDir>/check_r###.py (DS + most agents)
165
+ // <skillDir>/check.py (canonical meta-meta spec)
166
+ // <skillDir>/scripts/check_r###.py (XM)
167
+ // <skillDir>/scripts/check.py (grouped check_r###_r###.py names also accepted)
168
+ function findCheckScripts(skillDir) {
169
+ const found = [];
170
+ for (const f of walkFiles(skillDir)) {
171
+ const base = path.basename(f);
172
+ const rel = path.relative(skillDir, f);
173
+ // Only count scripts at depth ≤ 2 (skillDir/check.py or skillDir/scripts/check.py)
174
+ const depth = rel.split(path.sep).length;
175
+ if (depth > 2) continue;
176
+ if (/^check(_r[\dr_-]+)?\.py$/i.test(base)) {
177
+ found.push(f);
178
+ }
179
+ }
180
+ return found;
181
+ }
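For reference, a hypothetical skill directory under these matching rules:

// rule_skills/R014/
//   check_r014.py                -> matched (depth 1)
//   scripts/check_r014_r020.py   -> matched (depth 2, grouped name)
//   scripts/nested/check.py      -> skipped (depth 3)
//   run_checks.py                -> skipped (name does not start with "check")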
182
+
183
+ export function deriveSkillAuthoringMilestones(workspace) {
184
+ const cwd = cwdOf(workspace);
185
+ const skillsDir = path.join(cwd, "rule_skills");
186
+ const skillsAuthored = [];
187
+ const skillsWithScripts = [];
188
+ const ruleIdsCovered = new Set();
189
+
190
+ if (!dirExists(skillsDir)) {
191
+ return { skillsAuthored, skillsWithScripts, ruleIdsCovered: [] };
192
+ }
193
+
194
+ for (const e of listChildDirs(skillsDir)) {
195
+ if (e.name.startsWith("__")) continue;
196
+ const skillPath = path.join(skillsDir, e.name);
197
+
198
+ // SKILL.md OR skill.md (case-insensitive — macOS/Windows users
199
+ // produce both, see v0.7.0 F1 task).
200
+ const hasSkillMd = listChildFiles(skillPath).some(
201
+ (f) => f.name.toLowerCase() === "skill.md",
202
+ );
203
+ const checkScripts = findCheckScripts(skillPath);
204
+ const hasCheckScript = checkScripts.length > 0;
206
+
207
+ if (hasSkillMd || hasCheckScript) skillsAuthored.push(e.name);
208
+ if (hasCheckScript) skillsWithScripts.push(e.name);
209
+
210
+ // Collect ruleIds covered by directory name, single check_r###.py
211
+ // names, grouped check_r###_r###.py names, and range dirs R078_R128.
212
+ const dirCanon = canonicalRuleId(e.name);
213
+ if (dirCanon) ruleIdsCovered.add(dirCanon);
214
+ const rangeDir = e.name.match(/^R0*(\d+)[_-]R0*(\d+)$/i);
215
+ if (rangeDir) {
216
+ const lo = parseInt(rangeDir[1], 10);
217
+ const hi = parseInt(rangeDir[2], 10);
218
+ for (let n = lo; n <= hi; n++) {
219
+ ruleIdsCovered.add(`R${String(n).padStart(3, "0")}`);
220
+ }
221
+ }
222
+ for (const scriptPath of checkScripts) {
223
+ const base = path.basename(scriptPath);
224
+ const single = base.match(/^check_r0*(\d+)\.py$/i);
225
+ if (single) {
226
+ ruleIdsCovered.add(`R${String(parseInt(single[1], 10)).padStart(3, "0")}`);
227
+ }
228
+ const grouped = base.match(/^check_r0*(\d+)[_-]+r0*(\d+)\.py$/i);
229
+ if (grouped) {
230
+ const lo = parseInt(grouped[1], 10);
231
+ const hi = parseInt(grouped[2], 10);
232
+ for (let n = lo; n <= hi; n++) {
233
+ ruleIdsCovered.add(`R${String(n).padStart(3, "0")}`);
234
+ }
235
+ }
236
+ }
237
+ }
238
+
239
+ return {
240
+ skillsAuthored,
241
+ skillsWithScripts,
242
+ ruleIdsCovered: [...ruleIdsCovered],
243
+ };
244
+ }
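A sketch of inspecting the derived coverage offline; the workspace path and directory layouts are hypothetical.

const authored = deriveSkillAuthoringMilestones("/tmp/saved-workspaces/e2e5-ds");
// e.g. rule_skills/R014/check_r014.py contributes "R014",
// rule_skills/R078_R128/check.py contributes the full "R078" ... "R128" range,
// and rule_skills/account_identity/check_r001_r003.py contributes "R001".."R003".
console.log(authored.ruleIdsCovered.sort());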
245
+
246
+ // ───────────────────────────────────────────────────────────────────
247
+ // skill_testing
248
+ // ───────────────────────────────────────────────────────────────────
249
+
250
+ export function deriveSkillTestingMilestones(workspace) {
251
+ const cwd = cwdOf(workspace);
252
+ const skillsDir = path.join(cwd, "rule_skills");
253
+ const skillsTested = [];
254
+
255
+ if (dirExists(skillsDir)) {
256
+ for (const e of listChildDirs(skillsDir)) {
257
+ if (e.name.startsWith("__")) continue;
258
+ const skillPath = path.join(skillsDir, e.name);
259
+ // Tested ⇔ has any of: tests/ dir, test_results.json, test_results/,
260
+ // assets/test_cases.json, OR a successful test artifact like
261
+ // *_test_output.json. Loose because agents use different conventions.
262
+ const hasTestArtifact =
263
+ dirExists(path.join(skillPath, "tests")) ||
264
+ fileExists(path.join(skillPath, "test_results.json")) ||
265
+ dirExists(path.join(skillPath, "test_results")) ||
266
+ fileExists(path.join(skillPath, "assets", "test_cases.json")) ||
267
+ listChildFiles(skillPath).some((f) =>
268
+ /^(test|.*_test)_(output|result|log)/i.test(f.name) && f.name.endsWith(".json"));
269
+ if (hasTestArtifact) skillsTested.push(e.name);
270
+ }
271
+ }
272
+
273
+ // skillsPassing — per-skill accuracy threshold. Without a uniform
274
+ // schema across agent outputs we report `tested` as the floor; the
275
+ // pipeline's existing _loadTestResults() can layer accuracy on top.
276
+ return { skillsTested };
277
+ }
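Any one of these hypothetical artifacts marks a skill as tested under the loose contract above:

// rule_skills/R014/tests/                    (directory)
// rule_skills/R014/test_results.json
// rule_skills/R014/assets/test_cases.json
// rule_skills/R014/r014_test_output.json     (matches the *_test_output pattern)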
278
+
279
+ // ───────────────────────────────────────────────────────────────────
280
+ // distillation
281
+ // ───────────────────────────────────────────────────────────────────
282
+
283
+ export function deriveDistillationMilestones(workspace) {
284
+ const cwd = cwdOf(workspace);
285
+ const wfRoot = path.join(cwd, "workflows");
286
+ const workflowsCreated = [];
287
+
288
+ if (dirExists(wfRoot)) {
289
+ // Two layouts seen in E2E #5:
290
+ // workflows/<id>/workflow_v#.py (canonical, what release.js expects)
291
+ // workflows/<id>_workflow.py (DS + GLM flat layout)
292
+ // workflows/<id>.json (DS regex_skill manifest)
293
+ // Accept all three; downstream release tool's auto-relocator (Group C)
294
+ // can normalize.
295
+ for (const e of listChildren(wfRoot)) {
296
+ if (e.isDirectory()) {
297
+ const sub = path.join(wfRoot, e.name);
298
+ const hasPy = listChildFiles(sub).some((f) =>
299
+ /workflow.*\.py$/i.test(f.name) || /^check.*\.py$/i.test(f.name));
300
+ if (hasPy) workflowsCreated.push(e.name);
301
+ continue;
302
+ }
303
+ if (e.isFile()) {
304
+ const m1 = e.name.match(/^(.+)_workflow\.py$/i);
305
+ if (m1) { workflowsCreated.push(m1[1]); continue; }
306
+ const m2 = e.name.match(/^(.+)\.json$/i);
307
+ if (m2) {
308
+ const data = readJsonSafe(path.join(wfRoot, e.name));
309
+ if (data && (data.rule_id || data.entry || data.type)) workflowsCreated.push(m2[1]);
310
+ continue;
311
+ }
312
+ }
313
+ }
314
+ }
315
+
316
+ // workflowsTested — look for per-workflow test artifacts. Same loose
317
+ // contract as skill_testing: any test_results.json / test_results/ /
318
+ // baseline_*.json present means the workflow has been exercised.
319
+ const workflowsTested = [];
320
+ if (dirExists(wfRoot)) {
321
+ for (const e of listChildDirs(wfRoot)) {
322
+ const sub = path.join(wfRoot, e.name);
323
+ if (
324
+ fileExists(path.join(sub, "test_results.json")) ||
325
+ dirExists(path.join(sub, "test_results")) ||
326
+ listChildFiles(sub).some((f) => /^(baseline|test|result)_.*\.json$/i.test(f.name))
327
+ ) {
328
+ workflowsTested.push(e.name);
329
+ }
330
+ }
331
+ }
332
+
333
+ return { workflowsCreated, workflowsTested };
334
+ }
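For instance, each of these hypothetical entries counts toward workflowsCreated:

// workflows/R014/workflow_v1.py   -> "R014"   (canonical per-rule dir)
// workflows/R015_workflow.py      -> "R015"   (flat layout)
// workflows/R016.json             -> "R016"   (manifest, if it carries rule_id / entry / type)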
335
+
336
+ // ───────────────────────────────────────────────────────────────────
337
+ // production_qc
338
+ // ───────────────────────────────────────────────────────────────────
339
+
340
+ export function deriveProductionQcMilestones(workspace) {
341
+ const cwd = cwdOf(workspace);
342
+ const outputDir = path.join(cwd, "output");
343
+ let batchesProcessed = 0;
344
+ const documentsReviewedSet = new Set();
345
+ const candidateDirs = [
346
+ path.join(outputDir, "results"),
347
+ path.join(outputDir, "qc"),
348
+ path.join(outputDir, "distillation"),
349
+ ];
350
+
351
+ for (const dir of candidateDirs) {
352
+ if (!dirExists(dir)) continue;
353
+ for (const e of listChildFiles(dir)) {
354
+ if (!e.name.endsWith(".json")) continue;
355
+ const data = readJsonSafe(path.join(dir, e.name));
356
+ if (data === null || data === undefined) continue;
357
+
358
+ // Heuristic, two shapes seen in E2E #5:
359
+ // (a) DS — object with results/verdicts/n_skills/batch_id keys
360
+ // (b) GLM — array of per-document verdict objects (each has
361
+ // .verdict + .file/.path)
362
+ let isBatch = false;
363
+ if (Array.isArray(data) && data.length > 0) {
364
+ const first = data[0];
365
+ if (first && typeof first === "object" && "verdict" in first) isBatch = true;
366
+ } else if (data && typeof data === "object") {
367
+ isBatch = !!(
368
+ data.batch_id ||
369
+ data.n_skills ||
370
+ data.results ||
371
+ data.verdicts ||
372
+ data.verdict_stats ||
373
+ data.accuracyByRule
374
+ );
375
+ }
376
+ if (!isBatch) continue;
377
+ batchesProcessed++;
378
+
379
+ // Documents reviewed: deduped doc paths from whatever shape we got.
380
+ if (Array.isArray(data)) {
381
+ for (const r of data) {
382
+ if (r && typeof r === "object") {
383
+ const key = r.path || r.file || r.doc || r.document;
384
+ if (key) documentsReviewedSet.add(String(key));
385
+ }
386
+ }
387
+ } else if (data.results && typeof data.results === "object") {
388
+ for (const r of Object.values(data.results)) {
389
+ if (r && typeof r === "object") {
390
+ for (const docKey of Object.keys(r)) documentsReviewedSet.add(docKey);
391
+ }
392
+ }
393
+ }
394
+ if (Array.isArray(data.documents)) {
395
+ for (const d of data.documents) {
396
+ documentsReviewedSet.add(typeof d === "string" ? d : (d?.path || JSON.stringify(d)));
397
+ }
398
+ }
399
+ }
400
+ }
401
+
402
+ return {
403
+ batchesProcessed,
404
+ documentsReviewed: documentsReviewedSet.size,
405
+ documentsReviewedKeys: [...documentsReviewedSet], // for describeState detail
406
+ };
407
+ }
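A sketch of the two batch shapes the heuristic accepts; filenames and contents are hypothetical.

// output/results/batch_001.json:
//   { "batch_id": "b1", "results": { "R014": { "doc_017.md": "pass" } } }
// output/qc/review.json:
//   [ { "verdict": "pass", "file": "doc_017.md" }, { "verdict": "fail", "file": "doc_018.md" } ]
// Both files bump batchesProcessed; "doc_017.md" is deduped across them, so
// documentsReviewed is 2 here, not 3.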
408
+
409
+ // ───────────────────────────────────────────────────────────────────
410
+ // finalization
411
+ // ───────────────────────────────────────────────────────────────────
412
+
413
+ export function deriveFinalizationMilestones(workspace) {
414
+ const cwd = cwdOf(workspace);
415
+
416
+ // readmeWritten: at least one populated README.md under output/releases/*/
417
+ // (≥500 bytes; anything smaller is treated as a template stub). Catches the DS + GLM E2E #5
418
+ // failure where run.py was shipped without a real README.
419
+ let readmeWritten = false;
420
+ const releasesRoot = path.join(cwd, "output", "releases");
421
+ if (dirExists(releasesRoot)) {
422
+ outer: for (const e of listChildDirs(releasesRoot)) {
423
+ const readme = path.join(releasesRoot, e.name, "README.md");
424
+ try {
425
+ const stat = fs.statSync(readme);
426
+ if (stat.isFile() && stat.size >= 500) { readmeWritten = true; break outer; }
427
+ } catch { /* skip */ }
428
+ }
429
+ }
430
+ // Also accept (in priority order):
431
+ // - rule_skills/README.md (the v0.6.0 finalization pipeline target)
432
+ // - workspace-root README.md (GLM E2E #5 wrote here)
433
+ // Avoids false-negatives when the agent picks a different shipping
434
+ // location than the canonical release/v1/ directory.
435
+ if (!readmeWritten) {
436
+ for (const candidate of [
437
+ path.join(cwd, "rule_skills", "README.md"),
438
+ path.join(cwd, "README.md"),
439
+ ]) {
440
+ try {
441
+ const stat = fs.statSync(candidate);
442
+ if (stat.isFile() && stat.size >= 500) { readmeWritten = true; break; }
443
+ } catch { /* skip */ }
444
+ }
445
+ }
446
+
447
+ // coverageReportWritten: rules/coverage_report.md OR output/coverage_report.md.
448
+ const coverageReportWritten =
449
+ fileExists(path.join(cwd, "rules", "coverage_report.md")) ||
450
+ fileExists(path.join(cwd, "output", "coverage_report.md"));
451
+
452
+ // finalDashboardWritten: at least one dashboards/*.html that is NOT a
453
+ // duplicate of any other. DS + GLM both shipped byte-identical
454
+ // dashboards under different filenames; sha256-distinct guards against
455
+ // it. Single-file case is OK (one dashboard, no comparison needed).
456
+ // Multi-file case requires at least two distinct sha256 hashes among the files.
457
+ //
458
+ // Fallback path (v0.6.0 final_dashboard.html) only applies when
459
+ // dashboards/ doesn't exist at all — if dashboards/ exists with
460
+ // duplicates, the gate stays closed so Group C's dedup error fires.
461
+ let finalDashboardWritten = false;
462
+ const dashboardsDir = path.join(cwd, "output", "dashboards");
463
+ let dashboardDuplicatesDetected = false;
464
+ if (dirExists(dashboardsDir)) {
465
+ const htmls = listChildFiles(dashboardsDir).filter((e) => e.name.endsWith(".html"));
466
+ if (htmls.length > 0) {
467
+ const hashes = new Set();
468
+ for (const h of htmls) {
469
+ const sig = sha256OfFile(path.join(dashboardsDir, h.name));
470
+ if (sig) hashes.add(sig);
471
+ }
472
+ if (htmls.length === 1) finalDashboardWritten = hashes.size >= 1;
473
+ else if (hashes.size >= 2) finalDashboardWritten = true;
474
+ else dashboardDuplicatesDetected = true;
475
+ }
476
+ } else {
477
+ // No dashboards/ dir — accept v0.6.0 single-file convention
478
+ if (fileExists(path.join(cwd, "output", "final_dashboard.html"))) {
479
+ finalDashboardWritten = true;
480
+ }
481
+ }
482
+
483
+ return {
484
+ readmeWritten,
485
+ coverageReportWritten,
486
+ finalDashboardWritten,
487
+ dashboardDuplicatesDetected,
488
+ };
489
+ }
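Summarizing the dashboard logic above with hypothetical filenames:

// output/dashboards/a.html                       -> finalDashboardWritten = true
// output/dashboards/{a,b}.html, distinct bytes   -> finalDashboardWritten = true
// output/dashboards/{a,b}.html, identical bytes  -> dashboardDuplicatesDetected = true, gate stays closed
// no dashboards/ dir, output/final_dashboard.html present -> accepted via the v0.6.0 fallback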
490
+
491
+ // ───────────────────────────────────────────────────────────────────
492
+ // Phase-keyed dispatcher (convenience for tests + offline audit).
493
+ // ───────────────────────────────────────────────────────────────────
494
+
495
+ export const DERIVE_BY_PHASE = {
496
+ bootstrap: deriveBootstrapMilestones,
497
+ rule_extraction: deriveRuleExtractionMilestones,
498
+ skill_authoring: deriveSkillAuthoringMilestones,
499
+ skill_testing: deriveSkillTestingMilestones,
500
+ distillation: deriveDistillationMilestones,
501
+ production_qc: deriveProductionQcMilestones,
502
+ finalization: deriveFinalizationMilestones,
503
+ };
504
+
505
+ export function deriveAllMilestones(workspace) {
506
+ const out = {};
507
+ for (const [phase, fn] of Object.entries(DERIVE_BY_PHASE)) {
508
+ out[phase] = fn(workspace);
509
+ }
510
+ return out;
511
+ }
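A minimal offline-audit sketch using the dispatcher; the saved-workspace path is hypothetical.

import { deriveAllMilestones } from "./_milestone-derive.js";

const byPhase = deriveAllMilestones("/tmp/saved-workspaces/e2e5-glm");
console.log(JSON.stringify(byPhase.finalization, null, 2));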
package/src/agent/pipelines/base.js
@@ -17,4 +17,25 @@ export class Pipeline {
17
17
 
18
18
  /** Restore milestone state from persisted data. Override in subclasses. */
19
19
  importState(_data) { /* no-op by default */ }
20
+
21
+ /**
22
+ * v0.6.3: Phase-misfit nudge. Called after each tool execution. If the tool
23
+ * call looks like work that belongs to a different phase, return a short
24
+ * hint string. Engine appends it as a `<system-reminder>` tag on the tool
25
+ * result, so the agent sees the mismatch on its next turn and can self-
26
+ * check whether to call phase_advance.
27
+ *
28
+ * Default: no hint. Phase-specific pipelines override with patterns they
29
+ * recognize as out-of-phase (e.g., BOOTSTRAP shouldn't write to
30
+ * rule_skills/, RULE_EXTRACTION shouldn't run workflows on production samples).
31
+ *
32
+ * Keep hints terse — they consume context budget every misfit. State the
33
+ * mismatch + suggest the right phase + remind about phase_advance.
34
+ *
35
+ * @param {string} toolName
36
+ * @param {object} toolInput
37
+ * @param {object} result - ToolResult-like { content, isError }
38
+ * @returns {string|null}
39
+ */
40
+ phaseMisfitHint(_toolName, _toolInput, _result) { return null; }
20
41
  }
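A minimal sketch of an override, assuming a hypothetical bootstrap pipeline and that tool inputs expose their target path as file_path or path (both field names are assumptions):

import { Pipeline } from "./base.js";

class BootstrapPipeline extends Pipeline {
  phaseMisfitHint(toolName, toolInput, _result) {
    const target = toolInput?.file_path || toolInput?.path || "";
    // Writing under rule_skills/ during bootstrap looks like skill_authoring work.
    if (toolName === "Write" && target.includes("rule_skills/")) {
      return "This looks like skill_authoring work; if sample collection is done, call phase_advance.";
    }
    return null;
  }
}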
package/src/agent/pipelines/distillation.js
@@ -2,6 +2,7 @@ import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { Phase, PipelineEvent } from "./index.js";
4
4
  import { Pipeline } from "./base.js";
5
+ import { deriveDistillationMilestones } from "./_milestone-derive.js";
5
6
 
6
7
  export class DistillationEngine extends Pipeline {
7
8
  constructor(workspace) {
@@ -40,26 +41,35 @@ export class DistillationEngine extends Pipeline {
40
41
  }
41
42
 
42
43
  _scanWorkflows() {
43
- // v0.6.1 A6: preserve engine-emitted entries across filesystem rescans.
44
- // workflow_run hook bumps workflowsTested[ruleId] and adds to
45
- // workflowsPassing on success — without this preservation, those entries
46
- // get clobbered on the next describeState() / onToolResult() rescan.
44
+ // v0.7.0 A1: route through filesystem-derived helper. The helper
45
+ // recognizes all three workflow layouts seen in E2E #5:
46
+ // workflows/<id>/workflow_v#.py (canonical, release.js's expectation)
47
+ // workflows/<id>_workflow.py (DS + GLM flat layout)
48
+ // workflows/<id>.json (DS regex_skill manifest)
49
+ // Engine-emitted entries (v0.6.1 A6) are still preserved as a soft
50
+ // overlay — disk wins on counter membership, but accuracy /
51
+ // tier-assignment data set by tool wrappers is kept.
47
52
  const engineWfTested = { ...this.workflowsTested };
48
53
  const engineWfPassing = [...this.workflowsPassing];
49
54
 
55
+ const m = deriveDistillationMilestones(this._workspace);
56
+ // workflowsCreated becomes a {ruleId: 1} dict for backwards-compat
57
+ // with downstream code that uses Object.keys() / `id in workflows`.
50
58
  this.workflowsCreated = {};
59
+ for (const id of m.workflowsCreated) this.workflowsCreated[id] = 1;
60
+
51
61
  this.workflowsTested = {};
52
62
  this.workflowsPassing = [];
53
63
  this.tierAssignments = {};
64
+
65
+ // Layered: also read per-rule config.json for tier + accuracy
66
+ // metadata — this is auxiliary signal not represented on the
67
+ // filesystem at the workflow-existence level.
54
68
  const wfDir = path.join(this._workspace.cwd, "workflows");
55
- if (!fs.existsSync(wfDir)) return;
56
-
57
- for (const e of fs.readdirSync(wfDir, { withFileTypes: true })) {
58
- if (e.isDirectory()) {
59
- const ruleDir = path.join(wfDir, e.name);
60
- const pyFiles = fs.readdirSync(ruleDir).filter((f) => f.endsWith(".py"));
61
- if (pyFiles.length > 0) this.workflowsCreated[e.name] = pyFiles.length;
62
- const cfgPath = path.join(ruleDir, "config.json");
69
+ if (fs.existsSync(wfDir)) {
70
+ for (const e of fs.readdirSync(wfDir, { withFileTypes: true })) {
71
+ if (!e.isDirectory()) continue;
72
+ const cfgPath = path.join(wfDir, e.name, "config.json");
63
73
  if (fs.existsSync(cfgPath)) {
64
74
  try {
65
75
  const cfg = JSON.parse(fs.readFileSync(cfgPath, "utf-8"));
@@ -71,12 +81,15 @@ export class DistillationEngine extends Pipeline {
71
81
  }
72
82
  } catch { /* skip */ }
73
83
  }
74
- } else if (e.isFile() && e.name.endsWith(".py")) {
75
- this.workflowsCreated[path.parse(e.name).name] = 1;
76
84
  }
77
85
  }
86
+ // Helper-derived workflowsTested too (per-workflow test_results/ etc.)
87
+ for (const id of m.workflowsTested) {
88
+ if (!(id in this.workflowsTested)) this.workflowsTested[id] = 1.0;
89
+ if (!this.workflowsPassing.includes(id)) this.workflowsPassing.push(id);
90
+ }
78
91
 
79
- // Re-merge engine-emitted entries on top of filesystem-derived state
92
+ // Re-merge engine-emitted entries (v0.6.1 A6 carry-forward)
80
93
  for (const [k, v] of Object.entries(engineWfTested)) {
81
94
  if (!(k in this.workflowsTested)) this.workflowsTested[k] = v;
82
95
  }