kc-beta 0.7.3 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/README.md +57 -4
  2. package/bin/kc-beta.js +20 -6
  3. package/package.json +3 -2
  4. package/src/agent/engine.js +493 -132
  5. package/src/agent/pipelines/_advance-hints.js +92 -0
  6. package/src/agent/pipelines/_milestone-derive.js +387 -17
  7. package/src/agent/pipelines/initializer.js +4 -1
  8. package/src/agent/pipelines/skill-authoring.js +30 -1
  9. package/src/agent/skill-loader.js +433 -111
  10. package/src/agent/tools/agent-tool.js +2 -2
  11. package/src/agent/tools/consult-skill.js +127 -0
  12. package/src/agent/tools/copy-to-workspace.js +4 -3
  13. package/src/agent/tools/dashboard-render.js +48 -1
  14. package/src/agent/tools/document-parse.js +31 -2
  15. package/src/agent/tools/phase-advance.js +17 -13
  16. package/src/agent/tools/release.js +378 -8
  17. package/src/agent/tools/sandbox-exec.js +65 -8
  18. package/src/agent/tools/worker-llm-call.js +95 -15
  19. package/src/agent/tools/workspace-file.js +7 -7
  20. package/src/agent/workspace.js +25 -4
  21. package/src/cli/components.js +4 -1
  22. package/src/cli/index.js +97 -1
  23. package/src/config.js +20 -3
  24. package/src/marathon/driver.js +217 -0
  25. package/src/marathon/prompts.js +93 -0
  26. package/template/.env.template +16 -0
  27. package/template/AGENT.md +182 -7
  28. package/template/skills/en/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
  29. package/template/skills/en/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +15 -0
  30. package/template/skills/{zh/meta → en}/compliance-judgment/SKILL.md +1 -0
  31. package/template/skills/en/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
  32. package/template/skills/en/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
  33. package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
  34. package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
  35. package/template/skills/en/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
  36. package/template/skills/{zh/meta → en}/document-chunking/SKILL.md +1 -0
  37. package/template/skills/en/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
  38. package/template/skills/{zh/meta → en}/entity-extraction/SKILL.md +1 -0
  39. package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
  40. package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
  41. package/template/skills/en/{meta-meta/quality-control → quality-control}/SKILL.md +10 -0
  42. package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
  43. package/template/skills/en/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
  44. package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +40 -0
  45. package/template/skills/en/skill-creator/SKILL.md +2 -1
  46. package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/SKILL.md +58 -4
  47. package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
  48. package/template/skills/en/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
  49. package/template/skills/en/{meta-meta/version-control → version-control}/SKILL.md +1 -0
  50. package/template/skills/en/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +51 -6
  51. package/template/skills/phase_skills.yaml +112 -0
  52. package/template/skills/zh/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
  53. package/template/skills/zh/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +15 -0
  54. package/template/skills/zh/compliance-judgment/SKILL.md +83 -0
  55. package/template/skills/zh/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
  56. package/template/skills/zh/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
  57. package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
  58. package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
  59. package/template/skills/zh/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
  60. package/template/skills/zh/document-chunking/SKILL.md +40 -0
  61. package/template/skills/zh/document-parsing/SKILL.md +102 -0
  62. package/template/skills/zh/entity-extraction/SKILL.md +121 -0
  63. package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
  64. package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
  65. package/template/skills/zh/{meta-meta/quality-control → quality-control}/SKILL.md +10 -0
  66. package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
  67. package/template/skills/zh/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
  68. package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +40 -0
  69. package/template/skills/zh/skill-creator/SKILL.md +205 -200
  70. package/template/skills/zh/skill-to-workflow/SKILL.md +243 -0
  71. package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
  72. package/template/skills/zh/tree-processing/SKILL.md +126 -0
  73. package/template/skills/zh/{meta-meta/version-control → version-control}/SKILL.md +1 -0
  74. package/template/skills/zh/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +49 -4
  75. package/template/workflows/common/llm_client.py +168 -0
  76. package/template/workflows/common/utils.py +132 -0
  77. package/template/CLAUDE.md +0 -150
  78. package/template/skills/en/meta/compliance-judgment/SKILL.md +0 -82
  79. package/template/skills/en/meta/document-chunking/SKILL.md +0 -32
  80. package/template/skills/en/meta/entity-extraction/SKILL.md +0 -120
  81. package/template/skills/zh/meta/document-parsing/SKILL.md +0 -101
  82. package/template/skills/zh/meta/tree-processing/SKILL.md +0 -121
  83. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +0 -188
  84. /package/template/skills/en/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
  85. /package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
  86. /package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
  87. /package/template/skills/en/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
  88. /package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
  89. /package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
  90. /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
  91. /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
  92. /package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
  93. /package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
  94. /package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
  95. /package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
  96. /package/template/skills/en/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
  97. /package/template/skills/zh/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
  98. /package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
  99. /package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
  100. /package/template/skills/zh/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
  101. /package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
  102. /package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
  103. /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
  104. /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
  105. /package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
  106. /package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
  107. /package/template/skills/zh/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
  108. /package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
  109. /package/template/skills/zh/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
@@ -0,0 +1,92 @@
1
+ // v0.8 P0-E: prescriptive refusal hints for phase_advance gate failures.
2
+ //
3
+ // 资管 + 贷款 v0.7.5 audits both observed the force-bypass pattern:
4
+ // engine refuses phase_advance with `engineCounts: workflowsTested: 0/14`,
5
+ // agent does ~3 min of cleanup, then forces past anyway. Cleanup happens
6
+ // (signal IS being consumed) but force always wins because the descriptive
7
+ // "exit criteria not met" hint doesn't tell the agent WHAT to write.
8
+ //
9
+ // v0.8 P0-E replaces the descriptive hint with a prescriptive one. The
10
+ // hint text below derives from the same artifact paths + filename patterns
11
+ // that _milestone-derive.js walks, so the agent's instructions match what
12
+ // the engine will check next turn.
13
+ //
14
+ // Design contract (matches v0.8 design doc Q20 user lean):
15
+ // - Single shared helper here; engine.js + phase-advance.js both call it.
16
+ // - Each hint is one or two concrete sentences naming a path, a filename
17
+ // pattern, and a script to run (where applicable).
18
+ // - Hint output is plain text, suitable to drop into a tool result.
19
+ //
20
+ // To extend: edit the per-phase hint generators below. Keep the artifact
21
+ // paths in sync with the corresponding derive function in _milestone-derive.js.
22
+
23
/**
 * Build a prescriptive refusal hint for a phase_advance gate failure.
 *
 * The hint body is looked up in HINTS_BY_PHASE by phase name. Only own,
 * non-empty string entries are accepted: a phase name that collides with an
 * inherited Object.prototype key ("constructor", "toString", "__proto__", ...)
 * falls through to the generic fallback instead of leaking a stringified
 * function/object into the tool result.
 *
 * @param {string} fromPhase — the phase the agent is trying to leave
 * @param {object} engineCounts — raw engine counts (or null); not read by this
 *   function, kept in the signature for call-site compatibility
 * @param {string} [engineCountsLine] — formatted summary string from _buildEngineCountsBlock
 * @returns {string} a multi-line hint suitable for the LLM tool result
 */
export function getPrescriptiveHint(fromPhase, engineCounts, engineCountsLine = "") {
  // Optional telemetry header, separated from the hint by a blank line.
  const header = engineCountsLine
    ? `Engine telemetry: ${engineCountsLine}\n\n`
    : "";

  // Own-property check: a plain `HINTS_BY_PHASE[fromPhase]` would also resolve
  // keys inherited from Object.prototype.
  const isKnownPhase =
    typeof fromPhase === "string" &&
    Object.prototype.hasOwnProperty.call(HINTS_BY_PHASE, fromPhase);
  const hint = isKnownPhase ? HINTS_BY_PHASE[fromPhase] : undefined;

  if (typeof hint !== "string" || hint === "") {
    return header + "Check the system prompt's phase state block for missing milestones. The engine derives milestones from filesystem facts.";
  }
  return header + hint;
}
42
+
43
// Per-phase prescriptive hint text, keyed by the phase the agent is trying to
// leave. Each value is a newline-joined list of concrete instructions.
//
// Two entries were corrected to match what the milestone-derive code reads:
//   - distillation: grouped-workflow `source_rules` are credited from
//     config.json or SKILL.md / workflow.md frontmatter in the workflow
//     directory; a Python docstring is not parsed.
//   - production_qc: the declared `documents_reviewed` value is the maximum
//     found in any single file, not a sum across files.
const HINTS_BY_PHASE = {
  bootstrap: [
    "To advance to rule_extraction:",
    " • Verify <workspace>/source_docs/ contains the regulation file(s) you're extracting rules from.",
    " • Verify <workspace>/samples/ contains at least one sample document for testing.",
    " • Ensure AGENT.md exists at workspace root with project context filled in.",
    "Engine reads filesystem facts; no need to call any 'mark bootstrap complete' tool — just produce the artifacts.",
  ].join("\n"),

  rule_extraction: [
    "To advance to skill_authoring:",
    " • For each rule in the source regulation, write an entry to rules/catalog.json with {id, source_ref, falsifiability_statement, applicable_sections}.",
    " • Use rule_catalog tool (operation: 'write') for catalog entries; engine derives `rulesExtracted` from this file.",
    " • For chunk traceability: each catalog entry should reference its source chunk via applicable_sections.",
    " • Write rule_skills/coverage_report.md or rules/coverage_report.md to mark coverageAudited=true (a per-rule × per-section table).",
  ].join("\n"),

  skill_authoring: [
    "To advance to skill_testing:",
    " • For each rule_id in rules/catalog.json, create rule_skills/<rule_id>/SKILL.md (uppercase! engine path-match is case-sensitive on Linux).",
    " • Each SKILL.md needs frontmatter (id, name, description) + a body describing verification logic.",
    " • Pair each SKILL.md with rule_skills/<rule_id>/check.py — substantive logic, NOT a 'return NOT_APPLICABLE' stub. If logic lives in workflows/, check.py must import + call the workflow.",
    " • For grouped skills covering multiple rules, frontmatter MUST include `source_rules: [R001, R005, ...]` so engine credits each rule_id.",
    " • Engine counts `rulesCovered` from rule_skills/ walk; aim for catalog.json's full rule list.",
  ].join("\n"),

  skill_testing: [
    "To advance to distillation:",
    " • For each rule_id, write test results to output/results/skill_test_round<N>.json or output/results/<rule_id>_<sample>.json.",
    " • Each test result needs `verdict` (PASS/FAIL/NOT_APPLICABLE) plus per-rule accuracy.",
    " • Engine counts `skillsTested` from these files. Aim for ≥1 result per rule, with ≥90% accuracy on labeled samples.",
    " • If a rule consistently fails, iterate the SKILL.md + check.py before advancing (this is the evolution-loop pattern).",
  ].join("\n"),

  distillation: [
    "To advance to production_qc:",
    " • For each rule_id, write workflows/<rule_id>/workflow_v1.py (regex-only or hybrid regex+worker_llm).",
    " • Each workflow.py needs a `verify(document_text, config)` function returning {verdict, evidence, confidence, ...}.",
    " • Engine counts `workflowsCreated` from workflows/<rule_id>/workflow_v*.py walk.",
    " • Run scripts/v1_regression.py (or equivalent) to populate output/results/v1_regression.json — engine counts `workflowsTested` from this.",
    " • For grouped workflows (one workflow covering multiple rules), declare `source_rules: [...]` in that workflow directory's config.json or in SKILL.md / workflow.md frontmatter there — a Python docstring is not parsed.",
  ].join("\n"),

  production_qc: [
    "To advance to finalization:",
    " • Write output/results/production_qc_results.json (preferred shape: {results: {<rule_id>: {<doc_id>: {verdict, evidence, confidence}}}}).",
    " • OR write output/qc/review_<batch>.json with `documents_reviewed: N` — engine takes the largest N declared in any one file (it does not sum across files), so make N cumulative.",
    " • Engine counts `batchesProcessed` and `documentsReviewed`. Each batch should cover the full doc set OR a meaningful sample.",
    " • If accuracy is below threshold, run evolution-loop on the failing rules before advancing.",
  ].join("\n"),

  finalization:
    "(Finalization is the terminal phase — no forward advance.)",
};
91
+
92
+ export default getPrescriptiveHint;
@@ -80,6 +80,75 @@ function readJsonSafe(p) {
80
80
  try { return JSON.parse(fs.readFileSync(p, "utf-8")); } catch { return null; }
81
81
  }
82
82
 
83
// v0.8 P1-A: resolve the first candidate (in list order) that exists on disk.
// `relPaths` are relative to `workspaceCwd`; the return value is the joined
// path of the first hit, or null when none of the candidates exist.
//
// Exists because agents write the same artifact to different places (the
// v0.7.5 audits saw coverage reports under rule_skills/ and under output/),
// and keeping the candidate lists at the call sites of one shared helper
// keeps additions centralized.
function findFileAcrossKnownPaths(workspaceCwd, relPaths) {
  // `find` stops at the first existing candidate, so later entries are
  // neither joined nor probed once a match is found.
  const firstHit = relPaths.find((candidate) =>
    fileExists(path.join(workspaceCwd, candidate)),
  );
  return firstHit === undefined ? null : path.join(workspaceCwd, firstHit);
}
98
+
99
// Read a file as UTF-8 text. Any read failure (missing path, directory,
// permission error, ...) yields "" instead of throwing.
function readFileSafe(p) {
  let text = "";
  try {
    text = fs.readFileSync(p, "utf-8");
  } catch {
    // Best-effort read: fall through with the empty-string default.
  }
  return text;
}
102
+
103
/**
 * v0.7.5 G-H1: extract `source_rules: [...]` from YAML frontmatter.
 *
 * Supports both inline and block list forms:
 *   source_rules: [R001, R005, R007]
 *   source_rules:
 *     - R001
 *     - R005
 * Block items may be indented or flush-left (both are valid YAML), and
 * entries may be single- or double-quoted.
 *
 * The input is normalized before matching (leading BOM stripped, CRLF / CR
 * converted to LF) so files saved on Windows are parsed like LF files.
 *
 * Used by milestone derivation to credit grouped/thematic skill folders
 * + master workflows where the agent declares which rules are covered.
 *
 * @param {string} content — full text of a markdown file with frontmatter
 * @returns {string[]} rule IDs in `R` + zero-padded (min 3 digits) form,
 *   e.g. ["R001", "R005"]; entries that do not look like R<digits> after
 *   canonicalRuleId() are dropped. Returns [] when there is no frontmatter
 *   or no source_rules key.
 */
function parseSourceRulesFromFrontmatter(content) {
  if (!content || typeof content !== "string") return [];

  // Normalize so the `\n`-based patterns below also work on BOM / CRLF input.
  const text = content.replace(/^\uFEFF/, "").replace(/\r\n?/g, "\n");
  const fmMatch = text.match(/^---\n([\s\S]*?)\n---/);
  if (!fmMatch) return [];
  const fm = fmMatch[1];

  // Shared tail of both list forms: canonicalize, keep only R<digits>, then
  // upper-case and zero-pad to three digits.
  const toCanonicalIds = (rawIds) =>
    rawIds
      .filter(Boolean)
      .map((s) => canonicalRuleId(s) || s)
      .filter((rid) => /^R\d+$/i.test(rid))
      .map((rid) =>
        rid
          .toUpperCase()
          .replace(/^R0*(\d+)$/, (_, n) => `R${String(parseInt(n, 10)).padStart(3, "0")}`),
      );

  // Inline form: source_rules: [R001, R005, "R007"]
  const inlineMatch = fm.match(/^source_rules\s*:\s*\[([^\]]*)\]\s*$/m);
  if (inlineMatch) {
    const rawIds = inlineMatch[1]
      .split(",")
      .map((s) => s.trim().replace(/^["']|["']$/g, ""));
    return toCanonicalIds(rawIds);
  }

  // Block form: source_rules:\n  - R001\n  - R005 (indentation optional)
  const blockMatch = fm.match(/^source_rules\s*:\s*\n((?:[ \t]*-\s+\S+\s*\n?)+)/m);
  if (blockMatch) {
    const rawIds = blockMatch[1].split("\n").map((line) => {
      const m = line.match(/^[ \t]*-\s+["']?([^"'\s]+)["']?\s*$/);
      return m ? m[1] : null;
    });
    return toCanonicalIds(rawIds);
  }

  return [];
}
151
+
83
152
  function sha256OfFile(p) {
84
153
  try {
85
154
  const buf = fs.readFileSync(p);
@@ -144,15 +213,18 @@ export function deriveRuleExtractionMilestones(workspace) {
144
213
  }
145
214
  }
146
215
 
147
- // coverageAudited: presence of rules/coverage_audit.{md,json} OR a
148
- // rules/coverage_report.md / output/coverage_report.md. Loose criterion
149
- // because agents pick different conventions; the spirit is "did the
216
+ // coverageAudited: presence of any coverage audit/report doc. Loose
217
+ // criterion because agents pick different conventions; the spirit is "did the
150
218
  // agent produce a coverage doc" not "did they put it in this exact file".
151
- const coverageAudited =
152
- fileExists(path.join(rulesDir, "coverage_audit.md")) ||
153
- fileExists(path.join(rulesDir, "coverage_audit.json")) ||
154
- fileExists(path.join(rulesDir, "coverage_report.md")) ||
155
- fileExists(path.join(cwd, "output", "coverage_report.md"));
219
+ // v0.8 P1-A: use the same findFileAcrossKnownPaths helper as finalization.
220
+ const coverageAudited = !!findFileAcrossKnownPaths(cwd, [
221
+ path.join("rules", "coverage_audit.md"),
222
+ path.join("rules", "coverage_audit.json"),
223
+ path.join("rules", "coverage_report.md"),
224
+ path.join("output", "coverage_report.md"),
225
+ path.join("rule_skills", "coverage_report.md"), // v0.8 P1-A
226
+ path.join("output", "qc", "coverage_report.md"),
227
+ ]);
156
228
 
157
229
  return {
158
230
  rulesExtracted,
@@ -239,15 +311,110 @@ export function deriveSkillAuthoringMilestones(workspace) {
239
311
  }
240
312
  }
241
313
  }
314
+
315
+ // v0.7.5 G-H1: also credit rule_ids declared in SKILL.md frontmatter
316
+ // `source_rules:` field. Agents using grouped/thematic skill folders
317
+ // (e.g., S01_compliance/, custodian_checks/) declare which rules
318
+ // their grouped check covers via frontmatter; engine derivation
319
+ // credits each declared rule_id. Audit found 资管 v0.7.4 session
320
+ // forced through skill_authoring → skill_testing because its 10 S*
321
+ // grouped folders weren't credited (rulesCovered=0/94).
322
+ if (hasSkillMd) {
323
+ try {
324
+ const skillMdFile = listChildFiles(skillPath).find(
325
+ (f) => f.name.toLowerCase() === "skill.md",
326
+ );
327
+ if (skillMdFile) {
328
+ const content = readFileSafe(path.join(skillPath, skillMdFile.name));
329
+ const sourceRules = parseSourceRulesFromFrontmatter(content);
330
+ for (const rid of sourceRules) ruleIdsCovered.add(rid);
331
+ }
332
+ } catch { /* best-effort */ }
333
+ }
242
334
  }
243
335
 
336
+ // v0.8 P2-F (item 22): count stub-shaped check.py files. Pairs with
337
+ // v0.8 P2-A teaching about the inverse-stub anti-pattern. Surfaces
338
+ // a ratio that downstream code (skill-authoring exitCriteriaMet)
339
+ // can choose to enforce via env flag.
340
+ const checkPyAudit = _auditCheckPyShapes(skillsDir);
341
+
244
342
  return {
245
343
  skillsAuthored,
246
344
  skillsWithScripts,
247
345
  ruleIdsCovered: [...ruleIdsCovered],
346
+ checkPyTotal: checkPyAudit.total,
347
+ checkPyStubCount: checkPyAudit.stubFiles.length,
348
+ checkPyStubFiles: checkPyAudit.stubFiles,
349
+ checkPyStubRatio: checkPyAudit.total > 0
350
+ ? +(checkPyAudit.stubFiles.length / checkPyAudit.total).toFixed(3)
351
+ : 0,
248
352
  };
249
353
  }
250
354
 
355
// v0.8 P2-F: audit the check scripts under each rule_skills/<id>/ folder (as
// located by findCheckScripts) and report which ones are stub-shaped.
//
// Returns { total, stubFiles }:
//   - total:     number of check scripts found across all skill folders
//   - stubFiles: paths (relative to skillsDir) of the scripts that
//                _isCheckPyStubShaped flags
//
// Folders whose name starts with "__" are skipped. A missing skillsDir
// yields { total: 0, stubFiles: [] }.
//
// Stub shapes recognized by _isCheckPyStubShaped (per v0.7.x audit findings):
//   - returns literal `"verdict": "NOT_APPLICABLE"` (资管 v0.7.5 variant)
//   - returns literal `"pass": None` (v0.7.0 legacy)
//   - returns literal `"method": "stub"`
// A workflow import or any other verdict literal overrides the stub-return
// signal (a check.py that delegates to a workflow but returns NOT_APPLICABLE
// on one sub-path is not a stub). Line count is deliberately not a signal.
function _auditCheckPyShapes(skillsDir) {
  if (!dirExists(skillsDir)) return { total: 0, stubFiles: [] };

  // Collect every check script first, skipping dunder folders.
  const allScripts = [...listChildDirs(skillsDir)]
    .filter((entry) => !entry.name.startsWith("__"))
    .flatMap((entry) => [...findCheckScripts(path.join(skillsDir, entry.name))]);

  // Then classify; report stubs relative to the skills root.
  const stubFiles = allScripts
    .filter((script) => _isCheckPyStubShaped(script))
    .map((script) => path.relative(skillsDir, script));

  return { total: allScripts.length, stubFiles };
}
383
+
384
/**
 * Heuristically decide whether a Python check script is a stub, i.e. it only
 * ever returns a placeholder result.
 *
 * Decision order:
 *   1. Unreadable file            → false (cannot judge; not counted as stub).
 *   2. Imports from `workflows`   → false (delegates to a workflow).
 *   3. No stub-shaped return dict → false.
 *   4. Any PASS / FAIL / WARNING verdict literal, or a `make_result` call
 *      → false (real branching); otherwise → true.
 *
 * @param {string} scriptPath — path of the check script to inspect
 * @returns {boolean} true when the file looks like a stub
 */
function _isCheckPyStubShaped(scriptPath) {
  let content;
  try { content = fs.readFileSync(scriptPath, "utf-8"); }
  catch { return false; }

  // Substantive signal 1: imports a workflow (direct delegation).
  // Accepts all of: `from workflows import x`, `from workflows.r001 import x`,
  // `import workflows`, `import workflows.r001`, including imports indented
  // inside a function body. (The earlier pattern required at least one
  // character after `workflows` and an unindented `import workflows.`, so
  // package-level and function-local imports were misread as "no delegation".)
  const importsWorkflow =
    /from\s+workflows[.\w]*\s+import/.test(content) ||
    /^[ \t]*import\s+workflows\b/m.test(content);
  if (importsWorkflow) return false;

  // Stub return patterns. A check.py is a stub if it ALWAYS returns one of
  // these regardless of input. "Always" is approximated by the absence of any
  // other verdict literal (checked below). Line count is intentionally not
  // used: a 30+ line scaffold with a single NOT_APPLICABLE return path is
  // still a stub by behavior.
  const stubReturnPatterns = [
    /return\s+\{[^}]*["']verdict["']\s*:\s*["']NOT_APPLICABLE["']/m,
    /return\s+\{[^}]*["']pass["']\s*:\s*None/m,
    /return\s+\{[^}]*["']method["']\s*:\s*["']stub["']/m,
  ];
  if (!stubReturnPatterns.some((re) => re.test(content))) return false;

  // Any other verdict (PASS, FAIL, WARNING) means the file does real
  // branching even if one path returns NOT_APPLICABLE — not a stub.
  const realVerdictPatterns = [
    /["']verdict["']\s*:\s*["']PASS["']/m,
    /["']verdict["']\s*:\s*["']FAIL["']/m,
    /["']verdict["']\s*:\s*["']WARNING["']/m,
    /\bmake_result\b/, // common helper that produces non-stub returns
  ];
  return !realVerdictPatterns.some((re) => re.test(content));
}
417
+
251
418
  // ───────────────────────────────────────────────────────────────────
252
419
  // skill_testing
253
420
  // ───────────────────────────────────────────────────────────────────
@@ -362,6 +529,37 @@ export function deriveDistillationMilestones(workspace) {
362
529
  const cwd = cwdOf(workspace);
363
530
  const wfRoot = path.join(cwd, "workflows");
364
531
  const workflowsCreated = [];
532
+ // v0.7.5 G-H1: also track rule IDs covered by workflows. Grouped/master
533
+ // workflows (e.g., 贷款 v0.7.4's master + R001 template) cover multiple
534
+ // rules; declare them via SKILL.md frontmatter `source_rules: [...]`.
535
+ // Engine credits each declared rule_id so workflowsCovered milestone
536
+ // matches catalog reality.
537
+ const ruleIdsCovered = new Set();
538
+
539
+ const creditWorkflowSourceRules = (workflowDir) => {
540
+ // Check for a SKILL.md (or workflow.md) declaring source_rules
541
+ const candidates = listChildFiles(workflowDir).filter(
542
+ (f) => /^(skill|workflow)\.md$/i.test(f.name),
543
+ );
544
+ for (const c of candidates) {
545
+ const content = readFileSafe(path.join(workflowDir, c.name));
546
+ for (const rid of parseSourceRulesFromFrontmatter(content)) {
547
+ ruleIdsCovered.add(rid);
548
+ }
549
+ }
550
+ // Also: per-workflow config.json may declare rule coverage
551
+ const configPath = path.join(workflowDir, "config.json");
552
+ if (fileExists(configPath)) {
553
+ const data = readJsonSafe(configPath);
554
+ const rules = Array.isArray(data?.source_rules) ? data.source_rules :
555
+ Array.isArray(data?.rules) ? data.rules :
556
+ Array.isArray(data?.rule_ids) ? data.rule_ids : [];
557
+ for (const r of rules) {
558
+ const canon = canonicalRuleId(String(r));
559
+ if (canon) ruleIdsCovered.add(canon);
560
+ }
561
+ }
562
+ };
365
563
 
366
564
  if (dirExists(wfRoot)) {
367
565
  // Two layouts seen in E2E #5:
@@ -375,16 +573,39 @@ export function deriveDistillationMilestones(workspace) {
375
573
  const sub = path.join(wfRoot, e.name);
376
574
  const hasPy = listChildFiles(sub).some((f) =>
377
575
  /workflow.*\.py$/i.test(f.name) || /^check.*\.py$/i.test(f.name));
378
- if (hasPy) workflowsCreated.push(e.name);
576
+ if (hasPy) {
577
+ workflowsCreated.push(e.name);
578
+ // Dir name might itself be a rule_id
579
+ const canon = canonicalRuleId(e.name);
580
+ if (canon) ruleIdsCovered.add(canon);
581
+ // Plus any frontmatter / config-declared source_rules
582
+ creditWorkflowSourceRules(sub);
583
+ }
379
584
  continue;
380
585
  }
381
586
  if (e.isFile()) {
382
587
  const m1 = e.name.match(/^(.+)_workflow\.py$/i);
383
- if (m1) { workflowsCreated.push(m1[1]); continue; }
588
+ if (m1) {
589
+ workflowsCreated.push(m1[1]);
590
+ const canon = canonicalRuleId(m1[1]);
591
+ if (canon) ruleIdsCovered.add(canon);
592
+ continue;
593
+ }
384
594
  const m2 = e.name.match(/^(.+)\.json$/i);
385
595
  if (m2) {
386
596
  const data = readJsonSafe(path.join(wfRoot, e.name));
387
- if (data && (data.rule_id || data.entry || data.type)) workflowsCreated.push(m2[1]);
597
+ if (data && (data.rule_id || data.entry || data.type)) {
598
+ workflowsCreated.push(m2[1]);
599
+ const canon = canonicalRuleId(data.rule_id || m2[1]);
600
+ if (canon) ruleIdsCovered.add(canon);
601
+ // Manifest-declared source_rules
602
+ const rules = Array.isArray(data.source_rules) ? data.source_rules :
603
+ Array.isArray(data.rules) ? data.rules : [];
604
+ for (const r of rules) {
605
+ const c2 = canonicalRuleId(String(r));
606
+ if (c2) ruleIdsCovered.add(c2);
607
+ }
608
+ }
388
609
  continue;
389
610
  }
390
611
  }
@@ -408,7 +629,16 @@ export function deriveDistillationMilestones(workspace) {
408
629
  }
409
630
  }
410
631
 
411
- return { workflowsCreated, workflowsTested };
632
+ return {
633
+ workflowsCreated,
634
+ workflowsTested,
635
+ // v0.7.5 G-H1: rule_ids the engine credits as having workflow coverage
636
+ // (either via dir name being a canonical rule_id, or via SKILL.md /
637
+ // workflow.md / config.json frontmatter declaring source_rules: [...]).
638
+ // Pipelines that check workflow coverage against the catalog should
639
+ // prefer ruleIdsCovered over workflowsCreated for grouped patterns.
640
+ ruleIdsCovered: [...ruleIdsCovered],
641
+ };
412
642
  }
413
643
 
414
644
  // ───────────────────────────────────────────────────────────────────
@@ -477,10 +707,45 @@ export function deriveProductionQcMilestones(workspace) {
477
707
  }
478
708
  }
479
709
 
710
+ // v0.8 P1-A: per-doc QC review files at output/qc/reviews/doc_*.json
711
+ // (贷款 v0.7.5 shape). Each file is a single review object with
712
+ // {review_id, document, verdict}. Engine previously skipped these
713
+ // because they don't match the batch heuristic, causing
714
+ // `documents_reviewed: 0` despite 16 docs on disk.
715
+ const perDocReviewsDir = path.join(outputDir, "qc", "reviews");
716
+ if (dirExists(perDocReviewsDir)) {
717
+ for (const e of listChildFiles(perDocReviewsDir)) {
718
+ if (!e.name.endsWith(".json")) continue;
719
+ const data = readJsonSafe(path.join(perDocReviewsDir, e.name));
720
+ if (!data || typeof data !== "object" || !data.verdict) continue;
721
+ // Document identifier: prefer explicit fields, fall back to filename
722
+ const docKey = data.document || data.doc || data.file || data.path || e.name.replace(/\.json$/, "");
723
+ documentsReviewedSet.add(String(docKey));
724
+ }
725
+ }
726
+
727
+ // v0.8 P1-A: also read numeric `documents_reviewed: N` from any
728
+ // top-level batch file (贷款 review_001.json declares 16 directly).
729
+ // We use this only when the doc set is smaller than the claim — agents
730
+ // sometimes write summary batches without enumerating individual docs.
731
+ let declaredDocCount = 0;
732
+ for (const dir of candidateDirs) {
733
+ if (!dirExists(dir)) continue;
734
+ for (const e of listChildFiles(dir)) {
735
+ if (!e.name.endsWith(".json")) continue;
736
+ const data = readJsonSafe(path.join(dir, e.name));
737
+ if (!data || typeof data !== "object") continue;
738
+ const n = Number(data.documents_reviewed);
739
+ if (Number.isFinite(n) && n > declaredDocCount) declaredDocCount = n;
740
+ }
741
+ }
742
+ const documentsReviewed = Math.max(documentsReviewedSet.size, declaredDocCount);
743
+
480
744
  return {
481
745
  batchesProcessed,
482
- documentsReviewed: documentsReviewedSet.size,
746
+ documentsReviewed,
483
747
  documentsReviewedKeys: [...documentsReviewedSet], // for describeState detail
748
+ documentsReviewedDeclared: declaredDocCount > documentsReviewedSet.size ? declaredDocCount : 0,
484
749
  };
485
750
  }
486
751
 
@@ -522,10 +787,18 @@ export function deriveFinalizationMilestones(workspace) {
522
787
  }
523
788
  }
524
789
 
525
- // coverageReportWritten: rules/coverage_report.md OR output/coverage_report.md.
526
- const coverageReportWritten =
527
- fileExists(path.join(cwd, "rules", "coverage_report.md")) ||
528
- fileExists(path.join(cwd, "output", "coverage_report.md"));
790
+ // coverageReportWritten: accept multiple known agent-write locations.
791
+ // v0.8 P1-A: added rule_skills/coverage_report.md (资管 v0.7.5 wrote here)
792
+ // and coverage_audit.md variants (贷款 v0.7.5 wrote rules/coverage_audit.md).
793
+ // The "coverage doc" concept covers both report-style + audit-style files.
794
+ const coverageReportWritten = !!findFileAcrossKnownPaths(cwd, [
795
+ path.join("rules", "coverage_report.md"),
796
+ path.join("rules", "coverage_audit.md"), // 贷款 v0.7.5
797
+ path.join("rules", "coverage_audit.json"),
798
+ path.join("output", "coverage_report.md"),
799
+ path.join("rule_skills", "coverage_report.md"), // 资管 v0.7.5
800
+ path.join("output", "qc", "coverage_report.md"), // future-proofing
801
+ ]);
529
802
 
530
803
  // finalDashboardWritten: at least one dashboards/*.html that is NOT a
531
804
  // duplicate of any other. DS + GLM both shipped byte-identical
@@ -558,11 +831,108 @@ export function deriveFinalizationMilestones(workspace) {
558
831
  }
559
832
  }
560
833
 
834
+ // v0.8 P0-D: stale-release detection. SOFT gate — surfaces a warning,
835
+ // doesn't refuse phase advance. 资管 audit § 9.1 finding 11 found both
836
+ // release bundles snapped BEFORE the user's "更激进 worker LLM" prompt
837
+ // drove 14 hybrid workflow_v2.py builds, but neither was re-released.
838
+ // We detect by comparing the most-recent release manifest's created_at
839
+ // against the mtimes of workflows/ and rule_skills/.
840
+ const staleRelease = _detectStaleRelease(cwd);
841
+
561
842
  return {
562
843
  readmeWritten,
563
844
  coverageReportWritten,
564
845
  finalDashboardWritten,
565
846
  dashboardDuplicatesDetected,
847
+ releaseIsStale: staleRelease.isStale,
848
+ staleReleaseDetail: staleRelease.detail,
849
+ };
850
+ }
851
+
852
/**
 * v0.8 P0-D: detect whether workflows/ or rule_skills/ contain tracked files
 * (workflow_v*.py, check.py, SKILL.md / skill.md) modified after the
 * most-recent release manifest under output/releases/<slug>/manifest.json.
 *
 * SOFT semantics — this function only reports; it never throws on unreadable
 * files or directories and does not block anything itself. Callers decide
 * what to do with the result.
 *
 * @param {string} cwd - Workspace root that contains output/, workflows/ and
 *   rule_skills/.
 * @returns {{isStale: boolean, detail: (object|null)}}
 *   - `{isStale: false, detail: null}` when there is no releases dir, no
 *     readable manifest, or no tracked file newer than the latest release.
 *   - `{isStale: false, detail: {acceptedAt: slug}}` when newer files exist
 *     but the latest release dir contains a `.accept_stale_release` marker.
 *   - `{isStale: true, detail: {releasePath, releaseSlug, releaseCreatedAt,
 *     newerFiles, totalNewerCount, hint}}` otherwise. `newerFiles` lists at
 *     most 10 entries; `totalNewerCount` is the full number of matches found.
 */
function _detectStaleRelease(cwd) {
  // Upper bound on entries listed in the report (the count itself is not capped).
  const MAX_LISTED_FILES = 10;
  const SCAN_DIRS = ["workflows", "rule_skills"];
  const SKIPPED_NAMES = new Set(["__pycache__", "node_modules"]);
  // Care about workflow_v*.py + check.py + SKILL.md/skill.md only —
  // not test artifacts, not .json. No /g flag, so .test() is stateless.
  const TRACKED_FILE_RE = /(workflow_v\d+\.py|check\.py|SKILL\.md|skill\.md)$/;

  const releasesRoot = path.join(cwd, "output", "releases");
  if (!dirExists(releasesRoot)) return { isStale: false, detail: null };

  // Find the most-recent release manifest. The timestamp is the manifest's
  // `created_at` when it parses to a valid date, else the file's fs mtime.
  // NOTE(review): listChildDirs is assumed to yield entries exposing `.name`.
  let latestRelease = null; // {path, createdAt: Date, slug}
  for (const e of listChildDirs(releasesRoot)) {
    const manifestPath = path.join(releasesRoot, e.name, "manifest.json");
    let createdAt;
    try {
      const stat = fs.statSync(manifestPath);
      if (!stat.isFile()) continue;
      createdAt = stat.mtime;
    } catch {
      continue; // release dir without a readable manifest — skip it
    }
    try {
      const m = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
      if (m?.created_at) {
        const parsed = new Date(m.created_at);
        if (!Number.isNaN(parsed.getTime())) createdAt = parsed;
      }
    } catch { /* unreadable or invalid JSON: fall back to mtime */ }
    if (!latestRelease || createdAt > latestRelease.createdAt) {
      latestRelease = { path: manifestPath, createdAt, slug: e.name };
    }
  }

  if (!latestRelease) return { isStale: false, detail: null };

  // Walk workflows/ and rule_skills/ for tracked files newer than the release.
  // Every match is counted, but only the first MAX_LISTED_FILES are listed so
  // the report stays bounded while totalNewerCount stays truthful.
  const cutoff = latestRelease.createdAt.getTime();
  const newerFiles = [];
  let totalNewerCount = 0;
  for (const sub of SCAN_DIRS) {
    const root = path.join(cwd, sub);
    if (!dirExists(root)) continue;
    const stack = [root];
    while (stack.length > 0) {
      const d = stack.pop();
      let entries;
      try {
        entries = fs.readdirSync(d, { withFileTypes: true });
      } catch {
        continue; // unreadable directory — best-effort scan
      }
      for (const ent of entries) {
        if (ent.name.startsWith(".") || SKIPPED_NAMES.has(ent.name)) continue;
        const p = path.join(d, ent.name);
        if (ent.isDirectory()) {
          stack.push(p);
          continue;
        }
        if (!ent.isFile() || !TRACKED_FILE_RE.test(ent.name)) continue;
        try {
          const st = fs.statSync(p);
          if (st.mtimeMs <= cutoff) continue;
          totalNewerCount += 1;
          if (newerFiles.length < MAX_LISTED_FILES) {
            newerFiles.push({
              path: path.relative(cwd, p),
              mtime: new Date(st.mtimeMs).toISOString(),
            });
          }
        } catch { /* file vanished or unreadable — skip */ }
      }
    }
  }

  if (totalNewerCount === 0) return { isStale: false, detail: null };

  // SOFT: accept_stale_release marker bypasses the warning. Agents that
  // intentionally accept the older release write this file.
  const acceptPath = path.join(releasesRoot, latestRelease.slug, ".accept_stale_release");
  if (fileExists(acceptPath)) return { isStale: false, detail: { acceptedAt: latestRelease.slug } };

  return {
    isStale: true,
    detail: {
      releasePath: path.relative(cwd, latestRelease.path),
      releaseSlug: latestRelease.slug,
      releaseCreatedAt: latestRelease.createdAt.toISOString(),
      newerFiles,
      totalNewerCount,
      hint: "Workspace artifacts modified after release was built. Either re-run the release tool or write .accept_stale_release into the release dir to acknowledge.",
    },
  };
}
568
938
 
@@ -9,7 +9,10 @@ import { deriveBootstrapMilestones } from "./_milestone-derive.js";
9
9
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
10
  const AGENT_MD_TEMPLATE = path.resolve(__dirname, "../../../template/AGENT.md");
11
11
 
12
- const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills"];
12
+ // v0.7.5: `skills` added to required dirs. Populated by SkillLoader
13
+ // .populateWorkspaceSkills() at bootstrap + on every phase transition
14
+ // with the phase's `available` skill set (per phase_skills.yaml).
15
+ const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills", "skills"];
13
16
 
14
17
  const DEFAULT_ENV = `# === KC Agent Project Configuration ===
15
18
 
@@ -59,6 +59,10 @@ export class SkillAuthoringPipeline extends Pipeline {
59
59
  this.skillsAuthored = [...m.skillsAuthored];
60
60
  this.skillsWithScripts = [...m.skillsWithScripts];
61
61
  this.ruleIdsCovered = new Set(m.ruleIdsCovered);
62
+ // v0.8 P2-F (item 22): stub-shape audit for check.py files.
63
+ this._checkPyStubRatio = m.checkPyStubRatio || 0;
64
+ this._checkPyStubFiles = m.checkPyStubFiles || [];
65
+ this._checkPyTotal = m.checkPyTotal || 0;
62
66
  }
63
67
 
64
68
  // v0.7.0 A1: ruleId extraction moved to _milestone-derive.js
@@ -228,7 +232,32 @@ export class SkillAuthoringPipeline extends Pipeline {
228
232
  this._validationFailures = v.failures;
229
233
  this._validationSkipped = v.skipped;
230
234
  if (!v.ok) return false;
231
- return this.skillsWithScripts.length >= Math.max(1, this.skillsAuthored.length * 0.5);
235
+ if (this.skillsWithScripts.length < Math.max(1, this.skillsAuthored.length * 0.5)) {
236
+ return false;
237
+ }
238
+
239
+ // v0.8 P2-F (item 22): optional enforcement of check.py substantiveness.
240
+ // SOFT-by-default — the stub ratio is always computed (visible in
241
+ // describeState / events) but only blocks phase advance if
242
+ // KC_ENFORCE_CHECK_PY_SUBSTANTIVE=1 is set. Default-off because
243
+ // the heuristic may over-fire on legitimate scaffolds; v0.8 ships
244
+ // the detection + reporting, v0.8.x revisits enforcement after audit
245
+ // data shows whether the signal is reliable.
246
+ const enforce = process.env.KC_ENFORCE_CHECK_PY_SUBSTANTIVE === "1";
247
+ if (enforce && this._checkPyTotal > 0 && this._checkPyStubRatio > 0.5) {
248
+ this._validationFailures = this._validationFailures || [];
249
+ this._validationFailures.push({
250
+ file: "<check_py_substantiveness>",
251
+ reason:
252
+ `${this._checkPyStubCount || this._checkPyStubFiles.length}/${this._checkPyTotal} check.py files are stub-shaped ` +
253
+ `(return NOT_APPLICABLE / pass:null with no workflow import + ≤20 lines). ` +
254
+ `Examples: ${this._checkPyStubFiles.slice(0, 3).join(", ")}${this._checkPyStubFiles.length > 3 ? "..." : ""}. ` +
255
+ `See skill-authoring SKILL.md anti-pattern section. ` +
256
+ `Set KC_ENFORCE_CHECK_PY_SUBSTANTIVE=0 to bypass this gate if intentional.`,
257
+ });
258
+ return false;
259
+ }
260
+ return true;
232
261
  }
233
262
 
234
263
  /**