kc-beta 0.7.2 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/README.md +21 -8
  2. package/bin/kc-beta.js +20 -6
  3. package/package.json +1 -1
  4. package/src/agent/engine.js +138 -55
  5. package/src/agent/pipelines/_milestone-derive.js +140 -4
  6. package/src/agent/pipelines/initializer.js +4 -1
  7. package/src/agent/skill-loader.js +433 -111
  8. package/src/agent/tools/consult-skill.js +112 -0
  9. package/src/agent/tools/copy-to-workspace.js +18 -12
  10. package/src/agent/tools/release.js +128 -1
  11. package/src/agent/tools/sandbox-exec.js +4 -1
  12. package/src/agent/tools/task-board.js +194 -0
  13. package/src/agent/tools/workspace-file.js +57 -43
  14. package/src/config.js +6 -4
  15. package/template/AGENT.md +182 -7
  16. package/template/skills/en/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
  17. package/template/skills/en/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +1 -0
  18. package/template/skills/{zh/meta → en}/compliance-judgment/SKILL.md +1 -0
  19. package/template/skills/en/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
  20. package/template/skills/en/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
  21. package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
  22. package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
  23. package/template/skills/en/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
  24. package/template/skills/{zh/meta → en}/document-chunking/SKILL.md +1 -0
  25. package/template/skills/en/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
  26. package/template/skills/{zh/meta → en}/entity-extraction/SKILL.md +1 -0
  27. package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
  28. package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
  29. package/template/skills/en/{meta-meta/quality-control → quality-control}/SKILL.md +1 -0
  30. package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +60 -0
  31. package/template/skills/en/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
  32. package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +1 -0
  33. package/template/skills/en/skill-creator/SKILL.md +2 -1
  34. package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/SKILL.md +5 -4
  35. package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
  36. package/template/skills/en/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
  37. package/template/skills/en/{meta-meta/version-control → version-control}/SKILL.md +1 -0
  38. package/template/skills/en/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +37 -2
  39. package/template/skills/phase_skills.yaml +107 -0
  40. package/template/skills/zh/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
  41. package/template/skills/zh/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +1 -0
  42. package/template/skills/{en/meta → zh}/compliance-judgment/SKILL.md +1 -0
  43. package/template/skills/zh/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
  44. package/template/skills/zh/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
  45. package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
  46. package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
  47. package/template/skills/zh/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
  48. package/template/skills/{en/meta → zh}/document-chunking/SKILL.md +1 -0
  49. package/template/skills/zh/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
  50. package/template/skills/{en/meta → zh}/entity-extraction/SKILL.md +1 -0
  51. package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
  52. package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
  53. package/template/skills/zh/{meta-meta/quality-control → quality-control}/SKILL.md +1 -0
  54. package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +48 -0
  55. package/template/skills/zh/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
  56. package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +1 -0
  57. package/template/skills/zh/skill-creator/SKILL.md +2 -1
  58. package/template/skills/zh/skill-to-workflow/SKILL.md +190 -0
  59. package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
  60. package/template/skills/zh/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
  61. package/template/skills/zh/{meta-meta/version-control → version-control}/SKILL.md +1 -0
  62. package/template/skills/zh/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +37 -2
  63. package/template/CLAUDE.md +0 -137
  64. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +0 -188
  65. /package/template/skills/en/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
  66. /package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
  67. /package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
  68. /package/template/skills/en/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
  69. /package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
  70. /package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
  71. /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
  72. /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
  73. /package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
  74. /package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
  75. /package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
  76. /package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
  77. /package/template/skills/en/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
  78. /package/template/skills/zh/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
  79. /package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
  80. /package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
  81. /package/template/skills/zh/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
  82. /package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
  83. /package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
  84. /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
  85. /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
  86. /package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
  87. /package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
  88. /package/template/skills/zh/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
  89. /package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
  90. /package/template/skills/zh/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
package/README.md CHANGED
@@ -216,28 +216,41 @@ Quality Thresholds, Language.
216
216
 
217
217
  ## Status
218
218
 
219
- **v0.6.0 — first architectural beta.** This release lands:
219
+ **v0.7.4 — phase-control fix + codex review re-attempt.** Architectural
220
+ payload from v0.6.0+ is still in place:
220
221
 
221
222
  - Parallel ralph-loop (up to 8 concurrent workers) with a heap-safety
222
223
  conformance gate
223
224
  - Native chunker + RAG (onion-peeler + CJK bigram keyword index +
224
225
  one-shot LLM bundle classifier, ported from the AMC verification app)
225
- - Source-context auto-attach on skill_authoring tasks (rule NL + evidence
226
- chunks + sibling rules injected into the prompt, no manual search needed)
226
+ - Agent-owned task board: the agent reads the rule list from
227
+ `describeState`, decides decomposition (per-rule / grouped / range),
228
+ and calls `TaskCreate` / `TaskUpdate` / `TaskComplete` to drive the
229
+ Ralph loop **within the current phase only** (v0.7.4). Source-context
230
+ auto-attach pulls rule NL + evidence chunks + sibling rules into each
231
+ task's prompt.
232
+ - Phase boundaries = user checkpoints: the Ralph loop exits at every
233
+ phase transition, returning control to the user. The engine doesn't
234
+ auto-advance; phase advance is explicit (agent's `phase_advance` tool
235
+ call or user re-prompt). Marathon-style end-to-end autonomy lives
236
+ outside the engine.
227
237
  - Workspace file locking for shared coordination files (`rules/catalog.json`,
228
- `rules/manifest.json`, `tasks.json`, etc.)
238
+ `rules/manifest.json`, `refs/manifest.json`, `tasks.json`,
239
+ `session-state.json`) — every writer goes through `withFileLock`.
229
240
  - `agent_tool` gets `wait` / `poll` / `list` / `kill` operations +
230
241
  `stale_subagents` phase-advance signal
231
- - New FINALIZATION phase packages the session into a shippable deliverable
242
+ - FINALIZATION phase packages the session into a shippable deliverable
232
243
  (canonical `rule_skills/` layout + README + coverage report + final
233
244
  dashboard)
245
+ - Filesystem-derived phase milestones (v0.7.0+): the engine reads disk
246
+ artifacts for advance criteria, never trusts tool-call assertions
234
247
  - Input stays active during streaming (type-ahead queue), arrow keys +
235
248
  history recall, CTX smoothing + peak, per-provider context-limit caps,
236
249
  `/tools`, `/parallelism`, and more
237
250
 
238
- See [DEV_LOG.md](./DEV_LOG.md) for the full v0.6.0 change breakdown and
239
- [docs/update_design_v5.md](./docs/update_design_v5.md) for the plan that
240
- drove it.
251
+ See [DEV_LOG.md](./DEV_LOG.md) for the per-release change breakdowns and
252
+ [docs/update_design_v7.md](./docs/update_design_v7.md) for the v0.7.x
253
+ plan and patch notes.
241
254
 
242
255
  Bug reports and PRs welcome at <https://github.com/kitchen-engineer42/kc-cli>.
243
256
 
package/bin/kc-beta.js CHANGED
@@ -34,17 +34,31 @@ if (parallelismOverride !== null) {
34
34
  // their own output.
35
35
  const __filename = fileURLToPath(import.meta.url);
36
36
  const __dirname = dirname(__filename);
37
- function printBanner() {
37
+ function readPkgVersion() {
38
38
  try {
39
39
  const pkg = JSON.parse(readFileSync(resolve(__dirname, "..", "package.json"), "utf-8"));
40
- const scriptPath = __filename;
41
- process.stderr.write(`⏵⏵ KC Agent CLI v${pkg.version} · ${scriptPath}\n`);
42
- } catch { /* package.json missing or unreadable — silent */ }
40
+ return pkg.version || "unknown";
41
+ } catch { return "unknown"; }
42
+ }
43
+ function printBanner() {
44
+ try {
45
+ const v = readPkgVersion();
46
+ process.stderr.write(`⏵⏵ KC Agent CLI v${v} · ${__filename}\n`);
47
+ } catch { /* silent */ }
43
48
  }
44
- const suppressBanner = args.includes("--version") || args.includes("-v") ||
45
- args.includes("--help") || args.includes("-h");
49
+ const isVersion = args.includes("--version") || args.includes("-v");
50
+ const isHelp = args.includes("--help") || args.includes("-h");
51
+ const suppressBanner = isVersion || isHelp;
46
52
  if (!suppressBanner) printBanner();
47
53
 
54
+ // v0.7.5 G-F1: `--version` prints version and exits. Previously the flag
55
+ // suppressed the banner but fell through to TUI launch (audit confirmed
56
+ // during v0.7.4 testing). Print + exit before the subcommand dispatch.
57
+ if (isVersion) {
58
+ process.stdout.write(`${readPkgVersion()}\n`);
59
+ process.exit(0);
60
+ }
61
+
48
62
  (async () => {
49
63
  if (subcommand === "onboard" || subcommand === "setup") {
50
64
  const { onboard } = await import("../src/cli/onboard.js");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kc-beta",
3
- "version": "0.7.2",
3
+ "version": "0.7.5",
4
4
  "description": "KC Agent — LLM document verification agent (pure Node.js CLI). Dual-licensed: PolyForm Noncommercial 1.0.0 for personal/noncommercial use; commercial license required for enterprise production. See LICENSE and LICENSE-COMMERCIAL.md.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -37,6 +37,8 @@ import { EvolutionCycleTool } from "./tools/evolution-cycle.js";
37
37
  import { TierDowngradeTool } from "./tools/tier-downgrade.js";
38
38
  import { AgentTool } from "./tools/agent-tool.js";
39
39
  import { WebSearchTool } from "./tools/web-search.js";
40
+ import { TaskCreateTool, TaskUpdateTool, TaskCompleteTool } from "./tools/task-board.js";
41
+ import { ConsultSkillTool } from "./tools/consult-skill.js";
40
42
  import { SkillLoader } from "./skill-loader.js";
41
43
  import { TaskManager } from "./task-manager.js";
42
44
  import { Scheduler } from "./scheduler.js";
@@ -233,6 +235,18 @@ export class AgentEngine {
233
235
  // Skill discovery (Claude Code pattern: index in context, full content on demand)
234
236
  this._skillLoader = new SkillLoader(config.language);
235
237
 
238
+ // v0.7.5 G-D1: populate <workspace>/skills/ with the initial phase's
239
+ // available skill set. Symlink with copy fallback. Re-populated on
240
+ // every phase advance/retreat (see _advancePhase).
241
+ try {
242
+ const res = this._skillLoader.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
243
+ this.eventLog?.append?.("skills_populated", {
244
+ phase: res.phase,
245
+ populated: res.populated,
246
+ failures: res.failures,
247
+ });
248
+ } catch { /* best-effort; skills/ population is not a critical-path failure */ }
249
+
236
250
  // Register tools for initial phase
237
251
  this.toolRegistry = new ToolRegistry();
238
252
  this._registerToolsForPhase(this.currentPhase);
@@ -475,6 +489,27 @@ export class AgentEngine {
475
489
  () => this.currentPhase,
476
490
  ),
477
491
  new WebSearchTool(this.config.tavilyApiKey),
492
+ // v0.7.4 (re-applied from v0.7.3 G2b): TaskCreate /
493
+ // TaskUpdate / TaskComplete — agent populates the
494
+ // Ralph-loop queue for the CURRENT phase only. Phase
495
+ // boundaries exit the loop (v0.7.4 G0c). Skipped for
496
+ // subagents (taskManager null in subagent scope).
497
+ ...(this.taskManager ? [
498
+ new TaskCreateTool(this.workspace, this.taskManager),
499
+ new TaskUpdateTool(this.workspace, this.taskManager),
500
+ new TaskCompleteTool(this.workspace, this.taskManager),
501
+ ] : []),
502
+ // v0.7.5: consult_skill loads a meta-skill body into conversation
503
+ // history on demand. Always-loaded skills are already in the
504
+ // system prompt via SkillLoader.formatForContext; this tool covers
505
+ // the "available" set for the current phase. Both main + subagents
506
+ // register their own — each has its own skillLoader + phase.
507
+ new ConsultSkillTool(
508
+ this.workspace,
509
+ this._skillLoader,
510
+ () => this.currentPhase,
511
+ this.eventLog,
512
+ ),
478
513
  ],
479
514
  // Distillation+ only (DISTILL mode)
480
515
  distill: [
@@ -1185,11 +1220,11 @@ export class AgentEngine {
1185
1220
  }
1186
1221
  this._totalTurns = (this._totalTurns || 0) + 1;
1187
1222
 
1188
- // Bug 4 trigger (1): re-check phase criteria at end of every turn —
1189
- // KC may have advanced state via conversation alone, without any
1190
- // tool that the pipeline narrowly watches.
1191
- const advancedEv = this._maybeAutoAdvance();
1192
- if (advancedEv) yield advancedEv;
1223
+ // v0.7.4 G0b: removed `_maybeAutoAdvance()` auto-fire here.
1224
+ // Phase advance is now 100% explicit (agent's `phase_advance`
1225
+ // tool, or user re-prompt). v0.7.3 phase-control regression
1226
+ // was caused by this edge-triggered auto-advance firing mid-
1227
+ // session and chaining into next phase without user check-in.
1193
1228
 
1194
1229
  this.eventLog.append("turn_complete", {});
1195
1230
  this.saveState();
@@ -1278,23 +1313,45 @@ export class AgentEngine {
1278
1313
 
1279
1314
  this.eventLog.append("tool_result", {
1280
1315
  name: tc.name,
1316
+ input: inputData,
1281
1317
  output: result.content || "",
1282
1318
  isError: result.isError,
1283
1319
  traceId: offload?.traceId || null,
1284
1320
  });
1285
1321
 
1286
- // D3a: trace skill invocations. When the agent reads a SKILL.md via
1287
- // workspace_file (the canonical way KC "uses" a skill, since skills
1288
- // are progressively-disclosed markdown), emit a skill_invoked event.
1289
- // Makes "which skills did KC actually consult?" answerable in post-run
1290
- // analysis — before this, skills were opaque to the event log.
1322
+ // v0.7.5 (G-F4): added `input` above so events.jsonl carries the
1323
+ // tool inputs (v0.7.4 G1c only patched the AgentEvent yield path,
1324
+ // missed the persistence path — audit confirmed 0/453 + 0/946
1325
+ // tool_result events had `input` in v0.7.4 sessions).
1326
+
1327
+ // D3a: trace skill invocations. v0.7.5 (G-C6): only fire on
1328
+ // READS of meta-skill paths. Writes to rule_skills/<id>/SKILL.md
1329
+ // during skill_authoring are NOT skill invocations — they're the
1330
+ // agent producing its own deliverable. The old "(unknown)" spam
1331
+ // (100% of events in v0.7.1 + v0.7.4 sessions) is gone.
1332
+ //
1333
+ // Note: meta-skill body reads now happen via consult_skill, which
1334
+ // emits skill_invoked itself (with the real skill name). This
1335
+ // path-matching emission stays only as a fallback for any agent
1336
+ // that reads a SKILL.md path directly (out of pattern).
1291
1337
  try {
1338
+ const isRead =
1339
+ (tc.name === "workspace_file" && inputData?.operation === "read") ||
1340
+ (tc.name === "sandbox_exec" && /\b(cat|head|tail|less|grep|view|read)\b/.test(
1341
+ String(inputData?.command || "")
1342
+ ));
1292
1343
  if (
1293
1344
  !result.isError &&
1345
+ isRead &&
1294
1346
  (tc.name === "workspace_file" || tc.name === "sandbox_exec")
1295
1347
  ) {
1296
1348
  const p = String(inputData?.path || inputData?.command || "");
1297
- const skillMatch = p.match(/(?:template\/)?skills\/[a-z-]+\/(?:meta-meta|meta|skill-creator)\/([a-zA-Z0-9_-]+)(?:\/SKILL\.md|\/)?|\bSKILL\.md\b/);
1349
+ // v0.7.5 flat layout: skills/<name>/SKILL.md (workspace scope)
1350
+ // OR template/skills/<lang>/<name>/SKILL.md (template scope, rare)
1351
+ // Deep layout backward-compat preserved for any stragglers.
1352
+ const skillMatch = p.match(
1353
+ /(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/SKILL\.md\b/
1354
+ ) || p.match(/\bSKILL\.md\b/);
1298
1355
  if (skillMatch) {
1299
1356
  const skillName = skillMatch[1] || "(unknown)";
1300
1357
  this.eventLog.append("skill_invoked", {
@@ -1308,6 +1365,7 @@ export class AgentEngine {
1308
1365
  yield new AgentEvent({
1309
1366
  type: "tool_result",
1310
1367
  name: tc.name,
1368
+ input: inputData,
1311
1369
  output: historyContent,
1312
1370
  isError: result.isError,
1313
1371
  });
@@ -1374,12 +1432,9 @@ export class AgentEngine {
1374
1432
  }
1375
1433
  }
1376
1434
 
1377
- // Bug 4 fix: re-check exit criteria after every tool-result loop, not
1378
- // just from pipeline.onToolResult. The pipeline's describeState() (called
1379
- // on every turn) already re-scans, so exitCriteriaMet() is accurate; we
1380
- // just need to act on it eagerly.
1381
- const ev = this._maybeAutoAdvance();
1382
- if (ev) yield ev;
1435
+ // v0.7.4 G0b: removed post-tool `_maybeAutoAdvance()` call.
1436
+ // Phase advance is now 100% explicit. See `_runTaskLoopSerial`
1437
+ // phase-change-exit guard for the loop-level checkpoint.
1383
1438
 
1384
1439
  } catch (err) {
1385
1440
  // A8: If the LLM client tagged the stream termination reason, pass
@@ -1529,6 +1584,20 @@ export class AgentEngine {
1529
1584
  this.workspace.setPhase(this.currentPhase);
1530
1585
  this._createTasksForPhase(this.currentPhase);
1531
1586
 
1587
+ // v0.7.5 G-D2: re-populate <workspace>/skills/ with the new phase's
1588
+ // available set. Symlinks are wiped + recreated. Agent's `ls skills/`
1589
+ // and any read-by-path reflects the current phase's allowlist.
1590
+ try {
1591
+ const res = this._skillLoader?.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
1592
+ if (res) {
1593
+ this.eventLog.append("skills_populated", {
1594
+ phase: res.phase,
1595
+ populated: res.populated,
1596
+ failures: res.failures,
1597
+ });
1598
+ }
1599
+ } catch { /* best-effort */ }
1600
+
1532
1601
  // v0.7.0 N (#94): give the entered pipeline a chance to do
1533
1602
  // phase-entry setup. Used by finalization to copy the release
1534
1603
  // template into output/releases/v1/. Other pipelines are no-ops.
@@ -2097,7 +2166,26 @@ export class AgentEngine {
2097
2166
  // Run the initial turn (user's request)
2098
2167
  yield* this.runTurn(userMessage);
2099
2168
 
2100
- // Auto-continue through pending tasks
2169
+ // v0.7.5 G-F5 — TEMPORARILY DISABLED 2026-05-13 for overnight
2170
+ // marathon test. The strict capture-BEFORE form lets every user
2171
+ // prompt advance only one phase, which blocks unattended overnight
2172
+ // sessions. v0.7.4-style capture-AFTER (below) allows the agent
2173
+ // to chain multiple phase_advance calls within the initial runTurn,
2174
+ // then exits the while loop on subsequent phase changes.
2175
+ //
2176
+ // TODO: after the overnight E2E results come in (2026-05-14), decide:
2177
+ // (a) re-enable F5 strict and build marathon as a separate mode
2178
+ // (external driver pattern, e.g., /loop-kc command) — locked
2179
+ // earlier decision per harness-research § 7
2180
+ // (b) keep capture-AFTER permanently and accept multi-phase prompts
2181
+ //
2182
+ // To re-enable F5: move `const startingPhase = this.currentPhase;`
2183
+ // to BEFORE the `yield* this.runTurn(userMessage);` above, and add
2184
+ // the matching `if (this.currentPhase !== startingPhase) { return; }`
2185
+ // block between runTurn and the while loop.
2186
+ const startingPhase = this.currentPhase;
2187
+
2188
+ // Auto-continue through pending tasks (within current phase only)
2101
2189
  while (this.taskManager.getNextPending()) {
2102
2190
  // v0.7.0 #93: budget-aware compact threshold. The old
2103
2191
  // `messages.length > 15` was message-count-based and frozen
@@ -2158,26 +2246,20 @@ export class AgentEngine {
2158
2246
  },
2159
2247
  });
2160
2248
 
2161
- // Bug 4 trigger (2): auto-advance when all phase tasks are done AND
2162
- // the pipeline's exit criteria are also met (Bug 5 fix — task state
2163
- // alone is a ralph-loop convenience, not authoritative phase signal;
2164
- // tasks could be marked skipped manually or by an editor).
2165
- if (this._allCurrentPhaseTasksComplete()) {
2166
- const pipeline = this.pipelines[this.currentPhase];
2167
- let exitMet = false;
2168
- try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
2169
- if (exitMet) {
2170
- const next = NEXT_PHASE[this.currentPhase];
2171
- if (next) {
2172
- const advanced = this._advancePhase(next, "all current-phase tasks completed + exit criteria met");
2173
- if (advanced) {
2174
- yield new AgentEvent({
2175
- type: "pipeline_event",
2176
- data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
2177
- });
2178
- }
2179
- }
2180
- }
2249
+ // v0.7.4 G0c: phase boundary = user checkpoint. Exit the
2250
+ // loop if the agent advanced phase during this task —
2251
+ // even if pre-created tasks for the new phase are queued.
2252
+ // User sees current state and explicitly re-prompts to
2253
+ // begin the next phase. Marathon-style end-to-end
2254
+ // autonomy belongs to an external driver (Claude Code
2255
+ // /loop pattern), not the engine.
2256
+ if (this.currentPhase !== startingPhase) {
2257
+ this.eventLog.append("ralph_loop_exit", {
2258
+ reason: "phase_changed",
2259
+ from: startingPhase,
2260
+ to: this.currentPhase,
2261
+ });
2262
+ break;
2181
2263
  }
2182
2264
  }
2183
2265
  }
@@ -2203,6 +2285,12 @@ export class AgentEngine {
2203
2285
  // Initial turn: main agent reads user request, creates tasks.
2204
2286
  yield* this.runTurn(userMessage);
2205
2287
 
2288
+ // v0.7.5 G-F5 — TEMPORARILY DISABLED 2026-05-13 for overnight
2289
+ // marathon test. See _runTaskLoopSerial above for full rationale.
2290
+ // To re-enable F5: move `startingPhase` capture BEFORE the
2291
+ // initial runTurn, add post-runTurn exit check matching serial.
2292
+ const startingPhase = this.currentPhase;
2293
+
2206
2294
  const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
2207
2295
  if (!agentTool) {
2208
2296
  // Shouldn't happen (agent_tool is core), but fall back safely.
@@ -2227,6 +2315,9 @@ export class AgentEngine {
2227
2315
  const inFlight = new Map();
2228
2316
 
2229
2317
  const dispatch = async () => {
2318
+ // v0.7.4 G0c: stop dispatching if phase changed since loop start.
2319
+ // In-flight workers complete naturally; queue stays untouched.
2320
+ if (this.currentPhase !== startingPhase) return;
2230
2321
  while (inFlight.size < parallelism) {
2231
2322
  const task = this.taskManager.claimNextPending(`pool${inFlight.size}`);
2232
2323
  if (!task) return;
@@ -2362,23 +2453,15 @@ export class AgentEngine {
2362
2453
 
2363
2454
  this.saveState();
2364
2455
 
2365
- // After all workers done, check for phase auto-advance (same as serial path).
2366
- if (this._allCurrentPhaseTasksComplete()) {
2367
- const pipeline = this.pipelines[this.currentPhase];
2368
- let exitMet = false;
2369
- try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
2370
- if (exitMet) {
2371
- const next = NEXT_PHASE[this.currentPhase];
2372
- if (next) {
2373
- const advanced = this._advancePhase(next, "all parallel tasks completed + exit criteria met");
2374
- if (advanced) {
2375
- yield new AgentEvent({
2376
- type: "pipeline_event",
2377
- data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
2378
- });
2379
- }
2380
- }
2381
- }
2456
+ // v0.7.4 G0c: if phase changed during the parallel run, log the
2457
+ // checkpoint event for the audit trail. No auto-advance — that
2458
+ // belongs to the agent (phase_advance tool) or user re-prompt.
2459
+ if (this.currentPhase !== startingPhase) {
2460
+ this.eventLog.append("ralph_loop_exit", {
2461
+ reason: "phase_changed",
2462
+ from: startingPhase,
2463
+ to: this.currentPhase,
2464
+ });
2382
2465
  }
2383
2466
  }
2384
2467
 
@@ -80,6 +80,59 @@ function readJsonSafe(p) {
80
80
  try { return JSON.parse(fs.readFileSync(p, "utf-8")); } catch { return null; }
81
81
  }
82
82
 
83
+ function readFileSafe(p) {
84
+ try { return fs.readFileSync(p, "utf-8"); } catch { return ""; }
85
+ }
86
+
87
+ /**
88
+ * v0.7.5 G-H1: extract `source_rules: [...]` from YAML frontmatter.
89
+ *
90
+ * Supports both inline and block list forms:
91
+ * source_rules: [R001, R005, R007]
92
+ * source_rules:
93
+ * - R001
94
+ * - R005
95
+ *
96
+ * Used by milestone derivation to credit grouped/thematic skill folders
97
+ * + master workflows where the agent declares which rules are covered.
98
+ * Returns an array of canonical rule IDs (e.g., ["R001", "R005"]).
99
+ */
100
+ function parseSourceRulesFromFrontmatter(content) {
101
+ if (!content || typeof content !== "string") return [];
102
+ const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
103
+ if (!fmMatch) return [];
104
+ const fm = fmMatch[1];
105
+
106
+ // Inline form: source_rules: [R001, R005, "R007"]
107
+ const inlineMatch = fm.match(/^source_rules\s*:\s*\[([^\]]*)\]\s*$/m);
108
+ if (inlineMatch) {
109
+ return inlineMatch[1]
110
+ .split(",")
111
+ .map(s => s.trim().replace(/^["']|["']$/g, ""))
112
+ .filter(Boolean)
113
+ .map(s => canonicalRuleId(s) || s)
114
+ .filter(rid => /^R\d+$/i.test(rid))
115
+ .map(rid => rid.toUpperCase().replace(/^R0*(\d+)$/, (_, n) => `R${String(parseInt(n,10)).padStart(3,"0")}`));
116
+ }
117
+
118
+ // Block form: source_rules:\n - R001\n - R005
119
+ const blockMatch = fm.match(/^source_rules\s*:\s*\n((?:[ \t]+-\s+\S+\s*\n?)+)/m);
120
+ if (blockMatch) {
121
+ return blockMatch[1]
122
+ .split("\n")
123
+ .map(line => {
124
+ const m = line.match(/^[ \t]+-\s+["']?([^"'\s]+)["']?\s*$/);
125
+ return m ? m[1] : null;
126
+ })
127
+ .filter(Boolean)
128
+ .map(s => canonicalRuleId(s) || s)
129
+ .filter(rid => /^R\d+$/i.test(rid))
130
+ .map(rid => rid.toUpperCase().replace(/^R0*(\d+)$/, (_, n) => `R${String(parseInt(n,10)).padStart(3,"0")}`));
131
+ }
132
+
133
+ return [];
134
+ }
135
+
83
136
  function sha256OfFile(p) {
84
137
  try {
85
138
  const buf = fs.readFileSync(p);
@@ -239,6 +292,26 @@ export function deriveSkillAuthoringMilestones(workspace) {
239
292
  }
240
293
  }
241
294
  }
295
+
296
+ // v0.7.5 G-H1: also credit rule_ids declared in SKILL.md frontmatter
297
+ // `source_rules:` field. Agents using grouped/thematic skill folders
298
+ // (e.g., S01_compliance/, custodian_checks/) declare which rules
299
+ // their grouped check covers via frontmatter; engine derivation
300
+ // credits each declared rule_id. Audit found 资管 v0.7.4 session
301
+ // forced through skill_authoring → skill_testing because its 10 S*
302
+ // grouped folders weren't credited (rulesCovered=0/94).
303
+ if (hasSkillMd) {
304
+ try {
305
+ const skillMdFile = listChildFiles(skillPath).find(
306
+ (f) => f.name.toLowerCase() === "skill.md",
307
+ );
308
+ if (skillMdFile) {
309
+ const content = readFileSafe(path.join(skillPath, skillMdFile.name));
310
+ const sourceRules = parseSourceRulesFromFrontmatter(content);
311
+ for (const rid of sourceRules) ruleIdsCovered.add(rid);
312
+ }
313
+ } catch { /* best-effort */ }
314
+ }
242
315
  }
243
316
 
244
317
  return {
@@ -362,6 +435,37 @@ export function deriveDistillationMilestones(workspace) {
362
435
  const cwd = cwdOf(workspace);
363
436
  const wfRoot = path.join(cwd, "workflows");
364
437
  const workflowsCreated = [];
438
+ // v0.7.5 G-H1: also track rule IDs covered by workflows. Grouped/master
439
+ // workflows (e.g., 贷款 v0.7.4's master + R001 template) cover multiple
440
+ // rules; declare them via SKILL.md frontmatter `source_rules: [...]`.
441
+ // Engine credits each declared rule_id so workflowsCovered milestone
442
+ // matches catalog reality.
443
+ const ruleIdsCovered = new Set();
444
+
445
+ const creditWorkflowSourceRules = (workflowDir) => {
446
+ // Check for a SKILL.md (or workflow.md) declaring source_rules
447
+ const candidates = listChildFiles(workflowDir).filter(
448
+ (f) => /^(skill|workflow)\.md$/i.test(f.name),
449
+ );
450
+ for (const c of candidates) {
451
+ const content = readFileSafe(path.join(workflowDir, c.name));
452
+ for (const rid of parseSourceRulesFromFrontmatter(content)) {
453
+ ruleIdsCovered.add(rid);
454
+ }
455
+ }
456
+ // Also: per-workflow config.json may declare rule coverage
457
+ const configPath = path.join(workflowDir, "config.json");
458
+ if (fileExists(configPath)) {
459
+ const data = readJsonSafe(configPath);
460
+ const rules = Array.isArray(data?.source_rules) ? data.source_rules :
461
+ Array.isArray(data?.rules) ? data.rules :
462
+ Array.isArray(data?.rule_ids) ? data.rule_ids : [];
463
+ for (const r of rules) {
464
+ const canon = canonicalRuleId(String(r));
465
+ if (canon) ruleIdsCovered.add(canon);
466
+ }
467
+ }
468
+ };
365
469
 
366
470
  if (dirExists(wfRoot)) {
367
471
  // Two layouts seen in E2E #5:
@@ -375,16 +479,39 @@ export function deriveDistillationMilestones(workspace) {
375
479
  const sub = path.join(wfRoot, e.name);
376
480
  const hasPy = listChildFiles(sub).some((f) =>
377
481
  /workflow.*\.py$/i.test(f.name) || /^check.*\.py$/i.test(f.name));
378
- if (hasPy) workflowsCreated.push(e.name);
482
+ if (hasPy) {
483
+ workflowsCreated.push(e.name);
484
+ // Dir name might itself be a rule_id
485
+ const canon = canonicalRuleId(e.name);
486
+ if (canon) ruleIdsCovered.add(canon);
487
+ // Plus any frontmatter / config-declared source_rules
488
+ creditWorkflowSourceRules(sub);
489
+ }
379
490
  continue;
380
491
  }
381
492
  if (e.isFile()) {
382
493
  const m1 = e.name.match(/^(.+)_workflow\.py$/i);
383
- if (m1) { workflowsCreated.push(m1[1]); continue; }
494
+ if (m1) {
495
+ workflowsCreated.push(m1[1]);
496
+ const canon = canonicalRuleId(m1[1]);
497
+ if (canon) ruleIdsCovered.add(canon);
498
+ continue;
499
+ }
384
500
  const m2 = e.name.match(/^(.+)\.json$/i);
385
501
  if (m2) {
386
502
  const data = readJsonSafe(path.join(wfRoot, e.name));
387
- if (data && (data.rule_id || data.entry || data.type)) workflowsCreated.push(m2[1]);
503
+ if (data && (data.rule_id || data.entry || data.type)) {
504
+ workflowsCreated.push(m2[1]);
505
+ const canon = canonicalRuleId(data.rule_id || m2[1]);
506
+ if (canon) ruleIdsCovered.add(canon);
507
+ // Manifest-declared source_rules
508
+ const rules = Array.isArray(data.source_rules) ? data.source_rules :
509
+ Array.isArray(data.rules) ? data.rules : [];
510
+ for (const r of rules) {
511
+ const c2 = canonicalRuleId(String(r));
512
+ if (c2) ruleIdsCovered.add(c2);
513
+ }
514
+ }
388
515
  continue;
389
516
  }
390
517
  }
@@ -408,7 +535,16 @@ export function deriveDistillationMilestones(workspace) {
408
535
  }
409
536
  }
410
537
 
411
- return { workflowsCreated, workflowsTested };
538
+ return {
539
+ workflowsCreated,
540
+ workflowsTested,
541
+ // v0.7.5 G-H1: rule_ids the engine credits as having workflow coverage
542
+ // (either via dir name being a canonical rule_id, or via SKILL.md /
543
+ // workflow.md / config.json frontmatter declaring source_rules: [...]).
544
+ // Pipelines that check workflow coverage against the catalog should
545
+ // prefer ruleIdsCovered over workflowsCreated for grouped patterns.
546
+ ruleIdsCovered: [...ruleIdsCovered],
547
+ };
412
548
  }
413
549
 
414
550
  // ───────────────────────────────────────────────────────────────────
@@ -9,7 +9,10 @@ import { deriveBootstrapMilestones } from "./_milestone-derive.js";
9
9
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
10
  const AGENT_MD_TEMPLATE = path.resolve(__dirname, "../../../template/AGENT.md");
11
11
 
12
- const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills"];
12
+ // v0.7.5: `skills` added to required dirs. Populated by SkillLoader
13
+ // .populateWorkspaceSkills() at bootstrap + on every phase transition
14
+ // with the phase's `available` skill set (per phase_skills.yaml).
15
+ const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills", "skills"];
13
16
 
14
17
  const DEFAULT_ENV = `# === KC Agent Project Configuration ===
15
18