kc-beta 0.7.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +10 -4
  2. package/bin/kc-beta.js +20 -6
  3. package/package.json +1 -1
  4. package/src/agent/engine.js +131 -60
  5. package/src/agent/pipelines/_milestone-derive.js +140 -4
  6. package/src/agent/pipelines/initializer.js +4 -1
  7. package/src/agent/skill-loader.js +433 -111
  8. package/src/agent/tools/consult-skill.js +112 -0
  9. package/src/agent/tools/copy-to-workspace.js +4 -3
  10. package/src/agent/tools/release.js +128 -1
  11. package/src/agent/tools/workspace-file.js +7 -7
  12. package/src/config.js +1 -1
  13. package/template/AGENT.md +182 -7
  14. package/template/skills/en/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
  15. package/template/skills/en/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +1 -0
  16. package/template/skills/{zh/meta → en}/compliance-judgment/SKILL.md +1 -0
  17. package/template/skills/en/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
  18. package/template/skills/en/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
  19. package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
  20. package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
  21. package/template/skills/en/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
  22. package/template/skills/{zh/meta → en}/document-chunking/SKILL.md +1 -0
  23. package/template/skills/en/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
  24. package/template/skills/{zh/meta → en}/entity-extraction/SKILL.md +1 -0
  25. package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
  26. package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
  27. package/template/skills/en/{meta-meta/quality-control → quality-control}/SKILL.md +1 -0
  28. package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
  29. package/template/skills/en/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
  30. package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +1 -0
  31. package/template/skills/en/skill-creator/SKILL.md +2 -1
  32. package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/SKILL.md +5 -4
  33. package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
  34. package/template/skills/en/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
  35. package/template/skills/en/{meta-meta/version-control → version-control}/SKILL.md +1 -0
  36. package/template/skills/en/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +17 -6
  37. package/template/skills/phase_skills.yaml +107 -0
  38. package/template/skills/zh/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
  39. package/template/skills/zh/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +1 -0
  40. package/template/skills/{en/meta → zh}/compliance-judgment/SKILL.md +1 -0
  41. package/template/skills/zh/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
  42. package/template/skills/zh/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
  43. package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
  44. package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
  45. package/template/skills/zh/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
  46. package/template/skills/{en/meta → zh}/document-chunking/SKILL.md +1 -0
  47. package/template/skills/zh/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
  48. package/template/skills/{en/meta → zh}/entity-extraction/SKILL.md +1 -0
  49. package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
  50. package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
  51. package/template/skills/zh/{meta-meta/quality-control → quality-control}/SKILL.md +1 -0
  52. package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
  53. package/template/skills/zh/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
  54. package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +1 -0
  55. package/template/skills/zh/skill-creator/SKILL.md +2 -1
  56. package/template/skills/zh/skill-to-workflow/SKILL.md +190 -0
  57. package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
  58. package/template/skills/zh/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
  59. package/template/skills/zh/{meta-meta/version-control → version-control}/SKILL.md +1 -0
  60. package/template/skills/zh/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +15 -4
  61. package/template/CLAUDE.md +0 -150
  62. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +0 -188
  63. /package/template/skills/en/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
  64. /package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
  65. /package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
  66. /package/template/skills/en/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
  67. /package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
  68. /package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
  69. /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
  70. /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
  71. /package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
  72. /package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
  73. /package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
  74. /package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
  75. /package/template/skills/en/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
  76. /package/template/skills/zh/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
  77. /package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
  78. /package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
  79. /package/template/skills/zh/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
  80. /package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
  81. /package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
  82. /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
  83. /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
  84. /package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
  85. /package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
  86. /package/template/skills/zh/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
  87. /package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
  88. /package/template/skills/zh/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
package/README.md CHANGED
@@ -216,8 +216,8 @@ Quality Thresholds, Language.
216
216
 
217
217
  ## Status
218
218
 
219
- **v0.7.3 — codex review patch release.** Latest line in the v0.7.x
220
- hardening track. Architectural payload from v0.6.0+ is still in place:
219
+ **v0.7.4 — phase-control fix + codex review re-attempt.** Architectural
220
+ payload from v0.6.0+ is still in place:
221
221
 
222
222
  - Parallel ralph-loop (up to 8 concurrent workers) with a heap-safety
223
223
  conformance gate
@@ -226,8 +226,14 @@ hardening track. Architectural payload from v0.6.0+ is still in place:
226
226
  - Agent-owned task board: the agent reads the rule list from
227
227
  `describeState`, decides decomposition (per-rule / grouped / range),
228
228
  and calls `TaskCreate` / `TaskUpdate` / `TaskComplete` to drive the
229
- Ralph loop. Source-context auto-attach pulls rule NL + evidence chunks
230
- + sibling rules into the prompt of each task as it runs.
229
+ Ralph loop **within the current phase only** (v0.7.4). Source-context
230
+ auto-attach pulls rule NL + evidence chunks + sibling rules into each
231
+ task's prompt.
232
+ - Phase boundaries = user checkpoints: the Ralph loop exits at every
233
+ phase transition, returning control to the user. The engine doesn't
234
+ auto-advance; phase advance is explicit (agent's `phase_advance` tool
235
+ call or user re-prompt). Marathon-style end-to-end autonomy lives
236
+ outside the engine.
231
237
  - Workspace file locking for shared coordination files (`rules/catalog.json`,
232
238
  `rules/manifest.json`, `refs/manifest.json`, `tasks.json`,
233
239
  `session-state.json`) — every writer goes through `withFileLock`.
package/bin/kc-beta.js CHANGED
@@ -34,17 +34,31 @@ if (parallelismOverride !== null) {
34
34
  // their own output.
35
35
  const __filename = fileURLToPath(import.meta.url);
36
36
  const __dirname = dirname(__filename);
37
- function printBanner() {
37
+ function readPkgVersion() {
38
38
  try {
39
39
  const pkg = JSON.parse(readFileSync(resolve(__dirname, "..", "package.json"), "utf-8"));
40
- const scriptPath = __filename;
41
- process.stderr.write(`⏵⏵ KC Agent CLI v${pkg.version} · ${scriptPath}\n`);
42
- } catch { /* package.json missing or unreadable — silent */ }
40
+ return pkg.version || "unknown";
41
+ } catch { return "unknown"; }
42
+ }
43
+ function printBanner() {
44
+ try {
45
+ const v = readPkgVersion();
46
+ process.stderr.write(`⏵⏵ KC Agent CLI v${v} · ${__filename}\n`);
47
+ } catch { /* silent */ }
43
48
  }
44
- const suppressBanner = args.includes("--version") || args.includes("-v") ||
45
- args.includes("--help") || args.includes("-h");
49
+ const isVersion = args.includes("--version") || args.includes("-v");
50
+ const isHelp = args.includes("--help") || args.includes("-h");
51
+ const suppressBanner = isVersion || isHelp;
46
52
  if (!suppressBanner) printBanner();
47
53
 
54
+ // v0.7.5 G-F1: `--version` prints version and exits. Previously the flag
55
+ // suppressed the banner but fell through to TUI launch (audit confirmed
56
+ // during v0.7.4 testing). Print + exit before the subcommand dispatch.
57
+ if (isVersion) {
58
+ process.stdout.write(`${readPkgVersion()}\n`);
59
+ process.exit(0);
60
+ }
61
+
48
62
  (async () => {
49
63
  if (subcommand === "onboard" || subcommand === "setup") {
50
64
  const { onboard } = await import("../src/cli/onboard.js");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kc-beta",
3
- "version": "0.7.3",
3
+ "version": "0.7.5",
4
4
  "description": "KC Agent — LLM document verification agent (pure Node.js CLI). Dual-licensed: PolyForm Noncommercial 1.0.0 for personal/noncommercial use; commercial license required for enterprise production. See LICENSE and LICENSE-COMMERCIAL.md.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -38,6 +38,7 @@ import { TierDowngradeTool } from "./tools/tier-downgrade.js";
38
38
  import { AgentTool } from "./tools/agent-tool.js";
39
39
  import { WebSearchTool } from "./tools/web-search.js";
40
40
  import { TaskCreateTool, TaskUpdateTool, TaskCompleteTool } from "./tools/task-board.js";
41
+ import { ConsultSkillTool } from "./tools/consult-skill.js";
41
42
  import { SkillLoader } from "./skill-loader.js";
42
43
  import { TaskManager } from "./task-manager.js";
43
44
  import { Scheduler } from "./scheduler.js";
@@ -234,6 +235,18 @@ export class AgentEngine {
234
235
  // Skill discovery (Claude Code pattern: index in context, full content on demand)
235
236
  this._skillLoader = new SkillLoader(config.language);
236
237
 
238
+ // v0.7.5 G-D1: populate <workspace>/skills/ with the initial phase's
239
+ // available skill set. Symlink with copy fallback. Re-populated on
240
+ // every phase advance/retreat (see _advancePhase).
241
+ try {
242
+ const res = this._skillLoader.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
243
+ this.eventLog?.append?.("skills_populated", {
244
+ phase: res.phase,
245
+ populated: res.populated,
246
+ failures: res.failures,
247
+ });
248
+ } catch { /* best-effort; skills/ population is not a critical-path failure */ }
249
+
237
250
  // Register tools for initial phase
238
251
  this.toolRegistry = new ToolRegistry();
239
252
  this._registerToolsForPhase(this.currentPhase);
@@ -476,16 +489,27 @@ export class AgentEngine {
476
489
  () => this.currentPhase,
477
490
  ),
478
491
  new WebSearchTool(this.config.tavilyApiKey),
479
- // v0.7.3: completes the v0.7.0 "agent owns TaskBoard" design.
480
- // Skills already reference TaskCreate by name; these tools make
481
- // that contract truthful. See task-board.js + work-decomposition
482
- // SKILL.md. Skipped for subagents — they don't own a task board
483
- // (taskManager is null in subagent scope, line 216).
492
+ // v0.7.4 (re-applied from v0.7.3 G2b): TaskCreate /
493
+ // TaskUpdate / TaskComplete — agent populates the
494
+ // Ralph-loop queue for the CURRENT phase only. Phase
495
+ // boundaries exit the loop (v0.7.4 G0c). Skipped for
496
+ // subagents (taskManager null in subagent scope).
484
497
  ...(this.taskManager ? [
485
498
  new TaskCreateTool(this.workspace, this.taskManager),
486
499
  new TaskUpdateTool(this.workspace, this.taskManager),
487
500
  new TaskCompleteTool(this.workspace, this.taskManager),
488
501
  ] : []),
502
+ // v0.7.5: consult_skill loads a meta-skill body into conversation
503
+ // history on demand. Always-loaded skills are already in the
504
+ // system prompt via SkillLoader.formatForContext; this tool covers
505
+ // the "available" set for the current phase. Both main + subagents
506
+ // register their own — each has its own skillLoader + phase.
507
+ new ConsultSkillTool(
508
+ this.workspace,
509
+ this._skillLoader,
510
+ () => this.currentPhase,
511
+ this.eventLog,
512
+ ),
489
513
  ],
490
514
  // Distillation+ only (DISTILL mode)
491
515
  distill: [
@@ -1196,11 +1220,11 @@ export class AgentEngine {
1196
1220
  }
1197
1221
  this._totalTurns = (this._totalTurns || 0) + 1;
1198
1222
 
1199
- // Bug 4 trigger (1): re-check phase criteria at end of every turn —
1200
- // KC may have advanced state via conversation alone, without any
1201
- // tool that the pipeline narrowly watches.
1202
- const advancedEv = this._maybeAutoAdvance();
1203
- if (advancedEv) yield advancedEv;
1223
+ // v0.7.4 G0b: removed `_maybeAutoAdvance()` auto-fire here.
1224
+ // Phase advance is now 100% explicit (agent's `phase_advance`
1225
+ // tool, or user re-prompt). v0.7.3 phase-control regression
1226
+ // was caused by this edge-triggered auto-advance firing mid-
1227
+ // session and chaining into next phase without user check-in.
1204
1228
 
1205
1229
  this.eventLog.append("turn_complete", {});
1206
1230
  this.saveState();
@@ -1289,23 +1313,45 @@ export class AgentEngine {
1289
1313
 
1290
1314
  this.eventLog.append("tool_result", {
1291
1315
  name: tc.name,
1316
+ input: inputData,
1292
1317
  output: result.content || "",
1293
1318
  isError: result.isError,
1294
1319
  traceId: offload?.traceId || null,
1295
1320
  });
1296
1321
 
1297
- // D3a: trace skill invocations. When the agent reads a SKILL.md via
1298
- // workspace_file (the canonical way KC "uses" a skill, since skills
1299
- // are progressively-disclosed markdown), emit a skill_invoked event.
1300
- // Makes "which skills did KC actually consult?" answerable in post-run
1301
- // analysis — before this, skills were opaque to the event log.
1322
+ // v0.7.5 (G-F4): added `input` above so events.jsonl carries the
1323
+ // tool inputs (v0.7.4 G1c only patched the AgentEvent yield path,
1324
+ // missed the persistence path — audit confirmed 0/453 + 0/946
1325
+ // tool_result events had `input` in v0.7.4 sessions).
1326
+
1327
+ // D3a: trace skill invocations. v0.7.5 (G-C6): only fire on
1328
+ // READS of meta-skill paths. Writes to rule_skills/<id>/SKILL.md
1329
+ // during skill_authoring are NOT skill invocations — they're the
1330
+ // agent producing its own deliverable. The old "(unknown)" spam
1331
+ // (100% of events in v0.7.1 + v0.7.4 sessions) is gone.
1332
+ //
1333
+ // Note: meta-skill body reads now happen via consult_skill, which
1334
+ // emits skill_invoked itself (with the real skill name). This
1335
+ // path-matching emission stays only as a fallback for any agent
1336
+ // that reads a SKILL.md path directly (out of pattern).
1302
1337
  try {
1338
+ const isRead =
1339
+ (tc.name === "workspace_file" && inputData?.operation === "read") ||
1340
+ (tc.name === "sandbox_exec" && /\b(cat|head|tail|less|grep|view|read)\b/.test(
1341
+ String(inputData?.command || "")
1342
+ ));
1303
1343
  if (
1304
1344
  !result.isError &&
1345
+ isRead &&
1305
1346
  (tc.name === "workspace_file" || tc.name === "sandbox_exec")
1306
1347
  ) {
1307
1348
  const p = String(inputData?.path || inputData?.command || "");
1308
- const skillMatch = p.match(/(?:template\/)?skills\/[a-z-]+\/(?:meta-meta|meta|skill-creator)\/([a-zA-Z0-9_-]+)(?:\/SKILL\.md|\/)?|\bSKILL\.md\b/);
1349
+ // v0.7.5 flat layout: skills/<name>/SKILL.md (workspace scope)
1350
+ // OR template/skills/<lang>/<name>/SKILL.md (template scope, rare)
1351
+ // Deep layout backward-compat preserved for any stragglers.
1352
+ const skillMatch = p.match(
1353
+ /(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/SKILL\.md\b/
1354
+ ) || p.match(/\bSKILL\.md\b/);
1309
1355
  if (skillMatch) {
1310
1356
  const skillName = skillMatch[1] || "(unknown)";
1311
1357
  this.eventLog.append("skill_invoked", {
@@ -1386,12 +1432,9 @@ export class AgentEngine {
1386
1432
  }
1387
1433
  }
1388
1434
 
1389
- // Bug 4 fix: re-check exit criteria after every tool-result loop, not
1390
- // just from pipeline.onToolResult. The pipeline's describeState() (called
1391
- // on every turn) already re-scans, so exitCriteriaMet() is accurate; we
1392
- // just need to act on it eagerly.
1393
- const ev = this._maybeAutoAdvance();
1394
- if (ev) yield ev;
1435
+ // v0.7.4 G0b: removed post-tool `_maybeAutoAdvance()` call.
1436
+ // Phase advance is now 100% explicit. See `_runTaskLoopSerial`
1437
+ // phase-change-exit guard for the loop-level checkpoint.
1395
1438
 
1396
1439
  } catch (err) {
1397
1440
  // A8: If the LLM client tagged the stream termination reason, pass
@@ -1541,6 +1584,20 @@ export class AgentEngine {
1541
1584
  this.workspace.setPhase(this.currentPhase);
1542
1585
  this._createTasksForPhase(this.currentPhase);
1543
1586
 
1587
+ // v0.7.5 G-D2: re-populate <workspace>/skills/ with the new phase's
1588
+ // available set. Symlinks are wiped + recreated. Agent's `ls skills/`
1589
+ // and any read-by-path reflects the current phase's allowlist.
1590
+ try {
1591
+ const res = this._skillLoader?.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
1592
+ if (res) {
1593
+ this.eventLog.append("skills_populated", {
1594
+ phase: res.phase,
1595
+ populated: res.populated,
1596
+ failures: res.failures,
1597
+ });
1598
+ }
1599
+ } catch { /* best-effort */ }
1600
+
1544
1601
  // v0.7.0 N (#94): give the entered pipeline a chance to do
1545
1602
  // phase-entry setup. Used by finalization to copy the release
1546
1603
  // template into output/releases/v1/. Other pipelines are no-ops.
@@ -2109,7 +2166,26 @@ export class AgentEngine {
2109
2166
  // Run the initial turn (user's request)
2110
2167
  yield* this.runTurn(userMessage);
2111
2168
 
2112
- // Auto-continue through pending tasks
2169
+ // v0.7.5 G-F5 — TEMPORARILY DISABLED 2026-05-13 for overnight
2170
+ // marathon test. The strict capture-BEFORE form lets every user
2171
+ // prompt advance only one phase, which blocks unattended overnight
2172
+ // sessions. v0.7.4-style capture-AFTER (below) allows the agent
2173
+ // to chain multiple phase_advance calls within the initial runTurn,
2174
+ // then exits the while loop on subsequent phase changes.
2175
+ //
2176
+ // TODO: after the overnight E2E results come in (2026-05-14), decide:
2177
+ // (a) re-enable F5 strict and build marathon as a separate mode
2178
+ // (external driver pattern, e.g., /loop-kc command) — locked
2179
+ // earlier decision per harness-research § 7
2180
+ // (b) keep capture-AFTER permanently and accept multi-phase prompts
2181
+ //
2182
+ // To re-enable F5: move `const startingPhase = this.currentPhase;`
2183
+ // to BEFORE the `yield* this.runTurn(userMessage);` above, and add
2184
+ // the matching `if (this.currentPhase !== startingPhase) { return; }`
2185
+ // block between runTurn and the while loop.
2186
+ const startingPhase = this.currentPhase;
2187
+
2188
+ // Auto-continue through pending tasks (within current phase only)
2113
2189
  while (this.taskManager.getNextPending()) {
2114
2190
  // v0.7.0 #93: budget-aware compact threshold. The old
2115
2191
  // `messages.length > 15` was message-count-based and frozen
@@ -2170,26 +2246,20 @@ export class AgentEngine {
2170
2246
  },
2171
2247
  });
2172
2248
 
2173
- // Bug 4 trigger (2): auto-advance when all phase tasks are done AND
2174
- // the pipeline's exit criteria are also met (Bug 5 fix — task state
2175
- // alone is a ralph-loop convenience, not authoritative phase signal;
2176
- // tasks could be marked skipped manually or by an editor).
2177
- if (this._allCurrentPhaseTasksComplete()) {
2178
- const pipeline = this.pipelines[this.currentPhase];
2179
- let exitMet = false;
2180
- try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
2181
- if (exitMet) {
2182
- const next = NEXT_PHASE[this.currentPhase];
2183
- if (next) {
2184
- const advanced = this._advancePhase(next, "all current-phase tasks completed + exit criteria met");
2185
- if (advanced) {
2186
- yield new AgentEvent({
2187
- type: "pipeline_event",
2188
- data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
2189
- });
2190
- }
2191
- }
2192
- }
2249
+ // v0.7.4 G0c: phase boundary = user checkpoint. Exit the
2250
+ // loop if the agent advanced phase during this task —
2251
+ // even if pre-created tasks for the new phase are queued.
2252
+ // User sees current state and explicitly re-prompts to
2253
+ // begin the next phase. Marathon-style end-to-end
2254
+ // autonomy belongs to an external driver (Claude Code
2255
+ // /loop pattern), not the engine.
2256
+ if (this.currentPhase !== startingPhase) {
2257
+ this.eventLog.append("ralph_loop_exit", {
2258
+ reason: "phase_changed",
2259
+ from: startingPhase,
2260
+ to: this.currentPhase,
2261
+ });
2262
+ break;
2193
2263
  }
2194
2264
  }
2195
2265
  }
@@ -2215,6 +2285,12 @@ export class AgentEngine {
2215
2285
  // Initial turn: main agent reads user request, creates tasks.
2216
2286
  yield* this.runTurn(userMessage);
2217
2287
 
2288
+ // v0.7.5 G-F5 — TEMPORARILY DISABLED 2026-05-13 for overnight
2289
+ // marathon test. See _runTaskLoopSerial above for full rationale.
2290
+ // To re-enable F5: move `startingPhase` capture BEFORE the
2291
+ // initial runTurn, add post-runTurn exit check matching serial.
2292
+ const startingPhase = this.currentPhase;
2293
+
2218
2294
  const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
2219
2295
  if (!agentTool) {
2220
2296
  // Shouldn't happen (agent_tool is core), but fall back safely.
@@ -2239,6 +2315,9 @@ export class AgentEngine {
2239
2315
  const inFlight = new Map();
2240
2316
 
2241
2317
  const dispatch = async () => {
2318
+ // v0.7.4 G0c: stop dispatching if phase changed since loop start.
2319
+ // In-flight workers complete naturally; queue stays untouched.
2320
+ if (this.currentPhase !== startingPhase) return;
2242
2321
  while (inFlight.size < parallelism) {
2243
2322
  const task = this.taskManager.claimNextPending(`pool${inFlight.size}`);
2244
2323
  if (!task) return;
@@ -2374,23 +2453,15 @@ export class AgentEngine {
2374
2453
 
2375
2454
  this.saveState();
2376
2455
 
2377
- // After all workers done, check for phase auto-advance (same as serial path).
2378
- if (this._allCurrentPhaseTasksComplete()) {
2379
- const pipeline = this.pipelines[this.currentPhase];
2380
- let exitMet = false;
2381
- try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
2382
- if (exitMet) {
2383
- const next = NEXT_PHASE[this.currentPhase];
2384
- if (next) {
2385
- const advanced = this._advancePhase(next, "all parallel tasks completed + exit criteria met");
2386
- if (advanced) {
2387
- yield new AgentEvent({
2388
- type: "pipeline_event",
2389
- data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
2390
- });
2391
- }
2392
- }
2393
- }
2456
+ // v0.7.4 G0c: if phase changed during the parallel run, log the
2457
+ // checkpoint event for the audit trail. No auto-advance — that
2458
+ // belongs to the agent (phase_advance tool) or user re-prompt.
2459
+ if (this.currentPhase !== startingPhase) {
2460
+ this.eventLog.append("ralph_loop_exit", {
2461
+ reason: "phase_changed",
2462
+ from: startingPhase,
2463
+ to: this.currentPhase,
2464
+ });
2394
2465
  }
2395
2466
  }
2396
2467
 
@@ -80,6 +80,59 @@ function readJsonSafe(p) {
80
80
  try { return JSON.parse(fs.readFileSync(p, "utf-8")); } catch { return null; }
81
81
  }
82
82
 
83
+ function readFileSafe(p) {
84
+ try { return fs.readFileSync(p, "utf-8"); } catch { return ""; }
85
+ }
86
+
87
+ /**
88
+ * v0.7.5 G-H1: extract `source_rules: [...]` from YAML frontmatter.
89
+ *
90
+ * Supports both inline and block list forms:
91
+ * source_rules: [R001, R005, R007]
92
+ * source_rules:
93
+ * - R001
94
+ * - R005
95
+ *
96
+ * Used by milestone derivation to credit grouped/thematic skill folders
97
+ * + master workflows where the agent declares which rules are covered.
98
+ * Returns an array of canonical rule IDs (e.g., ["R001", "R005"]).
99
+ */
100
+ function parseSourceRulesFromFrontmatter(content) {
101
+ if (!content || typeof content !== "string") return [];
102
+ const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
103
+ if (!fmMatch) return [];
104
+ const fm = fmMatch[1];
105
+
106
+ // Inline form: source_rules: [R001, R005, "R007"]
107
+ const inlineMatch = fm.match(/^source_rules\s*:\s*\[([^\]]*)\]\s*$/m);
108
+ if (inlineMatch) {
109
+ return inlineMatch[1]
110
+ .split(",")
111
+ .map(s => s.trim().replace(/^["']|["']$/g, ""))
112
+ .filter(Boolean)
113
+ .map(s => canonicalRuleId(s) || s)
114
+ .filter(rid => /^R\d+$/i.test(rid))
115
+ .map(rid => rid.toUpperCase().replace(/^R0*(\d+)$/, (_, n) => `R${String(parseInt(n,10)).padStart(3,"0")}`));
116
+ }
117
+
118
+ // Block form: source_rules:\n - R001\n - R005
119
+ const blockMatch = fm.match(/^source_rules\s*:\s*\n((?:[ \t]+-\s+\S+\s*\n?)+)/m);
120
+ if (blockMatch) {
121
+ return blockMatch[1]
122
+ .split("\n")
123
+ .map(line => {
124
+ const m = line.match(/^[ \t]+-\s+["']?([^"'\s]+)["']?\s*$/);
125
+ return m ? m[1] : null;
126
+ })
127
+ .filter(Boolean)
128
+ .map(s => canonicalRuleId(s) || s)
129
+ .filter(rid => /^R\d+$/i.test(rid))
130
+ .map(rid => rid.toUpperCase().replace(/^R0*(\d+)$/, (_, n) => `R${String(parseInt(n,10)).padStart(3,"0")}`));
131
+ }
132
+
133
+ return [];
134
+ }
135
+
83
136
  function sha256OfFile(p) {
84
137
  try {
85
138
  const buf = fs.readFileSync(p);
@@ -239,6 +292,26 @@ export function deriveSkillAuthoringMilestones(workspace) {
239
292
  }
240
293
  }
241
294
  }
295
+
296
+ // v0.7.5 G-H1: also credit rule_ids declared in SKILL.md frontmatter
297
+ // `source_rules:` field. Agents using grouped/thematic skill folders
298
+ // (e.g., S01_compliance/, custodian_checks/) declare which rules
299
+ // their grouped check covers via frontmatter; engine derivation
300
+ // credits each declared rule_id. Audit found 资管 v0.7.4 session
301
+ // forced through skill_authoring → skill_testing because its 10 S*
302
+ // grouped folders weren't credited (rulesCovered=0/94).
303
+ if (hasSkillMd) {
304
+ try {
305
+ const skillMdFile = listChildFiles(skillPath).find(
306
+ (f) => f.name.toLowerCase() === "skill.md",
307
+ );
308
+ if (skillMdFile) {
309
+ const content = readFileSafe(path.join(skillPath, skillMdFile.name));
310
+ const sourceRules = parseSourceRulesFromFrontmatter(content);
311
+ for (const rid of sourceRules) ruleIdsCovered.add(rid);
312
+ }
313
+ } catch { /* best-effort */ }
314
+ }
242
315
  }
243
316
 
244
317
  return {
@@ -362,6 +435,37 @@ export function deriveDistillationMilestones(workspace) {
362
435
  const cwd = cwdOf(workspace);
363
436
  const wfRoot = path.join(cwd, "workflows");
364
437
  const workflowsCreated = [];
438
+ // v0.7.5 G-H1: also track rule IDs covered by workflows. Grouped/master
439
+ // workflows (e.g., 贷款 v0.7.4's master + R001 template) cover multiple
440
+ // rules; declare them via SKILL.md frontmatter `source_rules: [...]`.
441
+ // Engine credits each declared rule_id so workflowsCovered milestone
442
+ // matches catalog reality.
443
+ const ruleIdsCovered = new Set();
444
+
445
+ const creditWorkflowSourceRules = (workflowDir) => {
446
+ // Check for a SKILL.md (or workflow.md) declaring source_rules
447
+ const candidates = listChildFiles(workflowDir).filter(
448
+ (f) => /^(skill|workflow)\.md$/i.test(f.name),
449
+ );
450
+ for (const c of candidates) {
451
+ const content = readFileSafe(path.join(workflowDir, c.name));
452
+ for (const rid of parseSourceRulesFromFrontmatter(content)) {
453
+ ruleIdsCovered.add(rid);
454
+ }
455
+ }
456
+ // Also: per-workflow config.json may declare rule coverage
457
+ const configPath = path.join(workflowDir, "config.json");
458
+ if (fileExists(configPath)) {
459
+ const data = readJsonSafe(configPath);
460
+ const rules = Array.isArray(data?.source_rules) ? data.source_rules :
461
+ Array.isArray(data?.rules) ? data.rules :
462
+ Array.isArray(data?.rule_ids) ? data.rule_ids : [];
463
+ for (const r of rules) {
464
+ const canon = canonicalRuleId(String(r));
465
+ if (canon) ruleIdsCovered.add(canon);
466
+ }
467
+ }
468
+ };
365
469
 
366
470
  if (dirExists(wfRoot)) {
367
471
  // Two layouts seen in E2E #5:
@@ -375,16 +479,39 @@ export function deriveDistillationMilestones(workspace) {
375
479
  const sub = path.join(wfRoot, e.name);
376
480
  const hasPy = listChildFiles(sub).some((f) =>
377
481
  /workflow.*\.py$/i.test(f.name) || /^check.*\.py$/i.test(f.name));
378
- if (hasPy) workflowsCreated.push(e.name);
482
+ if (hasPy) {
483
+ workflowsCreated.push(e.name);
484
+ // Dir name might itself be a rule_id
485
+ const canon = canonicalRuleId(e.name);
486
+ if (canon) ruleIdsCovered.add(canon);
487
+ // Plus any frontmatter / config-declared source_rules
488
+ creditWorkflowSourceRules(sub);
489
+ }
379
490
  continue;
380
491
  }
381
492
  if (e.isFile()) {
382
493
  const m1 = e.name.match(/^(.+)_workflow\.py$/i);
383
- if (m1) { workflowsCreated.push(m1[1]); continue; }
494
+ if (m1) {
495
+ workflowsCreated.push(m1[1]);
496
+ const canon = canonicalRuleId(m1[1]);
497
+ if (canon) ruleIdsCovered.add(canon);
498
+ continue;
499
+ }
384
500
  const m2 = e.name.match(/^(.+)\.json$/i);
385
501
  if (m2) {
386
502
  const data = readJsonSafe(path.join(wfRoot, e.name));
387
- if (data && (data.rule_id || data.entry || data.type)) workflowsCreated.push(m2[1]);
503
+ if (data && (data.rule_id || data.entry || data.type)) {
504
+ workflowsCreated.push(m2[1]);
505
+ const canon = canonicalRuleId(data.rule_id || m2[1]);
506
+ if (canon) ruleIdsCovered.add(canon);
507
+ // Manifest-declared source_rules
508
+ const rules = Array.isArray(data.source_rules) ? data.source_rules :
509
+ Array.isArray(data.rules) ? data.rules : [];
510
+ for (const r of rules) {
511
+ const c2 = canonicalRuleId(String(r));
512
+ if (c2) ruleIdsCovered.add(c2);
513
+ }
514
+ }
388
515
  continue;
389
516
  }
390
517
  }
@@ -408,7 +535,16 @@ export function deriveDistillationMilestones(workspace) {
408
535
  }
409
536
  }
410
537
 
411
- return { workflowsCreated, workflowsTested };
538
+ return {
539
+ workflowsCreated,
540
+ workflowsTested,
541
+ // v0.7.5 G-H1: rule_ids the engine credits as having workflow coverage
542
+ // (either via dir name being a canonical rule_id, or via SKILL.md /
543
+ // workflow.md / config.json frontmatter declaring source_rules: [...]).
544
+ // Pipelines that check workflow coverage against the catalog should
545
+ // prefer ruleIdsCovered over workflowsCreated for grouped patterns.
546
+ ruleIdsCovered: [...ruleIdsCovered],
547
+ };
412
548
  }
413
549
 
414
550
  // ───────────────────────────────────────────────────────────────────
@@ -9,7 +9,10 @@ import { deriveBootstrapMilestones } from "./_milestone-derive.js";
9
9
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
10
  const AGENT_MD_TEMPLATE = path.resolve(__dirname, "../../../template/AGENT.md");
11
11
 
12
- const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills"];
12
+ // v0.7.5: `skills` added to required dirs. Populated by SkillLoader
13
+ // .populateWorkspaceSkills() at bootstrap + on every phase transition
14
+ // with the phase's `available` skill set (per phase_skills.yaml).
15
+ const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills", "skills"];
13
16
 
14
17
  const DEFAULT_ENV = `# === KC Agent Project Configuration ===
15
18