kc-beta 0.7.3 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/README.md +57 -4
  2. package/bin/kc-beta.js +20 -6
  3. package/package.json +3 -2
  4. package/src/agent/engine.js +493 -132
  5. package/src/agent/pipelines/_advance-hints.js +92 -0
  6. package/src/agent/pipelines/_milestone-derive.js +387 -17
  7. package/src/agent/pipelines/initializer.js +4 -1
  8. package/src/agent/pipelines/skill-authoring.js +30 -1
  9. package/src/agent/skill-loader.js +433 -111
  10. package/src/agent/tools/agent-tool.js +2 -2
  11. package/src/agent/tools/consult-skill.js +127 -0
  12. package/src/agent/tools/copy-to-workspace.js +4 -3
  13. package/src/agent/tools/dashboard-render.js +48 -1
  14. package/src/agent/tools/document-parse.js +31 -2
  15. package/src/agent/tools/phase-advance.js +17 -13
  16. package/src/agent/tools/release.js +378 -8
  17. package/src/agent/tools/sandbox-exec.js +65 -8
  18. package/src/agent/tools/worker-llm-call.js +95 -15
  19. package/src/agent/tools/workspace-file.js +7 -7
  20. package/src/agent/workspace.js +25 -4
  21. package/src/cli/components.js +4 -1
  22. package/src/cli/index.js +97 -1
  23. package/src/config.js +20 -3
  24. package/src/marathon/driver.js +217 -0
  25. package/src/marathon/prompts.js +93 -0
  26. package/template/.env.template +16 -0
  27. package/template/AGENT.md +182 -7
  28. package/template/skills/en/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
  29. package/template/skills/en/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +15 -0
  30. package/template/skills/{zh/meta → en}/compliance-judgment/SKILL.md +1 -0
  31. package/template/skills/en/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
  32. package/template/skills/en/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
  33. package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
  34. package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
  35. package/template/skills/en/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
  36. package/template/skills/{zh/meta → en}/document-chunking/SKILL.md +1 -0
  37. package/template/skills/en/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
  38. package/template/skills/{zh/meta → en}/entity-extraction/SKILL.md +1 -0
  39. package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
  40. package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
  41. package/template/skills/en/{meta-meta/quality-control → quality-control}/SKILL.md +10 -0
  42. package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
  43. package/template/skills/en/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
  44. package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +40 -0
  45. package/template/skills/en/skill-creator/SKILL.md +2 -1
  46. package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/SKILL.md +58 -4
  47. package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
  48. package/template/skills/en/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
  49. package/template/skills/en/{meta-meta/version-control → version-control}/SKILL.md +1 -0
  50. package/template/skills/en/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +51 -6
  51. package/template/skills/phase_skills.yaml +112 -0
  52. package/template/skills/zh/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
  53. package/template/skills/zh/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +15 -0
  54. package/template/skills/zh/compliance-judgment/SKILL.md +83 -0
  55. package/template/skills/zh/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
  56. package/template/skills/zh/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
  57. package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
  58. package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
  59. package/template/skills/zh/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
  60. package/template/skills/zh/document-chunking/SKILL.md +40 -0
  61. package/template/skills/zh/document-parsing/SKILL.md +102 -0
  62. package/template/skills/zh/entity-extraction/SKILL.md +121 -0
  63. package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
  64. package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
  65. package/template/skills/zh/{meta-meta/quality-control → quality-control}/SKILL.md +10 -0
  66. package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
  67. package/template/skills/zh/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
  68. package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +40 -0
  69. package/template/skills/zh/skill-creator/SKILL.md +205 -200
  70. package/template/skills/zh/skill-to-workflow/SKILL.md +243 -0
  71. package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
  72. package/template/skills/zh/tree-processing/SKILL.md +126 -0
  73. package/template/skills/zh/{meta-meta/version-control → version-control}/SKILL.md +1 -0
  74. package/template/skills/zh/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +49 -4
  75. package/template/workflows/common/llm_client.py +168 -0
  76. package/template/workflows/common/utils.py +132 -0
  77. package/template/CLAUDE.md +0 -150
  78. package/template/skills/en/meta/compliance-judgment/SKILL.md +0 -82
  79. package/template/skills/en/meta/document-chunking/SKILL.md +0 -32
  80. package/template/skills/en/meta/entity-extraction/SKILL.md +0 -120
  81. package/template/skills/zh/meta/document-parsing/SKILL.md +0 -101
  82. package/template/skills/zh/meta/tree-processing/SKILL.md +0 -121
  83. package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +0 -188
  84. /package/template/skills/en/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
  85. /package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
  86. /package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
  87. /package/template/skills/en/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
  88. /package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
  89. /package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
  90. /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
  91. /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
  92. /package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
  93. /package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
  94. /package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
  95. /package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
  96. /package/template/skills/en/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
  97. /package/template/skills/zh/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
  98. /package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
  99. /package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
  100. /package/template/skills/zh/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
  101. /package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
  102. /package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
  103. /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
  104. /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
  105. /package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
  106. /package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
  107. /package/template/skills/zh/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
  108. /package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
  109. /package/template/skills/zh/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
@@ -5,6 +5,9 @@ import {
5
5
  deriveSkillAuthoringMilestones,
6
6
  deriveSkillTestingMilestones,
7
7
  } from "./pipelines/_milestone-derive.js";
8
+ import { getPrescriptiveHint } from "./pipelines/_advance-hints.js";
9
+ import { loadEnvFile } from "../config.js";
10
+ import { MarathonDriver } from "../marathon/driver.js";
8
11
  import { ContextAssembler } from "./context.js";
9
12
  import { ConversationHistory } from "./history.js";
10
13
  import { findSafeSplitPoint } from "./message-utils.js";
@@ -38,6 +41,7 @@ import { TierDowngradeTool } from "./tools/tier-downgrade.js";
38
41
  import { AgentTool } from "./tools/agent-tool.js";
39
42
  import { WebSearchTool } from "./tools/web-search.js";
40
43
  import { TaskCreateTool, TaskUpdateTool, TaskCompleteTool } from "./tools/task-board.js";
44
+ import { ConsultSkillTool } from "./tools/consult-skill.js";
41
45
  import { SkillLoader } from "./skill-loader.js";
42
46
  import { TaskManager } from "./task-manager.js";
43
47
  import { Scheduler } from "./scheduler.js";
@@ -165,6 +169,10 @@ export class AgentEngine {
165
169
  { gitAutoCommit: config.gitAutoCommit !== false },
166
170
  );
167
171
 
172
+ // v0.8 P1-B: workspace .env overlay deferred until after eventLog
173
+ // init (see _overlayWorkspaceEnv call below). Workspace dir is
174
+ // known here, but the overlay's audit event needs eventLog.
175
+
168
176
  // For sub-agents, persistence (history/events/state) lives under
169
177
  // sub_agents/<scope>/ instead of the workspace root. Workspace files
170
178
  // (rules/, rule_skills/, workflows/) stay shared.
@@ -202,6 +210,26 @@ export class AgentEngine {
202
210
  // Event log (append-only JSONL, source of truth)
203
211
  this.eventLog = new EventLog(this.workspace.cwd, { logDir });
204
212
 
213
+ // v0.8 P1-B: overlay workspace .env onto this.config. cli/index.js
214
+ // calls loadSettings() without a workspace path because the path
215
+ // isn't known until this constructor runs. Result: workspace .env's
216
+ // VLM_TIER1 / OCR_MODEL_TIER1 / TIER1..4 / LANGUAGE were silently
217
+ // ignored, with gc defaults (~/.kc_agent/config.json) winning.
218
+ // 资管 audit § 9.2 finding 7: user's OCR_MODEL_TIER1=zai-org/GLM-4.6V
219
+ // never reached document_parse; error messages quoted gc's
220
+ // Qwen3-VL-235B default. Overlay reads workspace .env, fills in
221
+ // fields where current config came from gc fallback (penv-set values
222
+ // still win because loadSettings applied them).
223
+ try { this._overlayWorkspaceEnv(); } catch { /* best-effort */ }
224
+
225
+ // v0.8.1 P8-A: inline marathon driver. v0.8.0's separate-process
226
+ // kc-marathon CLI + filesystem-watcher IPC died silently when the
227
+ // launching terminal closed (E2E #11 audit). Redesigned as an inline
228
+ // state machine activated via /marathon slash command. No filesystem
229
+ // marker, no inbox.jsonl. Driver instance set by enterMarathonMode(),
230
+ // cleared by exitMarathonMode(). Query via this.marathonDriver.
231
+ this.marathonDriver = null;
232
+
205
233
  // Context windowing
206
234
  this.contextWindow = new ContextWindow({
207
235
  contextLimit: config.kcContextLimit || 200000,
@@ -215,8 +243,6 @@ export class AgentEngine {
215
243
  // so they don't get a TaskManager.
216
244
  this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
217
245
 
218
- // Build all tool instances (but register phase-appropriate ones)
219
- this._buildTools = this._createAllTools();
220
246
  this._phaseSummaries = [];
221
247
 
222
248
  // Pipeline system (meta-meta skills as code)
@@ -232,22 +258,47 @@ export class AgentEngine {
232
258
  };
233
259
 
234
260
  // Skill discovery (Claude Code pattern: index in context, full content on demand)
261
+ // v0.7.5 — must initialize BEFORE _createAllTools() because ConsultSkillTool
262
+ // takes this._skillLoader as a constructor arg. Was a v0.7.5 init-order bug:
263
+ // _createAllTools ran first, passed undefined skillLoader to ConsultSkillTool,
264
+ // calls to consult_skill threw "Cannot read properties of undefined".
235
265
  this._skillLoader = new SkillLoader(config.language);
236
266
 
267
+ // Build all tool instances (but register phase-appropriate ones)
268
+ this._buildTools = this._createAllTools();
269
+
270
+ // v0.7.5 G-D1: populate <workspace>/skills/ with the initial phase's
271
+ // available skill set. Symlink with copy fallback. Re-populated on
272
+ // every phase advance/retreat (see _advancePhase).
273
+ try {
274
+ const res = this._skillLoader.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
275
+ this.eventLog?.append?.("skills_populated", {
276
+ phase: res.phase,
277
+ populated: res.populated,
278
+ failures: res.failures,
279
+ });
280
+ } catch { /* best-effort; skills/ population is not a critical-path failure */ }
281
+
282
+ // v0.8.1 P10-A: auto-populate <workspace>/workflows/common/llm_client.py
283
+ // from the template. Idempotent (skips if file already exists). Covers
284
+ // the bench-corpus flow where `kc-beta init` was bypassed. v0.8.0
285
+ // shipped this shim as embedded source in skill-to-workflow teaching;
286
+ // E2E #11 audits found BOTH agents ignored the teaching and wrote
287
+ // their own (non-canonical) llm_client.py. Shipping it as a template
288
+ // file the agent finds via filesystem walk is more robust.
289
+ try { this._populateWorkspaceCommonShims(); } catch { /* best-effort */ }
290
+
237
291
  // Register tools for initial phase
238
292
  this.toolRegistry = new ToolRegistry();
239
293
  this._registerToolsForPhase(this.currentPhase);
240
294
 
241
- // Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
242
- // phase so the first real false→true flip inside onToolResult triggers an
243
- // advance even when the user launches from a pre-populated workspace
244
- // whose exit criteria already happen to be met at boot.
245
- // resume() re-primes this from the restored pipeline state (see ~L566),
246
- // which is the correct behaviour there: resumed sessions that were already
247
- // past this phase shouldn't re-fire.
248
- this._lastReady = Object.fromEntries(
249
- Object.keys(this.pipelines).map((p) => [p, false]),
250
- );
295
+ // v0.8 P1-D: removed `_lastReady` edge-trigger state. It was the
296
+ // bookkeeping for `_maybeAutoAdvance`, which v0.7.4 G0b decommissioned
297
+ // (all call sites removed because v0.7.3's mid-session auto-advance
298
+ // chain regression was caused by it). The method definition itself
299
+ // is also gone in P1-D. Phase advance is now 100% explicit: agent's
300
+ // `phase_advance` tool or user re-prompt. Resume + rollback paths
301
+ // that previously re-primed `_lastReady` are no-ops now.
251
302
 
252
303
  // B0.1: Heap sampler. Parent engines only — sub-agents share a process
253
304
  // with the parent and would double-log. Writes a single JSONL line
@@ -258,6 +309,111 @@ export class AgentEngine {
258
309
  this._heapSamplerStop = this._isSubagent ? null : this._startHeapSampler();
259
310
  }
260
311
 
312
+ /**
313
+ * v0.8 P1-B: overlay workspace .env onto this.config now that
314
+ * this.workspace.cwd is known. Only fills in fields where the current
315
+ * config value was a gc fallback (empty OR the gc default) — does NOT
316
+ * override fields that came from process.env (those win at
317
+ * loadSettings() time and stay winning).
318
+ *
319
+ * Without this overlay, workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 /
320
+ * TIER1..4 / LANGUAGE are silently ignored — the v0.7.4 G1b OCR_MODEL_TIER1
321
+ * alias fix landed at the config layer but never reached the runtime
322
+ * because loadSettings() is called without a workspace path.
323
+ */
324
+ /**
325
+ * v0.8.1 P10-A: copy canonical `workflows/common/*.py` shims from the
326
+ * bundled template if they're missing in the workspace. Provides
327
+ * `llm_client.py` (worker LLM HTTP shim, provider-agnostic) and
328
+ * `utils.py` (strip_annotations + helpers). Idempotent — never
329
+ * overwrites existing files (agent edits stay intact).
330
+ *
331
+ * Runs at engine init. Covers bench-corpus mode where `kc-beta init`
332
+ * doesn't run; init-flow workspaces already have these from copyDir.
333
+ */
334
+ _populateWorkspaceCommonShims() {
335
+ const __dirname = path.dirname(new URL(import.meta.url).pathname);
336
+ const templateRoot = path.resolve(__dirname, "..", "..", "template", "workflows", "common");
337
+ if (!fs.existsSync(templateRoot)) return;
338
+
339
+ const targetRoot = path.join(this.workspace.cwd, "workflows", "common");
340
+ fs.mkdirSync(targetRoot, { recursive: true });
341
+
342
+ const copied = [];
343
+ const skipped = [];
344
+ for (const entry of fs.readdirSync(templateRoot)) {
345
+ if (!entry.endsWith(".py") || entry.startsWith(".")) continue;
346
+ const srcPath = path.join(templateRoot, entry);
347
+ const dstPath = path.join(targetRoot, entry);
348
+ if (fs.existsSync(dstPath)) {
349
+ skipped.push(entry);
350
+ continue;
351
+ }
352
+ try {
353
+ fs.copyFileSync(srcPath, dstPath);
354
+ copied.push(entry);
355
+ } catch { /* best-effort */ }
356
+ }
357
+
358
+ if (copied.length > 0) {
359
+ try {
360
+ this.eventLog?.append?.("workflows_common_populated", { copied, skipped });
361
+ } catch { /* best-effort */ }
362
+ }
363
+ }
364
+
365
+ _overlayWorkspaceEnv() {
366
+ if (!this.workspace?.cwd) return;
367
+ const envPath = path.join(this.workspace.cwd, ".env");
368
+ if (!fs.existsSync(envPath)) return;
369
+ let wsEnv;
370
+ try { wsEnv = loadEnvFile(envPath); } catch { return; }
371
+ if (!wsEnv || typeof wsEnv !== "object") return;
372
+
373
+ // VLM tiers — workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 wins over
374
+ // gc's vlm_tiers.tier1 default. process.env precedence preserved
375
+ // because loadSettings already applied it; we only fill in slots
376
+ // that fell through to gc-or-empty.
377
+ const overlays = [
378
+ { configKey: "vlmTier1", envKey: ["VLM_TIER1", "OCR_MODEL_TIER1"] },
379
+ { configKey: "vlmTier2", envKey: ["VLM_TIER2", "OCR_MODEL_TIER2"] },
380
+ { configKey: "vlmTier3", envKey: ["VLM_TIER3", "OCR_MODEL_TIER3"] },
381
+ { configKey: "tier1", envKey: ["TIER1"] },
382
+ { configKey: "tier2", envKey: ["TIER2"] },
383
+ { configKey: "tier3", envKey: ["TIER3"] },
384
+ { configKey: "tier4", envKey: ["TIER4"] },
385
+ { configKey: "language", envKey: ["LANGUAGE"] },
386
+ ];
387
+
388
+ const applied = [];
389
+ for (const { configKey, envKey } of overlays) {
390
+ // Find first non-empty workspace .env value for this config key
391
+ let wsValue = "";
392
+ for (const k of envKey) {
393
+ if (wsEnv[k]) { wsValue = wsEnv[k]; break; }
394
+ }
395
+ if (!wsValue) continue;
396
+ // Skip if process.env has the same key set — penv already won
397
+ const penvWon = envKey.some((k) => process.env[k] && process.env[k] !== wsValue);
398
+ if (penvWon) continue;
399
+ // Apply the workspace value
400
+ if (this.config[configKey] !== wsValue) {
401
+ applied.push({ key: configKey, from: this.config[configKey] || "(empty)", to: wsValue });
402
+ this.config[configKey] = wsValue;
403
+ }
404
+ }
405
+
406
+ // Audit visibility: emit a one-time event listing what was overlaid.
407
+ if (applied.length > 0) {
408
+ try {
409
+ this.eventLog?.append?.("workspace_env_overlay", {
410
+ envPath: path.relative(this.workspace.cwd, envPath),
411
+ fields: applied,
412
+ });
413
+ } catch { /* best-effort */ }
414
+ }
415
+ }
416
+
261
417
  /**
262
418
  * Start sampling process.memoryUsage() every 60 s into logs/heap.jsonl.
263
419
  * Returns a stop fn. Timer is .unref()'d so it never keeps the process
@@ -267,11 +423,22 @@ export class AgentEngine {
267
423
  _startHeapSampler() {
268
424
  const logDir = path.join(this.workspace.cwd, "logs");
269
425
  const logPath = path.join(logDir, "heap.jsonl");
426
+ let stopped = false;
427
+ let lastSampleAt = 0;
428
+
270
429
  const sample = () => {
271
430
  try {
272
431
  const mem = process.memoryUsage();
432
+ const now = Date.now();
433
+ // v0.8 P1-C: track skipped intervals. If more than 90s elapsed
434
+ // since last sample on a 60s cadence, the previous tick was missed
435
+ // (event loop sleep, GC pause, etc.). Surface in the row so the
436
+ // post-mortem audit can detect gaps without needing to compare
437
+ // adjacent timestamps.
438
+ const skippedMs = lastSampleAt > 0 ? (now - lastSampleAt - 60_000) : 0;
439
+ lastSampleAt = now;
273
440
  const row = {
274
- t: new Date().toISOString(),
441
+ t: new Date(now).toISOString(),
275
442
  seq: this.eventLog?.currentSeq ?? 0,
276
443
  phase: this.currentPhase,
277
444
  rssMB: Math.round(mem.rss / 1024 / 1024),
@@ -288,17 +455,36 @@ export class AgentEngine {
288
455
  // and the row gets `componentsErr` instead.
289
456
  components: this._sampleComponents(),
290
457
  };
458
+ if (skippedMs > 0) row.skippedMs = skippedMs;
291
459
  fs.mkdirSync(logDir, { recursive: true });
292
460
  fs.appendFileSync(logPath, JSON.stringify(row) + "\n", "utf-8");
293
461
  } catch { /* never fatal */ }
294
462
  };
463
+
464
+ // v0.8 P1-C: self-rescheduling setTimeout instead of setInterval. The
465
+ // 资管 v0.7.5 session shows only 2 heap.jsonl entries (12:39:40 start
466
+ // + 12:40:40 first tick) across an 18-hour run — the unref'd
467
+ // setInterval was somehow dropped between event-loop idle phases.
468
+ // setTimeout reschedules from inside the sample callback, so the
469
+ // timer is re-registered every tick. unref'd so we don't block exit.
470
+ let timeoutHandle = null;
471
+ const scheduleNext = () => {
472
+ if (stopped) return;
473
+ timeoutHandle = setTimeout(() => {
474
+ sample();
475
+ scheduleNext();
476
+ }, 60_000);
477
+ timeoutHandle.unref?.();
478
+ };
479
+
295
480
  // Record one sample at startup so we have a baseline even on short runs.
296
481
  sample();
297
- const timer = setInterval(sample, 60_000);
298
- timer.unref?.();
482
+ scheduleNext();
483
+
299
484
  return () => {
300
485
  try {
301
- clearInterval(timer);
486
+ stopped = true;
487
+ if (timeoutHandle) clearTimeout(timeoutHandle);
302
488
  sample(); // one final sample on shutdown
303
489
  } catch { /* ignore */ }
304
490
  };
@@ -415,7 +601,10 @@ export class AgentEngine {
415
601
  return {
416
602
  // Always available (BUILD + DISTILL)
417
603
  core: [
418
- new SandboxExecTool(this.workspace, this.config.kcExecTimeout),
604
+ new SandboxExecTool(this.workspace, {
605
+ defaultTimeoutMs: this.config.kcExecDefaultTimeoutMs,
606
+ maxTimeoutMs: this.config.kcExecMaxTimeoutMs,
607
+ }),
419
608
  new WorkspaceFileTool(this.workspace, this.versionManager),
420
609
  new CopyToWorkspaceTool(this.workspace, {
421
610
  largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
@@ -455,7 +644,12 @@ export class AgentEngine {
455
644
  mineruApiKey: this.config.mineruApiKey,
456
645
  llmApiKey: workerApiKey,
457
646
  llmBaseUrl: workerBaseUrl,
647
+ // v0.8.1 P9-B: live-read vlmTier1 so workspace_env_overlay
648
+ // changes after tool construction (or mid-run .env edits)
649
+ // reach document_parse. The static `ocrModel` is the
650
+ // construction-time fallback; getOcrModel takes precedence.
458
651
  ocrModel: vlmModel,
652
+ getOcrModel: () => this.config.vlmTier1 || vlmModel,
459
653
  }),
460
654
  new DocumentSearchTool(this.workspace),
461
655
  // Group C — chunker/RAG infrastructure ported from AMC app. Core
@@ -476,16 +670,27 @@ export class AgentEngine {
476
670
  () => this.currentPhase,
477
671
  ),
478
672
  new WebSearchTool(this.config.tavilyApiKey),
479
- // v0.7.3: completes the v0.7.0 "agent owns TaskBoard" design.
480
- // Skills already reference TaskCreate by name; these tools make
481
- // that contract truthful. See task-board.js + work-decomposition
482
- // SKILL.md. Skipped for subagents — they don't own a task board
483
- // (taskManager is null in subagent scope, line 216).
673
+ // v0.7.4 (re-applied from v0.7.3 G2b): TaskCreate /
674
+ // TaskUpdate / TaskComplete — agent populates the
675
+ // Ralph-loop queue for the CURRENT phase only. Phase
676
+ // boundaries exit the loop (v0.7.4 G0c). Skipped for
677
+ // subagents (taskManager null in subagent scope).
484
678
  ...(this.taskManager ? [
485
679
  new TaskCreateTool(this.workspace, this.taskManager),
486
680
  new TaskUpdateTool(this.workspace, this.taskManager),
487
681
  new TaskCompleteTool(this.workspace, this.taskManager),
488
682
  ] : []),
683
+ // v0.7.5: consult_skill loads a meta-skill body into conversation
684
+ // history on demand. Always-loaded skills are already in the
685
+ // system prompt via SkillLoader.formatForContext; this tool covers
686
+ // the "available" set for the current phase. Both main + subagents
687
+ // register their own — each has its own skillLoader + phase.
688
+ new ConsultSkillTool(
689
+ this.workspace,
690
+ this._skillLoader,
691
+ () => this.currentPhase,
692
+ this.eventLog,
693
+ ),
489
694
  ],
490
695
  // Distillation+ only (DISTILL mode)
491
696
  distill: [
@@ -944,16 +1149,9 @@ export class AgentEngine {
944
1149
  }
945
1150
  }
946
1151
 
947
- // Re-prime _lastReady AFTER importState so it reflects the restored
948
- // pipeline milestones, not the empty defaults from constructor.
949
- // (Bug 5 fix — without this, resume reignites auto-advance.)
950
- for (const phase of Object.keys(engine.pipelines)) {
951
- try {
952
- engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
953
- } catch {
954
- engine._lastReady[phase] = false;
955
- }
956
- }
1152
+ // v0.8 P1-D: removed `_lastReady` re-prime. Was the bookkeeping for
1153
+ // `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase advance
1154
+ // is explicit now; nothing to re-prime on resume.
957
1155
 
958
1156
  engine.eventLog.append("session_resume", {
959
1157
  resumedPhase: engine.currentPhase,
@@ -1062,6 +1260,29 @@ export class AgentEngine {
1062
1260
  // budget. Better to lose some history than crash with HTTP 400.
1063
1261
  messages = this._enforceTokenBudget(messages);
1064
1262
 
1263
+ // v0.8 P3-A: skill usage counter — emit one skill_byte_send event
1264
+ // per always-loaded skill per LLM send. Captures the cost of having
1265
+ // a skill body inlined in the system prompt (Layer B per design doc).
1266
+ // Agent-blind: events go to events.jsonl only; never surfaced to the
1267
+ // agent's context. consult_skill tool results emit their own
1268
+ // skill_invoked events with via_tool="consult_skill" (already in
1269
+ // place since v0.7.5 G-C4), so we don't double-count those here.
1270
+ try {
1271
+ const { alwaysLoaded } = this._skillLoader.getPhaseSkillSet(this.currentPhase) || {};
1272
+ if (Array.isArray(alwaysLoaded)) {
1273
+ for (const skill of alwaysLoaded) {
1274
+ const body = this._skillLoader.loadSkillBody(skill);
1275
+ if (!body) continue;
1276
+ this.eventLog.append("skill_byte_send", {
1277
+ skill,
1278
+ via: "system_prompt_always_loaded",
1279
+ byte_count: body.length,
1280
+ phase: this.currentPhase,
1281
+ });
1282
+ }
1283
+ }
1284
+ } catch { /* counter is best-effort; never break the turn */ }
1285
+
1065
1286
  this.eventLog.append("llm_start", {
1066
1287
  model: this.config.kcModel,
1067
1288
  messageCount: messages.length,
@@ -1196,11 +1417,11 @@ export class AgentEngine {
1196
1417
  }
1197
1418
  this._totalTurns = (this._totalTurns || 0) + 1;
1198
1419
 
1199
- // Bug 4 trigger (1): re-check phase criteria at end of every turn —
1200
- // KC may have advanced state via conversation alone, without any
1201
- // tool that the pipeline narrowly watches.
1202
- const advancedEv = this._maybeAutoAdvance();
1203
- if (advancedEv) yield advancedEv;
1420
+ // v0.7.4 G0b: removed `_maybeAutoAdvance()` auto-fire here.
1421
+ // Phase advance is now 100% explicit (agent's `phase_advance`
1422
+ // tool, or user re-prompt). v0.7.3 phase-control regression
1423
+ // was caused by this edge-triggered auto-advance firing mid-
1424
+ // session and chaining into next phase without user check-in.
1204
1425
 
1205
1426
  this.eventLog.append("turn_complete", {});
1206
1427
  this.saveState();
@@ -1289,23 +1510,55 @@ export class AgentEngine {
1289
1510
 
1290
1511
  this.eventLog.append("tool_result", {
1291
1512
  name: tc.name,
1513
+ input: inputData,
1292
1514
  output: result.content || "",
1293
1515
  isError: result.isError,
1294
1516
  traceId: offload?.traceId || null,
1295
1517
  });
1296
1518
 
1297
- // D3a: trace skill invocations. When the agent reads a SKILL.md via
1298
- // workspace_file (the canonical way KC "uses" a skill, since skills
1299
- // are progressively-disclosed markdown), emit a skill_invoked event.
1300
- // Makes "which skills did KC actually consult?" answerable in post-run
1301
- // analysis — before this, skills were opaque to the event log.
1519
+ // v0.7.5 (G-F4): added `input` above so events.jsonl carries the
1520
+ // tool inputs (v0.7.4 G1c only patched the AgentEvent yield path,
1521
+ // missed the persistence path — audit confirmed 0/453 + 0/946
1522
+ // tool_result events had `input` in v0.7.4 sessions).
1523
+
1524
+ // D3a: trace skill invocations. v0.7.5 (G-C6): only fire on
1525
+ // READS of meta-skill paths. Writes to rule_skills/<id>/SKILL.md
1526
+ // during skill_authoring are NOT skill invocations — they're the
1527
+ // agent producing its own deliverable. The old "(unknown)" spam
1528
+ // (100% of events in v0.7.1 + v0.7.4 sessions) is gone.
1529
+ //
1530
+ // Note: meta-skill body reads now happen via consult_skill, which
1531
+ // emits skill_invoked itself (with the real skill name). This
1532
+ // path-matching emission stays only as a fallback for any agent
1533
+ // that reads a SKILL.md path directly (out of pattern).
1302
1534
  try {
1535
+ // v0.8 P1-E: heredoc detection. `cat << 'EOF' > /tmp/skill.md`
1536
+ // matches the read-verb regex but is actually a WRITE — the
1537
+ // heredoc operator `<<` means cat is consuming inline content
1538
+ // (the heredoc body), not a file path. 资管 v0.7.5 audit § 5f
1539
+ // confirmed 1 spurious skill_invoked event of this kind.
1540
+ // Excluding any command with `<<` from the isRead classification.
1541
+ const cmd = String(inputData?.command || "");
1542
+ const isHeredoc = cmd.includes("<<");
1543
+ const isRead =
1544
+ (tc.name === "workspace_file" && inputData?.operation === "read") ||
1545
+ (tc.name === "sandbox_exec" && !isHeredoc && /\b(cat|head|tail|less|grep|view|read)\b/.test(cmd));
1303
1546
  if (
1304
1547
  !result.isError &&
1548
+ isRead &&
1305
1549
  (tc.name === "workspace_file" || tc.name === "sandbox_exec")
1306
1550
  ) {
1307
1551
  const p = String(inputData?.path || inputData?.command || "");
1308
- const skillMatch = p.match(/(?:template\/)?skills\/[a-z-]+\/(?:meta-meta|meta|skill-creator)\/([a-zA-Z0-9_-]+)(?:\/SKILL\.md|\/)?|\bSKILL\.md\b/);
1552
+ // v0.7.5 flat layout: skills/<name>/SKILL.md (workspace scope)
1553
+ // OR template/skills/<lang>/<name>/SKILL.md (template scope, rare)
1554
+ // Deep layout backward-compat preserved for any stragglers.
1555
+ // v0.8 P0-B: accept lowercase `skill.md` too — 资管 audit § 3.2
1556
+ // found agents writing lowercase consistently (14/14 rule_skills/).
1557
+ // Limited to exact uppercase OR exact lowercase (no mixed case)
1558
+ // to avoid spurious matches on unrelated files (e.g., `Skill.md`).
1559
+ const skillMatch = p.match(
1560
+ /(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/(?:SKILL|skill)\.md\b/
1561
+ ) || p.match(/\b(?:SKILL|skill)\.md\b/);
1309
1562
  if (skillMatch) {
1310
1563
  const skillName = skillMatch[1] || "(unknown)";
1311
1564
  this.eventLog.append("skill_invoked", {
@@ -1386,12 +1639,9 @@ export class AgentEngine {
1386
1639
  }
1387
1640
  }
1388
1641
 
1389
- // Bug 4 fix: re-check exit criteria after every tool-result loop, not
1390
- // just from pipeline.onToolResult. The pipeline's describeState() (called
1391
- // on every turn) already re-scans, so exitCriteriaMet() is accurate; we
1392
- // just need to act on it eagerly.
1393
- const ev = this._maybeAutoAdvance();
1394
- if (ev) yield ev;
1642
+ // v0.7.4 G0b: removed post-tool `_maybeAutoAdvance()` call.
1643
+ // Phase advance is now 100% explicit. See `_runTaskLoopSerial`
1644
+ // phase-change-exit guard for the loop-level checkpoint.
1395
1645
 
1396
1646
  } catch (err) {
1397
1647
  // A8: If the LLM client tagged the stream termination reason, pass
@@ -1409,10 +1659,12 @@ export class AgentEngine {
1409
1659
  }
1410
1660
 
1411
1661
  /**
1412
- * Centralized phase transition (Bug 4). All three triggers route through here:
1662
+ * Centralized phase transition (Bug 4). Two triggers route through here
1663
+ * after v0.7.4 G0b + v0.8 P1-D:
1413
1664
  * (1) pipeline.onToolResult returning phase_ready
1414
- * (2) post-turn auto-check via _maybeAutoAdvance
1415
- * (3) explicit user request via the phase_advance tool
1665
+ * (2) explicit user request via the phase_advance tool
1666
+ * (The historical (2) post-turn auto-check via `_maybeAutoAdvance` was
1667
+ * removed; phase advance is 100% explicit.)
1416
1668
  *
1417
1669
  * Reachability: by default only forward-by-one transitions per NEXT_PHASE.
1418
1670
  * Set `force: true` to allow non-adjacent or backward transitions (e.g. user
@@ -1490,9 +1742,17 @@ export class AgentEngine {
1490
1742
  try { criteriaMet = !!fromPipeline?.exitCriteriaMet?.(); } catch { criteriaMet = true; }
1491
1743
  if (!criteriaMet) {
1492
1744
  const counts = this._buildEngineCountsBlock(this.currentPhase);
1745
+ // v0.8 P0-E: prescriptive hint in the event payload so post-mortem
1746
+ // audits see what the agent was told (matches what phase-advance.js
1747
+ // returns to the LLM).
1748
+ let prescriptive = null;
1749
+ try {
1750
+ prescriptive = getPrescriptiveHint(this.currentPhase, null, counts || "");
1751
+ } catch { /* hint generation is best-effort */ }
1493
1752
  this.eventLog.append("phase_advance_refused", {
1494
1753
  from: this.currentPhase, to: nextPhase, reason,
1495
1754
  hint: "exit criteria not met by engine telemetry",
1755
+ prescriptive_hint: prescriptive,
1496
1756
  engineCounts: counts || null,
1497
1757
  });
1498
1758
  return false;
@@ -1541,6 +1801,20 @@ export class AgentEngine {
1541
1801
  this.workspace.setPhase(this.currentPhase);
1542
1802
  this._createTasksForPhase(this.currentPhase);
1543
1803
 
1804
+ // v0.7.5 G-D2: re-populate <workspace>/skills/ with the new phase's
1805
+ // available set. Symlinks are wiped + recreated. Agent's `ls skills/`
1806
+ // and any read-by-path reflects the current phase's allowlist.
1807
+ try {
1808
+ const res = this._skillLoader?.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
1809
+ if (res) {
1810
+ this.eventLog.append("skills_populated", {
1811
+ phase: res.phase,
1812
+ populated: res.populated,
1813
+ failures: res.failures,
1814
+ });
1815
+ }
1816
+ } catch { /* best-effort */ }
1817
+
1544
1818
  // v0.7.0 N (#94): give the entered pipeline a chance to do
1545
1819
  // phase-entry setup. Used by finalization to copy the release
1546
1820
  // template into output/releases/v1/. Other pipelines are no-ops.
@@ -1553,23 +1827,16 @@ export class AgentEngine {
1553
1827
  });
1554
1828
  }
1555
1829
 
1556
- // v0.6.2 J2: on rollback, reset the rolled-FROM phase's lastReady
1557
- // edge-trigger so that if the agent revisits it and re-flips
1558
- // exit-criteria true, _maybeAutoAdvance will fire correctly. Without
1559
- // this, the auto-advance edge trigger stays latched true and the
1560
- // moment the agent returns to fromPhase the engine immediately
1561
- // bounces them back out — defeating the rollback.
1562
- if (direction === "rollback" && this._lastReady) {
1563
- this._lastReady[fromPhase] = false;
1564
- }
1830
+ // v0.8 P1-D: removed `_lastReady` rollback reset. Was the bookkeeping
1831
+ // for `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase
1832
+ // advance is explicit now; rollback just needs to commit the new phase.
1565
1833
 
1566
1834
  this.saveState();
1567
1835
 
1568
1836
  // B8: Soft signal — surface any sub-agents left running from the prior
1569
1837
  // phase so the main agent's next turn can decide whether to kill them.
1570
- // NOT automated: phase_advance can fire from _maybeAutoAdvance on a
1571
- // criteria-flip, and auto-killing would couple lifecycle with blast
1572
- // radius. This just informs.
1838
+ // NOT automated: auto-killing would couple lifecycle with blast radius.
1839
+ // This just informs.
1573
1840
  try {
1574
1841
  const agentTool = this._buildTools?.core?.find((t) => t?.name === "agent_tool");
1575
1842
  const runningIds = agentTool?.getRunningTaskIds?.() || [];
@@ -1769,35 +2036,12 @@ export class AgentEngine {
1769
2036
  return false;
1770
2037
  }
1771
2038
 
1772
- /**
1773
- * Bug 4 trigger (1) auto-detect, edge-triggered (Bug 5): only fires on a
1774
- * fresh false → true flip in `exitCriteriaMet()`. Sessions resumed in an
1775
- * already-met state do nothing; users iterating in a phase whose criteria
1776
- * have been met for a while do nothing. Real new evidence is required.
1777
- */
1778
- _maybeAutoAdvance() {
1779
- const phase = this.currentPhase;
1780
- const pipeline = this.pipelines[phase];
1781
- let nowReady = false;
1782
- try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
1783
-
1784
- if (!nowReady) {
1785
- this._lastReady[phase] = false;
1786
- return null;
1787
- }
1788
- // Edge-trigger: nowReady && !wasReady
1789
- if (this._lastReady[phase]) return null;
1790
- this._lastReady[phase] = true;
1791
-
1792
- const next = NEXT_PHASE[phase];
1793
- if (!next) return null;
1794
- const advanced = this._advancePhase(next, "exit criteria flipped to met");
1795
- if (!advanced) return null;
1796
- return new AgentEvent({
1797
- type: "pipeline_event",
1798
- data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
1799
- });
1800
- }
2039
+ // v0.8 P1-D: `_maybeAutoAdvance()` deleted. The method auto-fired phase
2040
+ // advance on a false→true flip of `exitCriteriaMet()`, but v0.7.3
2041
+ // showed mid-session auto-advance chains were a regression hazard
2042
+ // (user couldn't review between phases). v0.7.4 G0b removed all call
2043
+ // sites; v0.8 P1-D removes the now-dead method definition + the
2044
+ // `_lastReady` bookkeeping it relied on. Phase advance is 100% explicit.
1801
2045
 
1802
2046
  /**
1803
2047
  * Tool-call offloading. If the tool's content exceeds the threshold,
@@ -2106,10 +2350,40 @@ export class AgentEngine {
2106
2350
  /** B1: original serial ralph-loop path — one task at a time, shared
2107
2351
  * conversation history. Unchanged from pre-v0.6.0 behavior. */
2108
2352
  async *_runTaskLoopSerial(userMessage) {
2109
- // Run the initial turn (user's request)
2353
+ // v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
2354
+ // - Interactive sessions (marathon NOT active): capture startingPhase
2355
+ // BEFORE the initial runTurn, and exit the loop on ANY phase change
2356
+ // (including within the initial runTurn). One user prompt = one
2357
+ // phase advance. Path (a) per design doc Q5 lean.
2358
+ // - Marathon sessions: the kc-marathon driver provides per-phase
2359
+ // prompts via .kc_marathon/inbox.jsonl, so the engine doesn't need
2360
+ // F5's checkpoint — phase chaining is OK because each chained phase
2361
+ // STILL gets its own driver-emitted prompt next tick.
2362
+ //
2363
+ // v0.7.3 demonstrated why F5 matters interactively: auto-chained
2364
+ // phase advances skip the user check-in cycle and broke phase
2365
+ // control in team testing. v0.7.4 G0c first fixed it via
2366
+ // post-initial-runTurn exit; v0.7.5 added the strict capture-BEFORE
2367
+ // refinement; v0.8 P5-A preserves both with the marathon escape;
2368
+ // v0.8.1 P8-A switched marathon-active source from filesystem
2369
+ // marker to inline driver instance.
2370
+ const marathonActive = this.isMarathonActive();
2371
+ const startingPhase = this.currentPhase;
2110
2372
  yield* this.runTurn(userMessage);
2111
2373
 
2112
- // Auto-continue through pending tasks
2374
+ // F5 strict gate: if interactive AND phase changed during initial
2375
+ // runTurn, exit immediately (don't auto-continue tasks in the new
2376
+ // phase). Marathon bypasses — driver decides pacing.
2377
+ if (!marathonActive && this.currentPhase !== startingPhase) {
2378
+ this.eventLog.append("ralph_loop_exit", {
2379
+ reason: "f5_strict_initial_turn",
2380
+ from: startingPhase,
2381
+ to: this.currentPhase,
2382
+ });
2383
+ return;
2384
+ }
2385
+
2386
+ // Auto-continue through pending tasks (within current phase only)
2113
2387
  while (this.taskManager.getNextPending()) {
2114
2388
  // v0.7.0 #93: budget-aware compact threshold. The old
2115
2389
  // `messages.length > 15` was message-count-based and frozen
@@ -2170,28 +2444,102 @@ export class AgentEngine {
2170
2444
  },
2171
2445
  });
2172
2446
 
2173
- // Bug 4 trigger (2): auto-advance when all phase tasks are done AND
2174
- // the pipeline's exit criteria are also met (Bug 5 fix — task state
2175
- // alone is a ralph-loop convenience, not authoritative phase signal;
2176
- // tasks could be marked skipped manually or by an editor).
2177
- if (this._allCurrentPhaseTasksComplete()) {
2178
- const pipeline = this.pipelines[this.currentPhase];
2179
- let exitMet = false;
2180
- try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
2181
- if (exitMet) {
2182
- const next = NEXT_PHASE[this.currentPhase];
2183
- if (next) {
2184
- const advanced = this._advancePhase(next, "all current-phase tasks completed + exit criteria met");
2185
- if (advanced) {
2186
- yield new AgentEvent({
2187
- type: "pipeline_event",
2188
- data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
2189
- });
2190
- }
2191
- }
2192
- }
2447
+ // v0.7.4 G0c: phase boundary = user checkpoint. Exit the
2448
+ // loop if the agent advanced phase during this task
2449
+ // even if pre-created tasks for the new phase are queued.
2450
+ // User sees current state and explicitly re-prompts to
2451
+ // begin the next phase. Marathon-style end-to-end
2452
+ // autonomy belongs to an external driver (Claude Code
2453
+ // /loop pattern), not the engine.
2454
+ if (this.currentPhase !== startingPhase) {
2455
+ this.eventLog.append("ralph_loop_exit", {
2456
+ reason: "phase_changed",
2457
+ from: startingPhase,
2458
+ to: this.currentPhase,
2459
+ });
2460
+ break;
2461
+ }
2462
+ }
2463
+
2464
+ // v0.8.1 P8-A: marathon mode — inline driver. After the F5 phase-
2465
+ // boundary exit, if marathon is active, query the driver for the
2466
+ // next continuation prompt and run additional turns until the driver
2467
+ // signals stop (null return). State machine logic unchanged from
2468
+ // v0.8.0; the I/O wrapper just shifted from filesystem-watcher to
2469
+ // direct method calls.
2470
+ while (this.marathonDriver) {
2471
+ const turnsSnapshot = this.marathonDriver.turnsThisPhase;
2472
+ const phaseChanged = this.currentPhase !== this.marathonDriver.currentPhase;
2473
+ const milestones = this._buildEngineCountsBlock(this.currentPhase) || {};
2474
+ const decision = this.marathonDriver.decideNext({
2475
+ currentPhase: this.currentPhase,
2476
+ milestones,
2477
+ phaseChanged,
2478
+ errorSeen: false, // engine surfaces errors via tool_result.isError; not propagated here for v0.8.1 MVP
2479
+ turnsThisPhase: turnsSnapshot + 1,
2480
+ });
2481
+ if (!decision) {
2482
+ // Stop condition met — driver returned null
2483
+ this.eventLog.append("marathon_detach", {
2484
+ reason: this.marathonDriver.stopReason || "unknown",
2485
+ decisions: this.marathonDriver.decisionCount,
2486
+ });
2487
+ this.marathonDriver = null;
2488
+ break;
2193
2489
  }
2490
+ this.eventLog.append("marathon_decision", {
2491
+ template: decision.template,
2492
+ reason: decision.reason,
2493
+ phase: this.currentPhase,
2494
+ });
2495
+ yield* this.runTurn(decision.prompt);
2496
+ // Loop back: another turn just completed; driver gets another decideNext call.
2497
+ }
2498
+ }
2499
+
2500
+ /**
2501
+ * v0.8.1 P8-A: activate marathon mode with a goal-description.
2502
+ * Called from cli/index.js's /marathon slash command handler.
2503
+ * The engine's next runTaskLoop will use marathonDriver.getInitialPrompt()
2504
+ * as the kickoff user message.
2505
+ *
2506
+ * @param {string} goal — the marathon goal description (user-typed)
2507
+ * @param {object} [opts] — {maxWallclockMs?, stuckAfterMs?}
2508
+ * @returns {object} {goal, language, startedAt} for confirmation
2509
+ */
2510
+ enterMarathonMode(goal, opts = {}) {
2511
+ if (this.marathonDriver) {
2512
+ throw new Error("Marathon already active — use /marathon off to disengage first");
2194
2513
  }
2514
+ this.marathonDriver = new MarathonDriver({
2515
+ goal,
2516
+ language: this.config.language || "en",
2517
+ maxWallclockMs: opts.maxWallclockMs,
2518
+ stuckAfterMs: opts.stuckAfterMs,
2519
+ });
2520
+ this.eventLog.append("marathon_attach", {
2521
+ goal: goal.slice(0, 200),
2522
+ language: this.config.language || "en",
2523
+ });
2524
+ return this.marathonDriver.getStatus();
2525
+ }
2526
+
2527
+ /** v0.8.1 P8-A: deactivate marathon mode. Returns final status snapshot. */
2528
+ exitMarathonMode(reason = "user_off") {
2529
+ if (!this.marathonDriver) return null;
2530
+ const status = this.marathonDriver.getStatus();
2531
+ this.marathonDriver.stop(reason);
2532
+ this.eventLog.append("marathon_detach", {
2533
+ reason,
2534
+ decisions: this.marathonDriver.decisionCount,
2535
+ });
2536
+ this.marathonDriver = null;
2537
+ return status;
2538
+ }
2539
+
2540
+ /** v0.8.1 P8-A: is marathon mode currently active? (for TUI status bar) */
2541
+ isMarathonActive() {
2542
+ return !!this.marathonDriver && !this.marathonDriver.stopped;
2195
2543
  }
2196
2544
 
2197
2545
  /**
@@ -2212,9 +2560,27 @@ export class AgentEngine {
2212
2560
  * amortized against the 2-4× wall-clock speedup.
2213
2561
  */
2214
2562
  async *_runTaskLoopParallel(userMessage, parallelism) {
2563
+ // v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
2564
+ // Mirror _runTaskLoopSerial — capture startingPhase BEFORE initial
2565
+ // runTurn so phase advance during the initial turn exits the loop
2566
+ // unless marathon is active.
2567
+ // v0.8.1 P8-A: marathon check now uses inline driver instance.
2568
+ const marathonActive = this.isMarathonActive();
2569
+ const startingPhase = this.currentPhase;
2570
+
2215
2571
  // Initial turn: main agent reads user request, creates tasks.
2216
2572
  yield* this.runTurn(userMessage);
2217
2573
 
2574
+ if (!marathonActive && this.currentPhase !== startingPhase) {
2575
+ this.eventLog.append("ralph_loop_exit", {
2576
+ reason: "f5_strict_initial_turn",
2577
+ from: startingPhase,
2578
+ to: this.currentPhase,
2579
+ mode: "parallel",
2580
+ });
2581
+ return;
2582
+ }
2583
+
2218
2584
  const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
2219
2585
  if (!agentTool) {
2220
2586
  // Shouldn't happen (agent_tool is core), but fall back safely.
@@ -2239,6 +2605,9 @@ export class AgentEngine {
2239
2605
  const inFlight = new Map();
2240
2606
 
2241
2607
  const dispatch = async () => {
2608
+ // v0.7.4 G0c: stop dispatching if phase changed since loop start.
2609
+ // In-flight workers complete naturally; queue stays untouched.
2610
+ if (this.currentPhase !== startingPhase) return;
2242
2611
  while (inFlight.size < parallelism) {
2243
2612
  const task = this.taskManager.claimNextPending(`pool${inFlight.size}`);
2244
2613
  if (!task) return;
@@ -2374,23 +2743,15 @@ export class AgentEngine {
2374
2743
 
2375
2744
  this.saveState();
2376
2745
 
2377
- // After all workers done, check for phase auto-advance (same as serial path).
2378
- if (this._allCurrentPhaseTasksComplete()) {
2379
- const pipeline = this.pipelines[this.currentPhase];
2380
- let exitMet = false;
2381
- try { exitMet = !!pipeline?.exitCriteriaMet?.(); } catch { exitMet = false; }
2382
- if (exitMet) {
2383
- const next = NEXT_PHASE[this.currentPhase];
2384
- if (next) {
2385
- const advanced = this._advancePhase(next, "all parallel tasks completed + exit criteria met");
2386
- if (advanced) {
2387
- yield new AgentEvent({
2388
- type: "pipeline_event",
2389
- data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
2390
- });
2391
- }
2392
- }
2393
- }
2746
+ // v0.7.4 G0c: if phase changed during the parallel run, log the
2747
+ // checkpoint event for the audit trail. No auto-advance — that
2748
+ // belongs to the agent (phase_advance tool) or user re-prompt.
2749
+ if (this.currentPhase !== startingPhase) {
2750
+ this.eventLog.append("ralph_loop_exit", {
2751
+ reason: "phase_changed",
2752
+ from: startingPhase,
2753
+ to: this.currentPhase,
2754
+ });
2394
2755
  }
2395
2756
  }
2396
2757