kc-beta 0.7.5 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/README.md +47 -0
  2. package/package.json +3 -2
  3. package/src/agent/context.js +17 -1
  4. package/src/agent/engine.js +467 -100
  5. package/src/agent/llm-client.js +24 -1
  6. package/src/agent/pipelines/_advance-hints.js +92 -0
  7. package/src/agent/pipelines/_milestone-derive.js +325 -20
  8. package/src/agent/pipelines/skill-authoring.js +49 -3
  9. package/src/agent/tools/agent-tool.js +2 -2
  10. package/src/agent/tools/consult-skill.js +15 -0
  11. package/src/agent/tools/dashboard-render.js +48 -1
  12. package/src/agent/tools/document-parse.js +31 -2
  13. package/src/agent/tools/phase-advance.js +17 -13
  14. package/src/agent/tools/release.js +343 -7
  15. package/src/agent/tools/sandbox-exec.js +65 -8
  16. package/src/agent/tools/worker-llm-call.js +95 -15
  17. package/src/agent/workspace.js +25 -4
  18. package/src/cli/components.js +4 -1
  19. package/src/cli/index.js +125 -8
  20. package/src/config.js +19 -2
  21. package/src/marathon/driver.js +217 -0
  22. package/src/marathon/prompts.js +93 -0
  23. package/template/.env.template +17 -1
  24. package/template/AGENT.md +2 -2
  25. package/template/skills/en/auto-model-selection/SKILL.md +55 -35
  26. package/template/skills/en/bootstrap-workspace/SKILL.md +27 -0
  27. package/template/skills/en/compliance-judgment/SKILL.md +14 -0
  28. package/template/skills/en/confidence-system/SKILL.md +30 -8
  29. package/template/skills/en/corner-case-management/SKILL.md +53 -33
  30. package/template/skills/en/cross-document-verification/SKILL.md +88 -83
  31. package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
  32. package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
  33. package/template/skills/en/data-sensibility/SKILL.md +19 -12
  34. package/template/skills/en/document-chunking/SKILL.md +99 -15
  35. package/template/skills/en/entity-extraction/SKILL.md +14 -4
  36. package/template/skills/en/quality-control/SKILL.md +23 -0
  37. package/template/skills/en/rule-extraction/SKILL.md +92 -94
  38. package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
  39. package/template/skills/en/skill-authoring/SKILL.md +85 -2
  40. package/template/skills/en/skill-creator/SKILL.md +25 -3
  41. package/template/skills/en/skill-to-workflow/SKILL.md +73 -1
  42. package/template/skills/en/task-decomposition/SKILL.md +1 -1
  43. package/template/skills/en/tree-processing/SKILL.md +1 -1
  44. package/template/skills/en/version-control/SKILL.md +15 -0
  45. package/template/skills/en/work-decomposition/SKILL.md +52 -32
  46. package/template/skills/phase_skills.yaml +5 -0
  47. package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
  48. package/template/skills/zh/bootstrap-workspace/SKILL.md +27 -0
  49. package/template/skills/zh/compliance-judgment/SKILL.md +51 -37
  50. package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
  51. package/template/skills/zh/confidence-system/SKILL.md +34 -9
  52. package/template/skills/zh/corner-case-management/SKILL.md +71 -104
  53. package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
  54. package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
  55. package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
  56. package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
  57. package/template/skills/zh/data-sensibility/SKILL.md +13 -0
  58. package/template/skills/zh/document-chunking/SKILL.md +101 -18
  59. package/template/skills/zh/document-parsing/SKILL.md +65 -65
  60. package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
  61. package/template/skills/zh/entity-extraction/SKILL.md +78 -68
  62. package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
  63. package/template/skills/zh/quality-control/SKILL.md +23 -0
  64. package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
  65. package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
  66. package/template/skills/zh/rule-extraction/SKILL.md +199 -188
  67. package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
  68. package/template/skills/zh/skill-authoring/SKILL.md +136 -58
  69. package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
  70. package/template/skills/zh/skill-creator/SKILL.md +215 -201
  71. package/template/skills/zh/skill-creator/references/schemas.md +60 -60
  72. package/template/skills/zh/skill-to-workflow/SKILL.md +73 -1
  73. package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
  74. package/template/skills/zh/task-decomposition/SKILL.md +1 -1
  75. package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
  76. package/template/skills/zh/tree-processing/SKILL.md +67 -63
  77. package/template/skills/zh/version-control/SKILL.md +15 -0
  78. package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
  79. package/template/skills/zh/work-decomposition/SKILL.md +52 -30
  80. package/template/workflows/common/llm_client.py +168 -0
  81. package/template/workflows/common/utils.py +132 -0
@@ -5,6 +5,9 @@ import {
5
5
  deriveSkillAuthoringMilestones,
6
6
  deriveSkillTestingMilestones,
7
7
  } from "./pipelines/_milestone-derive.js";
8
+ import { getPrescriptiveHint } from "./pipelines/_advance-hints.js";
9
+ import { loadEnvFile } from "../config.js";
10
+ import { MarathonDriver } from "../marathon/driver.js";
8
11
  import { ContextAssembler } from "./context.js";
9
12
  import { ConversationHistory } from "./history.js";
10
13
  import { findSafeSplitPoint } from "./message-utils.js";
@@ -166,6 +169,10 @@ export class AgentEngine {
166
169
  { gitAutoCommit: config.gitAutoCommit !== false },
167
170
  );
168
171
 
172
+ // v0.8 P1-B: workspace .env overlay deferred until after eventLog
173
+ // init (see _overlayWorkspaceEnv call below). Workspace dir is
174
+ // known here, but the overlay's audit event needs eventLog.
175
+
169
176
  // For sub-agents, persistence (history/events/state) lives under
170
177
  // sub_agents/<scope>/ instead of the workspace root. Workspace files
171
178
  // (rules/, rule_skills/, workflows/) stay shared.
@@ -203,6 +210,37 @@ export class AgentEngine {
203
210
  // Event log (append-only JSONL, source of truth)
204
211
  this.eventLog = new EventLog(this.workspace.cwd, { logDir });
205
212
 
213
+ // v0.8 P1-B: overlay workspace .env onto this.config. cli/index.js
214
+ // calls loadSettings() without a workspace path because the path
215
+ // isn't known until this constructor runs. Result: workspace .env's
216
+ // VLM_TIER1 / OCR_MODEL_TIER1 / TIER1..4 / LANGUAGE were silently
217
+ // ignored, with gc defaults (~/.kc_agent/config.json) winning.
218
+ // 资管 audit § 9.2 finding 7: user's OCR_MODEL_TIER1=zai-org/GLM-4.6V
219
+ // never reached document_parse; error messages quoted gc's
220
+ // Qwen3-VL-235B default. Overlay reads workspace .env, fills in
221
+ // fields where current config came from gc fallback (penv-set values
222
+ // still win because loadSettings applied them).
223
+ try { this._overlayWorkspaceEnv(); } catch { /* best-effort */ }
224
+
225
+ // v0.8.1 P8-A: inline marathon driver. v0.8.0's separate-process
226
+ // kc-marathon CLI + filesystem-watcher IPC died silently when the
227
+ // launching terminal closed (E2E #11 audit). Redesigned as an inline
228
+ // state machine activated via /marathon slash command. No filesystem
229
+ // marker, no inbox.jsonl. Driver instance set by enterMarathonMode(),
230
+ // cleared by exitMarathonMode(). Query via this.marathonDriver.
231
+ this.marathonDriver = null;
232
+ // v0.8.2 P12-A: marathon goal text. Pinned at system-prompt level via
233
+ // ContextAssembler so it survives context_windowed eviction (the v0.8.1
234
+ // regression). Stored alongside marathonDriver lifecycle.
235
+ this.marathonGoal = null;
236
+ // v0.8.2 P12-B: shared user-input queue between TUI and engine. The TUI
237
+ // queues mid-run typed messages here; the marathon decision loop drains
238
+ // this queue BEFORE asking the driver for a continuation, so user
239
+ // interrupts always win over driver autonomy. Fixes the v0.8.1 silent
240
+ // queue-starvation where /marathon mode kept the user message in a
241
+ // TUI-local queue that never reached the engine.
242
+ this.inputQueue = [];
243
+
206
244
  // Context windowing
207
245
  this.contextWindow = new ContextWindow({
208
246
  contextLimit: config.kcContextLimit || 200000,
@@ -216,8 +254,6 @@ export class AgentEngine {
216
254
  // so they don't get a TaskManager.
217
255
  this.taskManager = this._isSubagent ? null : new TaskManager(this.workspace.cwd);
218
256
 
219
- // Build all tool instances (but register phase-appropriate ones)
220
- this._buildTools = this._createAllTools();
221
257
  this._phaseSummaries = [];
222
258
 
223
259
  // Pipeline system (meta-meta skills as code)
@@ -233,8 +269,15 @@ export class AgentEngine {
233
269
  };
234
270
 
235
271
  // Skill discovery (Claude Code pattern: index in context, full content on demand)
272
+ // v0.7.5 — must initialize BEFORE _createAllTools() because ConsultSkillTool
273
+ // takes this._skillLoader as a constructor arg. Was a v0.7.5 init-order bug:
274
+ // _createAllTools ran first, passed undefined skillLoader to ConsultSkillTool,
275
+ // calls to consult_skill threw "Cannot read properties of undefined".
236
276
  this._skillLoader = new SkillLoader(config.language);
237
277
 
278
+ // Build all tool instances (but register phase-appropriate ones)
279
+ this._buildTools = this._createAllTools();
280
+
238
281
  // v0.7.5 G-D1: populate <workspace>/skills/ with the initial phase's
239
282
  // available skill set. Symlink with copy fallback. Re-populated on
240
283
  // every phase advance/retreat (see _advancePhase).
@@ -247,20 +290,26 @@ export class AgentEngine {
247
290
  });
248
291
  } catch { /* best-effort; skills/ population is not a critical-path failure */ }
249
292
 
293
+ // v0.8.1 P10-A: auto-populate <workspace>/workflows/common/llm_client.py
294
+ // from the template. Idempotent (skips if file already exists). Covers
295
+ // the bench-corpus flow where `kc-beta init` was bypassed. v0.8.0
296
+ // shipped this shim as embedded source in skill-to-workflow teaching;
297
+ // E2E #11 audits found BOTH agents ignored the teaching and wrote
298
+ // their own (non-canonical) llm_client.py. Shipping it as a template
299
+ // file the agent finds via filesystem walk is more robust.
300
+ try { this._populateWorkspaceCommonShims(); } catch { /* best-effort */ }
301
+
250
302
  // Register tools for initial phase
251
303
  this.toolRegistry = new ToolRegistry();
252
304
  this._registerToolsForPhase(this.currentPhase);
253
305
 
254
- // Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
255
- // phase so the first real false→true flip inside onToolResult triggers an
256
- // advance even when the user launches from a pre-populated workspace
257
- // whose exit criteria already happen to be met at boot.
258
- // resume() re-primes this from the restored pipeline state (see ~L566),
259
- // which is the correct behaviour there: resumed sessions that were already
260
- // past this phase shouldn't re-fire.
261
- this._lastReady = Object.fromEntries(
262
- Object.keys(this.pipelines).map((p) => [p, false]),
263
- );
306
+ // v0.8 P1-D: removed `_lastReady` edge-trigger state. It was the
307
+ // bookkeeping for `_maybeAutoAdvance`, which v0.7.4 G0b decommissioned
308
+ // (all call sites removed because v0.7.3's mid-session auto-advance
309
+ // chain regression was caused by it). The method definition itself
310
+ // is also gone in P1-D. Phase advance is now 100% explicit: agent's
311
+ // `phase_advance` tool or user re-prompt. Resume + rollback paths
312
+ // that previously re-primed `_lastReady` are no-ops now.
264
313
 
265
314
  // B0.1: Heap sampler. Parent engines only — sub-agents share a process
266
315
  // with the parent and would double-log. Writes a single JSONL line
@@ -271,6 +320,111 @@ export class AgentEngine {
271
320
  this._heapSamplerStop = this._isSubagent ? null : this._startHeapSampler();
272
321
  }
273
322
 
323
+ /**
324
+ * v0.8 P1-B: overlay workspace .env onto this.config now that
325
+ * this.workspace.cwd is known. Only fills in fields where the current
326
+ * config value was a gc fallback (empty OR the gc default) — does NOT
327
+ * override fields that came from process.env (those win at
328
+ * loadSettings() time and stay winning).
329
+ *
330
+ * Without this overlay, workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 /
331
+ * TIER1..4 / LANGUAGE are silently ignored — the v0.7.4 G1b OCR_MODEL_TIER1
332
+ * alias fix landed at the config layer but never reached the runtime
333
+ * because loadSettings() is called without a workspace path.
334
+ */
335
+ /**
336
+ * v0.8.1 P10-A: copy canonical `workflows/common/*.py` shims from the
337
+ * bundled template if they're missing in the workspace. Provides
338
+ * `llm_client.py` (worker LLM HTTP shim, provider-agnostic) and
339
+ * `utils.py` (strip_annotations + helpers). Idempotent — never
340
+ * overwrites existing files (agent edits stay intact).
341
+ *
342
+ * Runs at engine init. Covers bench-corpus mode where `kc-beta init`
343
+ * doesn't run; init-flow workspaces already have these from copyDir.
344
+ */
345
+ _populateWorkspaceCommonShims() {
346
+ const __dirname = path.dirname(new URL(import.meta.url).pathname);
347
+ const templateRoot = path.resolve(__dirname, "..", "..", "template", "workflows", "common");
348
+ if (!fs.existsSync(templateRoot)) return;
349
+
350
+ const targetRoot = path.join(this.workspace.cwd, "workflows", "common");
351
+ fs.mkdirSync(targetRoot, { recursive: true });
352
+
353
+ const copied = [];
354
+ const skipped = [];
355
+ for (const entry of fs.readdirSync(templateRoot)) {
356
+ if (!entry.endsWith(".py") || entry.startsWith(".")) continue;
357
+ const srcPath = path.join(templateRoot, entry);
358
+ const dstPath = path.join(targetRoot, entry);
359
+ if (fs.existsSync(dstPath)) {
360
+ skipped.push(entry);
361
+ continue;
362
+ }
363
+ try {
364
+ fs.copyFileSync(srcPath, dstPath);
365
+ copied.push(entry);
366
+ } catch { /* best-effort */ }
367
+ }
368
+
369
+ if (copied.length > 0) {
370
+ try {
371
+ this.eventLog?.append?.("workflows_common_populated", { copied, skipped });
372
+ } catch { /* best-effort */ }
373
+ }
374
+ }
375
+
376
+ _overlayWorkspaceEnv() {
377
+ if (!this.workspace?.cwd) return;
378
+ const envPath = path.join(this.workspace.cwd, ".env");
379
+ if (!fs.existsSync(envPath)) return;
380
+ let wsEnv;
381
+ try { wsEnv = loadEnvFile(envPath); } catch { return; }
382
+ if (!wsEnv || typeof wsEnv !== "object") return;
383
+
384
+ // VLM tiers — workspace .env's VLM_TIER1 / OCR_MODEL_TIER1 wins over
385
+ // gc's vlm_tiers.tier1 default. process.env precedence preserved
386
+ // because loadSettings already applied it; we only fill in slots
387
+ // that fell through to gc-or-empty.
388
+ const overlays = [
389
+ { configKey: "vlmTier1", envKey: ["VLM_TIER1", "OCR_MODEL_TIER1"] },
390
+ { configKey: "vlmTier2", envKey: ["VLM_TIER2", "OCR_MODEL_TIER2"] },
391
+ { configKey: "vlmTier3", envKey: ["VLM_TIER3", "OCR_MODEL_TIER3"] },
392
+ { configKey: "tier1", envKey: ["TIER1"] },
393
+ { configKey: "tier2", envKey: ["TIER2"] },
394
+ { configKey: "tier3", envKey: ["TIER3"] },
395
+ { configKey: "tier4", envKey: ["TIER4"] },
396
+ { configKey: "language", envKey: ["LANGUAGE"] },
397
+ ];
398
+
399
+ const applied = [];
400
+ for (const { configKey, envKey } of overlays) {
401
+ // Find first non-empty workspace .env value for this config key
402
+ let wsValue = "";
403
+ for (const k of envKey) {
404
+ if (wsEnv[k]) { wsValue = wsEnv[k]; break; }
405
+ }
406
+ if (!wsValue) continue;
407
+ // Skip if process.env has the same key set — penv already won
408
+ const penvWon = envKey.some((k) => process.env[k] && process.env[k] !== wsValue);
409
+ if (penvWon) continue;
410
+ // Apply the workspace value
411
+ if (this.config[configKey] !== wsValue) {
412
+ applied.push({ key: configKey, from: this.config[configKey] || "(empty)", to: wsValue });
413
+ this.config[configKey] = wsValue;
414
+ }
415
+ }
416
+
417
+ // Audit visibility: emit a one-time event listing what was overlaid.
418
+ if (applied.length > 0) {
419
+ try {
420
+ this.eventLog?.append?.("workspace_env_overlay", {
421
+ envPath: path.relative(this.workspace.cwd, envPath),
422
+ fields: applied,
423
+ });
424
+ } catch { /* best-effort */ }
425
+ }
426
+ }
427
+
274
428
  /**
275
429
  * Start sampling process.memoryUsage() every 60 s into logs/heap.jsonl.
276
430
  * Returns a stop fn. Timer is .unref()'d so it never keeps the process
@@ -280,11 +434,22 @@ export class AgentEngine {
280
434
  _startHeapSampler() {
281
435
  const logDir = path.join(this.workspace.cwd, "logs");
282
436
  const logPath = path.join(logDir, "heap.jsonl");
437
+ let stopped = false;
438
+ let lastSampleAt = 0;
439
+
283
440
  const sample = () => {
284
441
  try {
285
442
  const mem = process.memoryUsage();
443
+ const now = Date.now();
444
+ // v0.8 P1-C: track skipped intervals. If more than 90s elapsed
445
+ // since last sample on a 60s cadence, the previous tick was missed
446
+ // (event loop sleep, GC pause, etc.). Surface in the row so the
447
+ // post-mortem audit can detect gaps without needing to compare
448
+ // adjacent timestamps.
449
+ const skippedMs = lastSampleAt > 0 ? (now - lastSampleAt - 60_000) : 0;
450
+ lastSampleAt = now;
286
451
  const row = {
287
- t: new Date().toISOString(),
452
+ t: new Date(now).toISOString(),
288
453
  seq: this.eventLog?.currentSeq ?? 0,
289
454
  phase: this.currentPhase,
290
455
  rssMB: Math.round(mem.rss / 1024 / 1024),
@@ -301,17 +466,39 @@ export class AgentEngine {
301
466
  // and the row gets `componentsErr` instead.
302
467
  components: this._sampleComponents(),
303
468
  };
469
+ if (skippedMs > 0) row.skippedMs = skippedMs;
304
470
  fs.mkdirSync(logDir, { recursive: true });
305
471
  fs.appendFileSync(logPath, JSON.stringify(row) + "\n", "utf-8");
306
472
  } catch { /* never fatal */ }
307
473
  };
474
+
475
+ // v0.8 P1-C: self-rescheduling setTimeout instead of setInterval.
476
+ // v0.8.3 P21-B4: removed .unref() — both 资管 + 贷款 v0.8.2 sessions
477
+ // showed only 1 line in heap.jsonl across 7+ hour runs even with
478
+ // self-rescheduling setTimeout. The .unref'd timer was apparently
479
+ // being dropped by Node's event-loop housekeeping despite the
480
+ // process being kept alive by stdin / React render loop / other
481
+ // refs. The cost of dropping .unref() is that on a graceful exit
482
+ // path that doesn't call engine.stop(), the timer can delay exit
483
+ // by up to 60s. We accept this — engine.stop() is the canonical
484
+ // shutdown path and it clears the timer via clearTimeout.
485
+ let timeoutHandle = null;
486
+ const scheduleNext = () => {
487
+ if (stopped) return;
488
+ timeoutHandle = setTimeout(() => {
489
+ sample();
490
+ scheduleNext();
491
+ }, 60_000);
492
+ };
493
+
308
494
  // Record one sample at startup so we have a baseline even on short runs.
309
495
  sample();
310
- const timer = setInterval(sample, 60_000);
311
- timer.unref?.();
496
+ scheduleNext();
497
+
312
498
  return () => {
313
499
  try {
314
- clearInterval(timer);
500
+ stopped = true;
501
+ if (timeoutHandle) clearTimeout(timeoutHandle);
315
502
  sample(); // one final sample on shutdown
316
503
  } catch { /* ignore */ }
317
504
  };
@@ -428,7 +615,10 @@ export class AgentEngine {
428
615
  return {
429
616
  // Always available (BUILD + DISTILL)
430
617
  core: [
431
- new SandboxExecTool(this.workspace, this.config.kcExecTimeout),
618
+ new SandboxExecTool(this.workspace, {
619
+ defaultTimeoutMs: this.config.kcExecDefaultTimeoutMs,
620
+ maxTimeoutMs: this.config.kcExecMaxTimeoutMs,
621
+ }),
432
622
  new WorkspaceFileTool(this.workspace, this.versionManager),
433
623
  new CopyToWorkspaceTool(this.workspace, {
434
624
  largeRefThresholdMB: this.config.largeRefThresholdMB ?? 10,
@@ -468,7 +658,12 @@ export class AgentEngine {
468
658
  mineruApiKey: this.config.mineruApiKey,
469
659
  llmApiKey: workerApiKey,
470
660
  llmBaseUrl: workerBaseUrl,
661
+ // v0.8.1 P9-B: live-read vlmTier1 so workspace_env_overlay
662
+ // changes after tool construction (or mid-run .env edits)
663
+ // reach document_parse. The static `ocrModel` is the
664
+ // construction-time fallback; getOcrModel takes precedence.
471
665
  ocrModel: vlmModel,
666
+ getOcrModel: () => this.config.vlmTier1 || vlmModel,
472
667
  }),
473
668
  new DocumentSearchTool(this.workspace),
474
669
  // Group C — chunker/RAG infrastructure ported from AMC app. Core
@@ -617,6 +812,7 @@ export class AgentEngine {
617
812
  pipelineState: this.pipelines[this.currentPhase]?.describeState?.() || null,
618
813
  workspaceState: this._buildWorkspaceState(),
619
814
  projectMemory: this._readProjectMemory(),
815
+ marathonGoal: this.marathonGoal,
620
816
  });
621
817
  const systemTokens = estimateTokens(systemPrompt);
622
818
  const messageTokens = estimateMessagesTokens(this.history.messages);
@@ -968,16 +1164,9 @@ export class AgentEngine {
968
1164
  }
969
1165
  }
970
1166
 
971
- // Re-prime _lastReady AFTER importState so it reflects the restored
972
- // pipeline milestones, not the empty defaults from constructor.
973
- // (Bug 5 fix — without this, resume reignites auto-advance.)
974
- for (const phase of Object.keys(engine.pipelines)) {
975
- try {
976
- engine._lastReady[phase] = !!engine.pipelines[phase].exitCriteriaMet?.();
977
- } catch {
978
- engine._lastReady[phase] = false;
979
- }
980
- }
1167
+ // v0.8 P1-D: removed `_lastReady` re-prime. Was the bookkeeping for
1168
+ // `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase advance
1169
+ // is explicit now; nothing to re-prime on resume.
981
1170
 
982
1171
  engine.eventLog.append("session_resume", {
983
1172
  resumedPhase: engine.currentPhase,
@@ -1065,6 +1254,7 @@ export class AgentEngine {
1065
1254
  pipelineState,
1066
1255
  workspaceState: this._buildWorkspaceState(),
1067
1256
  projectMemory: this._readProjectMemory(),
1257
+ marathonGoal: this.marathonGoal,
1068
1258
  });
1069
1259
  const tools = this.toolRegistry.schemasOpenai();
1070
1260
 
@@ -1086,6 +1276,29 @@ export class AgentEngine {
1086
1276
  // budget. Better to lose some history than crash with HTTP 400.
1087
1277
  messages = this._enforceTokenBudget(messages);
1088
1278
 
1279
+ // v0.8 P3-A: skill usage counter — emit one skill_byte_send event
1280
+ // per always-loaded skill per LLM send. Captures the cost of having
1281
+ // a skill body inlined in the system prompt (Layer B per design doc).
1282
+ // Agent-blind: events go to events.jsonl only; never surfaced to the
1283
+ // agent's context. consult_skill tool results emit their own
1284
+ // skill_invoked events with via_tool="consult_skill" (already in
1285
+ // place since v0.7.5 G-C4), so we don't double-count those here.
1286
+ try {
1287
+ const { alwaysLoaded } = this._skillLoader.getPhaseSkillSet(this.currentPhase) || {};
1288
+ if (Array.isArray(alwaysLoaded)) {
1289
+ for (const skill of alwaysLoaded) {
1290
+ const body = this._skillLoader.loadSkillBody(skill);
1291
+ if (!body) continue;
1292
+ this.eventLog.append("skill_byte_send", {
1293
+ skill,
1294
+ via: "system_prompt_always_loaded",
1295
+ byte_count: body.length,
1296
+ phase: this.currentPhase,
1297
+ });
1298
+ }
1299
+ }
1300
+ } catch { /* counter is best-effort; never break the turn */ }
1301
+
1089
1302
  this.eventLog.append("llm_start", {
1090
1303
  model: this.config.kcModel,
1091
1304
  messageCount: messages.length,
@@ -1335,11 +1548,17 @@ export class AgentEngine {
1335
1548
  // path-matching emission stays only as a fallback for any agent
1336
1549
  // that reads a SKILL.md path directly (out of pattern).
1337
1550
  try {
1551
+ // v0.8 P1-E: heredoc detection. `cat << 'EOF' > /tmp/skill.md`
1552
+ // matches the read-verb regex but is actually a WRITE — the
1553
+ // heredoc operator `<<` means cat is consuming inline content
1554
+ // (the heredoc body), not a file path. 资管 v0.7.5 audit § 5f
1555
+ // confirmed 1 spurious skill_invoked event of this kind.
1556
+ // Excluding any command with `<<` from the isRead classification.
1557
+ const cmd = String(inputData?.command || "");
1558
+ const isHeredoc = cmd.includes("<<");
1338
1559
  const isRead =
1339
1560
  (tc.name === "workspace_file" && inputData?.operation === "read") ||
1340
- (tc.name === "sandbox_exec" && /\b(cat|head|tail|less|grep|view|read)\b/.test(
1341
- String(inputData?.command || "")
1342
- ));
1561
+ (tc.name === "sandbox_exec" && !isHeredoc && /\b(cat|head|tail|less|grep|view|read)\b/.test(cmd));
1343
1562
  if (
1344
1563
  !result.isError &&
1345
1564
  isRead &&
@@ -1349,9 +1568,13 @@ export class AgentEngine {
1349
1568
  // v0.7.5 flat layout: skills/<name>/SKILL.md (workspace scope)
1350
1569
  // OR template/skills/<lang>/<name>/SKILL.md (template scope, rare)
1351
1570
  // Deep layout backward-compat preserved for any stragglers.
1571
+ // v0.8 P0-B: accept lowercase `skill.md` too — 资管 audit § 3.2
1572
+ // found agents writing lowercase consistently (14/14 rule_skills/).
1573
+ // Limited to exact uppercase OR exact lowercase (no mixed case)
1574
+ // to avoid spurious matches on unrelated files (e.g., `Skill.md`).
1352
1575
  const skillMatch = p.match(
1353
- /(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/SKILL\.md\b/
1354
- ) || p.match(/\bSKILL\.md\b/);
1576
+ /(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/(?:SKILL|skill)\.md\b/
1577
+ ) || p.match(/\b(?:SKILL|skill)\.md\b/);
1355
1578
  if (skillMatch) {
1356
1579
  const skillName = skillMatch[1] || "(unknown)";
1357
1580
  this.eventLog.append("skill_invoked", {
@@ -1452,10 +1675,12 @@ export class AgentEngine {
1452
1675
  }
1453
1676
 
1454
1677
  /**
1455
- * Centralized phase transition (Bug 4). All three triggers route through here:
1678
+ * Centralized phase transition (Bug 4). Two triggers route through here
1679
+ * after v0.7.4 G0b + v0.8 P1-D:
1456
1680
  * (1) pipeline.onToolResult returning phase_ready
1457
- * (2) post-turn auto-check via _maybeAutoAdvance
1458
- * (3) explicit user request via the phase_advance tool
1681
+ * (2) explicit user request via the phase_advance tool
1682
+ * (The historical (2) post-turn auto-check via `_maybeAutoAdvance` was
1683
+ * removed; phase advance is 100% explicit.)
1459
1684
  *
1460
1685
  * Reachability: by default only forward-by-one transitions per NEXT_PHASE.
1461
1686
  * Set `force: true` to allow non-adjacent or backward transitions (e.g. user
@@ -1533,9 +1758,17 @@ export class AgentEngine {
1533
1758
  try { criteriaMet = !!fromPipeline?.exitCriteriaMet?.(); } catch { criteriaMet = true; }
1534
1759
  if (!criteriaMet) {
1535
1760
  const counts = this._buildEngineCountsBlock(this.currentPhase);
1761
+ // v0.8 P0-E: prescriptive hint in the event payload so post-mortem
1762
+ // audits see what the agent was told (matches what phase-advance.js
1763
+ // returns to the LLM).
1764
+ let prescriptive = null;
1765
+ try {
1766
+ prescriptive = getPrescriptiveHint(this.currentPhase, null, counts || "");
1767
+ } catch { /* hint generation is best-effort */ }
1536
1768
  this.eventLog.append("phase_advance_refused", {
1537
1769
  from: this.currentPhase, to: nextPhase, reason,
1538
1770
  hint: "exit criteria not met by engine telemetry",
1771
+ prescriptive_hint: prescriptive,
1539
1772
  engineCounts: counts || null,
1540
1773
  });
1541
1774
  return false;
@@ -1610,23 +1843,16 @@ export class AgentEngine {
1610
1843
  });
1611
1844
  }
1612
1845
 
1613
- // v0.6.2 J2: on rollback, reset the rolled-FROM phase's lastReady
1614
- // edge-trigger so that if the agent revisits it and re-flips
1615
- // exit-criteria true, _maybeAutoAdvance will fire correctly. Without
1616
- // this, the auto-advance edge trigger stays latched true and the
1617
- // moment the agent returns to fromPhase the engine immediately
1618
- // bounces them back out — defeating the rollback.
1619
- if (direction === "rollback" && this._lastReady) {
1620
- this._lastReady[fromPhase] = false;
1621
- }
1846
+ // v0.8 P1-D: removed `_lastReady` rollback reset. Was the bookkeeping
1847
+ // for `_maybeAutoAdvance` which v0.7.4 G0b decommissioned. Phase
1848
+ // advance is explicit now; rollback just needs to commit the new phase.
1622
1849
 
1623
1850
  this.saveState();
1624
1851
 
1625
1852
  // B8: Soft signal — surface any sub-agents left running from the prior
1626
1853
  // phase so the main agent's next turn can decide whether to kill them.
1627
- // NOT automated: phase_advance can fire from _maybeAutoAdvance on a
1628
- // criteria-flip, and auto-killing would couple lifecycle with blast
1629
- // radius. This just informs.
1854
+ // NOT automated: auto-killing would couple lifecycle with blast radius.
1855
+ // This just informs.
1630
1856
  try {
1631
1857
  const agentTool = this._buildTools?.core?.find((t) => t?.name === "agent_tool");
1632
1858
  const runningIds = agentTool?.getRunningTaskIds?.() || [];
@@ -1826,35 +2052,12 @@ export class AgentEngine {
1826
2052
  return false;
1827
2053
  }
1828
2054
 
1829
- /**
1830
- * Bug 4 trigger (1) auto-detect, edge-triggered (Bug 5): only fires on a
1831
- * fresh false→true flip in `exitCriteriaMet()`. Sessions resumed in an
1832
- * already-met state do nothing; users iterating in a phase whose criteria
1833
- * have been met for a while do nothing. Real new evidence is required.
1834
- */
1835
- _maybeAutoAdvance() {
1836
- const phase = this.currentPhase;
1837
- const pipeline = this.pipelines[phase];
1838
- let nowReady = false;
1839
- try { nowReady = !!pipeline?.exitCriteriaMet?.(); } catch { nowReady = false; }
1840
-
1841
- if (!nowReady) {
1842
- this._lastReady[phase] = false;
1843
- return null;
1844
- }
1845
- // Edge-trigger: nowReady && !wasReady
1846
- if (this._lastReady[phase]) return null;
1847
- this._lastReady[phase] = true;
1848
-
1849
- const next = NEXT_PHASE[phase];
1850
- if (!next) return null;
1851
- const advanced = this._advancePhase(next, "exit criteria flipped to met");
1852
- if (!advanced) return null;
1853
- return new AgentEvent({
1854
- type: "pipeline_event",
1855
- data: { type: "phase_ready", nextPhase: next, message: "exit criteria flipped to met" },
1856
- });
1857
- }
2055
+ // v0.8 P1-D: `_maybeAutoAdvance()` deleted. The method auto-fired phase
2056
+ // advance on a false→true flip of `exitCriteriaMet()`, but v0.7.3
2057
+ // showed mid-session auto-advance chains were a regression hazard
2058
+ // (user couldn't review between phases). v0.7.4 G0b removed all call
2059
+ // sites; v0.8 P1-D removes the now-dead method definition + the
2060
+ // `_lastReady` bookkeeping it relied on. Phase advance is 100% explicit.
1858
2061
 
1859
2062
  /**
1860
2063
  * Tool-call offloading. If the tool's content exceeds the threshold,
@@ -2163,27 +2366,38 @@ export class AgentEngine {
2163
2366
  /** B1: original serial ralph-loop path — one task at a time, shared
2164
2367
  * conversation history. Unchanged from pre-v0.6.0 behavior. */
2165
2368
  async *_runTaskLoopSerial(userMessage) {
2166
- // Run the initial turn (user's request)
2167
- yield* this.runTurn(userMessage);
2168
-
2169
- // v0.7.5 G-F5 TEMPORARILY DISABLED 2026-05-13 for overnight
2170
- // marathon test. The strict capture-BEFORE form lets every user
2171
- // prompt advance only one phase, which blocks unattended overnight
2172
- // sessions. v0.7.4-style capture-AFTER (below) allows the agent
2173
- // to chain multiple phase_advance calls within the initial runTurn,
2174
- // then exits the while loop on subsequent phase changes.
2369
+ // v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
2370
+ // - Interactive sessions (marathon NOT active): capture startingPhase
2371
+ // BEFORE the initial runTurn, and exit the loop on ANY phase change
2372
+ // (including within the initial runTurn). One user prompt = one
2373
+ // phase advance. Path (a) per design doc Q5 lean.
2374
+ // - Marathon sessions: the kc-marathon driver provides per-phase
2375
+ // prompts via .kc_marathon/inbox.jsonl, so the engine doesn't need
2376
+ // F5's checkpoint phase chaining is OK because each chained phase
2377
+ // STILL gets its own driver-emitted prompt next tick.
2175
2378
  //
2176
- // TODO: after the overnight E2E results come in (2026-05-14), decide:
2177
- // (a) re-enable F5 strict and build marathon as a separate mode
2178
- // (external driver pattern, e.g., /loop-kc command) locked
2179
- // earlier decision per harness-research § 7
2180
- // (b) keep capture-AFTER permanently and accept multi-phase prompts
2181
- //
2182
- // To re-enable F5: move `const startingPhase = this.currentPhase;`
2183
- // to BEFORE the `yield* this.runTurn(userMessage);` above, and add
2184
- // the matching `if (this.currentPhase !== startingPhase) { return; }`
2185
- // block between runTurn and the while loop.
2379
+ // v0.7.3 demonstrated why F5 matters interactively: auto-chained
2380
+ // phase advances skip the user check-in cycle and broke phase
2381
+ // control in team testing. v0.7.4 G0c first fixed it via
2382
+ // post-initial-runTurn exit; v0.7.5 added the strict capture-BEFORE
2383
+ // refinement; v0.8 P5-A preserves both with the marathon escape;
2384
+ // v0.8.1 P8-A switched marathon-active source from filesystem
2385
+ // marker to inline driver instance.
2386
+ const marathonActive = this.isMarathonActive();
2186
2387
  const startingPhase = this.currentPhase;
2388
+ yield* this.runTurn(userMessage);
2389
+
2390
+ // F5 strict gate: if interactive AND phase changed during initial
2391
+ // runTurn, exit immediately (don't auto-continue tasks in the new
2392
+ // phase). Marathon bypasses — driver decides pacing.
2393
+ if (!marathonActive && this.currentPhase !== startingPhase) {
2394
+ this.eventLog.append("ralph_loop_exit", {
2395
+ reason: "f5_strict_initial_turn",
2396
+ from: startingPhase,
2397
+ to: this.currentPhase,
2398
+ });
2399
+ return;
2400
+ }
2187
2401
 
2188
2402
  // Auto-continue through pending tasks (within current phase only)
2189
2403
  while (this.taskManager.getNextPending()) {
@@ -2262,6 +2476,147 @@ export class AgentEngine {
2262
2476
  break;
2263
2477
  }
2264
2478
  }
2479
+
2480
+ // v0.8.1 P8-A: marathon mode — inline driver. After the F5 phase-
2481
+ // boundary exit, if marathon is active, query the driver for the
2482
+ // next continuation prompt and run additional turns until the driver
2483
+ // signals stop (null return). State machine logic unchanged from
2484
+ // v0.8.0; the I/O wrapper just shifted from filesystem-watcher to
2485
+ // direct method calls.
2486
+ while (this.marathonDriver) {
2487
+ // v0.8.2 P12-B: user-input queue priority. Drain queued user messages
2488
+ // FIRST so mid-run nudges always win over driver autonomy. Fixes the
2489
+ // v0.8.1 silent queue-starvation: the TUI used to queue messages in a
2490
+ // local ref that only drained after runTurn() returned, but the
2491
+ // marathon loop never returns while the driver is active. Now the
2492
+ // engine owns the queue; TUI hands off via queueUserInput().
2493
+ const queuedUserInput = this._drainNextQueuedUserInput();
2494
+ if (queuedUserInput) {
2495
+ yield* this.runTurn(queuedUserInput);
2496
+ continue;
2497
+ }
2498
+
2499
+ const turnsSnapshot = this.marathonDriver.turnsThisPhase;
2500
+ const phaseChanged = this.currentPhase !== this.marathonDriver.currentPhase;
2501
+ const milestones = this._buildEngineCountsBlock(this.currentPhase) || {};
2502
+ const decision = this.marathonDriver.decideNext({
2503
+ currentPhase: this.currentPhase,
2504
+ milestones,
2505
+ phaseChanged,
2506
+ errorSeen: false, // engine surfaces errors via tool_result.isError; not propagated here for v0.8.1 MVP
2507
+ turnsThisPhase: turnsSnapshot + 1,
2508
+ });
2509
+ if (!decision) {
2510
+ // Stop condition met — driver returned null
2511
+ this.eventLog.append("marathon_detach", {
2512
+ reason: this.marathonDriver.stopReason || "unknown",
2513
+ decisions: this.marathonDriver.decisionCount,
2514
+ });
2515
+ this.marathonDriver = null;
2516
+ this.marathonGoal = null;
2517
+ break;
2518
+ }
2519
+ this.eventLog.append("marathon_decision", {
2520
+ template: decision.template,
2521
+ reason: decision.reason,
2522
+ phase: this.currentPhase,
2523
+ });
2524
+ yield* this.runTurn(decision.prompt);
2525
+ // Loop back: another turn just completed; engine queue + driver both
2526
+ // get another chance via the next iteration's drain-then-decide.
2527
+ }
2528
+ }
2529
+
2530
+ /**
2531
+ * v0.8.1 P8-A: activate marathon mode with a goal-description.
2532
+ * Called from cli/index.js's /marathon slash command handler.
2533
+ * The engine's next runTaskLoop will use marathonDriver.getInitialPrompt()
2534
+ * as the kickoff user message.
2535
+ *
2536
+ * @param {string} goal — the marathon goal description (user-typed)
2537
+ * @param {object} [opts] — {maxWallclockMs?, stuckAfterMs?}
2538
+ * @returns {object} {goal, language, startedAt} for confirmation
2539
+ */
2540
+ enterMarathonMode(goal, opts = {}) {
2541
+ if (this.marathonDriver) {
2542
+ throw new Error("Marathon already active — use /marathon off to disengage first");
2543
+ }
2544
+ this.marathonGoal = goal;
2545
+ this.marathonDriver = new MarathonDriver({
2546
+ goal,
2547
+ language: this.config.language || "en",
2548
+ maxWallclockMs: opts.maxWallclockMs,
2549
+ stuckAfterMs: opts.stuckAfterMs,
2550
+ });
2551
+ this.eventLog.append("marathon_attach", {
2552
+ goal: goal.slice(0, 200),
2553
+ language: this.config.language || "en",
2554
+ });
2555
+ return this.marathonDriver.getStatus();
2556
+ }
2557
+
2558
+ /** v0.8.1 P8-A: deactivate marathon mode. Returns final status snapshot. */
2559
+ exitMarathonMode(reason = "user_off") {
2560
+ if (!this.marathonDriver) return null;
2561
+ const status = this.marathonDriver.getStatus();
2562
+ this.marathonDriver.stop(reason);
2563
+ this.eventLog.append("marathon_detach", {
2564
+ reason,
2565
+ decisions: this.marathonDriver.decisionCount,
2566
+ });
2567
+ this.marathonDriver = null;
2568
+ this.marathonGoal = null;
2569
+ return status;
2570
+ }
2571
+
2572
+ /** v0.8.1 P8-A: is marathon mode currently active? (for TUI status bar) */
2573
+ isMarathonActive() {
2574
+ return !!this.marathonDriver && !this.marathonDriver.stopped;
2575
+ }
2576
+
2577
+ /**
2578
+ * v0.8.2 P12-B: queue a user-typed message for the engine to pick up at
2579
+ * the next turn boundary. Called by the TUI when the user types during an
2580
+ * in-flight marathon turn. The marathon decision loop drains this queue
2581
+ * BEFORE asking the driver for a continuation, so user interrupts always
2582
+ * win over driver autonomy.
2583
+ *
2584
+ * @param {string} text — user-typed message
2585
+ */
2586
+ queueUserInput(text) {
2587
+ if (!text || typeof text !== "string") return;
2588
+ this.inputQueue.push(text);
2589
+ this.eventLog.append("user_input_queued", {
2590
+ preview: text.slice(0, 100),
2591
+ queueDepth: this.inputQueue.length,
2592
+ marathonActive: this.isMarathonActive(),
2593
+ });
2594
+ }
2595
+
2596
+ /**
2597
+ * v0.8.2 P12-B: drain the next queued user input, or null if empty.
2598
+ * Internal helper for the marathon decision loop.
2599
+ *
2600
+ * @returns {string|null}
2601
+ */
2602
+ _drainNextQueuedUserInput() {
2603
+ if (this.inputQueue.length === 0) return null;
2604
+ const text = this.inputQueue.shift();
2605
+ this.eventLog.append("user_input_drained", {
2606
+ preview: text.slice(0, 100),
2607
+ queueDepth: this.inputQueue.length,
2608
+ });
2609
+ return text;
2610
+ }
2611
+
2612
+ /**
2613
+ * v0.8.2 P12-B: query the queue depth without draining.
2614
+ * Used by TUI to display "Queued (N waiting)" indicator.
2615
+ *
2616
+ * @returns {number}
2617
+ */
2618
+ getQueueDepth() {
2619
+ return this.inputQueue.length;
2265
2620
  }
2266
2621
 
2267
2622
  /**
@@ -2282,14 +2637,26 @@ export class AgentEngine {
2282
2637
  * amortized against the 2-4× wall-clock speedup.
2283
2638
  */
2284
2639
  async *_runTaskLoopParallel(userMessage, parallelism) {
2640
+ // v0.8 P5-A: F5 re-enabled, conditional on marathon mode.
2641
+ // Mirror _runTaskLoopSerial — capture startingPhase BEFORE initial
2642
+ // runTurn so phase advance during the initial turn exits the loop
2643
+ // unless marathon is active.
2644
+ // v0.8.1 P8-A: marathon check now uses inline driver instance.
2645
+ const marathonActive = this.isMarathonActive();
2646
+ const startingPhase = this.currentPhase;
2647
+
2285
2648
  // Initial turn: main agent reads user request, creates tasks.
2286
2649
  yield* this.runTurn(userMessage);
2287
2650
 
2288
- // v0.7.5 G-F5 TEMPORARILY DISABLED 2026-05-13 for overnight
2289
- // marathon test. See _runTaskLoopSerial above for full rationale.
2290
- // To re-enable F5: move `startingPhase` capture BEFORE the
2291
- // initial runTurn, add post-runTurn exit check matching serial.
2292
- const startingPhase = this.currentPhase;
2651
+ if (!marathonActive && this.currentPhase !== startingPhase) {
2652
+ this.eventLog.append("ralph_loop_exit", {
2653
+ reason: "f5_strict_initial_turn",
2654
+ from: startingPhase,
2655
+ to: this.currentPhase,
2656
+ mode: "parallel",
2657
+ });
2658
+ return;
2659
+ }
2293
2660
 
2294
2661
  const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
2295
2662
  if (!agentTool) {