@pi-agents/orchid 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. package/CHANGELOG.md +41 -0
  2. package/LICENSE +21 -0
  3. package/README.md +246 -0
  4. package/agents/AGENTS-MANIFEST.md +42 -0
  5. package/agents/brain.md +42 -0
  6. package/agents/context-builder.md +46 -0
  7. package/agents/delegate.md +12 -0
  8. package/agents/dev-1.md +42 -0
  9. package/agents/oracle.md +73 -0
  10. package/agents/planner.md +55 -0
  11. package/agents/researcher.md +52 -0
  12. package/agents/reviewer.md +79 -0
  13. package/agents/scout.md +50 -0
  14. package/agents/tester.md +45 -0
  15. package/agents/worker.md +55 -0
  16. package/extensions/ralph.ts +1 -0
  17. package/extensions/reviewer-extension.ts +125 -0
  18. package/extensions/task-orchestrator.ts +28 -0
  19. package/package.json +63 -0
  20. package/prompts/gather-context-and-clarify.md +13 -0
  21. package/prompts/parallel-cleanup.md +59 -0
  22. package/prompts/parallel-context-build.md +53 -0
  23. package/prompts/parallel-handoff-plan.md +59 -0
  24. package/prompts/parallel-research.md +50 -0
  25. package/prompts/parallel-review.md +54 -0
  26. package/prompts/review-loop.md +41 -0
  27. package/skills/orchid/SKILL.md +214 -0
  28. package/skills/orchid/orchid-cleanup/SKILL.md +122 -0
  29. package/skills/orchid/orchid-converge/SKILL.md +124 -0
  30. package/skills/orchid/orchid-decompose/SKILL.md +201 -0
  31. package/skills/orchid/orchid-doctor/SKILL.md +162 -0
  32. package/skills/orchid/orchid-investigate/SKILL.md +102 -0
  33. package/skills/orchid/orchid-launch/SKILL.md +147 -0
  34. package/skills/ralph/SKILL.md +73 -0
  35. package/skills/subagents/pi-subagents/SKILL.md +813 -0
  36. package/src/index.ts +7 -0
  37. package/src/orchestrator/abort.ts +534 -0
  38. package/src/orchestrator/agent-bridge-extension.ts +1020 -0
  39. package/src/orchestrator/agent-host.ts +954 -0
  40. package/src/orchestrator/cleanup.ts +776 -0
  41. package/src/orchestrator/config-loader.ts +1412 -0
  42. package/src/orchestrator/config-schema.ts +690 -0
  43. package/src/orchestrator/config.ts +81 -0
  44. package/src/orchestrator/context-window.ts +66 -0
  45. package/src/orchestrator/diagnostic-reports.ts +475 -0
  46. package/src/orchestrator/diagnostics.ts +394 -0
  47. package/src/orchestrator/discovery.ts +1833 -0
  48. package/src/orchestrator/engine-worker.ts +415 -0
  49. package/src/orchestrator/engine.ts +5940 -0
  50. package/src/orchestrator/execution.ts +3104 -0
  51. package/src/orchestrator/extension.ts +5934 -0
  52. package/src/orchestrator/formatting.ts +785 -0
  53. package/src/orchestrator/git.ts +88 -0
  54. package/src/orchestrator/index.ts +28 -0
  55. package/src/orchestrator/lane-runner.ts +1787 -0
  56. package/src/orchestrator/mailbox.ts +780 -0
  57. package/src/orchestrator/merge.ts +3414 -0
  58. package/src/orchestrator/messages.ts +1062 -0
  59. package/src/orchestrator/migrations.ts +278 -0
  60. package/src/orchestrator/naming.ts +117 -0
  61. package/src/orchestrator/path-resolver.ts +275 -0
  62. package/src/orchestrator/persistence.ts +2625 -0
  63. package/src/orchestrator/process-registry.ts +452 -0
  64. package/src/orchestrator/quality-gate.ts +1085 -0
  65. package/src/orchestrator/resume.ts +3488 -0
  66. package/src/orchestrator/sessions.ts +57 -0
  67. package/src/orchestrator/settings-loader.ts +136 -0
  68. package/src/orchestrator/settings-tui.ts +2208 -0
  69. package/src/orchestrator/sidecar-telemetry.ts +267 -0
  70. package/src/orchestrator/supervisor.ts +4548 -0
  71. package/src/orchestrator/task-executor-core.ts +675 -0
  72. package/src/orchestrator/tmux-compat.ts +37 -0
  73. package/src/orchestrator/tool-allowlist-constants.ts +37 -0
  74. package/src/orchestrator/types.ts +4465 -0
  75. package/src/orchestrator/verification.ts +547 -0
  76. package/src/orchestrator/waves.ts +1564 -0
  77. package/src/orchestrator/workspace.ts +707 -0
  78. package/src/orchestrator/worktree.ts +2725 -0
  79. package/src/ralph/index.ts +825 -0
  80. package/src/subagents/agents/agent-management.ts +648 -0
  81. package/src/subagents/agents/agent-scope.ts +6 -0
  82. package/src/subagents/agents/agent-selection.ts +23 -0
  83. package/src/subagents/agents/agent-serializer.ts +86 -0
  84. package/src/subagents/agents/agents.ts +832 -0
  85. package/src/subagents/agents/chain-serializer.ts +137 -0
  86. package/src/subagents/agents/frontmatter.ts +29 -0
  87. package/src/subagents/agents/identity.ts +30 -0
  88. package/src/subagents/agents/skills.ts +632 -0
  89. package/src/subagents/extension/config.ts +16 -0
  90. package/src/subagents/extension/control-notices.ts +92 -0
  91. package/src/subagents/extension/doctor.ts +199 -0
  92. package/src/subagents/extension/fanout-child.ts +170 -0
  93. package/src/subagents/extension/index.ts +573 -0
  94. package/src/subagents/extension/schemas.ts +168 -0
  95. package/src/subagents/intercom/intercom-bridge.ts +379 -0
  96. package/src/subagents/intercom/result-intercom.ts +377 -0
  97. package/src/subagents/runs/background/async-execution.ts +712 -0
  98. package/src/subagents/runs/background/async-job-tracker.ts +310 -0
  99. package/src/subagents/runs/background/async-resume.ts +345 -0
  100. package/src/subagents/runs/background/async-status.ts +325 -0
  101. package/src/subagents/runs/background/completion-dedupe.ts +63 -0
  102. package/src/subagents/runs/background/notify.ts +108 -0
  103. package/src/subagents/runs/background/parallel-groups.ts +45 -0
  104. package/src/subagents/runs/background/result-watcher.ts +307 -0
  105. package/src/subagents/runs/background/run-id-resolver.ts +83 -0
  106. package/src/subagents/runs/background/run-status.ts +269 -0
  107. package/src/subagents/runs/background/stale-run-reconciler.ts +336 -0
  108. package/src/subagents/runs/background/subagent-runner.ts +1808 -0
  109. package/src/subagents/runs/background/top-level-async.ts +13 -0
  110. package/src/subagents/runs/foreground/chain-clarify.ts +1333 -0
  111. package/src/subagents/runs/foreground/chain-execution.ts +938 -0
  112. package/src/subagents/runs/foreground/execution.ts +918 -0
  113. package/src/subagents/runs/foreground/subagent-executor.ts +2527 -0
  114. package/src/subagents/runs/shared/completion-guard.ts +147 -0
  115. package/src/subagents/runs/shared/long-running-guard.ts +175 -0
  116. package/src/subagents/runs/shared/mcp-direct-tool-allowlist.ts +365 -0
  117. package/src/subagents/runs/shared/model-fallback.ts +103 -0
  118. package/src/subagents/runs/shared/nested-events.ts +819 -0
  119. package/src/subagents/runs/shared/nested-path.ts +52 -0
  120. package/src/subagents/runs/shared/nested-render.ts +115 -0
  121. package/src/subagents/runs/shared/parallel-utils.ts +109 -0
  122. package/src/subagents/runs/shared/pi-args.ts +220 -0
  123. package/src/subagents/runs/shared/pi-spawn.ts +115 -0
  124. package/src/subagents/runs/shared/run-history.ts +60 -0
  125. package/src/subagents/runs/shared/single-output.ts +164 -0
  126. package/src/subagents/runs/shared/subagent-control.ts +226 -0
  127. package/src/subagents/runs/shared/subagent-prompt-runtime.ts +170 -0
  128. package/src/subagents/runs/shared/worktree.ts +577 -0
  129. package/src/subagents/shared/artifacts.ts +98 -0
  130. package/src/subagents/shared/atomic-json.ts +16 -0
  131. package/src/subagents/shared/file-coalescer.ts +40 -0
  132. package/src/subagents/shared/fork-context.ts +76 -0
  133. package/src/subagents/shared/formatters.ts +133 -0
  134. package/src/subagents/shared/jsonl-writer.ts +81 -0
  135. package/src/subagents/shared/model-info.ts +78 -0
  136. package/src/subagents/shared/post-exit-stdio-guard.ts +85 -0
  137. package/src/subagents/shared/session-identity.ts +10 -0
  138. package/src/subagents/shared/session-tokens.ts +44 -0
  139. package/src/subagents/shared/settings.ts +397 -0
  140. package/src/subagents/shared/status-format.ts +49 -0
  141. package/src/subagents/shared/types.ts +822 -0
  142. package/src/subagents/shared/utils.ts +450 -0
  143. package/src/subagents/slash/prompt-template-bridge.ts +397 -0
  144. package/src/subagents/slash/slash-bridge.ts +174 -0
  145. package/src/subagents/slash/slash-commands.ts +528 -0
  146. package/src/subagents/slash/slash-live-state.ts +292 -0
  147. package/src/subagents/tui/render-helpers.ts +80 -0
  148. package/src/subagents/tui/render.ts +1358 -0
  149. package/templates/agents/local/supervisor.md +33 -0
  150. package/templates/agents/local/task-merger.md +27 -0
  151. package/templates/agents/local/task-reviewer.md +30 -0
  152. package/templates/agents/local/task-worker.md +34 -0
  153. package/templates/agents/supervisor-routing.md +92 -0
  154. package/templates/agents/supervisor.md +229 -0
  155. package/templates/agents/task-merger.md +214 -0
  156. package/templates/agents/task-reviewer.md +260 -0
  157. package/templates/agents/task-worker-segment.md +44 -0
  158. package/templates/agents/task-worker.md +557 -0
  159. package/templates/tasks/CONTEXT.md +30 -0
  160. package/templates/tasks/EXAMPLE-001-hello-world/PROMPT.md +98 -0
  161. package/templates/tasks/EXAMPLE-001-hello-world/STATUS.md +73 -0
  162. package/templates/tasks/EXAMPLE-002-parallel-smoke/PROMPT.md +97 -0
  163. package/templates/tasks/EXAMPLE-002-parallel-smoke/STATUS.md +73 -0
@@ -0,0 +1,1787 @@
1
+ /**
2
+ * Lane Runner — Headless per-lane execution for Runtime V2
3
+ *
4
+ * Replaces the legacy TMUX-backed lane execution path with a
5
+ * deterministic Node process that owns:
6
+ * - worker iteration loops
7
+ * - STATUS.md progression
8
+ * - .DONE creation detection
9
+ * - reviewer orchestration (future)
10
+ * - lane snapshot emission
11
+ *
12
+ * No Pi extension dependency. No TMUX. No TASK_AUTOSTART.
13
+ *
14
+ * @module orchid/lane-runner
15
+ * @since TP-105
16
+ */
17
+
18
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync, readdirSync } from "fs";
19
+ import { join, dirname, resolve, basename } from "path";
20
+ import { execSync } from "child_process";
21
+ import { fileURLToPath } from "url";
22
+
23
+ import {
24
+ parsePromptMd,
25
+ parseStatusMd,
26
+ generateStatusMd,
27
+ updateStatusField,
28
+ updateStepStatus,
29
+ logExecution,
30
+ isStepComplete,
31
+ type StepInfo,
32
+ type CoreParsedTask,
33
+ } from "./task-executor-core.ts";
34
+
35
+ import {
36
+ spawnAgent,
37
+ buildWorkerToolsAllowlist,
38
+ ENGINE_BRIDGE_TOOLS,
39
+ type AgentHostOptions,
40
+ type AgentHostResult,
41
+ } from "./agent-host.ts";
42
+ import { loadPiSettingsPackages, filterExcludedExtensions } from "./settings-loader.ts";
43
+
44
+ import { appendAgentEvent, writeLaneSnapshot } from "./process-registry.ts";
45
+
46
+ import {
47
+ readOutbox,
48
+ readInbox,
49
+ ackMessage,
50
+ sessionInboxDir,
51
+ ackOutboxMessage,
52
+ appendMailboxAuditEvent,
53
+ drainAgentOutbox,
54
+ } from "./mailbox.ts";
55
+
56
+ import {
57
+ resolvePacketPaths,
58
+ buildRuntimeAgentId,
59
+ runtimeAgentEventsPath,
60
+ type ExecutionUnit,
61
+ type RuntimeAgentId,
62
+ type RuntimeLaneSnapshot,
63
+ type RuntimeAgentTelemetrySnapshot,
64
+ type RuntimeTaskProgress,
65
+ type RuntimeAgentStatus,
66
+ type PacketPaths,
67
+ type LaneTaskOutcome,
68
+ type LaneTaskStatus,
69
+ type SupervisorAlertCallback,
70
+ type StepSegmentMapping,
71
+ type SegmentScopeMode,
72
+ } from "./types.ts";
73
+
74
+ const LANE_RUNNER_DIR = dirname(fileURLToPath(import.meta.url)); // directory containing this module file, derived from its ESM URL
75
+
76
+ // ── Segment Scoping Helpers (Phase A, TP-174) ────────────────────────
77
+
78
/**
 * Collect the numbers of all steps that include a segment owned by `repoId`.
 *
 * The result is used to narrow the "remaining steps" view so that a worker is
 * only shown steps that carry work for its own repo.
 *
 * @param stepSegmentMap - Step-to-segment mapping parsed from PROMPT.md
 * @param repoId - Repo ID whose segments are being looked up
 * @returns Step numbers that have one or more segments for `repoId`
 * @since TP-174
 */
export function getStepsForRepoId(
	stepSegmentMap: StepSegmentMapping[],
	repoId: string,
): Set<number> {
	return new Set<number>(
		stepSegmentMap
			.filter((entry) => entry.segments.some((segment) => segment.repoId === repoId))
			.map((entry) => entry.stepNumber),
	);
}
101
+
102
/**
 * Count the checkboxes of one repo's segment block inside a STATUS.md step.
 *
 * Locates the `### Step N:` section, then the `#### Segment: <repoId>` header
 * inside it, and tallies the `- [ ]` / `- [x]` lines that follow, up to the
 * next `####` header, `###` header, or `---` rule.
 *
 * @param statusContent - Raw STATUS.md content (CRLF is normalised to LF)
 * @param stepNumber - Step number whose section is searched
 * @param repoId - Repo ID named in the segment header
 * @returns Checked/unchecked/total counts plus the trimmed text of every
 *   unchecked item, or null when the step or segment header is not found
 * @since TP-174
 */
export function getSegmentCheckboxes(
	statusContent: string,
	stepNumber: number,
	repoId: string,
): { checked: number; unchecked: number; total: number; uncheckedTexts: string[] } | null {
	const text = statusContent.replace(/\r\n/g, "\n");

	// Find the step section. The trailing colon keeps "Step 1" from matching "Step 10".
	const stepHeaderPattern = new RegExp(`^###\\s+Step\\s+${stepNumber}:`, "m");
	const stepMatch = text.match(stepHeaderPattern);
	if (!stepMatch || stepMatch.index === undefined) return null;

	// The step section runs until the next `### Step N:` header or end of file.
	const afterStep = text.slice(stepMatch.index + stepMatch[0].length);
	const nextStepIdx = afterStep.search(/^###\s+Step\s+\d+:/m);
	const stepContent = nextStepIdx !== -1 ? afterStep.slice(0, nextStepIdx) : afterStep;

	// Find the segment header within this step. repoId is escaped so that regex
	// metacharacters in it are matched literally.
	const escapedRepoId = repoId.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	const segHeaderPattern = new RegExp(`^####\\s+Segment:\\s*${escapedRepoId}\\s*$`, "m");
	const segMatch = stepContent.match(segHeaderPattern);
	if (!segMatch || segMatch.index === undefined) return null;

	// Extract content from segment header to next #### header or ### header or ---
	const afterSeg = stepContent.slice(segMatch.index + segMatch[0].length);
	const nextSectionIdx = afterSeg.search(/^(?:####\s|###\s|---)/m);
	const segContent = nextSectionIdx !== -1 ? afterSeg.slice(0, nextSectionIdx) : afterSeg;

	// Count checkboxes. The gaps use `[^\S\n]*` (whitespace except newline) rather
	// than `\s*`: `\s` also matches line breaks, so a checkbox with empty text
	// would otherwise consume the following line as its text and hide the next
	// checkbox from the tally.
	let checked = 0;
	let unchecked = 0;
	const uncheckedTexts: string[] = [];
	const cbRegex = /^[^\S\n]*-[^\S\n]*\[([ xX])\][^\S\n]*(.*)/gm;
	let m: RegExpExecArray | null;
	while ((m = cbRegex.exec(segContent)) !== null) {
		if (m[1].toLowerCase() === "x") {
			checked++;
		} else {
			unchecked++;
			uncheckedTexts.push(m[2].trim());
		}
	}

	return { checked, unchecked, total: checked + unchecked, uncheckedTexts };
}
161
+
162
/**
 * Report whether a repo's segment block in a step has every checkbox ticked.
 *
 * A segment block that cannot be found, or that contains no checkboxes at
 * all, is treated as not complete.
 *
 * @param statusContent - Raw STATUS.md content
 * @param stepNumber - Step number to inspect
 * @param repoId - Repo ID of the segment
 * @returns true only when the block has at least one checkbox and none are unchecked
 * @since TP-174
 */
export function isSegmentComplete(
	statusContent: string,
	stepNumber: number,
	repoId: string,
): boolean {
	const counts = getSegmentCheckboxes(statusContent, stepNumber, repoId);
	return Boolean(counts) && counts.total !== 0 && counts.unchecked === 0;
}
181
+
182
/**
 * Decide whether one worker iteration runs as `FULL_TASK` or `SEGMENT_SCOPED`.
 *
 * This function is the single source of truth for that decision
 * (TP-196 / #502). Segment-related side-effects (env vars, system-prompt
 * overlay, prompt content, tool registration) should be driven by the returned
 * mode instead of re-deriving the underlying conditions separately, which is
 * the drift risk described in #502.
 *
 * The result is `SEGMENT_SCOPED` only when every one of these holds:
 *   - `stepSegmentMap` is present (parsed from PROMPT.md markers),
 *   - `currentRepoId` is set (the lane knows which repo it is iterating),
 *   - `repoStepNumbers` is non-null (the legacy-fallback-filtered set of
 *     steps with explicit segment markers for this repo),
 *   - `currentStepNumber` is not null (there is a step to evaluate),
 *   - the mapping entry for `currentStepNumber` lists a segment whose
 *     `repoId` equals `currentRepoId`.
 *
 * Every other combination yields `FULL_TASK`.
 *
 * @since TP-196
 */
export function computeSegmentScopeMode(
	stepSegmentMap: StepSegmentMapping[] | undefined | null,
	repoStepNumbers: Set<number> | null,
	currentRepoId: string | null,
	currentStepNumber: number | null,
): SegmentScopeMode {
	if (stepSegmentMap && currentRepoId && repoStepNumbers && currentStepNumber !== null) {
		// Only the first mapping entry for the current step is consulted.
		const stepEntry = stepSegmentMap.find((entry) => entry.stepNumber === currentStepNumber);
		if (stepEntry?.segments.some((segment) => segment.repoId === currentRepoId)) {
			return "SEGMENT_SCOPED";
		}
	}
	return "FULL_TASK";
}
219
+
220
/**
 * Pre-spawn segment-completion check (TP-196 / #508).
 *
 * Tells the lane-runner iteration loop whether spawning a worker would be
 * wasted because this repo's segment checkboxes are already all ticked. On
 * `true` the lane should `break` out of the loop and continue with its
 * post-loop completion handling.
 *
 * Contract:
 *   - `false` whenever `currentRepoId` is empty/null or `repoStepNumbers` is
 *     null or empty (FULL_TASK iterations); those exit through the existing
 *     `remainingSteps.length === 0` check instead.
 *   - `true` only if `isSegmentComplete(statusContent, stepNum, currentRepoId)`
 *     holds for EVERY step number in `repoStepNumbers`.
 *
 * The function takes the STATUS.md content as an argument and does not read
 * the file itself; the caller reads it once per iteration.
 *
 * @since TP-196
 */
export function shouldSkipSpawnForCompleteSegment(
	statusContent: string,
	repoStepNumbers: Set<number> | null,
	currentRepoId: string | null,
): boolean {
	if (!currentRepoId || !repoStepNumbers) return false;
	// An empty set must not count as "everything complete".
	if (repoStepNumbers.size === 0) return false;

	for (const stepNum of repoStepNumbers) {
		if (!isSegmentComplete(statusContent, stepNum, currentRepoId)) return false;
	}
	return true;
}
250
+
251
+ // ── Types ────────────────────────────────────────────────────────────
252
+
253
/**
 * Settings that drive one lane-runner execution.
 *
 * @since TP-105
 */
export interface LaneRunnerConfig {
	/** Identifier of the batch this lane belongs to. */
	batchId: string;
	/** Operator prefix used when building agent IDs (e.g., "orch-henrylach"). */
	agentIdPrefix: string;
	/** Lane number, counted from 1. */
	laneNumber: number;
	/** Absolute path of the lane's worktree. */
	worktreePath: string;
	/** Git branch that is checked out in the worktree. */
	branch: string;
	/** ID of the repo this lane operates on. */
	repoId: string;
	/** Root directory for runtime artifacts (workspace root or repo root). */
	stateRoot: string;
	/** Model for the worker; an empty string inherits from the session. */
	workerModel: string;
	/** Tools made available to the worker. */
	workerTools: string;
	/** Thinking mode for the worker. */
	workerThinking: string;
	/** System prompt for the worker in full-task mode. */
	workerSystemPrompt: string;
	/** Additional worker system prompt for segment-scoped mode (appended to the base prompt). */
	workerSegmentPrompt: string;
	/**
	 * Model for the reviewer; an empty string inherits the session default.
	 * Populated from the TASKPLANE_REVIEWER_MODEL env var, which is sourced
	 * from runnerConfig.reviewer.model.
	 * @since TP-160
	 */
	reviewerModel: string;
	/**
	 * Thinking mode for the reviewer; an empty string inherits.
	 * @since TP-160
	 */
	reviewerThinking: string;
	/**
	 * Comma-separated tool allowlist for the reviewer.
	 * @since TP-160
	 */
	reviewerTools: string;
	/** Autonomy level of the supervisor, consulted by bridge-tool guards. */
	supervisorAutonomy?: "interactive" | "supervised" | "autonomous";
	/** Name of the project, used as context in review requests. */
	projectName?: string;
	/**
	 * Package specifiers (exact match) that must not be forwarded as worker extensions.
	 * @since TP-180
	 */
	workerExcludeExtensions?: string[];
	/**
	 * Package specifiers (exact match) that must not be forwarded as reviewer extensions.
	 * @since TP-180
	 */
	reviewerExcludeExtensions?: string[];
	/** Upper bound on worker iterations before the lane gives up. */
	maxIterations: number;
	/** Stall limit for iterations that make no progress. */
	noProgressLimit: number;
	/** Maximum worker run time per iteration, in minutes. */
	maxWorkerMinutes: number;
	/** Context-pressure threshold (0-100) at which a warning is raised. */
	warnPercent: number;
	/** Context-pressure threshold (0-100) at which the worker is killed. */
	killPercent: number;
	/** Optional hook that surfaces runtime mailbox replies/escalations to the supervisor. */
	onSupervisorAlert?: SupervisorAlertCallback;
	/**
	 * Optional hook invoked once the lane reaches a terminal state (no-progress
	 * kill or hard-fail). The supervisor process relies on it to suppress any
	 * later zombie alerts that are still queued for the dead lane.
	 *
	 * @since TP-187 (#538)
	 */
	onLaneTerminated?: (info: import("./types.ts").LaneTerminatedInfo) => void;
}
328
+
329
/**
 * What the lane-runner reports after executing a single task.
 *
 * @since TP-105
 */
export interface LaneRunnerTaskResult {
	/** Lane task outcome in the standard, engine-compatible shape. */
	outcome: LaneTaskOutcome;
	/** Number of worker iterations that were consumed in total. */
	iterations: number;
	/** Worker cost accumulated over the run, in USD. */
	costUsd: number;
	/** Total number of tokens used. */
	totalTokens: number;
}
344
+
345
+ // ── Core Execution ───────────────────────────────────────────────────
346
+
347
+ /**
348
+ * Execute a single task in a lane using the Runtime V2 headless backend.
349
+ *
350
+ * This is the core function that replaces the legacy TMUX-backed
351
+ * `executeLane()` → `spawnLaneSession()` → `task-runner TASK_AUTOSTART`
352
+ * path with direct child-process hosting.
353
+ *
354
+ * Execution loop:
355
+ * 1. Parse task and ensure STATUS.md exists
356
+ * 2. For each iteration:
357
+ * a. Determine remaining steps
358
+ * b. Spawn worker agent via agent-host
359
+ * c. Wait for worker to exit
360
+ * d. Check progress (checkboxes)
361
+ * e. If all steps complete → success
362
+ * f. If no progress → increment stall counter
363
+ * g. If stall limit or iteration limit hit → fail
364
+ * 3. If all steps complete, check for .DONE
365
+ * 4. Return LaneTaskOutcome
366
+ *
367
+ * @since TP-105
368
+ */
369
+ export async function executeTaskV2(
370
+ unit: ExecutionUnit,
371
+ config: LaneRunnerConfig,
372
+ pauseSignal: { paused: boolean },
373
+ ): Promise<LaneRunnerTaskResult> {
374
+ const startTime = Date.now();
375
+ const statusPath = unit.packet.statusPath;
376
+ const donePath = unit.packet.donePath;
377
+ const promptPath = unit.packet.promptPath;
378
+ const taskFolder = unit.packet.taskFolder;
379
+ const reviewerStatePath = join(taskFolder, ".reviewer-state.json");
380
+ const taskId = unit.taskId;
381
+ const segmentId = unit.segmentId;
382
+ const workerAgentId = buildRuntimeAgentId(config.agentIdPrefix, config.laneNumber, "worker");
383
+
384
+ // ── 1. Ensure STATUS.md exists ──────────────────────────────────
385
+ if (!existsSync(statusPath)) {
386
+ const content = readFileSync(promptPath, "utf-8");
387
+ const parsed = parsePromptMd(content, promptPath);
388
+ writeFileSync(statusPath, generateStatusMd(parsed));
389
+ }
390
+
391
+ updateStatusField(statusPath, "Status", "🟡 In Progress");
392
+ updateStatusField(statusPath, "Last Updated", new Date().toISOString().slice(0, 10));
393
+ logExecution(statusPath, "Task started", "Runtime V2 lane-runner execution");
394
+
395
+ // Pre-segment guard: remove any stale .DONE from a prior segment or prior run.
396
+ // This closes the race window where the monitor sees .DONE before lane-runner
397
+ // can suppress it at segment end. For non-final segments, .DONE must not exist
398
+ // at any point during execution.
399
+ const isNonFinalAtStart =
400
+ segmentId != null &&
401
+ Array.isArray(unit.task.segmentIds) &&
402
+ unit.task.segmentIds.length > 1 &&
403
+ unit.task.segmentIds[unit.task.segmentIds.length - 1] !== segmentId;
404
+ if (isNonFinalAtStart && existsSync(donePath)) {
405
+ try {
406
+ unlinkSync(donePath);
407
+ } catch {
408
+ /* best effort */
409
+ }
410
+ logExecution(
411
+ statusPath,
412
+ "Segment start",
413
+ `Removed stale .DONE before non-final segment ${segmentId}`,
414
+ );
415
+ }
416
+
417
+ // ── 2. Iteration loop ───────────────────────────────────────────
418
+ let noProgressCount = 0;
419
+ let totalIterations = 0;
420
+ let cumulativeCostUsd = 0;
421
+ let cumulativeTokens = 0;
422
+ // TP-115: carry latest worker telemetry across iterations and into post-loop terminal snapshots
423
+ let lastTelemetry: Partial<AgentHostResult> = {};
424
+
425
+ // TP-174: Build segment context once for emitSnapshot calls.
426
+ // Available outside the loop so it can be passed to makeResult too.
427
+ const snapshotSegmentCtx: { stepSegmentMap: StepSegmentMapping[]; repoId: string } | null =
428
+ segmentId && unit.task.stepSegmentMap && config.repoId
429
+ ? (() => {
430
+ const repoSteps = getStepsForRepoId(unit.task.stepSegmentMap!, config.repoId);
431
+ return repoSteps.size > 0
432
+ ? { stepSegmentMap: unit.task.stepSegmentMap!, repoId: config.repoId }
433
+ : null;
434
+ })()
435
+ : null;
436
+
437
+ for (let iter = 0; iter < config.maxIterations; iter++) {
438
+ if (pauseSignal.paused) {
439
+ logExecution(statusPath, "Paused", `User paused at iteration ${totalIterations}`);
440
+ return makeResult(
441
+ taskId,
442
+ segmentId,
443
+ workerAgentId,
444
+ "skipped",
445
+ startTime,
446
+ "Paused by user",
447
+ false,
448
+ totalIterations,
449
+ cumulativeCostUsd,
450
+ cumulativeTokens,
451
+ config,
452
+ statusPath,
453
+ reviewerStatePath,
454
+ undefined,
455
+ snapshotSegmentCtx,
456
+ );
457
+ }
458
+
459
+ // Determine remaining steps
460
+ const currentStatus = parseStatusMd(readFileSync(statusPath, "utf-8"));
461
+ const parsed = parsePromptMd(readFileSync(promptPath, "utf-8"), promptPath);
462
+
463
+ // TP-174: Resolve segment-scoped step filtering.
464
+ // Use config.repoId (structured identity) instead of parsing opaque segmentId.
465
+ const stepSegmentMap = unit.task.stepSegmentMap;
466
+ const currentRepoId = segmentId ? config.repoId : null;
467
+ const rawRepoStepNumbers =
468
+ stepSegmentMap && currentRepoId ? getStepsForRepoId(stepSegmentMap, currentRepoId) : null;
469
+ // TP-174 legacy fallback: If no steps have segments for this repoId
470
+ // (multi-segment task without explicit markers, where all checkboxes
471
+ // are assigned to the fallback/packet repo), disable segment filtering.
472
+ const repoStepNumbers =
473
+ rawRepoStepNumbers && rawRepoStepNumbers.size > 0 ? rawRepoStepNumbers : null;
474
+
475
+ // TP-174: Read STATUS.md content once for segment-scoped checks
476
+ const iterStatusContent = readFileSync(statusPath, "utf-8");
477
+
478
+ const remainingSteps = parsed.steps.filter((step) => {
479
+ // TP-174: When segment-scoped, only show steps that have work for this repoId
480
+ if (repoStepNumbers && !repoStepNumbers.has(step.number)) return false;
481
+ // TP-174: Use segment-scoped completion check in segment mode
482
+ if (repoStepNumbers && currentRepoId) {
483
+ return !isSegmentComplete(iterStatusContent, step.number, currentRepoId);
484
+ }
485
+ const ss = currentStatus.steps.find((s) => s.number === step.number);
486
+ return !isStepComplete(ss);
487
+ });
488
+
489
+ if (remainingSteps.length === 0) break; // All done
490
+
491
+ // TP-196 / #508: Pre-spawn segment-completion check.
492
+ //
493
+ // When the lane is iterating a segment-scoped task, verify that NOT ALL
494
+ // `repoStepNumbers` are segment-complete before incurring the cost of
495
+ // spawning a worker. The `remainingSteps` filter above already enforces
496
+ // this implicitly (via `isSegmentComplete`), but expressing the check
497
+ // explicitly at the spawn boundary:
498
+ // 1. Makes the wasted-iteration prevention contract visible.
499
+ // 2. Provides a defensive backstop for cases where `parsed.steps` and
500
+ // `repoStepNumbers` diverge (e.g., legacy/partial-marker tasks).
501
+ // 3. Gives behavioural tests a clean assertion target (via the pure
502
+ // helper `shouldSkipSpawnForCompleteSegment`).
503
+ if (shouldSkipSpawnForCompleteSegment(iterStatusContent, repoStepNumbers, currentRepoId)) {
504
+ logExecution(
505
+ statusPath,
506
+ "Pre-spawn segment-completion check",
507
+ `all segment checkboxes already complete for repo '${currentRepoId}' — skipping worker spawn (#508)`,
508
+ );
509
+ break;
510
+ }
511
+
512
+ totalIterations++;
513
+ updateStatusField(
514
+ statusPath,
515
+ "Current Step",
516
+ `Step ${remainingSteps[0].number}: ${remainingSteps[0].name}`,
517
+ );
518
+ updateStatusField(statusPath, "Iteration", `${totalIterations}`);
519
+
520
+ // Mark first incomplete step as in-progress
521
+ const firstStep = remainingSteps[0];
522
+ const firstStepStatus = currentStatus.steps.find((s) => s.number === firstStep.number);
523
+ if (firstStepStatus?.status !== "in-progress") {
524
+ updateStepStatus(statusPath, firstStep.number, "in-progress");
525
+ logExecution(statusPath, `Step ${firstStep.number} started`, firstStep.name);
526
+ }
527
+
528
+ // Count checkboxes before worker runs
529
+ // TP-174: When segment-scoped, count only this segment's checkboxes
530
+ let prevTotalChecked: number;
531
+ if (repoStepNumbers && currentRepoId) {
532
+ const preStatusContent = readFileSync(statusPath, "utf-8");
533
+ const segCbs = getSegmentCheckboxes(preStatusContent, firstStep.number, currentRepoId);
534
+ prevTotalChecked = segCbs ? segCbs.checked : 0;
535
+ } else {
536
+ prevTotalChecked = currentStatus.steps.reduce((sum, s) => sum + s.totalChecked, 0);
537
+ }
538
+
539
+ // ── Build worker prompt ─────────────────────────────────────
540
+ const wrapUpFile = join(taskFolder, ".task-wrap-up");
541
+ if (existsSync(wrapUpFile))
542
+ try {
543
+ unlinkSync(wrapUpFile);
544
+ } catch {
545
+ /* ignore */
546
+ }
547
+
548
+ // TP-174/TP-501/TP-196: Compute segment scope mode BEFORE building prompt.
549
+ // `segmentScopeMode` is the authoritative TP-196 flag; `isSegmentScoped` is
550
+ // preserved as a boolean alias for ergonomics at the many existing call sites.
551
+ const segmentScopeMode: SegmentScopeMode = computeSegmentScopeMode(
552
+ stepSegmentMap,
553
+ repoStepNumbers,
554
+ currentRepoId,
555
+ remainingSteps.length > 0 ? remainingSteps[0].number : null,
556
+ );
557
+ const isSegmentScoped = segmentScopeMode === "SEGMENT_SCOPED";
558
+
559
+ const promptLines = [
560
+ `Read your task instructions at: ${promptPath}`,
561
+ `Read your execution state at: ${statusPath}`,
562
+ ``,
563
+ `Task: ${taskId}`,
564
+ `Task folder: ${taskFolder}/`,
565
+ `Iteration: ${totalIterations}`,
566
+ `Wrap-up signal file: ${wrapUpFile}`,
567
+ ``,
568
+ `Execution repo context:`,
569
+ `- Execution repo ID: ${unit.executionRepoId}`,
570
+ `- Execution worktree (worker cwd): ${unit.worktreePath}`,
571
+ `- Lane repo ID: ${config.repoId}`,
572
+ // Only show segment ID when segment-scoped. For FULL_TASK, omit to avoid
573
+ // workers incorrectly self-scoping based on segment metadata.
574
+ ...(isSegmentScoped ? [`- Active segment ID: ${segmentId}`] : []),
575
+ ``,
576
+ `Packet home context:`,
577
+ `- Packet home repo ID: ${unit.packetHomeRepoId}`,
578
+ `- Packet task folder: ${taskFolder}`,
579
+ `- Packet PROMPT path: ${promptPath}`,
580
+ `- Packet STATUS path: ${statusPath}`,
581
+ `- Packet .DONE path: ${donePath}`,
582
+ `- Packet .reviews path: ${unit.packet.reviewsDir}`,
583
+ ``,
584
+ `⚠️ ORCHESTRATED RUN: Do NOT archive or move the task folder. The orchestrator handles post-merge archival.`,
585
+ ``,
586
+ `⚠️ CHECKPOINT RULE: After completing EACH checkbox item, immediately edit STATUS.md to check it off (- [ ] → - [x]) BEFORE starting the next item. Do NOT batch checkbox updates at the end of a step.`,
587
+ ];
588
+
589
+ // Only show segment DAG in segment-scoped mode
590
+ const segmentDag = isSegmentScoped ? unit.task.explicitSegmentDag : null;
591
+ if (segmentDag && segmentDag.repoIds.length > 0) {
592
+ const edgeSummary =
593
+ segmentDag.edges.length > 0
594
+ ? segmentDag.edges.map((edge) => `${edge.fromRepoId}->${edge.toRepoId}`).join(", ")
595
+ : "(no explicit edges)";
596
+ promptLines.push(
597
+ ``,
598
+ `Segment DAG context (from PROMPT metadata):`,
599
+ `- Repos: ${segmentDag.repoIds.join(", ")}`,
600
+ `- Edges: ${edgeSummary}`,
601
+ );
602
+ }
603
+
604
+ // Segment scope mode is determined by which system prompt was loaded.
605
+ // No SegmentScopeMode line needed — the prompt IS the mode.
606
+
607
+ // TP-174/TP-196: Segment-scoped prompt — show only this segment's checkboxes.
608
+ // Gated on the authoritative `isSegmentScoped` (derived from `segmentScopeMode`)
609
+ // rather than the raw composite condition, so the prompt branch can't drift
610
+ // from the mode decision (TP-196 / #502).
611
+ if (isSegmentScoped) {
612
+ const currentStepNum = remainingSteps[0].number;
613
+ // Defensive guards: when `isSegmentScoped === true`, `computeSegmentScopeMode`
614
+ // has already verified `stepSegmentMap`, `currentRepoId`, and that the
615
+ // current step's mapping contains an entry for the active repo. We re-fetch
616
+ // the structures here for clarity. If any are missing we log and skip the
617
+ // segment block (defense-in-depth — should never trip in practice).
618
+ const currentStepMapping = stepSegmentMap?.find((s) => s.stepNumber === currentStepNum);
619
+ const mySegment = currentStepMapping?.segments.find((seg) => seg.repoId === currentRepoId);
620
+
621
+ if (!currentStepMapping || !mySegment) {
622
+ logExecution(
623
+ statusPath,
624
+ "WARN",
625
+ `segmentScopeMode === SEGMENT_SCOPED but current step mapping missing — skipping segment prompt block (currentRepoId=${currentRepoId}, stepNum=${currentStepNum})`,
626
+ );
627
+ } else {
628
+ const otherSegments = currentStepMapping.segments.filter((seg) => seg.repoId !== currentRepoId);
629
+
630
+ // Count the steps associated with this repo (size of repoStepNumbers; 0 when it is unset)
631
+ const totalStepsForRepo = repoStepNumbers ? repoStepNumbers.size : 0;
632
+ const segmentIndexInStep =
633
+ currentStepMapping.segments.findIndex((seg) => seg.repoId === currentRepoId) + 1;
634
+ const totalSegmentsInStep = currentStepMapping.segments.length;
635
+
636
+ promptLines.push(
637
+ ``,
638
+ `Segment-scoped context (Phase A):`,
639
+ `Active segment: ${segmentId} (Step ${currentStepNum}, segment ${segmentIndexInStep} of ${totalSegmentsInStep})`,
640
+ `Your repo: ${currentRepoId}`,
641
+ ``,
642
+ );
643
+
644
+ if (mySegment && mySegment.checkboxes.length > 0) {
645
+ promptLines.push(`Your checkboxes for this step:`);
646
+ for (const cb of mySegment.checkboxes) {
647
+ promptLines.push(` ${cb}`);
648
+ }
649
+ }
650
+
651
+ if (otherSegments.length > 0) {
652
+ promptLines.push(``);
653
+ promptLines.push(`Other segments in this step (NOT yours — do not attempt):`);
654
+ for (const seg of otherSegments) {
655
+ promptLines.push(
656
+ ` - ${seg.repoId}: ${seg.checkboxes.length} checkbox(es) (will run in a separate segment)`,
657
+ );
658
+ }
659
+ }
660
+
661
+ // List completed steps for this repo
662
+ const completedForRepo = parsed.steps.filter((step) => {
663
+ if (!repoStepNumbers || !repoStepNumbers.has(step.number)) return false;
664
+ const ss = currentStatus.steps.find((s) => s.number === step.number);
665
+ return isStepComplete(ss);
666
+ });
667
+ if (completedForRepo.length > 0) {
668
+ promptLines.push(``);
669
+ promptLines.push(
670
+ `Prior steps completed: ${completedForRepo.map((s) => `Step ${s.number} (${s.name})`).join(", ")}`,
671
+ );
672
+ }
673
+
674
+ promptLines.push(
675
+ ``,
676
+ `When all YOUR checkboxes are checked, your segment is done — exit successfully.`,
677
+ `Do NOT attempt work in other repos.`,
678
+ );
679
+ }
680
+ }
681
+
682
+ if (totalIterations > 1 && remainingSteps.length > 0) {
683
+ const remainingSet = new Set(remainingSteps.map((s) => s.number));
684
+ const completedSteps = parsed.steps.filter((s) => !remainingSet.has(s.number));
685
+ promptLines.push(
686
+ ``,
687
+ `IMPORTANT: You exited previously without completing all steps.`,
688
+ `Completed (do not redo): ${completedSteps.map((s) => `Step ${s.number}: ${s.name}`).join(", ") || "(none)"}`,
689
+ `Remaining (focus here): ${remainingSteps.map((s) => `Step ${s.number}: ${s.name}`).join(", ")}`,
690
+ );
691
+
692
+ // If the worker exited without checking any boxes, add a corrective directive
693
+ if (noProgressCount > 0) {
694
+ promptLines.push(
695
+ ``,
696
+ `🚨 CRITICAL: You have exited ${noProgressCount} time(s) without completing work.`,
697
+ `Your previous exit was premature. You said something like "Now let me fix this"`,
698
+ `and then STOPPED instead of actually making the edit.`,
699
+ ``,
700
+ `DO NOT DO THIS AGAIN. When you know what to edit, call the edit tool IMMEDIATELY.`,
701
+ `Do not produce a text message describing what you plan to do. Just do it.`,
702
+ `Work continuously through ALL remaining checkboxes until the task is DONE.`,
703
+ `Do not exit between checkboxes or steps.`,
704
+ );
705
+ }
706
+ }
707
+
708
+ // ── Spawn worker ────────────────────────────────────────────
709
+ const eventsPath = runtimeAgentEventsPath(config.stateRoot, config.batchId, workerAgentId);
710
+
711
+ const mailboxDir = join(config.stateRoot, ".pi", "mailbox", config.batchId, workerAgentId);
712
+ mkdirSync(join(mailboxDir, "inbox"), { recursive: true });
713
+
714
+ const steeringPendingPath = join(taskFolder, ".steering-pending");
715
+
716
+ // TP-106: Bridge extension wiring for agent-side reply/escalate tools
717
+ const outboxDir = join(
718
+ config.stateRoot,
719
+ ".pi",
720
+ "mailbox",
721
+ config.batchId,
722
+ workerAgentId,
723
+ "outbox",
724
+ );
725
+ const bridgeExtensionPath = join(LANE_RUNNER_DIR, "agent-bridge-extension.ts");
726
+
727
+ // TP-180: Forward user-installed extensions to worker agent
728
+ const allPackages = loadPiSettingsPackages(config.stateRoot);
729
+ const workerPackages = filterExcludedExtensions(
730
+ allPackages,
731
+ config.workerExcludeExtensions ?? [],
732
+ );
733
+
734
+ const hostOpts: AgentHostOptions = {
735
+ agentId: workerAgentId,
736
+ role: "worker",
737
+ batchId: config.batchId,
738
+ laneNumber: config.laneNumber,
739
+ taskId,
740
+ repoId: config.repoId,
741
+ cwd: unit.worktreePath,
742
+ prompt: promptLines.join("\n"),
743
+ systemPrompt:
744
+ (isSegmentScoped && config.workerSegmentPrompt
745
+ ? config.workerSystemPrompt + "\n\n---\n\n" + config.workerSegmentPrompt
746
+ : config.workerSystemPrompt) || undefined,
747
+ model: config.workerModel || undefined,
748
+ // TP-184: buildWorkerToolsAllowlist always appends ENGINE_BRIDGE_TOOLS
749
+ // (review_step, notify_supervisor, request_segment_expansion) so that
750
+ // engine-internal coordination tools are present regardless of what the
751
+ // user configured for taskRunner.worker.tools. See issue #530.
752
+ tools: buildWorkerToolsAllowlist(config.workerTools),
753
+ thinking: config.workerThinking || undefined,
754
+ mailboxDir,
755
+ steeringPendingPath,
756
+ eventsPath,
757
+ exitSummaryPath: eventsPath.replace(/\.jsonl$/, "-exit.json"),
758
+ timeoutMs: config.maxWorkerMinutes * 60_000,
759
+ stateRoot: config.stateRoot,
760
+ packet: unit.packet,
761
+ extensions: [bridgeExtensionPath, ...workerPackages],
762
+ env: {
763
+ TASKPLANE_OUTBOX_DIR: outboxDir,
764
+ TASKPLANE_AGENT_ID: workerAgentId,
765
+ TASKPLANE_TASK_FOLDER: taskFolder,
766
+ TASKPLANE_STATUS_PATH: statusPath,
767
+ TASKPLANE_PROMPT_PATH: promptPath,
768
+ TASKPLANE_REVIEWS_DIR: unit.packet.reviewsDir,
769
+ TASKPLANE_REVIEWER_STATE_PATH: reviewerStatePath,
770
+ TASKPLANE_PROJECT_NAME: config.projectName || "project",
771
+ TASKPLANE_TASK_ID: taskId,
772
+ // Hard-set segment env vars based on mode. In FULL_TASK mode,
773
+ // explicitly clear them to prevent env inheritance leaking segment cues.
774
+ TASKPLANE_ACTIVE_SEGMENT_ID: isSegmentScoped ? (segmentId ?? "") : "",
775
+ TASKPLANE_SEGMENT_ID: isSegmentScoped ? (segmentId ?? "") : "",
776
+ TASKPLANE_SUPERVISOR_AUTONOMY: config.supervisorAutonomy || "autonomous",
777
+ ORCH_BATCH_ID: config.batchId,
778
+ ...(config.reviewerModel ? { TASKPLANE_REVIEWER_MODEL: config.reviewerModel } : {}),
779
+ ...(config.reviewerThinking ? { TASKPLANE_REVIEWER_THINKING: config.reviewerThinking } : {}),
780
+ ...(config.reviewerTools ? { TASKPLANE_REVIEWER_TOOLS: config.reviewerTools } : {}),
781
+ // TP-180: Pass state root and reviewer exclusions for extension forwarding
782
+ TASKPLANE_STATE_ROOT: config.stateRoot,
783
+ ...(config.reviewerExcludeExtensions && config.reviewerExcludeExtensions.length > 0
784
+ ? { TASKPLANE_REVIEWER_EXCLUDE_EXTENSIONS: JSON.stringify(config.reviewerExcludeExtensions) }
785
+ : {}),
786
+ },
787
+ // TP-172: Exit interception callback — escalate to supervisor when worker
788
+ // exits without making visible progress (no checkboxes, no blocker logged).
789
+ onPrematureExit: config.onSupervisorAlert
790
+ ? async (assistantMessage: string): Promise<string | null> => {
791
+ // Check if the worker made visible progress during this turn:
792
+ // 1. Checkbox progress (more items checked)
793
+ // 2. Blocker logged (non-empty Blockers section)
794
+ try {
795
+ const statusContent = readFileSync(statusPath, "utf-8");
796
+ // TP-174: Use same scope as prevTotalChecked (segment or global)
797
+ let midTotalChecked: number;
798
+ if (repoStepNumbers && currentRepoId) {
799
+ const segCbs = getSegmentCheckboxes(statusContent, firstStep.number, currentRepoId);
800
+ midTotalChecked = segCbs ? segCbs.checked : 0;
801
+ } else {
802
+ const midStatus = parseStatusMd(statusContent);
803
+ midTotalChecked = midStatus.steps.reduce((sum, s) => sum + s.totalChecked, 0);
804
+ }
805
+ if (midTotalChecked > prevTotalChecked) {
806
+ // Worker checked off checkboxes — let it exit normally
807
+ return null;
808
+ }
809
+ // Check for blocker entries: the Blockers section ends at the next `---` rule, the next `## ` heading, or end of file
809
+ const blockerMatch = statusContent.match(/## Blockers\s*\n([\s\S]*?)(?:\n---|\n## |$)/i);
811
+ if (blockerMatch) {
812
+ const blockerContent = blockerMatch[1].trim();
813
+ // If blockers section has real content (not just "*None*" or empty)
814
+ if (blockerContent && blockerContent !== "*None*") {
815
+ // Worker logged a blocker — let it exit normally
816
+ return null;
817
+ }
818
+ }
819
+ } catch {
820
+ /* If we can't read STATUS.md, proceed with escalation */
821
+ }
822
+
823
+ // No visible progress — compose escalation message.
824
+ // TP-187 (#540): when the worker exits silently, fall back to the most
825
+ // recent `assistant_message` event in events.jsonl so the supervisor
826
+ // has SOMETHING to act on instead of `Worker said: ""`.
827
+ let workerSaid = (assistantMessage ?? "").trim();
828
+ let workerSaidSource: "current-turn" | "events-jsonl-fallback" | "empty-sentinel" =
829
+ "current-turn";
830
+ if (!workerSaid) {
831
+ workerSaidSource = "empty-sentinel";
832
+ try {
833
+ const raw = readFileSync(eventsPath, "utf-8");
834
+ const lines = raw.split("\n");
835
+ // Walk backward to find the most recent assistant_message with non-empty text.
836
+ for (let i = lines.length - 1; i >= 0; i--) {
837
+ const line = lines[i].trim();
838
+ if (!line) continue;
839
+ try {
840
+ const evt = JSON.parse(line) as Record<string, unknown>;
841
+ if (evt.type === "assistant_message") {
842
+ const payload = evt.payload as Record<string, unknown> | undefined;
843
+ const text = typeof payload?.text === "string" ? payload.text.trim() : "";
844
+ if (text) {
845
+ workerSaid = text;
846
+ workerSaidSource = "events-jsonl-fallback";
847
+ break;
848
+ }
849
+ }
850
+ } catch {
851
+ /* skip malformed line */
852
+ }
853
+ }
854
+ } catch {
855
+ /* events.jsonl unreadable; sentinel will be used */
856
+ }
857
+ }
858
+ if (!workerSaid) {
859
+ workerSaid =
860
+ "(no assistant message captured — worker exited without producing visible output)";
861
+ workerSaidSource = "empty-sentinel";
862
+ }
863
+ const truncatedMsg = workerSaid.slice(0, 500);
864
+ const uncheckedItems: string[] = [];
865
+ try {
866
+ const statusContent = readFileSync(statusPath, "utf-8");
867
+ // TP-174: When segment-scoped, report only this segment's unchecked items
868
+ if (repoStepNumbers && currentRepoId) {
869
+ const segCbs = getSegmentCheckboxes(statusContent, firstStep.number, currentRepoId);
870
+ if (segCbs) {
871
+ for (const text of segCbs.uncheckedTexts.slice(0, 5)) {
872
+ uncheckedItems.push(text);
873
+ }
874
+ }
875
+ } else {
876
+ const uncheckedMatches = statusContent.match(/^- \[ \] .+$/gm);
877
+ if (uncheckedMatches) {
878
+ for (const item of uncheckedMatches.slice(0, 5)) {
879
+ uncheckedItems.push(item.replace(/^- \[ \] /, "").trim());
880
+ }
881
+ }
882
+ }
883
+ } catch {
884
+ /* best effort */
885
+ }
886
+
887
+ const currentStepInfo =
888
+ remainingSteps.length > 0
889
+ ? `Step ${remainingSteps[0].number}: ${remainingSteps[0].name}`
890
+ : "Unknown";
891
+
892
+ // Fire supervisor alert
893
+ try {
894
+ config.onSupervisorAlert!({
895
+ category: "worker-exit-intercept",
896
+ summary:
897
+ `🔄 Worker on lane ${config.laneNumber} wants to exit with no progress.\n` +
898
+ ` Task: ${taskId}\n` +
899
+ ` Current step: ${currentStepInfo}\n` +
900
+ ` Iteration: ${totalIterations}, No-progress count: ${noProgressCount + 1}\n` +
901
+ ` Unchecked items: ${uncheckedItems.length > 0 ? uncheckedItems.join("; ") : "(none found)"}\n` +
902
+ ` Worker said: "${truncatedMsg}"` +
903
+ (workerSaidSource === "events-jsonl-fallback"
904
+ ? ` (fallback: most-recent assistant_message from events.jsonl)\n`
905
+ : workerSaidSource === "empty-sentinel"
906
+ ? ` (no assistant message captured this iteration)\n`
907
+ : "\n") +
908
+ `\nSend a steering message to ${workerAgentId} with targeted instructions,` +
909
+ ` or reply "skip" / "let it fail" to close the session.`,
910
+ context: {
911
+ taskId,
912
+ laneId: `lane-${config.laneNumber}`,
913
+ laneNumber: config.laneNumber,
914
+ agentId: workerAgentId,
915
+ exitReason: `worker_exit_no_progress: ${truncatedMsg.slice(0, 200)}`,
916
+ },
917
+ });
918
+ } catch {
919
+ /* best effort — don't block on alert failure */
920
+ }
921
+
922
+ // Poll worker mailbox inbox for supervisor reply (60s timeout)
923
+ const SUPERVISOR_REPLY_TIMEOUT_MS = 60_000;
924
+ const POLL_INTERVAL_MS = 2_000;
925
+ const escalationTimestamp = Date.now();
926
+ const inboxDir = sessionInboxDir(config.stateRoot, config.batchId, workerAgentId);
927
+
928
+ const supervisorReply = await new Promise<string | null>((resolve) => {
929
+ const deadline = Date.now() + SUPERVISOR_REPLY_TIMEOUT_MS;
930
+ const poll = () => {
931
+ if (Date.now() >= deadline) {
932
+ resolve(null); // Timeout — fall back to corrective re-spawn
933
+ return;
934
+ }
935
+ try {
936
+ const messages = readInbox(inboxDir, config.batchId);
937
+ // Only accept messages newer than escalation timestamp
938
+ for (const { filename, message } of messages) {
939
+ if (message.timestamp >= escalationTimestamp && message.from === "supervisor") {
940
+ // Consume the message
941
+ const ackDir = join(dirname(inboxDir), "ack");
942
+ try {
943
+ ackMessage(inboxDir, filename);
944
+ } catch {
945
+ /* best effort */
946
+ }
947
+ resolve(message.content);
948
+ return;
949
+ }
950
+ }
951
+ } catch {
952
+ /* inbox not ready yet */
953
+ }
954
+ setTimeout(poll, POLL_INTERVAL_MS);
955
+ };
956
+ poll();
957
+ });
958
+
959
+ if (!supervisorReply) {
960
+ // Timeout — let the session close, corrective re-spawn will handle it
961
+ logExecution(
962
+ statusPath,
963
+ "Exit intercept timeout",
964
+ `Supervisor did not respond within ${SUPERVISOR_REPLY_TIMEOUT_MS / 1000}s — closing session`,
965
+ );
966
+ return null;
967
+ }
968
+
969
+ // Interpret supervisor reply: close directives vs instructional content
970
+ const normalizedReply = supervisorReply.trim().toLowerCase();
971
+ const CLOSE_DIRECTIVES = ["skip", "let it fail", "close", "abort", "stop"];
972
+ // Only short messages (< 30 chars) can be close directives.
973
+ // Longer messages are always instructions even if they start with "stop".
974
+ const isShortEnoughForDirective = normalizedReply.length < 30;
975
+ if (
976
+ isShortEnoughForDirective &&
977
+ CLOSE_DIRECTIVES.some(
978
+ (d) =>
979
+ normalizedReply === d ||
980
+ normalizedReply.startsWith(d + ":") ||
981
+ normalizedReply.startsWith(d + " ") ||
982
+ normalizedReply.startsWith(d + ".") ||
983
+ normalizedReply.startsWith(d + " -"),
984
+ )
985
+ ) {
986
+ logExecution(
987
+ statusPath,
988
+ "Exit intercept close",
989
+ `Supervisor directed session close: "${supervisorReply.slice(0, 100)}"`,
990
+ );
991
+ return null;
992
+ }
993
+
994
+ // Instructional reply — return as new prompt for the worker
995
+ logExecution(
996
+ statusPath,
997
+ "Exit intercept reprompt",
998
+ `Supervisor provided instructions (${supervisorReply.length} chars) — reprompting worker`,
999
+ );
1000
+ return supervisorReply;
1001
+ }
1002
+ : undefined,
1003
+ };
1004
+
1005
+ // TP-184: Defense-in-depth sanity check. Under normal operation,
1006
+ // `buildWorkerToolsAllowlist()` guarantees ENGINE_BRIDGE_TOOLS are
1007
+ // present in the allowlist. Warn (do NOT throw or block spawn) if any
1008
+ // is missing — this catches future helper bugs or accidental bypasses.
1009
+ // See issue #530 for what silently breaks when bridge tools are missing.
1010
+ const toolsList = (hostOpts.tools ?? "")
1011
+ .split(",")
1012
+ .map((s) => s.trim())
1013
+ .filter(Boolean);
1014
+ for (const bridgeTool of ENGINE_BRIDGE_TOOLS) {
1015
+ if (!toolsList.includes(bridgeTool)) {
1016
+ logExecution(
1017
+ statusPath,
1018
+ "WARN",
1019
+ `workerTools allowlist missing engine bridge tool '${bridgeTool}'; review/coordination features will silently no-op`,
1020
+ );
1021
+ }
1022
+ }
1023
+
1024
+ // Context pressure: write wrap-up signal before kill
1025
+ let workerKillReason: "context" | "timer" | null = null;
1026
+ let iterationTelemetry: Partial<AgentHostResult> = {};
1027
+
1028
+ const spawned = spawnAgent(hostOpts, undefined, (telemetry) => {
1029
+ try {
1030
+ // Context pressure check
1031
+ if (telemetry.contextUsage) {
1032
+ const pct = telemetry.contextUsage.percent;
1033
+ if (pct >= config.warnPercent) {
1034
+ const msg = `Wrap up (context ${Math.round(pct)}%)`;
1035
+ if (!existsSync(wrapUpFile)) writeFileSync(wrapUpFile, msg);
1036
+ }
1037
+ if (pct >= config.killPercent) {
1038
+ workerKillReason = "context";
1039
+ spawned.kill();
1040
+ }
1041
+ }
1042
+
1043
+ iterationTelemetry = telemetry;
1044
+ lastTelemetry = telemetry;
1045
+ // Emit lane snapshot
1046
+ emitSnapshot(
1047
+ config,
1048
+ taskId,
1049
+ segmentId,
1050
+ "running",
1051
+ telemetry,
1052
+ statusPath,
1053
+ reviewerStatePath,
1054
+ snapshotSegmentCtx,
1055
+ );
1056
+ } catch {
1057
+ /* non-fatal: telemetry callback must never crash the engine */
1058
+ }
1059
+ });
1060
+
1061
+ // Reviewer telemetry is written by the worker bridge during review_step.
1062
+ // Poll snapshot refresh independently from worker message_end cadence so
1063
+ // the dashboard sees reviewer activity while tool calls are in-flight.
1064
+ let reviewerSnapshotFailures = 0;
1065
+ const reviewerRefreshFailureThreshold = 5;
1066
+ const reviewerRefresh = setInterval(() => {
1067
+ const ok = emitSnapshot(
1068
+ config,
1069
+ taskId,
1070
+ segmentId,
1071
+ "running",
1072
+ iterationTelemetry,
1073
+ statusPath,
1074
+ reviewerStatePath,
1075
+ snapshotSegmentCtx,
1076
+ );
1077
+ if (ok) {
1078
+ reviewerSnapshotFailures = 0;
1079
+ return;
1080
+ }
1081
+
1082
+ reviewerSnapshotFailures += 1;
1083
+ if (reviewerSnapshotFailures >= reviewerRefreshFailureThreshold) {
1084
+ clearInterval(reviewerRefresh);
1085
+ logExecution(
1086
+ statusPath,
1087
+ "Snapshot refresh disabled",
1088
+ `Lane ${config.laneNumber}, task ${taskId}: ${reviewerSnapshotFailures} consecutive emitSnapshot failures`,
1089
+ );
1090
+ }
1091
+ }, 1000);
1092
+
1093
+ let workerResult: AgentHostResult;
1094
+ try {
1095
+ workerResult = await spawned.promise;
1096
+ } finally {
1097
+ clearInterval(reviewerRefresh);
1098
+ }
1099
+
1100
+ // TP-115: Update lastTelemetry with definitive final values from AgentHostResult
1101
+ lastTelemetry = workerResult;
1102
+
1103
+ // Clean up wrap-up signal
1104
+ if (existsSync(wrapUpFile))
1105
+ try {
1106
+ unlinkSync(wrapUpFile);
1107
+ } catch {
1108
+ /* ignore */
1109
+ }
1110
+
1111
+ // Accumulate costs
1112
+ cumulativeCostUsd += workerResult.costUsd;
1113
+ cumulativeTokens +=
1114
+ workerResult.inputTokens +
1115
+ workerResult.outputTokens +
1116
+ workerResult.cacheReadTokens +
1117
+ workerResult.cacheWriteTokens;
1118
+
1119
+ // ── TP-106: Poll worker outbox for replies/escalations ─────
1120
+ try {
1121
+ const outboxMessages = readOutbox(config.stateRoot, config.batchId, workerAgentId);
1122
+ for (const msg of outboxMessages) {
1123
+ const sanitized = msg.content.replace(/\r?\n/g, " / ").slice(0, 200);
1124
+ logExecution(statusPath, `Agent ${msg.type}`, sanitized);
1125
+
1126
+ if (msg.type === "reply" || msg.type === "escalate") {
1127
+ appendAgentEvent(config.stateRoot, config.batchId, workerAgentId, {
1128
+ batchId: config.batchId,
1129
+ agentId: workerAgentId,
1130
+ role: "worker",
1131
+ laneNumber: config.laneNumber,
1132
+ taskId,
1133
+ repoId: config.repoId,
1134
+ ts: Date.now(),
1135
+ type: msg.type === "reply" ? "reply_sent" : "escalation_sent",
1136
+ payload: {
1137
+ messageId: msg.id,
1138
+ replyTo: msg.replyTo ?? null,
1139
+ content: sanitized,
1140
+ },
1141
+ });
1142
+
1143
+ appendMailboxAuditEvent(config.stateRoot, config.batchId, {
1144
+ type: msg.type === "reply" ? "message_replied" : "message_escalated",
1145
+ from: workerAgentId,
1146
+ to: "supervisor",
1147
+ messageId: msg.id,
1148
+ messageType: msg.type,
1149
+ contentPreview: sanitized,
1150
+ });
1151
+
1152
+ if (config.onSupervisorAlert) {
1153
+ const isEscalation = msg.type === "escalate";
1154
+ try {
1155
+ config.onSupervisorAlert({
1156
+ category: "agent-message",
1157
+ summary:
1158
+ `${isEscalation ? "🚨" : "📨"} Agent ${isEscalation ? "escalation" : "reply"} from ${workerAgentId}\n` +
1159
+ ` Task: ${taskId}\n` +
1160
+ ` Lane: lane-${config.laneNumber}\n` +
1161
+ ` Message: ${sanitized}`,
1162
+ context: {
1163
+ taskId,
1164
+ laneId: `lane-${config.laneNumber}`,
1165
+ laneNumber: config.laneNumber,
1166
+ agentId: workerAgentId,
1167
+ messageId: msg.id,
1168
+ exitReason: `${isEscalation ? "agent_escalation" : "agent_reply"}: ${sanitized}`,
1169
+ },
1170
+ });
1171
+ } catch {
1172
+ /* best effort */
1173
+ }
1174
+ }
1175
+ }
1176
+
1177
+ // Consume outbox message to prevent duplicate processing in later iterations.
1178
+ ackOutboxMessage(config.stateRoot, config.batchId, workerAgentId, msg.id);
1179
+ }
1180
+ } catch {
1181
+ /* best effort */
1182
+ }
1183
+
1184
+ // ── Steering annotation ─────────────────────────────────────
1185
+ try {
1186
+ if (existsSync(steeringPendingPath)) {
1187
+ const raw = readFileSync(steeringPendingPath, "utf-8");
1188
+ for (const line of raw.split("\n").filter((l) => l.trim())) {
1189
+ try {
1190
+ const entry = JSON.parse(line) as { ts: number; content: string; id: string };
1191
+ const sanitized = entry.content.replace(/\r?\n/g, " / ").replace(/\|/g, "\\|").slice(0, 200);
1192
+ const ts = new Date(entry.ts).toISOString().slice(0, 16).replace("T", " ");
1193
+ logExecution(statusPath, "⚠️ Steering", sanitized);
1194
+ } catch {
1195
+ /* skip malformed */
1196
+ }
1197
+ }
1198
+ unlinkSync(steeringPendingPath);
1199
+ }
1200
+ } catch {
1201
+ /* non-fatal */
1202
+ }
1203
+
1204
+ // Log iteration result
1205
+ const statusMsg = workerResult.killed
1206
+ ? `killed (${workerKillReason === "context" ? "context limit" : "wall-clock timeout"})`
1207
+ : workerResult.exitCode === 0
1208
+ ? "done"
1209
+ : `error (code ${workerResult.exitCode})`;
1210
+ logExecution(
1211
+ statusPath,
1212
+ `Worker iter ${totalIterations}`,
1213
+ `${statusMsg} in ${Math.round(workerResult.durationMs / 1000)}s, tools: ${workerResult.toolCalls}`,
1214
+ );
1215
+
1216
+ // ── Check progress ──────────────────────────────────────────
1217
+ const afterStatusContent = readFileSync(statusPath, "utf-8");
1218
+ const afterStatus = parseStatusMd(afterStatusContent);
1219
+ // TP-174: Segment-scoped progress delta
1220
+ let afterTotalChecked: number;
1221
+ if (repoStepNumbers && currentRepoId) {
1222
+ const segCbs = getSegmentCheckboxes(afterStatusContent, firstStep.number, currentRepoId);
1223
+ afterTotalChecked = segCbs ? segCbs.checked : 0;
1224
+ } else {
1225
+ afterTotalChecked = afterStatus.steps.reduce((sum, s) => sum + s.totalChecked, 0);
1226
+ }
1227
+ const progressDelta = afterTotalChecked - prevTotalChecked;
1228
+
1229
+ if (progressDelta <= 0) {
1230
+ // Check for soft progress: uncommitted changes in the worktree
1231
+ // indicate the worker is actively editing code even if no checkbox
1232
+ // was checked yet. This avoids false stall detection on complex
1233
+ // steps where analysis + editing spans multiple tool calls.
1234
+ let hasSoftProgress = false;
1235
+ try {
1236
+ const diffOutput = execSync("git diff --stat HEAD", {
1237
+ cwd: unit.worktreePath,
1238
+ timeout: 5000,
1239
+ encoding: "utf-8",
1240
+ stdio: ["pipe", "pipe", "pipe"],
1241
+ }).trim();
1242
+ // Only count source file changes as soft progress, not just STATUS.md
1243
+ const changedFiles = diffOutput.split("\n").filter((l) => l.includes("|"));
1244
+ const sourceChanges = changedFiles.filter(
1245
+ (l) => !l.includes("STATUS.md") && !l.includes(".steering"),
1246
+ );
1247
+ hasSoftProgress = sourceChanges.length > 0;
1248
+ } catch {
1249
+ /* git not available or timeout — treat as no soft progress */
1250
+ }
1251
+
1252
+ if (hasSoftProgress) {
1253
+ // Worker has uncommitted code changes — don't count toward stall.
1254
+ // Reset the counter since the worker is actively editing.
1255
+ logExecution(
1256
+ statusPath,
1257
+ "Soft progress",
1258
+ `Iteration ${totalIterations}: 0 new checkboxes but uncommitted source changes detected — not counting as stall`,
1259
+ );
1260
+ noProgressCount = 0;
1261
+ } else {
1262
+ noProgressCount++;
1263
+ logExecution(
1264
+ statusPath,
1265
+ "No progress",
1266
+ `Iteration ${totalIterations}: 0 new checkboxes (${noProgressCount}/${config.noProgressLimit} stall limit)`,
1267
+ );
1268
+ if (noProgressCount >= config.noProgressLimit) {
1269
+ logExecution(statusPath, "Task blocked", `No progress after ${noProgressCount} iterations`);
1270
+ // TP-187 (#538): synchronous outbox drain at lane-termination decision
1271
+ // point. Purges any pending escalations/replies/segment-expansions the
1272
+ // worker emitted just before termination so they are not later re-
1273
+ // discovered and re-forwarded as zombie supervisor alerts.
1274
+ try {
1275
+ const drained = drainAgentOutbox(config.stateRoot, config.batchId, workerAgentId);
1276
+ if (drained > 0) {
1277
+ logExecution(
1278
+ statusPath,
1279
+ "Outbox drained",
1280
+ `No-progress kill: drained ${drained} pending outbox entr${drained === 1 ? "y" : "ies"} for ${workerAgentId}`,
1281
+ );
1282
+ }
1283
+ } catch {
1284
+ /* best effort — do not block termination */
1285
+ }
1286
+ // TP-187 (#538): notify the supervisor process so it can suppress any
1287
+ // further alerts queued for this lane (zombie-alert filter).
1288
+ if (config.onLaneTerminated) {
1289
+ try {
1290
+ config.onLaneTerminated({
1291
+ laneNumber: config.laneNumber,
1292
+ agentId: workerAgentId,
1293
+ batchId: config.batchId,
1294
+ terminatedAt: Date.now(),
1295
+ reason: "no-progress-kill",
1296
+ });
1297
+ } catch {
1298
+ /* best effort */
1299
+ }
1300
+ }
1301
+ return makeResult(
1302
+ taskId,
1303
+ segmentId,
1304
+ workerAgentId,
1305
+ "failed",
1306
+ startTime,
1307
+ `No progress after ${noProgressCount} iterations`,
1308
+ false,
1309
+ totalIterations,
1310
+ cumulativeCostUsd,
1311
+ cumulativeTokens,
1312
+ config,
1313
+ statusPath,
1314
+ reviewerStatePath,
1315
+ lastTelemetry,
1316
+ snapshotSegmentCtx,
1317
+ );
1318
+ }
1319
+ }
1320
+ } else {
1321
+ noProgressCount = 0;
1322
+ }
1323
+
1324
+ // Mark completed steps
1325
+ // TP-174: When segment-scoped, mark step complete when the segment's
1326
+ // checkboxes are all checked (not the full step which may have other segments).
1327
+ if (repoStepNumbers && currentRepoId) {
1328
+ for (const stepNum of repoStepNumbers) {
1329
+ if (isSegmentComplete(afterStatusContent, stepNum, currentRepoId)) {
1330
+ // Only mark step complete in STATUS.md if ALL segments in that step
1331
+ // are complete (not just ours). But for loop exit, we only care about ours.
1332
+ const ss = afterStatus.steps.find((s) => s.number === stepNum);
1333
+ if (isStepComplete(ss)) {
1334
+ updateStepStatus(statusPath, stepNum, "complete");
1335
+ }
1336
+ }
1337
+ }
1338
+ } else {
1339
+ for (const step of parsed.steps) {
1340
+ const ss = afterStatus.steps.find((s) => s.number === step.number);
1341
+ if (isStepComplete(ss)) {
1342
+ updateStepStatus(statusPath, step.number, "complete");
1343
+ }
1344
+ }
1345
+ }
1346
+
1347
+ // Check if all steps are now complete
1348
+ // TP-174: When segment-scoped, exit when all steps for this repoId
1349
+ // have their segment checkboxes complete.
1350
+ let allComplete: boolean;
1351
+ if (repoStepNumbers && currentRepoId) {
1352
+ allComplete = [...repoStepNumbers].every((stepNum) =>
1353
+ isSegmentComplete(afterStatusContent, stepNum, currentRepoId),
1354
+ );
1355
+ } else {
1356
+ allComplete = parsed.steps.every((step) => {
1357
+ const ss = afterStatus.steps.find((s) => s.number === step.number);
1358
+ return isStepComplete(ss);
1359
+ });
1360
+ }
1361
+ if (allComplete) break;
1362
+ }
1363
+
1364
+ // ── 3. Post-loop completion check ───────────────────────────────
1365
+ const finalStatusContent = readFileSync(statusPath, "utf-8");
1366
+ const finalStatus = parseStatusMd(finalStatusContent);
1367
+ const parsed = parsePromptMd(readFileSync(promptPath, "utf-8"), promptPath);
1368
+
1369
+ // TP-174: Segment-scoped post-loop check. Re-derive repo scoping since
1370
+ // the iteration loop variables are out of scope here.
1371
+ const postLoopRepoId = segmentId ? config.repoId : null;
1372
+ const postLoopStepSegMap = unit.task.stepSegmentMap;
1373
+ const postLoopRepoSteps =
1374
+ postLoopStepSegMap && postLoopRepoId
1375
+ ? getStepsForRepoId(postLoopStepSegMap, postLoopRepoId)
1376
+ : null;
1377
+ const effectivePostLoopRepoSteps =
1378
+ postLoopRepoSteps && postLoopRepoSteps.size > 0 ? postLoopRepoSteps : null;
1379
+
1380
+ let allStepsComplete: boolean;
1381
+ if (effectivePostLoopRepoSteps && postLoopRepoId) {
1382
+ allStepsComplete = [...effectivePostLoopRepoSteps].every((stepNum) =>
1383
+ isSegmentComplete(finalStatusContent, stepNum, postLoopRepoId),
1384
+ );
1385
+ } else {
1386
+ allStepsComplete = parsed.steps.every((step) => {
1387
+ const ss = finalStatus.steps.find((s) => s.number === step.number);
1388
+ return isStepComplete(ss);
1389
+ });
1390
+ }
1391
+
1392
+ if (!allStepsComplete) {
1393
+ let incomplete: string;
1394
+ if (effectivePostLoopRepoSteps && postLoopRepoId) {
1395
+ incomplete = [...effectivePostLoopRepoSteps]
1396
+ .filter((stepNum) => !isSegmentComplete(finalStatusContent, stepNum, postLoopRepoId))
1397
+ .map((n) => `Step ${n}`)
1398
+ .join(", ");
1399
+ } else {
1400
+ incomplete = parsed.steps
1401
+ .filter((step) => {
1402
+ const ss = finalStatus.steps.find((s) => s.number === step.number);
1403
+ return !isStepComplete(ss);
1404
+ })
1405
+ .map((s) => `Step ${s.number}`)
1406
+ .join(", ");
1407
+ }
1408
+ logExecution(statusPath, "Task incomplete", `Max iterations reached. Incomplete: ${incomplete}`);
1409
+ return makeResult(
1410
+ taskId,
1411
+ segmentId,
1412
+ workerAgentId,
1413
+ "failed",
1414
+ startTime,
1415
+ `Max iterations (${config.maxIterations}) reached with incomplete steps: ${incomplete}`,
1416
+ false,
1417
+ totalIterations,
1418
+ cumulativeCostUsd,
1419
+ cumulativeTokens,
1420
+ config,
1421
+ statusPath,
1422
+ reviewerStatePath,
1423
+ lastTelemetry,
1424
+ snapshotSegmentCtx,
1425
+ );
1426
+ }
1427
+
1428
+ // TP-145: Determine if this is a non-final segment of a multi-segment task.
1429
+ // If more segments remain after this one, suppress .DONE creation so that
1430
+ // the engine can advance the segment frontier and execute subsequent segments.
1431
+ // .DONE must only exist when ALL segments of a multi-segment task are complete.
1432
+ const isNonFinalSegment =
1433
+ segmentId != null &&
1434
+ Array.isArray(unit.task.segmentIds) &&
1435
+ unit.task.segmentIds.length > 1 &&
1436
+ unit.task.segmentIds[unit.task.segmentIds.length - 1] !== segmentId;
1437
+
1438
+ // TP-165: Check for pending expansion requests in the worker's outbox.
1439
+ // If the worker filed expansion requests, more segments may be added by the
1440
+ // engine at the segment boundary — .DONE must not be created even if this
1441
+ // appears to be the final segment based on the static segmentIds list.
1442
+ const hasPendingExpansionRequests =
1443
+ segmentId != null &&
1444
+ hasPendingExpansionRequestFiles(config.stateRoot, config.batchId, workerAgentId);
1445
+
1446
+ if (isNonFinalSegment || hasPendingExpansionRequests) {
1447
+ // Segment succeeded but more segments remain — suppress .DONE and "✅ Complete" status.
1448
+ // The engine will advance the frontier and dispatch the next segment.
1449
+ // Also delete any .DONE the worker may have created directly (workers have
1450
+ // write access and sometimes create .DONE on their own, bypassing this gate).
1451
+ if (existsSync(donePath)) {
1452
+ let deleted = false;
1453
+ try {
1454
+ unlinkSync(donePath);
1455
+ deleted = true;
1456
+ } catch {
1457
+ /* best effort */
1458
+ }
1459
+ if (deleted) {
1460
+ logExecution(
1461
+ statusPath,
1462
+ "Segment complete",
1463
+ `Segment ${segmentId} succeeded (non-final — removed premature worker-created .DONE)`,
1464
+ );
1465
+ } else {
1466
+ logExecution(
1467
+ statusPath,
1468
+ "Segment complete",
1469
+ `⚠️ Segment ${segmentId} succeeded but FAILED to remove premature .DONE — downstream segments may be skipped`,
1470
+ );
1471
+ }
1472
+ } else {
1473
+ logExecution(
1474
+ statusPath,
1475
+ "Segment complete",
1476
+ `Segment ${segmentId} succeeded (not final — .DONE suppressed)`,
1477
+ );
1478
+ }
1479
+ const suppressionReason = isNonFinalSegment ? "non-final" : "pending expansion requests";
1480
+ return makeResult(
1481
+ taskId,
1482
+ segmentId,
1483
+ workerAgentId,
1484
+ "succeeded",
1485
+ startTime,
1486
+ `Segment completed (${suppressionReason} — .DONE suppressed)`,
1487
+ false,
1488
+ totalIterations,
1489
+ cumulativeCostUsd,
1490
+ cumulativeTokens,
1491
+ config,
1492
+ statusPath,
1493
+ reviewerStatePath,
1494
+ lastTelemetry,
1495
+ snapshotSegmentCtx,
1496
+ );
1497
+ }
1498
+
1499
+ // Create .DONE if not already present (final segment or single-segment/whole-task execution)
1500
+ if (!existsSync(donePath)) {
1501
+ writeFileSync(donePath, `Completed: ${new Date().toISOString()}\nTask: ${taskId}\n`);
1502
+ }
1503
+ updateStatusField(statusPath, "Status", "✅ Complete");
1504
+ logExecution(statusPath, "Task complete", ".DONE created");
1505
+
1506
+ return makeResult(
1507
+ taskId,
1508
+ segmentId,
1509
+ workerAgentId,
1510
+ "succeeded",
1511
+ startTime,
1512
+ ".DONE file created by lane-runner",
1513
+ true,
1514
+ totalIterations,
1515
+ cumulativeCostUsd,
1516
+ cumulativeTokens,
1517
+ config,
1518
+ statusPath,
1519
+ reviewerStatePath,
1520
+ lastTelemetry,
1521
+ snapshotSegmentCtx,
1522
+ );
1523
+ }
1524
+
1525
+ // ── Helpers ──────────────────────────────────────────────────────────
1526
+
1527
/**
 * TP-165: Report whether the worker's outbox still holds pending segment
 * expansion requests.
 *
 * The outbox directory is `<stateRoot>/.pi/mailbox/<batchId>/<agentId>/outbox`.
 * An entry counts as pending when its name matches `segment-expansion-*.json`
 * exactly; names with anything after `.json` (e.g. `.processed`, `.rejected`)
 * do not match. If any pending file exists, the engine will process it at the
 * segment boundary — and may add more segments to the task.
 *
 * @param stateRoot - Root directory that contains the `.pi` state folder.
 * @param batchId - Batch whose mailbox is inspected.
 * @param agentId - Agent whose outbox is inspected.
 * @returns true if at least one pending expansion request file exists; false
 *          when the outbox is missing, has no matching entry, or cannot be read.
 */
export function hasPendingExpansionRequestFiles(
  stateRoot: string,
  batchId: string,
  agentId: string,
): boolean {
  const pendingRequestName = /^segment-expansion-.+\.json$/;
  const outboxDir = join(stateRoot, ".pi", "mailbox", batchId, agentId, "outbox");

  if (!existsSync(outboxDir)) {
    return false;
  }

  try {
    for (const name of readdirSync(outboxDir)) {
      if (pendingRequestName.test(name)) {
        return true;
      }
    }
    return false;
  } catch {
    // An unreadable outbox is treated the same as one with no pending requests.
    return false;
  }
}
1550
+
1551
/**
 * Translate a lane task's final status into the status written on the
 * terminal lane snapshot.
 *
 * @param status - Final status of the lane task.
 * @returns "complete" for "succeeded", "idle" for "skipped", and "failed"
 *          for every other status.
 */
export function mapLaneTaskStatusToTerminalSnapshotStatus(
  status: LaneTaskStatus,
): "idle" | "complete" | "failed" {
  switch (status) {
    case "succeeded":
      return "complete";
    case "skipped":
      return "idle";
    default:
      return "failed";
  }
}
1558
+
1559
/**
 * Translate a lane snapshot status into the runtime status reported for the
 * lane's worker agent.
 *
 * @param status - Lane-level snapshot status.
 * @returns "running" for "running", "exited" for "complete", "wrapping_up"
 *          for "idle", and "crashed" for anything else (i.e. "failed").
 */
export function mapLaneSnapshotStatusToWorkerStatus(
  status: "running" | "idle" | "complete" | "failed",
): RuntimeAgentStatus {
  switch (status) {
    case "running":
      return "running";
    case "complete":
      return "exited";
    case "idle":
      return "wrapping_up";
    default:
      return "crashed";
  }
}
1567
+
1568
/**
 * Assemble the LaneRunnerTaskResult for a task (or task segment) and, when
 * `config`, `statusPath` and `reviewerStatePath` are all provided, emit the
 * terminal lane snapshot as a side effect.
 *
 * @param taskId - Task identifier copied into the outcome.
 * @param segmentId - Segment identifier, or null when not segment-scoped.
 * @param sessionName - Session name recorded on the outcome.
 * @param status - Final lane task status.
 * @param startTime - Start timestamp recorded on the outcome.
 * @param exitReason - Human-readable reason recorded on the outcome.
 * @param doneFileFound - Value recorded as `outcome.doneFileFound`.
 * @param iterations - Iteration count reported on the result.
 * @param costUsd - Cumulative cost reported on the result.
 * @param totalTokens - Cumulative token count reported on the result.
 * @param config - Lane runner config; supplies `laneNumber` and enables the snapshot.
 * @param statusPath - Path forwarded to the snapshot emitter.
 * @param reviewerStatePath - Reviewer state path forwarded to the snapshot emitter.
 * @param finalTelemetry - Last known agent telemetry; missing counters become 0.
 * @returns The assembled result; `outcome.endTime` is taken from `Date.now()`.
 */
function makeResult(
  taskId: string,
  segmentId: string | null,
  sessionName: string,
  status: LaneTaskStatus,
  startTime: number,
  exitReason: string,
  doneFileFound: boolean,
  iterations: number,
  costUsd: number,
  totalTokens: number,
  config?: LaneRunnerConfig,
  statusPath?: string,
  reviewerStatePath?: string,
  finalTelemetry?: Partial<AgentHostResult>,
  /** TP-174: Segment context for segment-scoped snapshot progress */
  segmentCtx?: { stepSegmentMap: StepSegmentMapping[]; repoId: string } | null,
): LaneRunnerTaskResult {
  // Normalise once so both the outcome and the snapshot read from the same source.
  const reported: Partial<AgentHostResult> = finalTelemetry ?? {};

  // Skipped tasks carry no telemetry at all; otherwise absent counters default to 0.
  const telemetry =
    status !== "skipped"
      ? {
          inputTokens: reported.inputTokens ?? 0,
          outputTokens: reported.outputTokens ?? 0,
          cacheReadTokens: reported.cacheReadTokens ?? 0,
          cacheWriteTokens: reported.cacheWriteTokens ?? 0,
          costUsd: reported.costUsd ?? 0,
          toolCalls: reported.toolCalls ?? 0,
          durationMs: reported.durationMs ?? 0,
        }
      : undefined;

  const result: LaneRunnerTaskResult = {
    outcome: {
      taskId,
      status,
      segmentId,
      startTime,
      endTime: Date.now(),
      exitReason,
      sessionName,
      doneFileFound,
      laneNumber: config?.laneNumber,
      telemetry,
    },
    iterations,
    costUsd,
    totalTokens,
  };

  // Without the full snapshot context there is nothing more to do.
  if (!config || !statusPath || !reviewerStatePath) {
    return result;
  }

  // TP-115: Emit terminal snapshot with real telemetry from agent-host result
  emitSnapshot(
    config,
    taskId,
    segmentId,
    mapLaneTaskStatusToTerminalSnapshotStatus(status),
    reported,
    statusPath,
    reviewerStatePath,
    segmentCtx,
  );

  return result;
}
1634
+
1635
/** Max age for reviewer state file before it's considered stale (2 minutes). */
const REVIEWER_STATE_STALE_MS = 120_000;

/**
 * Read the reviewer's telemetry from its state file, if the reviewer is
 * currently running.
 *
 * The second argument may be either the reviewer state file itself or a path
 * whose basename is `status.md` (case-insensitive); in the latter case the
 * sibling `.reviewer-state.json` in the same directory is read instead.
 *
 * @param config - Lane runner config; `agentIdPrefix` and `laneNumber` are
 *                 used to build the reviewer's agent id.
 * @param reviewerStatePathOrStatusPath - Reviewer state path or STATUS.md path.
 * @returns A snapshot with status "running", or null when the file is
 *          missing, unreadable, not valid JSON, reports a status other than
 *          "running", or carries an `updatedAt` older than
 *          REVIEWER_STATE_STALE_MS. Numeric fields that are not finite
 *          numbers fall back to 0 (`reviewStep` falls back to undefined).
 */
export function readReviewerTelemetrySnapshot(
  config: LaneRunnerConfig,
  reviewerStatePathOrStatusPath: string,
): (RuntimeAgentTelemetrySnapshot & { reviewType?: string; reviewStep?: number }) | null {
  let reviewerPath = reviewerStatePathOrStatusPath;
  if (basename(reviewerStatePathOrStatusPath).toLowerCase() === "status.md") {
    reviewerPath = join(dirname(reviewerStatePathOrStatusPath), ".reviewer-state.json");
  }
  if (!existsSync(reviewerPath)) {
    return null;
  }

  // Accept a value only when it is a finite number; otherwise use the fallback.
  const finiteOr = <T>(value: unknown, fallback: T): number | T =>
    Number.isFinite(value) ? Number(value) : fallback;

  try {
    const state = JSON.parse(readFileSync(reviewerPath, "utf-8")) as Partial<{
      status: string;
      elapsedMs: number;
      toolCalls: number;
      contextPct: number;
      costUsd: number;
      lastTool: string;
      inputTokens: number;
      outputTokens: number;
      cacheReadTokens: number;
      cacheWriteTokens: number;
      updatedAt: number;
      reviewType: string;
      reviewStep: number;
    }>;

    if (state.status !== "running") {
      return null;
    }

    // Stale guard: only applies when updatedAt is present (truthy).
    if (state.updatedAt) {
      const ageMs = Date.now() - state.updatedAt;
      if (ageMs > REVIEWER_STATE_STALE_MS) {
        return null;
      }
    }

    return {
      agentId: buildRuntimeAgentId(config.agentIdPrefix, config.laneNumber, "reviewer"),
      status: "running",
      elapsedMs: finiteOr(state.elapsedMs, 0),
      toolCalls: finiteOr(state.toolCalls, 0),
      contextPct: finiteOr(state.contextPct, 0),
      costUsd: finiteOr(state.costUsd, 0),
      lastTool: typeof state.lastTool === "string" ? state.lastTool : "",
      inputTokens: finiteOr(state.inputTokens, 0),
      outputTokens: finiteOr(state.outputTokens, 0),
      cacheReadTokens: finiteOr(state.cacheReadTokens, 0),
      cacheWriteTokens: finiteOr(state.cacheWriteTokens, 0),
      reviewType: typeof state.reviewType === "string" ? state.reviewType : undefined,
      reviewStep: finiteOr(state.reviewStep, undefined),
    };
  } catch {
    // Read or parse failure: treat as "no reviewer telemetry available".
    return null;
  }
}
1690
+
1691
/**
 * Write a lane snapshot to disk. NON-THROWING by contract — every error is
 * caught and swallowed (nothing is logged here). The function is called from
 * setInterval callbacks and onTelemetry callbacks where an unhandled throw
 * would trigger uncaughtException and crash the engine-worker process.
 *
 * Progress is derived from STATUS.md on a best-effort basis: if the file
 * cannot be read or parsed, the snapshot is still written with
 * `progress: null`.
 *
 * @param config - Lane runner config (batch, lane, repo and state-root ids).
 * @param taskId - Task the snapshot describes.
 * @param segmentId - Segment the snapshot describes, or null.
 * @param status - Lane status to record; also mapped to the worker status.
 * @param telemetry - Worker telemetry; missing counters are written as 0 / "".
 * @param statusPath - Path of the STATUS.md file used to compute progress.
 * @param reviewerStatePath - Path passed to the reviewer telemetry reader.
 * @returns true when snapshot write succeeds, false when it fails.
 */
function emitSnapshot(
  config: LaneRunnerConfig,
  taskId: string,
  segmentId: string | null,
  status: "running" | "idle" | "complete" | "failed",
  telemetry: Partial<AgentHostResult>,
  statusPath: string,
  reviewerStatePath: string,
  /** TP-174: Optional segment context for segment-scoped progress reporting */
  segmentContext?: { stepSegmentMap: StepSegmentMapping[]; repoId: string } | null,
): boolean {
  // Parse progress from STATUS.md; any failure yields null (best effort).
  const loadProgress = (): RuntimeTaskProgress | null => {
    try {
      const statusText = readFileSync(statusPath, "utf-8");
      const statusDoc = parseStatusMd(statusText);
      const stepLabel = statusText.match(/\*\*Current Step:\*\*\s*(.+)/);

      let checked = 0;
      let total = 0;
      if (segmentContext) {
        // TP-174: Segment-scoped progress when segment markers are present.
        // Only count checkboxes from steps that belong to this segment's repoId.
        const { stepSegmentMap, repoId } = segmentContext;
        for (const stepNum of getStepsForRepoId(stepSegmentMap, repoId)) {
          const boxes = getSegmentCheckboxes(statusText, stepNum, repoId);
          if (!boxes) continue;
          checked += boxes.checked;
          total += boxes.total;
        }
      } else {
        // Whole-task progress: sum every step's counters.
        for (const step of statusDoc.steps) {
          checked += step.totalChecked;
          total += step.totalItems;
        }
      }

      return {
        currentStep: stepLabel?.[1]?.trim() || "Unknown",
        checked,
        total,
        iteration: statusDoc.iteration,
        reviews: statusDoc.reviewCounter,
      };
    } catch {
      /* best effort */
      return null;
    }
  };

  try {
    const progress = loadProgress();
    const reviewer = readReviewerTelemetrySnapshot(config, reviewerStatePath);

    const snapshot: RuntimeLaneSnapshot = {
      batchId: config.batchId,
      laneNumber: config.laneNumber,
      laneId: `lane-${config.laneNumber}`,
      repoId: config.repoId,
      taskId,
      segmentId,
      status,
      worker: {
        agentId: buildRuntimeAgentId(config.agentIdPrefix, config.laneNumber, "worker"),
        status: mapLaneSnapshotStatusToWorkerStatus(status),
        elapsedMs: telemetry.durationMs ?? 0,
        toolCalls: telemetry.toolCalls ?? 0,
        contextPct: telemetry.contextUsage?.percent ?? 0,
        costUsd: telemetry.costUsd ?? 0,
        lastTool: telemetry.lastTool ?? "",
        inputTokens: telemetry.inputTokens ?? 0,
        outputTokens: telemetry.outputTokens ?? 0,
        cacheReadTokens: telemetry.cacheReadTokens ?? 0,
        cacheWriteTokens: telemetry.cacheWriteTokens ?? 0,
      },
      reviewer,
      progress,
      updatedAt: Date.now(),
    };

    // NOTE(review): the `as any` cast bypasses type checking between
    // RuntimeLaneSnapshot and writeLaneSnapshot's parameter — confirm whether
    // the two types can be aligned so the cast can be dropped.
    writeLaneSnapshot(config.stateRoot, config.batchId, config.laneNumber, snapshot as any);
    return true;
  } catch {
    // Non-fatal: snapshot is telemetry, not execution-critical.
    // Swallow to prevent uncaughtException crash in setInterval/callback contexts.
    return false;
  }
}