@pi-agents/orchid 0.1.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/LICENSE +21 -0
- package/README.md +246 -0
- package/agents/AGENTS-MANIFEST.md +42 -0
- package/agents/brain.md +42 -0
- package/agents/context-builder.md +46 -0
- package/agents/delegate.md +12 -0
- package/agents/dev-1.md +42 -0
- package/agents/oracle.md +73 -0
- package/agents/planner.md +55 -0
- package/agents/researcher.md +52 -0
- package/agents/reviewer.md +79 -0
- package/agents/scout.md +50 -0
- package/agents/tester.md +45 -0
- package/agents/worker.md +55 -0
- package/extensions/ralph.ts +1 -0
- package/extensions/reviewer-extension.ts +125 -0
- package/extensions/task-orchestrator.ts +28 -0
- package/package.json +63 -0
- package/prompts/gather-context-and-clarify.md +13 -0
- package/prompts/parallel-cleanup.md +59 -0
- package/prompts/parallel-context-build.md +53 -0
- package/prompts/parallel-handoff-plan.md +59 -0
- package/prompts/parallel-research.md +50 -0
- package/prompts/parallel-review.md +54 -0
- package/prompts/review-loop.md +41 -0
- package/skills/orchid/SKILL.md +214 -0
- package/skills/orchid/orchid-cleanup/SKILL.md +122 -0
- package/skills/orchid/orchid-converge/SKILL.md +124 -0
- package/skills/orchid/orchid-decompose/SKILL.md +201 -0
- package/skills/orchid/orchid-doctor/SKILL.md +162 -0
- package/skills/orchid/orchid-investigate/SKILL.md +102 -0
- package/skills/orchid/orchid-launch/SKILL.md +147 -0
- package/skills/ralph/SKILL.md +73 -0
- package/skills/subagents/pi-subagents/SKILL.md +813 -0
- package/src/index.ts +7 -0
- package/src/orchestrator/abort.ts +534 -0
- package/src/orchestrator/agent-bridge-extension.ts +1020 -0
- package/src/orchestrator/agent-host.ts +954 -0
- package/src/orchestrator/cleanup.ts +776 -0
- package/src/orchestrator/config-loader.ts +1412 -0
- package/src/orchestrator/config-schema.ts +690 -0
- package/src/orchestrator/config.ts +81 -0
- package/src/orchestrator/context-window.ts +66 -0
- package/src/orchestrator/diagnostic-reports.ts +475 -0
- package/src/orchestrator/diagnostics.ts +394 -0
- package/src/orchestrator/discovery.ts +1833 -0
- package/src/orchestrator/engine-worker.ts +415 -0
- package/src/orchestrator/engine.ts +5940 -0
- package/src/orchestrator/execution.ts +3104 -0
- package/src/orchestrator/extension.ts +5934 -0
- package/src/orchestrator/formatting.ts +785 -0
- package/src/orchestrator/git.ts +88 -0
- package/src/orchestrator/index.ts +28 -0
- package/src/orchestrator/lane-runner.ts +1787 -0
- package/src/orchestrator/mailbox.ts +780 -0
- package/src/orchestrator/merge.ts +3414 -0
- package/src/orchestrator/messages.ts +1062 -0
- package/src/orchestrator/migrations.ts +278 -0
- package/src/orchestrator/naming.ts +117 -0
- package/src/orchestrator/path-resolver.ts +275 -0
- package/src/orchestrator/persistence.ts +2625 -0
- package/src/orchestrator/process-registry.ts +452 -0
- package/src/orchestrator/quality-gate.ts +1085 -0
- package/src/orchestrator/resume.ts +3488 -0
- package/src/orchestrator/sessions.ts +57 -0
- package/src/orchestrator/settings-loader.ts +136 -0
- package/src/orchestrator/settings-tui.ts +2208 -0
- package/src/orchestrator/sidecar-telemetry.ts +267 -0
- package/src/orchestrator/supervisor.ts +4548 -0
- package/src/orchestrator/task-executor-core.ts +675 -0
- package/src/orchestrator/tmux-compat.ts +37 -0
- package/src/orchestrator/tool-allowlist-constants.ts +37 -0
- package/src/orchestrator/types.ts +4465 -0
- package/src/orchestrator/verification.ts +547 -0
- package/src/orchestrator/waves.ts +1564 -0
- package/src/orchestrator/workspace.ts +707 -0
- package/src/orchestrator/worktree.ts +2725 -0
- package/src/ralph/index.ts +825 -0
- package/src/subagents/agents/agent-management.ts +648 -0
- package/src/subagents/agents/agent-scope.ts +6 -0
- package/src/subagents/agents/agent-selection.ts +23 -0
- package/src/subagents/agents/agent-serializer.ts +86 -0
- package/src/subagents/agents/agents.ts +832 -0
- package/src/subagents/agents/chain-serializer.ts +137 -0
- package/src/subagents/agents/frontmatter.ts +29 -0
- package/src/subagents/agents/identity.ts +30 -0
- package/src/subagents/agents/skills.ts +632 -0
- package/src/subagents/extension/config.ts +16 -0
- package/src/subagents/extension/control-notices.ts +92 -0
- package/src/subagents/extension/doctor.ts +199 -0
- package/src/subagents/extension/fanout-child.ts +170 -0
- package/src/subagents/extension/index.ts +573 -0
- package/src/subagents/extension/schemas.ts +168 -0
- package/src/subagents/intercom/intercom-bridge.ts +379 -0
- package/src/subagents/intercom/result-intercom.ts +377 -0
- package/src/subagents/runs/background/async-execution.ts +712 -0
- package/src/subagents/runs/background/async-job-tracker.ts +310 -0
- package/src/subagents/runs/background/async-resume.ts +345 -0
- package/src/subagents/runs/background/async-status.ts +325 -0
- package/src/subagents/runs/background/completion-dedupe.ts +63 -0
- package/src/subagents/runs/background/notify.ts +108 -0
- package/src/subagents/runs/background/parallel-groups.ts +45 -0
- package/src/subagents/runs/background/result-watcher.ts +307 -0
- package/src/subagents/runs/background/run-id-resolver.ts +83 -0
- package/src/subagents/runs/background/run-status.ts +269 -0
- package/src/subagents/runs/background/stale-run-reconciler.ts +336 -0
- package/src/subagents/runs/background/subagent-runner.ts +1808 -0
- package/src/subagents/runs/background/top-level-async.ts +13 -0
- package/src/subagents/runs/foreground/chain-clarify.ts +1333 -0
- package/src/subagents/runs/foreground/chain-execution.ts +938 -0
- package/src/subagents/runs/foreground/execution.ts +918 -0
- package/src/subagents/runs/foreground/subagent-executor.ts +2527 -0
- package/src/subagents/runs/shared/completion-guard.ts +147 -0
- package/src/subagents/runs/shared/long-running-guard.ts +175 -0
- package/src/subagents/runs/shared/mcp-direct-tool-allowlist.ts +365 -0
- package/src/subagents/runs/shared/model-fallback.ts +103 -0
- package/src/subagents/runs/shared/nested-events.ts +819 -0
- package/src/subagents/runs/shared/nested-path.ts +52 -0
- package/src/subagents/runs/shared/nested-render.ts +115 -0
- package/src/subagents/runs/shared/parallel-utils.ts +109 -0
- package/src/subagents/runs/shared/pi-args.ts +220 -0
- package/src/subagents/runs/shared/pi-spawn.ts +115 -0
- package/src/subagents/runs/shared/run-history.ts +60 -0
- package/src/subagents/runs/shared/single-output.ts +164 -0
- package/src/subagents/runs/shared/subagent-control.ts +226 -0
- package/src/subagents/runs/shared/subagent-prompt-runtime.ts +170 -0
- package/src/subagents/runs/shared/worktree.ts +577 -0
- package/src/subagents/shared/artifacts.ts +98 -0
- package/src/subagents/shared/atomic-json.ts +16 -0
- package/src/subagents/shared/file-coalescer.ts +40 -0
- package/src/subagents/shared/fork-context.ts +76 -0
- package/src/subagents/shared/formatters.ts +133 -0
- package/src/subagents/shared/jsonl-writer.ts +81 -0
- package/src/subagents/shared/model-info.ts +78 -0
- package/src/subagents/shared/post-exit-stdio-guard.ts +85 -0
- package/src/subagents/shared/session-identity.ts +10 -0
- package/src/subagents/shared/session-tokens.ts +44 -0
- package/src/subagents/shared/settings.ts +397 -0
- package/src/subagents/shared/status-format.ts +49 -0
- package/src/subagents/shared/types.ts +822 -0
- package/src/subagents/shared/utils.ts +450 -0
- package/src/subagents/slash/prompt-template-bridge.ts +397 -0
- package/src/subagents/slash/slash-bridge.ts +174 -0
- package/src/subagents/slash/slash-commands.ts +528 -0
- package/src/subagents/slash/slash-live-state.ts +292 -0
- package/src/subagents/tui/render-helpers.ts +80 -0
- package/src/subagents/tui/render.ts +1358 -0
- package/templates/agents/local/supervisor.md +33 -0
- package/templates/agents/local/task-merger.md +27 -0
- package/templates/agents/local/task-reviewer.md +30 -0
- package/templates/agents/local/task-worker.md +34 -0
- package/templates/agents/supervisor-routing.md +92 -0
- package/templates/agents/supervisor.md +229 -0
- package/templates/agents/task-merger.md +214 -0
- package/templates/agents/task-reviewer.md +260 -0
- package/templates/agents/task-worker-segment.md +44 -0
- package/templates/agents/task-worker.md +557 -0
- package/templates/tasks/CONTEXT.md +30 -0
- package/templates/tasks/EXAMPLE-001-hello-world/PROMPT.md +98 -0
- package/templates/tasks/EXAMPLE-001-hello-world/STATUS.md +73 -0
- package/templates/tasks/EXAMPLE-002-parallel-smoke/PROMPT.md +97 -0
- package/templates/tasks/EXAMPLE-002-parallel-smoke/STATUS.md +73 -0
|
@@ -0,0 +1,1787 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lane Runner — Headless per-lane execution for Runtime V2
|
|
3
|
+
*
|
|
4
|
+
* Replaces the legacy TMUX-backed lane execution path with a
|
|
5
|
+
* deterministic Node process that owns:
|
|
6
|
+
* - worker iteration loops
|
|
7
|
+
* - STATUS.md progression
|
|
8
|
+
* - .DONE creation detection
|
|
9
|
+
* - reviewer orchestration (future)
|
|
10
|
+
* - lane snapshot emission
|
|
11
|
+
*
|
|
12
|
+
* No Pi extension dependency. No TMUX. No TASK_AUTOSTART.
|
|
13
|
+
*
|
|
14
|
+
* @module orchid/lane-runner
|
|
15
|
+
* @since TP-105
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync, readdirSync } from "fs";
|
|
19
|
+
import { join, dirname, resolve, basename } from "path";
|
|
20
|
+
import { execSync } from "child_process";
|
|
21
|
+
import { fileURLToPath } from "url";
|
|
22
|
+
|
|
23
|
+
import {
|
|
24
|
+
parsePromptMd,
|
|
25
|
+
parseStatusMd,
|
|
26
|
+
generateStatusMd,
|
|
27
|
+
updateStatusField,
|
|
28
|
+
updateStepStatus,
|
|
29
|
+
logExecution,
|
|
30
|
+
isStepComplete,
|
|
31
|
+
type StepInfo,
|
|
32
|
+
type CoreParsedTask,
|
|
33
|
+
} from "./task-executor-core.ts";
|
|
34
|
+
|
|
35
|
+
import {
|
|
36
|
+
spawnAgent,
|
|
37
|
+
buildWorkerToolsAllowlist,
|
|
38
|
+
ENGINE_BRIDGE_TOOLS,
|
|
39
|
+
type AgentHostOptions,
|
|
40
|
+
type AgentHostResult,
|
|
41
|
+
} from "./agent-host.ts";
|
|
42
|
+
import { loadPiSettingsPackages, filterExcludedExtensions } from "./settings-loader.ts";
|
|
43
|
+
|
|
44
|
+
import { appendAgentEvent, writeLaneSnapshot } from "./process-registry.ts";
|
|
45
|
+
|
|
46
|
+
import {
|
|
47
|
+
readOutbox,
|
|
48
|
+
readInbox,
|
|
49
|
+
ackMessage,
|
|
50
|
+
sessionInboxDir,
|
|
51
|
+
ackOutboxMessage,
|
|
52
|
+
appendMailboxAuditEvent,
|
|
53
|
+
drainAgentOutbox,
|
|
54
|
+
} from "./mailbox.ts";
|
|
55
|
+
|
|
56
|
+
import {
|
|
57
|
+
resolvePacketPaths,
|
|
58
|
+
buildRuntimeAgentId,
|
|
59
|
+
runtimeAgentEventsPath,
|
|
60
|
+
type ExecutionUnit,
|
|
61
|
+
type RuntimeAgentId,
|
|
62
|
+
type RuntimeLaneSnapshot,
|
|
63
|
+
type RuntimeAgentTelemetrySnapshot,
|
|
64
|
+
type RuntimeTaskProgress,
|
|
65
|
+
type RuntimeAgentStatus,
|
|
66
|
+
type PacketPaths,
|
|
67
|
+
type LaneTaskOutcome,
|
|
68
|
+
type LaneTaskStatus,
|
|
69
|
+
type SupervisorAlertCallback,
|
|
70
|
+
type StepSegmentMapping,
|
|
71
|
+
type SegmentScopeMode,
|
|
72
|
+
} from "./types.ts";
|
|
73
|
+
|
|
74
|
+
/** Directory containing this module, derived from `import.meta.url`. */
const LANE_RUNNER_DIR = dirname(fileURLToPath(import.meta.url));
|
|
75
|
+
|
|
76
|
+
// ── Segment Scoping Helpers (Phase A, TP-174) ────────────────────────
|
|
77
|
+
|
|
78
|
+
/**
 * Collect the step numbers whose segment list mentions a given repo.
 *
 * The result is used to narrow the "remaining steps" view so that a worker
 * is only shown steps that carry work for its own repo.
 *
 * @param stepSegmentMap - Step-to-segment mapping parsed from PROMPT.md
 * @param repoId - Repo ID to look for
 * @returns Step numbers owning at least one segment for `repoId`, in map order
 * @since TP-174
 */
export function getStepsForRepoId(
	stepSegmentMap: StepSegmentMapping[],
	repoId: string,
): Set<number> {
	const matchingStepNumbers = stepSegmentMap
		.filter(({ segments }) => segments.some((segment) => segment.repoId === repoId))
		.map(({ stepNumber }) => stepNumber);
	return new Set<number>(matchingStepNumbers);
}
|
|
101
|
+
|
|
102
|
+
/**
 * Count the checkboxes of one segment block inside STATUS.md.
 *
 * Looks for a `#### Segment: <repoId>` header inside the `### Step N:` section
 * and tallies the `- [ ]` / `- [x]` lines that belong to that segment block.
 *
 * Boundaries:
 * - A step section ends at the next `### Step N:` header, at any level-1/2
 *   heading, or at end of file.
 * - A segment block ends at the next heading of level 1-4, at a `---` rule,
 *   or at the end of the step section.
 *
 * Checkbox lines are matched strictly line-by-line, so an item with no text
 * (e.g. a bare `- [x]`) is counted on its own and never absorbs the next line.
 *
 * @param statusContent - Raw STATUS.md content (LF or CRLF line endings)
 * @param stepNumber - Step number to look in
 * @param repoId - Repo ID of the segment (matched literally)
 * @returns Checked/unchecked counts plus the trimmed text of every unchecked
 *          item, or `null` when the step or the segment header is not found
 * @since TP-174
 */
export function getSegmentCheckboxes(
	statusContent: string,
	stepNumber: number,
	repoId: string,
): { checked: number; unchecked: number; total: number; uncheckedTexts: string[] } | null {
	// Normalize CRLF so the multiline anchors below behave the same on all inputs.
	const text = statusContent.replace(/\r\n/g, "\n");

	// Locate the step header; the trailing ":" keeps "Step 1" from matching "Step 10".
	const stepHeaderPattern = new RegExp(`^###\\s+Step\\s+${stepNumber}:`, "m");
	const stepMatch = stepHeaderPattern.exec(text);
	if (!stepMatch) return null;

	// The step section stops at the next step header or at a higher-level heading,
	// so trailing sections such as "## Notes" are not treated as part of the last step.
	const afterStep = text.slice(stepMatch.index + stepMatch[0].length);
	const stepEnd = afterStep.search(/^(?:###\s+Step\s+\d+:|#{1,2}\s)/m);
	const stepContent = stepEnd !== -1 ? afterStep.slice(0, stepEnd) : afterStep;

	// Locate the segment header inside the step; the repo ID is regex-escaped.
	const escapedRepoId = repoId.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	const segHeaderPattern = new RegExp(`^####\\s+Segment:\\s*${escapedRepoId}\\s*$`, "m");
	const segMatch = segHeaderPattern.exec(stepContent);
	if (!segMatch) return null;

	// The segment block stops at the next heading (levels 1-4) or horizontal rule.
	const afterSeg = stepContent.slice(segMatch.index + segMatch[0].length);
	const segEnd = afterSeg.search(/^(?:#{1,4}\s|---)/m);
	const segContent = segEnd !== -1 ? afterSeg.slice(0, segEnd) : afterSeg;

	// Tally checkboxes. Only spaces/tabs are allowed as in-line whitespace: `\s`
	// would also match "\n" and let a text-less item swallow the following line,
	// hiding that line's checkbox from the count.
	let checked = 0;
	const uncheckedTexts: string[] = [];
	for (const item of segContent.matchAll(/^[ \t]*-[ \t]*\[([ xX])\][ \t]*(.*)$/gm)) {
		if (item[1] === " ") {
			uncheckedTexts.push(item[2].trim());
		} else {
			checked++;
		}
	}
	const unchecked = uncheckedTexts.length;

	return { checked, unchecked, total: checked + unchecked, uncheckedTexts };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Tell whether a segment block has been fully ticked off.
 *
 * A segment counts as complete only when its block exists in STATUS.md,
 * contains at least one checkbox, and none of those checkboxes is unchecked.
 *
 * @param statusContent - Raw STATUS.md content
 * @param stepNumber - Step number the segment lives in
 * @param repoId - Repo ID of the segment
 * @returns `true` when every checkbox of the segment block is checked
 * @since TP-174
 */
export function isSegmentComplete(
	statusContent: string,
	stepNumber: number,
	repoId: string,
): boolean {
	const counts = getSegmentCheckboxes(statusContent, stepNumber, repoId);
	// A missing block, or a block without any checkbox, is never "complete".
	return counts !== null && counts.total !== 0 && counts.unchecked === 0;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Decide whether one worker iteration runs as `FULL_TASK` or `SEGMENT_SCOPED`.
 *
 * Intended as the single source of truth for that decision (TP-196 / #502):
 * segment-related side-effects (env vars, system-prompt overlay, prompt
 * content, tool registration) should derive from the returned mode instead of
 * re-evaluating the underlying conditions separately.
 *
 * The result is `SEGMENT_SCOPED` only when all of these hold:
 *   - `stepSegmentMap`, `repoStepNumbers` and `currentRepoId` are all truthy
 *     (the map and the set are not null/undefined, the repo ID is non-empty);
 *   - `currentStepNumber` is not `null`;
 *   - the first mapping entry for `currentStepNumber` lists a segment whose
 *     `repoId` equals `currentRepoId`.
 *
 * Every other combination yields `FULL_TASK`.
 *
 * @param stepSegmentMap - Step-to-segment mapping of the task, if any
 * @param repoStepNumbers - Step numbers with segments for this repo, or `null`
 * @param currentRepoId - Repo the lane is iterating, or `null`
 * @param currentStepNumber - Step being evaluated, or `null`
 * @returns The scope mode for this iteration
 * @since TP-196
 */
export function computeSegmentScopeMode(
	stepSegmentMap: StepSegmentMapping[] | undefined | null,
	repoStepNumbers: Set<number> | null,
	currentRepoId: string | null,
	currentStepNumber: number | null,
): SegmentScopeMode {
	if (currentStepNumber === null || !repoStepNumbers || !currentRepoId || !stepSegmentMap) {
		return "FULL_TASK";
	}

	for (const mapping of stepSegmentMap) {
		if (mapping.stepNumber !== currentStepNumber) continue;
		// Only the first mapping entry for the step is consulted.
		const ownsSegment = mapping.segments.some((segment) => segment.repoId === currentRepoId);
		return ownsSegment ? "SEGMENT_SCOPED" : "FULL_TASK";
	}

	// No mapping entry for the current step.
	return "FULL_TASK";
}
|
|
219
|
+
|
|
220
|
+
/**
 * Pre-spawn segment-completion check (TP-196 / #508).
 *
 * Answers "is there nothing left for this repo's segments?" so the iteration
 * loop can avoid spawning a worker, `break`, and fall through to its
 * post-loop completion handling.
 *
 * Contract:
 *   - `false` whenever `currentRepoId` is null/empty or `repoStepNumbers` is
 *     null or empty (FULL_TASK iterations; those exit through the
 *     `remainingSteps.length === 0` check instead).
 *   - otherwise `true` iff `isSegmentComplete(statusContent, step, currentRepoId)`
 *     holds for every step number in `repoStepNumbers`.
 *
 * Works purely on the arguments: the caller reads STATUS.md and passes its
 * content in.
 *
 * @param statusContent - Raw STATUS.md content
 * @param repoStepNumbers - Step numbers with segments for this repo, or `null`
 * @param currentRepoId - Repo the lane is iterating, or `null`
 * @returns `true` when the worker spawn should be skipped
 * @since TP-196
 */
export function shouldSkipSpawnForCompleteSegment(
	statusContent: string,
	repoStepNumbers: Set<number> | null,
	currentRepoId: string | null,
): boolean {
	if (!currentRepoId || !repoStepNumbers || repoStepNumbers.size === 0) {
		return false;
	}

	for (const stepNum of repoStepNumbers) {
		// One incomplete segment is enough to require a worker.
		if (!isSegmentComplete(statusContent, stepNum, currentRepoId)) return false;
	}
	return true;
}
|
|
250
|
+
|
|
251
|
+
// ── Types ────────────────────────────────────────────────────────────
|
|
252
|
+
|
|
253
|
+
/**
 * Configuration for a lane-runner execution.
 *
 * One instance describes a single lane: where it runs (worktree, branch,
 * repo, state root), how its worker and reviewer agents are configured, and
 * the limits that bound the worker iteration loop.
 *
 * @since TP-105
 */
export interface LaneRunnerConfig {
	/** Batch ID */
	batchId: string;
	/**
	 * Operator prefix for agent IDs (e.g., "orch-henrylach").
	 * Combined with `laneNumber` to build the worker's runtime agent ID.
	 */
	agentIdPrefix: string;
	/** Lane number (1-indexed) */
	laneNumber: number;
	/** Absolute path to the lane worktree */
	worktreePath: string;
	/** Git branch checked out in the worktree */
	branch: string;
	/**
	 * Repo ID.
	 * When the execution unit carries a segment ID, this value selects which
	 * segments of the task's step-segment map belong to this lane.
	 */
	repoId: string;
	/** State root for runtime artifacts (workspace root or repo root) */
	stateRoot: string;
	/** Worker model (empty string = inherit from session) */
	workerModel: string;
	/** Worker tools */
	workerTools: string;
	/** Worker thinking mode */
	workerThinking: string;
	/** Worker system prompt (full-task mode) */
	workerSystemPrompt: string;
	/** Worker system prompt for segment-scoped mode (appended to base) */
	workerSegmentPrompt: string;
	/**
	 * Reviewer model (empty string = inherit session default).
	 * Set from TASKPLANE_REVIEWER_MODEL env var, sourced from runnerConfig.reviewer.model.
	 * @since TP-160
	 */
	reviewerModel: string;
	/**
	 * Reviewer thinking mode (empty string = inherit).
	 * @since TP-160
	 */
	reviewerThinking: string;
	/**
	 * Reviewer tool allowlist (comma-separated).
	 * @since TP-160
	 */
	reviewerTools: string;
	/** Supervisor autonomy level for bridge-tool guards. */
	supervisorAutonomy?: "interactive" | "supervised" | "autonomous";
	/** Project name (for review request context) */
	projectName?: string;
	/** Package specifiers to exclude from worker extension forwarding (exact match). @since TP-180 */
	workerExcludeExtensions?: string[];
	/** Package specifiers to exclude from reviewer extension forwarding (exact match). @since TP-180 */
	reviewerExcludeExtensions?: string[];
	/** Max worker iterations before giving up (upper bound of the iteration loop) */
	maxIterations: number;
	/** No-progress stall limit */
	noProgressLimit: number;
	/** Max worker time in minutes per iteration */
	maxWorkerMinutes: number;
	/** Context pressure warn threshold (0-100) */
	warnPercent: number;
	/** Context pressure kill threshold (0-100) */
	killPercent: number;
	/** Optional callback for surfacing runtime mailbox replies/escalations to supervisor */
	onSupervisorAlert?: SupervisorAlertCallback;
	/**
	 * Optional callback fired when the lane reaches a terminal state (no-progress
	 * kill or hard-fail). The supervisor process uses this to suppress any
	 * subsequent zombie alerts queued for the now-dead lane.
	 *
	 * @since TP-187 (#538)
	 */
	onLaneTerminated?: (info: import("./types.ts").LaneTerminatedInfo) => void;
}
|
|
328
|
+
|
|
329
|
+
/**
 * Result of executing one task through the lane-runner.
 *
 * Wraps the engine-compatible task outcome together with the iteration count
 * and usage totals accumulated while running the task.
 *
 * @since TP-105
 */
export interface LaneRunnerTaskResult {
	/** Standard lane task outcome compatible with the engine */
	outcome: LaneTaskOutcome;
	/** Total worker iterations consumed */
	iterations: number;
	/** Cumulative worker cost in USD */
	costUsd: number;
	/** Total tokens used */
	totalTokens: number;
}
|
|
344
|
+
|
|
345
|
+
// ── Core Execution ───────────────────────────────────────────────────
|
|
346
|
+
|
|
347
|
+
/**
|
|
348
|
+
* Execute a single task in a lane using the Runtime V2 headless backend.
|
|
349
|
+
*
|
|
350
|
+
* This is the core function that replaces the legacy TMUX-backed
|
|
351
|
+
* `executeLane()` → `spawnLaneSession()` → `task-runner TASK_AUTOSTART`
|
|
352
|
+
* path with direct child-process hosting.
|
|
353
|
+
*
|
|
354
|
+
* Execution loop:
|
|
355
|
+
* 1. Parse task and ensure STATUS.md exists
|
|
356
|
+
* 2. For each iteration:
|
|
357
|
+
* a. Determine remaining steps
|
|
358
|
+
* b. Spawn worker agent via agent-host
|
|
359
|
+
* c. Wait for worker to exit
|
|
360
|
+
* d. Check progress (checkboxes)
|
|
361
|
+
* e. If all steps complete → success
|
|
362
|
+
* f. If no progress → increment stall counter
|
|
363
|
+
* g. If stall limit or iteration limit hit → fail
|
|
364
|
+
* 3. If all steps complete, check for .DONE
|
|
365
|
+
* 4. Return LaneTaskOutcome
|
|
366
|
+
*
|
|
367
|
+
* @since TP-105
|
|
368
|
+
*/
|
|
369
|
+
export async function executeTaskV2(
|
|
370
|
+
unit: ExecutionUnit,
|
|
371
|
+
config: LaneRunnerConfig,
|
|
372
|
+
pauseSignal: { paused: boolean },
|
|
373
|
+
): Promise<LaneRunnerTaskResult> {
|
|
374
|
+
const startTime = Date.now();
|
|
375
|
+
const statusPath = unit.packet.statusPath;
|
|
376
|
+
const donePath = unit.packet.donePath;
|
|
377
|
+
const promptPath = unit.packet.promptPath;
|
|
378
|
+
const taskFolder = unit.packet.taskFolder;
|
|
379
|
+
const reviewerStatePath = join(taskFolder, ".reviewer-state.json");
|
|
380
|
+
const taskId = unit.taskId;
|
|
381
|
+
const segmentId = unit.segmentId;
|
|
382
|
+
const workerAgentId = buildRuntimeAgentId(config.agentIdPrefix, config.laneNumber, "worker");
|
|
383
|
+
|
|
384
|
+
// ── 1. Ensure STATUS.md exists ──────────────────────────────────
|
|
385
|
+
if (!existsSync(statusPath)) {
|
|
386
|
+
const content = readFileSync(promptPath, "utf-8");
|
|
387
|
+
const parsed = parsePromptMd(content, promptPath);
|
|
388
|
+
writeFileSync(statusPath, generateStatusMd(parsed));
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
updateStatusField(statusPath, "Status", "🟡 In Progress");
|
|
392
|
+
updateStatusField(statusPath, "Last Updated", new Date().toISOString().slice(0, 10));
|
|
393
|
+
logExecution(statusPath, "Task started", "Runtime V2 lane-runner execution");
|
|
394
|
+
|
|
395
|
+
// Pre-segment guard: remove any stale .DONE from a prior segment or prior run.
|
|
396
|
+
// This closes the race window where the monitor sees .DONE before lane-runner
|
|
397
|
+
// can suppress it at segment end. For non-final segments, .DONE must not exist
|
|
398
|
+
// at any point during execution.
|
|
399
|
+
const isNonFinalAtStart =
|
|
400
|
+
segmentId != null &&
|
|
401
|
+
Array.isArray(unit.task.segmentIds) &&
|
|
402
|
+
unit.task.segmentIds.length > 1 &&
|
|
403
|
+
unit.task.segmentIds[unit.task.segmentIds.length - 1] !== segmentId;
|
|
404
|
+
if (isNonFinalAtStart && existsSync(donePath)) {
|
|
405
|
+
try {
|
|
406
|
+
unlinkSync(donePath);
|
|
407
|
+
} catch {
|
|
408
|
+
/* best effort */
|
|
409
|
+
}
|
|
410
|
+
logExecution(
|
|
411
|
+
statusPath,
|
|
412
|
+
"Segment start",
|
|
413
|
+
`Removed stale .DONE before non-final segment ${segmentId}`,
|
|
414
|
+
);
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
// ── 2. Iteration loop ───────────────────────────────────────────
|
|
418
|
+
let noProgressCount = 0;
|
|
419
|
+
let totalIterations = 0;
|
|
420
|
+
let cumulativeCostUsd = 0;
|
|
421
|
+
let cumulativeTokens = 0;
|
|
422
|
+
// TP-115: carry latest worker telemetry across iterations and into post-loop terminal snapshots
|
|
423
|
+
let lastTelemetry: Partial<AgentHostResult> = {};
|
|
424
|
+
|
|
425
|
+
// TP-174: Build segment context once for emitSnapshot calls.
|
|
426
|
+
// Available outside the loop so it can be passed to makeResult too.
|
|
427
|
+
const snapshotSegmentCtx: { stepSegmentMap: StepSegmentMapping[]; repoId: string } | null =
|
|
428
|
+
segmentId && unit.task.stepSegmentMap && config.repoId
|
|
429
|
+
? (() => {
|
|
430
|
+
const repoSteps = getStepsForRepoId(unit.task.stepSegmentMap!, config.repoId);
|
|
431
|
+
return repoSteps.size > 0
|
|
432
|
+
? { stepSegmentMap: unit.task.stepSegmentMap!, repoId: config.repoId }
|
|
433
|
+
: null;
|
|
434
|
+
})()
|
|
435
|
+
: null;
|
|
436
|
+
|
|
437
|
+
for (let iter = 0; iter < config.maxIterations; iter++) {
|
|
438
|
+
if (pauseSignal.paused) {
|
|
439
|
+
logExecution(statusPath, "Paused", `User paused at iteration ${totalIterations}`);
|
|
440
|
+
return makeResult(
|
|
441
|
+
taskId,
|
|
442
|
+
segmentId,
|
|
443
|
+
workerAgentId,
|
|
444
|
+
"skipped",
|
|
445
|
+
startTime,
|
|
446
|
+
"Paused by user",
|
|
447
|
+
false,
|
|
448
|
+
totalIterations,
|
|
449
|
+
cumulativeCostUsd,
|
|
450
|
+
cumulativeTokens,
|
|
451
|
+
config,
|
|
452
|
+
statusPath,
|
|
453
|
+
reviewerStatePath,
|
|
454
|
+
undefined,
|
|
455
|
+
snapshotSegmentCtx,
|
|
456
|
+
);
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
// Determine remaining steps
|
|
460
|
+
const currentStatus = parseStatusMd(readFileSync(statusPath, "utf-8"));
|
|
461
|
+
const parsed = parsePromptMd(readFileSync(promptPath, "utf-8"), promptPath);
|
|
462
|
+
|
|
463
|
+
// TP-174: Resolve segment-scoped step filtering.
|
|
464
|
+
// Use config.repoId (structured identity) instead of parsing opaque segmentId.
|
|
465
|
+
const stepSegmentMap = unit.task.stepSegmentMap;
|
|
466
|
+
const currentRepoId = segmentId ? config.repoId : null;
|
|
467
|
+
const rawRepoStepNumbers =
|
|
468
|
+
stepSegmentMap && currentRepoId ? getStepsForRepoId(stepSegmentMap, currentRepoId) : null;
|
|
469
|
+
// TP-174 legacy fallback: If no steps have segments for this repoId
|
|
470
|
+
// (multi-segment task without explicit markers, where all checkboxes
|
|
471
|
+
// are assigned to the fallback/packet repo), disable segment filtering.
|
|
472
|
+
const repoStepNumbers =
|
|
473
|
+
rawRepoStepNumbers && rawRepoStepNumbers.size > 0 ? rawRepoStepNumbers : null;
|
|
474
|
+
|
|
475
|
+
// TP-174: Read STATUS.md content once for segment-scoped checks
|
|
476
|
+
const iterStatusContent = readFileSync(statusPath, "utf-8");
|
|
477
|
+
|
|
478
|
+
const remainingSteps = parsed.steps.filter((step) => {
|
|
479
|
+
// TP-174: When segment-scoped, only show steps that have work for this repoId
|
|
480
|
+
if (repoStepNumbers && !repoStepNumbers.has(step.number)) return false;
|
|
481
|
+
// TP-174: Use segment-scoped completion check in segment mode
|
|
482
|
+
if (repoStepNumbers && currentRepoId) {
|
|
483
|
+
return !isSegmentComplete(iterStatusContent, step.number, currentRepoId);
|
|
484
|
+
}
|
|
485
|
+
const ss = currentStatus.steps.find((s) => s.number === step.number);
|
|
486
|
+
return !isStepComplete(ss);
|
|
487
|
+
});
|
|
488
|
+
|
|
489
|
+
if (remainingSteps.length === 0) break; // All done
|
|
490
|
+
|
|
491
|
+
// TP-196 / #508: Pre-spawn segment-completion check.
|
|
492
|
+
//
|
|
493
|
+
// When the lane is iterating a segment-scoped task, verify that NOT ALL
|
|
494
|
+
// `repoStepNumbers` are segment-complete before incurring the cost of
|
|
495
|
+
// spawning a worker. The `remainingSteps` filter above already enforces
|
|
496
|
+
// this implicitly (via `isSegmentComplete`), but expressing the check
|
|
497
|
+
// explicitly at the spawn boundary:
|
|
498
|
+
// 1. Makes the wasted-iteration prevention contract visible.
|
|
499
|
+
// 2. Provides a defensive backstop for cases where `parsed.steps` and
|
|
500
|
+
// `repoStepNumbers` diverge (e.g., legacy/partial-marker tasks).
|
|
501
|
+
// 3. Gives behavioural tests a clean assertion target (via the pure
|
|
502
|
+
// helper `shouldSkipSpawnForCompleteSegment`).
|
|
503
|
+
if (shouldSkipSpawnForCompleteSegment(iterStatusContent, repoStepNumbers, currentRepoId)) {
|
|
504
|
+
logExecution(
|
|
505
|
+
statusPath,
|
|
506
|
+
"Pre-spawn segment-completion check",
|
|
507
|
+
`all segment checkboxes already complete for repo '${currentRepoId}' — skipping worker spawn (#508)`,
|
|
508
|
+
);
|
|
509
|
+
break;
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
totalIterations++;
|
|
513
|
+
updateStatusField(
|
|
514
|
+
statusPath,
|
|
515
|
+
"Current Step",
|
|
516
|
+
`Step ${remainingSteps[0].number}: ${remainingSteps[0].name}`,
|
|
517
|
+
);
|
|
518
|
+
updateStatusField(statusPath, "Iteration", `${totalIterations}`);
|
|
519
|
+
|
|
520
|
+
// Mark first incomplete step as in-progress
|
|
521
|
+
const firstStep = remainingSteps[0];
|
|
522
|
+
const firstStepStatus = currentStatus.steps.find((s) => s.number === firstStep.number);
|
|
523
|
+
if (firstStepStatus?.status !== "in-progress") {
|
|
524
|
+
updateStepStatus(statusPath, firstStep.number, "in-progress");
|
|
525
|
+
logExecution(statusPath, `Step ${firstStep.number} started`, firstStep.name);
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
// Count checkboxes before worker runs
|
|
529
|
+
// TP-174: When segment-scoped, count only this segment's checkboxes
|
|
530
|
+
let prevTotalChecked: number;
|
|
531
|
+
if (repoStepNumbers && currentRepoId) {
|
|
532
|
+
const preStatusContent = readFileSync(statusPath, "utf-8");
|
|
533
|
+
const segCbs = getSegmentCheckboxes(preStatusContent, firstStep.number, currentRepoId);
|
|
534
|
+
prevTotalChecked = segCbs ? segCbs.checked : 0;
|
|
535
|
+
} else {
|
|
536
|
+
prevTotalChecked = currentStatus.steps.reduce((sum, s) => sum + s.totalChecked, 0);
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
// ── Build worker prompt ─────────────────────────────────────
|
|
540
|
+
const wrapUpFile = join(taskFolder, ".task-wrap-up");
|
|
541
|
+
if (existsSync(wrapUpFile))
|
|
542
|
+
try {
|
|
543
|
+
unlinkSync(wrapUpFile);
|
|
544
|
+
} catch {
|
|
545
|
+
/* ignore */
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
// TP-174/TP-501/TP-196: Compute segment scope mode BEFORE building prompt.
|
|
549
|
+
// `segmentScopeMode` is the authoritative TP-196 flag; `isSegmentScoped` is
|
|
550
|
+
// preserved as a boolean alias for ergonomics at the many existing call sites.
|
|
551
|
+
const segmentScopeMode: SegmentScopeMode = computeSegmentScopeMode(
|
|
552
|
+
stepSegmentMap,
|
|
553
|
+
repoStepNumbers,
|
|
554
|
+
currentRepoId,
|
|
555
|
+
remainingSteps.length > 0 ? remainingSteps[0].number : null,
|
|
556
|
+
);
|
|
557
|
+
const isSegmentScoped = segmentScopeMode === "SEGMENT_SCOPED";
|
|
558
|
+
|
|
559
|
+
const promptLines = [
|
|
560
|
+
`Read your task instructions at: ${promptPath}`,
|
|
561
|
+
`Read your execution state at: ${statusPath}`,
|
|
562
|
+
``,
|
|
563
|
+
`Task: ${taskId}`,
|
|
564
|
+
`Task folder: ${taskFolder}/`,
|
|
565
|
+
`Iteration: ${totalIterations}`,
|
|
566
|
+
`Wrap-up signal file: ${wrapUpFile}`,
|
|
567
|
+
``,
|
|
568
|
+
`Execution repo context:`,
|
|
569
|
+
`- Execution repo ID: ${unit.executionRepoId}`,
|
|
570
|
+
`- Execution worktree (worker cwd): ${unit.worktreePath}`,
|
|
571
|
+
`- Lane repo ID: ${config.repoId}`,
|
|
572
|
+
// Only show segment ID when segment-scoped. For FULL_TASK, omit to avoid
|
|
573
|
+
// workers incorrectly self-scoping based on segment metadata.
|
|
574
|
+
...(isSegmentScoped ? [`- Active segment ID: ${segmentId}`] : []),
|
|
575
|
+
``,
|
|
576
|
+
`Packet home context:`,
|
|
577
|
+
`- Packet home repo ID: ${unit.packetHomeRepoId}`,
|
|
578
|
+
`- Packet task folder: ${taskFolder}`,
|
|
579
|
+
`- Packet PROMPT path: ${promptPath}`,
|
|
580
|
+
`- Packet STATUS path: ${statusPath}`,
|
|
581
|
+
`- Packet .DONE path: ${donePath}`,
|
|
582
|
+
`- Packet .reviews path: ${unit.packet.reviewsDir}`,
|
|
583
|
+
``,
|
|
584
|
+
`⚠️ ORCHESTRATED RUN: Do NOT archive or move the task folder. The orchestrator handles post-merge archival.`,
|
|
585
|
+
``,
|
|
586
|
+
`⚠️ CHECKPOINT RULE: After completing EACH checkbox item, immediately edit STATUS.md to check it off (- [ ] → - [x]) BEFORE starting the next item. Do NOT batch checkbox updates at the end of a step.`,
|
|
587
|
+
];
|
|
588
|
+
|
|
589
|
+
// Only show segment DAG in segment-scoped mode
|
|
590
|
+
const segmentDag = isSegmentScoped ? unit.task.explicitSegmentDag : null;
|
|
591
|
+
if (segmentDag && segmentDag.repoIds.length > 0) {
|
|
592
|
+
const edgeSummary =
|
|
593
|
+
segmentDag.edges.length > 0
|
|
594
|
+
? segmentDag.edges.map((edge) => `${edge.fromRepoId}->${edge.toRepoId}`).join(", ")
|
|
595
|
+
: "(no explicit edges)";
|
|
596
|
+
promptLines.push(
|
|
597
|
+
``,
|
|
598
|
+
`Segment DAG context (from PROMPT metadata):`,
|
|
599
|
+
`- Repos: ${segmentDag.repoIds.join(", ")}`,
|
|
600
|
+
`- Edges: ${edgeSummary}`,
|
|
601
|
+
);
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
// Segment scope mode is determined by which system prompt was loaded.
|
|
605
|
+
// No SegmentScopeMode line needed — the prompt IS the mode.
|
|
606
|
+
|
|
607
|
+
// TP-174/TP-196: Segment-scoped prompt — show only this segment's checkboxes.
|
|
608
|
+
// Gated on the authoritative `isSegmentScoped` (derived from `segmentScopeMode`)
|
|
609
|
+
// rather than the raw composite condition, so the prompt branch can't drift
|
|
610
|
+
// from the mode decision (TP-196 / #502).
|
|
611
|
+
if (isSegmentScoped) {
|
|
612
|
+
const currentStepNum = remainingSteps[0].number;
|
|
613
|
+
// Defensive guards: when `isSegmentScoped === true`, `computeSegmentScopeMode`
|
|
614
|
+
// has already verified `stepSegmentMap`, `currentRepoId`, and that the
|
|
615
|
+
// current step's mapping contains an entry for the active repo. We re-fetch
|
|
616
|
+
// the structures here for clarity. If any are missing we log and skip the
|
|
617
|
+
// segment block (defense-in-depth — should never trip in practice).
|
|
618
|
+
const currentStepMapping = stepSegmentMap?.find((s) => s.stepNumber === currentStepNum);
|
|
619
|
+
const mySegment = currentStepMapping?.segments.find((seg) => seg.repoId === currentRepoId);
|
|
620
|
+
|
|
621
|
+
if (!currentStepMapping || !mySegment) {
|
|
622
|
+
logExecution(
|
|
623
|
+
statusPath,
|
|
624
|
+
"WARN",
|
|
625
|
+
`segmentScopeMode === SEGMENT_SCOPED but current step mapping missing — skipping segment prompt block (currentRepoId=${currentRepoId}, stepNum=${currentStepNum})`,
|
|
626
|
+
);
|
|
627
|
+
} else {
|
|
628
|
+
const otherSegments = currentStepMapping.segments.filter((seg) => seg.repoId !== currentRepoId);
|
|
629
|
+
|
|
630
|
+
// Count total segments for this repo across all steps
|
|
631
|
+
const totalStepsForRepo = repoStepNumbers ? repoStepNumbers.size : 0;
|
|
632
|
+
const segmentIndexInStep =
|
|
633
|
+
currentStepMapping.segments.findIndex((seg) => seg.repoId === currentRepoId) + 1;
|
|
634
|
+
const totalSegmentsInStep = currentStepMapping.segments.length;
|
|
635
|
+
|
|
636
|
+
promptLines.push(
|
|
637
|
+
``,
|
|
638
|
+
`Segment-scoped context (Phase A):`,
|
|
639
|
+
`Active segment: ${segmentId} (Step ${currentStepNum}, segment ${segmentIndexInStep} of ${totalSegmentsInStep})`,
|
|
640
|
+
`Your repo: ${currentRepoId}`,
|
|
641
|
+
``,
|
|
642
|
+
);
|
|
643
|
+
|
|
644
|
+
if (mySegment && mySegment.checkboxes.length > 0) {
|
|
645
|
+
promptLines.push(`Your checkboxes for this step:`);
|
|
646
|
+
for (const cb of mySegment.checkboxes) {
|
|
647
|
+
promptLines.push(` ${cb}`);
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
if (otherSegments.length > 0) {
|
|
652
|
+
promptLines.push(``);
|
|
653
|
+
promptLines.push(`Other segments in this step (NOT yours — do not attempt):`);
|
|
654
|
+
for (const seg of otherSegments) {
|
|
655
|
+
promptLines.push(
|
|
656
|
+
` - ${seg.repoId}: ${seg.checkboxes.length} checkbox(es) (will run in a separate segment)`,
|
|
657
|
+
);
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
// List completed steps for this repo
|
|
662
|
+
const completedForRepo = parsed.steps.filter((step) => {
|
|
663
|
+
if (!repoStepNumbers || !repoStepNumbers.has(step.number)) return false;
|
|
664
|
+
const ss = currentStatus.steps.find((s) => s.number === step.number);
|
|
665
|
+
return isStepComplete(ss);
|
|
666
|
+
});
|
|
667
|
+
if (completedForRepo.length > 0) {
|
|
668
|
+
promptLines.push(``);
|
|
669
|
+
promptLines.push(
|
|
670
|
+
`Prior steps completed: ${completedForRepo.map((s) => `Step ${s.number} (${s.name})`).join(", ")}`,
|
|
671
|
+
);
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
promptLines.push(
|
|
675
|
+
``,
|
|
676
|
+
`When all YOUR checkboxes are checked, your segment is done — exit successfully.`,
|
|
677
|
+
`Do NOT attempt work in other repos.`,
|
|
678
|
+
);
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
if (totalIterations > 1 && remainingSteps.length > 0) {
|
|
683
|
+
const remainingSet = new Set(remainingSteps.map((s) => s.number));
|
|
684
|
+
const completedSteps = parsed.steps.filter((s) => !remainingSet.has(s.number));
|
|
685
|
+
promptLines.push(
|
|
686
|
+
``,
|
|
687
|
+
`IMPORTANT: You exited previously without completing all steps.`,
|
|
688
|
+
`Completed (do not redo): ${completedSteps.map((s) => `Step ${s.number}: ${s.name}`).join(", ") || "(none)"}`,
|
|
689
|
+
`Remaining (focus here): ${remainingSteps.map((s) => `Step ${s.number}: ${s.name}`).join(", ")}`,
|
|
690
|
+
);
|
|
691
|
+
|
|
692
|
+
// If the worker exited without checking any boxes, add a corrective directive
|
|
693
|
+
if (noProgressCount > 0) {
|
|
694
|
+
promptLines.push(
|
|
695
|
+
``,
|
|
696
|
+
`🚨 CRITICAL: You have exited ${noProgressCount} time(s) without completing work.`,
|
|
697
|
+
`Your previous exit was premature. You said something like "Now let me fix this"`,
|
|
698
|
+
`and then STOPPED instead of actually making the edit.`,
|
|
699
|
+
``,
|
|
700
|
+
`DO NOT DO THIS AGAIN. When you know what to edit, call the edit tool IMMEDIATELY.`,
|
|
701
|
+
`Do not produce a text message describing what you plan to do. Just do it.`,
|
|
702
|
+
`Work continuously through ALL remaining checkboxes until the task is DONE.`,
|
|
703
|
+
`Do not exit between checkboxes or steps.`,
|
|
704
|
+
);
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
// ── Spawn worker ────────────────────────────────────────────
|
|
709
|
+
const eventsPath = runtimeAgentEventsPath(config.stateRoot, config.batchId, workerAgentId);
|
|
710
|
+
|
|
711
|
+
const mailboxDir = join(config.stateRoot, ".pi", "mailbox", config.batchId, workerAgentId);
|
|
712
|
+
mkdirSync(join(mailboxDir, "inbox"), { recursive: true });
|
|
713
|
+
|
|
714
|
+
const steeringPendingPath = join(taskFolder, ".steering-pending");
|
|
715
|
+
|
|
716
|
+
// TP-106: Bridge extension wiring for agent-side reply/escalate tools
|
|
717
|
+
const outboxDir = join(
|
|
718
|
+
config.stateRoot,
|
|
719
|
+
".pi",
|
|
720
|
+
"mailbox",
|
|
721
|
+
config.batchId,
|
|
722
|
+
workerAgentId,
|
|
723
|
+
"outbox",
|
|
724
|
+
);
|
|
725
|
+
const bridgeExtensionPath = join(LANE_RUNNER_DIR, "agent-bridge-extension.ts");
|
|
726
|
+
|
|
727
|
+
// TP-180: Forward user-installed extensions to worker agent
|
|
728
|
+
const allPackages = loadPiSettingsPackages(config.stateRoot);
|
|
729
|
+
const workerPackages = filterExcludedExtensions(
|
|
730
|
+
allPackages,
|
|
731
|
+
config.workerExcludeExtensions ?? [],
|
|
732
|
+
);
|
|
733
|
+
|
|
734
|
+
const hostOpts: AgentHostOptions = {
|
|
735
|
+
agentId: workerAgentId,
|
|
736
|
+
role: "worker",
|
|
737
|
+
batchId: config.batchId,
|
|
738
|
+
laneNumber: config.laneNumber,
|
|
739
|
+
taskId,
|
|
740
|
+
repoId: config.repoId,
|
|
741
|
+
cwd: unit.worktreePath,
|
|
742
|
+
prompt: promptLines.join("\n"),
|
|
743
|
+
systemPrompt:
|
|
744
|
+
(isSegmentScoped && config.workerSegmentPrompt
|
|
745
|
+
? config.workerSystemPrompt + "\n\n---\n\n" + config.workerSegmentPrompt
|
|
746
|
+
: config.workerSystemPrompt) || undefined,
|
|
747
|
+
model: config.workerModel || undefined,
|
|
748
|
+
// TP-184: buildWorkerToolsAllowlist always appends ENGINE_BRIDGE_TOOLS
|
|
749
|
+
// (review_step, notify_supervisor, request_segment_expansion) so that
|
|
750
|
+
// engine-internal coordination tools are present regardless of what the
|
|
751
|
+
// user configured for taskRunner.worker.tools. See issue #530.
|
|
752
|
+
tools: buildWorkerToolsAllowlist(config.workerTools),
|
|
753
|
+
thinking: config.workerThinking || undefined,
|
|
754
|
+
mailboxDir,
|
|
755
|
+
steeringPendingPath,
|
|
756
|
+
eventsPath,
|
|
757
|
+
exitSummaryPath: eventsPath.replace(/\.jsonl$/, "-exit.json"),
|
|
758
|
+
timeoutMs: config.maxWorkerMinutes * 60_000,
|
|
759
|
+
stateRoot: config.stateRoot,
|
|
760
|
+
packet: unit.packet,
|
|
761
|
+
extensions: [bridgeExtensionPath, ...workerPackages],
|
|
762
|
+
env: {
|
|
763
|
+
TASKPLANE_OUTBOX_DIR: outboxDir,
|
|
764
|
+
TASKPLANE_AGENT_ID: workerAgentId,
|
|
765
|
+
TASKPLANE_TASK_FOLDER: taskFolder,
|
|
766
|
+
TASKPLANE_STATUS_PATH: statusPath,
|
|
767
|
+
TASKPLANE_PROMPT_PATH: promptPath,
|
|
768
|
+
TASKPLANE_REVIEWS_DIR: unit.packet.reviewsDir,
|
|
769
|
+
TASKPLANE_REVIEWER_STATE_PATH: reviewerStatePath,
|
|
770
|
+
TASKPLANE_PROJECT_NAME: config.projectName || "project",
|
|
771
|
+
TASKPLANE_TASK_ID: taskId,
|
|
772
|
+
// Hard-set segment env vars based on mode. In FULL_TASK mode,
|
|
773
|
+
// explicitly clear them to prevent env inheritance leaking segment cues.
|
|
774
|
+
TASKPLANE_ACTIVE_SEGMENT_ID: isSegmentScoped ? (segmentId ?? "") : "",
|
|
775
|
+
TASKPLANE_SEGMENT_ID: isSegmentScoped ? (segmentId ?? "") : "",
|
|
776
|
+
TASKPLANE_SUPERVISOR_AUTONOMY: config.supervisorAutonomy || "autonomous",
|
|
777
|
+
ORCH_BATCH_ID: config.batchId,
|
|
778
|
+
...(config.reviewerModel ? { TASKPLANE_REVIEWER_MODEL: config.reviewerModel } : {}),
|
|
779
|
+
...(config.reviewerThinking ? { TASKPLANE_REVIEWER_THINKING: config.reviewerThinking } : {}),
|
|
780
|
+
...(config.reviewerTools ? { TASKPLANE_REVIEWER_TOOLS: config.reviewerTools } : {}),
|
|
781
|
+
// TP-180: Pass state root and reviewer exclusions for extension forwarding
|
|
782
|
+
TASKPLANE_STATE_ROOT: config.stateRoot,
|
|
783
|
+
...(config.reviewerExcludeExtensions && config.reviewerExcludeExtensions.length > 0
|
|
784
|
+
? { TASKPLANE_REVIEWER_EXCLUDE_EXTENSIONS: JSON.stringify(config.reviewerExcludeExtensions) }
|
|
785
|
+
: {}),
|
|
786
|
+
},
|
|
787
|
+
// TP-172: Exit interception callback — escalate to the supervisor when the worker
// tries to exit without visible progress (no new checkboxes, no blocker logged).
//
// Contract, as implemented below:
//   - resolves to `null`   → let the worker session close (progress was made, a
//                            blocker was logged, the supervisor timed out, or the
//                            supervisor sent a close directive);
//   - resolves to a string → supervisor instructions, returned as the worker's
//                            next prompt.
// The callback is only installed when `config.onSupervisorAlert` is configured.
onPrematureExit: config.onSupervisorAlert
  ? async (assistantMessage: string): Promise<string | null> => {
      // ── Phase 1: did the worker make visible progress during this turn? ──
      //   1. Checkbox progress (more items checked than before the spawn)
      //   2. Blocker logged (non-empty Blockers section)
      try {
        const statusContent = readFileSync(statusPath, "utf-8");
        // TP-174: Use same scope as prevTotalChecked (segment or global)
        let midTotalChecked: number;
        if (repoStepNumbers && currentRepoId) {
          const segCbs = getSegmentCheckboxes(statusContent, firstStep.number, currentRepoId);
          midTotalChecked = segCbs ? segCbs.checked : 0;
        } else {
          const midStatus = parseStatusMd(statusContent);
          midTotalChecked = midStatus.steps.reduce((sum, s) => sum + s.totalChecked, 0);
        }
        if (midTotalChecked > prevTotalChecked) {
          // Worker checked off checkboxes — let it exit normally
          return null;
        }
        // Check for blocker entries: extract Blockers section and see if non-empty.
        // NOTE(review): the section terminator is either "\n---" or a literal "-"
        // at end of input (`-$`). A Blockers section that is last in the file and
        // not followed by a `---` rule will therefore NOT match, and a logged
        // blocker would be missed. `-$` looks like it may have been meant as `$`
        // (or `\n## |$`) — confirm against the STATUS.md template before changing.
        const blockerMatch = statusContent.match(/## Blockers\s*\n([\s\S]*?)(?:\n---|-$)/i);
        if (blockerMatch) {
          const blockerContent = blockerMatch[1].trim();
          // If blockers section has real content (not just "*None*" or empty)
          if (blockerContent && blockerContent !== "*None*") {
            // Worker logged a blocker — let it exit normally
            return null;
          }
        }
      } catch {
        /* If we can't read STATUS.md, proceed with escalation */
      }

      // ── Phase 2: no visible progress — compose and fire the escalation ──
      // TP-187 (#540): when the worker exits silently, fall back to the most
      // recent `assistant_message` event in events.jsonl so the supervisor
      // has SOMETHING to act on instead of `Worker said: ""`.
      let workerSaid = (assistantMessage ?? "").trim();
      let workerSaidSource: "current-turn" | "events-jsonl-fallback" | "empty-sentinel" =
        "current-turn";
      if (!workerSaid) {
        workerSaidSource = "empty-sentinel";
        try {
          const raw = readFileSync(eventsPath, "utf-8");
          const lines = raw.split("\n");
          // Walk backward to find the most recent assistant_message with non-empty text.
          for (let i = lines.length - 1; i >= 0; i--) {
            const line = lines[i].trim();
            if (!line) continue;
            try {
              const evt = JSON.parse(line) as Record<string, unknown>;
              if (evt.type === "assistant_message") {
                const payload = evt.payload as Record<string, unknown> | undefined;
                const text = typeof payload?.text === "string" ? payload.text.trim() : "";
                if (text) {
                  workerSaid = text;
                  workerSaidSource = "events-jsonl-fallback";
                  break;
                }
              }
            } catch {
              /* skip malformed line */
            }
          }
        } catch {
          /* events.jsonl unreadable; sentinel will be used */
        }
      }
      if (!workerSaid) {
        workerSaid =
          "(no assistant message captured — worker exited without producing visible output)";
        workerSaidSource = "empty-sentinel";
      }
      // Cap the quoted message so the alert stays readable.
      const truncatedMsg = workerSaid.slice(0, 500);

      // Collect up to five unchecked items to show the supervisor what is pending.
      const uncheckedItems: string[] = [];
      try {
        const statusContent = readFileSync(statusPath, "utf-8");
        // TP-174: When segment-scoped, report only this segment's unchecked items
        if (repoStepNumbers && currentRepoId) {
          const segCbs = getSegmentCheckboxes(statusContent, firstStep.number, currentRepoId);
          if (segCbs) {
            for (const text of segCbs.uncheckedTexts.slice(0, 5)) {
              uncheckedItems.push(text);
            }
          }
        } else {
          const uncheckedMatches = statusContent.match(/^- \[ \] .+$/gm);
          if (uncheckedMatches) {
            for (const item of uncheckedMatches.slice(0, 5)) {
              uncheckedItems.push(item.replace(/^- \[ \] /, "").trim());
            }
          }
        }
      } catch {
        /* best effort */
      }

      const currentStepInfo =
        remainingSteps.length > 0
          ? `Step ${remainingSteps[0].number}: ${remainingSteps[0].name}`
          : "Unknown";

      // Fire supervisor alert
      try {
        config.onSupervisorAlert!({
          category: "worker-exit-intercept",
          summary:
            `🔄 Worker on lane ${config.laneNumber} wants to exit with no progress.\n` +
            ` Task: ${taskId}\n` +
            ` Current step: ${currentStepInfo}\n` +
            ` Iteration: ${totalIterations}, No-progress count: ${noProgressCount + 1}\n` +
            ` Unchecked items: ${uncheckedItems.length > 0 ? uncheckedItems.join("; ") : "(none found)"}\n` +
            ` Worker said: "${truncatedMsg}"` +
            (workerSaidSource === "events-jsonl-fallback"
              ? ` (fallback: most-recent assistant_message from events.jsonl)\n`
              : workerSaidSource === "empty-sentinel"
                ? ` (no assistant message captured this iteration)\n`
                : "\n") +
            `\nSend a steering message to ${workerAgentId} with targeted instructions,` +
            ` or reply "skip" / "let it fail" to close the session.`,
          context: {
            taskId,
            laneId: `lane-${config.laneNumber}`,
            laneNumber: config.laneNumber,
            agentId: workerAgentId,
            exitReason: `worker_exit_no_progress: ${truncatedMsg.slice(0, 200)}`,
          },
        });
      } catch {
        /* best effort — don't block on alert failure */
      }

      // ── Phase 3: wait for the supervisor's answer ──────────────────────
      // Poll worker mailbox inbox for supervisor reply (60s timeout)
      const SUPERVISOR_REPLY_TIMEOUT_MS = 60_000;
      const POLL_INTERVAL_MS = 2_000;
      const escalationTimestamp = Date.now();
      const inboxDir = sessionInboxDir(config.stateRoot, config.batchId, workerAgentId);

      const supervisorReply = await new Promise<string | null>((resolve) => {
        const deadline = Date.now() + SUPERVISOR_REPLY_TIMEOUT_MS;
        // Re-arms itself via setTimeout until a reply arrives or the deadline passes.
        const poll = () => {
          if (Date.now() >= deadline) {
            resolve(null); // Timeout — fall back to corrective re-spawn
            return;
          }
          try {
            const messages = readInbox(inboxDir, config.batchId);
            // Only accept supervisor messages stamped at or after the escalation,
            // so replies to earlier escalations are not mistaken for this one.
            for (const { filename, message } of messages) {
              if (message.timestamp >= escalationTimestamp && message.from === "supervisor") {
                // Consume the message
                try {
                  ackMessage(inboxDir, filename);
                } catch {
                  /* best effort */
                }
                resolve(message.content);
                return;
              }
            }
          } catch {
            /* inbox not ready yet */
          }
          setTimeout(poll, POLL_INTERVAL_MS);
        };
        poll();
      });

      if (!supervisorReply) {
        // Timeout — let the session close, corrective re-spawn will handle it
        logExecution(
          statusPath,
          "Exit intercept timeout",
          `Supervisor did not respond within ${SUPERVISOR_REPLY_TIMEOUT_MS / 1000}s — closing session`,
        );
        return null;
      }

      // Interpret supervisor reply: close directives vs instructional content
      const normalizedReply = supervisorReply.trim().toLowerCase();
      const CLOSE_DIRECTIVES = ["skip", "let it fail", "close", "abort", "stop"];
      // Only short messages (< 30 chars) can be close directives.
      // Longer messages are always instructions even if they start with "stop".
      const isShortEnoughForDirective = normalizedReply.length < 30;
      if (
        isShortEnoughForDirective &&
        CLOSE_DIRECTIVES.some(
          (d) =>
            normalizedReply === d ||
            normalizedReply.startsWith(d + ":") ||
            normalizedReply.startsWith(d + " ") ||
            normalizedReply.startsWith(d + ".") ||
            normalizedReply.startsWith(d + " -"),
        )
      ) {
        logExecution(
          statusPath,
          "Exit intercept close",
          `Supervisor directed session close: "${supervisorReply.slice(0, 100)}"`,
        );
        return null;
      }

      // Instructional reply — return as new prompt for the worker
      logExecution(
        statusPath,
        "Exit intercept reprompt",
        `Supervisor provided instructions (${supervisorReply.length} chars) — reprompting worker`,
      );
      return supervisorReply;
    }
  : undefined,
|
|
1003
|
+
};
|
|
1004
|
+
|
|
1005
|
+
// TP-184: Defense-in-depth sanity check. Under normal operation,
|
|
1006
|
+
// `buildWorkerToolsAllowlist()` guarantees ENGINE_BRIDGE_TOOLS are
|
|
1007
|
+
// present in the allowlist. Warn (do NOT throw or block spawn) if any
|
|
1008
|
+
// is missing — this catches future helper bugs or accidental bypasses.
|
|
1009
|
+
// See issue #530 for what silently breaks when bridge tools are missing.
|
|
1010
|
+
const toolsList = (hostOpts.tools ?? "")
|
|
1011
|
+
.split(",")
|
|
1012
|
+
.map((s) => s.trim())
|
|
1013
|
+
.filter(Boolean);
|
|
1014
|
+
for (const bridgeTool of ENGINE_BRIDGE_TOOLS) {
|
|
1015
|
+
if (!toolsList.includes(bridgeTool)) {
|
|
1016
|
+
logExecution(
|
|
1017
|
+
statusPath,
|
|
1018
|
+
"WARN",
|
|
1019
|
+
`workerTools allowlist missing engine bridge tool '${bridgeTool}'; review/coordination features will silently no-op`,
|
|
1020
|
+
);
|
|
1021
|
+
}
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1024
|
+
// Context pressure: write wrap-up signal before kill
|
|
1025
|
+
let workerKillReason: "context" | "timer" | null = null;
|
|
1026
|
+
let iterationTelemetry: Partial<AgentHostResult> = {};
|
|
1027
|
+
|
|
1028
|
+
const spawned = spawnAgent(hostOpts, undefined, (telemetry) => {
|
|
1029
|
+
try {
|
|
1030
|
+
// Context pressure check
|
|
1031
|
+
if (telemetry.contextUsage) {
|
|
1032
|
+
const pct = telemetry.contextUsage.percent;
|
|
1033
|
+
if (pct >= config.warnPercent) {
|
|
1034
|
+
const msg = `Wrap up (context ${Math.round(pct)}%)`;
|
|
1035
|
+
if (!existsSync(wrapUpFile)) writeFileSync(wrapUpFile, msg);
|
|
1036
|
+
}
|
|
1037
|
+
if (pct >= config.killPercent) {
|
|
1038
|
+
workerKillReason = "context";
|
|
1039
|
+
spawned.kill();
|
|
1040
|
+
}
|
|
1041
|
+
}
|
|
1042
|
+
|
|
1043
|
+
iterationTelemetry = telemetry;
|
|
1044
|
+
lastTelemetry = telemetry;
|
|
1045
|
+
// Emit lane snapshot
|
|
1046
|
+
emitSnapshot(
|
|
1047
|
+
config,
|
|
1048
|
+
taskId,
|
|
1049
|
+
segmentId,
|
|
1050
|
+
"running",
|
|
1051
|
+
telemetry,
|
|
1052
|
+
statusPath,
|
|
1053
|
+
reviewerStatePath,
|
|
1054
|
+
snapshotSegmentCtx,
|
|
1055
|
+
);
|
|
1056
|
+
} catch {
|
|
1057
|
+
/* non-fatal: telemetry callback must never crash the engine */
|
|
1058
|
+
}
|
|
1059
|
+
});
|
|
1060
|
+
|
|
1061
|
+
// Reviewer telemetry is written by the worker bridge during review_step.
|
|
1062
|
+
// Poll snapshot refresh independently from worker message_end cadence so
|
|
1063
|
+
// the dashboard sees reviewer activity while tool calls are in-flight.
|
|
1064
|
+
let reviewerSnapshotFailures = 0;
|
|
1065
|
+
const reviewerRefreshFailureThreshold = 5;
|
|
1066
|
+
const reviewerRefresh = setInterval(() => {
|
|
1067
|
+
const ok = emitSnapshot(
|
|
1068
|
+
config,
|
|
1069
|
+
taskId,
|
|
1070
|
+
segmentId,
|
|
1071
|
+
"running",
|
|
1072
|
+
iterationTelemetry,
|
|
1073
|
+
statusPath,
|
|
1074
|
+
reviewerStatePath,
|
|
1075
|
+
snapshotSegmentCtx,
|
|
1076
|
+
);
|
|
1077
|
+
if (ok) {
|
|
1078
|
+
reviewerSnapshotFailures = 0;
|
|
1079
|
+
return;
|
|
1080
|
+
}
|
|
1081
|
+
|
|
1082
|
+
reviewerSnapshotFailures += 1;
|
|
1083
|
+
if (reviewerSnapshotFailures >= reviewerRefreshFailureThreshold) {
|
|
1084
|
+
clearInterval(reviewerRefresh);
|
|
1085
|
+
logExecution(
|
|
1086
|
+
statusPath,
|
|
1087
|
+
"Snapshot refresh disabled",
|
|
1088
|
+
`Lane ${config.laneNumber}, task ${taskId}: ${reviewerSnapshotFailures} consecutive emitSnapshot failures`,
|
|
1089
|
+
);
|
|
1090
|
+
}
|
|
1091
|
+
}, 1000);
|
|
1092
|
+
|
|
1093
|
+
let workerResult: AgentHostResult;
|
|
1094
|
+
try {
|
|
1095
|
+
workerResult = await spawned.promise;
|
|
1096
|
+
} finally {
|
|
1097
|
+
clearInterval(reviewerRefresh);
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1100
|
+
// TP-115: Update lastTelemetry with definitive final values from AgentHostResult
|
|
1101
|
+
lastTelemetry = workerResult;
|
|
1102
|
+
|
|
1103
|
+
// Clean up wrap-up signal
|
|
1104
|
+
if (existsSync(wrapUpFile))
|
|
1105
|
+
try {
|
|
1106
|
+
unlinkSync(wrapUpFile);
|
|
1107
|
+
} catch {
|
|
1108
|
+
/* ignore */
|
|
1109
|
+
}
|
|
1110
|
+
|
|
1111
|
+
// Accumulate costs
|
|
1112
|
+
cumulativeCostUsd += workerResult.costUsd;
|
|
1113
|
+
cumulativeTokens +=
|
|
1114
|
+
workerResult.inputTokens +
|
|
1115
|
+
workerResult.outputTokens +
|
|
1116
|
+
workerResult.cacheReadTokens +
|
|
1117
|
+
workerResult.cacheWriteTokens;
|
|
1118
|
+
|
|
1119
|
+
// ── TP-106: Poll worker outbox for replies/escalations ─────
|
|
1120
|
+
try {
|
|
1121
|
+
const outboxMessages = readOutbox(config.stateRoot, config.batchId, workerAgentId);
|
|
1122
|
+
for (const msg of outboxMessages) {
|
|
1123
|
+
const sanitized = msg.content.replace(/\r?\n/g, " / ").slice(0, 200);
|
|
1124
|
+
logExecution(statusPath, `Agent ${msg.type}`, sanitized);
|
|
1125
|
+
|
|
1126
|
+
if (msg.type === "reply" || msg.type === "escalate") {
|
|
1127
|
+
appendAgentEvent(config.stateRoot, config.batchId, workerAgentId, {
|
|
1128
|
+
batchId: config.batchId,
|
|
1129
|
+
agentId: workerAgentId,
|
|
1130
|
+
role: "worker",
|
|
1131
|
+
laneNumber: config.laneNumber,
|
|
1132
|
+
taskId,
|
|
1133
|
+
repoId: config.repoId,
|
|
1134
|
+
ts: Date.now(),
|
|
1135
|
+
type: msg.type === "reply" ? "reply_sent" : "escalation_sent",
|
|
1136
|
+
payload: {
|
|
1137
|
+
messageId: msg.id,
|
|
1138
|
+
replyTo: msg.replyTo ?? null,
|
|
1139
|
+
content: sanitized,
|
|
1140
|
+
},
|
|
1141
|
+
});
|
|
1142
|
+
|
|
1143
|
+
appendMailboxAuditEvent(config.stateRoot, config.batchId, {
|
|
1144
|
+
type: msg.type === "reply" ? "message_replied" : "message_escalated",
|
|
1145
|
+
from: workerAgentId,
|
|
1146
|
+
to: "supervisor",
|
|
1147
|
+
messageId: msg.id,
|
|
1148
|
+
messageType: msg.type,
|
|
1149
|
+
contentPreview: sanitized,
|
|
1150
|
+
});
|
|
1151
|
+
|
|
1152
|
+
if (config.onSupervisorAlert) {
|
|
1153
|
+
const isEscalation = msg.type === "escalate";
|
|
1154
|
+
try {
|
|
1155
|
+
config.onSupervisorAlert({
|
|
1156
|
+
category: "agent-message",
|
|
1157
|
+
summary:
|
|
1158
|
+
`${isEscalation ? "🚨" : "📨"} Agent ${isEscalation ? "escalation" : "reply"} from ${workerAgentId}\n` +
|
|
1159
|
+
` Task: ${taskId}\n` +
|
|
1160
|
+
` Lane: lane-${config.laneNumber}\n` +
|
|
1161
|
+
` Message: ${sanitized}`,
|
|
1162
|
+
context: {
|
|
1163
|
+
taskId,
|
|
1164
|
+
laneId: `lane-${config.laneNumber}`,
|
|
1165
|
+
laneNumber: config.laneNumber,
|
|
1166
|
+
agentId: workerAgentId,
|
|
1167
|
+
messageId: msg.id,
|
|
1168
|
+
exitReason: `${isEscalation ? "agent_escalation" : "agent_reply"}: ${sanitized}`,
|
|
1169
|
+
},
|
|
1170
|
+
});
|
|
1171
|
+
} catch {
|
|
1172
|
+
/* best effort */
|
|
1173
|
+
}
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
// Consume outbox message to prevent duplicate processing in later iterations.
|
|
1178
|
+
ackOutboxMessage(config.stateRoot, config.batchId, workerAgentId, msg.id);
|
|
1179
|
+
}
|
|
1180
|
+
} catch {
|
|
1181
|
+
/* best effort */
|
|
1182
|
+
}
|
|
1183
|
+
|
|
1184
|
+
// ── Steering annotation ─────────────────────────────────────
|
|
1185
|
+
try {
|
|
1186
|
+
if (existsSync(steeringPendingPath)) {
|
|
1187
|
+
const raw = readFileSync(steeringPendingPath, "utf-8");
|
|
1188
|
+
for (const line of raw.split("\n").filter((l) => l.trim())) {
|
|
1189
|
+
try {
|
|
1190
|
+
const entry = JSON.parse(line) as { ts: number; content: string; id: string };
|
|
1191
|
+
const sanitized = entry.content.replace(/\r?\n/g, " / ").replace(/\|/g, "\\|").slice(0, 200);
|
|
1192
|
+
const ts = new Date(entry.ts).toISOString().slice(0, 16).replace("T", " ");
|
|
1193
|
+
logExecution(statusPath, "⚠️ Steering", sanitized);
|
|
1194
|
+
} catch {
|
|
1195
|
+
/* skip malformed */
|
|
1196
|
+
}
|
|
1197
|
+
}
|
|
1198
|
+
unlinkSync(steeringPendingPath);
|
|
1199
|
+
}
|
|
1200
|
+
} catch {
|
|
1201
|
+
/* non-fatal */
|
|
1202
|
+
}
|
|
1203
|
+
|
|
1204
|
+
// Log iteration result
|
|
1205
|
+
const statusMsg = workerResult.killed
|
|
1206
|
+
? `killed (${workerKillReason === "context" ? "context limit" : "wall-clock timeout"})`
|
|
1207
|
+
: workerResult.exitCode === 0
|
|
1208
|
+
? "done"
|
|
1209
|
+
: `error (code ${workerResult.exitCode})`;
|
|
1210
|
+
logExecution(
|
|
1211
|
+
statusPath,
|
|
1212
|
+
`Worker iter ${totalIterations}`,
|
|
1213
|
+
`${statusMsg} in ${Math.round(workerResult.durationMs / 1000)}s, tools: ${workerResult.toolCalls}`,
|
|
1214
|
+
);
|
|
1215
|
+
|
|
1216
|
+
// ── Check progress ──────────────────────────────────────────
|
|
1217
|
+
const afterStatusContent = readFileSync(statusPath, "utf-8");
|
|
1218
|
+
const afterStatus = parseStatusMd(afterStatusContent);
|
|
1219
|
+
// TP-174: Segment-scoped progress delta
|
|
1220
|
+
let afterTotalChecked: number;
|
|
1221
|
+
if (repoStepNumbers && currentRepoId) {
|
|
1222
|
+
const segCbs = getSegmentCheckboxes(afterStatusContent, firstStep.number, currentRepoId);
|
|
1223
|
+
afterTotalChecked = segCbs ? segCbs.checked : 0;
|
|
1224
|
+
} else {
|
|
1225
|
+
afterTotalChecked = afterStatus.steps.reduce((sum, s) => sum + s.totalChecked, 0);
|
|
1226
|
+
}
|
|
1227
|
+
const progressDelta = afterTotalChecked - prevTotalChecked;
|
|
1228
|
+
|
|
1229
|
+
if (progressDelta <= 0) {
|
|
1230
|
+
// Check for soft progress: uncommitted changes in the worktree
|
|
1231
|
+
// indicate the worker is actively editing code even if no checkbox
|
|
1232
|
+
// was checked yet. This avoids false stall detection on complex
|
|
1233
|
+
// steps where analysis + editing spans multiple tool calls.
|
|
1234
|
+
let hasSoftProgress = false;
|
|
1235
|
+
try {
|
|
1236
|
+
const diffOutput = execSync("git diff --stat HEAD", {
|
|
1237
|
+
cwd: unit.worktreePath,
|
|
1238
|
+
timeout: 5000,
|
|
1239
|
+
encoding: "utf-8",
|
|
1240
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
1241
|
+
}).trim();
|
|
1242
|
+
// Only count source file changes as soft progress, not just STATUS.md
|
|
1243
|
+
const changedFiles = diffOutput.split("\n").filter((l) => l.includes("|"));
|
|
1244
|
+
const sourceChanges = changedFiles.filter(
|
|
1245
|
+
(l) => !l.includes("STATUS.md") && !l.includes(".steering"),
|
|
1246
|
+
);
|
|
1247
|
+
hasSoftProgress = sourceChanges.length > 0;
|
|
1248
|
+
} catch {
|
|
1249
|
+
/* git not available or timeout — treat as no soft progress */
|
|
1250
|
+
}
|
|
1251
|
+
|
|
1252
|
+
if (hasSoftProgress) {
|
|
1253
|
+
// Worker has uncommitted code changes — don't count toward stall.
|
|
1254
|
+
// Reset the counter since the worker is actively editing.
|
|
1255
|
+
logExecution(
|
|
1256
|
+
statusPath,
|
|
1257
|
+
"Soft progress",
|
|
1258
|
+
`Iteration ${totalIterations}: 0 new checkboxes but uncommitted source changes detected — not counting as stall`,
|
|
1259
|
+
);
|
|
1260
|
+
noProgressCount = 0;
|
|
1261
|
+
} else {
|
|
1262
|
+
noProgressCount++;
|
|
1263
|
+
logExecution(
|
|
1264
|
+
statusPath,
|
|
1265
|
+
"No progress",
|
|
1266
|
+
`Iteration ${totalIterations}: 0 new checkboxes (${noProgressCount}/${config.noProgressLimit} stall limit)`,
|
|
1267
|
+
);
|
|
1268
|
+
if (noProgressCount >= config.noProgressLimit) {
|
|
1269
|
+
logExecution(statusPath, "Task blocked", `No progress after ${noProgressCount} iterations`);
|
|
1270
|
+
// TP-187 (#538): synchronous outbox drain at lane-termination decision
|
|
1271
|
+
// point. Purges any pending escalations/replies/segment-expansions the
|
|
1272
|
+
// worker emitted just before termination so they are not later re-
|
|
1273
|
+
// discovered and re-forwarded as zombie supervisor alerts.
|
|
1274
|
+
try {
|
|
1275
|
+
const drained = drainAgentOutbox(config.stateRoot, config.batchId, workerAgentId);
|
|
1276
|
+
if (drained > 0) {
|
|
1277
|
+
logExecution(
|
|
1278
|
+
statusPath,
|
|
1279
|
+
"Outbox drained",
|
|
1280
|
+
`No-progress kill: drained ${drained} pending outbox entr${drained === 1 ? "y" : "ies"} for ${workerAgentId}`,
|
|
1281
|
+
);
|
|
1282
|
+
}
|
|
1283
|
+
} catch {
|
|
1284
|
+
/* best effort — do not block termination */
|
|
1285
|
+
}
|
|
1286
|
+
// TP-187 (#538): notify the supervisor process so it can suppress any
|
|
1287
|
+
// further alerts queued for this lane (zombie-alert filter).
|
|
1288
|
+
if (config.onLaneTerminated) {
|
|
1289
|
+
try {
|
|
1290
|
+
config.onLaneTerminated({
|
|
1291
|
+
laneNumber: config.laneNumber,
|
|
1292
|
+
agentId: workerAgentId,
|
|
1293
|
+
batchId: config.batchId,
|
|
1294
|
+
terminatedAt: Date.now(),
|
|
1295
|
+
reason: "no-progress-kill",
|
|
1296
|
+
});
|
|
1297
|
+
} catch {
|
|
1298
|
+
/* best effort */
|
|
1299
|
+
}
|
|
1300
|
+
}
|
|
1301
|
+
return makeResult(
|
|
1302
|
+
taskId,
|
|
1303
|
+
segmentId,
|
|
1304
|
+
workerAgentId,
|
|
1305
|
+
"failed",
|
|
1306
|
+
startTime,
|
|
1307
|
+
`No progress after ${noProgressCount} iterations`,
|
|
1308
|
+
false,
|
|
1309
|
+
totalIterations,
|
|
1310
|
+
cumulativeCostUsd,
|
|
1311
|
+
cumulativeTokens,
|
|
1312
|
+
config,
|
|
1313
|
+
statusPath,
|
|
1314
|
+
reviewerStatePath,
|
|
1315
|
+
lastTelemetry,
|
|
1316
|
+
snapshotSegmentCtx,
|
|
1317
|
+
);
|
|
1318
|
+
}
|
|
1319
|
+
}
|
|
1320
|
+
} else {
|
|
1321
|
+
noProgressCount = 0;
|
|
1322
|
+
}
|
|
1323
|
+
|
|
1324
|
+
// Mark completed steps
|
|
1325
|
+
// TP-174: When segment-scoped, mark step complete when the segment's
|
|
1326
|
+
// checkboxes are all checked (not the full step which may have other segments).
|
|
1327
|
+
if (repoStepNumbers && currentRepoId) {
|
|
1328
|
+
for (const stepNum of repoStepNumbers) {
|
|
1329
|
+
if (isSegmentComplete(afterStatusContent, stepNum, currentRepoId)) {
|
|
1330
|
+
// Only mark step complete in STATUS.md if ALL segments in that step
|
|
1331
|
+
// are complete (not just ours). But for loop exit, we only care about ours.
|
|
1332
|
+
const ss = afterStatus.steps.find((s) => s.number === stepNum);
|
|
1333
|
+
if (isStepComplete(ss)) {
|
|
1334
|
+
updateStepStatus(statusPath, stepNum, "complete");
|
|
1335
|
+
}
|
|
1336
|
+
}
|
|
1337
|
+
}
|
|
1338
|
+
} else {
|
|
1339
|
+
for (const step of parsed.steps) {
|
|
1340
|
+
const ss = afterStatus.steps.find((s) => s.number === step.number);
|
|
1341
|
+
if (isStepComplete(ss)) {
|
|
1342
|
+
updateStepStatus(statusPath, step.number, "complete");
|
|
1343
|
+
}
|
|
1344
|
+
}
|
|
1345
|
+
}
|
|
1346
|
+
|
|
1347
|
+
// Check if all steps are now complete
|
|
1348
|
+
// TP-174: When segment-scoped, exit when all steps for this repoId
|
|
1349
|
+
// have their segment checkboxes complete.
|
|
1350
|
+
let allComplete: boolean;
|
|
1351
|
+
if (repoStepNumbers && currentRepoId) {
|
|
1352
|
+
allComplete = [...repoStepNumbers].every((stepNum) =>
|
|
1353
|
+
isSegmentComplete(afterStatusContent, stepNum, currentRepoId),
|
|
1354
|
+
);
|
|
1355
|
+
} else {
|
|
1356
|
+
allComplete = parsed.steps.every((step) => {
|
|
1357
|
+
const ss = afterStatus.steps.find((s) => s.number === step.number);
|
|
1358
|
+
return isStepComplete(ss);
|
|
1359
|
+
});
|
|
1360
|
+
}
|
|
1361
|
+
if (allComplete) break;
|
|
1362
|
+
}
|
|
1363
|
+
|
|
1364
|
+
// ── 3. Post-loop completion check ───────────────────────────────
|
|
1365
|
+
const finalStatusContent = readFileSync(statusPath, "utf-8");
|
|
1366
|
+
const finalStatus = parseStatusMd(finalStatusContent);
|
|
1367
|
+
const parsed = parsePromptMd(readFileSync(promptPath, "utf-8"), promptPath);
|
|
1368
|
+
|
|
1369
|
+
// TP-174: Segment-scoped post-loop check. Re-derive repo scoping since
|
|
1370
|
+
// the iteration loop variables are out of scope here.
|
|
1371
|
+
const postLoopRepoId = segmentId ? config.repoId : null;
|
|
1372
|
+
const postLoopStepSegMap = unit.task.stepSegmentMap;
|
|
1373
|
+
const postLoopRepoSteps =
|
|
1374
|
+
postLoopStepSegMap && postLoopRepoId
|
|
1375
|
+
? getStepsForRepoId(postLoopStepSegMap, postLoopRepoId)
|
|
1376
|
+
: null;
|
|
1377
|
+
const effectivePostLoopRepoSteps =
|
|
1378
|
+
postLoopRepoSteps && postLoopRepoSteps.size > 0 ? postLoopRepoSteps : null;
|
|
1379
|
+
|
|
1380
|
+
let allStepsComplete: boolean;
|
|
1381
|
+
if (effectivePostLoopRepoSteps && postLoopRepoId) {
|
|
1382
|
+
allStepsComplete = [...effectivePostLoopRepoSteps].every((stepNum) =>
|
|
1383
|
+
isSegmentComplete(finalStatusContent, stepNum, postLoopRepoId),
|
|
1384
|
+
);
|
|
1385
|
+
} else {
|
|
1386
|
+
allStepsComplete = parsed.steps.every((step) => {
|
|
1387
|
+
const ss = finalStatus.steps.find((s) => s.number === step.number);
|
|
1388
|
+
return isStepComplete(ss);
|
|
1389
|
+
});
|
|
1390
|
+
}
|
|
1391
|
+
|
|
1392
|
+
if (!allStepsComplete) {
|
|
1393
|
+
let incomplete: string;
|
|
1394
|
+
if (effectivePostLoopRepoSteps && postLoopRepoId) {
|
|
1395
|
+
incomplete = [...effectivePostLoopRepoSteps]
|
|
1396
|
+
.filter((stepNum) => !isSegmentComplete(finalStatusContent, stepNum, postLoopRepoId))
|
|
1397
|
+
.map((n) => `Step ${n}`)
|
|
1398
|
+
.join(", ");
|
|
1399
|
+
} else {
|
|
1400
|
+
incomplete = parsed.steps
|
|
1401
|
+
.filter((step) => {
|
|
1402
|
+
const ss = finalStatus.steps.find((s) => s.number === step.number);
|
|
1403
|
+
return !isStepComplete(ss);
|
|
1404
|
+
})
|
|
1405
|
+
.map((s) => `Step ${s.number}`)
|
|
1406
|
+
.join(", ");
|
|
1407
|
+
}
|
|
1408
|
+
logExecution(statusPath, "Task incomplete", `Max iterations reached. Incomplete: ${incomplete}`);
|
|
1409
|
+
return makeResult(
|
|
1410
|
+
taskId,
|
|
1411
|
+
segmentId,
|
|
1412
|
+
workerAgentId,
|
|
1413
|
+
"failed",
|
|
1414
|
+
startTime,
|
|
1415
|
+
`Max iterations (${config.maxIterations}) reached with incomplete steps: ${incomplete}`,
|
|
1416
|
+
false,
|
|
1417
|
+
totalIterations,
|
|
1418
|
+
cumulativeCostUsd,
|
|
1419
|
+
cumulativeTokens,
|
|
1420
|
+
config,
|
|
1421
|
+
statusPath,
|
|
1422
|
+
reviewerStatePath,
|
|
1423
|
+
lastTelemetry,
|
|
1424
|
+
snapshotSegmentCtx,
|
|
1425
|
+
);
|
|
1426
|
+
}
|
|
1427
|
+
|
|
1428
|
+
// TP-145: Determine if this is a non-final segment of a multi-segment task.
|
|
1429
|
+
// If more segments remain after this one, suppress .DONE creation so that
|
|
1430
|
+
// the engine can advance the segment frontier and execute subsequent segments.
|
|
1431
|
+
// .DONE must only exist when ALL segments of a multi-segment task are complete.
|
|
1432
|
+
const isNonFinalSegment =
|
|
1433
|
+
segmentId != null &&
|
|
1434
|
+
Array.isArray(unit.task.segmentIds) &&
|
|
1435
|
+
unit.task.segmentIds.length > 1 &&
|
|
1436
|
+
unit.task.segmentIds[unit.task.segmentIds.length - 1] !== segmentId;
|
|
1437
|
+
|
|
1438
|
+
// TP-165: Check for pending expansion requests in the worker's outbox.
|
|
1439
|
+
// If the worker filed expansion requests, more segments may be added by the
|
|
1440
|
+
// engine at the segment boundary — .DONE must not be created even if this
|
|
1441
|
+
// appears to be the final segment based on the static segmentIds list.
|
|
1442
|
+
const hasPendingExpansionRequests =
|
|
1443
|
+
segmentId != null &&
|
|
1444
|
+
hasPendingExpansionRequestFiles(config.stateRoot, config.batchId, workerAgentId);
|
|
1445
|
+
|
|
1446
|
+
if (isNonFinalSegment || hasPendingExpansionRequests) {
|
|
1447
|
+
// Segment succeeded but more segments remain — suppress .DONE and "✅ Complete" status.
|
|
1448
|
+
// The engine will advance the frontier and dispatch the next segment.
|
|
1449
|
+
// Also delete any .DONE the worker may have created directly (workers have
|
|
1450
|
+
// write access and sometimes create .DONE on their own, bypassing this gate).
|
|
1451
|
+
if (existsSync(donePath)) {
|
|
1452
|
+
let deleted = false;
|
|
1453
|
+
try {
|
|
1454
|
+
unlinkSync(donePath);
|
|
1455
|
+
deleted = true;
|
|
1456
|
+
} catch {
|
|
1457
|
+
/* best effort */
|
|
1458
|
+
}
|
|
1459
|
+
if (deleted) {
|
|
1460
|
+
logExecution(
|
|
1461
|
+
statusPath,
|
|
1462
|
+
"Segment complete",
|
|
1463
|
+
`Segment ${segmentId} succeeded (non-final — removed premature worker-created .DONE)`,
|
|
1464
|
+
);
|
|
1465
|
+
} else {
|
|
1466
|
+
logExecution(
|
|
1467
|
+
statusPath,
|
|
1468
|
+
"Segment complete",
|
|
1469
|
+
`⚠️ Segment ${segmentId} succeeded but FAILED to remove premature .DONE — downstream segments may be skipped`,
|
|
1470
|
+
);
|
|
1471
|
+
}
|
|
1472
|
+
} else {
|
|
1473
|
+
logExecution(
|
|
1474
|
+
statusPath,
|
|
1475
|
+
"Segment complete",
|
|
1476
|
+
`Segment ${segmentId} succeeded (not final — .DONE suppressed)`,
|
|
1477
|
+
);
|
|
1478
|
+
}
|
|
1479
|
+
const suppressionReason = isNonFinalSegment ? "non-final" : "pending expansion requests";
|
|
1480
|
+
return makeResult(
|
|
1481
|
+
taskId,
|
|
1482
|
+
segmentId,
|
|
1483
|
+
workerAgentId,
|
|
1484
|
+
"succeeded",
|
|
1485
|
+
startTime,
|
|
1486
|
+
`Segment completed (${suppressionReason} — .DONE suppressed)`,
|
|
1487
|
+
false,
|
|
1488
|
+
totalIterations,
|
|
1489
|
+
cumulativeCostUsd,
|
|
1490
|
+
cumulativeTokens,
|
|
1491
|
+
config,
|
|
1492
|
+
statusPath,
|
|
1493
|
+
reviewerStatePath,
|
|
1494
|
+
lastTelemetry,
|
|
1495
|
+
snapshotSegmentCtx,
|
|
1496
|
+
);
|
|
1497
|
+
}
|
|
1498
|
+
|
|
1499
|
+
// Create .DONE if not already present (final segment or single-segment/whole-task execution)
|
|
1500
|
+
if (!existsSync(donePath)) {
|
|
1501
|
+
writeFileSync(donePath, `Completed: ${new Date().toISOString()}\nTask: ${taskId}\n`);
|
|
1502
|
+
}
|
|
1503
|
+
updateStatusField(statusPath, "Status", "✅ Complete");
|
|
1504
|
+
logExecution(statusPath, "Task complete", ".DONE created");
|
|
1505
|
+
|
|
1506
|
+
return makeResult(
|
|
1507
|
+
taskId,
|
|
1508
|
+
segmentId,
|
|
1509
|
+
workerAgentId,
|
|
1510
|
+
"succeeded",
|
|
1511
|
+
startTime,
|
|
1512
|
+
".DONE file created by lane-runner",
|
|
1513
|
+
true,
|
|
1514
|
+
totalIterations,
|
|
1515
|
+
cumulativeCostUsd,
|
|
1516
|
+
cumulativeTokens,
|
|
1517
|
+
config,
|
|
1518
|
+
statusPath,
|
|
1519
|
+
reviewerStatePath,
|
|
1520
|
+
lastTelemetry,
|
|
1521
|
+
snapshotSegmentCtx,
|
|
1522
|
+
);
|
|
1523
|
+
}
|
|
1524
|
+
|
|
1525
|
+
// ── Helpers ──────────────────────────────────────────────────────────
|
|
1526
|
+
|
|
1527
|
+
/**
 * TP-165: Report whether the worker's outbox holds pending segment expansion requests.
 *
 * A request counts as pending while its file name still matches
 * `segment-expansion-*.json`. Names that carry a further suffix after `.json`
 * (such as `.processed` or `.rejected`) do not match the anchored pattern and
 * are ignored. If any pending file exists, the engine will process it at the
 * segment boundary — and may add more segments to the task.
 *
 * The outbox directory is read directly rather than probed with `existsSync`
 * first: a missing or unreadable directory makes `readdirSync` throw, and that
 * is reported as "nothing pending". This avoids a check-then-use window and a
 * redundant filesystem call.
 *
 * @param stateRoot - Root directory that contains the `.pi` state tree.
 * @param batchId - Batch whose mailbox is inspected.
 * @param agentId - Agent whose outbox is inspected.
 * @returns true if at least one pending expansion request file exists
 */
export function hasPendingExpansionRequestFiles(
	stateRoot: string,
	batchId: string,
	agentId: string,
): boolean {
	const outboxDir = join(stateRoot, ".pi", "mailbox", batchId, agentId, "outbox");
	try {
		return readdirSync(outboxDir).some((entry) => /^segment-expansion-.+\.json$/.test(entry));
	} catch {
		// Missing outbox, non-directory path, or permission problem: treat as "no pending requests".
		return false;
	}
}
|
|
1550
|
+
|
|
1551
|
+
/**
 * Translate a lane task's final status into the status written to the terminal
 * lane snapshot.
 *
 * `"succeeded"` becomes `"complete"`, `"skipped"` becomes `"idle"`, and every
 * other status is reported as `"failed"`.
 */
export function mapLaneTaskStatusToTerminalSnapshotStatus(
	status: LaneTaskStatus,
): "idle" | "complete" | "failed" {
	switch (status) {
		case "succeeded":
			return "complete";
		case "skipped":
			return "idle";
		default:
			return "failed";
	}
}
|
|
1558
|
+
|
|
1559
|
+
/**
 * Translate a lane snapshot status into the runtime status reported for the
 * lane's worker agent.
 *
 * `"running"` stays `"running"`, `"complete"` becomes `"exited"`, `"idle"`
 * becomes `"wrapping_up"`, and anything else (i.e. `"failed"`) becomes
 * `"crashed"`.
 */
export function mapLaneSnapshotStatusToWorkerStatus(
	status: "running" | "idle" | "complete" | "failed",
): RuntimeAgentStatus {
	switch (status) {
		case "running":
			return "running";
		case "complete":
			return "exited";
		case "idle":
			return "wrapping_up";
		default:
			return "crashed";
	}
}
|
|
1567
|
+
|
|
1568
|
+
/**
 * Assemble the `LaneRunnerTaskResult` for a finished task (or segment) and,
 * when enough context is supplied, write the terminal lane snapshot.
 *
 * @param taskId - Task the result belongs to.
 * @param segmentId - Segment that was executed, or null.
 * @param sessionName - Recorded as `outcome.sessionName`.
 * @param status - Final lane task status.
 * @param startTime - Recorded as `outcome.startTime`; `outcome.endTime` is taken from `Date.now()`.
 * @param exitReason - Recorded as `outcome.exitReason`.
 * @param doneFileFound - Recorded as `outcome.doneFileFound`.
 * @param iterations - Iteration count reported on the result.
 * @param costUsd - Cost reported on the result.
 * @param totalTokens - Token count reported on the result.
 * @param config - Lane runner config; supplies `laneNumber` and enables the snapshot.
 * @param statusPath - Status file path forwarded to the snapshot writer.
 * @param reviewerStatePath - Reviewer state file path forwarded to the snapshot writer.
 * @param finalTelemetry - Last telemetry from the agent host; missing fields default to 0.
 * @param segmentCtx - TP-174: segment context for segment-scoped snapshot progress.
 * @returns The assembled task result.
 */
function makeResult(
	taskId: string,
	segmentId: string | null,
	sessionName: string,
	status: LaneTaskStatus,
	startTime: number,
	exitReason: string,
	doneFileFound: boolean,
	iterations: number,
	costUsd: number,
	totalTokens: number,
	config?: LaneRunnerConfig,
	statusPath?: string,
	reviewerStatePath?: string,
	finalTelemetry?: Partial<AgentHostResult>,
	/** TP-174: Segment context for segment-scoped snapshot progress */
	segmentCtx?: { stepSegmentMap: StepSegmentMapping[]; repoId: string } | null,
): LaneRunnerTaskResult {
	const source: Partial<AgentHostResult> = finalTelemetry ?? {};

	// Skipped tasks carry no telemetry at all; every other status gets a fully
	// populated record with absent counters reported as zero.
	const telemetry =
		status === "skipped"
			? undefined
			: {
					inputTokens: source.inputTokens ?? 0,
					outputTokens: source.outputTokens ?? 0,
					cacheReadTokens: source.cacheReadTokens ?? 0,
					cacheWriteTokens: source.cacheWriteTokens ?? 0,
					costUsd: source.costUsd ?? 0,
					toolCalls: source.toolCalls ?? 0,
					durationMs: source.durationMs ?? 0,
				};

	const result: LaneRunnerTaskResult = {
		outcome: {
			taskId,
			status,
			segmentId,
			startTime,
			endTime: Date.now(),
			exitReason,
			sessionName,
			doneFileFound,
			laneNumber: config?.laneNumber,
			telemetry,
		},
		iterations,
		costUsd,
		totalTokens,
	};

	// The snapshot needs the config and both file paths; without them only the result is returned.
	if (!config || !statusPath || !reviewerStatePath) {
		return result;
	}

	// TP-115: Emit terminal snapshot with real telemetry from agent-host result
	emitSnapshot(
		config,
		taskId,
		segmentId,
		mapLaneTaskStatusToTerminalSnapshotStatus(status),
		source,
		statusPath,
		reviewerStatePath,
		segmentCtx,
	);

	return result;
}
|
|
1634
|
+
|
|
1635
|
+
/** Reviewer state older than this many milliseconds (two minutes) is treated as stale. */
const REVIEWER_STATE_STALE_MS = 120_000;

/**
 * Read the reviewer's on-disk state file and turn it into a telemetry snapshot.
 *
 * The path argument may point either at the reviewer state file itself or at
 * a file named `status.md` (compared case-insensitively); in the latter case
 * `.reviewer-state.json` in the same directory is read instead.
 *
 * @param config - Lane runner config; `agentIdPrefix` and `laneNumber` are used to build the reviewer agent id.
 * @param reviewerStatePathOrStatusPath - Path to the reviewer state file or to the status file next to it.
 * @returns A snapshot with status `"running"`, or null when the file is
 *   missing, unreadable or unparsable, reports a status other than
 *   `"running"`, or carries an `updatedAt` older than `REVIEWER_STATE_STALE_MS`.
 */
export function readReviewerTelemetrySnapshot(
	config: LaneRunnerConfig,
	reviewerStatePathOrStatusPath: string,
): (RuntimeAgentTelemetrySnapshot & { reviewType?: string; reviewStep?: number }) | null {
	let reviewerPath = reviewerStatePathOrStatusPath;
	if (basename(reviewerStatePathOrStatusPath).toLowerCase() === "status.md") {
		reviewerPath = join(dirname(reviewerStatePathOrStatusPath), ".reviewer-state.json");
	}
	if (!existsSync(reviewerPath)) return null;

	/** Keep finite numbers as-is; substitute `fallback` for anything else. */
	function finiteOr<T>(value: unknown, fallback: T): number | T {
		return Number.isFinite(value) ? Number(value) : fallback;
	}

	try {
		const state = JSON.parse(readFileSync(reviewerPath, "utf-8")) as Partial<{
			status: string;
			elapsedMs: number;
			toolCalls: number;
			contextPct: number;
			costUsd: number;
			lastTool: string;
			inputTokens: number;
			outputTokens: number;
			cacheReadTokens: number;
			cacheWriteTokens: number;
			updatedAt: number;
			reviewType: string;
			reviewStep: number;
		}>;

		// Only a reviewer that reports itself as running yields a snapshot.
		if (state.status !== "running") return null;

		// Stale guard: a present (truthy) updatedAt that is too old means the state is ignored.
		// A missing updatedAt skips the guard entirely.
		if (state.updatedAt) {
			const ageMs = Date.now() - state.updatedAt;
			if (ageMs > REVIEWER_STATE_STALE_MS) return null;
		}

		return {
			agentId: buildRuntimeAgentId(config.agentIdPrefix, config.laneNumber, "reviewer"),
			status: "running",
			elapsedMs: finiteOr(state.elapsedMs, 0),
			toolCalls: finiteOr(state.toolCalls, 0),
			contextPct: finiteOr(state.contextPct, 0),
			costUsd: finiteOr(state.costUsd, 0),
			lastTool: typeof state.lastTool === "string" ? state.lastTool : "",
			inputTokens: finiteOr(state.inputTokens, 0),
			outputTokens: finiteOr(state.outputTokens, 0),
			cacheReadTokens: finiteOr(state.cacheReadTokens, 0),
			cacheWriteTokens: finiteOr(state.cacheWriteTokens, 0),
			reviewType: typeof state.reviewType === "string" ? state.reviewType : undefined,
			reviewStep: finiteOr(state.reviewStep, undefined),
		};
	} catch {
		// Unreadable or malformed state is treated the same as no reviewer running.
		return null;
	}
}
|
|
1690
|
+
|
|
1691
|
+
/**
 * Write the current lane snapshot to disk. NON-THROWING by contract: every
 * error raised while gathering data or writing is caught and swallowed,
 * because this runs from setInterval and onTelemetry callbacks where an
 * unhandled throw would trigger uncaughtException and crash the
 * engine-worker process.
 *
 * Progress is parsed from the status file on a best-effort basis; if that
 * fails the snapshot is still written with `progress` set to null.
 *
 * @param config - Lane runner config (batch, lane, repo and state-root identity).
 * @param taskId - Task being reported.
 * @param segmentId - Segment being reported, or null.
 * @param status - Lane status to record.
 * @param telemetry - Worker telemetry; missing fields are reported as 0 / "".
 * @param statusPath - Path of the status file to parse progress from.
 * @param reviewerStatePath - Path used to look up reviewer telemetry.
 * @param segmentContext - TP-174: when given, progress counts only the
 *   checkboxes of steps mapped to this segment's repoId.
 * @returns true when snapshot write succeeds, false when it fails.
 */
function emitSnapshot(
	config: LaneRunnerConfig,
	taskId: string,
	segmentId: string | null,
	status: "running" | "idle" | "complete" | "failed",
	telemetry: Partial<AgentHostResult>,
	statusPath: string,
	reviewerStatePath: string,
	/** TP-174: Optional segment context for segment-scoped progress reporting */
	segmentContext?: { stepSegmentMap: StepSegmentMapping[]; repoId: string } | null,
): boolean {
	/** Parse progress from STATUS.md; any failure yields null (best effort). */
	const loadProgress = (): RuntimeTaskProgress | null => {
		try {
			const statusText = readFileSync(statusPath, "utf-8");
			const statusDoc = parseStatusMd(statusText);
			const stepMatch = statusText.match(/\*\*Current Step:\*\*\s*(.+)/);

			let checked = 0;
			let total = 0;
			if (segmentContext) {
				// TP-174: Segment-scoped progress — only count checkboxes from
				// steps that belong to this segment's repoId.
				const { stepSegmentMap, repoId } = segmentContext;
				for (const stepNum of getStepsForRepoId(stepSegmentMap, repoId)) {
					const boxes = getSegmentCheckboxes(statusText, stepNum, repoId);
					if (!boxes) continue;
					checked += boxes.checked;
					total += boxes.total;
				}
			} else {
				// Whole-task progress: sum over every parsed step.
				for (const step of statusDoc.steps) {
					checked += step.totalChecked;
					total += step.totalItems;
				}
			}

			return {
				currentStep: stepMatch?.[1]?.trim() || "Unknown",
				checked,
				total,
				iteration: statusDoc.iteration,
				reviews: statusDoc.reviewCounter,
			};
		} catch {
			/* best effort */
			return null;
		}
	};

	try {
		const progress = loadProgress();
		const reviewer = readReviewerTelemetrySnapshot(config, reviewerStatePath);

		const worker = {
			agentId: buildRuntimeAgentId(config.agentIdPrefix, config.laneNumber, "worker"),
			status: mapLaneSnapshotStatusToWorkerStatus(status),
			elapsedMs: telemetry.durationMs ?? 0,
			toolCalls: telemetry.toolCalls ?? 0,
			contextPct: telemetry.contextUsage?.percent ?? 0,
			costUsd: telemetry.costUsd ?? 0,
			lastTool: telemetry.lastTool ?? "",
			inputTokens: telemetry.inputTokens ?? 0,
			outputTokens: telemetry.outputTokens ?? 0,
			cacheReadTokens: telemetry.cacheReadTokens ?? 0,
			cacheWriteTokens: telemetry.cacheWriteTokens ?? 0,
		};

		const snapshot: RuntimeLaneSnapshot = {
			batchId: config.batchId,
			laneNumber: config.laneNumber,
			laneId: `lane-${config.laneNumber}`,
			repoId: config.repoId,
			taskId,
			segmentId,
			status,
			worker,
			reviewer,
			progress,
			updatedAt: Date.now(),
		};

		writeLaneSnapshot(config.stateRoot, config.batchId, config.laneNumber, snapshot as any);
		return true;
	} catch {
		// Non-fatal: snapshot is telemetry, not execution-critical.
		// Swallow to prevent uncaughtException crash in setInterval/callback contexts.
		return false;
	}
}
|