@pi-agents/orchid 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/CHANGELOG.md +41 -0
  2. package/LICENSE +21 -0
  3. package/README.md +246 -0
  4. package/agents/AGENTS-MANIFEST.md +42 -0
  5. package/agents/brain.md +42 -0
  6. package/agents/context-builder.md +46 -0
  7. package/agents/delegate.md +12 -0
  8. package/agents/dev-1.md +42 -0
  9. package/agents/oracle.md +73 -0
  10. package/agents/planner.md +55 -0
  11. package/agents/researcher.md +52 -0
  12. package/agents/reviewer.md +79 -0
  13. package/agents/scout.md +50 -0
  14. package/agents/tester.md +45 -0
  15. package/agents/worker.md +55 -0
  16. package/extensions/ralph.ts +1 -0
  17. package/extensions/reviewer-extension.ts +125 -0
  18. package/extensions/task-orchestrator.ts +28 -0
  19. package/package.json +63 -0
  20. package/prompts/gather-context-and-clarify.md +13 -0
  21. package/prompts/parallel-cleanup.md +59 -0
  22. package/prompts/parallel-context-build.md +53 -0
  23. package/prompts/parallel-handoff-plan.md +59 -0
  24. package/prompts/parallel-research.md +50 -0
  25. package/prompts/parallel-review.md +54 -0
  26. package/prompts/review-loop.md +41 -0
  27. package/skills/orchid/SKILL.md +214 -0
  28. package/skills/orchid/orchid-cleanup/SKILL.md +122 -0
  29. package/skills/orchid/orchid-converge/SKILL.md +124 -0
  30. package/skills/orchid/orchid-decompose/SKILL.md +201 -0
  31. package/skills/orchid/orchid-doctor/SKILL.md +162 -0
  32. package/skills/orchid/orchid-investigate/SKILL.md +102 -0
  33. package/skills/orchid/orchid-launch/SKILL.md +147 -0
  34. package/skills/ralph/SKILL.md +73 -0
  35. package/skills/subagents/pi-subagents/SKILL.md +813 -0
  36. package/src/index.ts +7 -0
  37. package/src/orchestrator/abort.ts +534 -0
  38. package/src/orchestrator/agent-bridge-extension.ts +1020 -0
  39. package/src/orchestrator/agent-host.ts +954 -0
  40. package/src/orchestrator/cleanup.ts +776 -0
  41. package/src/orchestrator/config-loader.ts +1412 -0
  42. package/src/orchestrator/config-schema.ts +690 -0
  43. package/src/orchestrator/config.ts +81 -0
  44. package/src/orchestrator/context-window.ts +66 -0
  45. package/src/orchestrator/diagnostic-reports.ts +475 -0
  46. package/src/orchestrator/diagnostics.ts +394 -0
  47. package/src/orchestrator/discovery.ts +1833 -0
  48. package/src/orchestrator/engine-worker.ts +415 -0
  49. package/src/orchestrator/engine.ts +5940 -0
  50. package/src/orchestrator/execution.ts +3104 -0
  51. package/src/orchestrator/extension.ts +5934 -0
  52. package/src/orchestrator/formatting.ts +785 -0
  53. package/src/orchestrator/git.ts +88 -0
  54. package/src/orchestrator/index.ts +28 -0
  55. package/src/orchestrator/lane-runner.ts +1787 -0
  56. package/src/orchestrator/mailbox.ts +780 -0
  57. package/src/orchestrator/merge.ts +3414 -0
  58. package/src/orchestrator/messages.ts +1062 -0
  59. package/src/orchestrator/migrations.ts +278 -0
  60. package/src/orchestrator/naming.ts +117 -0
  61. package/src/orchestrator/path-resolver.ts +275 -0
  62. package/src/orchestrator/persistence.ts +2625 -0
  63. package/src/orchestrator/process-registry.ts +452 -0
  64. package/src/orchestrator/quality-gate.ts +1085 -0
  65. package/src/orchestrator/resume.ts +3488 -0
  66. package/src/orchestrator/sessions.ts +57 -0
  67. package/src/orchestrator/settings-loader.ts +136 -0
  68. package/src/orchestrator/settings-tui.ts +2208 -0
  69. package/src/orchestrator/sidecar-telemetry.ts +267 -0
  70. package/src/orchestrator/supervisor.ts +4548 -0
  71. package/src/orchestrator/task-executor-core.ts +675 -0
  72. package/src/orchestrator/tmux-compat.ts +37 -0
  73. package/src/orchestrator/tool-allowlist-constants.ts +37 -0
  74. package/src/orchestrator/types.ts +4465 -0
  75. package/src/orchestrator/verification.ts +547 -0
  76. package/src/orchestrator/waves.ts +1564 -0
  77. package/src/orchestrator/workspace.ts +707 -0
  78. package/src/orchestrator/worktree.ts +2725 -0
  79. package/src/ralph/index.ts +825 -0
  80. package/src/subagents/agents/agent-management.ts +648 -0
  81. package/src/subagents/agents/agent-scope.ts +6 -0
  82. package/src/subagents/agents/agent-selection.ts +23 -0
  83. package/src/subagents/agents/agent-serializer.ts +86 -0
  84. package/src/subagents/agents/agents.ts +832 -0
  85. package/src/subagents/agents/chain-serializer.ts +137 -0
  86. package/src/subagents/agents/frontmatter.ts +29 -0
  87. package/src/subagents/agents/identity.ts +30 -0
  88. package/src/subagents/agents/skills.ts +632 -0
  89. package/src/subagents/extension/config.ts +16 -0
  90. package/src/subagents/extension/control-notices.ts +92 -0
  91. package/src/subagents/extension/doctor.ts +199 -0
  92. package/src/subagents/extension/fanout-child.ts +170 -0
  93. package/src/subagents/extension/index.ts +573 -0
  94. package/src/subagents/extension/schemas.ts +168 -0
  95. package/src/subagents/intercom/intercom-bridge.ts +379 -0
  96. package/src/subagents/intercom/result-intercom.ts +377 -0
  97. package/src/subagents/runs/background/async-execution.ts +712 -0
  98. package/src/subagents/runs/background/async-job-tracker.ts +310 -0
  99. package/src/subagents/runs/background/async-resume.ts +345 -0
  100. package/src/subagents/runs/background/async-status.ts +325 -0
  101. package/src/subagents/runs/background/completion-dedupe.ts +63 -0
  102. package/src/subagents/runs/background/notify.ts +108 -0
  103. package/src/subagents/runs/background/parallel-groups.ts +45 -0
  104. package/src/subagents/runs/background/result-watcher.ts +307 -0
  105. package/src/subagents/runs/background/run-id-resolver.ts +83 -0
  106. package/src/subagents/runs/background/run-status.ts +269 -0
  107. package/src/subagents/runs/background/stale-run-reconciler.ts +336 -0
  108. package/src/subagents/runs/background/subagent-runner.ts +1808 -0
  109. package/src/subagents/runs/background/top-level-async.ts +13 -0
  110. package/src/subagents/runs/foreground/chain-clarify.ts +1333 -0
  111. package/src/subagents/runs/foreground/chain-execution.ts +938 -0
  112. package/src/subagents/runs/foreground/execution.ts +918 -0
  113. package/src/subagents/runs/foreground/subagent-executor.ts +2527 -0
  114. package/src/subagents/runs/shared/completion-guard.ts +147 -0
  115. package/src/subagents/runs/shared/long-running-guard.ts +175 -0
  116. package/src/subagents/runs/shared/mcp-direct-tool-allowlist.ts +365 -0
  117. package/src/subagents/runs/shared/model-fallback.ts +103 -0
  118. package/src/subagents/runs/shared/nested-events.ts +819 -0
  119. package/src/subagents/runs/shared/nested-path.ts +52 -0
  120. package/src/subagents/runs/shared/nested-render.ts +115 -0
  121. package/src/subagents/runs/shared/parallel-utils.ts +109 -0
  122. package/src/subagents/runs/shared/pi-args.ts +220 -0
  123. package/src/subagents/runs/shared/pi-spawn.ts +115 -0
  124. package/src/subagents/runs/shared/run-history.ts +60 -0
  125. package/src/subagents/runs/shared/single-output.ts +164 -0
  126. package/src/subagents/runs/shared/subagent-control.ts +226 -0
  127. package/src/subagents/runs/shared/subagent-prompt-runtime.ts +170 -0
  128. package/src/subagents/runs/shared/worktree.ts +577 -0
  129. package/src/subagents/shared/artifacts.ts +98 -0
  130. package/src/subagents/shared/atomic-json.ts +16 -0
  131. package/src/subagents/shared/file-coalescer.ts +40 -0
  132. package/src/subagents/shared/fork-context.ts +76 -0
  133. package/src/subagents/shared/formatters.ts +133 -0
  134. package/src/subagents/shared/jsonl-writer.ts +81 -0
  135. package/src/subagents/shared/model-info.ts +78 -0
  136. package/src/subagents/shared/post-exit-stdio-guard.ts +85 -0
  137. package/src/subagents/shared/session-identity.ts +10 -0
  138. package/src/subagents/shared/session-tokens.ts +44 -0
  139. package/src/subagents/shared/settings.ts +397 -0
  140. package/src/subagents/shared/status-format.ts +49 -0
  141. package/src/subagents/shared/types.ts +822 -0
  142. package/src/subagents/shared/utils.ts +450 -0
  143. package/src/subagents/slash/prompt-template-bridge.ts +397 -0
  144. package/src/subagents/slash/slash-bridge.ts +174 -0
  145. package/src/subagents/slash/slash-commands.ts +528 -0
  146. package/src/subagents/slash/slash-live-state.ts +292 -0
  147. package/src/subagents/tui/render-helpers.ts +80 -0
  148. package/src/subagents/tui/render.ts +1358 -0
  149. package/templates/agents/local/supervisor.md +33 -0
  150. package/templates/agents/local/task-merger.md +27 -0
  151. package/templates/agents/local/task-reviewer.md +30 -0
  152. package/templates/agents/local/task-worker.md +34 -0
  153. package/templates/agents/supervisor-routing.md +92 -0
  154. package/templates/agents/supervisor.md +229 -0
  155. package/templates/agents/task-merger.md +214 -0
  156. package/templates/agents/task-reviewer.md +260 -0
  157. package/templates/agents/task-worker-segment.md +44 -0
  158. package/templates/agents/task-worker.md +557 -0
  159. package/templates/tasks/CONTEXT.md +30 -0
  160. package/templates/tasks/EXAMPLE-001-hello-world/PROMPT.md +98 -0
  161. package/templates/tasks/EXAMPLE-001-hello-world/STATUS.md +73 -0
  162. package/templates/tasks/EXAMPLE-002-parallel-smoke/PROMPT.md +97 -0
  163. package/templates/tasks/EXAMPLE-002-parallel-smoke/STATUS.md +73 -0
@@ -0,0 +1,4548 @@
1
+ /**
2
+ * Supervisor agent module — activates an interactive LLM agent in the pi
3
+ * session after `/orch` starts a non-blocking batch.
4
+ *
5
+ * The supervisor monitors engine events, handles failures, and keeps the
6
+ * operator informed. It shares the pi session, so the operator can converse
7
+ * naturally ("how's it going?", "fix it", "I'm going to bed") while the
8
+ * batch runs.
9
+ *
10
+ * Key components:
11
+ * - System prompt design (identity, context, capabilities, standing orders)
12
+ * - Activation after engine starts (via pi.sendMessage with triggerTurn)
13
+ * - System prompt persistence across turns (via before_agent_start event)
14
+ * - Model inheritance + config override
15
+ * - Lockfile + heartbeat for session takeover prevention (Step 2)
16
+ * - Startup detection + stale lock takeover with rehydration (Step 2)
17
+ * - Event tailer: batch-scoped consumption of events.jsonl (Step 3)
18
+ * - Proactive notifications with autonomy-aware verbosity (Step 3)
19
+ * - Task completion digest coalescing (Step 3)
20
+ * - Engine event consumption + proactive notifications (Step 3)
21
+ * - Recovery action classification model (Step 4)
22
+ * - Audit trail logging to actions.jsonl (Step 4)
23
+ * - Autonomy-driven confirmation behavior (Step 4)
24
+ *
25
+ * @module supervisor
26
+ * @since TP-041
27
+ */
28
+
29
+ import { join, dirname } from "path";
30
+ import { fileURLToPath } from "url";
31
+ import {
32
+ existsSync,
33
+ readFileSync,
34
+ readdirSync,
35
+ writeFileSync,
36
+ unlinkSync,
37
+ mkdirSync,
38
+ renameSync,
39
+ statSync,
40
+ openSync,
41
+ readSync,
42
+ closeSync,
43
+ appendFileSync,
44
+ } from "fs";
45
+ import {
46
+ stat as fsStat,
47
+ open as fsOpen,
48
+ readFile as fsReadFile,
49
+ writeFile as fsWriteFile,
50
+ rename as fsRename,
51
+ } from "fs/promises";
52
+ import { execFileSync } from "child_process";
53
+ import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
54
+ import type { Model, Api } from "@earendil-works/pi-ai";
55
+ import type {
56
+ OrchBatchRuntimeState,
57
+ OrchestratorConfig,
58
+ PersistedBatchState,
59
+ EngineEvent,
60
+ EngineEventType,
61
+ } from "./types.ts";
62
+ import type { Tier0Event, Tier0EventType } from "./persistence.ts";
63
+
64
+ // ── Recovery Action Classification (TP-041 Step 4) ───────────────────
65
+
66
/**
 * Category assigned to a supervisor recovery action (spec §6.3).
 *
 * Together with the active autonomy level, the category decides whether
 * the operator is asked before the action runs (see `requiresConfirmation`):
 *
 * - `"diagnostic"`  — reading state or running non-mutating commands.
 * - `"tier0_known"` — known recovery patterns such as session restart,
 *   worktree cleanup, or merge retry.
 * - `"destructive"` — state mutations, history-altering git operations,
 *   session kills, batch-state edits.
 *
 * Confirmation matrix:
 *
 * | Classification | Interactive | Supervised | Autonomous |
 * |----------------|-------------|------------|------------|
 * | diagnostic     | auto        | auto       | auto       |
 * | tier0_known    | ASK         | auto       | auto       |
 * | destructive    | ASK         | ASK        | auto       |
 *
 * @since TP-041
 */
export type RecoveryActionClassification =
  | "diagnostic"
  | "tier0_known"
  | "destructive";
91
+
92
/**
 * Decide whether the operator must approve an action before it runs.
 *
 * Rules, in order of precedence:
 * 1. `"diagnostic"` actions never need approval.
 * 2. In `"autonomous"` mode nothing needs approval.
 * 3. In `"interactive"` mode every remaining (non-diagnostic) action does.
 * 4. Otherwise (supervised) only `"destructive"` actions do.
 *
 * @param classification - Category of the action about to be executed
 * @param autonomy - Autonomy level the supervisor is currently running at
 * @returns `true` when the supervisor should ask first, `false` when it may proceed
 *
 * @since TP-041
 */
export function requiresConfirmation(
  classification: RecoveryActionClassification,
  autonomy: SupervisorAutonomyLevel,
): boolean {
  // Read-only work and fully autonomous operation never prompt.
  if (classification === "diagnostic" || autonomy === "autonomous") {
    return false;
  }

  // Interactive prompts for everything that is left; any other level
  // (i.e. supervised) prompts only for destructive actions.
  return autonomy === "interactive" || classification === "destructive";
}
118
+
119
/**
 * Sample actions for each recovery classification.
 *
 * Keyed by classification; each value is a list of human-readable example
 * actions. Per the original note these are meant to be embedded in the
 * supervisor's system prompt as concrete classification guidance.
 *
 * @since TP-041
 */
export const ACTION_CLASSIFICATION_EXAMPLES: Readonly<Record<RecoveryActionClassification, readonly string[]>> = {
  // Read-only inspection and non-mutating commands.
  diagnostic: [
    "Reading batch-state.json, STATUS.md, events.jsonl, merge results",
    "Running git status, git log, git diff",
    "Running test suites (node --experimental-strip-types --experimental-test-module-mocks --no-warnings --import ./tests/loader.mjs --test ..., etc.)",
    "Inspecting active agents and lane status (list_active_agents, read_agent_status)",
    "Checking worktree health (git worktree list)",
    "Reading any file for diagnostics",
  ],

  // Well-understood recovery patterns.
  tier0_known: [
    "Triggering graceful wrap-up/retry flow for a stalled worker lane",
    "Cleaning up stale worktrees for retry",
    "Retrying a timed-out merge",
    "Resetting a session name collision",
    "Clearing a git lock file (.git/index.lock)",
  ],

  // Mutations of state, history, or batch progress.
  destructive: [
    "Forcing lane/batch termination paths (for example orch_abort(hard=true))",
    "Editing batch-state.json fields",
    "Running git reset, git merge, git checkout -B",
    "Removing worktrees (git worktree remove)",
    "Modifying STATUS.md or .DONE files",
    "Deleting git branches (git branch -D)",
    "Skipping tasks or waves",
  ],
};
155
+
156
+ // ── Audit Trail (TP-041 Step 4) ──────────────────────────────────────
157
+
158
/**
 * One JSON line of the supervisor audit trail (`.pi/supervisor/actions.jsonl`).
 *
 * Logging protocol described for this record:
 * - Destructive actions are logged twice: once BEFORE execution with
 *   `result: "pending"`, and once AFTER execution with the real outcome.
 * - Non-destructive diagnostics may be logged once, after the fact.
 *
 * Stability: the original notes state that takeover rehydration
 * (`buildTakeoverSummary`) reads this file, so existing fields must not be
 * removed or renamed. Adding new optional fields is safe.
 *
 * @since TP-041
 */
export interface AuditTrailEntry {
  /** When the entry was written, as an ISO 8601 timestamp. */
  ts: string;

  /** What the supervisor did, e.g. "merge_retry", "kill_session", "read_state". */
  action: string;

  /** Category of the action (drives confirmation behaviour). */
  classification: RecoveryActionClassification;

  /** Human-readable reason the action was taken. */
  context: string;

  /** Command or operation run, e.g. "git merge --no-ff task/lane-2" or "read batch-state.json". */
  command: string;

  /** Outcome: "pending" for a pre-action entry, otherwise "success", "failure" or "skipped". */
  result: "pending" | "success" | "failure" | "skipped";

  /** Outcome detail: the error message on failure, a short summary on success. */
  detail: string;

  /** Batch the action belongs to; used for correlation and filtering. */
  batchId: string;

  /** Wave index, when the action targets a specific wave. */
  waveIndex?: number;

  /** Lane number, when the action targets a specific lane. */
  laneNumber?: number;

  /** Task ID, when the action targets a specific task. */
  taskId?: string;

  /** Elapsed time in milliseconds; filled in on result entries. */
  durationMs?: number;
}
200
+
201
/**
 * Compute where the audit trail file lives for a given state root.
 *
 * The result is `<stateRoot>/.pi/supervisor/actions.jsonl`. It is only an
 * absolute path when `stateRoot` itself is absolute — no resolution against
 * the current working directory is performed.
 *
 * @param stateRoot - Directory that contains the `.pi/` state directory
 * @returns Path to `actions.jsonl` underneath `stateRoot`
 *
 * @since TP-041
 */
export function auditTrailPath(stateRoot: string): string {
  const supervisorDir = join(stateRoot, ".pi", "supervisor");
  return join(supervisorDir, "actions.jsonl");
}
212
+
213
/**
 * Append one audit trail entry as a JSON line to `actions.jsonl`.
 *
 * Best-effort by design: every failure (unwritable directory, full disk,
 * unserialisable entry, ...) is swallowed so that audit logging can never
 * crash or block a recovery action.
 *
 * The target directory is derived from `auditTrailPath` so the directory
 * that gets created and the file that gets written cannot drift apart.
 *
 * @param stateRoot - Directory that contains the `.pi/` state directory
 * @param entry - The audit entry to serialise and append
 *
 * @since TP-041
 */
export function appendAuditEntry(stateRoot: string, entry: AuditTrailEntry): void {
  try {
    const path = auditTrailPath(stateRoot);

    // `recursive: true` makes mkdir a no-op when the directory already
    // exists, so no separate (and racy) existence check is needed.
    mkdirSync(dirname(path), { recursive: true });

    appendFileSync(path, JSON.stringify(entry) + "\n", "utf-8");
  } catch {
    // Best-effort: logging failures must not crash recovery
  }
}
238
+
239
/**
 * Record a recovery action in the audit trail.
 *
 * Thin wrapper over `appendAuditEntry`: stamps the entry with the current
 * time (`ts`, ISO 8601) and the supplied `batchId`, then appends it. The
 * caller provides every other field.
 *
 * Protocol for destructive actions: call once BEFORE running the action
 * with `result: "pending"`, then once more AFTER it with the real result.
 *
 * @param stateRoot - Directory that contains the `.pi/` state directory
 * @param batchId - ID of the batch the action belongs to
 * @param fields - All entry fields except `ts` and `batchId`
 *
 * @since TP-041
 */
export function logRecoveryAction(
  stateRoot: string,
  batchId: string,
  fields: Omit<AuditTrailEntry, "ts" | "batchId">,
): void {
  const stampedAt = new Date().toISOString();
  appendAuditEntry(stateRoot, { ts: stampedAt, batchId, ...fields });
}
266
+
267
/**
 * Read and parse the audit trail (`actions.jsonl`).
 *
 * Best-effort: a missing or unreadable file yields an empty array, and
 * lines that are blank, not valid JSON, or fail minimal validation are
 * skipped. Minimal validation requires the parsed value to be an object
 * with string `ts`, `action` and `batchId` fields.
 *
 * @param stateRoot - Directory that contains the `.pi/` state directory
 * @param options - Optional filters:
 *   - `batchId`: keep only entries of this batch
 *   - `limit`: keep only the last `limit` entries (after filtering);
 *     values that are missing, zero or negative mean "no limit"
 * @returns Parsed entries in file order (most recent last)
 *
 * @since TP-041
 */
export function readAuditTrail(
  stateRoot: string,
  options?: { limit?: number; batchId?: string },
): AuditTrailEntry[] {
  const path = auditTrailPath(stateRoot);
  if (!existsSync(path)) return [];

  let raw: string;
  try {
    raw = readFileSync(path, "utf-8");
  } catch {
    // File vanished or is unreadable — treat as "no history".
    return [];
  }

  const entries: AuditTrailEntry[] = [];

  for (const line of raw.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // Skip malformed lines (e.g. a partially written final line)
      continue;
    }

    // JSON scalars and `null` are valid JSON but not audit entries.
    if (typeof parsed !== "object" || parsed === null) continue;
    const candidate = parsed as Partial<AuditTrailEntry>;

    // Minimal validation: must have ts, action, batchId
    if (
      typeof candidate.ts !== "string" ||
      typeof candidate.action !== "string" ||
      typeof candidate.batchId !== "string"
    ) {
      continue;
    }

    // Apply batchId filter if specified
    if (options?.batchId && candidate.batchId !== options.batchId) continue;

    entries.push(candidate as AuditTrailEntry);
  }

  // Apply tail limit if specified. Only a positive limit is meaningful: a
  // negative value passed to slice(-limit) would drop entries from the
  // front instead of keeping the tail.
  const limit = options?.limit;
  if (limit !== undefined && limit > 0 && entries.length > limit) {
    return entries.slice(-limit);
  }

  return entries;
}
323
+
324
+ // ── Branch Protection Detection (TP-043) ─────────────────────────────
325
+
326
/**
 * Outcome of probing a branch for protection rules.
 *
 * - `"protected"`   — protection rules are enabled (changes must go through PRs)
 * - `"unprotected"` — no protection rules were found (direct push/merge is fine)
 * - `"unknown"`     — the probe itself failed (no `gh` CLI, no remote, auth problems, ...)
 *
 * @since TP-043
 */
export type BranchProtectionStatus =
  | "protected"
  | "unprotected"
  | "unknown";
336
+
337
/**
 * Detect whether a branch has protection rules on GitHub.
 *
 * Two `gh` CLI calls are made, each with a 15 s timeout:
 * 1. `gh repo view --json owner,name` to obtain the `owner/repo` slug.
 * 2. `gh api repos/{owner}/{repo}/branches/{branch}/protection --silent`.
 *
 * Result mapping:
 * - protection call succeeds → `"protected"`
 * - protection call fails with "HTTP 404" / "Not Found" on stderr → `"unprotected"`
 * - anything else (blank branch name, `gh` missing, no remote, auth or
 *   network failure, malformed slug) → `"unknown"`
 *
 * The 404 interpretation is applied ONLY to the protection call. A failing
 * `gh repo view` always yields `"unknown"`, even if its error text happens
 * to contain "Not Found".
 *
 * @param branch - Branch name to check (e.g., "main")
 * @param cwd - Working directory with the git repo
 * @returns Branch protection status
 *
 * @since TP-043
 */
export function detectBranchProtection(branch: string, cwd: string): BranchProtectionStatus {
  // A blank branch name cannot be looked up; do not let the resulting 404
  // masquerade as "unprotected".
  if (!branch || !branch.trim()) {
    return "unknown";
  }

  // Shared invocation settings for both gh calls.
  const runGh = (args: string[]): string =>
    execFileSync("gh", args, {
      encoding: "utf-8",
      timeout: 15_000,
      cwd,
      stdio: ["pipe", "pipe", "pipe"],
    });

  // Step 1: resolve owner/repo via gh (handles SSH, HTTPS, and gh-specific remotes).
  let repoInfo: string;
  try {
    repoInfo = runGh([
      "repo",
      "view",
      "--json",
      "owner,name",
      "--jq",
      '.owner.login + "/" + .name',
    ]).trim();
  } catch {
    // Cannot identify the repository → protection state is indeterminate.
    return "unknown";
  }

  if (!repoInfo || !repoInfo.includes("/")) {
    return "unknown";
  }

  // Step 2: query the branch protection endpoint.
  try {
    runGh(["api", `repos/${repoInfo}/branches/${branch}/protection`, "--silent"]);
    // No error → the API answered 200 → branch is protected.
    return "protected";
  } catch (err: unknown) {
    const rawStderr = (err as { stderr?: unknown } | null | undefined)?.stderr;
    const stderr =
      typeof rawStderr === "string" ? rawStderr : rawStderr == null ? "" : String(rawStderr);

    // gh api exits non-zero and reports "HTTP 404" for unprotected branches.
    if (stderr.includes("HTTP 404") || stderr.includes("Not Found")) {
      return "unprotected";
    }

    // Any other error (no gh, no auth, network, timeout, etc.)
    return "unknown";
  }
}
398
+
399
+ // ── Supervisor-Managed Integration Flow (TP-043) ─────────────────────
400
+
401
/**
 * The supervisor's proposal for integrating an orch branch into its base.
 *
 * Produced by `buildIntegrationPlan` from the batch state, the ancestry
 * relationship between the two branches, and the base branch's protection
 * status. According to the original notes the plan is shown to the operator
 * for confirmation in supervised mode and executed directly in auto mode.
 *
 * @since TP-043
 */
export interface IntegrationPlan {
  /** How to integrate: fast-forward ("ff"), merge commit ("merge"), or pull request ("pr"). */
  mode: "ff" | "merge" | "pr";

  /** Source branch holding the batch's work. */
  orchBranch: string;

  /** Target branch the work is integrated into. */
  baseBranch: string;

  /** Batch ID, carried along for logging and audit. */
  batchId: string;

  /** Protection status determined for the base branch. */
  branchProtection: BranchProtectionStatus;

  /** Human-readable explanation of why `mode` was chosen. */
  rationale: string;

  /** Count of tasks that succeeded (shown in the summary). */
  succeededTasks: number;

  /** Count of tasks that failed (shown in the summary). */
  failedTasks: number;
}
428
+
429
/**
 * Report whether the repository at `cwd` has at least one git remote.
 *
 * Runs `git remote` (5 s timeout) and checks for non-blank output. Any
 * failure — git missing, not a repository, timeout — is reported as
 * "no remotes". Integration planning uses this to decide whether a pull
 * request is possible at all.
 *
 * @param cwd - Working directory with the git repo
 * @returns `true` if `git remote` printed at least one remote name
 *
 * @since TP-149
 */
export function hasGitRemotes(cwd: string): boolean {
  let remoteList: string;
  try {
    remoteList = execFileSync("git", ["remote"], {
      encoding: "utf-8",
      timeout: 5_000,
      cwd,
      stdio: ["pipe", "pipe", "pipe"],
    });
  } catch {
    return false;
  }
  return remoteList.trim() !== "";
}
453
+
454
/**
 * Build an integration plan from the batch state and branch status.
 *
 * Returns `null` when there is nothing to plan: the orch branch or base
 * branch is missing from the batch state, or no task succeeded.
 *
 * Mode selection, in the order the code applies it (TP-149):
 * 1. Check whether the repo has remotes (PR mode is impossible without them).
 * 2. Determine the base branch's protection status: `protectionOverride`
 *    if given, else the `gh`-based detection when remotes exist, else
 *    `"unprotected"`.
 * 3. If the base branch is an ancestor of the orch branch → `"ff"`,
 *    regardless of protection status.
 * 4. Otherwise, if protection is confirmed (`"protected"`) and remotes
 *    exist → `"pr"`.
 * 5. Otherwise → `"merge"`.
 *
 * An `"unknown"` protection status never selects PR mode.
 *
 * @param batchState - Runtime batch state (orchBranch, baseBranch, counts)
 * @param cwd - Working directory with the git repo
 * @param protectionOverride - Injectable protection status for testing
 * @returns Integration plan, or null if integration is not possible
 *
 * @since TP-043
 * @modified TP-149 — Reordered to FF → merge → PR; check remotes first
 */
export function buildIntegrationPlan(
  batchState: OrchBatchRuntimeState,
  cwd: string,
  protectionOverride?: BranchProtectionStatus,
): IntegrationPlan | null {
  const { orchBranch, baseBranch } = batchState;

  if (!orchBranch || !baseBranch) {
    return null;
  }
  if (batchState.succeededTasks === 0) {
    // Nothing to integrate
    return null;
  }

  // Remotes decide whether PR mode is even on the table.
  const remotes = hasGitRemotes(cwd);

  // Protection: explicit override wins; otherwise probe only when remotes
  // exist (without remotes a PR cannot be created anyway).
  const protection =
    protectionOverride ?? (remotes ? detectBranchProtection(baseBranch, cwd) : "unprotected");

  // `git merge-base --is-ancestor` exits 0 when base is an ancestor of orch.
  // NOTE: every failure (non-ancestor, unknown ref, timeout, git missing)
  // lands in the catch and is treated as "diverged".
  let canFastForward = true;
  try {
    execFileSync("git", ["merge-base", "--is-ancestor", baseBranch, orchBranch], {
      encoding: "utf-8",
      timeout: 10_000,
      cwd,
      stdio: ["pipe", "pipe", "pipe"],
    });
  } catch {
    canFastForward = false;
  }

  let mode: IntegrationPlan["mode"];
  let rationale: string;
  if (canFastForward) {
    mode = "ff";
    rationale = `Branches are linear — fast-forward merge (cleanest history).`;
  } else if (protection === "protected" && remotes) {
    // A direct merge may be blocked by push protection, so go through a PR.
    mode = "pr";
    rationale = `Branches diverged and \`${baseBranch}\` is protected — creating a pull request.`;
  } else {
    mode = "merge";
    rationale = `Branches have diverged — creating a merge commit.`;
  }

  return {
    mode,
    orchBranch,
    baseBranch,
    batchId: batchState.batchId,
    branchProtection: protection,
    rationale,
    succeededTasks: batchState.succeededTasks,
    failedTasks: batchState.failedTasks,
  };
}
553
+
554
/**
 * Format an integration plan as a human-readable (Markdown) notification.
 *
 * Used in supervised mode to present the plan for operator confirmation.
 *
 * Output lines: a heading, a blank line, then bullets for mode, source →
 * target branches, task counts (the failed count is shown only when
 * non-zero), and rationale. When the base branch is protected a note is
 * appended that matches the proposed mode: "PR mode is required" for a PR
 * plan, and a warning that the direct integration may be rejected for
 * `ff` / `merge` plans (which planning can still propose on a protected
 * branch).
 *
 * @param plan - The integration plan to format
 * @returns Formatted notification string (lines joined with "\n")
 *
 * @since TP-043
 */
export function formatIntegrationPlan(plan: IntegrationPlan): string {
  const modeLabels: Record<string, string> = {
    ff: "fast-forward merge",
    merge: "merge commit",
    pr: "pull request",
  };
  // Fall back to the raw mode value should an unexpected mode slip through.
  const modeLabel = modeLabels[plan.mode] ?? plan.mode;

  const failedSuffix = plan.failedTasks > 0 ? `, ${plan.failedTasks} failed` : "";

  const lines: string[] = [
    `🔀 **Integration Plan**`,
    ``,
    `- **Mode:** ${modeLabel}`,
    `- **From:** \`${plan.orchBranch}\` → \`${plan.baseBranch}\``,
    `- **Tasks:** ${plan.succeededTasks} succeeded${failedSuffix}`,
    `- **Rationale:** ${plan.rationale}`,
  ];

  if (plan.branchProtection === "protected") {
    // Do not claim "PR mode is required" next to a plan that proposes a
    // direct fast-forward or merge — that would contradict the Mode line.
    lines.push(
      plan.mode === "pr"
        ? `- **Note:** Branch protection detected — PR mode is required.`
        : `- **Note:** Branch protection detected — direct ${modeLabel} may be rejected; fall back to a pull request if it is.`,
    );
  }

  return lines.join("\n");
}
587
+
588
/**
 * Render the result of an executed integration plan as a message the
 * supervisor can show to the operator.
 *
 * Success messages name what was done ("Fast-forwarded", "Merged", or
 * "Created PR for") followed by the branch route; failure messages only
 * name the route. In both cases `detail` is appended on its own line.
 *
 * @param plan - The integration plan that was executed
 * @param success - Whether the integration succeeded
 * @param detail - Additional detail (PR URL, error message, etc.)
 * @returns Formatted outcome message
 *
 * @since TP-043
 */
export function formatIntegrationOutcome(
  plan: IntegrationPlan,
  success: boolean,
  detail: string,
): string {
  const route = `\`${plan.orchBranch}\` → \`${plan.baseBranch}\``;

  if (!success) {
    return `❌ **Integration failed** (${route}).\n${detail}`;
  }

  let verb: string;
  switch (plan.mode) {
    case "ff":
      verb = "Fast-forwarded";
      break;
    case "merge":
      verb = "Merged";
      break;
    default:
      verb = "Created PR for";
      break;
  }

  return `✅ **Integration complete!** ${verb} ${route}.\n${detail}`;
}
611
+
612
/**
 * Callback signature used to run an integration.
 *
 * The callback is injected (rather than imported) so this module does not
 * need to import `executeIntegration` from extension.ts, which would create
 * a circular import. It is called synchronously with the integration mode
 * and a branch/batch context, and returns a plain result object.
 *
 * @since TP-043 R002
 */
export type IntegrationExecutor = (
  /** Integration strategy to execute. */
  mode: "ff" | "merge" | "pr",
  /** Branch and batch identifiers for the run. */
  context: {
    orchBranch: string;
    baseBranch: string;
    batchId: string;
    currentBranch: string;
    /** Message list handed to the executor (presumably appended to — confirm against the executor). */
    notices: Array<string>;
  },
) => {
  /** Whether the integration step succeeded. */
  success: boolean;
  /** False when nothing was merged locally (e.g. a PR was only created). */
  integratedLocally: boolean;
  commitCount: string;
  message: string;
  error?: string;
};
636
+
637
/**
 * Injected side-effect hooks used by the programmatic PR flow (R002-2).
 *
 * Supplied alongside the IntegrationExecutor so CI polling and PR merging
 * can invoke the gh CLI and git without this module spawning processes
 * itself.
 *
 * @since TP-043
 */
export interface CiDeps {
  /** Run an arbitrary command (e.g., gh CLI) in the repo root. */
  runCommand: (
    cmd: string,
    args: Array<string>,
  ) => { ok: boolean; stdout: string; stderr: string };
  /** Run a git command in the repo root. */
  runGit: (args: Array<string>) => { ok: boolean; stdout: string; stderr: string };
  /** Delete the batch state file. */
  deleteBatchState: () => void;
}
653
+
654
/**
 * Poll the CI checks of the pull request opened from `orchBranch`.
 *
 * Runs `gh pr checks <orchBranch> --json name,state,conclusion` through
 * `deps.runCommand` up to `maxAttempts` times. The first attempt runs
 * immediately; every later attempt is preceded by a `delayMs` sleep.
 *
 * Per attempt:
 * - command failed and stderr mentions "no checks" / "no status checks" → `"no-checks"`
 * - command failed on the first attempt → retry (the PR may not be visible yet)
 * - command failed on a later attempt → `"fail"`
 * - stdout is not valid JSON, or is valid JSON but not an array → retry
 * - empty array → `"no-checks"`
 * - at least one check whose state is not "completed" → retry
 * - all checks completed → `"pass"` when every conclusion is
 *   success/neutral/skipped (case-insensitive), otherwise `"fail"`
 *
 * When all attempts are used without a verdict the result is `"timeout"`.
 *
 * @param orchBranch - The branch the PR was created from
 * @param deps - CI deps (runCommand for gh CLI)
 * @param maxAttempts - Maximum polling attempts (default: 30 → ~5 min at 10s intervals)
 * @param delayMs - Delay between polls in ms (default: 10_000 → 10s)
 * @returns CI check result
 *
 * @since TP-043
 */
export async function pollPrCiStatus(
  orchBranch: string,
  deps: CiDeps,
  maxAttempts: number = 30,
  delayMs: number = 10_000,
): Promise<{ status: "pass" | "fail" | "timeout" | "no-checks"; detail: string }> {
  type Check = { name: string; state: string; conclusion: string };

  // Conclusions that count as a passing check.
  const passingConclusions = new Set(["SUCCESS", "NEUTRAL", "SKIPPED"]);
  // Normalise case once instead of enumerating upper/lower literals; non-strings never match.
  const upper = (value: unknown): string => (typeof value === "string" ? value.toUpperCase() : "");
  const isComplete = (check: Check): boolean => upper(check?.state) === "COMPLETED";
  const isPassing = (check: Check): boolean => passingConclusions.has(upper(check?.conclusion));

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    // Wait before polling (except first attempt — check immediately)
    if (attempt > 1) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }

    // NOTE(review): confirm that `conclusion` is an accepted `--json` field for
    // `gh pr checks` in the gh versions this runs against.
    const result = deps.runCommand("gh", [
      "pr",
      "checks",
      orchBranch,
      "--json",
      "name,state,conclusion",
    ]);

    if (!result.ok) {
      // gh pr checks failed — may be no PR or no checks configured
      if (result.stderr.includes("no checks") || result.stderr.includes("no status checks")) {
        return { status: "no-checks", detail: "No CI checks are configured for this repository." };
      }
      // On first attempt, the PR may not be fully created yet — retry
      if (attempt === 1) continue;
      return { status: "fail", detail: `Failed to query PR checks: ${result.stderr}` };
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(result.stdout);
    } catch {
      continue; // Malformed output — retry
    }
    // Valid JSON that is not an array (null, object, string, ...) is treated like
    // malformed output; without this guard `.length` / `.every` would throw.
    if (!Array.isArray(parsed)) continue;
    const checks = parsed as Check[];

    if (checks.length === 0) {
      return { status: "no-checks", detail: "No CI checks are configured for this repository." };
    }

    // Some still pending — keep polling
    if (!checks.every(isComplete)) continue;

    const failed = checks.filter((check) => !isPassing(check));
    if (failed.length === 0) {
      return { status: "pass", detail: `All ${checks.length} CI check(s) passed.` };
    }

    const failedNames = failed.map((c) => `${c.name}: ${c.conclusion}`).join(", ");
    return { status: "fail", detail: `CI check(s) failed: ${failedNames}` };
  }

  return {
    status: "timeout",
    detail: `CI checks did not complete within ${maxAttempts} polling attempts.`,
  };
}
749
+
750
/**
 * Merge the PR for `orchBranch` through the gh CLI.
 *
 * Two strategies are attempted in order, each as
 * `gh pr merge <orchBranch> <flag> --delete-branch`:
 * 1. `--merge` — tried first because it preserves the per-commit history
 *    of the orch branch.
 * 2. `--squash` — tried only when the regular merge command reports failure.
 *
 * When both fail, the detail carries the squash stderr, or the regular-merge
 * stderr if the squash stderr is empty.
 *
 * @param orchBranch - The branch the PR was created from
 * @param deps - CI deps (runCommand for gh CLI)
 * @returns Merge result
 *
 * @since TP-043
 */
export function mergePr(orchBranch: string, deps: CiDeps): { success: boolean; detail: string } {
  const attemptMerge = (strategyFlag: string) =>
    deps.runCommand("gh", ["pr", "merge", orchBranch, strategyFlag, "--delete-branch"]);

  const regular = attemptMerge("--merge");
  if (regular.ok) {
    return { success: true, detail: "PR merged and remote branch deleted." };
  }

  // Regular merge did not go through — squash is the fallback.
  const squash = attemptMerge("--squash");
  if (!squash.ok) {
    return {
      success: false,
      detail: `PR merge failed: ${squash.stderr || regular.stderr}`,
    };
  }

  return { success: true, detail: "PR merged (squash) and remote branch deleted." };
}
796
+
797
/**
 * Inputs needed to produce the batch summary from inside the integration flow.
 *
 * Threaded through triggerSupervisorIntegration so the summary can be
 * generated before the supervisor is deactivated on every terminal path.
 *
 * @since TP-043
 */
export interface SummaryDeps {
  /** Operator identifier for file naming */
  opId: string;
  /** Batch diagnostics (taskExits, batchCost) — null if unavailable */
  diagnostics: null | {
    taskExits: Record<string, { classification: string; cost: number; durationSec: number }>;
    batchCost: number;
  };
  /** Merge results for cost breakdown */
  mergeResults: {
    waveIndex: number;
    status: string;
    failedLane: number | null;
    failureReason: string | null;
  }[];
}
821
+
822
/**
 * Drive a created PR to its terminal state: poll CI, merge, clean up, report.
 *
 * Intended to run after `executeIntegration("pr", ...)` has created the PR.
 * Flow:
 * - CI "pass" or "no-checks" → attempt the merge; on success delete the batch
 *   state, delete the local orch branch (`git branch -D`), and report success;
 *   on merge failure report that the PR is still open.
 * - CI "fail" → report the failed checks.
 * - anything else (timeout) → report the timeout.
 *
 * Afterwards the batch summary is presented (only when `batchState`,
 * `summaryDeps` and `state.stateRoot` are all available) and the supervisor
 * is deactivated.
 *
 * @param plan - Integration plan (for branch/batch info)
 * @param ciDeps - CI deps for gh CLI operations
 * @param pi - ExtensionAPI for messaging
 * @param state - Supervisor state (for deactivation)
 * @param batchState - Runtime batch state (for summary generation)
 * @param summaryDeps - Summary generation dependencies (optional, skipped if null)
 *
 * @since TP-043
 */
async function handlePrLifecycle(
  plan: IntegrationPlan,
  ciDeps: CiDeps,
  pi: ExtensionAPI,
  state: SupervisorState,
  batchState?: OrchBatchRuntimeState,
  summaryDeps?: SummaryDeps | null,
): Promise<void> {
  // All outcomes share one message envelope; only the text and display differ.
  const report = (text: string, display: string): void => {
    pi.sendMessage(
      {
        customType: "supervisor-integration-result",
        content: [{ type: "text", text }],
        display,
      },
      { triggerTurn: false },
    );
  };

  const ciResult = await pollPrCiStatus(plan.orchBranch, ciDeps);

  switch (ciResult.status) {
    case "pass":
    case "no-checks": {
      // CI passed (or no checks) — merge the PR
      const mergeOutcome = mergePr(plan.orchBranch, ciDeps);
      if (!mergeOutcome.success) {
        report(
          `⚠️ **CI passed but merge failed.** ${mergeOutcome.detail}\n` +
            `The PR is still open — merge manually on GitHub.`,
          "CI passed but PR merge failed",
        );
        break;
      }
      // Clean up local state after remote merge
      ciDeps.deleteBatchState();
      ciDeps.runGit(["branch", "-D", plan.orchBranch]);
      report(
        `✅ **Integration complete!** PR merged into \`${plan.baseBranch}\`.\n` +
          `${ciResult.detail}\n${mergeOutcome.detail}`,
        "Integration complete — PR merged",
      );
      break;
    }
    case "fail":
      report(
        `❌ **CI checks failed.** ${ciResult.detail}\n` +
          `The PR is still open. Fix the issues and merge manually, or close and retry.`,
        "CI checks failed — manual intervention needed",
      );
      break;
    default:
      // timeout
      report(
        `⏰ **CI check timeout.** ${ciResult.detail}\n` +
          `The PR is still open. Check CI status manually and merge when ready.`,
        "CI check timeout — check manually",
      );
      break;
  }

  // TP-043: Generate batch summary before deactivation
  const canSummarize = Boolean(batchState && summaryDeps && state.stateRoot);
  if (canSummarize && batchState && summaryDeps) {
    presentBatchSummary(
      pi,
      batchState,
      state.stateRoot,
      summaryDeps.opId,
      summaryDeps.diagnostics,
      summaryDeps.mergeResults,
    );
  }

  // Always deactivate after PR lifecycle completes (R002 issue #3)
  deactivateSupervisor(pi, state);
}
939
+
940
/**
 * Trigger the supervisor-managed integration flow after batch completion.
 *
 * Called from the engine's onTerminal callback when integration mode is
 * "supervised" or "auto" and batch phase is "completed" (R002-1).
 *
 * **No plan:** when `buildIntegrationPlan` returns nothing (no orch branch
 * or no succeeded tasks), a notice is sent, the summary is presented and the
 * supervisor is deactivated.
 *
 * **Supervised mode:** presents the plan and asks the LLM to confirm with
 * the operator, then to run `/orch-integrate` with the matching flag. The
 * summary is deferred by storing `summaryDeps` on `state.pendingSummaryDeps`;
 * this function does not deactivate the supervisor on this path.
 *
 * **Auto mode (R002-2):** runs the integration through `executor`. A failed
 * fast-forward is retried once as a merge; when that fallback succeeds, the
 * reported outcome is labelled as a merge. For PR results (mode "pr" or a
 * result that was not integrated locally) CI is polled and the PR merged via
 * `handlePrLifecycle` when `ciDeps` is available. Every auto-mode path ends
 * in summary + deactivation.
 *
 * @param pi - ExtensionAPI for sending messages and deactivation
 * @param state - Supervisor state (for deactivation if no integration needed)
 * @param batchState - Runtime batch state
 * @param integrationMode - "supervised" or "auto"
 * @param cwd - Working directory for git operations
 * @param executor - Integration executor callback (wraps executeIntegration to avoid circular imports)
 * @param ciDeps - CI deps for programmatic PR polling and merge (auto/PR mode)
 * @param summaryDeps - Optional summary deps for batch summary generation on all terminal paths
 *
 * @since TP-043
 */
export function triggerSupervisorIntegration(
  pi: ExtensionAPI,
  state: SupervisorState,
  batchState: OrchBatchRuntimeState,
  integrationMode: "supervised" | "auto",
  cwd: string,
  executor?: IntegrationExecutor,
  ciDeps?: CiDeps,
  summaryDeps?: SummaryDeps | null,
): void {
  // TP-043: Helper to generate summary before deactivation
  const summarizeAndDeactivate = (): void => {
    if (summaryDeps && state.stateRoot) {
      presentBatchSummary(
        pi,
        batchState,
        state.stateRoot,
        summaryDeps.opId,
        summaryDeps.diagnostics,
        summaryDeps.mergeResults,
      );
    }
    deactivateSupervisor(pi, state);
  };

  // Single-text-part message helper; only supervised mode triggers an LLM turn.
  const notify = (
    customType: string,
    text: string,
    display: string,
    triggerTurn: boolean = false,
  ): void => {
    pi.sendMessage({ customType, content: [{ type: "text", text }], display }, { triggerTurn });
  };

  // Build integration plan
  const plan = buildIntegrationPlan(batchState, cwd);

  if (!plan) {
    // No integration possible — deactivate supervisor
    notify(
      "supervisor-integration",
      `📋 **Batch complete.** No integration needed (no orch branch or no succeeded tasks). Supervisor deactivating.`,
      "No integration needed — supervisor deactivating",
    );
    summarizeAndDeactivate();
    return;
  }

  // Format the plan for reporting
  const planText = formatIntegrationPlan(plan);
  // Flag for the /orch-integrate command matching the planned mode (ff takes no flag).
  const modeFlag = plan.mode === "ff" ? "" : plan.mode === "merge" ? " --merge" : " --pr";

  if (integrationMode === "supervised") {
    // Supervised mode: present plan, ask LLM to confirm with operator,
    // then direct it to /orch-integrate (established execution path).
    notify(
      "supervisor-integration",
      `🏁 **Batch complete!** Ready to integrate.\n\n` +
        planText +
        `\n\n` +
        `**Action required:** Ask the operator for confirmation.\n\n` +
        `Say something like: "The batch completed successfully. I'd like to integrate ` +
        `the changes from \`${plan.orchBranch}\` into \`${plan.baseBranch}\` using ` +
        `${plan.mode === "ff" ? "fast-forward" : plan.mode === "merge" ? "a merge commit" : "a pull request"}. ` +
        `${plan.rationale} Shall I proceed?"\n\n` +
        `If the operator confirms, run: \`/orch-integrate${modeFlag}\`\n` +
        `If the operator declines, acknowledge and deactivate.\n` +
        `If the operator wants a different mode, adjust the flag:\n` +
        ` - Fast-forward: \`/orch-integrate\`\n` +
        ` - Merge commit: \`/orch-integrate --merge\`\n` +
        ` - Pull request: \`/orch-integrate --pr\``,
      "Integration plan ready — awaiting operator confirmation",
      true,
    );

    // TP-043 R004: Defer summary until after integration completes (or operator declines).
    // Store deps on supervisor state so /orch-integrate completion or deactivateSupervisor
    // can present the summary at the correct time.
    if (summaryDeps) {
      state.pendingSummaryDeps = summaryDeps;
    }
    return;
  }

  // ── Auto mode: execute integration programmatically (R002-2) ──

  if (!executor) {
    // Fallback: no executor provided — instruct operator to use /orch-integrate.
    // This should not happen in normal operation but prevents a crash.
    notify(
      "supervisor-integration",
      `🏁 **Batch complete!** Integration executor unavailable.\n\n` +
        planText +
        `\n\n` +
        `Run \`/orch-integrate${modeFlag}\` to integrate manually.`,
      "Auto-integration fallback — run /orch-integrate",
    );
    summarizeAndDeactivate();
    return;
  }

  // Execute the integration synchronously using the provided executor
  const context: Parameters<IntegrationExecutor>[1] = {
    orchBranch: plan.orchBranch,
    baseBranch: plan.baseBranch,
    batchId: plan.batchId,
    currentBranch: plan.baseBranch,
    notices: [],
  };

  let result = executor(plan.mode, context);
  // Mode that actually produced `result`; differs from plan.mode only after a fallback.
  let executedMode: "ff" | "merge" | "pr" = plan.mode;

  // If ff fails, automatically fall back to merge mode
  if (!result.success && plan.mode === "ff") {
    const fallbackResult = executor("merge", context);
    if (fallbackResult.success) {
      executedMode = "merge";
      // Copy instead of mutating the object returned by the executor.
      result = {
        ...fallbackResult,
        message: `⚠️ Fast-forward failed (branches diverged). Fell back to merge.\n${fallbackResult.message}`,
      };
    }
    // If the merge fallback also fails, `result` is still the original fast-forward failure.
  }

  if (!result.success) {
    // Integration failed — report the error and deactivate
    const errorDetail = result.error || result.message || "Unknown integration error";
    const failureText = formatIntegrationOutcome(plan, false, errorDetail);
    notify(
      "supervisor-integration-result",
      failureText + `\n\n` + `Run \`/orch-integrate\` manually to retry with a different mode.`,
      "Integration failed — run /orch-integrate manually",
    );
    summarizeAndDeactivate();
    return;
  }

  // Label the outcome with the mode that really ran, so a merge fallback is
  // not reported as "Fast-forwarded".
  const outcomePlan = executedMode === plan.mode ? plan : { ...plan, mode: executedMode };
  const outcomeText = formatIntegrationOutcome(outcomePlan, true, result.message);

  if (executedMode === "pr" || !result.integratedLocally) {
    // PR mode: integration created a PR but didn't merge locally.
    // Programmatically poll CI status and merge (R002-2).
    notify(
      "supervisor-integration-progress",
      `${outcomeText}\n\n⏳ Waiting for CI checks to complete...`,
      "PR created — polling CI status",
    );

    if (ciDeps) {
      // Fire-and-forget — handlePrLifecycle handles messaging,
      // summary generation, and deterministic deactivation internally.
      handlePrLifecycle(plan, ciDeps, pi, state, batchState, summaryDeps).catch((err: unknown) => {
        const msg = err instanceof Error ? err.message : String(err);
        notify(
          "supervisor-integration-result",
          `❌ **CI monitoring crashed:** ${msg}\nThe PR is still open — check status and merge manually.`,
          "CI monitoring crashed",
        );
        summarizeAndDeactivate();
      });
    } else {
      // No CI deps — can't poll. Report and deactivate.
      notify(
        "supervisor-integration-result",
        `PR created. CI polling unavailable — check status and merge manually on GitHub.`,
        "PR created — merge manually",
      );
      summarizeAndDeactivate();
    }
    return;
  }

  // Local integration succeeded (ff or merge) — report and deactivate
  notify("supervisor-integration-result", outcomeText, `Integration complete (${executedMode})`);
  summarizeAndDeactivate();
}
1208
+
1209
+ // ── Batch Summary Generation (TP-043 Step 2) ────────────────────────
1210
+
1211
/**
 * Everything the batch summary formatter needs, as plain data.
 *
 * Values are assembled from runtime and persisted state by the collector;
 * the interface itself carries no behaviour.
 *
 * @since TP-043
 */
export interface BatchSummaryData {
  /** Batch ID */
  batchId: string;
  /** Batch phase at summary generation time */
  phase: string;
  /** Epoch ms when batch started */
  startedAt: number;
  /** Epoch ms when batch ended (null if still running) */
  endedAt: number | null;
  /** Total tasks in batch */
  totalTasks: number;
  /** Tasks completed successfully */
  succeededTasks: number;
  /** Tasks that failed */
  failedTasks: number;
  /** Tasks skipped */
  skippedTasks: number;
  /** Tasks blocked */
  blockedTasks: number;
  /** Batch cost in USD (from diagnostics) */
  batchCost: number;
  /** Wave plan (array of arrays of task IDs per wave) */
  wavePlan: Array<string[]>;
  /** Wave results with timing data */
  waveResults: {
    waveIndex: number;
    startedAt: number;
    endedAt: number;
    succeededTaskIds: string[];
    failedTaskIds: string[];
    skippedTaskIds: string[];
    overallStatus: string;
  }[];
  /** Per-task exit summaries keyed by task ID (from diagnostics) */
  taskExits: Record<string, { classification: string; cost: number; durationSec: number }>;
  /** Merge results per wave */
  mergeResults: {
    waveIndex: number;
    status: string;
    failedLane: number | null;
    failureReason: string | null;
  }[];
  /** Segment-level outcomes; null when no segment records are available. */
  segmentOutcomes: null | {
    totalSegments: number;
    succeeded: number;
    failed: number;
    stalled: number;
    skipped: number;
    running: number;
    pending: number;
    /** Per-task breakdown for tasks that have more than one segment. */
    multiSegmentTasks: {
      taskId: string;
      totalSegments: number;
      terminalSegments: number;
      succeeded: number;
      failed: number;
      stalled: number;
      skipped: number;
      running: number;
      pending: number;
    }[];
  };
  /** Audit trail entries for the batch */
  auditEntries: AuditTrailEntry[];
  /** Tier 0 events from events.jsonl (recovery attempts, successes, exhausted, escalations) */
  tier0Events: Tier0EventSummary[];
  /** Errors accumulated during the batch */
  errors: string[];
}
1288
+
1289
/**
 * Trimmed-down view of one Tier 0 event, as shown in the batch summary.
 *
 * Instances are built from lines of events.jsonl that carry a tier0_* event
 * type and belong to the batch being summarised.
 *
 * @since TP-043
 */
export interface Tier0EventSummary {
  /** ISO 8601 timestamp */
  timestamp: string;
  /** Event type (tier0_recovery_attempt, tier0_recovery_success, etc.) */
  type: string;
  /** Recovery pattern being applied */
  pattern: string;
  /** Current attempt number (1-based) */
  attempt: number;
  /** Maximum attempts allowed */
  maxAttempts: number;
  /** Affected task ID (if task-scoped) */
  taskId?: string;
  /** Resolution description (for success events) */
  resolution?: string;
  /** Error message (for exhausted events) */
  error?: string;
  /** Suggested remediation (for exhausted events) */
  suggestion?: string;
  /** Affected task IDs (for escalation context) */
  affectedTaskIds?: Array<string>;
}
1319
+
1320
/**
 * Event types that `readTier0EventsForBatch` keeps; every other type is dropped.
 *
 * @since TP-043
 */
const TIER0_SUMMARY_TYPES = new Set([
  "tier0_recovery_attempt",
  "tier0_recovery_success",
  "tier0_recovery_exhausted",
  "tier0_escalation",
]);

/**
 * Load the Tier 0 events of one batch from `<stateRoot>/.pi/supervisor/events.jsonl`.
 *
 * Each non-blank line is parsed as JSON. A line is kept when its `batchId`
 * equals the given one and its `type` is in `TIER0_SUMMARY_TYPES`. Missing
 * `timestamp` / `pattern` / `attempt` / `maxAttempts` fall back to
 * `""` / `"unknown"` / `0` / `0`; optional fields are copied only when truthy
 * (`affectedTaskIds` only when non-empty).
 *
 * Results are returned in file order — no sorting is performed here, so the
 * output is chronological only to the extent the file is.
 *
 * Best-effort: a missing, empty or unreadable file yields `[]`, and lines
 * that fail to parse are skipped.
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @param batchId - Batch ID to filter events
 * @returns Array of Tier 0 event summaries, in file order
 *
 * @since TP-043
 */
export function readTier0EventsForBatch(stateRoot: string, batchId: string): Tier0EventSummary[] {
  const eventsPath = join(stateRoot, ".pi", "supervisor", "events.jsonl");
  if (!existsSync(eventsPath)) return [];

  let content: string;
  try {
    content = readFileSync(eventsPath, "utf-8").trim();
  } catch {
    return [];
  }
  if (!content) return [];

  const summaries: Tier0EventSummary[] = [];
  for (const rawLine of content.split("\n")) {
    const text = rawLine.trim();
    if (text.length === 0) continue;

    try {
      const evt = JSON.parse(text);
      // Must match batchId and be a Tier 0 event type
      if (evt.batchId !== batchId || !TIER0_SUMMARY_TYPES.has(evt.type)) continue;

      const summary: Tier0EventSummary = {
        timestamp: evt.timestamp ?? "",
        type: evt.type,
        pattern: evt.pattern ?? "unknown",
        attempt: evt.attempt ?? 0,
        maxAttempts: evt.maxAttempts ?? 0,
      };
      if (evt.taskId) summary.taskId = evt.taskId;
      if (evt.resolution) summary.resolution = evt.resolution;
      if (evt.error) summary.error = evt.error;
      if (evt.suggestion) summary.suggestion = evt.suggestion;
      if (evt.affectedTaskIds?.length) summary.affectedTaskIds = evt.affectedTaskIds;
      summaries.push(summary);
    } catch {
      // Skip malformed lines
    }
  }

  return summaries;
}
1388
+
1389
/**
 * Render a millisecond duration as a short human-readable string.
 *
 * Negative input is treated as zero. Output shapes:
 * - under a minute: `"42s"`
 * - under an hour: `"5m"` or `"5m 7s"` (seconds omitted when zero)
 * - an hour or more: `"2h"` or `"2h 15m"` (minutes omitted when zero;
 *   seconds are not shown)
 *
 * @since TP-043
 */
function formatDurationMs(ms: number): string {
  const totalSeconds = Math.floor((ms < 0 ? 0 : ms) / 1000);
  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }

  const totalMinutes = Math.floor(totalSeconds / 60);
  if (totalMinutes < 60) {
    const leftoverSeconds = totalSeconds % 60;
    return leftoverSeconds > 0 ? `${totalMinutes}m ${leftoverSeconds}s` : `${totalMinutes}m`;
  }

  const wholeHours = Math.floor(totalMinutes / 60);
  const leftoverMinutes = totalMinutes % 60;
  return leftoverMinutes > 0 ? `${wholeHours}h ${leftoverMinutes}m` : `${wholeHours}h`;
}
1405
+
1406
/**
 * TP-115: Compute batch cost from V2 lane snapshots.
 *
 * Reads every `lane-*.json` file in `<stateRoot>/.pi/runtime/<batchId>/lanes`
 * and sums `worker.costUsd` and `reviewer.costUsd` from each snapshot.
 * Only finite numeric costs are counted; a missing or non-numeric value
 * contributes 0 (previously a string cost would have turned the running
 * total into a concatenated string).
 *
 * Best-effort: unreadable or unparsable snapshot files are skipped, and any
 * failure to list the directory yields 0.
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @param batchId - Batch whose lane snapshots are summed
 * @returns Total cost, or 0 if no V2 data exists
 *
 * @since TP-115
 */
function computeV2BatchCost(stateRoot: string, batchId: string): number {
  // Accept only real, finite numbers so `total` can never become a string or NaN.
  const toCost = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  try {
    const lanesDir = join(stateRoot, ".pi", "runtime", batchId, "lanes");
    if (!existsSync(lanesDir)) return 0;
    const files = readdirSync(lanesDir).filter((f) => f.startsWith("lane-") && f.endsWith(".json"));
    let total = 0;
    for (const f of files) {
      try {
        const snap = JSON.parse(readFileSync(join(lanesDir, f), "utf-8"));
        total += toCost(snap?.worker?.costUsd);
        total += toCost(snap?.reviewer?.costUsd);
      } catch {
        /* skip unreadable or malformed snapshot */
      }
    }
    return total;
  } catch {
    return 0;
  }
}

/**
 * Collect summary data from runtime batch state.
 *
 * Gathers data from OrchBatchRuntimeState, BatchDiagnostics, merge results,
 * the audit trail and the Tier 0 events log. This function reads state — the
 * formatter (`formatBatchSummary`) is pure.
 *
 * @param batchState - Runtime batch state
 * @param stateRoot - Root path for .pi/ state directory
 * @param diagnostics - Batch diagnostics (taskExits, batchCost) or null
 * @param mergeResults - Persisted merge results or empty array
 * @returns Summary data ready for formatting
 *
 * @since TP-043
 */
1448
+
1449
export function collectBatchSummaryData(
  batchState: OrchBatchRuntimeState,
  stateRoot: string,
  diagnostics?: {
    taskExits: Record<string, { classification: string; cost: number; durationSec: number }>;
    batchCost: number;
  } | null,
  mergeResults?: Array<{
    waveIndex: number;
    status: string;
    failedLane: number | null;
    failureReason: string | null;
  }>,
): BatchSummaryData {
  const { batchId } = batchState;

  // Incident sources: the audit trail and the Tier 0 events log (R003).
  const auditEntries = readAuditTrail(stateRoot, { batchId });
  const tier0Events = readTier0EventsForBatch(stateRoot, batchId);

  // Wave results may be absent (e.g. batch failed during planning); default each list field.
  const waveResults = (batchState.waveResults || []).map((wave) => {
    const { waveIndex, startedAt, endedAt } = wave;
    return {
      waveIndex,
      startedAt,
      endedAt,
      succeededTaskIds: wave.succeededTaskIds || [],
      failedTaskIds: wave.failedTaskIds || [],
      skippedTaskIds: wave.skippedTaskIds || [],
      overallStatus: wave.overallStatus || "unknown",
    };
  });

  const segmentRecords = batchState.segments || [];
  type SegmentList = typeof segmentRecords;

  // Count segments per known status in one pass; unrecognised statuses are not counted.
  const tallyStatuses = (segments: SegmentList) => {
    const counts = { succeeded: 0, failed: 0, stalled: 0, skipped: 0, running: 0, pending: 0 };
    for (const segment of segments) {
      switch (segment.status) {
        case "succeeded":
          counts.succeeded += 1;
          break;
        case "failed":
          counts.failed += 1;
          break;
        case "stalled":
          counts.stalled += 1;
          break;
        case "skipped":
          counts.skipped += 1;
          break;
        case "running":
          counts.running += 1;
          break;
        case "pending":
          counts.pending += 1;
          break;
      }
    }
    return counts;
  };

  let segmentOutcomes: BatchSummaryData["segmentOutcomes"] = null;
  if (segmentRecords.length > 0) {
    // Group segments by owning task, preserving encounter order within each task.
    const segmentsByTask = new Map<string, SegmentList>();
    for (const segment of segmentRecords) {
      const bucket = segmentsByTask.get(segment.taskId) || [];
      bucket.push(segment);
      segmentsByTask.set(segment.taskId, bucket);
    }

    // Only tasks with more than one segment get a per-task row, ordered by task ID.
    const multiSegmentTasks: NonNullable<BatchSummaryData["segmentOutcomes"]>["multiSegmentTasks"] =
      [];
    const orderedGroups = [...segmentsByTask.entries()].sort((left, right) =>
      left[0].localeCompare(right[0]),
    );
    for (const [taskId, taskSegments] of orderedGroups) {
      if (taskSegments.length <= 1) continue;
      const counts = tallyStatuses(taskSegments);
      multiSegmentTasks.push({
        taskId,
        totalSegments: taskSegments.length,
        // Terminal = every status except running/pending.
        terminalSegments: counts.succeeded + counts.failed + counts.stalled + counts.skipped,
        ...counts,
      });
    }

    segmentOutcomes = {
      totalSegments: segmentRecords.length,
      ...tallyStatuses(segmentRecords),
      multiSegmentTasks,
    };
  }

  // Prefer a positive diagnostics cost; otherwise fall back to the V2 lane snapshots.
  const reportedCost = diagnostics?.batchCost ?? 0;
  const batchCost = reportedCost > 0 ? reportedCost : computeV2BatchCost(stateRoot, batchId);

  return {
    batchId,
    phase: batchState.phase,
    startedAt: batchState.startedAt,
    endedAt: batchState.endedAt,
    totalTasks: batchState.totalTasks,
    succeededTasks: batchState.succeededTasks,
    failedTasks: batchState.failedTasks,
    skippedTasks: batchState.skippedTasks,
    blockedTasks: batchState.blockedTasks,
    batchCost,
    wavePlan: [], // Not directly available on runtime state — use waveResults
    waveResults,
    taskExits: diagnostics?.taskExits ?? {},
    mergeResults: mergeResults ?? [],
    segmentOutcomes,
    auditEntries,
    tier0Events,
    errors: batchState.errors || [],
  };
}
1552
+
1553
/**
 * Format a batch summary as a structured markdown string.
 *
 * Performs no I/O. The output is deterministic for a given `data` except for
 * the footer line, which embeds the current time (`new Date()`).
 *
 * Sections are always emitted in this order: header (duration / cost /
 * result / phase), wave timeline, segment outcomes, incidents,
 * recommendations, cost breakdown, footer. When the data for a section is
 * missing, a placeholder sentence is emitted instead of omitting the section,
 * so the document skeleton is always complete.
 *
 * NOTE(review): nested list items are prefixed with the whitespace exactly as
 * it appears in the reviewed source — confirm the indent width against the
 * upstream file if the markdown nesting matters.
 *
 * @param data - Collected batch summary data
 * @returns Formatted markdown string
 *
 * @since TP-043
 */
export function formatBatchSummary(data: BatchSummaryData): string {
  const lines: string[] = [];

  // Wall-clock duration is shared by the header and the cost table total row.
  const duration =
    data.endedAt && data.startedAt ? formatDurationMs(data.endedAt - data.startedAt) : "In progress";

  appendSummaryHeader(lines, data, duration);
  appendWaveTimeline(lines, data);
  appendSegmentOutcomes(lines, data);
  appendIncidents(lines, data);
  appendRecommendations(lines, data);
  appendCostBreakdown(lines, data, duration);

  // ── Footer ───────────────────────────────────────────────────
  lines.push("---");
  lines.push(`*Generated at ${new Date().toISOString()}*`);

  return lines.join("\n");
}

/** Append the title and the duration / cost / result / phase header lines. */
function appendSummaryHeader(lines: string[], data: BatchSummaryData, duration: string): void {
  lines.push(`# Batch Summary: ${data.batchId}`);
  lines.push("");
  lines.push(`**Duration:** ${duration}`);

  if (data.batchCost > 0) {
    lines.push(`**Cost:** $${data.batchCost.toFixed(2)}`);
  } else {
    lines.push(`**Cost:** Not available`);
  }

  // Only non-zero failure/skip/block counters are mentioned.
  const resultParts: string[] = [`${data.succeededTasks}/${data.totalTasks} tasks succeeded`];
  if (data.failedTasks > 0) resultParts.push(`${data.failedTasks} failed`);
  if (data.skippedTasks > 0) resultParts.push(`${data.skippedTasks} skipped`);
  if (data.blockedTasks > 0) resultParts.push(`${data.blockedTasks} blocked`);
  lines.push(`**Result:** ${resultParts.join(", ")}`);
  lines.push(`**Phase:** ${data.phase}`);
  lines.push("");
}

/** Append the "Wave Timeline" section: one bullet per wave plus its failed task ids. */
function appendWaveTimeline(lines: string[], data: BatchSummaryData): void {
  lines.push("## Wave Timeline");
  lines.push("");

  if (data.waveResults.length === 0) {
    lines.push("No wave data available.");
  } else {
    for (const wave of data.waveResults) {
      const waveNum = wave.waveIndex + 1;
      const taskCount =
        wave.succeededTaskIds.length + wave.failedTaskIds.length + wave.skippedTaskIds.length;
      const waveDuration = formatDurationMs(wave.endedAt - wave.startedAt);

      // A merge result (if any) is matched to the wave by index.
      const mergeResult = data.mergeResults.find((mr) => mr.waveIndex === wave.waveIndex);
      let mergeInfo = "";
      if (mergeResult) {
        if (mergeResult.status === "succeeded") {
          mergeInfo = " ✅";
        } else if (mergeResult.status === "failed") {
          mergeInfo = ` ❌ (merge failed: ${mergeResult.failureReason || "unknown"})`;
        } else if (mergeResult.status === "partial") {
          mergeInfo = ` ⚠️ (partial merge)`;
        }
      }

      const statusIcon =
        wave.overallStatus === "succeeded"
          ? "✅"
          : wave.overallStatus === "failed"
            ? "❌"
            : wave.overallStatus === "partial"
              ? "⚠️"
              : wave.overallStatus === "aborted"
                ? "🛑"
                : "❓";

      lines.push(`- Wave ${waveNum} (${taskCount} tasks): ${waveDuration} ${statusIcon}${mergeInfo}`);

      if (wave.failedTaskIds.length > 0) {
        lines.push(` - Failed: ${wave.failedTaskIds.join(", ")}`);
      }
    }
  }
  lines.push("");
}

/** Append the "Segment Outcomes" section: totals plus one bullet per multi-segment task. */
function appendSegmentOutcomes(lines: string[], data: BatchSummaryData): void {
  lines.push("## Segment Outcomes");
  lines.push("");

  const outcomes = data.segmentOutcomes;
  if (!outcomes) {
    lines.push("Segment data not available.");
  } else if (outcomes.multiSegmentTasks.length === 0) {
    lines.push(
      `No multi-segment task outcomes recorded (${outcomes.totalSegments} segment record(s) total).`,
    );
  } else {
    // succeeded/failed are always shown; the remaining states only when non-zero.
    const statusParts = [`${outcomes.succeeded} succeeded`, `${outcomes.failed} failed`];
    if (outcomes.running > 0) statusParts.push(`${outcomes.running} running`);
    if (outcomes.pending > 0) statusParts.push(`${outcomes.pending} pending`);
    if (outcomes.skipped > 0) statusParts.push(`${outcomes.skipped} skipped`);
    if (outcomes.stalled > 0) statusParts.push(`${outcomes.stalled} stalled`);
    lines.push(`- **Tracked segments:** ${outcomes.totalSegments}`);
    lines.push(`- **Status mix:** ${statusParts.join(", ")}`);
    lines.push(`- **Multi-segment tasks:** ${outcomes.multiSegmentTasks.length}`);
    for (const task of outcomes.multiSegmentTasks) {
      const taskParts = [`${task.succeeded}✓`, `${task.failed}✗`];
      if (task.running > 0) taskParts.push(`${task.running} running`);
      if (task.pending > 0) taskParts.push(`${task.pending} pending`);
      if (task.skipped > 0) taskParts.push(`${task.skipped} skipped`);
      if (task.stalled > 0) taskParts.push(`${task.stalled} stalled`);
      lines.push(
        ` - ${task.taskId}: ${task.terminalSegments}/${task.totalSegments} terminal (${taskParts.join(", ")})`,
      );
    }
  }
  lines.push("");
}

/** Append the "Incidents" section: Tier 0 recoveries, supervisor actions, and raw errors. */
function appendIncidents(lines: string[], data: BatchSummaryData): void {
  lines.push("## Incidents");
  lines.push("");

  // Audit entries count as incidents unless they are diagnostics or still pending.
  const incidents = data.auditEntries.filter(
    (e) => e.classification !== "diagnostic" && e.result !== "pending",
  );

  const hasTier0Events = data.tier0Events.length > 0;
  const hasAuditIncidents = incidents.length > 0;
  const hasErrors = data.errors.length > 0;

  if (!hasAuditIncidents && !hasTier0Events && !hasErrors) {
    lines.push("No incidents recorded.");
  } else {
    if (hasTier0Events) {
      appendTier0Recoveries(lines, data.tier0Events);
    }
    if (hasAuditIncidents) {
      // The sub-heading is only needed to separate this list from the Tier 0 block.
      appendSupervisorActions(lines, incidents, hasTier0Events);
    }
    if (hasErrors) {
      lines.push("### Errors");
      for (const error of data.errors) {
        lines.push(`- ${error}`);
      }
    }
  }
  lines.push("");
}

/** Append the "Tier 0 Recoveries" sub-section, grouping events by their `pattern`. */
function appendTier0Recoveries(
  lines: string[],
  tier0Events: BatchSummaryData["tier0Events"],
): void {
  lines.push("### Tier 0 Recoveries");
  lines.push("");

  const byPattern = new Map<string, BatchSummaryData["tier0Events"]>();
  for (const evt of tier0Events) {
    const key = evt.pattern;
    if (!byPattern.has(key)) byPattern.set(key, []);
    byPattern.get(key)!.push(evt);
  }

  for (const [pattern, events] of byPattern) {
    const attempts = events.filter((e) => e.type === "tier0_recovery_attempt").length;
    const successes = events.filter((e) => e.type === "tier0_recovery_success").length;
    const exhausted = events.filter((e) => e.type === "tier0_recovery_exhausted").length;
    const escalations = events.filter((e) => e.type === "tier0_escalation").length;

    // Any exhausted/escalated event marks the pattern failed; otherwise a success wins.
    const statusIcon = exhausted > 0 || escalations > 0 ? "❌" : successes > 0 ? "✅" : "⏳";

    lines.push(
      `- **${pattern}** ${statusIcon} — ${attempts} attempt(s), ${successes} success(es), ${exhausted} exhausted`,
    );

    // Union of the directly referenced task and any listed affected tasks.
    const taskIds = new Set<string>();
    for (const evt of events) {
      if (evt.taskId) taskIds.add(evt.taskId);
      if (evt.affectedTaskIds) {
        for (const tid of evt.affectedTaskIds) taskIds.add(tid);
      }
    }
    if (taskIds.size > 0) {
      lines.push(` - Affected tasks: ${[...taskIds].join(", ")}`);
    }

    for (const evt of events.filter((e) => e.type === "tier0_escalation")) {
      if (evt.suggestion) {
        lines.push(` - Escalation: ${evt.suggestion}`);
      }
    }

    for (const evt of events.filter((e) => e.type === "tier0_recovery_success")) {
      if (evt.resolution) {
        lines.push(` - Resolution: ${evt.resolution}`);
      }
    }

    for (const evt of events.filter((e) => e.type === "tier0_recovery_exhausted")) {
      if (evt.error) {
        lines.push(` - Error: ${evt.error}`);
      }
    }
  }
  lines.push("");
}

/** Append the numbered list of supervisor actions taken from the audit trail. */
function appendSupervisorActions(
  lines: string[],
  incidents: BatchSummaryData["auditEntries"],
  withHeading: boolean,
): void {
  if (withHeading) {
    lines.push("### Supervisor Actions");
    lines.push("");
  }

  let incidentNum = 0;
  for (const entry of incidents) {
    incidentNum++;
    const resultIcon =
      entry.result === "success"
        ? "✅"
        : entry.result === "failure"
          ? "❌"
          : entry.result === "skipped"
            ? "⏭️"
            : "❓";
    lines.push(`${incidentNum}. **${entry.action}** (${entry.classification}) ${resultIcon}`);
    lines.push(` ${entry.context}`);
    // Avoid repeating the context when the detail carries the same text.
    if (entry.detail && entry.detail !== entry.context) {
      lines.push(` Result: ${entry.detail}`);
    }
    if (entry.durationMs !== undefined) {
      lines.push(` Duration: ${formatDurationMs(entry.durationMs)}`);
    }
  }
  lines.push("");
}

/** Append the "Recommendations" section derived from failures, durations, and recoveries. */
function appendRecommendations(lines: string[], data: BatchSummaryData): void {
  lines.push("## Recommendations");
  lines.push("");

  const recommendations: string[] = [];

  // Any failed merge triggers the timeout hint.
  const mergeFailures = data.mergeResults.filter((mr) => mr.status === "failed");
  if (mergeFailures.length > 0) {
    recommendations.push(
      "- Consider increasing `merge.timeoutMinutes` — merge failures were detected during this batch.",
    );
  }

  // More than 30% of tasks failing is reported as a high failure rate.
  if (data.totalTasks > 0 && data.failedTasks > 0) {
    const failureRate = data.failedTasks / data.totalTasks;
    if (failureRate > 0.3) {
      recommendations.push(
        "- High failure rate (" +
          Math.round(failureRate * 100) +
          "%) — consider reducing task scope or adding more context to PROMPT.md files.",
      );
    }
  }

  // Tasks whose recorded duration exceeds one hour.
  const longTasks = Object.entries(data.taskExits).filter(([, exit]) => exit.durationSec > 3600);
  if (longTasks.length > 0) {
    const names = longTasks.map(([id]) => id).join(", ");
    recommendations.push(
      `- Long-running tasks detected (${names}): ${longTasks.length} task(s) exceeded 1 hour — consider splitting into smaller tasks.`,
    );
  }

  // Exhausted recoveries are looked up in both the audit trail and the Tier 0 events.
  const recoveryExhaustedAudit = data.auditEntries.filter(
    (e) =>
      e.action === "tier0_recovery_exhausted" ||
      (e.classification === "tier0_known" && e.result === "failure"),
  );
  const recoveryExhaustedTier0 = data.tier0Events.filter(
    (e) => e.type === "tier0_recovery_exhausted",
  );
  const escalationsTier0 = data.tier0Events.filter((e) => e.type === "tier0_escalation");
  if (recoveryExhaustedAudit.length > 0 || recoveryExhaustedTier0.length > 0) {
    recommendations.push(
      "- Recovery budget was exhausted for some issues — review recurring failures and consider addressing root causes.",
    );
  }
  if (escalationsTier0.length > 0) {
    // De-duplicate suggestions and drop empty ones.
    const uniqueSuggestions = [...new Set(escalationsTier0.map((e) => e.suggestion).filter(Boolean))];
    for (const suggestion of uniqueSuggestions) {
      recommendations.push(`- Tier 0 escalation: ${suggestion}`);
    }
  }

  if (data.blockedTasks > 0) {
    recommendations.push(
      `- ${data.blockedTasks} task(s) were blocked due to upstream failures — fix failed tasks and re-run with \`/orch-resume\`.`,
    );
  }

  if (recommendations.length === 0) {
    lines.push("No recommendations — batch ran smoothly.");
  } else {
    for (const rec of recommendations) {
      lines.push(rec);
    }
  }
  lines.push("");
}

/** Append the "Cost Breakdown" table: one row per wave plus a total row. */
function appendCostBreakdown(lines: string[], data: BatchSummaryData, duration: string): void {
  lines.push("## Cost Breakdown");
  lines.push("");

  if (Object.keys(data.taskExits).length === 0) {
    lines.push("Cost data not available (no telemetry recorded).");
  } else {
    lines.push("| Wave | Tasks | Cost | Duration |");
    lines.push("|------|-------|------|----------|");

    let totalCost = 0;
    for (const wave of data.waveResults) {
      const waveNum = wave.waveIndex + 1;
      const allTaskIds = [...wave.succeededTaskIds, ...wave.failedTaskIds, ...wave.skippedTaskIds];
      let waveCost = 0;
      let waveDurationSec = 0;

      // Tasks without a recorded exit contribute nothing to the wave totals.
      for (const taskId of allTaskIds) {
        const exit = data.taskExits[taskId];
        if (exit) {
          waveCost += exit.cost;
          waveDurationSec += exit.durationSec;
        }
      }

      totalCost += waveCost;
      const waveDurationStr = formatDurationMs(waveDurationSec * 1000);
      lines.push(
        `| ${waveNum} | ${allTaskIds.length} | $${waveCost.toFixed(2)} | ${waveDurationStr} |`,
      );
    }

    // The total row reports the batch wall-clock duration, not the sum of task durations.
    lines.push(
      `| **Total** | **${data.totalTasks}** | **$${totalCost.toFixed(2)}** | **${duration}** |`,
    );
  }
  lines.push("");
}
1916
+
1917
/**
 * Generate the batch summary markdown and persist it to disk.
 *
 * Collects data via `collectBatchSummaryData`, renders it with
 * `formatBatchSummary`, and writes the result to
 * `<stateRoot>/.pi/supervisor/{opId}-{batchId}-summary.md`.
 *
 * The write is best-effort: any filesystem error (directory creation or file
 * write) is swallowed so the caller can still present the returned markdown.
 *
 * @param batchState - Runtime batch state
 * @param stateRoot - Root path for .pi/ state directory
 * @param opId - Operator identifier (for file naming)
 * @param diagnostics - Batch diagnostics or null
 * @param mergeResults - Persisted merge results or empty array
 * @returns The formatted summary markdown string (for conversation presentation)
 *
 * @since TP-043
 */
export function generateBatchSummary(
  batchState: OrchBatchRuntimeState,
  stateRoot: string,
  opId: string,
  diagnostics?: {
    taskExits: Record<string, { classification: string; cost: number; durationSec: number }>;
    batchCost: number;
  } | null,
  mergeResults?: Array<{
    waveIndex: number;
    status: string;
    failedLane: number | null;
    failureReason: string | null;
  }>,
): string {
  const data = collectBatchSummaryData(batchState, stateRoot, diagnostics, mergeResults);
  const markdown = formatBatchSummary(data);

  // Write to file — best-effort, non-fatal
  try {
    const dir = join(stateRoot, ".pi", "supervisor");
    // `recursive: true` makes mkdir a no-op when the directory already exists,
    // so no separate existence check (and its check-then-act race) is needed.
    mkdirSync(dir, { recursive: true });
    const filepath = join(dir, `${opId}-${batchState.batchId}-summary.md`);
    writeFileSync(filepath, markdown, "utf-8");
  } catch {
    // Best-effort: file write failure must not block summary presentation
  }

  return markdown;
}
1968
+
1969
/**
 * Present a batch summary to the operator via a supervisor message.
 *
 * Calls `generateBatchSummary` (which attempts to write the full summary
 * file) and then sends a short message — result, duration, cost, and failed
 * count when non-zero — through `pi.sendMessage` with `triggerTurn: false`.
 *
 * @param pi - ExtensionAPI for sending messages
 * @param batchState - Runtime batch state
 * @param stateRoot - Root path for .pi/ state directory
 * @param opId - Operator identifier
 * @param diagnostics - Batch diagnostics or null
 * @param mergeResults - Persisted merge results or empty array
 *
 * @since TP-043
 */
export function presentBatchSummary(
  pi: ExtensionAPI,
  batchState: OrchBatchRuntimeState,
  stateRoot: string,
  opId: string,
  diagnostics?: {
    taskExits: Record<string, { classification: string; cost: number; durationSec: number }>;
    batchCost: number;
  } | null,
  mergeResults?: Array<{
    waveIndex: number;
    status: string;
    failedLane: number | null;
    failureReason: string | null;
  }>,
): void {
  // Invoked for its side effect (writing the summary file); the full markdown
  // it returns is not used in the concise conversation message below.
  generateBatchSummary(batchState, stateRoot, opId, diagnostics, mergeResults);

  // Build a concise conversation message (full details in the file)
  const duration =
    batchState.endedAt && batchState.startedAt
      ? formatDurationMs(batchState.endedAt - batchState.startedAt)
      : "in progress";
  // TP-115: Use V2 lane snapshot cost when diagnostics.batchCost is zero
  const rawCost =
    (diagnostics?.batchCost ?? 0) > 0
      ? diagnostics!.batchCost
      : computeV2BatchCost(stateRoot, batchState.batchId);
  const cost = rawCost > 0 ? `$${rawCost.toFixed(2)}` : "not tracked";
  const filename = `${opId}-${batchState.batchId}-summary.md`;

  const conciseText =
    `📊 **Batch Summary** — ${batchState.batchId}\n\n` +
    `- **Result:** ${batchState.succeededTasks}/${batchState.totalTasks} tasks succeeded\n` +
    `- **Duration:** ${duration}\n` +
    `- **Cost:** ${cost}\n` +
    (batchState.failedTasks > 0 ? `- **Failed:** ${batchState.failedTasks} task(s)\n` : "") +
    `\nFull summary written to \`.pi/supervisor/${filename}\`.`;

  pi.sendMessage(
    {
      customType: "supervisor-batch-summary",
      content: [{ type: "text", text: conciseText }],
      display: `Batch summary: ${batchState.succeededTasks}/${batchState.totalTasks} succeeded`,
    },
    { triggerTurn: false },
  );
}
2032
+
2033
+ // ── Supervisor Config Types ──────────────────────────────────────────
2034
+
2035
/**
 * How much latitude the supervisor agent has to act without asking the operator.
 *
 * - `interactive`: ask before any recovery action
 * - `supervised`: Tier 0 patterns run automatically, novel recovery asks first
 * - `autonomous`: handle everything, pause only when stuck
 *
 * @since TP-041
 */
export type SupervisorAutonomyLevel =
  | "interactive"
  | "supervised"
  | "autonomous";

/**
 * Supervisor settings, resolved from project config plus global preferences.
 *
 * @since TP-041
 */
export interface SupervisorConfig {
  /** Model used by the supervisor agent; an empty string means "inherit the session model". */
  model: string;
  /** Autonomy level that governs when the supervisor asks for confirmation. */
  autonomy: SupervisorAutonomyLevel;
}

/** Defaults applied when no supervisor configuration is provided: inherited model, supervised autonomy. */
export const DEFAULT_SUPERVISOR_CONFIG: SupervisorConfig = {
  model: "",
  autonomy: "supervised",
};
2065
+
2066
+ // ── System Prompt ────────────────────────────────────────────────────
2067
+
2068
/**
 * Locate `supervisor-primer.md`, which is expected to sit next to this module.
 *
 * The module directory is derived from `import.meta.url`; if that throws,
 * `__dirname` is used instead.
 *
 * @returns Path to the primer file inside this module's directory
 */
function resolvePrimerPath(): string {
  let moduleDir: string;
  try {
    moduleDir = dirname(fileURLToPath(import.meta.url));
  } catch {
    // Fallback for environments where import.meta.url is unavailable
    moduleDir = __dirname;
  }
  return join(moduleDir, "supervisor-primer.md");
}
2081
+
2082
+ // ── Template Loading (TP-058) ────────────────────────────────────────
2083
+
2084
/**
 * Resolve the path to a base supervisor template shipped with the package.
 *
 * The result is `<module-dir>/../../templates/agents/<name>.md`, where
 * `<module-dir>` comes from `import.meta.url` (or `__dirname` if that throws).
 *
 * NOTE(review): the two `..` segments assume this module lives exactly two
 * directories below the package root. The previous comment described the
 * module directory as `<package-root>/extensions/@pi-agents/orchid/`, which is
 * three levels deep — confirm the real location so templates resolve to
 * `<package-root>/templates/agents/`.
 *
 * @param name - Template filename without extension (e.g. "supervisor", "supervisor-routing")
 * @returns Path to the template file
 *
 * @since TP-058
 */
function resolveBaseTemplatePath(name: string): string {
  let moduleDir: string;
  try {
    moduleDir = dirname(fileURLToPath(import.meta.url));
  } catch {
    moduleDir = __dirname;
  }
  const templateFile = `${name}.md`;
  return join(moduleDir, "..", "..", "templates", "agents", templateFile);
}
2105
+
2106
/**
 * Parse a simple frontmatter+body markdown file.
 *
 * The file must start with a `---` line, followed by `key: value` lines, a
 * closing `---` line, and then the body. CRLF line endings are normalized and
 * a leading UTF-8 byte-order mark is ignored. Frontmatter lines without a
 * colon, with an empty key, or whose key starts with `#` are skipped. Values
 * and the body are trimmed; values are kept as raw strings (no unquoting).
 *
 * @param filePath - Path of the markdown file to parse
 * @returns The frontmatter map and trimmed body, or null when the file cannot
 *          be read (missing, unreadable, a directory, …) or has no frontmatter
 *
 * @since TP-058
 */
function parseSupervisorTemplate(
  filePath: string,
): { fm: Record<string, string>; body: string } | null {
  let raw: string;
  try {
    // Read directly instead of exists-then-read: that sequence is racy and
    // still throws for directories or permission errors.
    raw = readFileSync(filePath, "utf-8");
  } catch {
    return null;
  }

  // Strip a BOM (it would defeat the `^---` anchor) and normalize line endings.
  const text = raw.replace(/^\uFEFF/, "").replace(/\r\n/g, "\n");
  const match = text.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
  if (!match) return null;

  const fm: Record<string, string> = {};
  for (const line of match[1].split("\n")) {
    const idx = line.indexOf(":");
    if (idx <= 0) continue; // no separator, or empty key
    const key = line.slice(0, idx).trim();
    if (key.startsWith("#")) continue; // commented-out frontmatter
    fm[key] = line.slice(idx + 1).trim();
  }
  return { fm, body: match[2].trim() };
}
2132
+
2133
/**
 * Load a supervisor template by combining the packaged base with a project override.
 *
 * Resolution rules:
 * - Base template: the file returned by `resolveBaseTemplatePath(name)`
 * - Local override: `<stateRoot>/.pi/agents/{localName || name}.md`; skipped
 *   entirely when `stateRoot` is empty
 * - Neither parses → `null` (the caller falls back to its inline prompt)
 * - Override frontmatter has `standalone: true` → the override body alone
 * - Otherwise → base body, followed by a "Project-Specific Guidance" section
 *   holding the override body when that body is non-empty
 *
 * @param name - Template name (e.g. "supervisor", "supervisor-routing")
 * @param stateRoot - Root path for .pi/ state directory
 * @param localName - Optional name of the override file; defaults to `name`.
 *   Lets a template such as "supervisor-routing" share the "supervisor" override.
 * @returns The composed template body, or null if no template found
 *
 * @since TP-058
 */
export function loadSupervisorTemplate(
  name: string,
  stateRoot: string,
  localName?: string,
): string | null {
  const base = parseSupervisorTemplate(resolveBaseTemplatePath(name));

  // Without a state root there is no project directory to look in.
  const overrideName = localName || name;
  const override = stateRoot
    ? parseSupervisorTemplate(join(stateRoot, ".pi", "agents", `${overrideName}.md`))
    : null;

  if (base === null && override === null) {
    return null;
  }

  // A standalone override replaces the base outright.
  if (override !== null && override.fm.standalone === "true") {
    return override.body;
  }

  const baseBody = base?.body || "";
  const localBody = override?.body || "";
  if (!localBody) {
    return baseBody;
  }
  return [baseBody, "---", "## Project-Specific Guidance", localBody].join("\n\n");
}
2179
+
2180
/**
 * Replace `{{variable}}` placeholders in a template string.
 *
 * Only keys that are own properties of `vars` are substituted; any other
 * placeholder — including names that exist on `Object.prototype`, such as
 * `{{toString}}` or `{{constructor}}` — is left in the output verbatim.
 * Substituted values are inserted literally and are not re-scanned for
 * further placeholders.
 *
 * @param template - Template string with `{{key}}` placeholders
 * @param vars - Key-value map of variable replacements
 * @returns Template with all known placeholders replaced
 *
 * @since TP-058
 */
function replaceTemplateVars(template: string, vars: Record<string, string>): string {
  return template.replace(/\{\{(\w+)\}\}/g, (placeholder: string, key: string) =>
    // Own-property check: the `in` operator would also match inherited members.
    Object.prototype.hasOwnProperty.call(vars, key) ? vars[key] : placeholder,
  );
}
2194
+
2195
/**
 * Render the "What You Must NEVER Do" guardrails block for the given integration mode (TP-043).
 *
 * For `"supervised"` and `"auto"` the block lists six prohibitions followed by
 * an "Integration Permissions" section and a mode-specific note. For every
 * other mode it lists seven prohibitions, including a ban on pushing and on
 * creating PRs. Shared by the template path and the inline fallback prompt.
 *
 * @param integrationMode - Integration mode string from the orchestrator config
 * @returns Markdown text for the guardrails section
 *
 * @since TP-058
 */
function buildGuardrailsSection(integrationMode: string): string {
  const integrationAllowed = integrationMode === "supervised" || integrationMode === "auto";

  if (!integrationAllowed) {
    return [
      "## What You Must NEVER Do",
      "",
      "1. Never `git push` to any remote",
      "2. Never delete `.pi/batch-state.json` without operator approval",
      "3. Never modify task code (files that workers wrote)",
      "4. Never modify PROMPT.md files",
      "5. Never `git reset --hard` with uncommitted changes",
      "6. Never skip tasks/waves without telling the operator",
      "7. Never create PRs or GitHub releases",
    ].join("\n");
  }

  let modeNote: string;
  if (integrationMode === "supervised") {
    modeNote =
      "**Supervised mode:** Before executing integration, describe your plan and ask the operator for confirmation.";
  } else {
    modeNote =
      "**Auto mode:** Execute integration directly. Report the outcome to the operator. Pause only on errors or conflicts.";
  }

  return [
    "## What You Must NEVER Do",
    "",
    "1. Never delete `.pi/batch-state.json` without operator approval",
    "2. Never modify task code (files that workers wrote)",
    "3. Never modify PROMPT.md files",
    "4. Never `git reset --hard` with uncommitted changes",
    "5. Never skip tasks/waves without telling the operator",
    "6. Never create GitHub releases",
    "",
    `## Integration Permissions (mode: ${integrationMode})`,
    "",
    "You are authorized to perform integration operations after batch completion:",
    "- `git push origin <orch-branch>` — push the orch branch for PR creation",
    "- `gh pr create` — create pull requests for integration",
    "- `git merge --ff-only` or `git merge --no-edit` — local branch integration",
    "- `git branch -D <orch-branch>` — cleanup after successful integration",
    "",
    modeNote,
  ].join("\n");
}
2235
+
2236
/**
 * Return the guidance paragraph that tells the supervisor how to behave at
 * the given autonomy level.
 *
 * @param autonomyLabel - One of "interactive", "supervised", or "autonomous"
 * @returns The matching guidance text, or an empty string for any other label
 *
 * @since TP-058
 */
function buildAutonomyDescription(autonomyLabel: string): string {
  const guidanceByLevel = new Map<string, string>([
    [
      "interactive",
      "**Your current level is INTERACTIVE.** ASK the operator before any Tier 0 Known or Destructive action. Explain what you want to do, why, and what the alternatives are. Let the operator decide.",
    ],
    [
      "supervised",
      "**Your current level is SUPERVISED.** Execute Tier 0 Known patterns automatically (retries, cleanup, session restarts). ASK before Destructive actions (manual merges, state editing, skipping tasks, killing sessions). Always explain what you did and why.",
    ],
    [
      "autonomous",
      "**Your current level is AUTONOMOUS.** Execute all recovery actions automatically. Pause and summarize only when you're genuinely stuck and cannot resolve the issue. The operator trusts you to make reasonable decisions.",
    ],
  ]);

  // Unknown labels yield no guidance rather than an error.
  return guidanceByLevel.get(autonomyLabel) ?? "";
}
2252
+
2253
+ /**
2254
+ * Build the supervisor system prompt.
2255
+ *
2256
+ * The prompt establishes:
2257
+ * 1. **Identity**: "You are the batch supervisor"
2258
+ * 2. **Context**: Batch metadata, file paths, wave plan
2259
+ * 3. **Capabilities**: Full tool access for monitoring and recovery
2260
+ * 4. **Standing orders**: Monitor events, handle failures, keep operator informed
2261
+ * 5. **Primer reference**: Read supervisor-primer.md for detailed operational knowledge
2262
+ *
2263
+ * The prompt is rebuilt on every LLM turn from the live batchState reference,
2264
+ * ensuring it always reflects the latest batch metadata (including batchId,
2265
+ * wave counts, and task counts that are populated asynchronously by the engine).
2266
+ *
2267
+ * @param batchState - Current batch runtime state (live reference)
2268
+ * @param config - Orchestrator configuration
2269
+ * @param supervisorConfig - Supervisor-specific configuration
2270
+ * @param stateRoot - Root path for .pi/ state directory
2271
+ * @returns The complete system prompt string
2272
+ *
2273
+ * @since TP-041
2274
+ */
2275
export function buildSupervisorSystemPrompt(
	batchState: OrchBatchRuntimeState,
	config: OrchestratorConfig,
	supervisorConfig: SupervisorConfig,
	stateRoot: string,
): string {
	// Paths the supervisor is told to read from / write to.
	const primerPath = resolvePrimerPath();
	const batchStatePath = join(stateRoot, ".pi", "batch-state.json");
	const eventsPath = join(stateRoot, ".pi", "supervisor", "events.jsonl");
	const autonomyLabel = supervisorConfig.autonomy;

	// Wave progress reads "planning" until the batch state reports at least one wave.
	let waveSummary: string;
	if (batchState.totalWaves > 0) {
		waveSummary = `${batchState.currentWaveIndex + 1}/${batchState.totalWaves} waves`;
	} else {
		waveSummary = "planning";
	}

	const actionsPath = auditTrailPath(stateRoot);

	// Sections whose text depends on the integration mode and autonomy level.
	const guardrailsSection = buildGuardrailsSection(config.orchestrator.integration);
	const autonomyGuidance = buildAutonomyDescription(autonomyLabel);

	// Placeholder labels shown while batchId / orchBranch are still empty.
	const batchIdLabel = batchState.batchId || "(initializing — read batch state file)";
	const orchBranchLabel = batchState.orchBranch || "(legacy mode)";

	// TP-058: a template, when one can be loaded, takes precedence over the
	// inline prompt below.
	const template = loadSupervisorTemplate("supervisor", stateRoot);
	if (template) {
		const templateVars: Record<string, string> = {
			batchId: batchIdLabel,
			phase: batchState.phase,
			baseBranch: batchState.baseBranch,
			orchBranch: orchBranchLabel,
			waveSummary,
			totalTasks: String(batchState.totalTasks),
			succeededTasks: String(batchState.succeededTasks),
			failedTasks: String(batchState.failedTasks),
			skippedTasks: String(batchState.skippedTasks),
			blockedTasks: String(batchState.blockedTasks),
			autonomy: autonomyLabel,
			batchStatePath,
			eventsPath,
			actionsPath,
			stateRoot,
			primerPath,
			guardrailsSection,
			autonomyGuidance,
		};
		return replaceTemplateVars(template, templateVars);
	}

	// ── Fallback: inline prompt (used when no template was loaded) ──
	return `# Supervisor Agent

You are the **batch supervisor** — a persistent agent that monitors an OrchID
orchestration batch, handles failures, and keeps the operator informed.

## Identity

You share this terminal session with the human operator. After \`/orch\` started
a batch, you activated to supervise it. The operator can talk to you naturally
at any time. You are a senior engineer on call for this batch.

## Current Batch Context

- **Batch ID:** ${batchIdLabel}
- **Phase:** ${batchState.phase}
- **Base branch:** ${batchState.baseBranch}
- **Orch branch:** ${orchBranchLabel}
- **Progress:** ${waveSummary}, ${batchState.totalTasks} total tasks
- **Succeeded:** ${batchState.succeededTasks} | **Failed:** ${batchState.failedTasks} | **Skipped:** ${batchState.skippedTasks} | **Blocked:** ${batchState.blockedTasks}
- **Autonomy:** ${autonomyLabel}

## Key File Paths

- **Batch state:** \`${batchStatePath}\`
- **Engine events:** \`${eventsPath}\`
- **Audit trail:** \`${actionsPath}\`
- **State root:** \`${stateRoot}\`

## Capabilities

You have full tool access: \`read\`, \`write\`, \`edit\`, \`bash\`, \`grep\`, \`find\`, \`ls\`.
Use these to:
- Read batch state, STATUS.md files, merge results, event logs
- Run git commands for diagnostics and manual merge recovery
- Edit batch-state.json for state repairs (when needed)
- Manage worker lane execution state (agent status, wrap-up, diagnostics)
- Run verification commands (tests)

## Standing Orders

1. **Monitor engine events.** Periodically read \`${eventsPath}\` to track
batch progress. Report significant events to the operator proactively:
- Wave starts/completions
- Task failures requiring attention
- Merge successes/failures
- Batch completion

2. **Handle failures.** When tasks fail or merges time out, diagnose the
issue using the patterns in supervisor-primer.md and take appropriate
recovery action based on your autonomy level (${autonomyLabel}).

3. **Keep the operator informed.** Provide clear, natural status updates.
When the operator asks "how's it going?" — read batch state and summarize.

4. **Log all recovery actions** to the audit trail (see Audit Trail section below).

5. **Respect your autonomy level** (see Recovery Action Classification below).

## Recovery Action Classification

Every action you take falls into one of three categories:

### Diagnostic (always allowed — no confirmation needed)
- Reading batch-state.json, STATUS.md, events.jsonl, merge results
- Running \`git status\`, \`git log\`, \`git diff\`
- Running test suites (\`node --experimental-strip-types --experimental-test-module-mocks --no-warnings --import ./tests/loader.mjs --test ...\`, etc.)
- Inspecting active agents and lane status (\`list_active_agents\`, \`read_agent_status\`)
- Checking worktree health (\`git worktree list\`)
- Reading any file for diagnostics

### Tier 0 Known (known recovery patterns)
- Triggering graceful wrap-up/retry flow for a stalled worker lane
- Cleaning up stale worktrees for retry
- Retrying a timed-out merge
- Resetting a session name collision
- Clearing a git lock file (\`.git/index.lock\`)

### Destructive (state mutations, irreversible operations)
- Forcing lane/batch termination paths (for example \`orch_abort(hard=true)\`)
- Editing batch-state.json fields
- Running \`git reset\`, \`git merge\`, \`git checkout -B\`
- Removing worktrees (\`git worktree remove\`)
- Modifying STATUS.md or .DONE files
- Deleting git branches (\`git branch -D\`)
- Skipping tasks or waves

### Autonomy Decision Table (current level: ${autonomyLabel})

| Classification | Interactive | Supervised | Autonomous |
|----------------|-------------|------------|------------|
| Diagnostic | ✅ auto | ✅ auto | ✅ auto |
| Tier 0 Known | ❓ ASK | ✅ auto | ✅ auto |
| Destructive | ❓ ASK | ❓ ASK | ✅ auto |

${autonomyGuidance}

## Audit Trail

Log every recovery action to \`${actionsPath}\` as a single-line JSON entry.

**Format** (one JSON object per line):
\`\`\`json
{"ts":"<ISO 8601>","action":"<action_name>","classification":"<diagnostic|tier0_known|destructive>","context":"<why>","command":"<what>","result":"<pending|success|failure|skipped>","detail":"<outcome>","batchId":"${batchState.batchId || "BATCH_ID"}"}
\`\`\`

**Rules:**
1. For **destructive** actions: write a "pending" entry BEFORE executing, then
write a result entry AFTER with "success" or "failure" and detail.
2. For **diagnostic** and **tier0_known** actions: write a single result entry
AFTER execution.
3. Include optional fields when relevant: \`waveIndex\`, \`laneNumber\`, \`taskId\`, \`durationMs\`.
4. Use the \`bash\` tool to append entries. Example:
\`echo '{"ts":"...","action":"merge_retry","classification":"tier0_known","context":"merge timeout on wave 2","command":"git merge --no-ff task/lane-2","result":"success","detail":"merged with 0 conflicts","batchId":"..."}' >> ${actionsPath}\`

**Why this matters:** When you're taken over by another session or the operator
asks "what did you do?", the audit trail is the definitive record.

## Operational Knowledge

**IMPORTANT:** Read \`${primerPath}\` for your complete operational runbook.
It contains:
- Architecture details and wave lifecycle
- Common failure patterns and recovery procedures
- Batch state editing guide (safe vs. dangerous edits)
- Git operations reference
- Communication guidelines

Read it now before doing anything else. It is your primary reference.

${guardrailsSection}

## Available Orchestrator Tools

You can invoke these tools directly — no need to ask the operator or use slash commands:

- **orch_start(target)** — Start a new batch. Target is \`"all"\` for all pending tasks, or a task area name/path.
- **orch_status()** — Check current batch status (phase, wave progress, task counts, elapsed time)
- **orch_pause()** — Pause the running batch (current tasks finish, no new tasks start)
- **orch_resume(force?)** — Resume a paused or interrupted batch. Use \`force=true\` for stuck batches.
- **orch_abort(hard?)** — Abort the running batch. Use \`hard=true\` for immediate kill.
- **orch_integrate(mode?, force?, branch?)** — Integrate completed batch into working branch.
Modes: \`"fast-forward"\` (default), \`"merge"\`, \`"pr"\`.

### When to Use These Tools

Use tools **proactively** when the situation calls for it:
- Operator asks to run tasks or start a batch → call \`orch_start(target="all")\` (or a specific area)
- Operator asks "how's it going?" → call \`orch_status()\` first, then summarize
- Batch paused due to a failure you diagnosed and fixed → call \`orch_resume()\`
- Batch completed successfully → offer to call \`orch_integrate(mode="pr")\` or the operator's preferred mode
- Batch is stuck or failing repeatedly → call \`orch_status()\` to diagnose, then \`orch_abort()\` if needed
- Need to investigate before more tasks launch → call \`orch_pause()\` first

These tools are preferred over reading batch-state.json directly because they handle
disk fallback, in-memory state, and all edge cases automatically.

## Startup Checklist

Now that you've activated:
1. Read the supervisor primer at \`${primerPath}\`
2. Read \`${batchStatePath}\` for full batch metadata
3. Read \`${eventsPath}\` for any events already emitted
4. Report to the operator: batch status, wave progress, what you're monitoring
`;
}
2493
+
2494
+ // ── Routing System Prompt (TP-042) ───────────────────────────────────
2495
+
2496
/**
 * Produce the supervisor system prompt for routing mode, i.e. when there is
 * no active batch.
 *
 * Routing mode is entered when `/orch` is run without arguments; the
 * supervisor then walks the operator through onboarding, batch planning, or
 * another conversational flow. The resulting prompt covers:
 *
 * 1. **Identity**: "You are the project supervisor"
 * 2. **Routing state**: what was detected (no-config, pending-tasks, etc.)
 * 3. **Script guidance**: which onboarding/returning-user script to follow
 * 4. **Primer reference**: read supervisor-primer.md for the detailed scripts
 * 5. **Capabilities**: full tool access for project analysis and config generation
 *
 * The routing state selects which primer script the supervisor is pointed at,
 * which implements the Script 1/2/3 trigger discrimination from spec §14.4.
 *
 * A loaded template (TP-058) is preferred; when none is available the inline
 * prompt from `buildRoutingInlinePrompt` is returned instead.
 *
 * @param routingContext - The routing context from /orch no-args detection
 * @param stateRoot - Root path for .pi/ state directory (may be empty for no-config)
 * @returns The complete system prompt string
 *
 * @since TP-042
 */
export function buildRoutingSystemPrompt(
	routingContext: SupervisorRoutingContext,
	stateRoot: string,
): string {
	const { routingState, contextMessage } = routingContext;
	const primerPath = resolvePrimerPath();

	// Per-state mission text pointing at the matching primer script.
	const scriptGuidance = buildRoutingScriptGuidance(routingState, primerPath);

	// TP-058: template first; inline prompt only when no template was loaded.
	const template = loadSupervisorTemplate("supervisor-routing", stateRoot, "supervisor");
	if (!template) {
		return buildRoutingInlinePrompt(routingContext, primerPath, scriptGuidance);
	}

	const templateVars: Record<string, string> = {
		routingState,
		contextMessage,
		scriptGuidance,
		primerPath,
	};
	return replaceTemplateVars(template, templateVars);
}
2543
+
2544
/**
 * Produce the "Your Mission" section of a routing prompt.
 *
 * Each recognised routing state ("no-config", "pending-tasks", "no-tasks",
 * "completed-batch") maps to its own instruction text; any other value gets a
 * generic assistance section that echoes the state it received.
 *
 * @param routingState - The detected project state
 * @param primerPath - Path to the supervisor primer, interpolated into the text
 * @returns Markdown guidance text for the given state
 *
 * @since TP-058
 */
function buildRoutingScriptGuidance(routingState: string, primerPath: string): string {
	switch (routingState) {
		case "no-config":
			return `## Your Mission: Onboarding

This project has no OrchID configuration. You need to determine which
onboarding script to follow from the primer's "Onboarding Scripts" section:

1. **Read the primer** at \`${primerPath}\` — specifically the "Onboarding Scripts" section
2. **Analyze the project** to determine its maturity:
- No \`.pi/\` directory AND minimal code → **Script 1: First Time Ever** or **Script 2: New/Empty Project**
- No \`.pi/\` directory AND substantial code → **Script 3: Established Project**
- The scripts describe specific triggers and exploration steps
3. **Follow the matched script** — it guides the conversation, exploration,
and artifact generation
4. **Delegate to Script 4** (Task Area Design) and **Script 5** (Git Branching)
as sub-flows during onboarding — the main scripts tell you when

### Key Onboarding Artifacts to Create

When the conversation reaches the config generation phase, create ALL of these
(idempotent — create only if they don't already exist):

- \`.pi/orchid-config.json\` — project configuration (task areas, lanes, review level, etc.)
- \`{task_area}/CONTEXT.md\` — one per task area, describing scope and conventions
- \`.pi/agents/task-worker.md\` — worker prompt overrides (can start empty with a brief comment)
- \`.pi/agents/task-reviewer.md\` — reviewer prompt overrides (can start empty with a brief comment)
- \`.pi/agents/task-merger.md\` — merger prompt overrides (can start empty with a brief comment)
- \`.pi/agents/supervisor.md\` — supervisor prompt overrides (can start empty with a brief comment)
- \`.gitignore\` entries — add OrchID working file patterns if not already present

Use conservative creation: check if each file exists before writing. If files
already exist (partial setup), read and merge rather than overwrite.

### CRITICAL: Task Area Registration

**Every task folder MUST be registered in \`.pi/orchid-config.json\` under
\`taskRunner.taskAreas\`.** Without registration, \`/orch all\` will fail with
"no task areas configured" — even if the folders and tasks physically exist.

When creating a task folder (e.g., \`orchid-tasks/\`):
1. Create the folder and its \`CONTEXT.md\`
2. Register it in \`.pi/orchid-config.json\`:
\`\`\`json
{
"taskRunner": {
"taskAreas": {
"general": {
"path": "orchid-tasks",
"prefix": "OR",
"context": "orchid-tasks/CONTEXT.md"
}
}
}
}
\`\`\`
3. **Verify** by reading the config back to confirm the area is registered

When creating tasks inside an area, check that the area is registered first.
If it's not (e.g., operator created the folder manually), register it before
proceeding.

This also applies when creating tasks later in the conversation — always verify
the task area is registered in the config before offering to run \`/orch all\`.`;

		case "pending-tasks":
			return `## Your Mission: Batch Planning

This project has OrchID configured and has pending tasks ready to execute.
Follow the primer's **"Script 6: Batch Planning"** section (pending-tasks path).

1. **Read the primer** at \`${primerPath}\` — specifically Script 6's exploration
phase and "pending tasks exist" conversation flow
2. **Review pending tasks** — scan task areas for folders without \`.DONE\` files,
read each PROMPT.md header for size/deps/title, list them for the operator
3. **Explain dependencies and wave structure** if tasks have dependency chains
4. **Offer to plan and start a batch** — suggest \`/orch-plan all\` to preview
wave breakdown, or \`/orch all\` to start directly
5. **Surface supplementary items** — check CONTEXT.md tech debt sections and
GitHub Issues (\`gh issue list\` if available) for additional work to include
6. **Offer a health check** (Script 7) if the operator wants to verify project
state before starting`;

		case "no-tasks":
			return `## Your Mission: Task Creation Guidance

This project has OrchID configured but no pending tasks.
Follow the primer's **"Script 6: Batch Planning"** section
(specifically the "no pending tasks" conversation flow).

1. **Read the primer** at \`${primerPath}\` — specifically Script 6's exploration
phase and "no pending tasks" conversation flow
2. **Run the exploration phase** — scan CONTEXT.md tech debt sections, check
GitHub Issues (\`gh issue list\` if available), grep for TODO/FIXME comments
3. **Present a source inventory** — group potential work items by source
(GitHub Issues, tech debt, TODOs) with counts
4. **Help the operator create tasks** — offer to generate task packets from
GitHub Issues, tech debt items, or a new spec described in conversation
5. **Offer a health check** (Script 7) if the operator prefers to assess
project state rather than create tasks
6. **Graceful fallback**: If \`gh\` CLI is unavailable, skip GitHub checks and
mention it to the operator — continue with CONTEXT.md and TODO scanning

### Important: Task Area Verification

Before creating any tasks, verify that the target task area folder is registered
in \`.pi/orchid-config.json\` under \`taskRunner.taskAreas\`. If it's missing
(e.g., the folder exists but was never registered), register it first. Without
registration, \`/orch all\` will fail with "no task areas configured."`;

		case "completed-batch":
			return `## Your Mission: Integration & Retrospective

A completed batch exists that hasn't been integrated yet.

1. **Read the primer** at \`${primerPath}\` — specifically Script 8 (Post-Batch Retrospective)
and Script 7 (Health Check) sections
2. **Explain the orch branch model** — work is on the orch branch, not yet on the working branch
3. **Guide the operator** toward \`/orch-integrate\` to bring the batch's work into their branch
4. **Offer to run a health check** (Script 7) if they want to verify state first
5. **Run a retrospective** (Script 8) — read batch-state.json and the audit
trail (\`.pi/supervisor/actions.jsonl\`) to summarize batch outcomes, highlight
incidents, and recommend improvements. Present this either before or after
integration based on what the operator prefers.
6. **Surface next steps** — check for pending tasks and offer to plan the next batch`;

		default:
			// Unrecognised state: generic guidance that names the state verbatim.
			return `## Your Mission: Project Assistance

Detected state: ${routingState}

1. **Read the primer** at \`${primerPath}\`
2. **Assess the situation** and help the operator with their next step
3. **Offer relevant guidance** based on what you discover`;
	}
}
2694
+
2695
/**
 * Assemble the routing system prompt from hard-coded text.
 *
 * This is the fallback path taken when the base template file cannot be found.
 *
 * @param routingContext - Supplies the routing state and context message shown
 *   under "Detected State"
 * @param primerPath - Path to the supervisor primer, referenced in the prompt
 * @param scriptGuidance - Pre-built per-state mission section, inserted verbatim
 * @returns The complete inline prompt
 *
 * @since TP-058
 */
function buildRoutingInlinePrompt(
	routingContext: SupervisorRoutingContext,
	primerPath: string,
	scriptGuidance: string,
): string {
	const { routingState, contextMessage } = routingContext;

	return `# Project Supervisor

You are the **project supervisor** — a conversational agent that helps operators
set up, plan, and manage their OrchID project. You were activated because the
operator typed \`/orch\` without arguments, and I detected the project state.

## Identity

You share this terminal session with the human operator. You are a senior
engineer helping them get the most out of OrchID. Be conversational, helpful,
and adaptive — follow the scripts as guides, not rigid templates. If the
operator wants to skip ahead or go minimal, respect that.

## Detected State

**Routing state:** ${routingState}
**Context:** ${contextMessage}

${scriptGuidance}

## Capabilities

You have full tool access: \`read\`, \`write\`, \`edit\`, \`bash\`, \`grep\`, \`find\`, \`ls\`.
Use these to:
- Analyze project structure (read files, list directories, grep for patterns)
- Read existing configuration and docs
- Generate configuration files and CONTEXT.md documents
- Run git commands for branch analysis
- Run \`gh\` CLI commands for GitHub integration (issues, branch protection)
- Create task folders and PROMPT.md files

### Orchestrator Tools

You also have orchestrator tools available for batch management:
- **orch_start(target)** — Start a new batch (target: "all" or a task area name/path)
- **orch_status()** — Check batch status
- **orch_resume(force?)** — Resume a paused batch
- **orch_integrate(mode?, force?, branch?)** — Integrate completed batch (modes: "fast-forward", "merge", "pr")
- **orch_pause()** — Pause running batch
- **orch_abort(hard?)** — Abort running batch

Use these when the conversation leads to batch operations (e.g., starting a batch, integrating a completed batch).

## Operational Knowledge

**IMPORTANT:** Read \`${primerPath}\` for your complete operational runbook.
It contains:
- Onboarding scripts (Scripts 1-5) with detailed conversation guides
- Returning user scripts (Scripts 6-8) for batch planning, health checks, and retrospectives
- Project detection heuristics and exploration checklists
- Config generation templates and conventions

Read the relevant script section now before starting the conversation.

## Communication Style

- Be conversational, not robotic — you're having a dialog, not running a wizard
- Show what you discover as you explore ("I can see you have a TypeScript project with...")
- Ask questions when choices matter, propose defaults when they don't
- Summarize what you'll create before writing files — let the operator confirm
- If the operator says "just give me defaults", do it and move on

## Starting a Batch

When the operator wants to run pending tasks, use the \`/orch all\` command.
You can invoke it directly — it will seamlessly transition you from conversational
mode to batch monitoring mode. Examples of operator intent:

- "run the open tasks" → respond with a brief confirmation, then invoke \`/orch all\`
- "start the batch" → invoke \`/orch all\`
- "run just the platform tasks" → invoke \`/orch platform\` (with the area name)

Before starting, you may optionally:
- Show a quick summary of pending tasks and wave plan (\`/orch-plan all\`)
- Ask for confirmation if the operator's intent was ambiguous

After \`/orch all\` starts, your system prompt will automatically switch to
batch monitoring mode. You'll have full visibility into wave progress, task
outcomes, and can handle failures.

## What You Must NEVER Do

1. Never modify existing code files (only create config/scaffolding)
2. Never \`git push\` to any remote
3. Never overwrite existing config files without asking
4. Never make assumptions about project conventions — detect them
`;
}
2796
+
2797
+ // ── Activation ───────────────────────────────────────────────────────
2798
+
2799
/**
 * Mutable record describing the supervisor's activation.
 *
 * It records whether the supervisor is active for the current batch, which is
 * what prevents duplicate activations and lets the before_agent_start hook
 * guard itself.
 *
 * The system prompt is not stored here. It is rebuilt each turn from the live
 * batchState reference so it always reflects current metadata: batchId and the
 * wave/task counts are filled in asynchronously by the engine after planning.
 *
 * @since TP-041
 */
export interface SupervisorState {
	/** True while the supervisor is active */
	active: boolean;
	/** ID of the monitored batch; empty when inactive or before planning */
	batchId: string;
	/** Supervisor configuration in effect */
	config: SupervisorConfig;

	// ── Live references used to rebuild the prompt each turn ────────
	/** Live batch state reference, read when the prompt is rebuilt */
	batchStateRef: OrchBatchRuntimeState | null;
	/** Orchestrator config reference, read when the prompt is rebuilt */
	orchConfigRef: OrchestratorConfig | null;
	/** State root path, read when the prompt is rebuilt */
	stateRoot: string;

	// ── Model override tracking ─────────────────────────────────────
	/** Model in use before activation, kept so it can be restored */
	previousModel: Model<Api> | null;
	/** True if activation switched models; decides whether to restore */
	didSwitchModel: boolean;

	// ── Lockfile + Heartbeat (Step 2) ───────────────────────────────
	/** Session ID recorded in the lockfile, used for yield detection */
	lockSessionId: string;
	/** Handle of the heartbeat timer; null when not active */
	heartbeatTimer: ReturnType<typeof setInterval> | null;

	// ── Event Tailer (Step 3) ───────────────────────────────────────
	/** State of the tailer that consumes engine events */
	eventTailer: EventTailerState;

	// ── Routing Context (TP-042) ────────────────────────────────────
	/** Non-null means routing mode (onboarding / returning-user flows) */
	routingContext: SupervisorRoutingContext | null;

	// ── Deferred Summary (TP-043 R004) ──────────────────────────────
	/**
	 * Non-null means a batch summary is waiting to be presented. Used in
	 * supervised mode, where the summary must wait until /orch-integrate
	 * completes (or the operator declines and the supervisor deactivates).
	 */
	pendingSummaryDeps: SummaryDeps | null;
}
2856
+
2857
/**
 * Build a supervisor state in its inactive starting configuration.
 *
 * `config` is a shallow copy of `DEFAULT_SUPERVISOR_CONFIG` and `eventTailer`
 * comes from `freshEventTailerState()`; every other field is false, empty,
 * or null.
 *
 * @returns A new inactive {@link SupervisorState}
 */
export function freshSupervisorState(): SupervisorState {
	// Spread so the returned state does not hold the shared defaults object itself.
	const config = { ...DEFAULT_SUPERVISOR_CONFIG };
	const eventTailer = freshEventTailerState();

	const inactive: SupervisorState = {
		active: false,
		batchId: "",
		config,
		batchStateRef: null,
		orchConfigRef: null,
		stateRoot: "",
		previousModel: null,
		didSwitchModel: false,
		lockSessionId: "",
		heartbeatTimer: null,
		eventTailer,
		routingContext: null,
		pendingSummaryDeps: null,
	};
	return inactive;
}
2877
+
2878
/**
 * Resolve a model string (e.g., "anthropic/claude-sonnet-4" or "claude-sonnet-4")
 * to a Model object from the model registry.
 *
 * Accepted formats:
 * - "provider/modelId" — looked up with `ctx.modelRegistry.find(provider, id)`,
 *   splitting on the first "/".
 * - "modelId" — matched against the `id` of every model from
 *   `ctx.modelRegistry.getAll()`.
 *
 * A bare model id may itself contain "/". When the "provider/modelId"
 * interpretation finds nothing, the whole string is therefore also tried as a
 * bare id before giving up.
 *
 * @param modelStr - Model string to resolve; an empty string resolves to undefined
 * @param ctx - Extension context providing the model registry
 * @returns The resolved Model, or undefined if not found
 * @since TP-041
 */
export function resolveModelFromString(
	modelStr: string,
	ctx: ExtensionContext,
): Model<Api> | undefined {
	if (!modelStr) return undefined;

	// Try "provider/id" format first. A slash at index 0 leaves no provider
	// name, so that case is handled by the bare-id search below.
	const slashIdx = modelStr.indexOf("/");
	if (slashIdx > 0) {
		const provider = modelStr.substring(0, slashIdx);
		const id = modelStr.substring(slashIdx + 1);
		const qualified = ctx.modelRegistry.find(provider, id);
		if (qualified) return qualified;
		// No such provider/id pair — the slash may be part of the id itself.
	}

	// Bare id (or unresolved qualified form) — search all models for a matching id.
	return ctx.modelRegistry.getAll().find((m) => m.id === modelStr);
}
2905
+
2906
/**
 * Routing context passed when the supervisor is activated by /orch with no
 * arguments.
 *
 * Its presence puts the supervisor in "routing mode": it runs onboarding,
 * batch planning, or another conversational flow rather than monitoring a
 * batch. Because no batch is active in that mode, the lockfile, heartbeat,
 * and event tailer are all skipped.
 *
 * @since TP-042
 */
export interface SupervisorRoutingContext {
	/** Detected project state, for example "no-config" or "pending-tasks" */
	routingState: string;
	/** Human-readable context message used for the supervisor's first turn */
	contextMessage: string;
}
2922
+
2923
+ /**
2924
+ * Activate the supervisor agent in the current pi session.
2925
+ *
2926
+ * This is called after `startBatchInWorker()` in the `/orch` command handler,
2927
+ * or directly by the `/orch` no-args routing logic (TP-042).
2928
+ *
2929
+ * It:
2930
+ * 1. Stores live references to batchState/config for dynamic prompt rebuild
2931
+ * 2. Optionally switches model via pi.setModel() if supervisor.model is configured
2932
+ * 3. Sends an activation message via pi.sendMessage() with triggerTurn=true
2933
+ * to kick off the supervisor's first turn
2934
+ *
2935
+ * When `routingContext` is provided (TP-042 no-args routing), lockfile/heartbeat
2936
+ * and event tailer are skipped — there's no active batch to monitor. The
2937
+ * activation message uses the routing context instead of batch metadata.
2938
+ *
2939
+ * The system prompt is NOT cached at activation time — it is rebuilt dynamically
2940
+ * on every LLM turn by the before_agent_start hook. This ensures the prompt
2941
+ * always has current batch metadata, even though batchId/wave/task counts are
2942
+ * populated asynchronously by the engine after planning.
2943
+ *
2944
+ * @param pi - The ExtensionAPI instance
2945
+ * @param state - Mutable supervisor state to populate
2946
+ * @param batchState - Current batch runtime state (live reference)
2947
+ * @param orchConfig - Orchestrator configuration
2948
+ * @param supervisorConfig - Supervisor-specific configuration
2949
+ * @param stateRoot - Root path for .pi/ state directory
2950
+ * @param ctx - Extension context (for model resolution)
2951
+ * @param routingContext - Optional routing context for /orch no-args (TP-042)
2952
+ *
2953
+ * @since TP-041
2954
+ */
2955
export async function activateSupervisor(
	pi: ExtensionAPI,
	state: SupervisorState,
	batchState: OrchBatchRuntimeState,
	orchConfig: OrchestratorConfig,
	supervisorConfig: SupervisorConfig,
	stateRoot: string,
	ctx: ExtensionContext,
	routingContext?: SupervisorRoutingContext,
): Promise<void> {
	// Store live references for dynamic prompt rebuild
	state.active = true;
	state.batchId = batchState.batchId; // May be empty pre-planning — that's OK
	state.config = { ...supervisorConfig };
	state.batchStateRef = batchState;
	state.orchConfigRef = orchConfig;
	state.stateRoot = stateRoot;

	// ── TP-042 R004: Clear routing context on non-routing activation ──
	// If a previous activation set routingContext (onboarding/returning-user),
	// clear it now so the before_agent_start hook switches to the
	// batch-monitoring prompt instead of keeping the stale routing prompt.
	state.routingContext = routingContext ?? null;

	// ── Model override ───────────────────────────────────────────────
	// If supervisor.model is configured, switch to it and remember the
	// previous model for restoration on deactivation.
	//
	// Re-activation guard: transitionToRoutingMode() deliberately keeps the
	// model override in place, so this function can run again while the
	// supervisor model is already active. In that case ctx.model is the
	// supervisor model, not the operator's original one — re-capturing it (or
	// resetting didSwitchModel) would make deactivation "restore" the wrong
	// model. Only capture when no override is currently in effect.
	if (!state.didSwitchModel) {
		state.previousModel = ctx.model ?? null;
	}

	if (supervisorConfig.model) {
		const targetModel = resolveModelFromString(supervisorConfig.model, ctx);
		if (targetModel) {
			const success = await pi.setModel(targetModel);
			if (success) {
				state.didSwitchModel = true;
			}
			// If setModel fails (no API key), fall through to session model
		}
		// If model not found in registry, fall through to session model (inheritance)
	}

	// ── TP-042: Routing mode — skip batch monitoring infrastructure ──
	// When activated via /orch no-args routing, there's no active batch.
	// Skip lockfile/heartbeat/event-tailer and send routing context message.
	// routingContext was already stored above (via routingContext ?? null).
	if (routingContext) {
		pi.sendMessage(
			{
				customType: "supervisor-routing",
				content: [
					{
						type: "text",
						text:
							`🔀 **Supervisor activated** (${routingContext.routingState}).\n\n` +
							routingContext.contextMessage,
					},
				],
				display: `Supervisor activated — ${routingContext.routingState}`,
			},
			// triggerTurn starts an LLM turn immediately when the agent is idle.
			// Do NOT use deliverAs:"nextTurn" — that queues the message for a
			// future turn instead of starting one, causing the terminal to hang
			// until the user sends input.
			{ triggerTurn: true },
		);
		return;
	}

	// ── Lockfile + Heartbeat (Step 2) ────────────────────────────────
	// Stop any heartbeat left over from a previous activation BEFORE claiming
	// the lock under a new session ID. An orphaned timer still holds the old
	// session ID in its closure; on its next tick it would see the new lockfile
	// as a foreign takeover and deactivate this (shared) supervisor state.
	if (state.heartbeatTimer) {
		clearInterval(state.heartbeatTimer);
		state.heartbeatTimer = null;
	}

	// Write lockfile to claim supervisor role. Generate a unique session ID
	// for yield detection (if another session force-takes over, our heartbeat
	// will detect the sessionId mismatch and yield).
	const sessionId = `pi-${process.pid}-${Date.now()}`;
	state.lockSessionId = sessionId;

	const now = new Date().toISOString();
	const lock: SupervisorLockfile = {
		pid: process.pid,
		sessionId,
		batchId: batchState.batchId || "(initializing)",
		startedAt: now,
		heartbeat: now,
	};
	writeLockfile(stateRoot, lock);

	// Start heartbeat timer — updates lockfile every 30s, detects takeover
	state.heartbeatTimer = startHeartbeat(stateRoot, state, pi);

	// ── Event tailer (Step 3) ────────────────────────────────────
	// Start tailing events.jsonl for proactive notifications.
	// The status callback is wrapped so a UI failure never breaks the tailer.
	startEventTailer(pi, state.eventTailer, state, (key, text) => {
		try {
			ctx.ui.setStatus(key, text);
		} catch {
			/* non-fatal */
		}
	});

	// Send activation message to trigger the supervisor's first turn.
	// The content is generic — specific counts may not be available yet
	// since the engine sets batchId/totalWaves/totalTasks asynchronously.
	pi.sendMessage(
		{
			customType: "supervisor-activation",
			content: [
				{
					type: "text",
					text:
						`🔀 **Batch started.** ` +
						`Supervisor activated (autonomy: ${supervisorConfig.autonomy}).\n\n` +
						`Read your operational primer and batch state, then report initial status to the operator.`,
				},
			],
			display: "Supervisor activated" + (batchState.batchId ? ` for batch ${batchState.batchId}` : ""),
		},
		// triggerTurn starts an LLM turn immediately when the agent is idle.
		// Do NOT use deliverAs:"nextTurn" here — see routing path comment.
		{ triggerTurn: true },
	);
}
3080
+
3081
/**
 * Deactivate the supervisor agent.
 *
 * Called when a batch completes, fails terminally, is stopped, or is aborted.
 * Tears down the event tailer, heartbeat timer and lockfile, presents any
 * deferred batch summary, clears the supervisor state so the
 * before_agent_start hook stops injecting the supervisor system prompt, and
 * finally restores the previous model if one was switched on activation.
 *
 * Safe to call multiple times, including concurrently: `state.active` is
 * flipped synchronously before any `await`, so a second caller (for example
 * the heartbeat yield path racing a batch-complete handler) becomes a no-op
 * instead of running teardown twice.
 *
 * @param pi - The ExtensionAPI instance (for model restoration)
 * @param state - Supervisor state to clear
 *
 * @since TP-041
 */
export async function deactivateSupervisor(
	pi: ExtensionAPI,
	state: SupervisorState,
): Promise<void> {
	if (!state.active) return; // Already inactive — idempotent guard

	// Flip the flag first so re-entrant callers hit the guard above.
	state.active = false;

	// Snapshot the model to restore before the state fields are wiped.
	const modelToRestore = state.didSwitchModel ? state.previousModel : null;

	try {
		// ── Stop event tailer (Step 3) ───────────────────────────────
		stopEventTailer(state.eventTailer);

		// ── Stop heartbeat timer (Step 2) ────────────────────────────
		if (state.heartbeatTimer) {
			clearInterval(state.heartbeatTimer);
			state.heartbeatTimer = null;
		}

		// ── Remove lockfile (Step 2) ─────────────────────────────────
		// Only remove if we still own it (our sessionId matches).
		// If another session force-took-over, the lockfile belongs to them.
		if (state.stateRoot && state.lockSessionId) {
			const currentLock = readLockfile(state.stateRoot);
			if (!currentLock || currentLock.sessionId === state.lockSessionId) {
				removeLockfile(state.stateRoot);
			}
		}

		// ── TP-043 R004: Present deferred batch summary ─────────────
		// If a batch summary was deferred (supervised mode awaiting integration
		// confirmation), present it now — before we clear state refs.
		if (state.pendingSummaryDeps && state.batchStateRef && state.stateRoot) {
			const deps = state.pendingSummaryDeps;
			presentBatchSummary(
				pi,
				state.batchStateRef,
				state.stateRoot,
				deps.opId,
				deps.diagnostics,
				deps.mergeResults,
			);
		}
	} finally {
		// Always reset the state, even if a teardown step threw, so the
		// supervisor never lingers half-deactivated with stale references.
		state.batchId = "";
		state.batchStateRef = null;
		state.orchConfigRef = null;
		state.stateRoot = "";
		state.previousModel = null;
		state.didSwitchModel = false;
		state.lockSessionId = "";
		state.routingContext = null;
		state.pendingSummaryDeps = null;
	}

	// Restore previous model if we switched on activation. Done last, after the
	// synchronous reset, so nothing observes a half-cleared state across the await.
	if (modelToRestore) {
		try {
			await pi.setModel(modelToRestore);
		} catch {
			// Non-fatal — model may no longer be available
		}
	}
}
3157
+
3158
/**
 * Move an active supervisor out of batch-monitoring mode and back into
 * routing (conversational) mode.
 *
 * Used after a batch completes, fails, or pauses when the session should stay
 * alive rather than fully deactivate. The batch-monitoring pieces (event
 * tailer, heartbeat timer, lockfile) are torn down, any deferred batch summary
 * is presented, and the given routing context is installed so the operator can
 * keep talking to the supervisor without re-running `/orch`.
 *
 * Intended flow:
 *   /orch → conversation → "run the tasks" → batch runs → batch completes →
 *   conversation continues → "create more tasks" → "run them" → repeat
 *
 * No-op when the supervisor is not active.
 *
 * @param pi - The ExtensionAPI instance
 * @param state - Supervisor state to transition
 * @param routingContext - The routing context for the new conversational mode
 *
 * @since TP-128
 */
export async function transitionToRoutingMode(
	pi: ExtensionAPI,
	state: SupervisorState,
	routingContext: SupervisorRoutingContext,
): Promise<void> {
	if (!state.active) {
		return;
	}

	// ── Tear down batch-monitoring infrastructure ──
	stopEventTailer(state.eventTailer);

	const heartbeat = state.heartbeatTimer;
	if (heartbeat) {
		clearInterval(heartbeat);
		state.heartbeatTimer = null;
	}

	// Release the lockfile, but only if it is absent/unreadable or still ours.
	if (state.stateRoot && state.lockSessionId) {
		const onDisk = readLockfile(state.stateRoot);
		const ownedByUs = !onDisk || onDisk.sessionId === state.lockSessionId;
		if (ownedByUs) {
			removeLockfile(state.stateRoot);
		}
	}
	state.lockSessionId = "";

	// ── Flush a deferred batch summary, if one is waiting ──
	const pending = state.pendingSummaryDeps;
	if (pending && state.batchStateRef && state.stateRoot) {
		presentBatchSummary(
			pi,
			state.batchStateRef,
			state.stateRoot,
			pending.opId,
			pending.diagnostics,
			pending.mergeResults,
		);
		state.pendingSummaryDeps = null;
	}

	// ── Enter routing mode; the supervisor stays active ──
	// batchStateRef/orchConfigRef/stateRoot are intentionally kept (the routing
	// prompt may need them), and the model override is left alone so the model
	// does not change mid-conversation.
	state.routingContext = routingContext;
	state.batchId = "";

	// TP-052: a prominent "ready" message. triggerTurn forces an LLM response,
	// which makes the pi TUI redraw and show the input prompt.
	const readyText =
		`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n` +
		`🔀 **Ready for your input.**\n\n` +
		routingContext.contextMessage +
		`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`;

	pi.sendMessage(
		{
			customType: "supervisor-routing-transition",
			content: [{ type: "text", text: readyText }],
			display: `Supervisor — ${routingContext.routingState}`,
		},
		{ triggerTurn: true },
	);
}
3242
+
3243
/**
 * Install the `before_agent_start` hook that injects the supervisor system
 * prompt on every LLM turn while the supervisor is active.
 *
 * The prompt is rebuilt on each turn from the live references held in
 * `state`, so it reflects batch metadata that the engine fills in
 * asynchronously (batchId, wave/task counts).
 *
 * Per-turn behaviour:
 * - supervisor inactive → returns `undefined` (system prompt untouched)
 * - routing context set → routing prompt (onboarding / returning-user flow)
 * - batch refs present → standard batch-monitoring supervisor prompt
 * - batch refs missing → returns `undefined`
 *
 * @param pi - The ExtensionAPI instance
 * @param state - Supervisor state (checked on each turn)
 *
 * @since TP-041
 */
export function registerSupervisorPromptHook(pi: ExtensionAPI, state: SupervisorState): void {
	pi.on("before_agent_start", (_event) => {
		if (!state.active) {
			return undefined;
		}

		// TP-042: conversational flows (onboarding, batch planning, …) take
		// precedence over batch monitoring whenever a routing context is set.
		const routing = state.routingContext;
		if (routing) {
			return { systemPrompt: buildRoutingSystemPrompt(routing, state.stateRoot) };
		}

		// Batch monitoring: both live references are required to build the prompt.
		const liveBatch = state.batchStateRef;
		const liveOrchConfig = state.orchConfigRef;
		if (liveBatch && liveOrchConfig) {
			return {
				systemPrompt: buildSupervisorSystemPrompt(
					liveBatch,
					liveOrchConfig,
					state.config,
					state.stateRoot,
				),
			};
		}

		return undefined;
	});
}
3294
+
3295
/**
 * Produce a complete supervisor configuration from an optional, possibly
 * partial, pre-loaded section.
 *
 * Documented resolution order (highest precedence first):
 * 1. Global preferences (supervisorModel → orchestrator.supervisor.model)
 * 2. Project config (orchestrator.supervisor section in orchid-config.json)
 * 3. Defaults (model="" = inherit session model, autonomy="supervised")
 *
 * This helper only performs the final step — filling gaps from
 * `DEFAULT_SUPERVISOR_CONFIG` — and is meant for callers that have already run
 * the full config pipeline. To load config directly, use
 * `loadSupervisorConfig()` from config.ts.
 *
 * @param supervisorSection - Pre-loaded supervisor config section (or undefined for defaults)
 * @returns Resolved supervisor configuration (always a fresh object)
 *
 * @since TP-041
 */
export function resolveSupervisorConfig(
	supervisorSection?: Partial<SupervisorConfig>,
): SupervisorConfig {
	if (supervisorSection) {
		const { model, autonomy } = supervisorSection;
		return {
			model: model ?? DEFAULT_SUPERVISOR_CONFIG.model,
			autonomy: autonomy ?? DEFAULT_SUPERVISOR_CONFIG.autonomy,
		};
	}
	// Nothing supplied — hand back a copy of the defaults.
	return { ...DEFAULT_SUPERVISOR_CONFIG };
}
3321
+
3322
+ // ── Lockfile Types + Helpers (TP-041 Step 2) ─────────────────────────
3323
+
3324
/** How often the supervisor refreshes its lockfile heartbeat, in milliseconds (30 s). */
export const HEARTBEAT_INTERVAL_MS = 30_000;

/**
 * Age, in milliseconds, beyond which a lockfile heartbeat counts as stale.
 * 90 s corresponds to three missed 30-second heartbeats.
 */
export const STALE_LOCK_THRESHOLD_MS = 90_000;
3329
+
3330
/**
 * On-disk shape of the supervisor lockfile (`.pi/supervisor/lock.json`).
 *
 * The lockfile enforces a 1:1 ratio between supervisors and batches:
 * only one supervisor session may be active per project at a time.
 *
 * @since TP-041
 */
export interface SupervisorLockfile {
	/** OS process ID of the session that holds the lock. */
	pid: number;
	/**
	 * Unique identifier of the owning supervisor session. Compared against the
	 * holder's own ID to detect a force takeover by another session.
	 */
	sessionId: string;
	/** ID of the batch being supervised. */
	batchId: string;
	/** ISO 8601 timestamp of when this supervisor started. */
	startedAt: string;
	/** ISO 8601 timestamp of the most recent heartbeat. */
	heartbeat: string;
}
3350
+
3351
/**
 * Outcome of inspecting batch state and the supervisor lockfile at startup.
 *
 * Discriminated on `status`:
 * - `no-active-batch` — nothing to supervise; no further data attached
 * - `no-lockfile` — active batch, no lockfile present
 * - `stale` — active batch, lockfile present but its owner is dead or silent
 * - `live` — active batch, lockfile held by a live supervisor
 * - `corrupt` — active batch, lockfile exists but could not be parsed
 *
 * @since TP-041
 */
export type LockfileCheckResult =
	| { status: "no-active-batch" }
	| { status: "no-lockfile"; batchState: PersistedBatchState }
	| { status: "stale"; lock: SupervisorLockfile; batchState: PersistedBatchState }
	| { status: "live"; lock: SupervisorLockfile; batchState: PersistedBatchState }
	| { status: "corrupt"; batchState: PersistedBatchState };
3362
+
3363
/**
 * Build the absolute-or-relative path of the supervisor lockfile
 * (`<stateRoot>/.pi/supervisor/lock.json`).
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @returns Path to `lock.json` under the given state root
 */
export function lockfilePath(stateRoot: string): string {
	const segments = [".pi", "supervisor", "lock.json"];
	return join(stateRoot, ...segments);
}
3369
+
3370
/**
 * Synchronously load and validate the supervisor lockfile.
 *
 * Yields `null` in every non-happy case: the file is missing, cannot be read,
 * is not valid JSON, or lacks any of the required fields with the expected
 * primitive type. Callers should treat `null` as "stale or absent" and rewrite
 * the lockfile if they take ownership.
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @returns Parsed lockfile or null
 *
 * @since TP-041
 */
export function readLockfile(stateRoot: string): SupervisorLockfile | null {
	const file = lockfilePath(stateRoot);
	if (!existsSync(file)) {
		return null;
	}

	try {
		const data = JSON.parse(readFileSync(file, "utf-8")) as Record<string, unknown>;

		// All five fields must be present with the right primitive type.
		const wellFormed =
			typeof data.pid === "number" &&
			typeof data.sessionId === "string" &&
			typeof data.batchId === "string" &&
			typeof data.startedAt === "string" &&
			typeof data.heartbeat === "string";

		// Malformed content is reported the same way as an absent file.
		return wellFormed ? (data as unknown as SupervisorLockfile) : null;
	} catch {
		// Unreadable file or corrupt JSON — treat as stale/absent.
		return null;
	}
}
3405
+
3406
/**
 * Synchronously persist the supervisor lockfile.
 *
 * The `.pi/supervisor/` directory is created on demand. The payload is first
 * written to a sibling `.tmp` file and then renamed over the final path, so a
 * reader never observes a half-written lockfile.
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @param lock - Lockfile data to write
 *
 * @since TP-041
 */
export function writeLockfile(stateRoot: string, lock: SupervisorLockfile): void {
	const supervisorDir = join(stateRoot, ".pi", "supervisor");
	if (!existsSync(supervisorDir)) {
		mkdirSync(supervisorDir, { recursive: true });
	}

	const target = lockfilePath(stateRoot);
	const staging = `${target}.tmp`;

	// Pretty-printed JSON with a trailing newline.
	writeFileSync(staging, `${JSON.stringify(lock, null, 2)}\n`, "utf-8");
	renameSync(staging, target);
}
3430
+
3431
/**
 * Promise-based counterpart of `readLockfile`: loads and validates the
 * supervisor lockfile without blocking the event loop.
 *
 * Resolves to `null` when the file cannot be read (including when it does not
 * exist), is not valid JSON, or is missing a required field of the expected
 * primitive type. Never rejects.
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @returns Parsed lockfile or null
 *
 * @since TP-070
 */
export async function readLockfileAsync(stateRoot: string): Promise<SupervisorLockfile | null> {
	const file = lockfilePath(stateRoot);

	try {
		const text = await fsReadFile(file, "utf-8");
		const data = JSON.parse(text) as Record<string, unknown>;

		// All five fields must be present with the right primitive type.
		const wellFormed =
			typeof data.pid === "number" &&
			typeof data.sessionId === "string" &&
			typeof data.batchId === "string" &&
			typeof data.startedAt === "string" &&
			typeof data.heartbeat === "string";

		return wellFormed ? (data as unknown as SupervisorLockfile) : null;
	} catch {
		// Missing/unreadable file or corrupt JSON.
		return null;
	}
}
3461
+
3462
/**
 * Promise-based counterpart of `writeLockfile`.
 *
 * Ensures `.pi/supervisor/` exists, writes the payload to a sibling `.tmp`
 * file, then renames it over the final path so readers never see a partial
 * lockfile.
 *
 * NOTE(review): the directory check/creation below is still synchronous
 * (`existsSync`/`mkdirSync`); only the file write and rename are awaited.
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @param lock - Lockfile data to write
 *
 * @since TP-070
 */
export async function writeLockfileAsync(
	stateRoot: string,
	lock: SupervisorLockfile,
): Promise<void> {
	const supervisorDir = join(stateRoot, ".pi", "supervisor");
	if (!existsSync(supervisorDir)) {
		mkdirSync(supervisorDir, { recursive: true });
	}

	const target = lockfilePath(stateRoot);
	const staging = `${target}.tmp`;

	// Pretty-printed JSON with a trailing newline.
	await fsWriteFile(staging, `${JSON.stringify(lock, null, 2)}\n`, "utf-8");
	await fsRename(staging, target);
}
3489
+
3490
/**
 * Delete the supervisor lockfile, if present.
 *
 * Best-effort: a missing file is a no-op and any filesystem error is
 * swallowed. A lockfile that could not be removed is expected to be picked up
 * as stale on the next startup.
 *
 * @param stateRoot - Root path for .pi/ state directory
 *
 * @since TP-041
 */
export function removeLockfile(stateRoot: string): void {
	const target = lockfilePath(stateRoot);
	try {
		if (!existsSync(target)) {
			return;
		}
		unlinkSync(target);
	} catch {
		// Intentionally ignored — see the best-effort note above.
	}
}
3509
+
3510
/**
 * Check whether a process with the given PID is alive.
 *
 * Uses `process.kill(pid, 0)`: signal 0 performs the existence/permission
 * check without delivering a signal.
 *
 * Outcomes:
 * - call succeeds → the process exists → `true`
 * - throws with code `EPERM` → the process exists but belongs to another
 *   user, so we may not signal it → still `true`
 * - throws with anything else (e.g. `ESRCH`) → `false`
 *
 * Non-positive or non-integer PIDs are rejected up front. PID 0 and negative
 * PIDs address process groups rather than a single process, so probing them
 * would report "alive" for a lockfile that names no real owner.
 *
 * @param pid - Process ID to check
 * @returns true if the process is alive
 *
 * @since TP-041
 */
export function isProcessAlive(pid: number): boolean {
	if (!Number.isInteger(pid) || pid <= 0) {
		return false;
	}

	try {
		process.kill(pid, 0);
		return true;
	} catch (err: unknown) {
		// EPERM means "exists, but not ours to signal" — that is still alive.
		return (
			typeof err === "object" &&
			err !== null &&
			(err as { code?: unknown }).code === "EPERM"
		);
	}
}
3529
+
3530
/**
 * Decide whether a lockfile's heartbeat is too old to trust.
 *
 * The heartbeat is stale when it is more than STALE_LOCK_THRESHOLD_MS (90s,
 * i.e. three missed 30-second heartbeats) in the past, or when the timestamp
 * cannot be parsed at all.
 *
 * @param lock - Lockfile to check
 * @returns true if the heartbeat is stale
 *
 * @since TP-041
 */
export function isLockStale(lock: SupervisorLockfile): boolean {
	const lastBeatMs = Date.parse(lock.heartbeat);
	if (Number.isNaN(lastBeatMs)) {
		// Unparseable timestamp — nothing to trust.
		return true;
	}
	const ageMs = Date.now() - lastBeatMs;
	return ageMs > STALE_LOCK_THRESHOLD_MS;
}
3546
+
3547
+ // ── Terminal Phase Detection ─────────────────────────────────────────
3548
+
3549
/**
 * Batch phases that mean "no batch is currently active".
 * When batch-state.json reports one of these, startup skips lockfile
 * arbitration entirely.
 */
const TERMINAL_PHASES = new Set<string>([
	"idle",
	"completed",
	"failed",
	"stopped",
]);

/**
 * Report whether the given batch phase is terminal (no active batch).
 *
 * @param phase - Phase string as stored in batch state
 * @returns true when the phase is one of the terminal phases
 *
 * @since TP-041
 */
export function isBatchTerminal(phase: string): boolean {
	const terminal = TERMINAL_PHASES.has(phase);
	return terminal;
}
3564
+
3565
+ // ── Startup Detection (Section 13.10) ────────────────────────────────
3566
+
3567
/**
 * Startup gate: determine whether there is an active batch and, if so, what
 * state the supervisor lockfile is in.
 *
 * Follows spec Section 13.10:
 * 1. Load batch state; an unreadable, missing, or terminal-phase batch means
 *    there is nothing to supervise.
 * 2. With an active batch, read the lockfile and classify it as absent,
 *    corrupt, stale, or live.
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @param loadBatchStateFn - Function to load batch state (injectable for testing)
 * @returns LockfileCheckResult describing the current state
 *
 * @since TP-041
 */
export function checkSupervisorLockOnStartup(
	stateRoot: string,
	loadBatchStateFn: (root: string) => PersistedBatchState | null,
): LockfileCheckResult {
	// ── Is there an active batch at all? ──
	let batchState: PersistedBatchState | null;
	try {
		batchState = loadBatchStateFn(stateRoot);
	} catch {
		// A loader failure is treated the same as "no batch".
		return { status: "no-active-batch" };
	}

	if (!batchState || isBatchTerminal(batchState.phase)) {
		return { status: "no-active-batch" };
	}

	// ── Active batch: inspect the lockfile ──
	const lock = readLockfile(stateRoot);

	if (lock) {
		// A lock is only "live" if its process exists AND its heartbeat is fresh.
		const ownerGone = !isProcessAlive(lock.pid) || isLockStale(lock);
		return ownerGone
			? { status: "stale", lock, batchState }
			: { status: "live", lock, batchState };
	}

	// readLockfile() returns null for both "missing" and "unparseable";
	// the file's presence on disk tells the two apart.
	return existsSync(lockfilePath(stateRoot))
		? { status: "corrupt", batchState }
		: { status: "no-lockfile", batchState };
}
3619
+
3620
+ // ── Rehydration Summary ──────────────────────────────────────────────
3621
+
3622
/**
 * Compose the human-readable summary shown to the operator after a
 * supervisor takeover.
 *
 * Sources:
 * 1. The persisted batch state — phase, wave position, base branch, and task
 *    counts by status
 * 2. The supervisor audit trail (via `readAuditTrail`) — up to the last five
 *    actions of the previous supervisor
 * 3. `.pi/supervisor/events.jsonl` — the last five engine events
 *
 * Reading the events file is best-effort; a read failure simply omits that
 * section, and an individual line that fails to parse is shown as
 * "(unparseable event)".
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @param batchState - Current batch state
 * @returns Summary string for the operator
 *
 * @since TP-041
 */
export function buildTakeoverSummary(stateRoot: string, batchState: PersistedBatchState): string {
	const waveTotal = batchState.wavePlan?.length ?? batchState.totalWaves ?? "?";

	// ── Header + task tallies from persisted state ──
	const taskList = batchState.tasks ?? [];
	const countWithStatus = (status: string): number =>
		taskList.filter((task) => task.status === status).length;
	const succeeded = countWithStatus("succeeded");
	const failed = countWithStatus("failed");
	const running = countWithStatus("running");
	const pending = countWithStatus("pending");

	const out: string[] = [
		`📋 **Taking over batch ${batchState.batchId}**`,
		"",
		`**Phase:** ${batchState.phase}`,
		`**Wave:** ${batchState.currentWaveIndex + 1}/${waveTotal}`,
		`**Base branch:** ${batchState.baseBranch}`,
		`**Tasks:** ${succeeded} succeeded, ${failed} failed, ${running} running, ${pending} pending`,
	];

	// ── What the previous supervisor did (audit trail) ──
	const priorActions = readAuditTrail(stateRoot, { limit: 5 });
	if (priorActions.length > 0) {
		out.push("", `**Previous supervisor actions** (last ${priorActions.length}):`);
		for (const entry of priorActions) {
			out.push(` - ${entry.action ?? "unknown"}: ${entry.context ?? ""}`);
		}
	}

	// ── Tail of the engine event log ──
	const eventLog = join(stateRoot, ".pi", "supervisor", "events.jsonl");
	if (existsSync(eventLog)) {
		try {
			const content = readFileSync(eventLog, "utf-8").trim();
			if (content) {
				const tail = content.split("\n").slice(-5); // last five lines
				out.push("", `**Recent engine events** (last ${tail.length}):`);
				for (const rawLine of tail) {
					try {
						const evt = JSON.parse(rawLine) as Record<string, unknown>;
						out.push(` - [${evt.type ?? "?"}] ${evt.message ?? evt.taskId ?? ""}`);
					} catch {
						out.push(` - (unparseable event)`);
					}
				}
			}
		} catch {
			// Best-effort — skip the section if the file cannot be read.
		}
	}

	return out.join("\n");
}
3695
+
3696
+ // ── Heartbeat Timer ──────────────────────────────────────────────────
3697
+
3698
/**
 * Start the heartbeat timer for the supervisor lockfile.
 *
 * Every HEARTBEAT_INTERVAL_MS the timer:
 * 1. stops itself if the supervisor is no longer active;
 * 2. reads the lockfile once and, if its `sessionId` no longer matches the
 *    one captured at start (force takeover by another session), notifies the
 *    operator and deactivates this supervisor;
 * 3. otherwise refreshes the `heartbeat` timestamp (and the `batchId`, which
 *    may have been a placeholder when the lock was created).
 *
 * The tick is fully guarded: no error escapes the interval callback as an
 * unhandled promise rejection, and overlapping ticks are skipped.
 *
 * @param stateRoot - Root path for .pi/ state directory
 * @param state - Supervisor state (used for yield detection)
 * @param pi - ExtensionAPI for deactivation on yield
 * @returns Timer handle (for cleanup via clearInterval)
 *
 * @since TP-041
 */
export function startHeartbeat(
	stateRoot: string,
	state: SupervisorState,
	pi: ExtensionAPI,
): ReturnType<typeof setInterval> {
	// Captured once: this is the identity the timer defends.
	const sessionId = state.lockSessionId;
	let heartbeatInProgress = false; // Overlap guard (TP-070)

	const timer = setInterval(async () => {
		if (!state.active) {
			clearInterval(timer);
			return;
		}

		if (heartbeatInProgress) return; // Overlap guard (TP-070)
		heartbeatInProgress = true;

		try {
			// Single async read serves both takeover detection and the update.
			const currentLock = await readLockfileAsync(stateRoot);

			if (currentLock && currentLock.sessionId !== sessionId) {
				// Another session has taken over — yield gracefully.
				clearInterval(timer);
				try {
					pi.sendMessage(
						{
							customType: "supervisor-yield",
							content: [
								{
									type: "text",
									text: "⚡ Another session has taken over supervisor duties. Yielding.",
								},
							],
							display: "Supervisor yielded to another session",
						},
						{ triggerTurn: false },
					);
				} catch {
					// A failed notification must not prevent the yield itself.
				}
				// Awaited so a rejection is handled here instead of floating.
				await deactivateSupervisor(pi, state);
				return;
			}

			// The supervisor may have been deactivated while the read was in
			// flight; do not resurrect a lockfile that was just removed.
			if (!state.active || !currentLock) return;

			currentLock.heartbeat = new Date().toISOString();
			// TP-130: batchId may have been "(initializing)" at lock creation
			// because the batch hadn't started yet. Refresh from live state ref.
			const liveBatchId = state.batchStateRef?.batchId;
			if (liveBatchId && currentLock.batchId !== liveBatchId) {
				currentLock.batchId = liveBatchId;
			}
			await writeLockfileAsync(stateRoot, currentLock);
		} catch {
			// Best-effort heartbeat — don't crash the supervisor
		} finally {
			heartbeatInProgress = false;
		}
	}, HEARTBEAT_INTERVAL_MS);

	// Unref the timer so it doesn't prevent Node.js from exiting
	if (timer && typeof timer === "object" && "unref" in timer) {
		timer.unref();
	}

	return timer;
}
3780
+
3781
+ // ── Engine Event Consumption + Notifications (TP-041 Step 3) ─────────
3782
+
3783
/**
 * How often the event tailer polls for new events, in milliseconds (10 s).
 *
 * A trade-off between responsiveness (the operator sees events promptly) and
 * cost (not re-reading the file too often). Deliberately shorter than the
 * 30-second heartbeat interval so events are reported before the next
 * heartbeat.
 *
 * @since TP-041
 */
export const EVENT_POLL_INTERVAL_MS = 10_000;

/**
 * Coalescing window for task-completion digests, in milliseconds (30 s).
 *
 * Task completions are buffered and reported as one periodic digest rather
 * than one notification each, which avoids spamming turns when many tasks
 * finish close together.
 *
 * @since TP-041
 */
export const TASK_DIGEST_INTERVAL_MS = 30_000;
3805
+
3806
/**
 * Union of every event `type` that can appear in the unified events.jsonl:
 * engine events plus Tier 0 events. Used to narrow parsed lines.
 *
 * @since TP-041
 */
type UnifiedEventType = EngineEventType | Tier0EventType;
3813
+
3814
/**
 * One parsed line of the unified events.jsonl file.
 *
 * The file mixes EngineEvent and Tier0Event entries, distinguished by `type`.
 * For parsing safety this is modelled as a small set of common required
 * fields plus every event-specific field as optional.
 *
 * @since TP-041
 */
interface ParsedEvent {
	// ── Common to all events ─────────────────────────────────────
	timestamp: string;
	type: UnifiedEventType;
	batchId: string;
	waveIndex: number;

	// ── Optional fields carried by EngineEvent entries ───────────
	phase?: string;
	taskIds?: string[];
	laneCount?: number;
	taskId?: string;
	durationMs?: number;
	outcome?: string;
	reason?: string;
	partialProgress?: boolean;
	laneNumber?: number;
	error?: string;
	testCount?: number;
	totalWaves?: number;
	succeededTasks?: number;
	failedTasks?: number;
	skippedTasks?: number;
	blockedTasks?: number;
	batchDurationMs?: number;

	// ── Optional merge health monitoring fields (TP-056) ─────────
	sessionName?: string;
	healthStatus?: string;
	stalledMinutes?: number;

	// ── Optional fields carried by Tier0Event entries ────────────
	pattern?: string;
	attempt?: number;
	maxAttempts?: number;
	classification?: string;
	resolution?: string;
	suggestion?: string;
	affectedTaskIds?: string[];
	message?: string;
}
3860
+
3861
/**
 * Event types eligible for an immediate, individual operator notification.
 *
 * - Wave / merge lifecycle: wave_start, merge_start, merge_success, merge_failed
 * - Merge health monitoring: merge_health_warning, merge_health_dead, merge_health_stuck
 * - Batch lifecycle: batch_complete, batch_paused
 * - Tier 0: tier0_escalation
 *
 * Task-level and Tier 0 recovery events are not listed here; they are
 * coalesced into periodic digests instead.
 *
 * @since TP-041
 */
const SIGNIFICANT_EVENT_TYPES: Set<UnifiedEventType> = new Set([
	// Wave / merge lifecycle
	"wave_start",
	"merge_start",
	"merge_success",
	"merge_failed",
	// Merge health monitoring
	"merge_health_warning",
	"merge_health_dead",
	"merge_health_stuck",
	// Batch lifecycle
	"batch_complete",
	"batch_paused",
	// Tier 0
	"tier0_escalation",
]);
3884
+
3885
/**
 * Event types that are never notified one-by-one; they are counted in the
 * digest buffer and reported together in a periodic summary.
 *
 * @since TP-041
 */
const DIGEST_EVENT_TYPES: Set<UnifiedEventType> = new Set([
	// Task outcomes
	"task_complete",
	"task_failed",
	// Tier 0 recovery bookkeeping
	"tier0_recovery_attempt",
	"tier0_recovery_success",
	"tier0_recovery_exhausted",
]);
3897
+
3898
/**
 * Accumulator for digest-class events between two digest flushes.
 *
 * Task outcomes are kept as lists of task IDs; Tier 0 recovery events
 * are kept as plain counters.
 *
 * @since TP-041
 */
interface TaskDigestBuffer {
	/** IDs of tasks that completed since the last digest. */
	completed: string[];
	/** IDs of tasks that failed since the last digest. */
	failed: string[];
	/** Number of Tier 0 recovery attempts seen since the last digest. */
	recoveryAttempts: number;
	/** Number of Tier 0 recovery successes seen since the last digest. */
	recoverySuccesses: number;
	/** Number of Tier 0 "recovery exhausted" events seen since the last digest. */
	recoveryExhausted: number;
}
3915
+
3916
/**
 * Mutable state of the event tailer: read cursor, line reassembly buffer,
 * batch filter, pending digest, and the two interval handles.
 *
 * @since TP-041
 */
export interface EventTailerState {
	/** True while the tailer is started; false once stopped. */
	running: boolean;
	/** Read cursor into events.jsonl, in bytes — only data past it is new. */
	byteOffset: number;
	/** Trailing text of the last read that did not yet end in a newline. */
	partialLine: string;
	/** Batch ID that events are filtered against (empty = not yet known). */
	batchId: string;
	/** Digest-class events collected since the last digest flush. */
	digestBuffer: TaskDigestBuffer;
	/** Handle of the polling interval, or null when not scheduled. */
	pollTimer: ReturnType<typeof setInterval> | null;
	/** Handle of the digest flush interval, or null when not scheduled. */
	digestTimer: ReturnType<typeof setInterval> | null;
}
3938
+
3939
/**
 * Build the initial state of an event tailer that has not been started:
 * cursor at zero, no partial line, no batch filter, empty digest, no timers.
 *
 * @returns A new, independent state object
 *
 * @since TP-041
 */
export function freshEventTailerState(): EventTailerState {
	const initial: EventTailerState = {
		running: false,
		byteOffset: 0,
		partialLine: "",
		batchId: "",
		digestBuffer: freshDigestBuffer(),
		pollTimer: null,
		digestTimer: null,
	};
	return initial;
}
3955
+
3956
/**
 * Build an empty digest buffer (no task IDs, all counters at zero).
 *
 * Every call returns new arrays, so buffers never share state.
 *
 * @since TP-041
 */
function freshDigestBuffer(): TaskDigestBuffer {
	const empty: TaskDigestBuffer = {
		completed: [],
		failed: [],
		recoveryAttempts: 0,
		recoverySuccesses: 0,
		recoveryExhausted: 0,
	};
	return empty;
}
3970
+
3971
/**
 * Tell whether a digest buffer holds nothing to report.
 *
 * @param buf - Digest buffer to inspect
 * @returns true when both ID lists are empty and all counters are zero
 *
 * @since TP-041
 */
function isDigestEmpty(buf: TaskDigestBuffer): boolean {
	const hasTaskIds = buf.completed.length !== 0 || buf.failed.length !== 0;
	const hasRecoveryCounts =
		buf.recoveryAttempts !== 0 || buf.recoverySuccesses !== 0 || buf.recoveryExhausted !== 0;
	return !(hasTaskIds || hasRecoveryCounts);
}
3985
+
3986
/**
 * Read new bytes from the events JSONL file starting at the given offset.
 *
 * Uses low-level file descriptor operations for efficient tailing without
 * reading the entire file. Returns the decoded UTF-8 text of the new bytes,
 * or an empty string (and the unchanged offset) if there is nothing to read,
 * the file is missing, or any filesystem call fails.
 *
 * Guarantees:
 * - Only bytes that were actually read are decoded and counted, so a short
 *   read (e.g. the file shrank between stat and read) never yields padding
 *   NUL characters or an offset that skips unread data.
 * - A multi-byte UTF-8 character that is only partially on disk at the end
 *   of the read is left unconsumed; it is picked up in full by a later call
 *   instead of being decoded into replacement characters.
 *
 * If the file is currently not larger than `byteOffset`, nothing is returned
 * until it grows past that offset again.
 *
 * @param eventsPath - Full path to events.jsonl
 * @param byteOffset - Start reading from this byte offset
 * @returns [newData, newByteOffset] — the new data and the updated offset
 *
 * @since TP-041
 */
export function readNewBytes(eventsPath: string, byteOffset: number): [string, number] {
	if (!existsSync(eventsPath)) return ["", byteOffset];

	let fileSize: number;
	try {
		fileSize = statSync(eventsPath).size;
	} catch {
		return ["", byteOffset];
	}

	if (fileSize <= byteOffset) return ["", byteOffset];

	const bytesToRead = fileSize - byteOffset;
	const buffer = Buffer.alloc(bytesToRead);

	let bytesRead = 0;
	let fd: number | null = null;
	try {
		fd = openSync(eventsPath, "r");
		bytesRead = readSync(fd, buffer, 0, bytesToRead, byteOffset);
	} catch {
		return ["", byteOffset];
	} finally {
		if (fd !== null) {
			try {
				closeSync(fd);
			} catch {
				/* best-effort */
			}
		}
	}

	// Exclude an incomplete trailing UTF-8 sequence: walk back over at most
	// three continuation bytes (10xxxxxx) to the lead byte and check whether
	// the sequence it announces is fully present.
	let end = bytesRead;
	let lead = end - 1;
	while (lead >= 0 && end - lead <= 3 && (buffer.readUInt8(lead) & 0xc0) === 0x80) {
		lead--;
	}
	if (lead >= 0) {
		const leadByte = buffer.readUInt8(lead);
		const sequenceLength = leadByte >= 0xf0 ? 4 : leadByte >= 0xe0 ? 3 : leadByte >= 0xc0 ? 2 : 1;
		if (sequenceLength > end - lead) end = lead;
	}

	if (end === 0) return ["", byteOffset];

	return [buffer.toString("utf-8", 0, end), byteOffset + end];
}
4032
+
4033
/**
 * Async version of readNewBytes — reads new bytes without blocking the event loop.
 *
 * Uses `fs/promises` for non-blocking stat and read operations. Any
 * filesystem error (including a missing file) results in an empty string
 * and the unchanged offset.
 *
 * Guarantees:
 * - Only bytes that were actually read are decoded and counted, so a short
 *   read never yields padding NUL characters or an offset that skips unread
 *   data.
 * - A multi-byte UTF-8 character that is only partially on disk at the end
 *   of the read is left unconsumed; it is picked up in full by a later call
 *   instead of being decoded into replacement characters.
 *
 * If the file is currently not larger than `byteOffset`, nothing is returned
 * until it grows past that offset again.
 *
 * @param eventsPath - Full path to events.jsonl
 * @param byteOffset - Start reading from this byte offset
 * @returns [newData, newByteOffset]
 *
 * @since TP-070
 */
export async function readNewBytesAsync(
	eventsPath: string,
	byteOffset: number,
): Promise<[string, number]> {
	try {
		const stats = await fsStat(eventsPath);
		const fileSize = stats.size;
		if (fileSize <= byteOffset) return ["", byteOffset];

		const bytesToRead = fileSize - byteOffset;
		const buffer = Buffer.alloc(bytesToRead);

		let bytesRead = 0;
		const fh = await fsOpen(eventsPath, "r");
		try {
			const result = await fh.read(buffer, 0, bytesToRead, byteOffset);
			bytesRead = result.bytesRead;
		} finally {
			await fh.close();
		}

		// Exclude an incomplete trailing UTF-8 sequence: walk back over at most
		// three continuation bytes (10xxxxxx) to the lead byte and check whether
		// the sequence it announces is fully present.
		let end = bytesRead;
		let lead = end - 1;
		while (lead >= 0 && end - lead <= 3 && (buffer.readUInt8(lead) & 0xc0) === 0x80) {
			lead--;
		}
		if (lead >= 0) {
			const leadByte = buffer.readUInt8(lead);
			const sequenceLength =
				leadByte >= 0xf0 ? 4 : leadByte >= 0xe0 ? 3 : leadByte >= 0xc0 ? 2 : 1;
			if (sequenceLength > end - lead) end = lead;
		}

		if (end === 0) return ["", byteOffset];

		return [buffer.toString("utf-8", 0, end), byteOffset + end];
	} catch {
		return ["", byteOffset];
	}
}
4068
+
4069
/**
 * Split raw file data into JSONL lines and parse the complete ones.
 *
 * The leftover from the previous read is prepended first. Everything up
 * to the last newline is treated as complete lines; whatever follows the
 * last newline is returned unparsed so the caller can prepend it to the
 * next chunk.
 *
 * Lines are skipped (never thrown on) when they are blank, are not valid
 * JSON, or lack a string `timestamp`, `type` or `batchId`
 * (best-effort, per R005 suggestion).
 *
 * @param data - Raw string data from the file
 * @param partialLine - Leftover partial line from previous read
 * @returns [parsedEvents, remainingPartialLine]
 *
 * @since TP-041
 */
export function parseJsonlLines(data: string, partialLine: string): [ParsedEvent[], string] {
	const segments = (partialLine + data).split("\n");

	// The final segment is "" when the data ended on a newline, otherwise
	// it is an unfinished line that must wait for more data.
	const leftover = segments.pop() ?? "";

	const parsedEvents: ParsedEvent[] = [];
	for (const segment of segments) {
		const text = segment.trim();
		if (text.length === 0) continue;

		try {
			const candidate = JSON.parse(text) as Record<string, unknown>;
			// Minimal shape check — kept inside the try so that non-object
			// JSON such as `null` is skipped like any other bad line.
			const requiredAreStrings = [candidate.timestamp, candidate.type, candidate.batchId].every(
				(value) => typeof value === "string",
			);
			if (requiredAreStrings) {
				parsedEvents.push(candidate as unknown as ParsedEvent);
			}
		} catch {
			// Unparseable line — ignore it and move on (R005 suggestion)
		}
	}

	return [parsedEvents, leftover];
}
4112
+
4113
/**
 * Render a single event as the notification text shown to the operator.
 *
 * Wording depends on the event type and, for merge failures, batch pauses
 * and Tier 0 escalations, on the autonomy level. Waves are shown one-based;
 * a negative `waveIndex` is shown as "?". Event types without a dedicated
 * message fall back to a generic "Event: <type>" line.
 *
 * @param event - The parsed event to format
 * @param autonomy - Current autonomy level
 * @returns Formatted notification string
 *
 * @since TP-041
 */
export function formatEventNotification(
	event: ParsedEvent,
	autonomy: SupervisorAutonomyLevel,
): string {
	const wave = event.waveIndex >= 0 ? event.waveIndex + 1 : "?";
	// Shared by the three merge-health messages.
	const laneLabel = event.laneNumber !== undefined ? event.laneNumber : "?";
	const stalledFor = event.stalledMinutes ?? "?";

	switch (event.type) {
		case "wave_start": {
			const lanes = event.laneCount ? ` across ${event.laneCount} lanes` : "";
			const tasks = event.taskIds?.length ?? 0;
			return `🌊 **Wave ${wave} starting** with ${tasks} task(s)${lanes}.`;
		}

		case "merge_start":
			return `🔀 Wave ${wave} merge starting...`;

		case "merge_success": {
			const progress = event.totalWaves ? ` (${wave}/${event.totalWaves})` : "";
			const tests = event.testCount ? ` Tests pass (${event.testCount}).` : " Tests pass.";
			return `✅ **Wave ${wave} merged successfully**${progress}.${tests}`;
		}

		case "merge_failed": {
			const why = event.reason || event.error || "unknown reason";
			const where = event.laneNumber !== undefined ? ` (lane ${event.laneNumber})` : "";
			return autonomy === "autonomous"
				? `⚠️ Wave ${wave} merge failed${where}: ${why}. Attempting recovery...`
				: `⚠️ **Wave ${wave} merge failed**${where}: ${why}.\n Recovery may be needed. Check the merge logs for details.`;
		}

		case "merge_health_warning":
			return `⚠️ Merge agent on lane ${laneLabel} may be stalled (no output for ${stalledFor} min)`;

		case "merge_health_dead":
			return `💀 Merge agent on lane ${laneLabel} session died — triggering early retry`;

		case "merge_health_stuck":
			return `🔒 Merge agent on lane ${laneLabel} appears stuck (no output for ${stalledFor} min). Consider killing and retrying.`;

		case "batch_complete": {
			const counts: string[] = [];
			if (event.succeededTasks !== undefined) {
				counts.push(`${event.succeededTasks} succeeded`);
			}
			// Non-success counters are only mentioned when they are above zero.
			if (event.failedTasks !== undefined && event.failedTasks > 0) {
				counts.push(`${event.failedTasks} failed`);
			}
			if (event.skippedTasks !== undefined && event.skippedTasks > 0) {
				counts.push(`${event.skippedTasks} skipped`);
			}
			if (event.blockedTasks !== undefined && event.blockedTasks > 0) {
				counts.push(`${event.blockedTasks} blocked`);
			}
			const outcome = counts.length > 0 ? counts.join(", ") : "all tasks processed";
			const elapsed = event.batchDurationMs ? ` in ${formatDuration(event.batchDurationMs)}` : "";
			return `🏁 **Batch complete!** ${outcome}${elapsed}.`;
		}

		case "batch_paused": {
			const why = event.reason || "unknown reason";
			const headline = `⏸️ **Batch paused:** ${why}`;
			if (autonomy !== "interactive") return headline;
			return `${headline}\n What would you like to do? Options: fix the issue, skip the task, or abort.`;
		}

		case "tier0_escalation": {
			const what = event.pattern || "unknown";
			const hint = event.suggestion || "Manual intervention needed.";
			switch (autonomy) {
				case "autonomous":
					return `⚡ **Tier 0 escalation** (${what}): Investigating automatically. ${hint}`;
				case "interactive":
					return `❌ **Tier 0 escalation** (${what}): ${hint}\n Need your input on how to proceed.`;
				default:
					// supervised
					return `⚡ **Tier 0 escalation** (${what}): ${hint}\n Diagnosing — will ask if novel recovery is needed.`;
			}
		}

		default:
			return `📌 Event: ${event.type} (wave ${wave})`;
	}
}
4214
+
4215
/**
 * Render the contents of a digest buffer as one "Progress update" message.
 *
 * - Completed tasks: count only, plus the task IDs in interactive mode.
 * - Failed tasks: count and task IDs at every autonomy level.
 * - Recovery attempts: reported unless the autonomy level is autonomous.
 * - Exhausted recovery budgets: reported at every autonomy level.
 *
 * @param buf - Digest buffer to format
 * @param autonomy - Current autonomy level
 * @returns Formatted digest string, or null if the buffer is empty or
 *          nothing in it is reportable at this autonomy level
 *
 * @since TP-041
 */
export function formatTaskDigest(
	buf: TaskDigestBuffer,
	autonomy: SupervisorAutonomyLevel,
): string | null {
	if (isDigestEmpty(buf)) return null;

	const lines: string[] = [];

	const completedCount = buf.completed.length;
	if (completedCount > 0) {
		// Individual IDs of completed tasks are only listed in interactive mode.
		const ids = autonomy === "interactive" ? `: ${buf.completed.join(", ")}` : "";
		lines.push(`✓ ${completedCount} task(s) completed${ids}`);
	}

	const failedCount = buf.failed.length;
	if (failedCount > 0) {
		// Failures always carry their IDs — they need attention.
		lines.push(`✗ ${failedCount} task(s) failed: ${buf.failed.join(", ")}`);
	}

	if (autonomy !== "autonomous" && buf.recoveryAttempts > 0) {
		const succeededNote =
			buf.recoverySuccesses > 0 ? ` (${buf.recoverySuccesses} succeeded)` : "";
		lines.push(`🔄 ${buf.recoveryAttempts} recovery attempt(s)${succeededNote}`);
	}

	if (buf.recoveryExhausted > 0) {
		lines.push(`⚠️ ${buf.recoveryExhausted} recovery budget(s) exhausted`);
	}

	// E.g. autonomous mode with nothing but recovery attempts in the buffer.
	return lines.length === 0 ? null : `📊 **Progress update:**\n ${lines.join("\n ")}`;
}
4259
+
4260
/**
 * Turn a millisecond duration into a short human-readable label.
 *
 * Output uses the two largest relevant units and omits a zero remainder:
 * `45s`, `3m`, `3m 5s`, `2h`, `2h 10m`. Seconds are dropped once the
 * duration reaches an hour.
 *
 * @since TP-041
 */
function formatDuration(ms: number): string {
	const totalSeconds = Math.floor(ms / 1000);
	if (totalSeconds < 60) return `${totalSeconds}s`;

	const totalMinutes = Math.floor(totalSeconds / 60);
	if (totalMinutes < 60) {
		const seconds = totalSeconds % 60;
		return seconds > 0 ? `${totalMinutes}m ${seconds}s` : `${totalMinutes}m`;
	}

	const hours = Math.floor(totalMinutes / 60);
	const minutes = totalMinutes % 60;
	return minutes > 0 ? `${hours}h ${minutes}m` : `${hours}h`;
}
4275
+
4276
/**
 * Decide whether an event of the given type warrants an immediate
 * notification at the given autonomy level.
 *
 * - Batch completion/pause, merge failure, dead or stuck merge agents and
 *   Tier 0 escalations are always notified.
 * - **autonomous**: nothing else is notified.
 * - **interactive** / **supervised**: every other significant event type
 *   is notified as well.
 *
 * @param eventType - Type tag of the event
 * @param autonomy - Current autonomy level
 * @returns true if a notification should be emitted
 *
 * @since TP-041
 */
export function shouldNotify(
	eventType: UnifiedEventType,
	autonomy: SupervisorAutonomyLevel,
): boolean {
	// Terminal and failure events bypass the autonomy level entirely.
	switch (eventType) {
		case "batch_complete":
		case "batch_paused":
		case "merge_failed":
		case "merge_health_dead":
		case "merge_health_stuck":
		case "tier0_escalation":
			return true;
		default:
			break;
	}

	// Routine progress: suppressed in autonomous mode, otherwise limited
	// to the significant event types.
	return autonomy !== "autonomous" && SIGNIFICANT_EVENT_TYPES.has(eventType);
}
4310
+
4311
/**
 * Route a list of parsed events: drop those of other batches, fire the
 * batch-complete hook, buffer digest-class events, and notify the rest
 * when the autonomy level calls for it.
 *
 * Mutates `tailer.batchId` (adopted from the first event carrying one
 * when it was empty) and `tailer.digestBuffer`.
 *
 * @param events - Parsed events from the JSONL file
 * @param tailer - Event tailer state (for batchId filter + digest buffer)
 * @param autonomy - Current autonomy level
 * @param notify - Callback to emit a notification to the operator
 * @param onBatchComplete - Optional callback fired when batch_complete event is detected (TP-043)
 *
 * @since TP-041
 */
export function processEvents(
	events: ParsedEvent[],
	tailer: EventTailerState,
	autonomy: SupervisorAutonomyLevel,
	notify: (text: string) => void,
	onBatchComplete?: (event: ParsedEvent) => void,
): void {
	for (const event of events) {
		// ── Batch scoping (R005-1) ───────────────────────────────
		if (tailer.batchId) {
			// A batch is active: ignore events that name a different one.
			if (event.batchId && event.batchId !== tailer.batchId) continue;
		} else if (event.batchId) {
			// Pre-planning: no batch known yet, adopt the one this event names.
			tailer.batchId = event.batchId;
		}

		// ── TP-043: integration hook on batch_complete ───────────
		if (onBatchComplete && event.type === "batch_complete") {
			onBatchComplete(event);
		}

		// ── Digest-class events are only counted, never sent alone ──
		if (DIGEST_EVENT_TYPES.has(event.type)) {
			bufferDigestEvent(event, tailer.digestBuffer);
			continue;
		}

		// ── Everything else: notify now, or consume silently ─────
		// (e.g. merge_start in autonomous mode produces no output)
		if (!shouldNotify(event.type, autonomy)) continue;
		notify(formatEventNotification(event, autonomy));
	}
}
4361
+
4362
/**
 * Record one digest-class event in the digest buffer.
 *
 * Task events append their `taskId` to the matching list (events without
 * a task ID are ignored); Tier 0 recovery events bump the matching
 * counter. Any other event type leaves the buffer untouched.
 *
 * @param event - Event to record
 * @param buf - Digest buffer (mutated)
 *
 * @since TP-041
 */
function bufferDigestEvent(event: ParsedEvent, buf: TaskDigestBuffer): void {
	const { type, taskId } = event;

	if (type === "task_complete") {
		if (taskId) buf.completed.push(taskId);
		return;
	}
	if (type === "task_failed") {
		if (taskId) buf.failed.push(taskId);
		return;
	}
	if (type === "tier0_recovery_attempt") {
		buf.recoveryAttempts += 1;
		return;
	}
	if (type === "tier0_recovery_success") {
		buf.recoverySuccesses += 1;
		return;
	}
	if (type === "tier0_recovery_exhausted") {
		buf.recoveryExhausted += 1;
	}
}
4386
+
4387
/**
 * Start the event tailer — polls events.jsonl for new events and
 * emits proactive notifications to the operator.
 *
 * The tailer:
 * 1. Polls at EVENT_POLL_INTERVAL_MS for new bytes in events.jsonl
 * 2. Parses new JSONL lines, filtering to active batchId
 * 3. Significant events → immediate notification via pi.sendMessage
 * 4. Digest-class events (task and Tier 0 recovery events) → buffered
 *    and flushed every TASK_DIGEST_INTERVAL_MS
 *
 * The read cursor starts at the current end of the file, so only events
 * written after this call are processed; any leftover partial line from
 * an earlier run is discarded for the same reason.
 *
 * The autonomy level is read once at start and stays fixed until the
 * tailer is restarted.
 *
 * Robustness of the async poll:
 * - A poll whose file read was still in flight when the tailer was
 *   stopped (or stopped and restarted) is dropped instead of writing its
 *   stale cursor and events into the current state.
 * - An exception thrown while processing or notifying is contained in the
 *   poll callback so it cannot surface as an unhandled promise rejection.
 *
 * Idempotent: safe to call when already running (no-op).
 *
 * @param pi - ExtensionAPI for sending notifications
 * @param tailer - Event tailer state (mutated)
 * @param supervisorState - Supervisor state (for config + stateRoot)
 * @param setStatus - Optional callback to update the footer status immediately
 *
 * @since TP-041
 */
export function startEventTailer(
	pi: ExtensionAPI,
	tailer: EventTailerState,
	supervisorState: SupervisorState,
	/** Optional callback to update footer status immediately (bypasses sendMessage queue). @since TP-068/214 */
	setStatus?: (key: string, text: string) => void,
): void {
	if (tailer.running) return; // Idempotent guard (R005-2)

	const stateRoot = supervisorState.stateRoot;
	const eventsPath = join(stateRoot, ".pi", "supervisor", "events.jsonl");
	const autonomy = supervisorState.config.autonomy;

	tailer.running = true;
	tailer.batchId = supervisorState.batchId;
	// The cursor is about to jump to the end of the file, so a partial line
	// left over from before can never be completed — drop it.
	tailer.partialLine = "";

	// Initialize byte offset to current file size so we only process
	// events emitted after activation (not stale events from previous batches).
	// For takeover paths, the activation message's standing orders tell the
	// supervisor to read the full events file manually for context.
	if (existsSync(eventsPath)) {
		try {
			tailer.byteOffset = statSync(eventsPath).size;
		} catch {
			tailer.byteOffset = 0;
		}
	} else {
		tailer.byteOffset = 0;
	}

	// Notification callback — sends as a supervisor event message
	const notify = (text: string) => {
		if (!supervisorState.active) return; // Guard: don't notify after deactivation

		// TP-068/214: Update footer status immediately for visibility.
		// setStatus renders in the TUI footer without waiting for user input,
		// unlike sendMessage which queues until next turn.
		if (setStatus) {
			// First line only, markdown bold markers removed, capped at 120 chars.
			const statusText = text.replace(/\*\*/g, "").replace(/\n.*/s, "").substring(0, 120);
			setStatus("supervisor", `🔀 ${statusText}`);
		}

		pi.sendMessage(
			{
				customType: "supervisor-event",
				content: [{ type: "text", text }],
				display: text.replace(/\*\*/g, "").substring(0, 80),
			},
			{ triggerTurn: true },
		);
	};

	// ── TP-043: Integration is triggered by triggerSupervisorIntegration() ──
	// called from the onTerminal callback in startBatchInWorker (extension.ts),
	// gated on phase === "completed" (R002-1). For auto mode, integration is
	// executed programmatically via the executor callback (R002-2). The event
	// tailer does NOT duplicate the integration trigger — batch_complete events
	// are handled via the normal notification path (formatEventNotification).

	// ── Poll timer (async, TP-070) ───────────────────────────────
	let tailerPollInProgress = false; // Overlap guard (TP-070)
	const pollTimer: ReturnType<typeof setInterval> = setInterval(async () => {
		if (!supervisorState.active || !tailer.running) {
			stopEventTailer(tailer);
			return;
		}

		if (tailerPollInProgress) return; // Overlap guard (TP-070)
		tailerPollInProgress = true;

		try {
			const [newData, newOffset] = await readNewBytesAsync(eventsPath, tailer.byteOffset);

			// The tailer may have been stopped — or stopped and restarted with a
			// new timer — while the read was in flight. This poll then belongs
			// to a previous run and must not touch the current state.
			if (!tailer.running || tailer.pollTimer !== pollTimer) return;

			if (!newData) return; // No new data

			tailer.byteOffset = newOffset;
			const [events, remaining] = parseJsonlLines(newData, tailer.partialLine);
			tailer.partialLine = remaining;

			processEvents(events, tailer, autonomy, notify);
		} catch {
			// Best-effort: a failing notifier must not turn into an unhandled
			// promise rejection from the interval callback. The cursor has
			// already advanced, so the next poll simply continues from there.
		} finally {
			tailerPollInProgress = false;
		}
	}, EVENT_POLL_INTERVAL_MS);
	tailer.pollTimer = pollTimer;

	// ── Digest flush timer ───────────────────────────────────────
	tailer.digestTimer = setInterval(() => {
		if (!supervisorState.active || !tailer.running) {
			stopEventTailer(tailer);
			return;
		}

		if (isDigestEmpty(tailer.digestBuffer)) return;

		const digest = formatTaskDigest(tailer.digestBuffer, autonomy);
		if (digest) {
			notify(digest);
		}

		// Reset buffer
		tailer.digestBuffer = freshDigestBuffer();
	}, TASK_DIGEST_INTERVAL_MS);

	// Unref timers so they don't prevent Node.js exit
	if (tailer.pollTimer && typeof tailer.pollTimer === "object" && "unref" in tailer.pollTimer) {
		tailer.pollTimer.unref();
	}
	if (
		tailer.digestTimer &&
		typeof tailer.digestTimer === "object" &&
		"unref" in tailer.digestTimer
	) {
		tailer.digestTimer.unref();
	}
}
4519
+
4520
/**
 * Stop the event tailer.
 *
 * Clears both timers and discards any partial line and any digest that
 * has not been flushed yet (the final digest is deliberately not sent —
 * it would be stale).
 *
 * Timers are cleared even when `running` is already false, so a state
 * whose flag was cleared without going through this function does not
 * keep its intervals alive. The remaining state is only reset when the
 * tailer was actually running.
 *
 * Idempotent: safe to call when already stopped (no-op).
 *
 * @param tailer - Event tailer state (mutated)
 *
 * @since TP-041
 */
export function stopEventTailer(tailer: EventTailerState): void {
	const wasRunning = tailer.running;

	if (tailer.pollTimer) {
		clearInterval(tailer.pollTimer);
		tailer.pollTimer = null;
	}

	if (tailer.digestTimer) {
		clearInterval(tailer.digestTimer);
		tailer.digestTimer = null;
	}

	if (!wasRunning) return; // Idempotent guard — nothing else to reset

	tailer.running = false;
	tailer.partialLine = "";
	tailer.digestBuffer = freshDigestBuffer();
}