taskplane 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +2 -20
- package/bin/taskplane.mjs +706 -0
- package/dashboard/public/app.js +900 -0
- package/dashboard/public/index.html +92 -0
- package/dashboard/public/style.css +924 -0
- package/dashboard/server.cjs +531 -0
- package/extensions/task-orchestrator.ts +28 -0
- package/extensions/task-runner.ts +1923 -0
- package/extensions/taskplane/abort.ts +466 -0
- package/extensions/taskplane/config.ts +102 -0
- package/extensions/taskplane/discovery.ts +988 -0
- package/extensions/taskplane/engine.ts +758 -0
- package/extensions/taskplane/execution.ts +1752 -0
- package/extensions/taskplane/extension.ts +577 -0
- package/extensions/taskplane/formatting.ts +718 -0
- package/extensions/taskplane/git.ts +38 -0
- package/extensions/taskplane/index.ts +22 -0
- package/extensions/taskplane/merge.ts +795 -0
- package/extensions/taskplane/messages.ts +134 -0
- package/extensions/taskplane/persistence.ts +1121 -0
- package/extensions/taskplane/resume.ts +1092 -0
- package/extensions/taskplane/sessions.ts +92 -0
- package/extensions/taskplane/types.ts +1514 -0
- package/extensions/taskplane/waves.ts +900 -0
- package/extensions/taskplane/worktree.ts +1624 -0
- package/package.json +50 -4
- package/skills/create-taskplane-task/SKILL.md +326 -0
- package/skills/create-taskplane-task/references/context-template.md +78 -0
- package/skills/create-taskplane-task/references/prompt-template.md +246 -0
- package/templates/agents/task-merger.md +256 -0
- package/templates/agents/task-reviewer.md +81 -0
- package/templates/agents/task-worker.md +140 -0
- package/templates/config/task-orchestrator.yaml +89 -0
- package/templates/config/task-runner.yaml +99 -0
- package/templates/tasks/CONTEXT.md +31 -0
- package/templates/tasks/EXAMPLE-001-hello-world/PROMPT.md +90 -0
- package/templates/tasks/EXAMPLE-001-hello-world/STATUS.md +73 -0
|
@@ -0,0 +1,1514 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* All types, interfaces, error classes, constants, and defaults
|
|
3
|
+
* @module orch/types
|
|
4
|
+
*/
|
|
5
|
+
import { join } from "path";
|
|
6
|
+
|
|
7
|
+
// ── Types ────────────────────────────────────────────────────────────
|
|
8
|
+
|
|
9
|
+
/**
 * Orchestrator settings as read from `.pi/task-orchestrator.yaml`.
 *
 * Property names are snake_case. Units of the numeric timing fields
 * (`stall_timeout`, `abort_grace_period`, `poll_interval`) are not stated in
 * this declaration — NOTE(review): confirm against the consuming code.
 */
export interface OrchestratorConfig {
	/** Lane limit plus worktree, branch, batch-id and session naming settings. */
	orchestrator: {
		max_lanes: number;
		worktree_location: "sibling" | "subdirectory";
		worktree_prefix: string;
		integration_branch: string;
		batch_id_format: "timestamp" | "sequential";
		spawn_mode: "tmux" | "subprocess";
		tmux_prefix: string;
	};
	/** Where dependency information comes from and whether it is cached. */
	dependencies: {
		source: "prompt" | "agent";
		cache: boolean;
	};
	/** Lane assignment strategy and per-size weights keyed by size label. */
	assignment: {
		strategy: "affinity-first" | "round-robin" | "load-balanced";
		size_weights: Record<string, number>;
	};
	/** Pre-warm settings. */
	pre_warm: {
		auto_detect: boolean;
		commands: Record<string, string>;
		always: string[];
	};
	/** Merge settings. */
	merge: {
		model: string;
		tools: string;
		verify: string[];
		order: "fewest-files-first" | "sequential";
	};
	/** Failure policies and time limits. */
	failure: {
		on_task_failure: "skip-dependents" | "stop-wave" | "stop-all";
		on_merge_failure: "pause" | "abort";
		stall_timeout: number;
		max_worker_minutes: number;
		abort_grace_period: number;
	};
	/** Monitoring settings. */
	monitoring: {
		poll_interval: number;
	};
}
|
|
50
|
+
|
|
51
|
+
/**
 * One task parsed out of a PROMPT.md file, carrying the extra fields the
 * orchestrator works with.
 */
export interface ParsedTask {
	/** Task identifier. */
	taskId: string;
	/** Task name. */
	taskName: string;
	/** Review level as a number. */
	reviewLevel: number;
	/** Size label. */
	size: string;
	/** IDs of the tasks this task depends on. */
	dependencies: string[];
	/** File scope entries for the task. */
	fileScope: string[];
	/** Folder of the task. */
	taskFolder: string;
	/** Path of the task's prompt file. */
	promptPath: string;
	/** Name of the area the task belongs to. */
	areaName: string;
	/** Either still pending or already complete. */
	status: "pending" | "complete";
}
|
|
64
|
+
|
|
65
|
+
/**
 * A wave — a set of tasks whose dependencies have all been satisfied.
 */
export interface WaveAssignment {
	/** Number identifying the wave. */
	waveNumber: number;
	/** Lane assignments for the tasks in this wave. */
	tasks: LaneAssignment[];
}
|
|
70
|
+
|
|
71
|
+
/**
 * Binds one task to a particular lane inside a wave.
 */
export interface LaneAssignment {
	/** ID of the assigned task. */
	taskId: string;
	/** Lane number the task was assigned to. */
	lane: number;
	/** The parsed task itself. */
	task: ParsedTask;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Runtime state covering a whole batch execution.
 */
export interface BatchState {
	/** Current batch phase. */
	phase:
		| "idle"
		| "planning"
		| "running"
		| "paused"
		| "merging"
		| "complete"
		| "error"
		| "aborted";
	/** Batch identifier. */
	batchId: string;
	/** Waves of the batch. */
	waves: WaveAssignment[];
	/** Current wave counter. */
	currentWave: number;
	/** Task counters: total, complete and failed. */
	tasksTotal: number;
	tasksComplete: number;
	tasksFailed: number;
	/** Number of lanes. */
	laneCount: number;
	/** Per-lane status keyed by a number (presumably the lane number — TODO confirm). */
	laneStatuses: Map<number, LaneStatus>;
	/** Start time as a number (unit not stated here). */
	startTime: number;
	/** Error messages collected for the batch. */
	errors: string[];
}
|
|
92
|
+
|
|
93
|
+
/**
 * Runtime status tracked for a single lane.
 */
export interface LaneStatus {
	/** Lane number. */
	lane: number;
	/** Task ID associated with the lane, or null when there is none. */
	taskId: string | null;
	/** Lane state. */
	status: "idle" | "running" | "complete" | "failed" | "stalled";
	/** Step progress as text. */
	stepProgress: string;
	/** Iteration counter. */
	iteration: number;
	/** Elapsed time as a number (unit not stated here). */
	elapsed: number;
	/** Name of the tmux session for the lane. */
	tmuxSession: string;
}
|
|
103
|
+
|
|
104
|
+
/**
 * A task area as defined in task-runner.yaml.
 */
export interface TaskArea {
	/** Path of the area. */
	path: string;
	/** Prefix of the area. */
	prefix: string;
	/** Context entry of the area. */
	context: string;
}
|
|
110
|
+
|
|
111
|
+
/**
 * The portion of task-runner.yaml that the orchestrator relies on.
 */
export interface TaskRunnerConfig {
	/** Task areas keyed by a string. */
	task_areas: Record<string, TaskArea>;
	/** Reference docs keyed by a string. */
	reference_docs: Record<string, string>;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Outcome of a preflight check run.
 */
export interface PreflightResult {
	/** Pass/fail flag for the run. */
	passed: boolean;
	/** The individual checks. */
	checks: PreflightCheck[];
}
|
|
122
|
+
|
|
123
|
+
/**
 * A single preflight check.
 */
export interface PreflightCheck {
	/** Name of the check. */
	name: string;
	/** Result of the check. */
	status: "pass" | "fail" | "warn";
	/** Message for the check. */
	message: string;
	/** Optional hint. */
	hint?: string;
}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
// ── Defaults ─────────────────────────────────────────────────────────
|
|
133
|
+
|
|
134
|
+
/**
 * Built-in orchestrator configuration values.
 *
 * NOTE(review): this is a single exported mutable object; how callers combine
 * it with user configuration is not visible here.
 */
export const DEFAULT_ORCHESTRATOR_CONFIG: OrchestratorConfig = {
	orchestrator: {
		max_lanes: 3,
		worktree_location: "subdirectory",
		worktree_prefix: "taskplane-wt",
		integration_branch: "main",
		batch_id_format: "timestamp",
		spawn_mode: "subprocess",
		tmux_prefix: "orch",
	},
	dependencies: { source: "prompt", cache: true },
	assignment: {
		strategy: "affinity-first",
		// Weight per size label.
		size_weights: { S: 1, M: 2, L: 4 },
	},
	pre_warm: { auto_detect: false, commands: {}, always: [] },
	merge: {
		model: "",
		tools: "read,write,edit,bash,grep,find,ls",
		verify: [],
		order: "fewest-files-first",
	},
	failure: {
		on_task_failure: "skip-dependents",
		on_merge_failure: "pause",
		// Units of the two non-"minutes" timing values are not stated here.
		stall_timeout: 30,
		max_worker_minutes: 30,
		abort_grace_period: 60,
	},
	monitoring: { poll_interval: 5 },
};
|
|
174
|
+
|
|
175
|
+
/** Built-in task-runner configuration: no task areas and no reference docs. */
export const DEFAULT_TASK_RUNNER_CONFIG: TaskRunnerConfig = { task_areas: {}, reference_docs: {} };
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
// ── Helpers ──────────────────────────────────────────────────────────
|
|
182
|
+
|
|
183
|
+
/**
 * Builds a brand-new batch state in the "idle" phase.
 *
 * Every call allocates its own `waves` and `errors` arrays and its own
 * `laneStatuses` map, so returned states never share containers.
 *
 * @returns A state with an empty batch ID, zeroed counters and empty collections.
 */
export function freshBatchState(): BatchState {
	const blank: BatchState = {
		phase: "idle",
		batchId: "",
		waves: [],
		currentWave: 0,
		tasksTotal: 0,
		tasksComplete: 0,
		tasksFailed: 0,
		laneCount: 0,
		laneStatuses: new Map(),
		startTime: 0,
		errors: [],
	};
	return blank;
}
|
|
198
|
+
|
|
199
|
+
// ── Worktree Types ───────────────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
/**
 * Describes a worktree that has been created; this is what createWorktree() returns.
 */
export interface WorktreeInfo {
	/** Absolute path of the worktree directory on the filesystem. */
	path: string;
	/** Name of the branch checked out in the worktree, e.g. task/lane-1-20260308T111750. */
	branch: string;
	/** 1-indexed number of the lane the worktree belongs to. */
	laneNumber: number;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Input options accepted by createWorktree().
 */
export interface CreateWorktreeOptions {
	/** 1-indexed lane number. */
	laneNumber: number;
	/** Timestamp-style batch ID, e.g. "20260308T111750". */
	batchId: string;
	/** Branch the worktree is based on, e.g. "develop". */
	baseBranch: string;
	/** Prefix for the worktree directory, e.g. "taskplane-wt". */
	prefix: string;
	/** Optional full orchestrator config; consulted for worktree_location. */
	config?: OrchestratorConfig;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Stable error codes for worktree operations.
 *
 * - WORKTREE_PATH_IS_WORKTREE: path already registered as a git worktree
 * - WORKTREE_PATH_NOT_EMPTY: path exists and is a non-empty non-worktree dir
 * - WORKTREE_BRANCH_EXISTS: branch name already exists (checked out elsewhere)
 * - WORKTREE_INVALID_BASE: base branch does not exist
 * - WORKTREE_GIT_ERROR: unexpected git command failure
 * - WORKTREE_VERIFY_FAILED: post-creation/reset verification failed
 * - WORKTREE_REMOVE_FAILED: worktree removal failed (even after retries)
 * - WORKTREE_REMOVE_RETRY_EXHAUSTED: all retry attempts for worktree removal exhausted (Windows file locking)
 * - WORKTREE_BRANCH_DELETE_FAILED: branch deletion failed after successful worktree removal
 * - WORKTREE_NOT_FOUND: worktree path does not exist on disk
 * - WORKTREE_NOT_REGISTERED: path exists but is not a registered git worktree
 * - WORKTREE_DIRTY: worktree has uncommitted changes (cannot reset)
 * - WORKTREE_RESET_FAILED: git checkout -B reset command failed
 */
export type WorktreeErrorCode =
	| "WORKTREE_PATH_IS_WORKTREE"
	| "WORKTREE_PATH_NOT_EMPTY"
	| "WORKTREE_BRANCH_EXISTS"
	| "WORKTREE_INVALID_BASE"
	| "WORKTREE_GIT_ERROR"
	| "WORKTREE_VERIFY_FAILED"
	| "WORKTREE_REMOVE_FAILED"
	| "WORKTREE_REMOVE_RETRY_EXHAUSTED"
	| "WORKTREE_BRANCH_DELETE_FAILED"
	| "WORKTREE_NOT_FOUND"
	| "WORKTREE_NOT_REGISTERED"
	| "WORKTREE_DIRTY"
	| "WORKTREE_RESET_FAILED";

/**
 * Typed error class for worktree operations with stable error codes.
 *
 * Callers are expected to branch on `code` rather than parse `message`.
 */
export class WorktreeError extends Error {
	/** Stable, machine-readable code identifying the failure. */
	code: WorktreeErrorCode;

	/**
	 * @param code - Stable error code for the failure.
	 * @param message - Human-readable description, passed through to `Error`.
	 */
	constructor(code: WorktreeErrorCode, message: string) {
		super(message);
		// When compiled down to ES5, `super(message)` returns a plain Error and the
		// subclass prototype is lost, which breaks `instanceof WorktreeError`.
		// Restoring the prototype is a no-op on ES2015+ targets.
		Object.setPrototypeOf(this, new.target.prototype);
		this.name = "WorktreeError";
		this.code = code;
	}
}
|
|
267
|
+
|
|
268
|
+
/**
 * What a removeWorktree() call reports back.
 *
 * The flags let callers branch on the outcome instead of catching errors
 * for the expected idempotent cases.
 */
export interface RemoveWorktreeResult {
	/** True when this call removed the worktree directory. */
	removed: boolean;
	/** True when the worktree was already gone (idempotent no-op). */
	alreadyRemoved: boolean;
	/** True when the lane branch was deleted, or was already absent. */
	branchDeleted: boolean;
	/** True when the lane branch was kept because unmerged commits were detected. */
	branchPreserved: boolean;
	/** Name of the saved branch, when the branch was preserved. */
	savedBranch?: string;
	/** Count of unmerged commits, when the branch was preserved. */
	unmergedCount?: number;
}
|
|
288
|
+
|
|
289
|
+
// ── Bulk Operation Types ─────────────────────────────────────────────
|
|
290
|
+
|
|
291
|
+
/**
 * Failure reported for one worktree during a bulk operation.
 */
export interface BulkWorktreeError {
	/** Number of the lane that failed. */
	laneNumber: number;
	/** Code taken from the WorktreeError when one is available; "UNKNOWN" is also allowed. */
	code: WorktreeErrorCode | "UNKNOWN";
	/** Error message meant for humans. */
	message: string;
}
|
|
300
|
+
|
|
301
|
+
/**
 * What createLaneWorktrees() reports after a bulk creation.
 *
 * Success: `success=true` and `worktrees` holds every created WorktreeInfo.
 * Failure: `success=false`, `errors` lists the per-lane failures, and
 * `rolledBack` says whether the partial state was cleaned up successfully.
 */
export interface CreateLaneWorktreesResult {
	/** True only when every lane worktree was created. */
	success: boolean;
	/** The created worktrees, sorted by laneNumber; empty on failure if rolled back. */
	worktrees: WorktreeInfo[];
	/** Errors hit while creating, one per failing lane. */
	errors: BulkWorktreeError[];
	/** Whether rolling back partially-created worktrees succeeded (only meaningful on failure). */
	rolledBack: boolean;
	/** Errors hit during rollback, if any. */
	rollbackErrors: BulkWorktreeError[];
}
|
|
320
|
+
|
|
321
|
+
/**
 * Outcome for one worktree processed by removeAllWorktrees().
 */
export interface RemoveWorktreeOutcome {
	/** Worktree that removal was attempted on. */
	worktree: WorktreeInfo;
	/** Result of the removal; null when the removal threw. */
	result: RemoveWorktreeResult | null;
	/** Error raised during removal; null when it succeeded. */
	error: BulkWorktreeError | null;
}
|
|
332
|
+
|
|
333
|
+
/**
 * What removeAllWorktrees() reports after a bulk removal.
 *
 * Removal is best-effort: a failure on one worktree does not stop the rest
 * (no fail-fast).
 */
export interface RemoveAllWorktreesResult {
	/** Number of worktrees found that matched the prefix. */
	totalAttempted: number;
	/** Worktrees that were removed, or were already removed. */
	removed: WorktreeInfo[];
	/** Outcomes for the worktrees that could not be removed. */
	failed: RemoveWorktreeOutcome[];
	/** Every per-worktree outcome, in order. */
	outcomes: RemoveWorktreeOutcome[];
	/** Branches kept because they had unmerged commits. */
	preserved: {
		branch: string;
		savedBranch: string;
		laneNumber: number;
		unmergedCount?: number;
	}[];
}
|
|
350
|
+
|
|
351
|
+
// ── Discovery Types ──────────────────────────────────────────────────
|
|
352
|
+
|
|
353
|
+
/**
 * Structured error produced during discovery, with context for diagnosis.
 */
export interface DiscoveryError {
	/** Discriminating error code. */
	code:
		| "PARSE_MISSING_ID"
		| "PARSE_MALFORMED"
		| "DUPLICATE_ID"
		| "UNKNOWN_ARG"
		| "SCAN_ERROR"
		| "DEP_UNRESOLVED"
		| "DEP_PENDING"
		| "DEP_AMBIGUOUS"
		| "DEP_SOURCE_FALLBACK";
	/** Error message. */
	message: string;
	/** Task path, when available. */
	taskPath?: string;
	/** Task ID, when available. */
	taskId?: string;
}
|
|
369
|
+
|
|
370
|
+
/**
 * Output of the complete discovery pipeline.
 */
export interface DiscoveryResult {
	/** Pending tasks keyed by a string (presumably the task ID — TODO confirm). */
	pending: Map<string, ParsedTask>;
	/** Set of completed entries (presumably task IDs — TODO confirm). */
	completed: Set<string>;
	/** Errors collected during discovery. */
	errors: DiscoveryError[];
}
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
// ── Wave Computation Types ───────────────────────────────────────────
|
|
379
|
+
|
|
380
|
+
/**
 * Task dependency graph stored as adjacency lists (task → tasks it depends on).
 */
export interface DependencyGraph {
	/** For each task ID, the IDs of the tasks it depends on (its predecessors). */
	dependencies: Map<string, string[]>;
	/** For each task ID, the IDs of the tasks that depend on it (its successors). */
	dependents: Map<string, string[]>;
	/** Every task ID in the graph — pending tasks only, completed ones are excluded. */
	nodes: Set<string>;
}
|
|
389
|
+
|
|
390
|
+
/**
 * Outcome of validating the graph.
 */
export interface GraphValidationResult {
	/** Validity flag. */
	valid: boolean;
	/** Errors reported by validation. */
	errors: DiscoveryError[];
}
|
|
395
|
+
|
|
396
|
+
/**
 * Outcome of computing waves.
 */
export interface WaveComputationResult {
	/** The computed waves. */
	waves: WaveAssignment[];
	/** Errors reported by the computation. */
	errors: DiscoveryError[];
}
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
// ── Lane Allocation (Phase 3) ────────────────────────────────────────
|
|
404
|
+
|
|
405
|
+
/**
 * Error codes specific to lane allocation.
 *
 * - ALLOC_INVALID_CONFIG: configuration validation failed
 * - ALLOC_EMPTY_WAVE: no tasks provided for allocation
 * - ALLOC_WORKTREE_FAILED: worktree creation failed (includes rollback info)
 * - ALLOC_TASK_NOT_FOUND: task ID from wave not found in pending map
 */
export type AllocationErrorCode =
	| "ALLOC_INVALID_CONFIG"
	| "ALLOC_EMPTY_WAVE"
	| "ALLOC_WORKTREE_FAILED"
	| "ALLOC_TASK_NOT_FOUND";

/**
 * Typed error for lane allocation failures.
 *
 * Carries a stable `code` plus an optional free-form `details` string.
 */
export class AllocationError extends Error {
	/** Stable, machine-readable code identifying the failure. */
	code: AllocationErrorCode;
	/** Optional extra detail text; undefined when not supplied. */
	details?: string;

	/**
	 * @param code - Stable error code for the failure.
	 * @param message - Human-readable description, passed through to `Error`.
	 * @param details - Optional additional detail text.
	 */
	constructor(code: AllocationErrorCode, message: string, details?: string) {
		super(message);
		// When compiled down to ES5, `super(message)` returns a plain Error and the
		// subclass prototype is lost, which breaks `instanceof AllocationError`.
		// Restoring the prototype is a no-op on ES2015+ targets.
		Object.setPrototypeOf(this, new.target.prototype);
		this.name = "AllocationError";
		this.code = code;
		this.details = details;
	}
}
|
|
431
|
+
|
|
432
|
+
/**
 * One task placed in a lane, together with its position in that lane.
 *
 * A lane runs its tasks one after another in ascending `order`; the same
 * input always yields the same ordering.
 */
export interface AllocatedTask {
	/** ID of the task, e.g. "TO-014". */
	taskId: string;
	/** 0-indexed execution position within the lane. */
	order: number;
	/** Complete parsed metadata for the task. */
	task: ParsedTask;
	/** Duration estimate, in minutes. */
	estimatedMinutes: number;
}
|
|
448
|
+
|
|
449
|
+
/**
 * A lane that has been fully allocated and can be executed.
 *
 * Holds what Steps 2-3 need in order to spawn TMUX sessions, monitor
 * progress and identify the lane. It is the contract between Step 1
 * (allocation) and Step 2 (execution).
 */
export interface AllocatedLane {
	/** Deterministic, 1-indexed lane number. */
	laneNumber: number;
	/** Display/logging identifier for the lane, e.g. "lane-1". */
	laneId: string;
	/** Seed for TMUX session naming, e.g. "orch-lane-1" — consumed by Step 2. */
	tmuxSessionName: string;
	/** Absolute path of the lane's worktree directory. */
	worktreePath: string;
	/** Name of the git branch checked out in the worktree. */
	branch: string;
	/** The lane's tasks, in the order they run sequentially. */
	tasks: AllocatedTask[];
	/** Which assignment strategy produced this lane (diagnostic). */
	strategy: "affinity-first" | "round-robin" | "load-balanced";
	/** Estimated total load: the sum of the task weights. */
	estimatedLoad: number;
	/** Estimated total duration in minutes: the sum of the task durations. */
	estimatedMinutes: number;
}
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
// ── Execution Types & Contracts ──────────────────────────────────────
|
|
479
|
+
|
|
480
|
+
/**
 * Lifecycle state of one task during lane execution.
 *
 * Transitions:
 *   pending → running → succeeded | failed | stalled
 *   pending → skipped   (pause/abort before the task starts, or a prior task failed)
 */
export type LaneTaskStatus =
	| "pending"
	| "running"
	| "succeeded"
	| "failed"
	| "stalled"
	| "skipped";
|
|
490
|
+
|
|
491
|
+
/**
 * Result of running one task inside a lane.
 *
 * `executeLane()` produces one of these per task in the lane's list; Step 3
 * (monitoring) and Step 4 (wave policy logic) consume them.
 */
export interface LaneTaskOutcome {
	/** ID of the task, e.g. "TO-014". */
	taskId: string;
	/** Status the task ended in. */
	status: LaneTaskStatus;
	/** Epoch ms at which execution began; null when the task never started (skipped). */
	startTime: number | null;
	/** Epoch ms at which execution finished; null while still pending. */
	endTime: number | null;
	/** Reason for the outcome, written for humans. */
	exitReason: string;
	/** Name of the TMUX session the task ran in, e.g. "orch-lane-1". */
	sessionName: string;
	/** True when the .DONE file was found. */
	doneFileFound: boolean;
}
|
|
513
|
+
|
|
514
|
+
/**
 * Combined result of running every task in a lane.
 *
 * Tasks in a lane run sequentially. When one fails while others are still
 * queued in the lane, the queued ones are marked `skipped`.
 */
export interface LaneExecutionResult {
	/** 1-indexed lane number. */
	laneNumber: number;
	/** Display identifier for the lane, e.g. "lane-1". */
	laneId: string;
	/** Outcome of each task, in the order they were executed. */
	tasks: LaneTaskOutcome[];
	/**
	 * Aggregate status for the lane: "succeeded" when all tasks succeeded,
	 * "failed" when any failed. The type also allows "partial"; its exact
	 * criteria are not stated here — NOTE(review): confirm against executeLane().
	 */
	overallStatus: "succeeded" | "failed" | "partial";
	/** Epoch ms at which lane execution began. */
	startTime: number;
	/** Epoch ms at which lane execution finished. */
	endTime: number;
}
|
|
534
|
+
|
|
535
|
+
// ── Execution Constants ──────────────────────────────────────────────
|
|
536
|
+
|
|
537
|
+
/**
 * How long (ms) to wait after a TMUX session exits before the task is
 * declared failed. Gives slow filesystems time to flush the .DONE file.
 */
export const DONE_GRACE_MS = 5000;

/**
 * How often (ms) session liveness and the .DONE file are polled.
 */
export const EXECUTION_POLL_INTERVAL_MS = 2000;

/**
 * Upper bound on retries when spawning a TMUX session fails.
 * Retries apply only to transient failures (session name collision).
 */
export const SESSION_SPAWN_RETRY_MAX = 2;
|
|
553
|
+
|
|
554
|
+
// ── Execution Error Types ────────────────────────────────────────────
|
|
555
|
+
|
|
556
|
+
/**
 * Error codes for lane execution failures.
 *
 * - EXEC_SPAWN_FAILED: TMUX session could not be created after retries
 * - EXEC_TASK_FAILED: task completed without .DONE (non-zero exit)
 * - EXEC_TASK_STALLED: STATUS.md unchanged for stall_timeout (handled by Step 3)
 * - EXEC_TMUX_NOT_AVAILABLE: tmux binary not found
 * - EXEC_WORKTREE_MISSING: lane worktree path doesn't exist
 */
export type ExecutionErrorCode =
	| "EXEC_SPAWN_FAILED"
	| "EXEC_TASK_FAILED"
	| "EXEC_TASK_STALLED"
	| "EXEC_TMUX_NOT_AVAILABLE"
	| "EXEC_WORKTREE_MISSING";

/**
 * Typed error for lane execution failures.
 *
 * Carries a stable `code` and, when supplied, the lane and task it relates to.
 */
export class ExecutionError extends Error {
	/** Stable, machine-readable code identifying the failure. */
	code: ExecutionErrorCode;
	/** Lane identifier the failure relates to; undefined when not supplied. */
	laneId?: string;
	/** Task identifier the failure relates to; undefined when not supplied. */
	taskId?: string;

	/**
	 * @param code - Stable error code for the failure.
	 * @param message - Human-readable description, passed through to `Error`.
	 * @param laneId - Optional lane identifier.
	 * @param taskId - Optional task identifier.
	 */
	constructor(code: ExecutionErrorCode, message: string, laneId?: string, taskId?: string) {
		super(message);
		// When compiled down to ES5, `super(message)` returns a plain Error and the
		// subclass prototype is lost, which breaks `instanceof ExecutionError`.
		// Restoring the prototype is a no-op on ES2015+ targets.
		Object.setPrototypeOf(this, new.target.prototype);
		this.name = "ExecutionError";
		this.code = code;
		this.laneId = laneId;
		this.taskId = taskId;
	}
}
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
// ── Monitoring Types & Contracts ─────────────────────────────────────
|
|
589
|
+
|
|
590
|
+
/**
 * Point-in-time view of one task's monitored state.
 *
 * `resolveTaskMonitorState()` builds it by combining:
 * - whether the .DONE file is present
 * - whether the TMUX session is alive
 * - what was parsed from STATUS.md
 * - the STATUS.md mtime, used for stall detection
 */
export interface TaskMonitorSnapshot {
	/** ID of the task, e.g. "TO-014". */
	taskId: string;
	/** Monitoring status after resolution. */
	status:
		| "pending"
		| "running"
		| "succeeded"
		| "failed"
		| "stalled"
		| "skipped"
		| "unknown";
	/** Name of the current step, e.g. "Implement Service Layer"; null when not parsed. */
	currentStepName: string | null;
	/** Number of the current step; null when not parsed. */
	currentStepNumber: number | null;
	/** How many steps the task has in total. */
	totalSteps: number;
	/** How many checkboxes are checked, over all steps. */
	totalChecked: number;
	/** How many checkboxes exist, over all steps. */
	totalItems: number;
	/** True while the TMUX session is alive. */
	sessionAlive: boolean;
	/** True when the .DONE file was found. */
	doneFileFound: boolean;
	/** Why the task is considered stalled; null when it is not stalled. */
	stallReason: string | null;
	/** Epoch ms of the last known STATUS.md modification. */
	lastHeartbeat: number | null;
	/** Epoch ms at which the snapshot was taken. */
	observedAt: number;
	/** Reason text when STATUS.md could not be read. */
	parseError: string | null;
	/** Worker iteration number read from STATUS.md. */
	iteration: number;
	/** Review counter read from STATUS.md. */
	reviewCounter: number;
}
|
|
631
|
+
|
|
632
|
+
/**
 * Monitoring snapshot for one lane, rolling up its task-level snapshots.
 */
export interface LaneMonitorSnapshot {
	/** Identifier of the lane, e.g. "lane-1". */
	laneId: string;
	/** 1-indexed lane number. */
	laneNumber: number;
	/** Name of the TMUX session, e.g. "orch-lane-1". */
	sessionName: string;
	/** True when the TMUX session is alive at this moment. */
	sessionAlive: boolean;
	/** ID of the task currently executing; null when the lane is idle or complete. */
	currentTaskId: string | null;
	/** Snapshot of that current task; null when there is no current task. */
	currentTaskSnapshot: TaskMonitorSnapshot | null;
	/** IDs of tasks that completed (succeeded). */
	completedTasks: string[];
	/** IDs of tasks that failed or stalled. */
	failedTasks: string[];
	/** IDs of tasks that have not started yet. */
	remainingTasks: string[];
}
|
|
655
|
+
|
|
656
|
+
/**
 * Monitoring state rolled up over every lane.
 *
 * Primary data contract for:
 * - Step 4 (wave execution loop), which uses it for failure policy decisions
 * - Step 6 (dashboard widget), which renders it
 */
export interface MonitorState {
	/** One snapshot per lane. */
	lanes: LaneMonitorSnapshot[];
	/** Overall progress counters: done, failed and total tasks. */
	tasksDone: number;
	tasksFailed: number;
	tasksTotal: number;
	/** Number of the current wave. */
	waveNumber: number;
	/** How many poll cycles have completed. */
	pollCount: number;
	/** Epoch ms of the most recent poll. */
	lastPollTime: number;
	/** True once every lane has reached a terminal state. */
	allTerminal: boolean;
}
|
|
679
|
+
|
|
680
|
+
/**
 * Stall-detection bookkeeping for one task, based on file mtime.
 *
 * Records when the task was first observed (for the startup grace period),
 * the last known STATUS.md mtime, and the state of the stall timer.
 */
export interface MtimeTracker {
	/** ID of the tracked task. */
	taskId: string;
	/** Epoch ms at which the task was first observed running. */
	firstObservedAt: number;
	/** True once STATUS.md has been read successfully at least one time. */
	statusFileSeenOnce: boolean;
	/** Most recent known STATUS.md mtime in epoch ms; null when never read. */
	lastMtime: number | null;
	/** Epoch ms at which the stall timer began (the mtime stopped changing). */
	stallTimerStart: number | null;
}
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
// ── Wave Execution Types & Contracts ─────────────────────────────────
|
|
701
|
+
|
|
702
|
+
/**
|
|
703
|
+
* Failure policy action matrix.
|
|
704
|
+
*
|
|
705
|
+
* Defines what happens to tasks in different states when a failure occurs,
|
|
706
|
+
* depending on the configured failure policy.
|
|
707
|
+
*
|
|
708
|
+
* | Task State | skip-dependents | stop-wave | stop-all |
|
|
709
|
+
* |---------------|--------------------------|------------------------|---------------------------|
|
|
710
|
+
* | In-flight | Continue running | Continue running | Kill immediately |
|
|
711
|
+
* | Queued (lane) | Continue if not dependent| Skip remaining in lane | Skip remaining in lane |
|
|
712
|
+
* | Future waves | Prune transitive deps | Don't start next wave | Don't start any more |
|
|
713
|
+
*
|
|
714
|
+
* Ownership contract:
|
|
715
|
+
* - executeLane() is source-of-truth for terminal task status
|
|
716
|
+
* - monitorLanes() runs as sibling async loop, can kill stalled sessions
|
|
717
|
+
* - executeWave() coordinates both and applies policy
|
|
718
|
+
* - Monitor's stall-kill does NOT conflict with executeLane() because
|
|
719
|
+
* executeLane() polls tmux session status and will see the killed session
|
|
720
|
+
*/
|
|
721
|
+
|
|
722
|
+
/**
 * What executing one wave produced.
 *
 * Consumers:
 * - Step 5 (/orch command), to decide how to progress from wave to wave
 * - Step 6 (dashboard widget), to render wave summaries
 */
export interface WaveExecutionResult {
	/** 1-indexed wave number. */
	waveIndex: number;
	/** Epoch ms at which the wave began executing. */
	startedAt: number;
	/** Epoch ms at which the wave finished executing. */
	endedAt: number;
	/** Execution result of each lane. */
	laneResults: LaneExecutionResult[];
	/** The failure policy that was configured. */
	policyApplied: "skip-dependents" | "stop-wave" | "stop-all";
	/** True when policy caused the wave to stop early. */
	stoppedEarly: boolean;
	/** IDs of failed tasks, stalled ones included. */
	failedTaskIds: string[];
	/** IDs of skipped tasks (because of a pause, a prior failure, or policy). */
	skippedTaskIds: string[];
	/** IDs of tasks that succeeded. */
	succeededTaskIds: string[];
	/** IDs of tasks blocked for future waves (transitive dependents of failed tasks). */
	blockedTaskIds: string[];
	/** How many lanes were used. */
	laneCount: number;
	/** Status of the wave as a whole. */
	overallStatus: "succeeded" | "failed" | "partial" | "aborted";
	/** Last monitor state snapshot; null when monitoring was never started. */
	finalMonitorState: MonitorState | null;
	/** Lanes allocated for this wave, kept for merge and cleanup. */
	allocatedLanes: AllocatedLane[];
}
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
// ── Orchestrator Runtime State ───────────────────────────────────────
|
|
762
|
+
|
|
763
|
+
/**
 * Runtime phase of the orchestrator batch execution.
 *
 * State machine:
 *   idle → planning → executing → completed
 *                               → failed
 *                               → stopped (stop-wave/stop-all policy triggered)
 *                               → paused (via /orch-pause)
 *   Any active state → idle (via cleanup after completion/failure)
 *
 * `merging` is a member of the union but does not appear in the diagram above.
 * NOTE(review): presumably entered from `executing` while a wave's lane
 * branches are merged — confirm against the engine and extend the diagram.
 */
export type OrchBatchPhase = "idle" | "planning" | "executing" | "merging" | "paused" | "stopped" | "completed" | "failed";
|
|
774
|
+
|
|
775
|
+
/**
 * Runtime state for a batch execution.
 *
 * This is the primary state object that:
 * - Tracks progress across waves for the /orch command
 * - Is consumed by Step 6 (dashboard widget) for rendering
 * - Tracks pauseSignal for /orch-pause
 * - Accumulates wave results for summary
 *
 * This is the in-memory shape; it holds a `Set` and full result objects.
 * The on-disk counterpart in this file is `PersistedBatchState`, which
 * stores `blockedTaskIds` as a plain array and merge results in summarized form.
 */
export interface OrchBatchRuntimeState {
	/** Current execution phase */
	phase: OrchBatchPhase;
	/** Unique batch identifier (timestamp format, e.g., "20260308T214300") */
	batchId: string;
	/**
	 * Shared pause signal — set by /orch-pause, read by executeLane/executeWave.
	 * Wrapped in an object so every holder of the reference sees the same flag.
	 */
	pauseSignal: { paused: boolean };
	/** All wave results in order (grows as waves complete) */
	waveResults: WaveExecutionResult[];
	/** Current wave index (0-based into waves array, -1 if not started) */
	currentWaveIndex: number;
	/** Total number of waves planned */
	totalWaves: number;
	/** Set of task IDs blocked for future waves (from skip-dependents policy) */
	blockedTaskIds: Set<string>;
	/** Epoch ms when batch started */
	startedAt: number;
	/** Epoch ms when batch ended (null if still running) */
	endedAt: number | null;
	/** Total tasks in batch */
	totalTasks: number;
	/** Tasks completed successfully */
	succeededTasks: number;
	/** Tasks that failed */
	failedTasks: number;
	/** Tasks skipped */
	skippedTasks: number;
	/** Tasks blocked (transitive dependents of failures) */
	blockedTasks: number;
	/** Error messages for display */
	errors: string[];
	/** Allocated lanes from current wave (for session registry) */
	currentLanes: AllocatedLane[];
	/** Dependency graph for the batch (for skip-dependents computation) */
	dependencyGraph: DependencyGraph | null;
	/** Accumulated merge results across all waves */
	mergeResults: MergeWaveResult[];
}
|
|
822
|
+
|
|
823
|
+
/**
 * Session registry entry for /orch-sessions command.
 *
 * One entry describes a single TMUX session and the lane/worktree it serves.
 */
export interface OrchestratorSessionEntry {
	/** TMUX session name (e.g., "orch-lane-1") */
	sessionName: string;
	/** Lane ID (e.g., "lane-1") */
	laneId: string;
	/** Task ID currently running (if tracked), otherwise null */
	taskId: string | null;
	/** Session status */
	status: "alive" | "dead";
	/** Worktree path */
	worktreePath: string;
	/**
	 * Attach command for user.
	 * NOTE(review): presumably of the form "tmux attach -t <sessionName>" — confirm against the producer.
	 */
	attachCmd: string;
}
|
|
840
|
+
|
|
841
|
+
/**
 * Session registry: maps session names to their metadata.
 *
 * NOTE(review): the key is presumably the same string as the entry's
 * `sessionName` — confirm against the code that populates the map.
 */
export type OrchestratorSessionRegistry = Map<string, OrchestratorSessionEntry>;
|
|
845
|
+
|
|
846
|
+
// ── Batch ID Generation ──────────────────────────────────────────────
|
|
847
|
+
|
|
848
|
+
/**
 * Build a batch identifier from the current local date and time.
 *
 * The result has the shape "YYYYMMDDTHHMMSS" (e.g., "20260308T214300"):
 * the year is emitted as-is, every other component is zero-padded to two
 * digits, and a literal "T" separates the date from the time.
 * Resolution is one second, so two calls within the same second return
 * the same ID.
 *
 * @returns The timestamp-formatted batch ID
 */
export function generateBatchId(): string {
	const stamp = new Date();
	const twoDigits = (value: number): string => String(value).padStart(2, "0");

	// getMonth() is zero-based, hence the +1.
	const datePart =
		String(stamp.getFullYear()) + twoDigits(stamp.getMonth() + 1) + twoDigits(stamp.getDate());
	const timePart =
		twoDigits(stamp.getHours()) + twoDigits(stamp.getMinutes()) + twoDigits(stamp.getSeconds());

	return `${datePart}T${timePart}`;
}
|
|
857
|
+
|
|
858
|
+
/**
 * Build the initial runtime state for a batch that has not started yet.
 *
 * Every call returns a brand-new object with its own `pauseSignal`, `Set`
 * and arrays, so states from different calls never share mutable data.
 *
 * @returns State in the "idle" phase with empty collections, zeroed
 *          counters, an empty batch ID and `currentWaveIndex` of -1
 */
export function freshOrchBatchState(): OrchBatchRuntimeState {
	const initial: OrchBatchRuntimeState = {
		// Identity and lifecycle
		phase: "idle",
		batchId: "",
		pauseSignal: { paused: false },

		// Wave progress (-1 means no wave has started)
		waveResults: [],
		currentWaveIndex: -1,
		totalWaves: 0,
		blockedTaskIds: new Set<string>(),

		// Timing
		startedAt: 0,
		endedAt: null,

		// Task counters
		totalTasks: 0,
		succeededTasks: 0,
		failedTasks: 0,
		skippedTasks: 0,
		blockedTasks: 0,

		// Diagnostics, lanes, dependency and merge bookkeeping
		errors: [],
		currentLanes: [],
		dependencyGraph: null,
		mergeResults: [],
	};
	return initial;
}
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
// ── Merge Types ──────────────────────────────────────────────────────
|
|
886
|
+
|
|
887
|
+
/**
 * Status values a merge result may carry.
 * Mirrors the contract described in .pi/agents/task-merger.md.
 */
export type MergeResultStatus =
	| "SUCCESS"
	| "CONFLICT_RESOLVED"
	| "CONFLICT_UNRESOLVED"
	| "BUILD_FAILURE";

/**
 * Runtime lookup of every valid merge status string.
 *
 * Typed as a set of plain strings so an arbitrary (untrusted) string can be
 * passed to `.has()`; the entries themselves are checked against
 * `MergeResultStatus` at compile time.
 */
export const VALID_MERGE_STATUSES: ReadonlySet<string> = new Set<MergeResultStatus>([
	"SUCCESS",
	"CONFLICT_RESOLVED",
	"CONFLICT_UNRESOLVED",
	"BUILD_FAILURE",
]);
|
|
900
|
+
|
|
901
|
+
/**
 * A single conflict entry in the merge result.
 *
 * Element type of `MergeResult.conflicts`.
 */
export interface MergeConflict {
	/** NOTE(review): presumably the path of the conflicted file — confirm against task-merger.md. */
	file: string;
	/** Conflict category as a free-form string; the allowed values are not constrained by this type. */
	type: string;
	/** Whether this conflict was resolved. */
	resolved: boolean;
	/** Optional description of the resolution; may be absent. */
	resolution?: string;
}
|
|
908
|
+
|
|
909
|
+
/**
 * Verification outcome in the merge result.
 *
 * Value type of `MergeResult.verification`.
 */
export interface MergeVerification {
	/** Whether verification was run. */
	ran: boolean;
	/**
	 * Whether verification passed.
	 * NOTE(review): the value reported when `ran` is false is not defined here — confirm against task-merger.md.
	 */
	passed: boolean;
	/** Output text of the verification step. */
	output: string;
}
|
|
915
|
+
|
|
916
|
+
/**
 * Merge result JSON written by the merge agent.
 * Matches the schema in .pi/agents/task-merger.md § Result File Format.
 *
 * Field names are snake_case, unlike the camelCase used by the other
 * interfaces in this file.
 */
export interface MergeResult {
	/** Outcome reported by the merge agent. */
	status: MergeResultStatus;
	/** Branch that was merged from. */
	source_branch: string;
	/** Branch that was merged into. */
	target_branch: string;
	/** NOTE(review): presumably the hash of the resulting merge commit — confirm format and the value used on failure. */
	merge_commit: string;
	/** Conflict entries reported for this merge. */
	conflicts: MergeConflict[];
	/** Outcome of the post-merge verification step. */
	verification: MergeVerification;
}
|
|
928
|
+
|
|
929
|
+
/**
 * Per-lane merge outcome, enriched by the orchestrator.
 *
 * Wraps the agent-written `MergeResult` with lane identity, branch names,
 * an error string and timing.
 */
export interface MergeLaneResult {
	/** Lane number. */
	laneNumber: number;
	/** Lane identifier. */
	laneId: string;
	/** Branch merged from. */
	sourceBranch: string;
	/** Branch merged into. */
	targetBranch: string;
	/** Result reported by the merge agent, or null. NOTE(review): presumably null when no valid result file was read — confirm. */
	result: MergeResult | null;
	/** Error description, or null. NOTE(review): presumably null on success — confirm. */
	error: string | null;
	/** Duration of this lane's merge in milliseconds. */
	durationMs: number;
}
|
|
939
|
+
|
|
940
|
+
/**
 * Overall wave merge outcome.
 *
 * `PersistedMergeResult` in this file is the summarized form of this
 * type that is written to disk.
 */
export interface MergeWaveResult {
	/**
	 * Index of the wave that was merged.
	 * NOTE(review): the base is not stated here; PersistedMergeResult documents
	 * its `waveIndex` as 0-based — confirm they match.
	 */
	waveIndex: number;
	/** Aggregate merge status for the wave. */
	status: "succeeded" | "failed" | "partial";
	/** Per-lane merge outcomes. */
	laneResults: MergeLaneResult[];
	/** Lane that failed, or null. */
	failedLane: number | null;
	/** Reason for the failure, or null. */
	failureReason: string | null;
	/** Total merge duration for the wave in milliseconds. */
	totalDurationMs: number;
}
|
|
949
|
+
|
|
950
|
+
// ── Merge Error Types ────────────────────────────────────────────────
|
|
951
|
+
|
|
952
|
+
/**
 * Error codes for merge operations (carried by `MergeError.code`).
 *
 * - MERGE_SPAWN_FAILED: Could not create TMUX session for merge agent
 * - MERGE_TIMEOUT: Merge agent did not produce result within timeout
 * - MERGE_SESSION_DIED: TMUX session exited without writing result
 * - MERGE_RESULT_INVALID: Result file exists but contains invalid JSON
 * - MERGE_RESULT_MISSING_FIELDS: Result JSON missing required fields
 * - MERGE_UNKNOWN_STATUS: Result has an unrecognized status value
 * - MERGE_GIT_ERROR: Git command failure during merge setup
 */
export type MergeErrorCode =
	| "MERGE_SPAWN_FAILED"
	| "MERGE_TIMEOUT"
	| "MERGE_SESSION_DIED"
	| "MERGE_RESULT_INVALID"
	| "MERGE_RESULT_MISSING_FIELDS"
	| "MERGE_UNKNOWN_STATUS"
	| "MERGE_GIT_ERROR";
|
|
971
|
+
|
|
972
|
+
/**
 * Typed error class for merge operations.
 *
 * Pairs the usual `Error` message with a `MergeErrorCode` in `code`.
 */
export class MergeError extends Error {
	/** Failure category for this error. */
	code: MergeErrorCode;

	/**
	 * @param code - Failure category stored on the instance
	 * @param message - Human-readable description, forwarded to `Error`
	 */
	constructor(code: MergeErrorCode, message: string) {
		super(message);
		// Replace the inherited "Error" name so the instance reports its own class name.
		this.name = "MergeError";
		this.code = code;
	}
}
|
|
982
|
+
|
|
983
|
+
// ── Merge Constants ──────────────────────────────────────────────────
|
|
984
|
+
|
|
985
|
+
/**
 * Default timeout for merge agent execution (ms).
 * Merge agents typically complete in 10-60 seconds. A 5-minute timeout
 * is generous and covers verification (go build) on large codebases.
 * Evaluates to 300000.
 */
export const MERGE_TIMEOUT_MS = 5 * 60 * 1000;

/**
 * Polling interval for merge result file (ms).
 * Merge agents are fast; poll aggressively (every 2 seconds).
 */
export const MERGE_POLL_INTERVAL_MS = 2_000;

/**
 * Grace period after TMUX session exits before declaring failure (ms).
 * Allows for slow disk flush of the result file.
 */
export const MERGE_RESULT_GRACE_MS = 3_000;

/**
 * Maximum retries for reading a partially-written result file.
 * If JSON parse fails, wait and retry in case the file is still being written.
 * The wait between attempts is MERGE_RESULT_READ_RETRY_DELAY_MS.
 */
export const MERGE_RESULT_READ_RETRIES = 3;

/**
 * Delay between result file read retries (ms).
 */
export const MERGE_RESULT_READ_RETRY_DELAY_MS = 1_000;

/**
 * Maximum retries for TMUX session spawn during merge.
 * NOTE(review): whether this counts retries after the first attempt or
 * total attempts is not visible here — confirm against the spawn loop.
 */
export const MERGE_SPAWN_RETRY_MAX = 2;
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
// ── View-Model Types ─────────────────────────────────────────────────
|
|
1022
|
+
|
|
1023
|
+
/**
 * Summary counts for the orchestrator dashboard.
 * Pure data — no rendering logic.
 *
 * Each field is a task count for the named state.
 * NOTE(review): whether `total` equals the sum of the other fields, and
 * whether `stalled` tasks are also counted in `failed`, is not defined
 * here — confirm against the view-model builder.
 */
export interface OrchSummaryCounts {
	completed: number;
	running: number;
	queued: number;
	failed: number;
	blocked: number;
	stalled: number;
	total: number;
}
|
|
1036
|
+
|
|
1037
|
+
/**
 * Per-lane view data for dashboard rendering.
 * Derived from MonitorState LaneMonitorSnapshot + AllocatedLane metadata.
 */
export interface OrchLaneCardData {
	/** Lane number. */
	laneNumber: number;
	/** Lane identifier. */
	laneId: string;
	/** TMUX session name for the lane. */
	sessionName: string;
	/** Whether the lane's session is alive. */
	sessionAlive: boolean;
	/** ID of the task currently shown for the lane, or null. */
	currentTaskId: string | null;
	/** Name of the current step, or null. */
	currentStepName: string | null;
	/** NOTE(review): presumably the number of checked items in the task's STATUS.md — confirm. */
	totalChecked: number;
	/** NOTE(review): presumably the total number of checklist items (denominator for `totalChecked`) — confirm. */
	totalItems: number;
	/** Number of tasks completed in this lane. */
	completedTasks: number;
	/** Total number of tasks assigned to this lane. */
	totalLaneTasks: number;
	/** Lane status used for rendering. */
	status: "idle" | "running" | "succeeded" | "failed" | "stalled";
	/** Reason for a stall, or null. */
	stallReason: string | null;
}
|
|
1055
|
+
|
|
1056
|
+
/**
 * Dashboard view-model — maps runtime state to render-ready data.
 *
 * This is the single data contract between OrchBatchRuntimeState +
 * MonitorState and the widget rendering function.
 * The display-oriented fields (`waveProgress`, `elapsed`, `attachHint`)
 * are typed as strings; the examples below show their expected form.
 */
export interface OrchDashboardViewModel {
	/** Current batch phase. */
	phase: OrchBatchPhase;
	/** Batch identifier. */
	batchId: string;
	waveProgress: string; // e.g., "2/3"
	elapsed: string; // e.g., "2m 14s"
	/** Aggregate task counts. */
	summary: OrchSummaryCounts;
	/** One card per lane. */
	laneCards: OrchLaneCardData[];
	attachHint: string; // e.g., "tmux attach -t orch-lane-1"
	/** Error messages to display. */
	errors: string[];
	failurePolicy: string | null; // e.g., "stop-wave" if stopped by policy
}
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
// ── State Persistence Types (TS-009) ─────────────────────────────────
|
|
1076
|
+
|
|
1077
|
+
/**
 * Current schema version for batch-state.json.
 * Increment when the persisted schema changes in incompatible ways.
 * loadBatchState() rejects files with a different schemaVersion.
 * Stored in each file as `PersistedBatchState.schemaVersion`.
 */
export const BATCH_STATE_SCHEMA_VERSION = 1;
|
|
1083
|
+
|
|
1084
|
+
/**
 * File name of the persisted batch state.
 * The file lives in the `.pi` directory under the repository root,
 * i.e. `.pi/batch-state.json`.
 */
export const BATCH_STATE_FILENAME = "batch-state.json";

/**
 * Build the path to the batch state file for a repository.
 *
 * The segments are combined with `join`, which normalizes separators but
 * does not resolve against the working directory — so the result is
 * absolute only when `repoRoot` is.
 *
 * @param repoRoot - Path to the repository root (expected to be absolute)
 * @returns `<repoRoot>/.pi/batch-state.json`
 */
export function batchStatePath(repoRoot: string): string {
	const segments = [repoRoot, ".pi", BATCH_STATE_FILENAME];
	return join(...segments);
}
|
|
1097
|
+
|
|
1098
|
+
/**
 * Error codes for state persistence operations (carried by `StateFileError.code`).
 *
 * - STATE_FILE_IO_ERROR: Filesystem read/write/rename failure
 * - STATE_FILE_PARSE_ERROR: File exists but contains invalid JSON
 * - STATE_SCHEMA_INVALID: JSON is valid but fails schema validation
 *   (missing required fields, unknown enum values, version mismatch)
 */
export type StateFileErrorCode =
	| "STATE_FILE_IO_ERROR"
	| "STATE_FILE_PARSE_ERROR"
	| "STATE_SCHEMA_INVALID";
|
|
1110
|
+
|
|
1111
|
+
/**
 * Typed error class for state file operations.
 *
 * Pairs the usual `Error` message with a `StateFileErrorCode` in `code`.
 */
export class StateFileError extends Error {
	/** Failure category for this error. */
	code: StateFileErrorCode;

	/**
	 * @param code - Failure category stored on the instance
	 * @param message - Human-readable description, forwarded to `Error`
	 */
	constructor(code: StateFileErrorCode, message: string) {
		super(message);
		// Replace the inherited "Error" name so the instance reports its own class name.
		this.name = "StateFileError";
		this.code = code;
	}
}
|
|
1121
|
+
|
|
1122
|
+
/**
 * Persisted record of a single task's execution state.
 *
 * Contains everything `/orch-resume` needs to reconstruct
 * task progress without re-running discovery.
 * Element type of `PersistedBatchState.tasks`.
 */
export interface PersistedTaskRecord {
	/** Task identifier (e.g., "TO-014") */
	taskId: string;
	/** Lane number the task was assigned to (1-indexed) */
	laneNumber: number;
	/** TMUX session name used (e.g., "orch-lane-1") */
	sessionName: string;
	/** Current task status */
	status: LaneTaskStatus;
	/** Absolute path to the task's folder (contains PROMPT.md, STATUS.md) */
	taskFolder: string;
	/** Epoch ms when task started (null if never started) */
	startedAt: number | null;
	/** Epoch ms when task ended (null if still pending/running) */
	endedAt: number | null;
	/** Whether .DONE file was found for this task */
	doneFileFound: boolean;
	/**
	 * Human-readable exit reason (if completed/failed).
	 * Typed as a non-nullable string. NOTE(review): presumably an empty
	 * string while the task has no exit reason yet — confirm against the writer.
	 */
	exitReason: string;
}
|
|
1148
|
+
|
|
1149
|
+
/**
 * Persisted record of a lane's configuration.
 *
 * Captures worktree/branch assignment so `/orch-resume` can
 * reconnect to existing worktrees without re-allocation.
 * Element type of `PersistedBatchState.lanes`.
 */
export interface PersistedLaneRecord {
	/** Lane number (1-indexed) */
	laneNumber: number;
	/** Lane identifier (e.g., "lane-1") */
	laneId: string;
	/**
	 * TMUX session name (e.g., "orch-lane-1").
	 * Note the field name differs from `PersistedTaskRecord.sessionName`.
	 */
	tmuxSessionName: string;
	/** Absolute path to the lane's worktree directory */
	worktreePath: string;
	/** Git branch name checked out in the worktree */
	branch: string;
	/** Task IDs assigned to this lane in execution order */
	taskIds: string[];
}
|
|
1169
|
+
|
|
1170
|
+
/**
 * Persisted summary of a wave merge result.
 * Minimal subset of MergeWaveResult needed for resume decisions:
 * the per-lane results and duration are not persisted.
 */
export interface PersistedMergeResult {
	/** Wave index (0-based) */
	waveIndex: number;
	/** Merge status */
	status: "succeeded" | "failed" | "partial";
	/** Which lane failed (null if all succeeded) */
	failedLane: number | null;
	/** Failure reason (null if all succeeded) */
	failureReason: string | null;
}
|
|
1184
|
+
|
|
1185
|
+
/**
 * Persisted batch state written to `.pi/batch-state.json`.
 *
 * This is the serialization contract for batch state persistence.
 * It captures enough information for `/orch-resume` to reconstruct
 * the orchestrator state after a terminal disconnect.
 *
 * Design decisions:
 * - `schemaVersion` lets loaders reject files written with an incompatible schema
 * - Phase uses the same `OrchBatchPhase` literal union as runtime state
 * - Per-task records include folder paths and session names for resume
 * - Merge results are summarized (not full MergeWaveResult) for size
 * - `updatedAt` is the epoch-ms time of the last write, for staleness detection
 *   (it is a wall-clock value, so only as monotonic as the system clock)
 * - `lastError` captures most recent error without PII
 * - `blockedTaskIds` is an array here, whereas the runtime state uses a `Set`
 */
export interface PersistedBatchState {
	/** Schema version — must equal BATCH_STATE_SCHEMA_VERSION */
	schemaVersion: number;
	/** Current batch execution phase */
	phase: OrchBatchPhase;
	/** Unique batch identifier (timestamp format) */
	batchId: string;
	/** Epoch ms when batch started */
	startedAt: number;
	/** Epoch ms when state was last written */
	updatedAt: number;
	/** Epoch ms when batch ended (null if still active) */
	endedAt: number | null;
	/** Current wave index (0-based, -1 if not started) */
	currentWaveIndex: number;
	/** Total number of waves in the plan */
	totalWaves: number;
	/** Wave plan: array of arrays of task IDs per wave */
	wavePlan: string[][];
	/** Per-lane configuration records */
	lanes: PersistedLaneRecord[];
	/** Per-task execution records (all tasks across all waves) */
	tasks: PersistedTaskRecord[];
	/** Merge results for completed waves */
	mergeResults: PersistedMergeResult[];
	/** Summary counters */
	totalTasks: number;
	succeededTasks: number;
	failedTasks: number;
	skippedTasks: number;
	blockedTasks: number;
	/** Task IDs blocked for future waves (from skip-dependents) */
	blockedTaskIds: string[];
	/** Most recent error (code + message, no PII) */
	lastError: { code: string; message: string } | null;
	/** Accumulated error messages */
	errors: string[];
}
|
|
1238
|
+
|
|
1239
|
+
|
|
1240
|
+
// ── Resume (TS-009 Step 4) ───────────────────────────────────────────
|
|
1241
|
+
|
|
1242
|
+
/**
 * Error codes for /orch-resume command failures (carried by `ResumeError.code`).
 *
 * - RESUME_NO_STATE: No batch-state.json found on disk
 * - RESUME_INVALID_STATE: State file exists but cannot be parsed/validated
 * - RESUME_SCHEMA_MISMATCH: State file has incompatible schema version
 * - RESUME_PHASE_NOT_RESUMABLE: Persisted phase does not allow resume
 * - RESUME_TMUX_UNAVAILABLE: TMUX is not available for session reconnection
 * - RESUME_EXECUTION_FAILED: Resume reconciliation succeeded but execution failed
 */
export type ResumeErrorCode =
	| "RESUME_NO_STATE"
	| "RESUME_INVALID_STATE"
	| "RESUME_SCHEMA_MISMATCH"
	| "RESUME_PHASE_NOT_RESUMABLE"
	| "RESUME_TMUX_UNAVAILABLE"
	| "RESUME_EXECUTION_FAILED";
|
|
1259
|
+
|
|
1260
|
+
/**
 * Typed error class for resume failures with stable error codes.
 *
 * Pairs the usual `Error` message with a `ResumeErrorCode` in `code`.
 */
export class ResumeError extends Error {
	/** Failure category for this error. */
	code: ResumeErrorCode;

	/**
	 * @param code - Failure category stored on the instance
	 * @param message - Human-readable description, forwarded to `Error`
	 */
	constructor(code: ResumeErrorCode, message: string) {
		super(message);
		// Replace the inherited "Error" name so the instance reports its own class name.
		this.name = "ResumeError";
		this.code = code;
	}
}
|
|
1270
|
+
|
|
1271
|
+
/**
 * Result of reconciling a single task's persisted state against live signals.
 *
 * Combines persisted status, tmux session liveness, and .DONE file presence
 * into a deterministic action for the resume engine.
 *
 * Reconciliation precedence (highest → lowest):
 *   1. .DONE file found → "mark-complete" (regardless of session state)
 *   2. Session alive + no .DONE → "reconnect" (task is still running)
 *   3. Persisted status is terminal (succeeded/failed/stalled/skipped) → "skip"
 *   4. Session dead + no .DONE + was running → "mark-failed"
 *
 * The list above does not cover the "re-execute" action or the
 * `worktreeExists` signal, both of which are part of this type.
 * NOTE(review): presumably "re-execute" is chosen for a dead session whose
 * worktree still exists (cf. ResumePoint.reExecuteTaskIds) — confirm against
 * the reconciliation code and add it to the precedence list.
 */
export interface ReconciledTaskState {
	/** Task identifier */
	taskId: string;
	/** Status from the persisted state file */
	persistedStatus: LaneTaskStatus;
	/** Reconciled live status after checking signals */
	liveStatus: LaneTaskStatus;
	/** Whether the TMUX session is alive right now */
	sessionAlive: boolean;
	/** Whether the .DONE file was found */
	doneFileFound: boolean;
	/** Whether the lane worktree still exists on disk */
	worktreeExists: boolean;
	/** Action the resume engine should take */
	action: "reconnect" | "mark-complete" | "mark-failed" | "re-execute" | "skip";
}
|
|
1299
|
+
|
|
1300
|
+
/**
 * Result of resume eligibility check.
 *
 * Determines whether a persisted batch state can be resumed based on its phase.
 * `phase` and `batchId` echo the persisted state so callers can report them
 * alongside `reason`.
 */
export interface ResumeEligibility {
	/** Whether the batch can be resumed */
	eligible: boolean;
	/** Human-readable reason (for both eligible and ineligible) */
	reason: string;
	/** Persisted phase */
	phase: OrchBatchPhase;
	/** Batch ID */
	batchId: string;
}
|
|
1315
|
+
|
|
1316
|
+
/**
 * Resume point computed from reconciled task states.
 *
 * Tells the resume engine where to start in the wave plan, and groups
 * task IDs by what resume has to do with them.
 * NOTE(review): whether `reconnectTaskIds` / `reExecuteTaskIds` are also
 * included in `pendingTaskIds` is not defined here — confirm.
 */
export interface ResumePoint {
	/** Wave index to resume from (0-based) */
	resumeWaveIndex: number;
	/** Task IDs confirmed completed (via .DONE or prior succeeded) */
	completedTaskIds: string[];
	/** Task IDs that still need execution */
	pendingTaskIds: string[];
	/** Task IDs confirmed failed (dead session, no .DONE) */
	failedTaskIds: string[];
	/** Task IDs with alive sessions that need reconnection */
	reconnectTaskIds: string[];
	/** Task IDs with dead sessions but existing worktrees that need re-execution */
	reExecuteTaskIds: string[];
}
|
|
1335
|
+
|
|
1336
|
+
// ── Abort (TS-009 Step 5) ────────────────────────────────────────────
|
|
1337
|
+
|
|
1338
|
+
/**
 * Abort mode:
 * - "graceful": checkpoint + wait + force-kill
 * - "hard": immediate kill
 */
export type AbortMode = "graceful" | "hard";
|
|
1342
|
+
|
|
1343
|
+
/**
 * Error codes for abort operations (used in `AbortResult.errors`).
 *
 * - ABORT_TMUX_LIST_FAILED: Could not list TMUX sessions
 * - ABORT_WRAPUP_WRITE_FAILED: Failed to write wrap-up signal file(s)
 * - ABORT_KILL_FAILED: Failed to kill one or more TMUX sessions
 * - ABORT_STATE_DELETE_FAILED: Failed to delete batch-state.json
 */
export type AbortErrorCode =
	| "ABORT_TMUX_LIST_FAILED"
	| "ABORT_WRAPUP_WRITE_FAILED"
	| "ABORT_KILL_FAILED"
	| "ABORT_STATE_DELETE_FAILED";
|
|
1356
|
+
|
|
1357
|
+
/**
 * Per-lane result from an abort operation.
 *
 * Element type of `AbortResult.laneResults`; one entry per targeted session.
 */
export interface AbortLaneResult {
	/** TMUX session name */
	sessionName: string;
	/** Lane ID (e.g., "lane-1") or "unknown" */
	laneId: string;
	/** Task ID if known, otherwise null */
	taskId: string | null;
	/** Task folder path in the worktree (for wrap-up file writing), or null */
	taskFolderInWorktree: string | null;
	/** Whether wrap-up files were written (graceful only) */
	wrapUpWritten: boolean;
	/** Wrap-up write error if any, otherwise null */
	wrapUpError: string | null;
	/** Whether the session was killed */
	sessionKilled: boolean;
	/** Whether the session exited gracefully (before force-kill) */
	exitedGracefully: boolean;
}
|
|
1378
|
+
|
|
1379
|
+
/**
 * Overall result from an abort operation.
 *
 * Aggregates the per-lane results with counters, collected errors and timing.
 */
export interface AbortResult {
	/** Abort mode used */
	mode: AbortMode;
	/** Number of sessions found to abort */
	sessionsFound: number;
	/** Number of sessions actually killed (force-killed or graceful exit) */
	sessionsKilled: number;
	/** Number of sessions that exited gracefully (before timeout) */
	gracefulExits: number;
	/** Per-lane results */
	laneResults: AbortLaneResult[];
	/** Number of wrap-up write failures (graceful only) */
	wrapUpFailures: number;
	/** Whether batch state file was deleted */
	stateDeleted: boolean;
	/** Aggregated errors, each tagged with its AbortErrorCode */
	errors: Array<{ code: AbortErrorCode; message: string }>;
	/** Duration of the abort operation in milliseconds */
	durationMs: number;
}
|
|
1402
|
+
|
|
1403
|
+
/**
 * Action step in an abort plan.
 *
 * Discriminated union on `type`; only "poll-wait" carries extra data
 * (a grace period and a polling interval, both in ms).
 * NOTE(review): presumably a graceful abort plans
 * write-wrapup → poll-wait → kill-remaining and a hard abort plans
 * kill-all — confirm against the abort planner.
 */
export type AbortActionStep =
	| { type: "write-wrapup" }
	| { type: "poll-wait"; gracePeriodMs: number; pollIntervalMs: number }
	| { type: "kill-remaining" }
	| { type: "kill-all" };
|
|
1411
|
+
|
|
1412
|
+
/**
 * Target session with enrichment from persisted state.
 *
 * Fields that could not be filled from persisted state fall back to
 * "unknown" (`laneId`) or null (the nullable fields).
 */
export interface AbortTargetSession {
	/** TMUX session name */
	sessionName: string;
	/** Lane ID from persisted state or "unknown" */
	laneId: string;
	/** Task ID from persisted state or null */
	taskId: string | null;
	/** Task folder path resolved in the worktree (for wrap-up files), or null */
	taskFolderInWorktree: string | null;
	/** Worktree path from persisted state or batch state, or null */
	worktreePath: string | null;
}
|
|
1427
|
+
|
|
1428
|
+
// ── Size-to-Duration Mapping ─────────────────────────────────────────
|
|
1429
|
+
|
|
1430
|
+
/**
 * Default duration mapping (size → minutes).
 *
 * | Size | Weight | Duration |
 * |------|--------|----------|
 * | S    | 1      | 30 min   |
 * | M    | 2      | 60 min   |
 * | L    | 4      | 120 min  |
 */
export const SIZE_DURATION_MINUTES: Record<string, number> = {
	S: 30,
	M: 60,
	L: 120,
};

/** Minutes represented by one unit of size weight (fallback path only). */
export const DURATION_BASE_MINUTES = 30;

/**
 * Get estimated duration in minutes for a task size.
 *
 * Lookup order:
 *   1. Explicit entry in SIZE_DURATION_MINUTES
 *   2. `sizeWeights[size]` × DURATION_BASE_MINUTES
 *   3. `sizeWeights["M"]` × DURATION_BASE_MINUTES
 *   4. 2 × DURATION_BASE_MINUTES
 *
 * Both maps are plain objects, so a bare property read also sees inherited
 * members such as `constructor` or `toString`. Only real numbers are
 * accepted from either map, so those names fall through to the next step
 * instead of producing a function or NaN.
 *
 * @param size - Task size label (e.g., "S", "M", "L")
 * @param sizeWeights - Size label → relative weight; a missing, zero or NaN
 *                      weight is treated as absent
 * @returns Estimated duration in minutes
 */
export function getTaskDurationMinutes(
	size: string,
	sizeWeights: Record<string, number>,
): number {
	const explicitMinutes: unknown = SIZE_DURATION_MINUTES[size];
	if (typeof explicitMinutes === "number") {
		return explicitMinutes;
	}

	// Keeps the original truthiness rule (0 and NaN count as "no weight")
	// while rejecting non-numeric values inherited from Object.prototype.
	const usableWeight = (candidate: unknown): number | undefined =>
		typeof candidate === "number" && candidate !== 0 && !Number.isNaN(candidate)
			? candidate
			: undefined;

	const weight = usableWeight(sizeWeights[size]) ?? usableWeight(sizeWeights["M"]) ?? 2;
	return weight * DURATION_BASE_MINUTES;
}
|
|
1460
|
+
|
|
1461
|
+
|
|
1462
|
+
// ── Batch History ────────────────────────────────────────────────────
|
|
1463
|
+
|
|
1464
|
+
/**
 * Token counts for a task, wave, or batch.
 *
 * Used as the `tokens` field of BatchTaskSummary, BatchWaveSummary and
 * BatchHistorySummary.
 */
export interface TokenCounts {
	/** Input token count. */
	input: number;
	/** Output token count. */
	output: number;
	/** NOTE(review): presumably tokens read from the prompt cache — confirm against the producer. */
	cacheRead: number;
	/** NOTE(review): presumably tokens written to the prompt cache — confirm against the producer. */
	cacheWrite: number;
	/** Cost in US dollars. */
	costUsd: number;
}
|
|
1472
|
+
|
|
1473
|
+
/**
 * Per-task summary for history.
 *
 * Element type of `BatchHistorySummary.tasks`.
 */
export interface BatchTaskSummary {
	/** Task identifier. */
	taskId: string;
	/** Task name. */
	taskName: string;
	/** Final outcome recorded for the task. */
	status: "succeeded" | "failed" | "skipped" | "blocked" | "stalled";
	wave: number; // 1-based
	lane: number; // 1-based
	/** Task duration in milliseconds. */
	durationMs: number;
	/** Token usage attributed to the task. */
	tokens: TokenCounts;
	/** Exit reason, or null. */
	exitReason: string | null;
}
|
|
1484
|
+
|
|
1485
|
+
/**
 * Per-wave summary for history.
 *
 * Element type of `BatchHistorySummary.waves`.
 */
export interface BatchWaveSummary {
	wave: number; // 1-based
	tasks: string[]; // task IDs
	/**
	 * Merge outcome for the wave. Adds "skipped" to the statuses used by
	 * MergeWaveResult. NOTE(review): presumably "skipped" means no merge was
	 * attempted for the wave — confirm against the history writer.
	 */
	mergeStatus: "succeeded" | "failed" | "partial" | "skipped";
	/** Wave duration in milliseconds. */
	durationMs: number;
	/** Token usage attributed to the wave. */
	tokens: TokenCounts;
}
|
|
1493
|
+
|
|
1494
|
+
/**
 * Complete batch history entry — written after Phase 3 cleanup.
 *
 * Unlike the runtime and persisted batch state, `endedAt` is non-nullable
 * here and `status` uses its own four-value union rather than `OrchBatchPhase`.
 */
export interface BatchHistorySummary {
	/** Batch identifier. */
	batchId: string;
	/** Final batch outcome. */
	status: "completed" | "partial" | "failed" | "aborted";
	/** NOTE(review): presumably epoch ms, as for the other `startedAt` fields in this file — confirm. */
	startedAt: number;
	/** NOTE(review): presumably epoch ms, as for the other `endedAt` fields in this file — confirm. */
	endedAt: number;
	/** Batch duration in milliseconds. */
	durationMs: number;
	/** Number of waves in the batch. */
	totalWaves: number;
	/** Task counters, mirroring those on the batch state. */
	totalTasks: number;
	succeededTasks: number;
	failedTasks: number;
	skippedTasks: number;
	blockedTasks: number;
	/** Token usage for the whole batch. */
	tokens: TokenCounts;
	/** Per-task summaries. */
	tasks: BatchTaskSummary[];
	/** Per-wave summaries. */
	waves: BatchWaveSummary[];
}
|
|
1511
|
+
|
|
1512
|
+
/**
 * Max number of batch history entries to retain.
 * NOTE(review): presumably the oldest entries are dropped once the limit is
 * exceeded — confirm against the history writer.
 */
export const BATCH_HISTORY_MAX_ENTRIES = 100;
|
|
1514
|
+
|