@litmers/cursorflow-orchestrator 0.1.20 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/CHANGELOG.md +9 -0
  2. package/commands/cursorflow-clean.md +19 -0
  3. package/commands/cursorflow-runs.md +59 -0
  4. package/commands/cursorflow-stop.md +55 -0
  5. package/dist/cli/clean.js +171 -0
  6. package/dist/cli/clean.js.map +1 -1
  7. package/dist/cli/index.js +7 -0
  8. package/dist/cli/index.js.map +1 -1
  9. package/dist/cli/init.js +1 -1
  10. package/dist/cli/init.js.map +1 -1
  11. package/dist/cli/logs.js +83 -42
  12. package/dist/cli/logs.js.map +1 -1
  13. package/dist/cli/monitor.d.ts +7 -0
  14. package/dist/cli/monitor.js +1007 -189
  15. package/dist/cli/monitor.js.map +1 -1
  16. package/dist/cli/prepare.js +4 -3
  17. package/dist/cli/prepare.js.map +1 -1
  18. package/dist/cli/resume.js +188 -236
  19. package/dist/cli/resume.js.map +1 -1
  20. package/dist/cli/run.js +8 -3
  21. package/dist/cli/run.js.map +1 -1
  22. package/dist/cli/runs.d.ts +5 -0
  23. package/dist/cli/runs.js +214 -0
  24. package/dist/cli/runs.js.map +1 -0
  25. package/dist/cli/setup-commands.js +0 -0
  26. package/dist/cli/signal.js +1 -1
  27. package/dist/cli/signal.js.map +1 -1
  28. package/dist/cli/stop.d.ts +5 -0
  29. package/dist/cli/stop.js +215 -0
  30. package/dist/cli/stop.js.map +1 -0
  31. package/dist/cli/tasks.d.ts +10 -0
  32. package/dist/cli/tasks.js +165 -0
  33. package/dist/cli/tasks.js.map +1 -0
  34. package/dist/core/auto-recovery.d.ts +212 -0
  35. package/dist/core/auto-recovery.js +737 -0
  36. package/dist/core/auto-recovery.js.map +1 -0
  37. package/dist/core/failure-policy.d.ts +156 -0
  38. package/dist/core/failure-policy.js +488 -0
  39. package/dist/core/failure-policy.js.map +1 -0
  40. package/dist/core/orchestrator.d.ts +15 -2
  41. package/dist/core/orchestrator.js +392 -15
  42. package/dist/core/orchestrator.js.map +1 -1
  43. package/dist/core/reviewer.d.ts +2 -0
  44. package/dist/core/reviewer.js +2 -0
  45. package/dist/core/reviewer.js.map +1 -1
  46. package/dist/core/runner.d.ts +33 -10
  47. package/dist/core/runner.js +321 -146
  48. package/dist/core/runner.js.map +1 -1
  49. package/dist/services/logging/buffer.d.ts +67 -0
  50. package/dist/services/logging/buffer.js +309 -0
  51. package/dist/services/logging/buffer.js.map +1 -0
  52. package/dist/services/logging/console.d.ts +89 -0
  53. package/dist/services/logging/console.js +169 -0
  54. package/dist/services/logging/console.js.map +1 -0
  55. package/dist/services/logging/file-writer.d.ts +71 -0
  56. package/dist/services/logging/file-writer.js +516 -0
  57. package/dist/services/logging/file-writer.js.map +1 -0
  58. package/dist/services/logging/formatter.d.ts +39 -0
  59. package/dist/services/logging/formatter.js +227 -0
  60. package/dist/services/logging/formatter.js.map +1 -0
  61. package/dist/services/logging/index.d.ts +11 -0
  62. package/dist/services/logging/index.js +30 -0
  63. package/dist/services/logging/index.js.map +1 -0
  64. package/dist/services/logging/parser.d.ts +31 -0
  65. package/dist/services/logging/parser.js +222 -0
  66. package/dist/services/logging/parser.js.map +1 -0
  67. package/dist/services/process/index.d.ts +59 -0
  68. package/dist/services/process/index.js +257 -0
  69. package/dist/services/process/index.js.map +1 -0
  70. package/dist/types/agent.d.ts +20 -0
  71. package/dist/types/agent.js +6 -0
  72. package/dist/types/agent.js.map +1 -0
  73. package/dist/types/config.d.ts +65 -0
  74. package/dist/types/config.js +6 -0
  75. package/dist/types/config.js.map +1 -0
  76. package/dist/types/events.d.ts +125 -0
  77. package/dist/types/events.js +6 -0
  78. package/dist/types/events.js.map +1 -0
  79. package/dist/types/index.d.ts +12 -0
  80. package/dist/types/index.js +37 -0
  81. package/dist/types/index.js.map +1 -0
  82. package/dist/types/lane.d.ts +43 -0
  83. package/dist/types/lane.js +6 -0
  84. package/dist/types/lane.js.map +1 -0
  85. package/dist/types/logging.d.ts +71 -0
  86. package/dist/types/logging.js +16 -0
  87. package/dist/types/logging.js.map +1 -0
  88. package/dist/types/review.d.ts +17 -0
  89. package/dist/types/review.js +6 -0
  90. package/dist/types/review.js.map +1 -0
  91. package/dist/types/run.d.ts +32 -0
  92. package/dist/types/run.js +6 -0
  93. package/dist/types/run.js.map +1 -0
  94. package/dist/types/task.d.ts +71 -0
  95. package/dist/types/task.js +6 -0
  96. package/dist/types/task.js.map +1 -0
  97. package/dist/ui/components.d.ts +134 -0
  98. package/dist/ui/components.js +389 -0
  99. package/dist/ui/components.js.map +1 -0
  100. package/dist/ui/log-viewer.d.ts +49 -0
  101. package/dist/ui/log-viewer.js +449 -0
  102. package/dist/ui/log-viewer.js.map +1 -0
  103. package/dist/utils/checkpoint.d.ts +87 -0
  104. package/dist/utils/checkpoint.js +317 -0
  105. package/dist/utils/checkpoint.js.map +1 -0
  106. package/dist/utils/config.d.ts +4 -0
  107. package/dist/utils/config.js +11 -2
  108. package/dist/utils/config.js.map +1 -1
  109. package/dist/utils/cursor-agent.js.map +1 -1
  110. package/dist/utils/dependency.d.ts +74 -0
  111. package/dist/utils/dependency.js +420 -0
  112. package/dist/utils/dependency.js.map +1 -0
  113. package/dist/utils/doctor.js +10 -5
  114. package/dist/utils/doctor.js.map +1 -1
  115. package/dist/utils/enhanced-logger.d.ts +10 -33
  116. package/dist/utils/enhanced-logger.js +94 -9
  117. package/dist/utils/enhanced-logger.js.map +1 -1
  118. package/dist/utils/git.d.ts +121 -0
  119. package/dist/utils/git.js +322 -2
  120. package/dist/utils/git.js.map +1 -1
  121. package/dist/utils/health.d.ts +91 -0
  122. package/dist/utils/health.js +556 -0
  123. package/dist/utils/health.js.map +1 -0
  124. package/dist/utils/lock.d.ts +95 -0
  125. package/dist/utils/lock.js +332 -0
  126. package/dist/utils/lock.js.map +1 -0
  127. package/dist/utils/log-buffer.d.ts +17 -0
  128. package/dist/utils/log-buffer.js +14 -0
  129. package/dist/utils/log-buffer.js.map +1 -0
  130. package/dist/utils/log-constants.d.ts +23 -0
  131. package/dist/utils/log-constants.js +28 -0
  132. package/dist/utils/log-constants.js.map +1 -0
  133. package/dist/utils/log-formatter.d.ts +9 -0
  134. package/dist/utils/log-formatter.js +113 -70
  135. package/dist/utils/log-formatter.js.map +1 -1
  136. package/dist/utils/log-service.d.ts +19 -0
  137. package/dist/utils/log-service.js +47 -0
  138. package/dist/utils/log-service.js.map +1 -0
  139. package/dist/utils/logger.d.ts +46 -27
  140. package/dist/utils/logger.js +82 -60
  141. package/dist/utils/logger.js.map +1 -1
  142. package/dist/utils/process-manager.d.ts +21 -0
  143. package/dist/utils/process-manager.js +138 -0
  144. package/dist/utils/process-manager.js.map +1 -0
  145. package/dist/utils/retry.d.ts +121 -0
  146. package/dist/utils/retry.js +374 -0
  147. package/dist/utils/retry.js.map +1 -0
  148. package/dist/utils/run-service.d.ts +88 -0
  149. package/dist/utils/run-service.js +412 -0
  150. package/dist/utils/run-service.js.map +1 -0
  151. package/dist/utils/state.d.ts +58 -2
  152. package/dist/utils/state.js +306 -3
  153. package/dist/utils/state.js.map +1 -1
  154. package/dist/utils/task-service.d.ts +82 -0
  155. package/dist/utils/task-service.js +348 -0
  156. package/dist/utils/task-service.js.map +1 -0
  157. package/dist/utils/types.d.ts +2 -272
  158. package/dist/utils/types.js +16 -0
  159. package/dist/utils/types.js.map +1 -1
  160. package/package.json +38 -23
  161. package/scripts/ai-security-check.js +0 -1
  162. package/scripts/local-security-gate.sh +0 -0
  163. package/scripts/monitor-lanes.sh +94 -0
  164. package/scripts/patches/test-cursor-agent.js +0 -1
  165. package/scripts/release.sh +0 -0
  166. package/scripts/setup-security.sh +0 -0
  167. package/scripts/stream-logs.sh +72 -0
  168. package/scripts/verify-and-fix.sh +0 -0
  169. package/src/cli/clean.ts +180 -0
  170. package/src/cli/index.ts +7 -0
  171. package/src/cli/init.ts +1 -1
  172. package/src/cli/logs.ts +79 -42
  173. package/src/cli/monitor.ts +1815 -899
  174. package/src/cli/prepare.ts +4 -3
  175. package/src/cli/resume.ts +220 -277
  176. package/src/cli/run.ts +9 -3
  177. package/src/cli/runs.ts +212 -0
  178. package/src/cli/setup-commands.ts +0 -0
  179. package/src/cli/signal.ts +1 -1
  180. package/src/cli/stop.ts +209 -0
  181. package/src/cli/tasks.ts +154 -0
  182. package/src/core/auto-recovery.ts +909 -0
  183. package/src/core/failure-policy.ts +592 -0
  184. package/src/core/orchestrator.ts +1131 -675
  185. package/src/core/reviewer.ts +4 -0
  186. package/src/core/runner.ts +388 -162
  187. package/src/services/logging/buffer.ts +326 -0
  188. package/src/services/logging/console.ts +193 -0
  189. package/src/services/logging/file-writer.ts +526 -0
  190. package/src/services/logging/formatter.ts +268 -0
  191. package/src/services/logging/index.ts +16 -0
  192. package/src/services/logging/parser.ts +232 -0
  193. package/src/services/process/index.ts +261 -0
  194. package/src/types/agent.ts +24 -0
  195. package/src/types/config.ts +79 -0
  196. package/src/types/events.ts +156 -0
  197. package/src/types/index.ts +29 -0
  198. package/src/types/lane.ts +56 -0
  199. package/src/types/logging.ts +96 -0
  200. package/src/types/review.ts +20 -0
  201. package/src/types/run.ts +37 -0
  202. package/src/types/task.ts +79 -0
  203. package/src/ui/components.ts +430 -0
  204. package/src/ui/log-viewer.ts +485 -0
  205. package/src/utils/checkpoint.ts +374 -0
  206. package/src/utils/config.ts +11 -2
  207. package/src/utils/cursor-agent.ts +1 -1
  208. package/src/utils/dependency.ts +482 -0
  209. package/src/utils/doctor.ts +11 -5
  210. package/src/utils/enhanced-logger.ts +108 -49
  211. package/src/utils/git.ts +374 -2
  212. package/src/utils/health.ts +596 -0
  213. package/src/utils/lock.ts +346 -0
  214. package/src/utils/log-buffer.ts +28 -0
  215. package/src/utils/log-constants.ts +26 -0
  216. package/src/utils/log-formatter.ts +120 -37
  217. package/src/utils/log-service.ts +49 -0
  218. package/src/utils/logger.ts +100 -51
  219. package/src/utils/process-manager.ts +100 -0
  220. package/src/utils/retry.ts +413 -0
  221. package/src/utils/run-service.ts +433 -0
  222. package/src/utils/state.ts +369 -3
  223. package/src/utils/task-service.ts +370 -0
  224. package/src/utils/types.ts +2 -315
@@ -1,675 +1,1131 @@
1
- /**
2
- * Orchestrator - Parallel lane execution with dependency management
3
- *
4
- * Adapted from admin-domains-orchestrator.js
5
- */
6
-
7
- import * as fs from 'fs';
8
- import * as path from 'path';
9
- import { spawn, ChildProcess } from 'child_process';
10
-
11
- import * as logger from '../utils/logger';
12
- import { loadState, saveState, createLaneState } from '../utils/state';
13
- import { LaneState, RunnerConfig, WebhookConfig, DependencyRequestPlan, EnhancedLogConfig } from '../utils/types';
14
- import { events } from '../utils/events';
15
- import { registerWebhooks } from '../utils/webhook';
16
- import { loadConfig, getLogsDir } from '../utils/config';
17
- import * as git from '../utils/git';
18
- import { execSync } from 'child_process';
19
- import { safeJoin } from '../utils/path';
20
- import {
21
- EnhancedLogManager,
22
- createLogManager,
23
- DEFAULT_LOG_CONFIG,
24
- ParsedMessage,
25
- stripAnsi
26
- } from '../utils/enhanced-logger';
27
- import { formatMessageForConsole } from '../utils/log-formatter';
28
-
29
- export interface LaneInfo {
30
- name: string;
31
- path: string;
32
- dependsOn: string[];
33
- startIndex?: number; // Current task index to resume from
34
- }
35
-
36
- export interface SpawnLaneResult {
37
- child: ChildProcess;
38
- logPath: string;
39
- logManager?: EnhancedLogManager;
40
- }
41
-
42
- /**
43
- * Spawn a lane process
44
- */
45
- export function spawnLane({
46
- laneName,
47
- tasksFile,
48
- laneRunDir,
49
- executor,
50
- startIndex = 0,
51
- pipelineBranch,
52
- worktreeDir,
53
- enhancedLogConfig,
54
- noGit = false,
55
- }: {
56
- laneName: string;
57
- tasksFile: string;
58
- laneRunDir: string;
59
- executor: string;
60
- startIndex?: number;
61
- pipelineBranch?: string;
62
- worktreeDir?: string;
63
- enhancedLogConfig?: Partial<EnhancedLogConfig>;
64
- noGit?: boolean;
65
- }): SpawnLaneResult {
66
- fs.mkdirSync(laneRunDir, { recursive: true});
67
-
68
- // Use extension-less resolve to handle both .ts (dev) and .js (dist)
69
- const runnerPath = require.resolve('./runner');
70
-
71
- const args = [
72
- runnerPath,
73
- tasksFile,
74
- '--run-dir', laneRunDir,
75
- '--executor', executor,
76
- '--start-index', startIndex.toString(),
77
- ];
78
-
79
- if (pipelineBranch) {
80
- args.push('--pipeline-branch', pipelineBranch);
81
- }
82
-
83
- if (worktreeDir) {
84
- args.push('--worktree-dir', worktreeDir);
85
- }
86
-
87
- if (noGit) {
88
- args.push('--no-git');
89
- }
90
-
91
- // Create enhanced log manager if enabled
92
- const logConfig = { ...DEFAULT_LOG_CONFIG, ...enhancedLogConfig };
93
- let logManager: EnhancedLogManager | undefined;
94
- let logPath: string;
95
- let child: ChildProcess;
96
-
97
- // Build environment for child process
98
- const childEnv = {
99
- ...process.env,
100
- };
101
-
102
- if (logConfig.enabled) {
103
- // Create callback for clean console output
104
- const onParsedMessage = (msg: ParsedMessage) => {
105
- const formatted = formatMessageForConsole(msg, {
106
- laneLabel: `[${laneName}]`,
107
- includeTimestamp: true
108
- });
109
- process.stdout.write(formatted + '\n');
110
- };
111
-
112
- logManager = createLogManager(laneRunDir, laneName, logConfig, onParsedMessage);
113
- logPath = logManager.getLogPaths().clean;
114
-
115
- // Spawn with pipe for enhanced logging
116
- child = spawn('node', args, {
117
- stdio: ['ignore', 'pipe', 'pipe'],
118
- env: childEnv,
119
- detached: false,
120
- });
121
-
122
- // Buffer for non-JSON lines
123
- let lineBuffer = '';
124
-
125
- // Pipe stdout and stderr through enhanced logger
126
- if (child.stdout) {
127
- child.stdout.on('data', (data: Buffer) => {
128
- logManager!.writeStdout(data);
129
-
130
- // Filter out JSON lines from console output to keep it clean
131
- const str = data.toString();
132
- lineBuffer += str;
133
- const lines = lineBuffer.split('\n');
134
- lineBuffer = lines.pop() || '';
135
-
136
- for (const line of lines) {
137
- const trimmed = line.trim();
138
- // Only print if NOT a noisy line
139
- if (trimmed &&
140
- !trimmed.startsWith('{') &&
141
- !trimmed.startsWith('[') &&
142
- !trimmed.includes('{"type"')) {
143
- process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${logger.COLORS.magenta}${laneName.padEnd(10)}${logger.COLORS.reset} ${line}\n`);
144
- }
145
- }
146
- });
147
- }
148
-
149
- if (child.stderr) {
150
- child.stderr.on('data', (data: Buffer) => {
151
- logManager!.writeStderr(data);
152
- const str = data.toString();
153
- const lines = str.split('\n');
154
- for (const line of lines) {
155
- const trimmed = line.trim();
156
- if (trimmed) {
157
- // Check if it's a real error or just git/status output on stderr
158
- const isStatus = trimmed.startsWith('Preparing worktree') ||
159
- trimmed.startsWith('Switched to a new branch') ||
160
- trimmed.startsWith('HEAD is now at') ||
161
- trimmed.includes('actual output');
162
-
163
- if (isStatus) {
164
- process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${logger.COLORS.magenta}${laneName.padEnd(10)}${logger.COLORS.reset} ${trimmed}\n`);
165
- } else {
166
- process.stderr.write(`${logger.COLORS.red}[${laneName}] ERROR: ${trimmed}${logger.COLORS.reset}\n`);
167
- }
168
- }
169
- }
170
- });
171
- }
172
-
173
- // Close log manager when process exits
174
- child.on('exit', () => {
175
- logManager?.close();
176
- });
177
- } else {
178
- // Fallback to simple file logging
179
- logPath = safeJoin(laneRunDir, 'terminal.log');
180
- const logFd = fs.openSync(logPath, 'a');
181
-
182
- child = spawn('node', args, {
183
- stdio: ['ignore', logFd, logFd],
184
- env: childEnv,
185
- detached: false,
186
- });
187
-
188
- try {
189
- fs.closeSync(logFd);
190
- } catch {
191
- // Ignore
192
- }
193
- }
194
-
195
- return { child, logPath, logManager };
196
- }
197
-
198
- /**
199
- * Wait for child process to exit
200
- */
201
- export function waitChild(proc: ChildProcess): Promise<number> {
202
- return new Promise((resolve) => {
203
- if (proc.exitCode !== null) {
204
- resolve(proc.exitCode);
205
- return;
206
- }
207
-
208
- proc.once('exit', (code) => resolve(code ?? 1));
209
- proc.once('error', () => resolve(1));
210
- });
211
- }
212
-
213
- /**
214
- * List lane task files in directory and load their configs for dependencies
215
- */
216
- export function listLaneFiles(tasksDir: string): LaneInfo[] {
217
- if (!fs.existsSync(tasksDir)) {
218
- return [];
219
- }
220
-
221
- const files = fs.readdirSync(tasksDir);
222
- return files
223
- .filter(f => f.endsWith('.json'))
224
- .sort()
225
- .map(f => {
226
- const filePath = safeJoin(tasksDir, f);
227
- const name = path.basename(f, '.json');
228
- let dependsOn: string[] = [];
229
-
230
- try {
231
- const config = JSON.parse(fs.readFileSync(filePath, 'utf8')) as RunnerConfig;
232
- dependsOn = config.dependsOn || [];
233
- } catch (e) {
234
- logger.warn(`Failed to parse config for lane ${name}: ${e}`);
235
- }
236
-
237
- return {
238
- name,
239
- path: filePath,
240
- dependsOn,
241
- };
242
- });
243
- }
244
-
245
- /**
246
- * Monitor lane states
247
- */
248
- export function printLaneStatus(lanes: LaneInfo[], laneRunDirs: Record<string, string>): void {
249
- const rows = lanes.map(lane => {
250
- const dir = laneRunDirs[lane.name];
251
- if (!dir) return { lane: lane.name, status: '(unknown)', task: '-' };
252
-
253
- const statePath = safeJoin(dir, 'state.json');
254
- const state = loadState<LaneState>(statePath);
255
-
256
- if (!state) {
257
- const isWaiting = lane.dependsOn.length > 0;
258
- return { lane: lane.name, status: isWaiting ? 'waiting' : 'pending', task: '-' };
259
- }
260
-
261
- const idx = (state.currentTaskIndex || 0) + 1;
262
- return {
263
- lane: lane.name,
264
- status: state.status || 'unknown',
265
- task: `${idx}/${state.totalTasks || '?'}`,
266
- };
267
- });
268
-
269
- logger.section('📡 Lane Status');
270
- for (const r of rows) {
271
- console.log(`- ${r.lane}: ${r.status} (${r.task})`);
272
- }
273
- }
274
-
275
- /**
276
- * Resolve dependencies for all blocked lanes and sync with all active lanes
277
- */
278
- async function resolveAllDependencies(
279
- blockedLanes: Map<string, DependencyRequestPlan>,
280
- allLanes: LaneInfo[],
281
- laneRunDirs: Record<string, string>,
282
- pipelineBranch: string,
283
- runRoot: string
284
- ) {
285
- // 1. Collect all unique changes and commands from blocked lanes
286
- const allChanges: string[] = [];
287
- const allCommands: string[] = [];
288
-
289
- for (const [, plan] of blockedLanes) {
290
- if (plan.changes) allChanges.push(...plan.changes);
291
- if (plan.commands) allCommands.push(...plan.commands);
292
- }
293
-
294
- const uniqueChanges = Array.from(new Set(allChanges));
295
- const uniqueCommands = Array.from(new Set(allCommands));
296
-
297
- if (uniqueCommands.length === 0) return;
298
-
299
- // 2. Setup a temporary worktree for resolution if needed, or use the first available one
300
- const firstLaneName = Array.from(blockedLanes.keys())[0]!;
301
- const statePath = safeJoin(laneRunDirs[firstLaneName]!, 'state.json');
302
- const state = loadState<LaneState>(statePath);
303
- const worktreeDir = state?.worktreeDir || safeJoin(runRoot, 'resolution-worktree');
304
-
305
- if (!fs.existsSync(worktreeDir)) {
306
- logger.info(`Creating resolution worktree at ${worktreeDir}`);
307
- git.createWorktree(worktreeDir, pipelineBranch, { baseBranch: 'main' });
308
- }
309
-
310
- // 3. Resolve on pipeline branch
311
- logger.info(`Resolving dependencies on ${pipelineBranch}`);
312
- git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });
313
-
314
- for (const cmd of uniqueCommands) {
315
- logger.info(`Running: ${cmd}`);
316
- try {
317
- execSync(cmd, { cwd: worktreeDir, stdio: 'inherit' });
318
- } catch (e: any) {
319
- throw new Error(`Command failed: ${cmd}. ${e.message}`);
320
- }
321
- }
322
-
323
- try {
324
- git.runGit(['add', '.'], { cwd: worktreeDir });
325
- git.runGit(['commit', '-m', `chore: auto-resolve dependencies\n\n${uniqueChanges.join('\n')}`], { cwd: worktreeDir });
326
-
327
- // Log changed files
328
- const stats = git.getLastOperationStats(worktreeDir);
329
- if (stats) {
330
- logger.info('Changed files:\n' + stats);
331
- }
332
-
333
- git.push(pipelineBranch, { cwd: worktreeDir });
334
- } catch (e) { /* ignore if nothing to commit */ }
335
-
336
- // 4. Sync ALL active lanes (blocked + pending + running)
337
- // Since we only call this when running.size === 0, "active" means not completed/failed
338
- for (const lane of allLanes) {
339
- const laneDir = laneRunDirs[lane.name];
340
- if (!laneDir) continue;
341
-
342
- const laneState = loadState<LaneState>(safeJoin(laneDir, 'state.json'));
343
- if (!laneState || laneState.status === 'completed' || laneState.status === 'failed') continue;
344
-
345
- // Merge pipelineBranch into the lane's current task branch
346
- const currentIdx = laneState.currentTaskIndex;
347
- const taskConfig = JSON.parse(fs.readFileSync(lane.path, 'utf8')) as RunnerConfig;
348
- const task = taskConfig.tasks[currentIdx];
349
-
350
- if (task) {
351
- const lanePipelineBranch = `${pipelineBranch}/${lane.name}`;
352
- const taskBranch = `${lanePipelineBranch}--${String(currentIdx + 1).padStart(2, '0')}-${task.name}`;
353
- logger.info(`Syncing lane ${lane.name} branch ${taskBranch}`);
354
-
355
- try {
356
- // If task branch doesn't exist yet, it will be created from pipelineBranch when the lane starts
357
- if (git.branchExists(taskBranch, { cwd: worktreeDir })) {
358
- git.runGit(['checkout', taskBranch], { cwd: worktreeDir });
359
- git.runGit(['merge', pipelineBranch, '--no-edit'], { cwd: worktreeDir });
360
-
361
- // Log changed files
362
- const stats = git.getLastOperationStats(worktreeDir);
363
- if (stats) {
364
- logger.info(`Sync results for ${lane.name}:\n` + stats);
365
- }
366
-
367
- git.push(taskBranch, { cwd: worktreeDir });
368
- }
369
- } catch (e: any) {
370
- logger.warn(`Failed to sync branch ${taskBranch}: ${e.message}`);
371
- }
372
- }
373
- }
374
-
375
- git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });
376
- }
377
-
378
- /**
379
- * Run orchestration with dependency management
380
- */
381
- export async function orchestrate(tasksDir: string, options: {
382
- runDir?: string;
383
- executor?: string;
384
- pollInterval?: number;
385
- maxConcurrentLanes?: number;
386
- webhooks?: WebhookConfig[];
387
- autoResolveDependencies?: boolean;
388
- enhancedLogging?: Partial<EnhancedLogConfig>;
389
- noGit?: boolean;
390
- } = {}): Promise<{ lanes: LaneInfo[]; exitCodes: Record<string, number>; runRoot: string }> {
391
- const lanes = listLaneFiles(tasksDir);
392
-
393
- if (lanes.length === 0) {
394
- throw new Error(`No lane task files found in ${tasksDir}`);
395
- }
396
-
397
- const config = loadConfig();
398
- const logsDir = getLogsDir(config);
399
- const runId = `run-${Date.now()}`;
400
- // Use absolute path for runRoot to avoid issues with subfolders
401
- const runRoot = options.runDir
402
- ? (path.isAbsolute(options.runDir) ? options.runDir : path.resolve(process.cwd(), options.runDir)) // nosemgrep
403
- : safeJoin(logsDir, 'runs', runId);
404
-
405
- fs.mkdirSync(runRoot, { recursive: true });
406
-
407
- const randomSuffix = Math.random().toString(36).substring(2, 7);
408
- const pipelineBranch = `cursorflow/run-${Date.now().toString(36)}-${randomSuffix}`;
409
-
410
- // Initialize event system
411
- events.setRunId(runId);
412
- if (options.webhooks) {
413
- registerWebhooks(options.webhooks);
414
- }
415
-
416
- events.emit('orchestration.started', {
417
- runId,
418
- tasksDir,
419
- laneCount: lanes.length,
420
- runRoot,
421
- });
422
-
423
- const maxConcurrent = options.maxConcurrentLanes || 10;
424
- const running: Map<string, { child: ChildProcess; logPath: string }> = new Map();
425
- const exitCodes: Record<string, number> = {};
426
- const completedLanes = new Set<string>();
427
- const failedLanes = new Set<string>();
428
- const blockedLanes: Map<string, DependencyRequestPlan> = new Map();
429
-
430
- // Track start index for each lane (initially 0)
431
- for (const lane of lanes) {
432
- lane.startIndex = 0;
433
- }
434
-
435
- const laneRunDirs: Record<string, string> = {};
436
- const laneWorktreeDirs: Record<string, string> = {};
437
- const repoRoot = git.getRepoRoot();
438
-
439
- for (const lane of lanes) {
440
- laneRunDirs[lane.name] = safeJoin(runRoot, 'lanes', lane.name);
441
- fs.mkdirSync(laneRunDirs[lane.name]!, { recursive: true });
442
-
443
- // Create initial state for ALL lanes so resume can find them even if they didn't start
444
- try {
445
- const taskConfig = JSON.parse(fs.readFileSync(lane.path, 'utf8')) as RunnerConfig;
446
-
447
- // Calculate unique branch and worktree for this lane
448
- const lanePipelineBranch = `${pipelineBranch}/${lane.name}`;
449
-
450
- // Use a flat worktree directory name to avoid race conditions in parent directory creation
451
- // repoRoot/_cursorflow/worktrees/cursorflow-run-xxx-lane-name
452
- const laneWorktreeDir = safeJoin(
453
- repoRoot,
454
- taskConfig.worktreeRoot || '_cursorflow/worktrees',
455
- lanePipelineBranch.replace(/\//g, '-')
456
- );
457
-
458
- // Ensure the parent directory exists before spawning the runner
459
- // to avoid race conditions in git worktree add or fs operations
460
- const worktreeParent = path.dirname(laneWorktreeDir);
461
- if (!fs.existsSync(worktreeParent)) {
462
- fs.mkdirSync(worktreeParent, { recursive: true });
463
- }
464
-
465
- laneWorktreeDirs[lane.name] = laneWorktreeDir;
466
-
467
- const initialState = createLaneState(lane.name, taskConfig, lane.path, {
468
- pipelineBranch: lanePipelineBranch,
469
- worktreeDir: laneWorktreeDir
470
- });
471
- saveState(safeJoin(laneRunDirs[lane.name]!, 'state.json'), initialState);
472
- } catch (e) {
473
- logger.warn(`Failed to create initial state for lane ${lane.name}: ${e}`);
474
- }
475
- }
476
-
477
- logger.section('🧭 Starting Orchestration');
478
- logger.info(`Tasks directory: ${tasksDir}`);
479
- logger.info(`Run directory: ${runRoot}`);
480
- logger.info(`Lanes: ${lanes.length}`);
481
-
482
- // Display dependency graph
483
- logger.info('\n📊 Dependency Graph:');
484
- for (const lane of lanes) {
485
- const deps = lane.dependsOn.length > 0 ? ` [depends on: ${lane.dependsOn.join(', ')}]` : '';
486
- console.log(` ${logger.COLORS.cyan}${lane.name}${logger.COLORS.reset}${deps}`);
487
-
488
- // Simple tree-like visualization for deep dependencies
489
- if (lane.dependsOn.length > 0) {
490
- for (const dep of lane.dependsOn) {
491
- console.log(` └─ ${dep}`);
492
- }
493
- }
494
- }
495
- console.log('');
496
-
497
- // Disable auto-resolve when noGit mode is enabled
498
- const autoResolve = !options.noGit && options.autoResolveDependencies !== false;
499
-
500
- if (options.noGit) {
501
- logger.info('🚫 Git operations disabled (--no-git mode)');
502
- }
503
-
504
- // Monitor lanes
505
- const monitorInterval = setInterval(() => {
506
- printLaneStatus(lanes, laneRunDirs);
507
- }, options.pollInterval || 60000);
508
-
509
- while (completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length || (blockedLanes.size > 0 && running.size === 0)) {
510
- // 1. Identify lanes ready to start
511
- const readyToStart = lanes.filter(lane => {
512
- // Not already running or completed or failed or blocked
513
- if (running.has(lane.name) || completedLanes.has(lane.name) || failedLanes.has(lane.name) || blockedLanes.has(lane.name)) {
514
- return false;
515
- }
516
-
517
- // Check dependencies
518
- for (const dep of lane.dependsOn) {
519
- if (failedLanes.has(dep)) {
520
- logger.error(`Lane ${lane.name} will not start because dependency ${dep} failed`);
521
- failedLanes.add(lane.name);
522
- exitCodes[lane.name] = 1;
523
- return false;
524
- }
525
- if (blockedLanes.has(dep)) {
526
- // If a dependency is blocked, wait
527
- return false;
528
- }
529
- if (!completedLanes.has(dep)) {
530
- return false;
531
- }
532
- }
533
- return true;
534
- });
535
-
536
- // 2. Spawn ready lanes up to maxConcurrent
537
- for (const lane of readyToStart) {
538
- if (running.size >= maxConcurrent) break;
539
-
540
- logger.info(`Lane started: ${lane.name}${lane.startIndex ? ` (resuming from ${lane.startIndex})` : ''}`);
541
- const spawnResult = spawnLane({
542
- laneName: lane.name,
543
- tasksFile: lane.path,
544
- laneRunDir: laneRunDirs[lane.name]!,
545
- executor: options.executor || 'cursor-agent',
546
- startIndex: lane.startIndex,
547
- pipelineBranch: `${pipelineBranch}/${lane.name}`,
548
- worktreeDir: laneWorktreeDirs[lane.name],
549
- enhancedLogConfig: options.enhancedLogging,
550
- noGit: options.noGit,
551
- });
552
-
553
- running.set(lane.name, spawnResult);
554
- events.emit('lane.started', {
555
- laneName: lane.name,
556
- pid: spawnResult.child.pid,
557
- logPath: spawnResult.logPath,
558
- });
559
- }
560
-
561
- // 3. Wait for any running lane to finish
562
- if (running.size > 0) {
563
- const promises = Array.from(running.entries()).map(async ([name, { child }]) => {
564
- const code = await waitChild(child);
565
- return { name, code };
566
- });
567
-
568
- const finished = await Promise.race(promises);
569
-
570
- running.delete(finished.name);
571
- exitCodes[finished.name] = finished.code;
572
-
573
- if (finished.code === 0) {
574
- completedLanes.add(finished.name);
575
- events.emit('lane.completed', {
576
- laneName: finished.name,
577
- exitCode: finished.code,
578
- });
579
- } else if (finished.code === 2) {
580
- // Blocked by dependency
581
- const statePath = safeJoin(laneRunDirs[finished.name]!, 'state.json');
582
- const state = loadState<LaneState>(statePath);
583
-
584
- if (state && state.dependencyRequest) {
585
- blockedLanes.set(finished.name, state.dependencyRequest);
586
- const lane = lanes.find(l => l.name === finished.name);
587
- if (lane) {
588
- lane.startIndex = Math.max(0, state.currentTaskIndex - 1); // Task was blocked, retry it
589
- }
590
-
591
- events.emit('lane.blocked', {
592
- laneName: finished.name,
593
- dependencyRequest: state.dependencyRequest,
594
- });
595
- logger.warn(`Lane ${finished.name} is blocked on dependency change request`);
596
- } else {
597
- failedLanes.add(finished.name);
598
- logger.error(`Lane ${finished.name} exited with code 2 but no dependency request found`);
599
- }
600
- } else {
601
- failedLanes.add(finished.name);
602
- events.emit('lane.failed', {
603
- laneName: finished.name,
604
- exitCode: finished.code,
605
- error: 'Process exited with non-zero code',
606
- });
607
- }
608
-
609
- printLaneStatus(lanes, laneRunDirs);
610
- } else {
611
- // Nothing running. Are we blocked?
612
- if (blockedLanes.size > 0 && autoResolve) {
613
- logger.section('🛠 Auto-Resolving Dependencies');
614
-
615
- try {
616
- await resolveAllDependencies(blockedLanes, lanes, laneRunDirs, pipelineBranch, runRoot);
617
-
618
- // Clear blocked status
619
- blockedLanes.clear();
620
- logger.success('Dependencies resolved and synced across all active lanes. Resuming...');
621
- } catch (error: any) {
622
- logger.error(`Auto-resolution failed: ${error.message}`);
623
- // Move blocked to failed
624
- for (const name of blockedLanes.keys()) {
625
- failedLanes.add(name);
626
- }
627
- blockedLanes.clear();
628
- }
629
- } else if (readyToStart.length === 0 && completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length) {
630
- const remaining = lanes.filter(l => !completedLanes.has(l.name) && !failedLanes.has(l.name) && !blockedLanes.has(l.name));
631
- logger.error(`Deadlock detected! Remaining lanes cannot start: ${remaining.map(l => l.name).join(', ')}`);
632
- for (const l of remaining) {
633
- failedLanes.add(l.name);
634
- exitCodes[l.name] = 1;
635
- }
636
- } else {
637
- // All finished
638
- break;
639
- }
640
- }
641
- }
642
-
643
- clearInterval(monitorInterval);
644
- printLaneStatus(lanes, laneRunDirs);
645
-
646
- // Check for failures
647
- const failed = Object.entries(exitCodes).filter(([, code]) => code !== 0 && code !== 2);
648
-
649
- if (failed.length > 0) {
650
- logger.error(`Lanes failed: ${failed.map(([l, c]) => `${l}(${c})`).join(', ')}`);
651
- process.exit(1);
652
- }
653
-
654
- // Check for blocked lanes (if autoResolve was false)
655
- const blocked = Array.from(blockedLanes.keys());
656
-
657
- if (blocked.length > 0) {
658
- logger.warn(`Lanes blocked on dependency: ${blocked.join(', ')}`);
659
- logger.info('Handle dependency changes manually and resume lanes');
660
- events.emit('orchestration.failed', {
661
- error: 'Some lanes blocked on dependency change requests',
662
- blockedLanes: blocked,
663
- });
664
- process.exit(2);
665
- }
666
-
667
- logger.success('All lanes completed successfully!');
668
- events.emit('orchestration.completed', {
669
- runId,
670
- laneCount: lanes.length,
671
- completedCount: completedLanes.size,
672
- failedCount: failedLanes.size,
673
- });
674
- return { lanes, exitCodes, runRoot };
675
- }
1
+ /**
2
+ * Orchestrator - Parallel lane execution with dependency management
3
+ *
4
+ * Features:
5
+ * - Multi-layer stall detection
6
+ * - Cyclic dependency detection
7
+ * - Enhanced recovery strategies
8
+ * - Health checks before start
9
+ */
10
+
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import { spawn, ChildProcess } from 'child_process';
14
+
15
+ import * as logger from '../utils/logger';
16
+ import { loadState, saveState, createLaneState, validateLaneState } from '../utils/state';
17
+ import { LaneState, RunnerConfig, WebhookConfig, DependencyRequestPlan, EnhancedLogConfig } from '../utils/types';
18
+ import { events } from '../utils/events';
19
+ import { registerWebhooks } from '../utils/webhook';
20
+ import { loadConfig, getLogsDir } from '../utils/config';
21
+ import * as git from '../utils/git';
22
+ import { execSync } from 'child_process';
23
+ import { safeJoin } from '../utils/path';
24
+ import {
25
+ EnhancedLogManager,
26
+ createLogManager,
27
+ DEFAULT_LOG_CONFIG,
28
+ ParsedMessage
29
+ } from '../utils/enhanced-logger';
30
+ import { formatMessageForConsole } from '../utils/log-formatter';
31
+ import { analyzeStall, RecoveryAction, logFailure, DEFAULT_STALL_CONFIG, StallDetectionConfig, FailureType } from './failure-policy';
32
+ import {
33
+ getAutoRecoveryManager,
34
+ DEFAULT_AUTO_RECOVERY_CONFIG,
35
+ AutoRecoveryConfig,
36
+ savePOF,
37
+ createPOFFromRecoveryState,
38
+ getGitPushFailureGuidance,
39
+ getMergeConflictGuidance,
40
+ getGitErrorGuidance,
41
+ } from './auto-recovery';
42
+ import { detectCyclicDependencies, validateDependencies, printDependencyGraph, DependencyInfo } from '../utils/dependency';
43
+ import { preflightCheck, printPreflightReport, autoRepair } from '../utils/health';
44
+ import { getLatestCheckpoint } from '../utils/checkpoint';
45
+ import { cleanStaleLocks, getLockDir } from '../utils/lock';
46
+
47
/**
 * Orchestrator-level stall detection defaults.
 *
 * Starts from DEFAULT_STALL_CONFIG and overrides the idle timeout (1 minute,
 * so a continue signal can be sent quickly), the progress timeout
 * (10 minutes) and the maximum number of restarts (2).
 */
const DEFAULT_ORCHESTRATOR_STALL_CONFIG: StallDetectionConfig = {
  ...DEFAULT_STALL_CONFIG,
  // 1 minute without activity counts as idle
  idleTimeoutMs: 60_000,
  // 10 minutes without progress
  progressTimeoutMs: 600_000,
  maxRestarts: 2,
};
54
+
55
/**
 * A lane discovered in the tasks directory, plus the mutable bookkeeping
 * the orchestrator attaches to it while running.
 */
export interface LaneInfo {
  /** Lane name (task file name without the `.json` extension). */
  name: string;
  /** Path to the lane's task file. */
  path: string;
  /** Names of lanes this lane depends on. */
  dependsOn: string[];
  /** Current task index to resume from. */
  startIndex?: number;
  /** Number of times the lane was restarted due to a stall. */
  restartCount?: number;
  /** Timestamp of the last state file update. */
  lastStateUpdate?: number;
  /** When the current task started. */
  taskStartTime?: number;
}
64
+
65
/**
 * What `spawnLane` hands back to its caller.
 */
export interface SpawnLaneResult {
  /** The spawned runner process. */
  child: ChildProcess;
  /** Path of the log file the lane output is written to. */
  logPath: string;
  /** Present only when enhanced logging is enabled. */
  logManager?: EnhancedLogManager;
}
70
+
71
/**
 * Per-lane tracking record kept by the orchestrator while a lane's
 * process is running.
 */
interface RunningLaneInfo {
  /** The lane's runner process. */
  child: ChildProcess;
  /** Path of the lane's log file. */
  logPath: string;
  /** Enhanced log manager, when enhanced logging is enabled. */
  logManager?: EnhancedLogManager;
  /** Timestamp (ms) of the last observed activity. */
  lastActivity: number;
  /** Timestamp (ms) of the last observed state file update. */
  lastStateUpdate: number;
  /**
   * Recovery escalation phase:
   * 0: normal, 1: continued, 2: stronger_prompt, 3: restarted,
   * 4: diagnostics (set when the doctor action runs).
   */
  stallPhase: number;
  /** Timestamp (ms) when the current task started. */
  taskStartTime: number;
  /** Last line of output seen on stdout. */
  lastOutput: string;
  /** Path to the lane's state.json. */
  statePath: string;
  /** Total bytes received from the agent. */
  bytesReceived: number;
  /** Bytes at the last check (for delta calculation). */
  lastBytesCheck: number;
  /** Number of continue signals sent. */
  continueSignalsSent: number;
}
88
+
89
/**
 * Spawn a lane runner as a child `node` process.
 *
 * The runner script is resolved with `require.resolve('./runner')` and
 * started with the tasks file plus `--run-dir`, `--executor` and
 * `--start-index`; `--pipeline-branch`, `--worktree-dir` and `--no-git` are
 * appended only when the corresponding option is set.
 *
 * Logging has two modes, chosen by the merged log config's `enabled` flag:
 * - enabled: stdout/stderr are piped, forwarded to the enhanced log manager,
 *   and echoed to the console with a `[laneName]` prefix (JSON-looking
 *   stdout lines are hidden from the console);
 * - disabled: both streams are appended directly to `terminal.log` in the
 *   lane run directory.
 *
 * @param laneName Lane name, used as the console prefix and log identity.
 * @param tasksFile Path to the lane's task file, passed to the runner.
 * @param laneRunDir Run directory for this lane (created if missing).
 * @param executor Executor name passed to the runner.
 * @param startIndex Task index to start from (default 0).
 * @param pipelineBranch Optional pipeline branch passed to the runner.
 * @param worktreeDir Optional worktree directory passed to the runner.
 * @param enhancedLogConfig Overrides merged on top of DEFAULT_LOG_CONFIG.
 * @param noGit When true, `--no-git` is passed to the runner.
 * @param onActivity Called whenever console-visible output (or a parsed
 *   message) arrives; not called for stderr lines classified as status.
 * @returns The child process, the log path and (if enabled) the log manager.
 */
export function spawnLane({
  laneName,
  tasksFile,
  laneRunDir,
  executor,
  startIndex = 0,
  pipelineBranch,
  worktreeDir,
  enhancedLogConfig,
  noGit = false,
  onActivity,
}: {
  laneName: string;
  tasksFile: string;
  laneRunDir: string;
  executor: string;
  startIndex?: number;
  pipelineBranch?: string;
  worktreeDir?: string;
  enhancedLogConfig?: Partial<EnhancedLogConfig>;
  noGit?: boolean;
  onActivity?: () => void;
}): SpawnLaneResult {
  fs.mkdirSync(laneRunDir, { recursive: true });

  // Use extension-less resolve to handle both .ts (dev) and .js (dist)
  const runnerPath = require.resolve('./runner');

  const args = [
    runnerPath,
    tasksFile,
    '--run-dir', laneRunDir,
    '--executor', executor,
    '--start-index', startIndex.toString(),
  ];

  if (pipelineBranch) {
    args.push('--pipeline-branch', pipelineBranch);
  }

  if (worktreeDir) {
    args.push('--worktree-dir', worktreeDir);
  }

  if (noGit) {
    args.push('--no-git');
  }

  // Create enhanced log manager if enabled
  const logConfig = { ...DEFAULT_LOG_CONFIG, ...enhancedLogConfig };
  let logManager: EnhancedLogManager | undefined;
  let logPath: string;
  let child: ChildProcess;

  // Build environment for child process
  const childEnv = {
    ...process.env,
  };

  if (logConfig.enabled) {
    // Create callback for clean console output
    const onParsedMessage = (msg: ParsedMessage) => {
      if (onActivity) onActivity();
      const formatted = formatMessageForConsole(msg, {
        laneLabel: `[${laneName}]`,
        includeTimestamp: true
      });
      process.stdout.write(formatted + '\n');
    };

    logManager = createLogManager(laneRunDir, laneName, logConfig, onParsedMessage);
    logPath = logManager.getLogPaths().clean;

    // Spawn with pipe for enhanced logging
    child = spawn('node', args, {
      stdio: ['ignore', 'pipe', 'pipe'],
      env: childEnv,
      detached: false,
    });

    // Matches a line that starts with "[YYYY-MM-DDT..." or "[HH:MM:SS]".
    // Both alternatives must be anchored with ^; an escaped "\^" would match
    // a literal caret and never recognise the short time format.
    const timestampPrefix = /^\[\d{4}-\d{2}-\d{2}T|^\[\d{2}:\d{2}:\d{2}\]/;

    // Echo one complete stdout line to the console, skipping noisy JSON lines.
    const printStdoutLine = (line: string): void => {
      const trimmed = line.trim();
      const isJson = trimmed.startsWith('{') || trimmed.includes('{"type"');
      if (!trimmed || isJson) return;

      if (onActivity) onActivity();
      if (timestampPrefix.test(trimmed)) {
        // Line already carries a timestamp: insert the lane name after it
        const formatted = trimmed.replace(/^(\[[^\]]+\])/, `$1 ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset}`);
        process.stdout.write(formatted + '\n');
      } else {
        // Add full prefix: timestamp + lane
        process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${line}\n`);
      }
    };

    // Holds the trailing partial line between 'data' chunks
    let lineBuffer = '';

    // Pipe stdout and stderr through enhanced logger
    if (child.stdout) {
      child.stdout.on('data', (data: Buffer) => {
        logManager!.writeStdout(data);

        lineBuffer += data.toString();
        const lines = lineBuffer.split('\n');
        // Last element is an incomplete line (or '' if the chunk ended on \n)
        lineBuffer = lines.pop() || '';

        for (const line of lines) {
          printStdoutLine(line);
        }
      });

      // Flush a final line that was not terminated by a newline
      child.stdout.on('end', () => {
        if (lineBuffer) {
          const rest = lineBuffer;
          lineBuffer = '';
          printStdoutLine(rest);
        }
      });
    }

    if (child.stderr) {
      child.stderr.on('data', (data: Buffer) => {
        logManager!.writeStderr(data);
        const str = data.toString();
        const lines = str.split('\n');
        for (const line of lines) {
          const trimmed = line.trim();
          if (trimmed) {
            // Check if it's a real error or just git/status output on stderr
            const isStatus = trimmed.startsWith('Preparing worktree') ||
                             trimmed.startsWith('Switched to a new branch') ||
                             trimmed.startsWith('HEAD is now at') ||
                             trimmed.includes('actual output');

            const ts = new Date().toLocaleTimeString('en-US', { hour12: false });
            if (isStatus) {
              process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${trimmed}\n`);
            } else {
              if (onActivity) onActivity();
              process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${logger.COLORS.red}❌ ERR ${trimmed}${logger.COLORS.reset}\n`);
            }
          }
        }
      });
    }

    // Close log manager when process exits
    child.on('exit', () => {
      logManager?.close();
    });
  } else {
    // Fallback to simple file logging
    logPath = safeJoin(laneRunDir, 'terminal.log');
    const logFd = fs.openSync(logPath, 'a');

    child = spawn('node', args, {
      stdio: ['ignore', logFd, logFd],
      env: childEnv,
      detached: false,
    });

    // The descriptor was handed to spawn above; close the parent's copy.
    try {
      fs.closeSync(logFd);
    } catch {
      // Ignore
    }
  }

  return { child, logPath, logManager };
}
259
+
260
/**
 * Wait for a child process to exit.
 *
 * Resolves (never rejects) with:
 * - the exit code, if the process has already exited or exits normally;
 * - 1 if the process was terminated by a signal (no numeric exit code),
 *   including the case where that already happened before this call;
 * - 1 if the process emits `error`.
 *
 * @param proc The child process to wait on.
 * @returns The exit code, or 1 when no numeric code is available.
 */
export function waitChild(proc: ChildProcess): Promise<number> {
  return new Promise((resolve) => {
    if (proc.exitCode !== null) {
      resolve(proc.exitCode);
      return;
    }

    // A process killed by a signal keeps exitCode === null and sets
    // signalCode instead. Its 'exit' event has already fired, so waiting
    // for it would leave this promise pending forever.
    if (proc.signalCode != null) {
      resolve(1);
      return;
    }

    proc.once('exit', (code) => resolve(code ?? 1));
    proc.once('error', () => resolve(1));
  });
}
274
+
275
/**
 * Discover lanes in a tasks directory.
 *
 * Every `*.json` file (in sorted file-name order) becomes one lane named
 * after the file without its extension. The file is parsed to read its
 * `dependsOn` list; if reading or parsing fails, a warning is logged and
 * the lane is returned with no dependencies.
 *
 * @param tasksDir Directory containing lane task files.
 * @returns One LaneInfo per JSON file, or an empty array if the directory
 *   does not exist.
 */
export function listLaneFiles(tasksDir: string): LaneInfo[] {
  if (!fs.existsSync(tasksDir)) return [];

  const jsonFiles = fs
    .readdirSync(tasksDir)
    .filter(entry => entry.endsWith('.json'))
    .sort();

  const lanes: LaneInfo[] = [];
  for (const fileName of jsonFiles) {
    const lanePath = safeJoin(tasksDir, fileName);
    const laneName = path.basename(fileName, '.json');

    let dependsOn: string[] = [];
    try {
      const parsed = JSON.parse(fs.readFileSync(lanePath, 'utf8')) as RunnerConfig;
      dependsOn = parsed.dependsOn || [];
    } catch (e) {
      logger.warn(`Failed to parse config for lane ${laneName}: ${e}`);
    }

    lanes.push({ name: laneName, path: lanePath, dependsOn });
  }

  return lanes;
}
306
+
307
/**
 * Print a one-line status summary for every lane.
 *
 * For each lane the state is loaded from `state.json` in its run directory:
 * - no run directory known: status `(unknown)`;
 * - no state loaded: `waiting` if the lane has dependencies, else `pending`;
 * - otherwise the state's status and a 1-based `current/total` task counter.
 *
 * @param lanes Lanes to report on, in output order.
 * @param laneRunDirs Map from lane name to that lane's run directory.
 */
export function printLaneStatus(lanes: LaneInfo[], laneRunDirs: Record<string, string>): void {
  const summaries: Array<{ lane: string; status: string; task: string }> = [];

  // Gather every row first so all state reads happen before any output.
  for (const lane of lanes) {
    const runDir = laneRunDirs[lane.name];
    if (!runDir) {
      summaries.push({ lane: lane.name, status: '(unknown)', task: '-' });
      continue;
    }

    const state = loadState<LaneState>(safeJoin(runDir, 'state.json'));
    if (!state) {
      const status = lane.dependsOn.length > 0 ? 'waiting' : 'pending';
      summaries.push({ lane: lane.name, status, task: '-' });
      continue;
    }

    // currentTaskIndex is shown 1-based
    const position = (state.currentTaskIndex || 0) + 1;
    const total = state.totalTasks || '?';
    summaries.push({
      lane: lane.name,
      status: state.status || 'unknown',
      task: `${position}/${total}`,
    });
  }

  logger.section('📡 Lane Status');
  summaries.forEach(({ lane, status, task }) => {
    console.log(`- ${lane}: ${status} (${task})`);
  });
}
336
+
337
/**
 * Resolve dependency change requests for all blocked lanes and sync the
 * result into every lane that is not completed or failed.
 *
 * Steps:
 * 1. Collect the de-duplicated `changes` and `commands` of all blocked
 *    lanes. If there are no commands, nothing is done.
 * 2. Pick a worktree: the first blocked lane's `worktreeDir` from its state,
 *    or `<runRoot>/resolution-worktree` (created if it does not exist).
 * 3. Check out the pipeline branch there, run every command, then commit
 *    and push. A failure of the commit/push step (for example when there is
 *    nothing to commit) is logged as a warning and does not abort.
 * 4. For each lane whose state is neither `completed` nor `failed`, merge
 *    the pipeline branch into the lane's current task branch (if that branch
 *    exists) and push it. Per-lane failures are logged and skipped.
 * 5. Check the pipeline branch out again.
 *
 * NOTE(review): commands come from lane state files and are executed via
 * the shell with `execSync`; confirm those files are trusted input.
 *
 * @param blockedLanes Blocked lane name -> its dependency request plan.
 * @param allLanes All lanes of the run.
 * @param laneRunDirs Lane name -> lane run directory.
 * @param pipelineBranch Branch on which dependencies are resolved.
 * @param runRoot Root directory of the run.
 * @throws Error if one of the resolution commands fails.
 */
async function resolveAllDependencies(
  blockedLanes: Map<string, DependencyRequestPlan>,
  allLanes: LaneInfo[],
  laneRunDirs: Record<string, string>,
  pipelineBranch: string,
  runRoot: string
) {
  // 1. Collect all unique changes and commands from blocked lanes
  const allChanges: string[] = [];
  const allCommands: string[] = [];

  for (const [, plan] of blockedLanes) {
    if (plan.changes) allChanges.push(...plan.changes);
    if (plan.commands) allCommands.push(...plan.commands);
  }

  const uniqueChanges = Array.from(new Set(allChanges));
  const uniqueCommands = Array.from(new Set(allCommands));

  if (uniqueCommands.length === 0) return;

  // 2. Setup a temporary worktree for resolution if needed, or use the first available one
  const firstLaneName = Array.from(blockedLanes.keys())[0]!;
  const statePath = safeJoin(laneRunDirs[firstLaneName]!, 'state.json');
  const state = loadState<LaneState>(statePath);
  const worktreeDir = state?.worktreeDir || safeJoin(runRoot, 'resolution-worktree');

  if (!fs.existsSync(worktreeDir)) {
    logger.info(`Creating resolution worktree at ${worktreeDir}`);
    git.createWorktree(worktreeDir, pipelineBranch, { baseBranch: git.getCurrentBranch() });
  }

  // 3. Resolve on pipeline branch
  logger.info(`Resolving dependencies on ${pipelineBranch}`);
  git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });

  for (const cmd of uniqueCommands) {
    logger.info(`Running: ${cmd}`);
    try {
      execSync(cmd, { cwd: worktreeDir, stdio: 'inherit' });
    } catch (e: any) {
      throw new Error(`Command failed: ${cmd}. ${e.message}`);
    }
  }

  try {
    git.runGit(['add', '.'], { cwd: worktreeDir });
    git.runGit(['commit', '-m', `chore: auto-resolve dependencies\n\n${uniqueChanges.join('\n')}`], { cwd: worktreeDir });

    // Log changed files
    const stats = git.getLastOperationStats(worktreeDir);
    if (stats) {
      logger.info('Changed files:\n' + stats);
    }

    git.push(pipelineBranch, { cwd: worktreeDir });
  } catch (e: unknown) {
    // Still best-effort (an empty commit is expected to fail), but surface
    // the reason so a real commit or push failure is not hidden.
    const reason = e instanceof Error ? e.message : String(e);
    logger.warn(`Dependency commit/push skipped or failed on ${pipelineBranch}: ${reason}`);
  }

  // 4. Sync ALL active lanes (blocked + pending + running)
  // Since we only call this when running.size === 0, "active" means not completed/failed
  for (const lane of allLanes) {
    const laneDir = laneRunDirs[lane.name];
    if (!laneDir) continue;

    const laneState = loadState<LaneState>(safeJoin(laneDir, 'state.json'));
    if (!laneState || laneState.status === 'completed' || laneState.status === 'failed') continue;

    // Merge pipelineBranch into the lane's current task branch
    const currentIdx = laneState.currentTaskIndex;

    // An unreadable task file must only skip this lane, not abort the sync
    // of every remaining lane.
    let taskConfig: RunnerConfig;
    try {
      taskConfig = JSON.parse(fs.readFileSync(lane.path, 'utf8')) as RunnerConfig;
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e);
      logger.warn(`Failed to read tasks for lane ${lane.name}: ${reason}`);
      continue;
    }

    const task = taskConfig.tasks[currentIdx];

    if (task) {
      const lanePipelineBranch = `${pipelineBranch}/${lane.name}`;
      const taskBranch = `${lanePipelineBranch}--${String(currentIdx + 1).padStart(2, '0')}-${task.name}`;
      logger.info(`Syncing lane ${lane.name} branch ${taskBranch}`);

      try {
        // If task branch doesn't exist yet, it will be created from pipelineBranch when the lane starts
        if (git.branchExists(taskBranch, { cwd: worktreeDir })) {
          git.runGit(['checkout', taskBranch], { cwd: worktreeDir });
          git.runGit(['merge', pipelineBranch, '--no-edit'], { cwd: worktreeDir });

          // Log changed files
          const stats = git.getLastOperationStats(worktreeDir);
          if (stats) {
            logger.info(`Sync results for ${lane.name}:\n` + stats);
          }

          git.push(taskBranch, { cwd: worktreeDir });
        }
      } catch (e: any) {
        logger.warn(`Failed to sync branch ${taskBranch}: ${e.message}`);
      }
    }
  }

  // 5. Leave the worktree on the pipeline branch
  git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });
}
439
+
440
+ /**
441
+ * Run orchestration with dependency management
442
+ */
443
+ export async function orchestrate(tasksDir: string, options: {
444
+ runDir?: string;
445
+ executor?: string;
446
+ pollInterval?: number;
447
+ maxConcurrentLanes?: number;
448
+ webhooks?: WebhookConfig[];
449
+ autoResolveDependencies?: boolean;
450
+ enhancedLogging?: Partial<EnhancedLogConfig>;
451
+ noGit?: boolean;
452
+ skipPreflight?: boolean;
453
+ stallConfig?: Partial<StallDetectionConfig>;
454
+ autoRecoveryConfig?: Partial<AutoRecoveryConfig>;
455
+ } = {}): Promise<{ lanes: LaneInfo[]; exitCodes: Record<string, number>; runRoot: string }> {
456
+ const lanes = listLaneFiles(tasksDir);
457
+
458
+ if (lanes.length === 0) {
459
+ throw new Error(`No lane task files found in ${tasksDir}`);
460
+ }
461
+
462
+ // Run preflight checks
463
+ if (!options.skipPreflight) {
464
+ logger.section('🔍 Preflight Checks');
465
+
466
+ const preflight = await preflightCheck({
467
+ requireRemote: !options.noGit,
468
+ requireAuth: true,
469
+ });
470
+
471
+ if (!preflight.canProceed) {
472
+ printPreflightReport(preflight);
473
+ throw new Error('Preflight check failed. Please fix the blockers above.');
474
+ }
475
+
476
+ // Auto-repair if there are warnings
477
+ if (preflight.warnings.length > 0) {
478
+ logger.info('Attempting auto-repair...');
479
+ const repair = await autoRepair();
480
+ if (repair.repaired.length > 0) {
481
+ for (const r of repair.repaired) {
482
+ logger.success(`✓ ${r}`);
483
+ }
484
+ }
485
+ }
486
+
487
+ logger.success('✓ Preflight checks passed');
488
+ }
489
+
490
+ // Validate dependencies and detect cycles
491
+ logger.section('📊 Dependency Analysis');
492
+
493
+ const depInfos: DependencyInfo[] = lanes.map(l => ({
494
+ name: l.name,
495
+ dependsOn: l.dependsOn,
496
+ }));
497
+
498
+ const depValidation = validateDependencies(depInfos);
499
+
500
+ if (!depValidation.valid) {
501
+ logger.error(' Dependency validation failed:');
502
+ for (const err of depValidation.errors) {
503
+ logger.error(` • ${err}`);
504
+ }
505
+ throw new Error('Invalid dependency configuration');
506
+ }
507
+
508
+ if (depValidation.warnings.length > 0) {
509
+ for (const warn of depValidation.warnings) {
510
+ logger.warn(`⚠️ ${warn}`);
511
+ }
512
+ }
513
+
514
+ // Print dependency graph
515
+ printDependencyGraph(depInfos);
516
+
517
+ const config = loadConfig();
518
+ const logsDir = getLogsDir(config);
519
+ const runId = `run-${Date.now()}`;
520
+ // Use absolute path for runRoot to avoid issues with subfolders
521
+ const runRoot = options.runDir
522
+ ? (path.isAbsolute(options.runDir) ? options.runDir : path.resolve(process.cwd(), options.runDir)) // nosemgrep
523
+ : safeJoin(logsDir, 'runs', runId);
524
+
525
+ fs.mkdirSync(runRoot, { recursive: true });
526
+
527
+ // Clean stale locks before starting
528
+ try {
529
+ const lockDir = getLockDir(git.getRepoRoot());
530
+ const cleaned = cleanStaleLocks(lockDir);
531
+ if (cleaned > 0) {
532
+ logger.info(`Cleaned ${cleaned} stale lock(s)`);
533
+ }
534
+ } catch {
535
+ // Ignore lock cleanup errors
536
+ }
537
+
538
+ const randomSuffix = Math.random().toString(36).substring(2, 7);
539
+ const pipelineBranch = `cursorflow/run-${Date.now().toString(36)}-${randomSuffix}`;
540
+
541
+ // Stall detection configuration
542
+ const stallConfig: StallDetectionConfig = {
543
+ ...DEFAULT_ORCHESTRATOR_STALL_CONFIG,
544
+ ...options.stallConfig,
545
+ };
546
+
547
+ // Initialize auto-recovery manager
548
+ const autoRecoveryManager = getAutoRecoveryManager({
549
+ ...DEFAULT_AUTO_RECOVERY_CONFIG,
550
+ idleTimeoutMs: stallConfig.idleTimeoutMs, // Sync with stall config
551
+ ...options.autoRecoveryConfig,
552
+ });
553
+
554
+ // Initialize event system
555
+ events.setRunId(runId);
556
+ if (options.webhooks) {
557
+ registerWebhooks(options.webhooks);
558
+ }
559
+
560
+ events.emit('orchestration.started', {
561
+ runId,
562
+ tasksDir,
563
+ laneCount: lanes.length,
564
+ runRoot,
565
+ });
566
+
567
+ const maxConcurrent = options.maxConcurrentLanes || 10;
568
+ const running: Map<string, RunningLaneInfo> = new Map();
569
+ const exitCodes: Record<string, number> = {};
570
+ const completedLanes = new Set<string>();
571
+ const failedLanes = new Set<string>();
572
+ const blockedLanes: Map<string, DependencyRequestPlan> = new Map();
573
+
574
+ // Track start index for each lane (initially 0)
575
+ for (const lane of lanes) {
576
+ lane.startIndex = 0;
577
+ lane.restartCount = 0;
578
+ }
579
+
580
+ const laneRunDirs: Record<string, string> = {};
581
+ const laneWorktreeDirs: Record<string, string> = {};
582
+ const repoRoot = git.getRepoRoot();
583
+
584
+ for (const lane of lanes) {
585
+ laneRunDirs[lane.name] = safeJoin(runRoot, 'lanes', lane.name);
586
+ fs.mkdirSync(laneRunDirs[lane.name]!, { recursive: true });
587
+
588
+ // Create initial state for ALL lanes so resume can find them even if they didn't start
589
+ try {
590
+ const taskConfig = JSON.parse(fs.readFileSync(lane.path, 'utf8')) as RunnerConfig;
591
+
592
+ // Calculate unique branch and worktree for this lane
593
+ const lanePipelineBranch = `${pipelineBranch}/${lane.name}`;
594
+
595
+ // Use a flat worktree directory name to avoid race conditions in parent directory creation
596
+ // repoRoot/_cursorflow/worktrees/cursorflow-run-xxx-lane-name
597
+ const laneWorktreeDir = safeJoin(
598
+ repoRoot,
599
+ taskConfig.worktreeRoot || '_cursorflow/worktrees',
600
+ lanePipelineBranch.replace(/\//g, '-')
601
+ );
602
+
603
+ // Ensure the parent directory exists before spawning the runner
604
+ // to avoid race conditions in git worktree add or fs operations
605
+ const worktreeParent = path.dirname(laneWorktreeDir);
606
+ if (!fs.existsSync(worktreeParent)) {
607
+ fs.mkdirSync(worktreeParent, { recursive: true });
608
+ }
609
+
610
+ laneWorktreeDirs[lane.name] = laneWorktreeDir;
611
+
612
+ const initialState = createLaneState(lane.name, taskConfig, lane.path, {
613
+ pipelineBranch: lanePipelineBranch,
614
+ worktreeDir: laneWorktreeDir
615
+ });
616
+ saveState(safeJoin(laneRunDirs[lane.name]!, 'state.json'), initialState);
617
+ } catch (e) {
618
+ logger.warn(`Failed to create initial state for lane ${lane.name}: ${e}`);
619
+ }
620
+ }
621
+
622
+ logger.section('🧭 Starting Orchestration');
623
+ logger.info(`Tasks directory: ${tasksDir}`);
624
+ logger.info(`Run directory: ${runRoot}`);
625
+ logger.info(`Lanes: ${lanes.length}`);
626
+
627
+ // Display dependency graph
628
+ logger.info('\n📊 Dependency Graph:');
629
+ for (const lane of lanes) {
630
+ const deps = lane.dependsOn.length > 0 ? ` [depends on: ${lane.dependsOn.join(', ')}]` : '';
631
+ console.log(` ${logger.COLORS.cyan}${lane.name}${logger.COLORS.reset}${deps}`);
632
+
633
+ // Simple tree-like visualization for deep dependencies
634
+ if (lane.dependsOn.length > 0) {
635
+ for (const dep of lane.dependsOn) {
636
+ console.log(` └─ ${dep}`);
637
+ }
638
+ }
639
+ }
640
+ console.log('');
641
+
642
+ // Disable auto-resolve when noGit mode is enabled
643
+ const autoResolve = !options.noGit && options.autoResolveDependencies !== false;
644
+
645
+ if (options.noGit) {
646
+ logger.info('🚫 Git operations disabled (--no-git mode)');
647
+ }
648
+
649
+ // Monitor lanes
650
+ const monitorInterval = setInterval(() => {
651
+ printLaneStatus(lanes, laneRunDirs);
652
+ }, options.pollInterval || 60000);
653
+
654
+ while (completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length || (blockedLanes.size > 0 && running.size === 0)) {
655
+ // 1. Identify lanes ready to start
656
+ const readyToStart = lanes.filter(lane => {
657
+ // Not already running or completed or failed or blocked
658
+ if (running.has(lane.name) || completedLanes.has(lane.name) || failedLanes.has(lane.name) || blockedLanes.has(lane.name)) {
659
+ return false;
660
+ }
661
+
662
+ // Check dependencies
663
+ for (const dep of lane.dependsOn) {
664
+ if (failedLanes.has(dep)) {
665
+ logger.error(`Lane ${lane.name} will not start because dependency ${dep} failed`);
666
+ failedLanes.add(lane.name);
667
+ exitCodes[lane.name] = 1;
668
+ return false;
669
+ }
670
+ if (blockedLanes.has(dep)) {
671
+ // If a dependency is blocked, wait
672
+ return false;
673
+ }
674
+ if (!completedLanes.has(dep)) {
675
+ return false;
676
+ }
677
+ }
678
+ return true;
679
+ });
680
+
681
+ // 2. Spawn ready lanes up to maxConcurrent
682
+ for (const lane of readyToStart) {
683
+ if (running.size >= maxConcurrent) break;
684
+
685
+ const laneStatePath = safeJoin(laneRunDirs[lane.name]!, 'state.json');
686
+
687
+ // Validate and repair state before starting
688
+ const validation = validateLaneState(laneStatePath, { autoRepair: true });
689
+ if (!validation.valid && !validation.repaired) {
690
+ logger.warn(`[${lane.name}] State validation issues: ${validation.issues.join(', ')}`);
691
+ }
692
+
693
+ logger.info(`Lane started: ${lane.name}${lane.startIndex ? ` (resuming from ${lane.startIndex})` : ''}`);
694
+
695
+ let lastOutput = '';
696
+ const spawnResult = spawnLane({
697
+ laneName: lane.name,
698
+ tasksFile: lane.path,
699
+ laneRunDir: laneRunDirs[lane.name]!,
700
+ executor: options.executor || 'cursor-agent',
701
+ startIndex: lane.startIndex,
702
+ pipelineBranch: `${pipelineBranch}/${lane.name}`,
703
+ worktreeDir: laneWorktreeDirs[lane.name],
704
+ enhancedLogConfig: options.enhancedLogging,
705
+ noGit: options.noGit,
706
+ onActivity: () => {
707
+ const info = running.get(lane.name);
708
+ if (info) {
709
+ info.lastActivity = Date.now();
710
+ }
711
+ }
712
+ });
713
+
714
+ // Track last output and bytes received for long operation and stall detection
715
+ if (spawnResult.child.stdout) {
716
+ spawnResult.child.stdout.on('data', (data: Buffer) => {
717
+ const info = running.get(lane.name);
718
+ if (info) {
719
+ info.lastOutput = data.toString().trim().split('\n').pop() || '';
720
+ info.bytesReceived += data.length;
721
+
722
+ // Update auto-recovery manager
723
+ autoRecoveryManager.recordActivity(lane.name, data.length, info.lastOutput);
724
+ }
725
+ });
726
+ }
727
+
728
+ const now = Date.now();
729
+ running.set(lane.name, {
730
+ ...spawnResult,
731
+ lastActivity: now,
732
+ lastStateUpdate: now,
733
+ stallPhase: 0,
734
+ taskStartTime: now,
735
+ lastOutput: '',
736
+ statePath: laneStatePath,
737
+ bytesReceived: 0,
738
+ lastBytesCheck: 0,
739
+ continueSignalsSent: 0,
740
+ });
741
+
742
+ // Register lane with auto-recovery manager
743
+ autoRecoveryManager.registerLane(lane.name);
744
+
745
+ // Update lane tracking
746
+ lane.taskStartTime = now;
747
+
748
+ events.emit('lane.started', {
749
+ laneName: lane.name,
750
+ pid: spawnResult.child.pid,
751
+ logPath: spawnResult.logPath,
752
+ });
753
+ }
754
+
755
+ // 3. Wait for any running lane to finish OR check for stalls
756
+ if (running.size > 0) {
757
+ // Polling timeout for stall detection
758
+ let pollTimeout: NodeJS.Timeout | undefined;
759
+ const pollPromise = new Promise<{ name: string; code: number }>(resolve => {
760
+ pollTimeout = setTimeout(() => resolve({ name: '__poll__', code: 0 }), 10000);
761
+ });
762
+
763
+ const promises = Array.from(running.entries()).map(async ([name, { child }]) => {
764
+ const code = await waitChild(child);
765
+ return { name, code };
766
+ });
767
+
768
+ const result = await Promise.race([...promises, pollPromise]);
769
+ if (pollTimeout) clearTimeout(pollTimeout);
770
+
771
+ if (result.name === '__poll__') {
772
+ // Periodic stall check with multi-layer detection and escalating recovery
773
+ for (const [laneName, info] of running.entries()) {
774
+ const now = Date.now();
775
+ const idleTime = now - info.lastActivity;
776
+ const lane = lanes.find(l => l.name === laneName)!;
777
+
778
+ // Check state file for progress updates
779
+ let progressTime = 0;
780
+ try {
781
+ const stateStat = fs.statSync(info.statePath);
782
+ const stateUpdateTime = stateStat.mtimeMs;
783
+ if (stateUpdateTime > info.lastStateUpdate) {
784
+ info.lastStateUpdate = stateUpdateTime;
785
+ }
786
+ progressTime = now - info.lastStateUpdate;
787
+ } catch {
788
+ // State file might not exist yet
789
+ }
790
+
791
+ // Calculate bytes received since last check
792
+ const bytesDelta = info.bytesReceived - info.lastBytesCheck;
793
+ info.lastBytesCheck = info.bytesReceived;
794
+
795
+ // Use multi-layer stall analysis with enhanced context
796
+ const analysis = analyzeStall({
797
+ stallPhase: info.stallPhase,
798
+ idleTimeMs: idleTime,
799
+ progressTimeMs: progressTime,
800
+ lastOutput: info.lastOutput,
801
+ restartCount: lane.restartCount || 0,
802
+ taskStartTimeMs: info.taskStartTime,
803
+ bytesReceived: bytesDelta, // Bytes since last check
804
+ continueSignalsSent: info.continueSignalsSent,
805
+ }, stallConfig);
806
+
807
+ // Only act if action is not NONE
808
+ if (analysis.action !== RecoveryAction.NONE) {
809
+ logFailure(laneName, analysis);
810
+ info.logManager?.log('error', analysis.message);
811
+
812
+ if (analysis.action === RecoveryAction.CONTINUE_SIGNAL) {
813
+ const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
814
+ try {
815
+ fs.writeFileSync(interventionPath, 'continue');
816
+ info.stallPhase = 1;
817
+ info.lastActivity = now;
818
+ info.continueSignalsSent++;
819
+ logger.info(`[${laneName}] Sent continue signal (#${info.continueSignalsSent})`);
820
+
821
+ events.emit('recovery.continue_signal', {
822
+ laneName,
823
+ idleSeconds: Math.round(idleTime / 1000),
824
+ signalCount: info.continueSignalsSent,
825
+ });
826
+ } catch (e) {
827
+ logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
828
+ }
829
+ } else if (analysis.action === RecoveryAction.STRONGER_PROMPT) {
830
+ const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
831
+ const strongerPrompt = `[SYSTEM INTERVENTION] You seem to be stuck. Please continue with your current task immediately. If you're waiting for something, explain what you need and proceed with what you can do now. If you've completed the task, summarize your work and finish.`;
832
+ try {
833
+ fs.writeFileSync(interventionPath, strongerPrompt);
834
+ info.stallPhase = 2;
835
+ info.lastActivity = now;
836
+ logger.warn(`[${laneName}] Sent stronger prompt after continue signal failed`);
837
+
838
+ events.emit('recovery.stronger_prompt', { laneName });
839
+ } catch (e) {
840
+ logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
841
+ }
842
+ } else if (analysis.action === RecoveryAction.KILL_AND_RESTART ||
843
+ analysis.action === RecoveryAction.RESTART_LANE ||
844
+ analysis.action === RecoveryAction.RESTART_LANE_FROM_CHECKPOINT) {
845
+ lane.restartCount = (lane.restartCount || 0) + 1;
846
+ info.stallPhase = 3;
847
+
848
+ // Try to get checkpoint info
849
+ const checkpoint = getLatestCheckpoint(laneRunDirs[laneName]!);
850
+ if (checkpoint) {
851
+ logger.info(`[${laneName}] Checkpoint available: ${checkpoint.id} (task ${checkpoint.taskIndex})`);
852
+ }
853
+
854
+ // Kill the process
855
+ try {
856
+ info.child.kill('SIGKILL');
857
+ } catch {
858
+ // Process might already be dead
859
+ }
860
+
861
+ logger.warn(`[${laneName}] Killing and restarting lane (restart #${lane.restartCount})`);
862
+
863
+ events.emit('recovery.restart', {
864
+ laneName,
865
+ restartCount: lane.restartCount,
866
+ maxRestarts: stallConfig.maxRestarts,
867
+ });
868
+ } else if (analysis.action === RecoveryAction.RUN_DOCTOR) {
869
+ info.stallPhase = 4;
870
+
871
+ // Run diagnostics
872
+ logger.error(`[${laneName}] Running diagnostics due to persistent failures...`);
873
+
874
+ // Import health check dynamically to avoid circular dependency
875
+ const { checkAgentHealth, checkAuthHealth } = await import('../utils/health');
876
+
877
+ const [agentHealth, authHealth] = await Promise.all([
878
+ checkAgentHealth(),
879
+ checkAuthHealth(),
880
+ ]);
881
+
882
+ const issues: string[] = [];
883
+ if (!agentHealth.ok) issues.push(`Agent: ${agentHealth.message}`);
884
+ if (!authHealth.ok) issues.push(`Auth: ${authHealth.message}`);
885
+
886
+ if (issues.length > 0) {
887
+ logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
888
+ } else {
889
+ logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
890
+ }
891
+
892
+ // Save diagnostic to file
893
+ const diagnosticPath = safeJoin(laneRunDirs[laneName]!, 'diagnostic.json');
894
+ fs.writeFileSync(diagnosticPath, JSON.stringify({
895
+ timestamp: Date.now(),
896
+ agentHealthy: agentHealth.ok,
897
+ authHealthy: authHealth.ok,
898
+ issues,
899
+ analysis,
900
+ }, null, 2));
901
+
902
+ // Kill the process
903
+ try {
904
+ info.child.kill('SIGKILL');
905
+ } catch {
906
+ // Process might already be dead
907
+ }
908
+
909
+ logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
910
+
911
+ // Save POF for failed recovery
912
+ const recoveryState = autoRecoveryManager.getState(laneName);
913
+ if (recoveryState) {
914
+ try {
915
+ const laneStatePath = safeJoin(laneRunDirs[laneName]!, 'state.json');
916
+ const laneState = loadState<LaneState>(laneStatePath);
917
+ const pofDir = safeJoin(runRoot, '..', '..', 'pof');
918
+ const diagnosticInfo = {
919
+ timestamp: Date.now(),
920
+ agentHealthy: agentHealth.ok,
921
+ authHealthy: authHealth.ok,
922
+ systemHealthy: true,
923
+ suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
924
+ details: issues.join('\n') || 'No obvious issues found',
925
+ };
926
+ const pofEntry = createPOFFromRecoveryState(
927
+ runId,
928
+ runRoot,
929
+ laneName,
930
+ recoveryState,
931
+ laneState,
932
+ diagnosticInfo
933
+ );
934
+ savePOF(runId, pofDir, pofEntry);
935
+ } catch (pofError: any) {
936
+ logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
937
+ }
938
+ }
939
+
940
+ events.emit('recovery.diagnosed', {
941
+ laneName,
942
+ diagnostic: { agentHealthy: agentHealth.ok, authHealthy: authHealth.ok, issues },
943
+ });
944
+ } else if (analysis.action === RecoveryAction.ABORT_LANE) {
945
+ info.stallPhase = 5;
946
+
947
+ try {
948
+ info.child.kill('SIGKILL');
949
+ } catch {
950
+ // Process might already be dead
951
+ }
952
+
953
+ logger.error(`[${laneName}] Aborting lane due to repeated stalls`);
954
+
955
+ // Save POF for failed recovery
956
+ const recoveryState = autoRecoveryManager.getState(laneName);
957
+ if (recoveryState) {
958
+ try {
959
+ const laneStatePath = safeJoin(laneRunDirs[laneName]!, 'state.json');
960
+ const laneState = loadState<LaneState>(laneStatePath);
961
+ const pofDir = safeJoin(runRoot, '..', '..', 'pof');
962
+ const pofEntry = createPOFFromRecoveryState(
963
+ runId,
964
+ runRoot,
965
+ laneName,
966
+ recoveryState,
967
+ laneState,
968
+ recoveryState.diagnosticInfo
969
+ );
970
+ savePOF(runId, pofDir, pofEntry);
971
+ } catch (pofError: any) {
972
+ logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
973
+ }
974
+ }
975
+ } else if (analysis.action === RecoveryAction.SEND_GIT_GUIDANCE) {
976
+ // Send guidance message to agent for git issues
977
+ const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
978
+
979
+ // Determine which guidance to send based on the failure type
980
+ let guidance: string;
981
+ if (analysis.type === FailureType.GIT_PUSH_REJECTED) {
982
+ guidance = getGitPushFailureGuidance();
983
+ } else if (analysis.type === FailureType.MERGE_CONFLICT) {
984
+ guidance = getMergeConflictGuidance();
985
+ } else {
986
+ guidance = getGitErrorGuidance(analysis.message);
987
+ }
988
+
989
+ try {
990
+ fs.writeFileSync(interventionPath, guidance);
991
+ info.lastActivity = now;
992
+ logger.info(`[${laneName}] Sent git issue guidance to agent`);
993
+ } catch (e: any) {
994
+ logger.error(`[${laneName}] Failed to send guidance: ${e.message}`);
995
+ }
996
+ }
997
+ }
998
+ }
999
+ continue;
1000
+ }
1001
+
1002
+ const finished = result;
1003
+ const info = running.get(finished.name)!;
1004
+ running.delete(finished.name);
1005
+ exitCodes[finished.name] = finished.code;
1006
+
1007
+ // Unregister from auto-recovery manager
1008
+ autoRecoveryManager.unregisterLane(finished.name);
1009
+
1010
+ if (finished.code === 0) {
1011
+ completedLanes.add(finished.name);
1012
+ events.emit('lane.completed', {
1013
+ laneName: finished.name,
1014
+ exitCode: finished.code,
1015
+ });
1016
+ } else if (finished.code === 2) {
1017
+ // Blocked by dependency
1018
+ const statePath = safeJoin(laneRunDirs[finished.name]!, 'state.json');
1019
+ const state = loadState<LaneState>(statePath);
1020
+
1021
+ if (state && state.dependencyRequest) {
1022
+ blockedLanes.set(finished.name, state.dependencyRequest);
1023
+ const lane = lanes.find(l => l.name === finished.name);
1024
+ if (lane) {
1025
+ lane.startIndex = Math.max(0, state.currentTaskIndex - 1); // Task was blocked, retry it
1026
+ }
1027
+
1028
+ events.emit('lane.blocked', {
1029
+ laneName: finished.name,
1030
+ dependencyRequest: state.dependencyRequest,
1031
+ });
1032
+ logger.warn(`Lane ${finished.name} is blocked on dependency change request`);
1033
+ } else {
1034
+ failedLanes.add(finished.name);
1035
+ logger.error(`Lane ${finished.name} exited with code 2 but no dependency request found`);
1036
+ }
1037
+ } else {
1038
+ // Check if it was a restart request
1039
+ if (info.stallPhase === 2) {
1040
+ logger.info(`🔄 Lane ${finished.name} is being restarted due to stall...`);
1041
+
1042
+ // Update startIndex from current state to resume from the same task
1043
+ const statePath = safeJoin(laneRunDirs[finished.name]!, 'state.json');
1044
+ const state = loadState<LaneState>(statePath);
1045
+ if (state) {
1046
+ const lane = lanes.find(l => l.name === finished.name);
1047
+ if (lane) {
1048
+ lane.startIndex = state.currentTaskIndex;
1049
+ }
1050
+ }
1051
+
1052
+ // Note: we don't add to failedLanes or completedLanes,
1053
+ // so it will be eligible to start again in the next iteration.
1054
+ continue;
1055
+ }
1056
+
1057
+ failedLanes.add(finished.name);
1058
+ events.emit('lane.failed', {
1059
+ laneName: finished.name,
1060
+ exitCode: finished.code,
1061
+ error: info.stallPhase === 3 ? 'Stopped due to repeated stall' : 'Process exited with non-zero code',
1062
+ });
1063
+ }
1064
+
1065
+ printLaneStatus(lanes, laneRunDirs);
1066
+ } else {
1067
+ // Nothing running. Are we blocked?
1068
+ if (blockedLanes.size > 0 && autoResolve) {
1069
+ logger.section('🛠 Auto-Resolving Dependencies');
1070
+
1071
+ try {
1072
+ await resolveAllDependencies(blockedLanes, lanes, laneRunDirs, pipelineBranch, runRoot);
1073
+
1074
+ // Clear blocked status
1075
+ blockedLanes.clear();
1076
+ logger.success('Dependencies resolved and synced across all active lanes. Resuming...');
1077
+ } catch (error: any) {
1078
+ logger.error(`Auto-resolution failed: ${error.message}`);
1079
+ // Move blocked to failed
1080
+ for (const name of blockedLanes.keys()) {
1081
+ failedLanes.add(name);
1082
+ }
1083
+ blockedLanes.clear();
1084
+ }
1085
+ } else if (readyToStart.length === 0 && completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length) {
1086
+ const remaining = lanes.filter(l => !completedLanes.has(l.name) && !failedLanes.has(l.name) && !blockedLanes.has(l.name));
1087
+ logger.error(`Deadlock detected! Remaining lanes cannot start: ${remaining.map(l => l.name).join(', ')}`);
1088
+ for (const l of remaining) {
1089
+ failedLanes.add(l.name);
1090
+ exitCodes[l.name] = 1;
1091
+ }
1092
+ } else {
1093
+ // All finished
1094
+ break;
1095
+ }
1096
+ }
1097
+ }
1098
+
1099
+ clearInterval(monitorInterval);
1100
+ printLaneStatus(lanes, laneRunDirs);
1101
+
1102
+ // Check for failures
1103
+ const failed = Object.entries(exitCodes).filter(([, code]) => code !== 0 && code !== 2);
1104
+
1105
+ if (failed.length > 0) {
1106
+ logger.error(`Lanes failed: ${failed.map(([l, c]) => `${l}(${c})`).join(', ')}`);
1107
+ process.exit(1);
1108
+ }
1109
+
1110
+ // Check for blocked lanes (if autoResolve was false)
1111
+ const blocked = Array.from(blockedLanes.keys());
1112
+
1113
+ if (blocked.length > 0) {
1114
+ logger.warn(`Lanes blocked on dependency: ${blocked.join(', ')}`);
1115
+ logger.info('Handle dependency changes manually and resume lanes');
1116
+ events.emit('orchestration.failed', {
1117
+ error: 'Some lanes blocked on dependency change requests',
1118
+ blockedLanes: blocked,
1119
+ });
1120
+ process.exit(2);
1121
+ }
1122
+
1123
+ logger.success('All lanes completed successfully!');
1124
+ events.emit('orchestration.completed', {
1125
+ runId,
1126
+ laneCount: lanes.length,
1127
+ completedCount: completedLanes.size,
1128
+ failedCount: failedLanes.size,
1129
+ });
1130
+ return { lanes, exitCodes, runRoot };
1131
+ }