@litmers/cursorflow-orchestrator 0.1.31 → 0.1.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/README.md +144 -52
  2. package/commands/cursorflow-add.md +159 -0
  3. package/commands/cursorflow-monitor.md +23 -2
  4. package/commands/cursorflow-new.md +87 -0
  5. package/dist/cli/add.d.ts +7 -0
  6. package/dist/cli/add.js +377 -0
  7. package/dist/cli/add.js.map +1 -0
  8. package/dist/cli/clean.js +1 -0
  9. package/dist/cli/clean.js.map +1 -1
  10. package/dist/cli/config.d.ts +7 -0
  11. package/dist/cli/config.js +181 -0
  12. package/dist/cli/config.js.map +1 -0
  13. package/dist/cli/index.js +34 -30
  14. package/dist/cli/index.js.map +1 -1
  15. package/dist/cli/logs.js +7 -33
  16. package/dist/cli/logs.js.map +1 -1
  17. package/dist/cli/monitor.js +51 -62
  18. package/dist/cli/monitor.js.map +1 -1
  19. package/dist/cli/new.d.ts +7 -0
  20. package/dist/cli/new.js +232 -0
  21. package/dist/cli/new.js.map +1 -0
  22. package/dist/cli/prepare.js +95 -193
  23. package/dist/cli/prepare.js.map +1 -1
  24. package/dist/cli/resume.js +11 -47
  25. package/dist/cli/resume.js.map +1 -1
  26. package/dist/cli/run.js +27 -22
  27. package/dist/cli/run.js.map +1 -1
  28. package/dist/cli/tasks.js +1 -2
  29. package/dist/cli/tasks.js.map +1 -1
  30. package/dist/core/failure-policy.d.ts +9 -0
  31. package/dist/core/failure-policy.js +9 -0
  32. package/dist/core/failure-policy.js.map +1 -1
  33. package/dist/core/orchestrator.d.ts +20 -6
  34. package/dist/core/orchestrator.js +213 -333
  35. package/dist/core/orchestrator.js.map +1 -1
  36. package/dist/core/runner/agent.d.ts +27 -0
  37. package/dist/core/runner/agent.js +294 -0
  38. package/dist/core/runner/agent.js.map +1 -0
  39. package/dist/core/runner/index.d.ts +5 -0
  40. package/dist/core/runner/index.js +22 -0
  41. package/dist/core/runner/index.js.map +1 -0
  42. package/dist/core/runner/pipeline.d.ts +9 -0
  43. package/dist/core/runner/pipeline.js +539 -0
  44. package/dist/core/runner/pipeline.js.map +1 -0
  45. package/dist/core/runner/prompt.d.ts +25 -0
  46. package/dist/core/runner/prompt.js +175 -0
  47. package/dist/core/runner/prompt.js.map +1 -0
  48. package/dist/core/runner/task.d.ts +26 -0
  49. package/dist/core/runner/task.js +283 -0
  50. package/dist/core/runner/task.js.map +1 -0
  51. package/dist/core/runner/utils.d.ts +37 -0
  52. package/dist/core/runner/utils.js +161 -0
  53. package/dist/core/runner/utils.js.map +1 -0
  54. package/dist/core/runner.d.ts +2 -96
  55. package/dist/core/runner.js +11 -1136
  56. package/dist/core/runner.js.map +1 -1
  57. package/dist/core/stall-detection.d.ts +326 -0
  58. package/dist/core/stall-detection.js +781 -0
  59. package/dist/core/stall-detection.js.map +1 -0
  60. package/dist/types/config.d.ts +6 -6
  61. package/dist/types/flow.d.ts +84 -0
  62. package/dist/types/flow.js +10 -0
  63. package/dist/types/flow.js.map +1 -0
  64. package/dist/types/index.d.ts +1 -0
  65. package/dist/types/index.js +3 -3
  66. package/dist/types/index.js.map +1 -1
  67. package/dist/types/lane.d.ts +0 -2
  68. package/dist/types/logging.d.ts +5 -1
  69. package/dist/types/task.d.ts +7 -11
  70. package/dist/utils/config.js +7 -15
  71. package/dist/utils/config.js.map +1 -1
  72. package/dist/utils/dependency.d.ts +36 -1
  73. package/dist/utils/dependency.js +256 -1
  74. package/dist/utils/dependency.js.map +1 -1
  75. package/dist/utils/enhanced-logger.d.ts +45 -82
  76. package/dist/utils/enhanced-logger.js +238 -844
  77. package/dist/utils/enhanced-logger.js.map +1 -1
  78. package/dist/utils/git.d.ts +29 -0
  79. package/dist/utils/git.js +115 -5
  80. package/dist/utils/git.js.map +1 -1
  81. package/dist/utils/state.js +0 -2
  82. package/dist/utils/state.js.map +1 -1
  83. package/dist/utils/task-service.d.ts +2 -2
  84. package/dist/utils/task-service.js +40 -31
  85. package/dist/utils/task-service.js.map +1 -1
  86. package/package.json +4 -3
  87. package/src/cli/add.ts +397 -0
  88. package/src/cli/clean.ts +1 -0
  89. package/src/cli/config.ts +177 -0
  90. package/src/cli/index.ts +36 -32
  91. package/src/cli/logs.ts +7 -31
  92. package/src/cli/monitor.ts +55 -71
  93. package/src/cli/new.ts +235 -0
  94. package/src/cli/prepare.ts +98 -205
  95. package/src/cli/resume.ts +13 -56
  96. package/src/cli/run.ts +311 -306
  97. package/src/cli/tasks.ts +1 -2
  98. package/src/core/failure-policy.ts +9 -0
  99. package/src/core/orchestrator.ts +277 -378
  100. package/src/core/runner/agent.ts +314 -0
  101. package/src/core/runner/index.ts +6 -0
  102. package/src/core/runner/pipeline.ts +567 -0
  103. package/src/core/runner/prompt.ts +174 -0
  104. package/src/core/runner/task.ts +320 -0
  105. package/src/core/runner/utils.ts +142 -0
  106. package/src/core/runner.ts +8 -1347
  107. package/src/core/stall-detection.ts +936 -0
  108. package/src/types/config.ts +6 -6
  109. package/src/types/flow.ts +91 -0
  110. package/src/types/index.ts +15 -3
  111. package/src/types/lane.ts +0 -2
  112. package/src/types/logging.ts +5 -1
  113. package/src/types/task.ts +7 -11
  114. package/src/utils/config.ts +8 -16
  115. package/src/utils/dependency.ts +311 -2
  116. package/src/utils/enhanced-logger.ts +263 -927
  117. package/src/utils/git.ts +145 -5
  118. package/src/utils/state.ts +0 -2
  119. package/src/utils/task-service.ts +48 -40
  120. package/commands/cursorflow-review.md +0 -56
  121. package/commands/cursorflow-runs.md +0 -59
  122. package/dist/cli/runs.d.ts +0 -5
  123. package/dist/cli/runs.js +0 -214
  124. package/dist/cli/runs.js.map +0 -1
  125. package/dist/core/reviewer.d.ts +0 -66
  126. package/dist/core/reviewer.js +0 -265
  127. package/dist/core/reviewer.js.map +0 -1
  128. package/src/cli/runs.ts +0 -212
  129. package/src/core/reviewer.ts +0 -285
@@ -25,28 +25,35 @@ import {
25
25
  EnhancedLogManager,
26
26
  createLogManager,
27
27
  DEFAULT_LOG_CONFIG,
28
- ParsedMessage
28
+ ParsedMessage,
29
+ stripAnsi
29
30
  } from '../utils/enhanced-logger';
30
31
  import { formatMessageForConsole } from '../utils/log-formatter';
31
- import { analyzeStall, RecoveryAction, logFailure, DEFAULT_STALL_CONFIG, StallDetectionConfig, FailureType } from './failure-policy';
32
+ import { FailureType, analyzeFailure as analyzeFailureFromPolicy } from './failure-policy';
32
33
  import {
33
- getAutoRecoveryManager,
34
- DEFAULT_AUTO_RECOVERY_CONFIG,
35
- AutoRecoveryConfig,
36
34
  savePOF,
37
35
  createPOFFromRecoveryState,
38
36
  getGitPushFailureGuidance,
39
37
  getMergeConflictGuidance,
40
38
  getGitErrorGuidance,
39
+ LaneRecoveryState,
41
40
  } from './auto-recovery';
41
+ import {
42
+ StallDetectionService,
43
+ getStallService,
44
+ StallDetectionConfig,
45
+ DEFAULT_STALL_CONFIG,
46
+ RecoveryAction,
47
+ StallPhase,
48
+ StallAnalysis,
49
+ } from './stall-detection';
42
50
  import { detectCyclicDependencies, validateDependencies, printDependencyGraph, DependencyInfo } from '../utils/dependency';
43
51
  import { preflightCheck, printPreflightReport, autoRepair } from '../utils/health';
44
52
  import { getLatestCheckpoint } from '../utils/checkpoint';
45
53
  import { cleanStaleLocks, getLockDir } from '../utils/lock';
46
54
 
47
55
  /** Default stall detection configuration - 2 minute idle timeout for recovery */
48
- const DEFAULT_ORCHESTRATOR_STALL_CONFIG: StallDetectionConfig = {
49
- ...DEFAULT_STALL_CONFIG,
56
+ const DEFAULT_ORCHESTRATOR_STALL_CONFIG: Partial<StallDetectionConfig> = {
50
57
  idleTimeoutMs: 2 * 60 * 1000, // 2 minutes (idle detection for continue signal)
51
58
  progressTimeoutMs: 10 * 60 * 1000, // 10 minutes (only triggers if no activity at all)
52
59
  maxRestarts: 2,
@@ -55,7 +62,6 @@ const DEFAULT_ORCHESTRATOR_STALL_CONFIG: StallDetectionConfig = {
55
62
  export interface LaneInfo {
56
63
  name: string;
57
64
  path: string;
58
- dependsOn: string[];
59
65
  startIndex?: number; // Current task index to resume from
60
66
  restartCount?: number; // Number of times restarted due to stall
61
67
  lastStateUpdate?: number; // Timestamp of last state file update
@@ -66,24 +72,22 @@ export interface SpawnLaneResult {
66
72
  child: ChildProcess;
67
73
  logPath: string;
68
74
  logManager?: EnhancedLogManager;
75
+ info: RunningLaneInfo;
69
76
  }
70
77
 
71
78
  /**
72
79
  * Lane execution tracking info
80
+ *
81
+ * NOTE: Stall 감지 관련 상태(lastActivity, stallPhase 등)는 StallDetectionService에서 관리
82
+ * 여기서는 프로세스 관리에 필요한 최소한의 정보만 유지
73
83
  */
74
84
  interface RunningLaneInfo {
75
85
  child: ChildProcess;
76
86
  logPath: string;
77
87
  logManager?: EnhancedLogManager;
78
- lastActivity: number;
79
- lastStateUpdate: number;
80
- stallPhase: number; // 0: normal, 1: continued, 2: stronger_prompt, 3: restarted
81
- taskStartTime: number;
82
- lastOutput: string;
83
88
  statePath: string;
84
- bytesReceived: number; // Total bytes received from agent
85
- lastBytesCheck: number; // Bytes at last check (for delta calculation)
86
- continueSignalsSent: number; // Number of continue signals sent
89
+ laneIndex: number;
90
+ currentTaskIndex?: number;
87
91
  }
88
92
 
89
93
  /**
@@ -106,6 +110,109 @@ function logFileTail(filePath: string, lines: number = 10): void {
106
110
  }
107
111
  }
108
112
 
113
+ /**
114
+ * Handle RUN_DOCTOR action - runs async health diagnostics
115
+ */
116
+ async function handleDoctorDiagnostics(
117
+ laneName: string,
118
+ laneRunDir: string,
119
+ runId: string,
120
+ runRoot: string,
121
+ stallService: StallDetectionService,
122
+ child: ChildProcess
123
+ ): Promise<void> {
124
+ // Import health check dynamically to avoid circular dependency
125
+ const { checkAgentHealth, checkAuthHealth } = await import('../utils/health');
126
+
127
+ const [agentHealth, authHealth] = await Promise.all([
128
+ checkAgentHealth(),
129
+ checkAuthHealth(),
130
+ ]);
131
+
132
+ const issues: string[] = [];
133
+ if (!agentHealth.ok) issues.push(`Agent: ${agentHealth.message}`);
134
+ if (!authHealth.ok) issues.push(`Auth: ${authHealth.message}`);
135
+
136
+ if (issues.length > 0) {
137
+ logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
138
+ } else {
139
+ logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
140
+ }
141
+
142
+ // Save diagnostic to file
143
+ const diagnosticPath = safeJoin(laneRunDir, 'diagnostic.json');
144
+ fs.writeFileSync(diagnosticPath, JSON.stringify({
145
+ timestamp: Date.now(),
146
+ agentHealthy: agentHealth.ok,
147
+ authHealthy: authHealth.ok,
148
+ issues,
149
+ }, null, 2));
150
+
151
+ // Kill the process
152
+ try {
153
+ child.kill('SIGKILL');
154
+ } catch {
155
+ // Process might already be dead
156
+ }
157
+
158
+ logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
159
+
160
+ // Save POF for failed recovery
161
+ const stallState = stallService.getState(laneName);
162
+ if (stallState) {
163
+ try {
164
+ const laneStatePath = safeJoin(laneRunDir, 'state.json');
165
+ const laneState = loadState<LaneState>(laneStatePath);
166
+ const pofDir = safeJoin(runRoot, '..', '..', 'pof');
167
+
168
+ // Convert stall state to recovery state format for POF
169
+ // Note: StallPhase and RecoveryStage have compatible numeric values (0-5)
170
+ const recoveryState: LaneRecoveryState = {
171
+ laneName,
172
+ stage: stallState.phase as unknown as number, // Both enums use 0-5
173
+ lastActivityTime: stallState.lastRealActivityTime,
174
+ lastBytesReceived: stallState.bytesSinceLastCheck,
175
+ totalBytesReceived: stallState.totalBytesReceived,
176
+ lastOutput: stallState.lastOutput,
177
+ restartCount: stallState.restartCount,
178
+ continueSignalsSent: stallState.continueSignalCount,
179
+ lastStageChangeTime: stallState.lastPhaseChangeTime,
180
+ isLongOperation: stallState.isLongOperation,
181
+ failureHistory: stallState.failureHistory.map(f => ({
182
+ timestamp: f.timestamp,
183
+ stage: f.phase as unknown as number, // Both enums use 0-5
184
+ action: f.action as string,
185
+ message: f.message,
186
+ idleTimeMs: f.idleTimeMs,
187
+ bytesReceived: f.bytesReceived,
188
+ lastOutput: f.lastOutput,
189
+ })),
190
+ };
191
+
192
+ const diagnosticInfo = {
193
+ timestamp: Date.now(),
194
+ agentHealthy: agentHealth.ok,
195
+ authHealthy: authHealth.ok,
196
+ systemHealthy: true,
197
+ suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
198
+ details: issues.join('\n') || 'No obvious issues found',
199
+ };
200
+
201
+ const pofEntry = createPOFFromRecoveryState(
202
+ runId,
203
+ runRoot,
204
+ laneName,
205
+ recoveryState,
206
+ laneState,
207
+ diagnosticInfo
208
+ );
209
+ savePOF(runId, pofDir, pofEntry);
210
+ } catch (pofError: any) {
211
+ logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
212
+ }
213
+ }
214
+ }
215
+
109
216
  /**
110
217
  * Spawn a lane process
111
218
  */
@@ -120,6 +227,7 @@ export function spawnLane({
120
227
  enhancedLogConfig,
121
228
  noGit = false,
122
229
  onActivity,
230
+ laneIndex = 0,
123
231
  }: {
124
232
  laneName: string;
125
233
  tasksFile: string;
@@ -131,6 +239,7 @@ export function spawnLane({
131
239
  enhancedLogConfig?: Partial<EnhancedLogConfig>;
132
240
  noGit?: boolean;
133
241
  onActivity?: () => void;
242
+ laneIndex?: number;
134
243
  }): SpawnLaneResult {
135
244
  fs.mkdirSync(laneRunDir, { recursive: true});
136
245
 
@@ -169,17 +278,24 @@ export function spawnLane({
169
278
  };
170
279
 
171
280
  if (logConfig.enabled) {
281
+ // Helper to get dynamic lane label like [L01-T01-laneName]
282
+ const getDynamicLabel = () => {
283
+ const laneNum = `L${(laneIndex + 1).toString().padStart(2, '0')}`;
284
+ const taskPart = info.currentTaskIndex ? `-T${info.currentTaskIndex.toString().padStart(2, '0')}` : '';
285
+ return `[${laneNum}${taskPart}-${laneName}]`;
286
+ };
287
+
172
288
  // Create callback for clean console output
173
289
  const onParsedMessage = (msg: ParsedMessage) => {
174
290
  if (onActivity) onActivity();
175
291
  const formatted = formatMessageForConsole(msg, {
176
- laneLabel: `[${laneName}]`,
292
+ laneLabel: getDynamicLabel(),
177
293
  includeTimestamp: true
178
294
  });
179
295
  process.stdout.write(formatted + '\n');
180
296
  };
181
297
 
182
- logManager = createLogManager(laneRunDir, laneName, logConfig, onParsedMessage);
298
+ logManager = createLogManager(laneRunDir, laneName, logConfig, onParsedMessage, laneIndex);
183
299
  logPath = logManager.getLogPaths().clean;
184
300
 
185
301
  // Spawn with pipe for enhanced logging
@@ -189,6 +305,16 @@ export function spawnLane({
189
305
  detached: false,
190
306
  });
191
307
 
308
+ // Initialize info object for stdout handler to use
309
+ const info: RunningLaneInfo = {
310
+ child,
311
+ logManager,
312
+ logPath,
313
+ statePath: safeJoin(laneRunDir, 'state.json'),
314
+ laneIndex,
315
+ currentTaskIndex: startIndex > 0 ? startIndex + 1 : 0
316
+ };
317
+
192
318
  // Buffer for non-JSON lines
193
319
  let lineBuffer = '';
194
320
 
@@ -205,24 +331,52 @@ export function spawnLane({
205
331
 
206
332
  for (const line of lines) {
207
333
  const trimmed = line.trim();
334
+ if (!trimmed) continue;
335
+
336
+ // Detect task start/progress to update label
337
+ // Example: [1/1] hello-task
338
+ const cleanLine = stripAnsi(trimmed);
339
+ const taskMatch = cleanLine.match(/^\s*\[(\d+)\/(\d+)\]\s+(.+)$/);
340
+ if (taskMatch) {
341
+ info.currentTaskIndex = parseInt(taskMatch[1]!);
342
+ // Update log manager's task index to keep it in sync for readable log
343
+ if (logManager) {
344
+ logManager.setTask(taskMatch[3]!.trim(), undefined, info.currentTaskIndex - 1);
345
+ }
346
+ }
347
+
208
348
  // Show if it's a timestamped log line (starts with [YYYY-MM-DD... or [HH:MM:SS])
209
349
  // or if it's NOT a noisy JSON line
210
- const hasTimestamp = /^\[\d{4}-\d{2}-\d{2}T|\^\[\d{2}:\d{2}:\d{2}\]/.test(trimmed);
211
350
  const isJson = trimmed.startsWith('{') || trimmed.includes('{"type"');
212
351
  // Filter out heartbeats - they should NOT reset the idle timer
213
352
  const isHeartbeat = trimmed.includes('Heartbeat') && trimmed.includes('bytes received');
214
353
 
215
- if (trimmed && !isJson) {
354
+ if (!isJson) {
216
355
  // Only trigger activity for non-heartbeat lines
217
356
  if (onActivity && !isHeartbeat) onActivity();
218
- // If line alreedy has timestamp format, just add lane prefix
219
- if (hasTimestamp) {
220
- // Insert lane name after first timestamp
221
- const formatted = trimmed.replace(/^(\[[^\]]+\])/, `$1 ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset}`);
222
- process.stdout.write(formatted + '\n');
357
+
358
+ const currentLabel = getDynamicLabel();
359
+ const coloredLabel = `${logger.COLORS.magenta}${currentLabel}${logger.COLORS.reset}`;
360
+
361
+ // Regex that matches timestamp even if it has ANSI color codes
362
+ // Matches: [24:39:14] or \x1b[90m[24:39:14]\x1b[0m
363
+ const timestampRegex = /^((?:\x1b\[[0-9;]*m)*)\[(\d{4}-\d{2}-\d{2}T|\d{2}:\d{2}:\d{2})\]/;
364
+ const tsMatch = trimmed.match(timestampRegex);
365
+
366
+ if (tsMatch) {
367
+ // If line already has timestamp format, just add lane prefix
368
+ // Check if lane label is already present to avoid triple duplication
369
+ if (!trimmed.includes(currentLabel)) {
370
+ // Insert label after the timestamp part
371
+ const tsPart = tsMatch[0];
372
+ const formatted = trimmed.replace(tsPart, `${tsPart} ${coloredLabel}`);
373
+ process.stdout.write(formatted + '\n');
374
+ } else {
375
+ process.stdout.write(trimmed + '\n');
376
+ }
223
377
  } else {
224
378
  // Add full prefix: timestamp + lane
225
- process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${line}\n`);
379
+ process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${coloredLabel} ${line}\n`);
226
380
  }
227
381
  }
228
382
  }
@@ -244,11 +398,14 @@ export function spawnLane({
244
398
  trimmed.includes('actual output');
245
399
 
246
400
  const ts = new Date().toLocaleTimeString('en-US', { hour12: false });
401
+ const currentLabel = getDynamicLabel();
402
+ const coloredLabel = `${logger.COLORS.magenta}${currentLabel}${logger.COLORS.reset}`;
403
+
247
404
  if (isStatus) {
248
- process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${trimmed}\n`);
405
+ process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${coloredLabel} ${trimmed}\n`);
249
406
  } else {
250
407
  if (onActivity) onActivity();
251
- process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${logger.COLORS.red}❌ ERR ${trimmed}${logger.COLORS.reset}\n`);
408
+ process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${coloredLabel} ${logger.COLORS.red}❌ ERR ${trimmed}${logger.COLORS.reset}\n`);
252
409
  }
253
410
  }
254
411
  }
@@ -259,9 +416,11 @@ export function spawnLane({
259
416
  child.on('exit', () => {
260
417
  logManager?.close();
261
418
  });
419
+
420
+ return { child, logPath, logManager, info };
262
421
  } else {
263
422
  // Fallback to simple file logging
264
- logPath = safeJoin(laneRunDir, 'terminal.log');
423
+ logPath = safeJoin(laneRunDir, 'terminal-readable.log');
265
424
  const logFd = fs.openSync(logPath, 'a');
266
425
 
267
426
  child = spawn('node', args, {
@@ -275,9 +434,19 @@ export function spawnLane({
275
434
  } catch {
276
435
  // Ignore
277
436
  }
437
+
438
+ return {
439
+ child,
440
+ logPath,
441
+ logManager,
442
+ info: {
443
+ child,
444
+ logPath,
445
+ statePath: safeJoin(laneRunDir, 'state.json'),
446
+ laneIndex
447
+ }
448
+ };
278
449
  }
279
-
280
- return { child, logPath, logManager };
281
450
  }
282
451
 
283
452
  /**
@@ -296,7 +465,7 @@ export function waitChild(proc: ChildProcess): Promise<number> {
296
465
  }
297
466
 
298
467
  /**
299
- * List lane task files in directory and load their configs for dependencies
468
+ * List lane task files in directory
300
469
  */
301
470
  export function listLaneFiles(tasksDir: string): LaneInfo[] {
302
471
  if (!fs.existsSync(tasksDir)) {
@@ -310,19 +479,10 @@ export function listLaneFiles(tasksDir: string): LaneInfo[] {
310
479
  .map(f => {
311
480
  const filePath = safeJoin(tasksDir, f);
312
481
  const name = path.basename(f, '.json');
313
- let dependsOn: string[] = [];
314
-
315
- try {
316
- const config = JSON.parse(fs.readFileSync(filePath, 'utf8')) as RunnerConfig;
317
- dependsOn = config.dependsOn || [];
318
- } catch (e) {
319
- logger.warn(`Failed to parse config for lane ${name}: ${e}`);
320
- }
321
482
 
322
483
  return {
323
484
  name,
324
485
  path: filePath,
325
- dependsOn,
326
486
  };
327
487
  });
328
488
  }
@@ -339,8 +499,7 @@ export function printLaneStatus(lanes: LaneInfo[], laneRunDirs: Record<string, s
339
499
  const state = loadState<LaneState>(statePath);
340
500
 
341
501
  if (!state) {
342
- const isWaiting = lane.dependsOn.length > 0;
343
- return { lane: lane.name, status: isWaiting ? 'waiting' : 'pending', task: '-' };
502
+ return { lane: lane.name, status: 'pending', task: '-' };
344
503
  }
345
504
 
346
505
  const idx = (state.currentTaskIndex || 0) + 1;
@@ -388,12 +547,12 @@ async function resolveAllDependencies(
388
547
  const worktreeDir = state?.worktreeDir || safeJoin(runRoot, 'resolution-worktree');
389
548
 
390
549
  if (!fs.existsSync(worktreeDir)) {
391
- logger.info(`Creating resolution worktree at ${worktreeDir}`);
550
+ logger.info(`🏗️ Creating resolution worktree at ${worktreeDir}`);
392
551
  git.createWorktree(worktreeDir, pipelineBranch, { baseBranch: git.getCurrentBranch() });
393
552
  }
394
553
 
395
554
  // 3. Resolve on pipeline branch
396
- logger.info(`Resolving dependencies on ${pipelineBranch}`);
555
+ logger.info(`🔄 Resolving dependencies on branch ${pipelineBranch}`);
397
556
  git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });
398
557
 
399
558
  for (const cmd of uniqueCommands) {
@@ -474,7 +633,6 @@ export async function orchestrate(tasksDir: string, options: {
474
633
  noGit?: boolean;
475
634
  skipPreflight?: boolean;
476
635
  stallConfig?: Partial<StallDetectionConfig>;
477
- autoRecoveryConfig?: Partial<AutoRecoveryConfig>;
478
636
  } = {}): Promise<{ lanes: LaneInfo[]; exitCodes: Record<string, number>; runRoot: string }> {
479
637
  const lanes = listLaneFiles(tasksDir);
480
638
 
@@ -510,34 +668,11 @@ export async function orchestrate(tasksDir: string, options: {
510
668
  logger.success('✓ Preflight checks passed');
511
669
  }
512
670
 
513
- // Validate dependencies and detect cycles
514
- logger.section('📊 Dependency Analysis');
515
-
516
- const depInfos: DependencyInfo[] = lanes.map(l => ({
517
- name: l.name,
518
- dependsOn: l.dependsOn,
519
- }));
520
-
521
- const depValidation = validateDependencies(depInfos);
522
-
523
- if (!depValidation.valid) {
524
- logger.error('❌ Dependency validation failed:');
525
- for (const err of depValidation.errors) {
526
- logger.error(` • ${err}`);
527
- }
528
- throw new Error('Invalid dependency configuration');
529
- }
530
-
531
- if (depValidation.warnings.length > 0) {
532
- for (const warn of depValidation.warnings) {
533
- logger.warn(`⚠️ ${warn}`);
534
- }
535
- }
536
-
537
- // Print dependency graph
538
- printDependencyGraph(depInfos);
539
-
540
671
  const config = loadConfig();
672
+
673
+ // Set verbose git logging from config
674
+ git.setVerboseGit(config.verboseGit || false);
675
+
541
676
  const logsDir = getLogsDir(config);
542
677
  const runId = `run-${Date.now()}`;
543
678
  // Use absolute path for runRoot to avoid issues with subfolders
@@ -561,17 +696,11 @@ export async function orchestrate(tasksDir: string, options: {
561
696
  const randomSuffix = Math.random().toString(36).substring(2, 7);
562
697
  const pipelineBranch = `cursorflow/run-${Date.now().toString(36)}-${randomSuffix}`;
563
698
 
564
- // Stall detection configuration
565
- const stallConfig: StallDetectionConfig = {
699
+ // Initialize unified stall detection service (Single Source of Truth)
700
+ const stallService = getStallService({
566
701
  ...DEFAULT_ORCHESTRATOR_STALL_CONFIG,
567
702
  ...options.stallConfig,
568
- };
569
-
570
- // Initialize auto-recovery manager
571
- const autoRecoveryManager = getAutoRecoveryManager({
572
- ...DEFAULT_AUTO_RECOVERY_CONFIG,
573
- idleTimeoutMs: stallConfig.idleTimeoutMs, // Sync with stall config
574
- ...options.autoRecoveryConfig,
703
+ verbose: process.env['DEBUG_STALL'] === 'true',
575
704
  });
576
705
 
577
706
  // Initialize event system
@@ -632,6 +761,7 @@ export async function orchestrate(tasksDir: string, options: {
632
761
 
633
762
  laneWorktreeDirs[lane.name] = laneWorktreeDir;
634
763
 
764
+ logger.info(`🏗️ Initializing lane ${lane.name}: branch=${lanePipelineBranch}`);
635
765
  const initialState = createLaneState(lane.name, taskConfig, lane.path, {
636
766
  pipelineBranch: lanePipelineBranch,
637
767
  worktreeDir: laneWorktreeDir
@@ -647,21 +777,6 @@ export async function orchestrate(tasksDir: string, options: {
647
777
  logger.info(`Run directory: ${runRoot}`);
648
778
  logger.info(`Lanes: ${lanes.length}`);
649
779
 
650
- // Display dependency graph
651
- logger.info('\n📊 Dependency Graph:');
652
- for (const lane of lanes) {
653
- const deps = lane.dependsOn.length > 0 ? ` [depends on: ${lane.dependsOn.join(', ')}]` : '';
654
- console.log(` ${logger.COLORS.cyan}${lane.name}${logger.COLORS.reset}${deps}`);
655
-
656
- // Simple tree-like visualization for deep dependencies
657
- if (lane.dependsOn.length > 0) {
658
- for (const dep of lane.dependsOn) {
659
- console.log(` └─ ${dep}`);
660
- }
661
- }
662
- }
663
- console.log('');
664
-
665
780
  // Disable auto-resolve when noGit mode is enabled
666
781
  const autoResolve = !options.noGit && options.autoResolveDependencies !== false;
667
782
 
@@ -696,29 +811,12 @@ export async function orchestrate(tasksDir: string, options: {
696
811
 
697
812
  try {
698
813
  while (completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length || (blockedLanes.size > 0 && running.size === 0)) {
699
- // 1. Identify lanes ready to start
814
+ // 1. Identify lanes ready to start (all lanes can start immediately - no lane-level dependencies)
700
815
  const readyToStart = lanes.filter(lane => {
701
816
  // Not already running or completed or failed or blocked
702
817
  if (running.has(lane.name) || completedLanes.has(lane.name) || failedLanes.has(lane.name) || blockedLanes.has(lane.name)) {
703
818
  return false;
704
819
  }
705
-
706
- // Check dependencies
707
- for (const dep of lane.dependsOn) {
708
- if (failedLanes.has(dep)) {
709
- logger.error(`Lane ${lane.name} will not start because dependency ${dep} failed`);
710
- failedLanes.add(lane.name);
711
- exitCodes[lane.name] = 1;
712
- return false;
713
- }
714
- if (blockedLanes.has(dep)) {
715
- // If a dependency is blocked, wait
716
- return false;
717
- }
718
- if (!completedLanes.has(dep)) {
719
- return false;
720
- }
721
- }
722
820
  return true;
723
821
  });
724
822
 
@@ -737,23 +835,23 @@ export async function orchestrate(tasksDir: string, options: {
737
835
  logger.info(`Lane started: ${lane.name}${lane.startIndex ? ` (resuming from ${lane.startIndex})` : ''}`);
738
836
 
739
837
  const now = Date.now();
740
- // Pre-register lane in running map so onActivity can find it immediately
838
+
839
+ // Register lane with unified stall detection service FIRST
840
+ stallService.registerLane(lane.name, {
841
+ laneRunDir: laneRunDirs[lane.name]!,
842
+ });
843
+
844
+ const laneIdx = lanes.findIndex(l => l.name === lane.name);
845
+
846
+ // Pre-register lane in running map
741
847
  running.set(lane.name, {
742
848
  child: {} as any, // Placeholder, will be replaced below
743
849
  logManager: undefined,
744
850
  logPath: '',
745
- lastActivity: now,
746
- lastStateUpdate: now,
747
- stallPhase: 0,
748
- taskStartTime: now,
749
- lastOutput: '',
750
851
  statePath: laneStatePath,
751
- bytesReceived: 0,
752
- lastBytesCheck: 0,
753
- continueSignalsSent: 0,
852
+ laneIndex: laneIdx >= 0 ? laneIdx : 0,
754
853
  });
755
854
 
756
- let lastOutput = '';
757
855
  const spawnResult = spawnLane({
758
856
  laneName: lane.name,
759
857
  tasksFile: lane.path,
@@ -764,55 +862,40 @@ export async function orchestrate(tasksDir: string, options: {
764
862
  worktreeDir: laneWorktreeDirs[lane.name],
765
863
  enhancedLogConfig: options.enhancedLogging,
766
864
  noGit: options.noGit,
865
+ laneIndex: laneIdx >= 0 ? laneIdx : 0,
767
866
  onActivity: () => {
768
- const info = running.get(lane.name);
769
- if (info) {
770
- const actNow = Date.now();
771
- info.lastActivity = actNow;
772
- info.lastStateUpdate = actNow;
773
- info.stallPhase = 0;
774
- }
867
+ // Record state file update activity
868
+ stallService.recordStateUpdate(lane.name);
775
869
  }
776
870
  });
777
871
 
778
872
  // Update with actual spawn result
779
873
  const existingInfo = running.get(lane.name)!;
780
- Object.assign(existingInfo, spawnResult);
874
+ Object.assign(existingInfo, spawnResult.info);
875
+
876
+ // Update stall service with child process reference
877
+ stallService.setChildProcess(lane.name, spawnResult.child);
781
878
 
782
- // Track last output and bytes received for long operation and stall detection
879
+ // Track stdout for activity detection - delegate to StallDetectionService
783
880
  if (spawnResult.child.stdout) {
784
881
  spawnResult.child.stdout.on('data', (data: Buffer) => {
785
- const info = running.get(lane.name);
786
- if (info) {
787
- const output = data.toString();
788
- const lines = output.split('\n').filter(l => l.trim());
789
-
790
- // Filter out heartbeats from activity tracking to avoid resetting stall detection
791
- const realLines = lines.filter(line => !(line.includes('Heartbeat') && line.includes('bytes received')));
792
-
793
- if (realLines.length > 0) {
794
- // Real activity detected - update lastActivity to reset stall timer
795
- const actNow = Date.now();
796
- info.lastActivity = actNow;
797
- info.stallPhase = 0; // Reset stall phase on real activity
798
-
799
- const lastRealLine = realLines[realLines.length - 1]!;
800
- info.lastOutput = lastRealLine;
801
- info.bytesReceived += data.length;
802
-
803
- // Update auto-recovery manager with real activity
804
- autoRecoveryManager.recordActivity(lane.name, data.length, info.lastOutput);
805
- } else if (lines.length > 0) {
806
- // Only heartbeats received - do NOT update lastActivity (keep stall timer running)
807
- autoRecoveryManager.recordActivity(lane.name, 0, info.lastOutput);
808
- }
882
+ const output = data.toString();
883
+ const lines = output.split('\n').filter(l => l.trim());
884
+
885
+ // Filter out heartbeats from activity tracking
886
+ const realLines = lines.filter(line => !(line.includes('Heartbeat') && line.includes('bytes received')));
887
+
888
+ if (realLines.length > 0) {
889
+ // Real activity - record with bytes
890
+ const lastRealLine = realLines[realLines.length - 1]!;
891
+ stallService.recordActivity(lane.name, data.length, lastRealLine);
892
+ } else if (lines.length > 0) {
893
+ // Heartbeat only - record with 0 bytes (won't reset timer)
894
+ stallService.recordActivity(lane.name, 0);
809
895
  }
810
896
  });
811
897
  }
812
898
 
813
- // Register lane with auto-recovery manager
814
- autoRecoveryManager.registerLane(lane.name);
815
-
816
899
  // Update lane tracking
817
900
  lane.taskStartTime = now;
818
901
 
@@ -843,234 +926,47 @@ export async function orchestrate(tasksDir: string, options: {
843
926
  if (result.name === '__poll__' || (now - lastStallCheck >= 10000)) {
844
927
  lastStallCheck = now;
845
928
 
846
- // Periodic stall check with multi-layer detection and escalating recovery
929
+ // Periodic stall check using unified StallDetectionService
847
930
  for (const [laneName, info] of running.entries()) {
848
- const idleTime = now - info.lastActivity;
849
931
  const lane = lanes.find(l => l.name === laneName)!;
850
932
 
851
- if (process.env['DEBUG_STALL']) {
852
- logger.debug(`[${laneName}] Stall check: idle=${Math.round(idleTime/1000)}s, bytesDelta=${info.bytesReceived - info.lastBytesCheck}, phase=${info.stallPhase}`);
853
- }
854
-
855
933
  // Check state file for progress updates
856
- let progressTime = 0;
857
934
  try {
858
935
  const stateStat = fs.statSync(info.statePath);
859
- const stateUpdateTime = stateStat.mtimeMs;
860
- if (stateUpdateTime > info.lastStateUpdate) {
861
- info.lastStateUpdate = stateUpdateTime;
936
+ const stallState = stallService.getState(laneName);
937
+ if (stallState && stateStat.mtimeMs > stallState.lastStateUpdateTime) {
938
+ stallService.recordStateUpdate(laneName);
862
939
  }
863
- progressTime = now - info.lastStateUpdate;
864
940
  } catch {
865
941
  // State file might not exist yet
866
942
  }
867
943
 
868
- // Calculate bytes received since last check
869
- const bytesDelta = info.bytesReceived - info.lastBytesCheck;
870
- info.lastBytesCheck = info.bytesReceived;
944
+ // Debug logging
945
+ if (process.env['DEBUG_STALL']) {
946
+ logger.debug(`[${laneName}] ${stallService.dumpState(laneName)}`);
947
+ }
871
948
 
872
- // Use multi-layer stall analysis with enhanced context
873
- const analysis = analyzeStall({
874
- stallPhase: info.stallPhase,
875
- idleTimeMs: idleTime,
876
- progressTimeMs: progressTime,
877
- lastOutput: info.lastOutput,
878
- restartCount: lane.restartCount || 0,
879
- taskStartTimeMs: info.taskStartTime,
880
- bytesReceived: bytesDelta, // Bytes since last check
881
- continueSignalsSent: info.continueSignalsSent,
882
- }, stallConfig);
949
+ // Run stall analysis and recovery (all logic is in StallDetectionService)
950
+ const analysis = stallService.checkAndRecover(laneName);
883
951
 
884
- // Only act if action is not NONE
952
+ // Log to lane log manager if there was an action
885
953
  if (analysis.action !== RecoveryAction.NONE) {
886
- logFailure(laneName, analysis);
887
954
  info.logManager?.log('error', analysis.message);
888
-
889
- if (analysis.action === RecoveryAction.CONTINUE_SIGNAL) {
890
- const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
891
- try {
892
- fs.writeFileSync(interventionPath, 'continue');
893
- info.stallPhase = 1;
894
- info.lastActivity = now;
895
- info.continueSignalsSent++;
896
- logger.info(`[${laneName}] Sent continue signal (#${info.continueSignalsSent})`);
897
-
898
- events.emit('recovery.continue_signal', {
899
- laneName,
900
- idleSeconds: Math.round(idleTime / 1000),
901
- signalCount: info.continueSignalsSent,
902
- });
903
- } catch (e) {
904
- logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
905
- }
906
- } else if (analysis.action === RecoveryAction.STRONGER_PROMPT) {
907
- const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
908
- const strongerPrompt = `[SYSTEM INTERVENTION] You seem to be stuck. Please continue with your current task immediately. If you're waiting for something, explain what you need and proceed with what you can do now. If you've completed the task, summarize your work and finish.`;
909
- try {
910
- fs.writeFileSync(interventionPath, strongerPrompt);
911
- info.stallPhase = 2;
912
- info.lastActivity = now;
913
- logger.warn(`[${laneName}] Sent stronger prompt after continue signal failed`);
914
-
915
- events.emit('recovery.stronger_prompt', { laneName });
916
- } catch (e) {
917
- logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
918
- }
919
- } else if (analysis.action === RecoveryAction.KILL_AND_RESTART ||
920
- analysis.action === RecoveryAction.RESTART_LANE ||
921
- analysis.action === RecoveryAction.RESTART_LANE_FROM_CHECKPOINT) {
922
- lane.restartCount = (lane.restartCount || 0) + 1;
923
- info.stallPhase = 3;
924
-
925
- // Try to get checkpoint info
926
- const checkpoint = getLatestCheckpoint(laneRunDirs[laneName]!);
927
- if (checkpoint) {
928
- logger.info(`[${laneName}] Checkpoint available: ${checkpoint.id} (task ${checkpoint.taskIndex})`);
929
- }
930
-
931
- // Kill the process
932
- try {
933
- info.child.kill('SIGKILL');
934
- } catch {
935
- // Process might already be dead
936
- }
937
-
938
- logger.warn(`[${laneName}] Killing and restarting lane (restart #${lane.restartCount})`);
939
-
940
- events.emit('recovery.restart', {
941
- laneName,
942
- restartCount: lane.restartCount,
943
- maxRestarts: stallConfig.maxRestarts,
944
- });
945
- } else if (analysis.action === RecoveryAction.RUN_DOCTOR) {
946
- info.stallPhase = 4;
947
-
948
- // Run diagnostics
949
- logger.error(`[${laneName}] Running diagnostics due to persistent failures...`);
950
-
951
- // Import health check dynamically to avoid circular dependency
952
- const { checkAgentHealth, checkAuthHealth } = await import('../utils/health');
953
-
954
- const [agentHealth, authHealth] = await Promise.all([
955
- checkAgentHealth(),
956
- checkAuthHealth(),
957
- ]);
958
-
959
- const issues: string[] = [];
960
- if (!agentHealth.ok) issues.push(`Agent: ${agentHealth.message}`);
961
- if (!authHealth.ok) issues.push(`Auth: ${authHealth.message}`);
962
-
963
- if (issues.length > 0) {
964
- logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
965
- } else {
966
- logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
967
- }
968
-
969
- // Save diagnostic to file
970
- const diagnosticPath = safeJoin(laneRunDirs[laneName]!, 'diagnostic.json');
971
- fs.writeFileSync(diagnosticPath, JSON.stringify({
972
- timestamp: Date.now(),
973
- agentHealthy: agentHealth.ok,
974
- authHealthy: authHealth.ok,
975
- issues,
976
- analysis,
977
- }, null, 2));
978
-
979
- // Kill the process
980
- try {
981
- info.child.kill('SIGKILL');
982
- } catch {
983
- // Process might already be dead
984
- }
985
-
986
- logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
987
-
988
- // Save POF for failed recovery
989
- const recoveryState = autoRecoveryManager.getState(laneName);
990
- if (recoveryState) {
991
- try {
992
- const laneStatePath = safeJoin(laneRunDirs[laneName]!, 'state.json');
993
- const laneState = loadState<LaneState>(laneStatePath);
994
- const pofDir = safeJoin(runRoot, '..', '..', 'pof');
995
- const diagnosticInfo = {
996
- timestamp: Date.now(),
997
- agentHealthy: agentHealth.ok,
998
- authHealthy: authHealth.ok,
999
- systemHealthy: true,
1000
- suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
1001
- details: issues.join('\n') || 'No obvious issues found',
1002
- };
1003
- const pofEntry = createPOFFromRecoveryState(
1004
- runId,
1005
- runRoot,
1006
- laneName,
1007
- recoveryState,
1008
- laneState,
1009
- diagnosticInfo
1010
- );
1011
- savePOF(runId, pofDir, pofEntry);
1012
- } catch (pofError: any) {
1013
- logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
1014
- }
1015
- }
1016
-
1017
- events.emit('recovery.diagnosed', {
1018
- laneName,
1019
- diagnostic: { agentHealthy: agentHealth.ok, authHealthy: authHealth.ok, issues },
1020
- });
1021
- } else if (analysis.action === RecoveryAction.ABORT_LANE) {
1022
- info.stallPhase = 5;
1023
-
1024
- try {
1025
- info.child.kill('SIGKILL');
1026
- } catch {
1027
- // Process might already be dead
1028
- }
1029
-
1030
- logger.error(`[${laneName}] Aborting lane due to repeated stalls`);
1031
-
1032
- // Save POF for failed recovery
1033
- const recoveryState = autoRecoveryManager.getState(laneName);
1034
- if (recoveryState) {
1035
- try {
1036
- const laneStatePath = safeJoin(laneRunDirs[laneName]!, 'state.json');
1037
- const laneState = loadState<LaneState>(laneStatePath);
1038
- const pofDir = safeJoin(runRoot, '..', '..', 'pof');
1039
- const pofEntry = createPOFFromRecoveryState(
1040
- runId,
1041
- runRoot,
1042
- laneName,
1043
- recoveryState,
1044
- laneState,
1045
- recoveryState.diagnosticInfo
1046
- );
1047
- savePOF(runId, pofDir, pofEntry);
1048
- } catch (pofError: any) {
1049
- logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
1050
- }
1051
- }
1052
- } else if (analysis.action === RecoveryAction.SEND_GIT_GUIDANCE) {
1053
- // Send guidance message to agent for git issues
1054
- const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
1055
-
1056
- // Determine which guidance to send based on the failure type
1057
- let guidance: string;
1058
- if (analysis.type === FailureType.GIT_PUSH_REJECTED) {
1059
- guidance = getGitPushFailureGuidance();
1060
- } else if (analysis.type === FailureType.MERGE_CONFLICT) {
1061
- guidance = getMergeConflictGuidance();
1062
- } else {
1063
- guidance = getGitErrorGuidance(analysis.message);
1064
- }
1065
-
1066
- try {
1067
- fs.writeFileSync(interventionPath, guidance);
1068
- info.lastActivity = now;
1069
- logger.info(`[${laneName}] Sent git issue guidance to agent`);
1070
- } catch (e: any) {
1071
- logger.error(`[${laneName}] Failed to send guidance: ${e.message}`);
1072
- }
955
+
956
+ // Handle special case: RUN_DOCTOR needs async operations
957
+ if (analysis.action === RecoveryAction.RUN_DOCTOR) {
958
+ await handleDoctorDiagnostics(
959
+ laneName,
960
+ laneRunDirs[laneName]!,
961
+ runId,
962
+ runRoot,
963
+ stallService,
964
+ info.child
965
+ );
1073
966
  }
967
+
968
+ // Sync restartCount back to lane info (for restart logic in process exit handler)
969
+ lane.restartCount = stallService.getRestartCount(laneName);
1074
970
  }
1075
971
  }
1076
972
  continue;
@@ -1080,8 +976,11 @@ export async function orchestrate(tasksDir: string, options: {
1080
976
  running.delete(finished.name);
1081
977
  exitCodes[finished.name] = finished.code;
1082
978
 
1083
- // Unregister from auto-recovery manager
1084
- autoRecoveryManager.unregisterLane(finished.name);
979
+ // Get stall state before unregistering
980
+ const stallPhase = stallService.getPhase(finished.name);
981
+
982
+ // Unregister from stall detection service
983
+ stallService.unregisterLane(finished.name);
1085
984
 
1086
985
  if (finished.code === 0) {
1087
986
  completedLanes.add(finished.name);
@@ -1111,8 +1010,8 @@ export async function orchestrate(tasksDir: string, options: {
1111
1010
  logger.error(`Lane ${finished.name} exited with code 2 but no dependency request found`);
1112
1011
  }
1113
1012
  } else {
1114
- // Check if it was a restart request
1115
- if (info.stallPhase === 2) {
1013
+ // Check if it was a restart request (RESTART_REQUESTED phase)
1014
+ if (stallPhase === StallPhase.RESTART_REQUESTED) {
1116
1015
  logger.info(`🔄 Lane ${finished.name} is being restarted due to stall...`);
1117
1016
 
1118
1017
  // Update startIndex from current state to resume from the same task
@@ -1133,7 +1032,7 @@ export async function orchestrate(tasksDir: string, options: {
1133
1032
  failedLanes.add(finished.name);
1134
1033
 
1135
1034
  let errorMsg = 'Process exited with non-zero code';
1136
- if (info.stallPhase === 3) {
1035
+ if (stallPhase >= StallPhase.DIAGNOSED) {
1137
1036
  errorMsg = 'Stopped due to repeated stall';
1138
1037
  } else if (info.logManager) {
1139
1038
  const lastError = info.logManager.getLastError();