@litmers/cursorflow-orchestrator 0.1.31 → 0.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +182 -59
  3. package/commands/cursorflow-add.md +159 -0
  4. package/commands/cursorflow-doctor.md +45 -23
  5. package/commands/cursorflow-monitor.md +23 -2
  6. package/commands/cursorflow-new.md +87 -0
  7. package/commands/cursorflow-run.md +60 -111
  8. package/dist/cli/add.d.ts +7 -0
  9. package/dist/cli/add.js +377 -0
  10. package/dist/cli/add.js.map +1 -0
  11. package/dist/cli/clean.js +1 -0
  12. package/dist/cli/clean.js.map +1 -1
  13. package/dist/cli/config.d.ts +7 -0
  14. package/dist/cli/config.js +181 -0
  15. package/dist/cli/config.js.map +1 -0
  16. package/dist/cli/doctor.js +47 -4
  17. package/dist/cli/doctor.js.map +1 -1
  18. package/dist/cli/index.js +34 -30
  19. package/dist/cli/index.js.map +1 -1
  20. package/dist/cli/logs.js +17 -34
  21. package/dist/cli/logs.js.map +1 -1
  22. package/dist/cli/monitor.js +62 -65
  23. package/dist/cli/monitor.js.map +1 -1
  24. package/dist/cli/new.d.ts +7 -0
  25. package/dist/cli/new.js +232 -0
  26. package/dist/cli/new.js.map +1 -0
  27. package/dist/cli/prepare.js +95 -193
  28. package/dist/cli/prepare.js.map +1 -1
  29. package/dist/cli/resume.js +57 -68
  30. package/dist/cli/resume.js.map +1 -1
  31. package/dist/cli/run.js +60 -30
  32. package/dist/cli/run.js.map +1 -1
  33. package/dist/cli/stop.js +6 -0
  34. package/dist/cli/stop.js.map +1 -1
  35. package/dist/cli/tasks.d.ts +5 -3
  36. package/dist/cli/tasks.js +181 -29
  37. package/dist/cli/tasks.js.map +1 -1
  38. package/dist/core/failure-policy.d.ts +9 -0
  39. package/dist/core/failure-policy.js +9 -0
  40. package/dist/core/failure-policy.js.map +1 -1
  41. package/dist/core/orchestrator.d.ts +20 -6
  42. package/dist/core/orchestrator.js +215 -334
  43. package/dist/core/orchestrator.js.map +1 -1
  44. package/dist/core/runner/agent.d.ts +27 -0
  45. package/dist/core/runner/agent.js +294 -0
  46. package/dist/core/runner/agent.js.map +1 -0
  47. package/dist/core/runner/index.d.ts +5 -0
  48. package/dist/core/runner/index.js +22 -0
  49. package/dist/core/runner/index.js.map +1 -0
  50. package/dist/core/runner/pipeline.d.ts +9 -0
  51. package/dist/core/runner/pipeline.js +539 -0
  52. package/dist/core/runner/pipeline.js.map +1 -0
  53. package/dist/core/runner/prompt.d.ts +25 -0
  54. package/dist/core/runner/prompt.js +175 -0
  55. package/dist/core/runner/prompt.js.map +1 -0
  56. package/dist/core/runner/task.d.ts +26 -0
  57. package/dist/core/runner/task.js +283 -0
  58. package/dist/core/runner/task.js.map +1 -0
  59. package/dist/core/runner/utils.d.ts +37 -0
  60. package/dist/core/runner/utils.js +161 -0
  61. package/dist/core/runner/utils.js.map +1 -0
  62. package/dist/core/runner.d.ts +2 -96
  63. package/dist/core/runner.js +11 -1136
  64. package/dist/core/runner.js.map +1 -1
  65. package/dist/core/stall-detection.d.ts +326 -0
  66. package/dist/core/stall-detection.js +781 -0
  67. package/dist/core/stall-detection.js.map +1 -0
  68. package/dist/services/logging/console.js +2 -1
  69. package/dist/services/logging/console.js.map +1 -1
  70. package/dist/types/config.d.ts +6 -6
  71. package/dist/types/flow.d.ts +84 -0
  72. package/dist/types/flow.js +10 -0
  73. package/dist/types/flow.js.map +1 -0
  74. package/dist/types/index.d.ts +1 -0
  75. package/dist/types/index.js +3 -3
  76. package/dist/types/index.js.map +1 -1
  77. package/dist/types/lane.d.ts +0 -2
  78. package/dist/types/logging.d.ts +5 -1
  79. package/dist/types/task.d.ts +7 -11
  80. package/dist/utils/config.d.ts +5 -1
  81. package/dist/utils/config.js +15 -16
  82. package/dist/utils/config.js.map +1 -1
  83. package/dist/utils/dependency.d.ts +36 -1
  84. package/dist/utils/dependency.js +256 -1
  85. package/dist/utils/dependency.js.map +1 -1
  86. package/dist/utils/doctor.js +40 -8
  87. package/dist/utils/doctor.js.map +1 -1
  88. package/dist/utils/enhanced-logger.d.ts +45 -82
  89. package/dist/utils/enhanced-logger.js +239 -844
  90. package/dist/utils/enhanced-logger.js.map +1 -1
  91. package/dist/utils/flow.d.ts +9 -0
  92. package/dist/utils/flow.js +73 -0
  93. package/dist/utils/flow.js.map +1 -0
  94. package/dist/utils/git.d.ts +29 -0
  95. package/dist/utils/git.js +115 -5
  96. package/dist/utils/git.js.map +1 -1
  97. package/dist/utils/state.js +0 -2
  98. package/dist/utils/state.js.map +1 -1
  99. package/dist/utils/task-service.d.ts +2 -2
  100. package/dist/utils/task-service.js +40 -31
  101. package/dist/utils/task-service.js.map +1 -1
  102. package/package.json +4 -3
  103. package/src/cli/add.ts +397 -0
  104. package/src/cli/clean.ts +1 -0
  105. package/src/cli/config.ts +177 -0
  106. package/src/cli/doctor.ts +48 -4
  107. package/src/cli/index.ts +36 -32
  108. package/src/cli/logs.ts +20 -33
  109. package/src/cli/monitor.ts +70 -75
  110. package/src/cli/new.ts +235 -0
  111. package/src/cli/prepare.ts +98 -205
  112. package/src/cli/resume.ts +61 -76
  113. package/src/cli/run.ts +333 -306
  114. package/src/cli/stop.ts +8 -0
  115. package/src/cli/tasks.ts +200 -21
  116. package/src/core/failure-policy.ts +9 -0
  117. package/src/core/orchestrator.ts +279 -379
  118. package/src/core/runner/agent.ts +314 -0
  119. package/src/core/runner/index.ts +6 -0
  120. package/src/core/runner/pipeline.ts +567 -0
  121. package/src/core/runner/prompt.ts +174 -0
  122. package/src/core/runner/task.ts +320 -0
  123. package/src/core/runner/utils.ts +142 -0
  124. package/src/core/runner.ts +8 -1347
  125. package/src/core/stall-detection.ts +936 -0
  126. package/src/services/logging/console.ts +2 -1
  127. package/src/types/config.ts +6 -6
  128. package/src/types/flow.ts +91 -0
  129. package/src/types/index.ts +15 -3
  130. package/src/types/lane.ts +0 -2
  131. package/src/types/logging.ts +5 -1
  132. package/src/types/task.ts +7 -11
  133. package/src/utils/config.ts +16 -17
  134. package/src/utils/dependency.ts +311 -2
  135. package/src/utils/doctor.ts +36 -8
  136. package/src/utils/enhanced-logger.ts +264 -927
  137. package/src/utils/flow.ts +42 -0
  138. package/src/utils/git.ts +145 -5
  139. package/src/utils/state.ts +0 -2
  140. package/src/utils/task-service.ts +48 -40
  141. package/commands/cursorflow-review.md +0 -56
  142. package/commands/cursorflow-runs.md +0 -59
  143. package/dist/cli/runs.d.ts +0 -5
  144. package/dist/cli/runs.js +0 -214
  145. package/dist/cli/runs.js.map +0 -1
  146. package/dist/core/reviewer.d.ts +0 -66
  147. package/dist/core/reviewer.js +0 -265
  148. package/dist/core/reviewer.js.map +0 -1
  149. package/src/cli/runs.ts +0 -212
  150. package/src/core/reviewer.ts +0 -285
@@ -25,28 +25,35 @@ import {
25
25
  EnhancedLogManager,
26
26
  createLogManager,
27
27
  DEFAULT_LOG_CONFIG,
28
- ParsedMessage
28
+ ParsedMessage,
29
+ stripAnsi
29
30
  } from '../utils/enhanced-logger';
30
31
  import { formatMessageForConsole } from '../utils/log-formatter';
31
- import { analyzeStall, RecoveryAction, logFailure, DEFAULT_STALL_CONFIG, StallDetectionConfig, FailureType } from './failure-policy';
32
+ import { FailureType, analyzeFailure as analyzeFailureFromPolicy } from './failure-policy';
32
33
  import {
33
- getAutoRecoveryManager,
34
- DEFAULT_AUTO_RECOVERY_CONFIG,
35
- AutoRecoveryConfig,
36
34
  savePOF,
37
35
  createPOFFromRecoveryState,
38
36
  getGitPushFailureGuidance,
39
37
  getMergeConflictGuidance,
40
38
  getGitErrorGuidance,
39
+ LaneRecoveryState,
41
40
  } from './auto-recovery';
41
+ import {
42
+ StallDetectionService,
43
+ getStallService,
44
+ StallDetectionConfig,
45
+ DEFAULT_STALL_CONFIG,
46
+ RecoveryAction,
47
+ StallPhase,
48
+ StallAnalysis,
49
+ } from './stall-detection';
42
50
  import { detectCyclicDependencies, validateDependencies, printDependencyGraph, DependencyInfo } from '../utils/dependency';
43
51
  import { preflightCheck, printPreflightReport, autoRepair } from '../utils/health';
44
52
  import { getLatestCheckpoint } from '../utils/checkpoint';
45
53
  import { cleanStaleLocks, getLockDir } from '../utils/lock';
46
54
 
47
55
  /** Default stall detection configuration - 2 minute idle timeout for recovery */
48
- const DEFAULT_ORCHESTRATOR_STALL_CONFIG: StallDetectionConfig = {
49
- ...DEFAULT_STALL_CONFIG,
56
+ const DEFAULT_ORCHESTRATOR_STALL_CONFIG: Partial<StallDetectionConfig> = {
50
57
  idleTimeoutMs: 2 * 60 * 1000, // 2 minutes (idle detection for continue signal)
51
58
  progressTimeoutMs: 10 * 60 * 1000, // 10 minutes (only triggers if no activity at all)
52
59
  maxRestarts: 2,
@@ -55,7 +62,6 @@ const DEFAULT_ORCHESTRATOR_STALL_CONFIG: StallDetectionConfig = {
55
62
  export interface LaneInfo {
56
63
  name: string;
57
64
  path: string;
58
- dependsOn: string[];
59
65
  startIndex?: number; // Current task index to resume from
60
66
  restartCount?: number; // Number of times restarted due to stall
61
67
  lastStateUpdate?: number; // Timestamp of last state file update
@@ -66,24 +72,22 @@ export interface SpawnLaneResult {
66
72
  child: ChildProcess;
67
73
  logPath: string;
68
74
  logManager?: EnhancedLogManager;
75
+ info: RunningLaneInfo;
69
76
  }
70
77
 
71
78
  /**
72
79
  * Lane execution tracking info
80
+ *
81
+ * NOTE: Stall 감지 관련 상태(lastActivity, stallPhase 등)는 StallDetectionService에서 관리
82
+ * 여기서는 프로세스 관리에 필요한 최소한의 정보만 유지
73
83
  */
74
84
  interface RunningLaneInfo {
75
85
  child: ChildProcess;
76
86
  logPath: string;
77
87
  logManager?: EnhancedLogManager;
78
- lastActivity: number;
79
- lastStateUpdate: number;
80
- stallPhase: number; // 0: normal, 1: continued, 2: stronger_prompt, 3: restarted
81
- taskStartTime: number;
82
- lastOutput: string;
83
88
  statePath: string;
84
- bytesReceived: number; // Total bytes received from agent
85
- lastBytesCheck: number; // Bytes at last check (for delta calculation)
86
- continueSignalsSent: number; // Number of continue signals sent
89
+ laneIndex: number;
90
+ currentTaskIndex?: number;
87
91
  }
88
92
 
89
93
  /**
@@ -106,6 +110,109 @@ function logFileTail(filePath: string, lines: number = 10): void {
106
110
  }
107
111
  }
108
112
 
113
+ /**
114
+ * Handle RUN_DOCTOR action - runs async health diagnostics
115
+ */
116
+ async function handleDoctorDiagnostics(
117
+ laneName: string,
118
+ laneRunDir: string,
119
+ runId: string,
120
+ runRoot: string,
121
+ stallService: StallDetectionService,
122
+ child: ChildProcess
123
+ ): Promise<void> {
124
+ // Import health check dynamically to avoid circular dependency
125
+ const { checkAgentHealth, checkAuthHealth } = await import('../utils/health');
126
+
127
+ const [agentHealth, authHealth] = await Promise.all([
128
+ checkAgentHealth(),
129
+ checkAuthHealth(),
130
+ ]);
131
+
132
+ const issues: string[] = [];
133
+ if (!agentHealth.ok) issues.push(`Agent: ${agentHealth.message}`);
134
+ if (!authHealth.ok) issues.push(`Auth: ${authHealth.message}`);
135
+
136
+ if (issues.length > 0) {
137
+ logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
138
+ } else {
139
+ logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
140
+ }
141
+
142
+ // Save diagnostic to file
143
+ const diagnosticPath = safeJoin(laneRunDir, 'diagnostic.json');
144
+ fs.writeFileSync(diagnosticPath, JSON.stringify({
145
+ timestamp: Date.now(),
146
+ agentHealthy: agentHealth.ok,
147
+ authHealthy: authHealth.ok,
148
+ issues,
149
+ }, null, 2));
150
+
151
+ // Kill the process
152
+ try {
153
+ child.kill('SIGKILL');
154
+ } catch {
155
+ // Process might already be dead
156
+ }
157
+
158
+ logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
159
+
160
+ // Save POF for failed recovery
161
+ const stallState = stallService.getState(laneName);
162
+ if (stallState) {
163
+ try {
164
+ const laneStatePath = safeJoin(laneRunDir, 'state.json');
165
+ const laneState = loadState<LaneState>(laneStatePath);
166
+ const pofDir = safeJoin(runRoot, '..', '..', 'pof');
167
+
168
+ // Convert stall state to recovery state format for POF
169
+ // Note: StallPhase and RecoveryStage have compatible numeric values (0-5)
170
+ const recoveryState: LaneRecoveryState = {
171
+ laneName,
172
+ stage: stallState.phase as unknown as number, // Both enums use 0-5
173
+ lastActivityTime: stallState.lastRealActivityTime,
174
+ lastBytesReceived: stallState.bytesSinceLastCheck,
175
+ totalBytesReceived: stallState.totalBytesReceived,
176
+ lastOutput: stallState.lastOutput,
177
+ restartCount: stallState.restartCount,
178
+ continueSignalsSent: stallState.continueSignalCount,
179
+ lastStageChangeTime: stallState.lastPhaseChangeTime,
180
+ isLongOperation: stallState.isLongOperation,
181
+ failureHistory: stallState.failureHistory.map(f => ({
182
+ timestamp: f.timestamp,
183
+ stage: f.phase as unknown as number, // Both enums use 0-5
184
+ action: f.action as string,
185
+ message: f.message,
186
+ idleTimeMs: f.idleTimeMs,
187
+ bytesReceived: f.bytesReceived,
188
+ lastOutput: f.lastOutput,
189
+ })),
190
+ };
191
+
192
+ const diagnosticInfo = {
193
+ timestamp: Date.now(),
194
+ agentHealthy: agentHealth.ok,
195
+ authHealthy: authHealth.ok,
196
+ systemHealthy: true,
197
+ suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
198
+ details: issues.join('\n') || 'No obvious issues found',
199
+ };
200
+
201
+ const pofEntry = createPOFFromRecoveryState(
202
+ runId,
203
+ runRoot,
204
+ laneName,
205
+ recoveryState,
206
+ laneState,
207
+ diagnosticInfo
208
+ );
209
+ savePOF(runId, pofDir, pofEntry);
210
+ } catch (pofError: any) {
211
+ logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
212
+ }
213
+ }
214
+ }
215
+
109
216
  /**
110
217
  * Spawn a lane process
111
218
  */
@@ -120,6 +227,7 @@ export function spawnLane({
120
227
  enhancedLogConfig,
121
228
  noGit = false,
122
229
  onActivity,
230
+ laneIndex = 0,
123
231
  }: {
124
232
  laneName: string;
125
233
  tasksFile: string;
@@ -131,6 +239,7 @@ export function spawnLane({
131
239
  enhancedLogConfig?: Partial<EnhancedLogConfig>;
132
240
  noGit?: boolean;
133
241
  onActivity?: () => void;
242
+ laneIndex?: number;
134
243
  }): SpawnLaneResult {
135
244
  fs.mkdirSync(laneRunDir, { recursive: true});
136
245
 
@@ -169,17 +278,25 @@ export function spawnLane({
169
278
  };
170
279
 
171
280
  if (logConfig.enabled) {
281
+ // Helper to get dynamic lane label like [L1-T1-lanename10]
282
+ const getDynamicLabel = () => {
283
+ const laneNum = `L${laneIndex + 1}`;
284
+ const taskPart = info.currentTaskIndex ? `-T${info.currentTaskIndex}` : '';
285
+ const shortLaneName = laneName.substring(0, 10);
286
+ return `[${laneNum}${taskPart}-${shortLaneName}]`;
287
+ };
288
+
172
289
  // Create callback for clean console output
173
290
  const onParsedMessage = (msg: ParsedMessage) => {
174
291
  if (onActivity) onActivity();
175
292
  const formatted = formatMessageForConsole(msg, {
176
- laneLabel: `[${laneName}]`,
293
+ laneLabel: getDynamicLabel(),
177
294
  includeTimestamp: true
178
295
  });
179
296
  process.stdout.write(formatted + '\n');
180
297
  };
181
298
 
182
- logManager = createLogManager(laneRunDir, laneName, logConfig, onParsedMessage);
299
+ logManager = createLogManager(laneRunDir, laneName, logConfig, onParsedMessage, laneIndex);
183
300
  logPath = logManager.getLogPaths().clean;
184
301
 
185
302
  // Spawn with pipe for enhanced logging
@@ -189,6 +306,16 @@ export function spawnLane({
189
306
  detached: false,
190
307
  });
191
308
 
309
+ // Initialize info object for stdout handler to use
310
+ const info: RunningLaneInfo = {
311
+ child,
312
+ logManager,
313
+ logPath,
314
+ statePath: safeJoin(laneRunDir, 'state.json'),
315
+ laneIndex,
316
+ currentTaskIndex: startIndex > 0 ? startIndex + 1 : 0
317
+ };
318
+
192
319
  // Buffer for non-JSON lines
193
320
  let lineBuffer = '';
194
321
 
@@ -205,24 +332,52 @@ export function spawnLane({
205
332
 
206
333
  for (const line of lines) {
207
334
  const trimmed = line.trim();
335
+ if (!trimmed) continue;
336
+
337
+ // Detect task start/progress to update label
338
+ // Example: [1/1] hello-task
339
+ const cleanLine = stripAnsi(trimmed);
340
+ const taskMatch = cleanLine.match(/^\s*\[(\d+)\/(\d+)\]\s+(.+)$/);
341
+ if (taskMatch) {
342
+ info.currentTaskIndex = parseInt(taskMatch[1]!);
343
+ // Update log manager's task index to keep it in sync for readable log
344
+ if (logManager) {
345
+ logManager.setTask(taskMatch[3]!.trim(), undefined, info.currentTaskIndex - 1);
346
+ }
347
+ }
348
+
208
349
  // Show if it's a timestamped log line (starts with [YYYY-MM-DD... or [HH:MM:SS])
209
350
  // or if it's NOT a noisy JSON line
210
- const hasTimestamp = /^\[\d{4}-\d{2}-\d{2}T|\^\[\d{2}:\d{2}:\d{2}\]/.test(trimmed);
211
351
  const isJson = trimmed.startsWith('{') || trimmed.includes('{"type"');
212
352
  // Filter out heartbeats - they should NOT reset the idle timer
213
353
  const isHeartbeat = trimmed.includes('Heartbeat') && trimmed.includes('bytes received');
214
354
 
215
- if (trimmed && !isJson) {
355
+ if (!isJson) {
216
356
  // Only trigger activity for non-heartbeat lines
217
357
  if (onActivity && !isHeartbeat) onActivity();
218
- // If line alreedy has timestamp format, just add lane prefix
219
- if (hasTimestamp) {
220
- // Insert lane name after first timestamp
221
- const formatted = trimmed.replace(/^(\[[^\]]+\])/, `$1 ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset}`);
222
- process.stdout.write(formatted + '\n');
358
+
359
+ const currentLabel = getDynamicLabel();
360
+ const coloredLabel = `${logger.COLORS.magenta}${currentLabel}${logger.COLORS.reset}`;
361
+
362
+ // Regex that matches timestamp even if it has ANSI color codes
363
+ // Matches: [24:39:14] or \x1b[90m[24:39:14]\x1b[0m
364
+ const timestampRegex = /^((?:\x1b\[[0-9;]*m)*)\[(\d{4}-\d{2}-\d{2}T|\d{2}:\d{2}:\d{2})\]/;
365
+ const tsMatch = trimmed.match(timestampRegex);
366
+
367
+ if (tsMatch) {
368
+ // If line already has timestamp format, just add lane prefix
369
+ // Check if lane label is already present to avoid triple duplication
370
+ if (!trimmed.includes(currentLabel)) {
371
+ // Insert label after the timestamp part
372
+ const tsPart = tsMatch[0];
373
+ const formatted = trimmed.replace(tsPart, `${tsPart} ${coloredLabel}`);
374
+ process.stdout.write(formatted + '\n');
375
+ } else {
376
+ process.stdout.write(trimmed + '\n');
377
+ }
223
378
  } else {
224
379
  // Add full prefix: timestamp + lane
225
- process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${line}\n`);
380
+ process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${coloredLabel} ${line}\n`);
226
381
  }
227
382
  }
228
383
  }
@@ -244,11 +399,14 @@ export function spawnLane({
244
399
  trimmed.includes('actual output');
245
400
 
246
401
  const ts = new Date().toLocaleTimeString('en-US', { hour12: false });
402
+ const currentLabel = getDynamicLabel();
403
+ const coloredLabel = `${logger.COLORS.magenta}${currentLabel}${logger.COLORS.reset}`;
404
+
247
405
  if (isStatus) {
248
- process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${trimmed}\n`);
406
+ process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${coloredLabel} ${trimmed}\n`);
249
407
  } else {
250
408
  if (onActivity) onActivity();
251
- process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${logger.COLORS.magenta}[${laneName}]${logger.COLORS.reset} ${logger.COLORS.red}❌ ERR ${trimmed}${logger.COLORS.reset}\n`);
409
+ process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${coloredLabel} ${logger.COLORS.red}❌ ERR ${trimmed}${logger.COLORS.reset}\n`);
252
410
  }
253
411
  }
254
412
  }
@@ -259,9 +417,11 @@ export function spawnLane({
259
417
  child.on('exit', () => {
260
418
  logManager?.close();
261
419
  });
420
+
421
+ return { child, logPath, logManager, info };
262
422
  } else {
263
423
  // Fallback to simple file logging
264
- logPath = safeJoin(laneRunDir, 'terminal.log');
424
+ logPath = safeJoin(laneRunDir, 'terminal-readable.log');
265
425
  const logFd = fs.openSync(logPath, 'a');
266
426
 
267
427
  child = spawn('node', args, {
@@ -275,9 +435,19 @@ export function spawnLane({
275
435
  } catch {
276
436
  // Ignore
277
437
  }
438
+
439
+ return {
440
+ child,
441
+ logPath,
442
+ logManager,
443
+ info: {
444
+ child,
445
+ logPath,
446
+ statePath: safeJoin(laneRunDir, 'state.json'),
447
+ laneIndex
448
+ }
449
+ };
278
450
  }
279
-
280
- return { child, logPath, logManager };
281
451
  }
282
452
 
283
453
  /**
@@ -296,7 +466,7 @@ export function waitChild(proc: ChildProcess): Promise<number> {
296
466
  }
297
467
 
298
468
  /**
299
- * List lane task files in directory and load their configs for dependencies
469
+ * List lane task files in directory
300
470
  */
301
471
  export function listLaneFiles(tasksDir: string): LaneInfo[] {
302
472
  if (!fs.existsSync(tasksDir)) {
@@ -305,24 +475,15 @@ export function listLaneFiles(tasksDir: string): LaneInfo[] {
305
475
 
306
476
  const files = fs.readdirSync(tasksDir);
307
477
  return files
308
- .filter(f => f.endsWith('.json'))
478
+ .filter(f => f.endsWith('.json') && f !== 'flow.meta.json')
309
479
  .sort()
310
480
  .map(f => {
311
481
  const filePath = safeJoin(tasksDir, f);
312
482
  const name = path.basename(f, '.json');
313
- let dependsOn: string[] = [];
314
-
315
- try {
316
- const config = JSON.parse(fs.readFileSync(filePath, 'utf8')) as RunnerConfig;
317
- dependsOn = config.dependsOn || [];
318
- } catch (e) {
319
- logger.warn(`Failed to parse config for lane ${name}: ${e}`);
320
- }
321
483
 
322
484
  return {
323
485
  name,
324
486
  path: filePath,
325
- dependsOn,
326
487
  };
327
488
  });
328
489
  }
@@ -339,8 +500,7 @@ export function printLaneStatus(lanes: LaneInfo[], laneRunDirs: Record<string, s
339
500
  const state = loadState<LaneState>(statePath);
340
501
 
341
502
  if (!state) {
342
- const isWaiting = lane.dependsOn.length > 0;
343
- return { lane: lane.name, status: isWaiting ? 'waiting' : 'pending', task: '-' };
503
+ return { lane: lane.name, status: 'pending', task: '-' };
344
504
  }
345
505
 
346
506
  const idx = (state.currentTaskIndex || 0) + 1;
@@ -388,12 +548,12 @@ async function resolveAllDependencies(
388
548
  const worktreeDir = state?.worktreeDir || safeJoin(runRoot, 'resolution-worktree');
389
549
 
390
550
  if (!fs.existsSync(worktreeDir)) {
391
- logger.info(`Creating resolution worktree at ${worktreeDir}`);
551
+ logger.info(`🏗️ Creating resolution worktree at ${worktreeDir}`);
392
552
  git.createWorktree(worktreeDir, pipelineBranch, { baseBranch: git.getCurrentBranch() });
393
553
  }
394
554
 
395
555
  // 3. Resolve on pipeline branch
396
- logger.info(`Resolving dependencies on ${pipelineBranch}`);
556
+ logger.info(`🔄 Resolving dependencies on branch ${pipelineBranch}`);
397
557
  git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });
398
558
 
399
559
  for (const cmd of uniqueCommands) {
@@ -474,7 +634,6 @@ export async function orchestrate(tasksDir: string, options: {
474
634
  noGit?: boolean;
475
635
  skipPreflight?: boolean;
476
636
  stallConfig?: Partial<StallDetectionConfig>;
477
- autoRecoveryConfig?: Partial<AutoRecoveryConfig>;
478
637
  } = {}): Promise<{ lanes: LaneInfo[]; exitCodes: Record<string, number>; runRoot: string }> {
479
638
  const lanes = listLaneFiles(tasksDir);
480
639
 
@@ -510,34 +669,11 @@ export async function orchestrate(tasksDir: string, options: {
510
669
  logger.success('✓ Preflight checks passed');
511
670
  }
512
671
 
513
- // Validate dependencies and detect cycles
514
- logger.section('📊 Dependency Analysis');
515
-
516
- const depInfos: DependencyInfo[] = lanes.map(l => ({
517
- name: l.name,
518
- dependsOn: l.dependsOn,
519
- }));
520
-
521
- const depValidation = validateDependencies(depInfos);
522
-
523
- if (!depValidation.valid) {
524
- logger.error('❌ Dependency validation failed:');
525
- for (const err of depValidation.errors) {
526
- logger.error(` • ${err}`);
527
- }
528
- throw new Error('Invalid dependency configuration');
529
- }
530
-
531
- if (depValidation.warnings.length > 0) {
532
- for (const warn of depValidation.warnings) {
533
- logger.warn(`⚠️ ${warn}`);
534
- }
535
- }
536
-
537
- // Print dependency graph
538
- printDependencyGraph(depInfos);
539
-
540
672
  const config = loadConfig();
673
+
674
+ // Set verbose git logging from config
675
+ git.setVerboseGit(config.verboseGit || false);
676
+
541
677
  const logsDir = getLogsDir(config);
542
678
  const runId = `run-${Date.now()}`;
543
679
  // Use absolute path for runRoot to avoid issues with subfolders
@@ -561,17 +697,11 @@ export async function orchestrate(tasksDir: string, options: {
561
697
  const randomSuffix = Math.random().toString(36).substring(2, 7);
562
698
  const pipelineBranch = `cursorflow/run-${Date.now().toString(36)}-${randomSuffix}`;
563
699
 
564
- // Stall detection configuration
565
- const stallConfig: StallDetectionConfig = {
700
+ // Initialize unified stall detection service (Single Source of Truth)
701
+ const stallService = getStallService({
566
702
  ...DEFAULT_ORCHESTRATOR_STALL_CONFIG,
567
703
  ...options.stallConfig,
568
- };
569
-
570
- // Initialize auto-recovery manager
571
- const autoRecoveryManager = getAutoRecoveryManager({
572
- ...DEFAULT_AUTO_RECOVERY_CONFIG,
573
- idleTimeoutMs: stallConfig.idleTimeoutMs, // Sync with stall config
574
- ...options.autoRecoveryConfig,
704
+ verbose: process.env['DEBUG_STALL'] === 'true',
575
705
  });
576
706
 
577
707
  // Initialize event system
@@ -632,6 +762,7 @@ export async function orchestrate(tasksDir: string, options: {
632
762
 
633
763
  laneWorktreeDirs[lane.name] = laneWorktreeDir;
634
764
 
765
+ logger.info(`🏗️ Initializing lane ${lane.name}: branch=${lanePipelineBranch}`);
635
766
  const initialState = createLaneState(lane.name, taskConfig, lane.path, {
636
767
  pipelineBranch: lanePipelineBranch,
637
768
  worktreeDir: laneWorktreeDir
@@ -647,21 +778,6 @@ export async function orchestrate(tasksDir: string, options: {
647
778
  logger.info(`Run directory: ${runRoot}`);
648
779
  logger.info(`Lanes: ${lanes.length}`);
649
780
 
650
- // Display dependency graph
651
- logger.info('\n📊 Dependency Graph:');
652
- for (const lane of lanes) {
653
- const deps = lane.dependsOn.length > 0 ? ` [depends on: ${lane.dependsOn.join(', ')}]` : '';
654
- console.log(` ${logger.COLORS.cyan}${lane.name}${logger.COLORS.reset}${deps}`);
655
-
656
- // Simple tree-like visualization for deep dependencies
657
- if (lane.dependsOn.length > 0) {
658
- for (const dep of lane.dependsOn) {
659
- console.log(` └─ ${dep}`);
660
- }
661
- }
662
- }
663
- console.log('');
664
-
665
781
  // Disable auto-resolve when noGit mode is enabled
666
782
  const autoResolve = !options.noGit && options.autoResolveDependencies !== false;
667
783
 
@@ -696,29 +812,12 @@ export async function orchestrate(tasksDir: string, options: {
696
812
 
697
813
  try {
698
814
  while (completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length || (blockedLanes.size > 0 && running.size === 0)) {
699
- // 1. Identify lanes ready to start
815
+ // 1. Identify lanes ready to start (all lanes can start immediately - no lane-level dependencies)
700
816
  const readyToStart = lanes.filter(lane => {
701
817
  // Not already running or completed or failed or blocked
702
818
  if (running.has(lane.name) || completedLanes.has(lane.name) || failedLanes.has(lane.name) || blockedLanes.has(lane.name)) {
703
819
  return false;
704
820
  }
705
-
706
- // Check dependencies
707
- for (const dep of lane.dependsOn) {
708
- if (failedLanes.has(dep)) {
709
- logger.error(`Lane ${lane.name} will not start because dependency ${dep} failed`);
710
- failedLanes.add(lane.name);
711
- exitCodes[lane.name] = 1;
712
- return false;
713
- }
714
- if (blockedLanes.has(dep)) {
715
- // If a dependency is blocked, wait
716
- return false;
717
- }
718
- if (!completedLanes.has(dep)) {
719
- return false;
720
- }
721
- }
722
821
  return true;
723
822
  });
724
823
 
@@ -737,23 +836,23 @@ export async function orchestrate(tasksDir: string, options: {
737
836
  logger.info(`Lane started: ${lane.name}${lane.startIndex ? ` (resuming from ${lane.startIndex})` : ''}`);
738
837
 
739
838
  const now = Date.now();
740
- // Pre-register lane in running map so onActivity can find it immediately
839
+
840
+ // Register lane with unified stall detection service FIRST
841
+ stallService.registerLane(lane.name, {
842
+ laneRunDir: laneRunDirs[lane.name]!,
843
+ });
844
+
845
+ const laneIdx = lanes.findIndex(l => l.name === lane.name);
846
+
847
+ // Pre-register lane in running map
741
848
  running.set(lane.name, {
742
849
  child: {} as any, // Placeholder, will be replaced below
743
850
  logManager: undefined,
744
851
  logPath: '',
745
- lastActivity: now,
746
- lastStateUpdate: now,
747
- stallPhase: 0,
748
- taskStartTime: now,
749
- lastOutput: '',
750
852
  statePath: laneStatePath,
751
- bytesReceived: 0,
752
- lastBytesCheck: 0,
753
- continueSignalsSent: 0,
853
+ laneIndex: laneIdx >= 0 ? laneIdx : 0,
754
854
  });
755
855
 
756
- let lastOutput = '';
757
856
  const spawnResult = spawnLane({
758
857
  laneName: lane.name,
759
858
  tasksFile: lane.path,
@@ -764,55 +863,40 @@ export async function orchestrate(tasksDir: string, options: {
764
863
  worktreeDir: laneWorktreeDirs[lane.name],
765
864
  enhancedLogConfig: options.enhancedLogging,
766
865
  noGit: options.noGit,
866
+ laneIndex: laneIdx >= 0 ? laneIdx : 0,
767
867
  onActivity: () => {
768
- const info = running.get(lane.name);
769
- if (info) {
770
- const actNow = Date.now();
771
- info.lastActivity = actNow;
772
- info.lastStateUpdate = actNow;
773
- info.stallPhase = 0;
774
- }
868
+ // Record state file update activity
869
+ stallService.recordStateUpdate(lane.name);
775
870
  }
776
871
  });
777
872
 
778
873
  // Update with actual spawn result
779
874
  const existingInfo = running.get(lane.name)!;
780
- Object.assign(existingInfo, spawnResult);
875
+ Object.assign(existingInfo, spawnResult.info);
876
+
877
+ // Update stall service with child process reference
878
+ stallService.setChildProcess(lane.name, spawnResult.child);
781
879
 
782
- // Track last output and bytes received for long operation and stall detection
880
+ // Track stdout for activity detection - delegate to StallDetectionService
783
881
  if (spawnResult.child.stdout) {
784
882
  spawnResult.child.stdout.on('data', (data: Buffer) => {
785
- const info = running.get(lane.name);
786
- if (info) {
787
- const output = data.toString();
788
- const lines = output.split('\n').filter(l => l.trim());
789
-
790
- // Filter out heartbeats from activity tracking to avoid resetting stall detection
791
- const realLines = lines.filter(line => !(line.includes('Heartbeat') && line.includes('bytes received')));
792
-
793
- if (realLines.length > 0) {
794
- // Real activity detected - update lastActivity to reset stall timer
795
- const actNow = Date.now();
796
- info.lastActivity = actNow;
797
- info.stallPhase = 0; // Reset stall phase on real activity
798
-
799
- const lastRealLine = realLines[realLines.length - 1]!;
800
- info.lastOutput = lastRealLine;
801
- info.bytesReceived += data.length;
802
-
803
- // Update auto-recovery manager with real activity
804
- autoRecoveryManager.recordActivity(lane.name, data.length, info.lastOutput);
805
- } else if (lines.length > 0) {
806
- // Only heartbeats received - do NOT update lastActivity (keep stall timer running)
807
- autoRecoveryManager.recordActivity(lane.name, 0, info.lastOutput);
808
- }
883
+ const output = data.toString();
884
+ const lines = output.split('\n').filter(l => l.trim());
885
+
886
+ // Filter out heartbeats from activity tracking
887
+ const realLines = lines.filter(line => !(line.includes('Heartbeat') && line.includes('bytes received')));
888
+
889
+ if (realLines.length > 0) {
890
+ // Real activity - record with bytes
891
+ const lastRealLine = realLines[realLines.length - 1]!;
892
+ stallService.recordActivity(lane.name, data.length, lastRealLine);
893
+ } else if (lines.length > 0) {
894
+ // Heartbeat only - record with 0 bytes (won't reset timer)
895
+ stallService.recordActivity(lane.name, 0);
809
896
  }
810
897
  });
811
898
  }
812
899
 
813
- // Register lane with auto-recovery manager
814
- autoRecoveryManager.registerLane(lane.name);
815
-
816
900
  // Update lane tracking
817
901
  lane.taskStartTime = now;
818
902
 
@@ -843,234 +927,47 @@ export async function orchestrate(tasksDir: string, options: {
843
927
  if (result.name === '__poll__' || (now - lastStallCheck >= 10000)) {
844
928
  lastStallCheck = now;
845
929
 
846
- // Periodic stall check with multi-layer detection and escalating recovery
930
+ // Periodic stall check using unified StallDetectionService
847
931
  for (const [laneName, info] of running.entries()) {
848
- const idleTime = now - info.lastActivity;
849
932
  const lane = lanes.find(l => l.name === laneName)!;
850
933
 
851
- if (process.env['DEBUG_STALL']) {
852
- logger.debug(`[${laneName}] Stall check: idle=${Math.round(idleTime/1000)}s, bytesDelta=${info.bytesReceived - info.lastBytesCheck}, phase=${info.stallPhase}`);
853
- }
854
-
855
934
  // Check state file for progress updates
856
- let progressTime = 0;
857
935
  try {
858
936
  const stateStat = fs.statSync(info.statePath);
859
- const stateUpdateTime = stateStat.mtimeMs;
860
- if (stateUpdateTime > info.lastStateUpdate) {
861
- info.lastStateUpdate = stateUpdateTime;
937
+ const stallState = stallService.getState(laneName);
938
+ if (stallState && stateStat.mtimeMs > stallState.lastStateUpdateTime) {
939
+ stallService.recordStateUpdate(laneName);
862
940
  }
863
- progressTime = now - info.lastStateUpdate;
864
941
  } catch {
865
942
  // State file might not exist yet
866
943
  }
867
944
 
868
- // Calculate bytes received since last check
869
- const bytesDelta = info.bytesReceived - info.lastBytesCheck;
870
- info.lastBytesCheck = info.bytesReceived;
945
+ // Debug logging
946
+ if (process.env['DEBUG_STALL']) {
947
+ logger.debug(`[${laneName}] ${stallService.dumpState(laneName)}`);
948
+ }
871
949
 
872
- // Use multi-layer stall analysis with enhanced context
873
- const analysis = analyzeStall({
874
- stallPhase: info.stallPhase,
875
- idleTimeMs: idleTime,
876
- progressTimeMs: progressTime,
877
- lastOutput: info.lastOutput,
878
- restartCount: lane.restartCount || 0,
879
- taskStartTimeMs: info.taskStartTime,
880
- bytesReceived: bytesDelta, // Bytes since last check
881
- continueSignalsSent: info.continueSignalsSent,
882
- }, stallConfig);
950
+ // Run stall analysis and recovery (all logic is in StallDetectionService)
951
+ const analysis = stallService.checkAndRecover(laneName);
883
952
 
884
- // Only act if action is not NONE
953
+ // Log to lane log manager if there was an action
885
954
  if (analysis.action !== RecoveryAction.NONE) {
886
- logFailure(laneName, analysis);
887
955
  info.logManager?.log('error', analysis.message);
888
-
889
- if (analysis.action === RecoveryAction.CONTINUE_SIGNAL) {
890
- const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
891
- try {
892
- fs.writeFileSync(interventionPath, 'continue');
893
- info.stallPhase = 1;
894
- info.lastActivity = now;
895
- info.continueSignalsSent++;
896
- logger.info(`[${laneName}] Sent continue signal (#${info.continueSignalsSent})`);
897
-
898
- events.emit('recovery.continue_signal', {
899
- laneName,
900
- idleSeconds: Math.round(idleTime / 1000),
901
- signalCount: info.continueSignalsSent,
902
- });
903
- } catch (e) {
904
- logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
905
- }
906
- } else if (analysis.action === RecoveryAction.STRONGER_PROMPT) {
907
- const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
908
- const strongerPrompt = `[SYSTEM INTERVENTION] You seem to be stuck. Please continue with your current task immediately. If you're waiting for something, explain what you need and proceed with what you can do now. If you've completed the task, summarize your work and finish.`;
909
- try {
910
- fs.writeFileSync(interventionPath, strongerPrompt);
911
- info.stallPhase = 2;
912
- info.lastActivity = now;
913
- logger.warn(`[${laneName}] Sent stronger prompt after continue signal failed`);
914
-
915
- events.emit('recovery.stronger_prompt', { laneName });
916
- } catch (e) {
917
- logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
918
- }
919
- } else if (analysis.action === RecoveryAction.KILL_AND_RESTART ||
920
- analysis.action === RecoveryAction.RESTART_LANE ||
921
- analysis.action === RecoveryAction.RESTART_LANE_FROM_CHECKPOINT) {
922
- lane.restartCount = (lane.restartCount || 0) + 1;
923
- info.stallPhase = 3;
924
-
925
- // Try to get checkpoint info
926
- const checkpoint = getLatestCheckpoint(laneRunDirs[laneName]!);
927
- if (checkpoint) {
928
- logger.info(`[${laneName}] Checkpoint available: ${checkpoint.id} (task ${checkpoint.taskIndex})`);
929
- }
930
-
931
- // Kill the process
932
- try {
933
- info.child.kill('SIGKILL');
934
- } catch {
935
- // Process might already be dead
936
- }
937
-
938
- logger.warn(`[${laneName}] Killing and restarting lane (restart #${lane.restartCount})`);
939
-
940
- events.emit('recovery.restart', {
941
- laneName,
942
- restartCount: lane.restartCount,
943
- maxRestarts: stallConfig.maxRestarts,
944
- });
945
- } else if (analysis.action === RecoveryAction.RUN_DOCTOR) {
946
- info.stallPhase = 4;
947
-
948
- // Run diagnostics
949
- logger.error(`[${laneName}] Running diagnostics due to persistent failures...`);
950
-
951
- // Import health check dynamically to avoid circular dependency
952
- const { checkAgentHealth, checkAuthHealth } = await import('../utils/health');
953
-
954
- const [agentHealth, authHealth] = await Promise.all([
955
- checkAgentHealth(),
956
- checkAuthHealth(),
957
- ]);
958
-
959
- const issues: string[] = [];
960
- if (!agentHealth.ok) issues.push(`Agent: ${agentHealth.message}`);
961
- if (!authHealth.ok) issues.push(`Auth: ${authHealth.message}`);
962
-
963
- if (issues.length > 0) {
964
- logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
965
- } else {
966
- logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
967
- }
968
-
969
- // Save diagnostic to file
970
- const diagnosticPath = safeJoin(laneRunDirs[laneName]!, 'diagnostic.json');
971
- fs.writeFileSync(diagnosticPath, JSON.stringify({
972
- timestamp: Date.now(),
973
- agentHealthy: agentHealth.ok,
974
- authHealthy: authHealth.ok,
975
- issues,
976
- analysis,
977
- }, null, 2));
978
-
979
- // Kill the process
980
- try {
981
- info.child.kill('SIGKILL');
982
- } catch {
983
- // Process might already be dead
984
- }
985
-
986
- logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
987
-
988
- // Save POF for failed recovery
989
- const recoveryState = autoRecoveryManager.getState(laneName);
990
- if (recoveryState) {
991
- try {
992
- const laneStatePath = safeJoin(laneRunDirs[laneName]!, 'state.json');
993
- const laneState = loadState<LaneState>(laneStatePath);
994
- const pofDir = safeJoin(runRoot, '..', '..', 'pof');
995
- const diagnosticInfo = {
996
- timestamp: Date.now(),
997
- agentHealthy: agentHealth.ok,
998
- authHealthy: authHealth.ok,
999
- systemHealthy: true,
1000
- suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
1001
- details: issues.join('\n') || 'No obvious issues found',
1002
- };
1003
- const pofEntry = createPOFFromRecoveryState(
1004
- runId,
1005
- runRoot,
1006
- laneName,
1007
- recoveryState,
1008
- laneState,
1009
- diagnosticInfo
1010
- );
1011
- savePOF(runId, pofDir, pofEntry);
1012
- } catch (pofError: any) {
1013
- logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
1014
- }
1015
- }
1016
-
1017
- events.emit('recovery.diagnosed', {
1018
- laneName,
1019
- diagnostic: { agentHealthy: agentHealth.ok, authHealthy: authHealth.ok, issues },
1020
- });
1021
- } else if (analysis.action === RecoveryAction.ABORT_LANE) {
1022
- info.stallPhase = 5;
1023
-
1024
- try {
1025
- info.child.kill('SIGKILL');
1026
- } catch {
1027
- // Process might already be dead
1028
- }
1029
-
1030
- logger.error(`[${laneName}] Aborting lane due to repeated stalls`);
1031
-
1032
- // Save POF for failed recovery
1033
- const recoveryState = autoRecoveryManager.getState(laneName);
1034
- if (recoveryState) {
1035
- try {
1036
- const laneStatePath = safeJoin(laneRunDirs[laneName]!, 'state.json');
1037
- const laneState = loadState<LaneState>(laneStatePath);
1038
- const pofDir = safeJoin(runRoot, '..', '..', 'pof');
1039
- const pofEntry = createPOFFromRecoveryState(
1040
- runId,
1041
- runRoot,
1042
- laneName,
1043
- recoveryState,
1044
- laneState,
1045
- recoveryState.diagnosticInfo
1046
- );
1047
- savePOF(runId, pofDir, pofEntry);
1048
- } catch (pofError: any) {
1049
- logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
1050
- }
1051
- }
1052
- } else if (analysis.action === RecoveryAction.SEND_GIT_GUIDANCE) {
1053
- // Send guidance message to agent for git issues
1054
- const interventionPath = safeJoin(laneRunDirs[laneName]!, 'intervention.txt');
1055
-
1056
- // Determine which guidance to send based on the failure type
1057
- let guidance: string;
1058
- if (analysis.type === FailureType.GIT_PUSH_REJECTED) {
1059
- guidance = getGitPushFailureGuidance();
1060
- } else if (analysis.type === FailureType.MERGE_CONFLICT) {
1061
- guidance = getMergeConflictGuidance();
1062
- } else {
1063
- guidance = getGitErrorGuidance(analysis.message);
1064
- }
1065
-
1066
- try {
1067
- fs.writeFileSync(interventionPath, guidance);
1068
- info.lastActivity = now;
1069
- logger.info(`[${laneName}] Sent git issue guidance to agent`);
1070
- } catch (e: any) {
1071
- logger.error(`[${laneName}] Failed to send guidance: ${e.message}`);
1072
- }
956
+
957
+ // Handle special case: RUN_DOCTOR needs async operations
958
+ if (analysis.action === RecoveryAction.RUN_DOCTOR) {
959
+ await handleDoctorDiagnostics(
960
+ laneName,
961
+ laneRunDirs[laneName]!,
962
+ runId,
963
+ runRoot,
964
+ stallService,
965
+ info.child
966
+ );
1073
967
  }
968
+
969
+ // Sync restartCount back to lane info (for restart logic in process exit handler)
970
+ lane.restartCount = stallService.getRestartCount(laneName);
1074
971
  }
1075
972
  }
1076
973
  continue;
@@ -1080,8 +977,11 @@ export async function orchestrate(tasksDir: string, options: {
1080
977
  running.delete(finished.name);
1081
978
  exitCodes[finished.name] = finished.code;
1082
979
 
1083
- // Unregister from auto-recovery manager
1084
- autoRecoveryManager.unregisterLane(finished.name);
980
+ // Get stall state before unregistering
981
+ const stallPhase = stallService.getPhase(finished.name);
982
+
983
+ // Unregister from stall detection service
984
+ stallService.unregisterLane(finished.name);
1085
985
 
1086
986
  if (finished.code === 0) {
1087
987
  completedLanes.add(finished.name);
@@ -1111,8 +1011,8 @@ export async function orchestrate(tasksDir: string, options: {
1111
1011
  logger.error(`Lane ${finished.name} exited with code 2 but no dependency request found`);
1112
1012
  }
1113
1013
  } else {
1114
- // Check if it was a restart request
1115
- if (info.stallPhase === 2) {
1014
+ // Check if it was a restart request (RESTART_REQUESTED phase)
1015
+ if (stallPhase === StallPhase.RESTART_REQUESTED) {
1116
1016
  logger.info(`🔄 Lane ${finished.name} is being restarted due to stall...`);
1117
1017
 
1118
1018
  // Update startIndex from current state to resume from the same task
@@ -1133,7 +1033,7 @@ export async function orchestrate(tasksDir: string, options: {
1133
1033
  failedLanes.add(finished.name);
1134
1034
 
1135
1035
  let errorMsg = 'Process exited with non-zero code';
1136
- if (info.stallPhase === 3) {
1036
+ if (stallPhase >= StallPhase.DIAGNOSED) {
1137
1037
  errorMsg = 'Stopped due to repeated stall';
1138
1038
  } else if (info.logManager) {
1139
1039
  const lastError = info.logManager.getLastError();