@litmers/cursorflow-orchestrator 0.1.31 → 0.1.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -52
- package/commands/cursorflow-add.md +159 -0
- package/commands/cursorflow-monitor.md +23 -2
- package/commands/cursorflow-new.md +87 -0
- package/dist/cli/add.d.ts +7 -0
- package/dist/cli/add.js +377 -0
- package/dist/cli/add.js.map +1 -0
- package/dist/cli/clean.js +1 -0
- package/dist/cli/clean.js.map +1 -1
- package/dist/cli/config.d.ts +7 -0
- package/dist/cli/config.js +181 -0
- package/dist/cli/config.js.map +1 -0
- package/dist/cli/index.js +34 -30
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/logs.js +7 -33
- package/dist/cli/logs.js.map +1 -1
- package/dist/cli/monitor.js +51 -62
- package/dist/cli/monitor.js.map +1 -1
- package/dist/cli/new.d.ts +7 -0
- package/dist/cli/new.js +232 -0
- package/dist/cli/new.js.map +1 -0
- package/dist/cli/prepare.js +95 -193
- package/dist/cli/prepare.js.map +1 -1
- package/dist/cli/resume.js +11 -47
- package/dist/cli/resume.js.map +1 -1
- package/dist/cli/run.js +27 -22
- package/dist/cli/run.js.map +1 -1
- package/dist/cli/tasks.js +1 -2
- package/dist/cli/tasks.js.map +1 -1
- package/dist/core/failure-policy.d.ts +9 -0
- package/dist/core/failure-policy.js +9 -0
- package/dist/core/failure-policy.js.map +1 -1
- package/dist/core/orchestrator.d.ts +20 -6
- package/dist/core/orchestrator.js +213 -333
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/runner/agent.d.ts +27 -0
- package/dist/core/runner/agent.js +294 -0
- package/dist/core/runner/agent.js.map +1 -0
- package/dist/core/runner/index.d.ts +5 -0
- package/dist/core/runner/index.js +22 -0
- package/dist/core/runner/index.js.map +1 -0
- package/dist/core/runner/pipeline.d.ts +9 -0
- package/dist/core/runner/pipeline.js +539 -0
- package/dist/core/runner/pipeline.js.map +1 -0
- package/dist/core/runner/prompt.d.ts +25 -0
- package/dist/core/runner/prompt.js +175 -0
- package/dist/core/runner/prompt.js.map +1 -0
- package/dist/core/runner/task.d.ts +26 -0
- package/dist/core/runner/task.js +283 -0
- package/dist/core/runner/task.js.map +1 -0
- package/dist/core/runner/utils.d.ts +37 -0
- package/dist/core/runner/utils.js +161 -0
- package/dist/core/runner/utils.js.map +1 -0
- package/dist/core/runner.d.ts +2 -96
- package/dist/core/runner.js +11 -1136
- package/dist/core/runner.js.map +1 -1
- package/dist/core/stall-detection.d.ts +326 -0
- package/dist/core/stall-detection.js +781 -0
- package/dist/core/stall-detection.js.map +1 -0
- package/dist/types/config.d.ts +6 -6
- package/dist/types/flow.d.ts +84 -0
- package/dist/types/flow.js +10 -0
- package/dist/types/flow.js.map +1 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +3 -3
- package/dist/types/index.js.map +1 -1
- package/dist/types/lane.d.ts +0 -2
- package/dist/types/logging.d.ts +5 -1
- package/dist/types/task.d.ts +7 -11
- package/dist/utils/config.js +7 -15
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/dependency.d.ts +36 -1
- package/dist/utils/dependency.js +256 -1
- package/dist/utils/dependency.js.map +1 -1
- package/dist/utils/enhanced-logger.d.ts +45 -82
- package/dist/utils/enhanced-logger.js +238 -844
- package/dist/utils/enhanced-logger.js.map +1 -1
- package/dist/utils/git.d.ts +29 -0
- package/dist/utils/git.js +115 -5
- package/dist/utils/git.js.map +1 -1
- package/dist/utils/state.js +0 -2
- package/dist/utils/state.js.map +1 -1
- package/dist/utils/task-service.d.ts +2 -2
- package/dist/utils/task-service.js +40 -31
- package/dist/utils/task-service.js.map +1 -1
- package/package.json +4 -3
- package/src/cli/add.ts +397 -0
- package/src/cli/clean.ts +1 -0
- package/src/cli/config.ts +177 -0
- package/src/cli/index.ts +36 -32
- package/src/cli/logs.ts +7 -31
- package/src/cli/monitor.ts +55 -71
- package/src/cli/new.ts +235 -0
- package/src/cli/prepare.ts +98 -205
- package/src/cli/resume.ts +13 -56
- package/src/cli/run.ts +311 -306
- package/src/cli/tasks.ts +1 -2
- package/src/core/failure-policy.ts +9 -0
- package/src/core/orchestrator.ts +277 -378
- package/src/core/runner/agent.ts +314 -0
- package/src/core/runner/index.ts +6 -0
- package/src/core/runner/pipeline.ts +567 -0
- package/src/core/runner/prompt.ts +174 -0
- package/src/core/runner/task.ts +320 -0
- package/src/core/runner/utils.ts +142 -0
- package/src/core/runner.ts +8 -1347
- package/src/core/stall-detection.ts +936 -0
- package/src/types/config.ts +6 -6
- package/src/types/flow.ts +91 -0
- package/src/types/index.ts +15 -3
- package/src/types/lane.ts +0 -2
- package/src/types/logging.ts +5 -1
- package/src/types/task.ts +7 -11
- package/src/utils/config.ts +8 -16
- package/src/utils/dependency.ts +311 -2
- package/src/utils/enhanced-logger.ts +263 -927
- package/src/utils/git.ts +145 -5
- package/src/utils/state.ts +0 -2
- package/src/utils/task-service.ts +48 -40
- package/commands/cursorflow-review.md +0 -56
- package/commands/cursorflow-runs.md +0 -59
- package/dist/cli/runs.d.ts +0 -5
- package/dist/cli/runs.js +0 -214
- package/dist/cli/runs.js.map +0 -1
- package/dist/core/reviewer.d.ts +0 -66
- package/dist/core/reviewer.js +0 -265
- package/dist/core/reviewer.js.map +0 -1
- package/src/cli/runs.ts +0 -212
- package/src/core/reviewer.ts +0 -285
|
@@ -60,15 +60,12 @@ const child_process_2 = require("child_process");
|
|
|
60
60
|
const path_1 = require("../utils/path");
|
|
61
61
|
const enhanced_logger_1 = require("../utils/enhanced-logger");
|
|
62
62
|
const log_formatter_1 = require("../utils/log-formatter");
|
|
63
|
-
const failure_policy_1 = require("./failure-policy");
|
|
64
63
|
const auto_recovery_1 = require("./auto-recovery");
|
|
65
|
-
const
|
|
64
|
+
const stall_detection_1 = require("./stall-detection");
|
|
66
65
|
const health_1 = require("../utils/health");
|
|
67
|
-
const checkpoint_1 = require("../utils/checkpoint");
|
|
68
66
|
const lock_1 = require("../utils/lock");
|
|
69
67
|
/** Default stall detection configuration - 2 minute idle timeout for recovery */
|
|
70
68
|
const DEFAULT_ORCHESTRATOR_STALL_CONFIG = {
|
|
71
|
-
...failure_policy_1.DEFAULT_STALL_CONFIG,
|
|
72
69
|
idleTimeoutMs: 2 * 60 * 1000, // 2 minutes (idle detection for continue signal)
|
|
73
70
|
progressTimeoutMs: 10 * 60 * 1000, // 10 minutes (only triggers if no activity at all)
|
|
74
71
|
maxRestarts: 2,
|
|
@@ -94,10 +91,93 @@ function logFileTail(filePath, lines = 10) {
|
|
|
94
91
|
// Ignore log reading errors
|
|
95
92
|
}
|
|
96
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* Handle RUN_DOCTOR action - runs async health diagnostics
|
|
96
|
+
*/
|
|
97
|
+
async function handleDoctorDiagnostics(laneName, laneRunDir, runId, runRoot, stallService, child) {
|
|
98
|
+
// Import health check dynamically to avoid circular dependency
|
|
99
|
+
const { checkAgentHealth, checkAuthHealth } = await Promise.resolve().then(() => __importStar(require('../utils/health')));
|
|
100
|
+
const [agentHealth, authHealth] = await Promise.all([
|
|
101
|
+
checkAgentHealth(),
|
|
102
|
+
checkAuthHealth(),
|
|
103
|
+
]);
|
|
104
|
+
const issues = [];
|
|
105
|
+
if (!agentHealth.ok)
|
|
106
|
+
issues.push(`Agent: ${agentHealth.message}`);
|
|
107
|
+
if (!authHealth.ok)
|
|
108
|
+
issues.push(`Auth: ${authHealth.message}`);
|
|
109
|
+
if (issues.length > 0) {
|
|
110
|
+
logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
|
|
111
|
+
}
|
|
112
|
+
else {
|
|
113
|
+
logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
|
|
114
|
+
}
|
|
115
|
+
// Save diagnostic to file
|
|
116
|
+
const diagnosticPath = (0, path_1.safeJoin)(laneRunDir, 'diagnostic.json');
|
|
117
|
+
fs.writeFileSync(diagnosticPath, JSON.stringify({
|
|
118
|
+
timestamp: Date.now(),
|
|
119
|
+
agentHealthy: agentHealth.ok,
|
|
120
|
+
authHealthy: authHealth.ok,
|
|
121
|
+
issues,
|
|
122
|
+
}, null, 2));
|
|
123
|
+
// Kill the process
|
|
124
|
+
try {
|
|
125
|
+
child.kill('SIGKILL');
|
|
126
|
+
}
|
|
127
|
+
catch {
|
|
128
|
+
// Process might already be dead
|
|
129
|
+
}
|
|
130
|
+
logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
|
|
131
|
+
// Save POF for failed recovery
|
|
132
|
+
const stallState = stallService.getState(laneName);
|
|
133
|
+
if (stallState) {
|
|
134
|
+
try {
|
|
135
|
+
const laneStatePath = (0, path_1.safeJoin)(laneRunDir, 'state.json');
|
|
136
|
+
const laneState = (0, state_1.loadState)(laneStatePath);
|
|
137
|
+
const pofDir = (0, path_1.safeJoin)(runRoot, '..', '..', 'pof');
|
|
138
|
+
// Convert stall state to recovery state format for POF
|
|
139
|
+
// Note: StallPhase and RecoveryStage have compatible numeric values (0-5)
|
|
140
|
+
const recoveryState = {
|
|
141
|
+
laneName,
|
|
142
|
+
stage: stallState.phase, // Both enums use 0-5
|
|
143
|
+
lastActivityTime: stallState.lastRealActivityTime,
|
|
144
|
+
lastBytesReceived: stallState.bytesSinceLastCheck,
|
|
145
|
+
totalBytesReceived: stallState.totalBytesReceived,
|
|
146
|
+
lastOutput: stallState.lastOutput,
|
|
147
|
+
restartCount: stallState.restartCount,
|
|
148
|
+
continueSignalsSent: stallState.continueSignalCount,
|
|
149
|
+
lastStageChangeTime: stallState.lastPhaseChangeTime,
|
|
150
|
+
isLongOperation: stallState.isLongOperation,
|
|
151
|
+
failureHistory: stallState.failureHistory.map(f => ({
|
|
152
|
+
timestamp: f.timestamp,
|
|
153
|
+
stage: f.phase, // Both enums use 0-5
|
|
154
|
+
action: f.action,
|
|
155
|
+
message: f.message,
|
|
156
|
+
idleTimeMs: f.idleTimeMs,
|
|
157
|
+
bytesReceived: f.bytesReceived,
|
|
158
|
+
lastOutput: f.lastOutput,
|
|
159
|
+
})),
|
|
160
|
+
};
|
|
161
|
+
const diagnosticInfo = {
|
|
162
|
+
timestamp: Date.now(),
|
|
163
|
+
agentHealthy: agentHealth.ok,
|
|
164
|
+
authHealthy: authHealth.ok,
|
|
165
|
+
systemHealthy: true,
|
|
166
|
+
suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
|
|
167
|
+
details: issues.join('\n') || 'No obvious issues found',
|
|
168
|
+
};
|
|
169
|
+
const pofEntry = (0, auto_recovery_1.createPOFFromRecoveryState)(runId, runRoot, laneName, recoveryState, laneState, diagnosticInfo);
|
|
170
|
+
(0, auto_recovery_1.savePOF)(runId, pofDir, pofEntry);
|
|
171
|
+
}
|
|
172
|
+
catch (pofError) {
|
|
173
|
+
logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
97
177
|
/**
|
|
98
178
|
* Spawn a lane process
|
|
99
179
|
*/
|
|
100
|
-
function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0, pipelineBranch, worktreeDir, enhancedLogConfig, noGit = false, onActivity, }) {
|
|
180
|
+
function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0, pipelineBranch, worktreeDir, enhancedLogConfig, noGit = false, onActivity, laneIndex = 0, }) {
|
|
101
181
|
fs.mkdirSync(laneRunDir, { recursive: true });
|
|
102
182
|
// Use extension-less resolve to handle both .ts (dev) and .js (dist)
|
|
103
183
|
const runnerPath = require.resolve('./runner');
|
|
@@ -127,17 +207,23 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
127
207
|
...process.env,
|
|
128
208
|
};
|
|
129
209
|
if (logConfig.enabled) {
|
|
210
|
+
// Helper to get dynamic lane label like [L01-T01-laneName]
|
|
211
|
+
const getDynamicLabel = () => {
|
|
212
|
+
const laneNum = `L${(laneIndex + 1).toString().padStart(2, '0')}`;
|
|
213
|
+
const taskPart = info.currentTaskIndex ? `-T${info.currentTaskIndex.toString().padStart(2, '0')}` : '';
|
|
214
|
+
return `[${laneNum}${taskPart}-${laneName}]`;
|
|
215
|
+
};
|
|
130
216
|
// Create callback for clean console output
|
|
131
217
|
const onParsedMessage = (msg) => {
|
|
132
218
|
if (onActivity)
|
|
133
219
|
onActivity();
|
|
134
220
|
const formatted = (0, log_formatter_1.formatMessageForConsole)(msg, {
|
|
135
|
-
laneLabel:
|
|
221
|
+
laneLabel: getDynamicLabel(),
|
|
136
222
|
includeTimestamp: true
|
|
137
223
|
});
|
|
138
224
|
process.stdout.write(formatted + '\n');
|
|
139
225
|
};
|
|
140
|
-
logManager = (0, enhanced_logger_1.createLogManager)(laneRunDir, laneName, logConfig, onParsedMessage);
|
|
226
|
+
logManager = (0, enhanced_logger_1.createLogManager)(laneRunDir, laneName, logConfig, onParsedMessage, laneIndex);
|
|
141
227
|
logPath = logManager.getLogPaths().clean;
|
|
142
228
|
// Spawn with pipe for enhanced logging
|
|
143
229
|
child = (0, child_process_1.spawn)('node', args, {
|
|
@@ -145,6 +231,15 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
145
231
|
env: childEnv,
|
|
146
232
|
detached: false,
|
|
147
233
|
});
|
|
234
|
+
// Initialize info object for stdout handler to use
|
|
235
|
+
const info = {
|
|
236
|
+
child,
|
|
237
|
+
logManager,
|
|
238
|
+
logPath,
|
|
239
|
+
statePath: (0, path_1.safeJoin)(laneRunDir, 'state.json'),
|
|
240
|
+
laneIndex,
|
|
241
|
+
currentTaskIndex: startIndex > 0 ? startIndex + 1 : 0
|
|
242
|
+
};
|
|
148
243
|
// Buffer for non-JSON lines
|
|
149
244
|
let lineBuffer = '';
|
|
150
245
|
// Pipe stdout and stderr through enhanced logger
|
|
@@ -158,25 +253,50 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
158
253
|
lineBuffer = lines.pop() || '';
|
|
159
254
|
for (const line of lines) {
|
|
160
255
|
const trimmed = line.trim();
|
|
256
|
+
if (!trimmed)
|
|
257
|
+
continue;
|
|
258
|
+
// Detect task start/progress to update label
|
|
259
|
+
// Example: [1/1] hello-task
|
|
260
|
+
const cleanLine = (0, enhanced_logger_1.stripAnsi)(trimmed);
|
|
261
|
+
const taskMatch = cleanLine.match(/^\s*\[(\d+)\/(\d+)\]\s+(.+)$/);
|
|
262
|
+
if (taskMatch) {
|
|
263
|
+
info.currentTaskIndex = parseInt(taskMatch[1]);
|
|
264
|
+
// Update log manager's task index to keep it in sync for readable log
|
|
265
|
+
if (logManager) {
|
|
266
|
+
logManager.setTask(taskMatch[3].trim(), undefined, info.currentTaskIndex - 1);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
161
269
|
// Show if it's a timestamped log line (starts with [YYYY-MM-DD... or [HH:MM:SS])
|
|
162
270
|
// or if it's NOT a noisy JSON line
|
|
163
|
-
const hasTimestamp = /^\[\d{4}-\d{2}-\d{2}T|\^\[\d{2}:\d{2}:\d{2}\]/.test(trimmed);
|
|
164
271
|
const isJson = trimmed.startsWith('{') || trimmed.includes('{"type"');
|
|
165
272
|
// Filter out heartbeats - they should NOT reset the idle timer
|
|
166
273
|
const isHeartbeat = trimmed.includes('Heartbeat') && trimmed.includes('bytes received');
|
|
167
|
-
if (
|
|
274
|
+
if (!isJson) {
|
|
168
275
|
// Only trigger activity for non-heartbeat lines
|
|
169
276
|
if (onActivity && !isHeartbeat)
|
|
170
277
|
onActivity();
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
278
|
+
const currentLabel = getDynamicLabel();
|
|
279
|
+
const coloredLabel = `${logger.COLORS.magenta}${currentLabel}${logger.COLORS.reset}`;
|
|
280
|
+
// Regex that matches timestamp even if it has ANSI color codes
|
|
281
|
+
// Matches: [24:39:14] or \x1b[90m[24:39:14]\x1b[0m
|
|
282
|
+
const timestampRegex = /^((?:\x1b\[[0-9;]*m)*)\[(\d{4}-\d{2}-\d{2}T|\d{2}:\d{2}:\d{2})\]/;
|
|
283
|
+
const tsMatch = trimmed.match(timestampRegex);
|
|
284
|
+
if (tsMatch) {
|
|
285
|
+
// If line already has timestamp format, just add lane prefix
|
|
286
|
+
// Check if lane label is already present to avoid triple duplication
|
|
287
|
+
if (!trimmed.includes(currentLabel)) {
|
|
288
|
+
// Insert label after the timestamp part
|
|
289
|
+
const tsPart = tsMatch[0];
|
|
290
|
+
const formatted = trimmed.replace(tsPart, `${tsPart} ${coloredLabel}`);
|
|
291
|
+
process.stdout.write(formatted + '\n');
|
|
292
|
+
}
|
|
293
|
+
else {
|
|
294
|
+
process.stdout.write(trimmed + '\n');
|
|
295
|
+
}
|
|
176
296
|
}
|
|
177
297
|
else {
|
|
178
298
|
// Add full prefix: timestamp + lane
|
|
179
|
-
process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${
|
|
299
|
+
process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${coloredLabel} ${line}\n`);
|
|
180
300
|
}
|
|
181
301
|
}
|
|
182
302
|
}
|
|
@@ -196,13 +316,15 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
196
316
|
trimmed.startsWith('HEAD is now at') ||
|
|
197
317
|
trimmed.includes('actual output');
|
|
198
318
|
const ts = new Date().toLocaleTimeString('en-US', { hour12: false });
|
|
319
|
+
const currentLabel = getDynamicLabel();
|
|
320
|
+
const coloredLabel = `${logger.COLORS.magenta}${currentLabel}${logger.COLORS.reset}`;
|
|
199
321
|
if (isStatus) {
|
|
200
|
-
process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${
|
|
322
|
+
process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${coloredLabel} ${trimmed}\n`);
|
|
201
323
|
}
|
|
202
324
|
else {
|
|
203
325
|
if (onActivity)
|
|
204
326
|
onActivity();
|
|
205
|
-
process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${
|
|
327
|
+
process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${coloredLabel} ${logger.COLORS.red}❌ ERR ${trimmed}${logger.COLORS.reset}\n`);
|
|
206
328
|
}
|
|
207
329
|
}
|
|
208
330
|
}
|
|
@@ -212,10 +334,11 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
212
334
|
child.on('exit', () => {
|
|
213
335
|
logManager?.close();
|
|
214
336
|
});
|
|
337
|
+
return { child, logPath, logManager, info };
|
|
215
338
|
}
|
|
216
339
|
else {
|
|
217
340
|
// Fallback to simple file logging
|
|
218
|
-
logPath = (0, path_1.safeJoin)(laneRunDir, 'terminal.log');
|
|
341
|
+
logPath = (0, path_1.safeJoin)(laneRunDir, 'terminal-readable.log');
|
|
219
342
|
const logFd = fs.openSync(logPath, 'a');
|
|
220
343
|
child = (0, child_process_1.spawn)('node', args, {
|
|
221
344
|
stdio: ['ignore', logFd, logFd],
|
|
@@ -228,8 +351,18 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
228
351
|
catch {
|
|
229
352
|
// Ignore
|
|
230
353
|
}
|
|
354
|
+
return {
|
|
355
|
+
child,
|
|
356
|
+
logPath,
|
|
357
|
+
logManager,
|
|
358
|
+
info: {
|
|
359
|
+
child,
|
|
360
|
+
logPath,
|
|
361
|
+
statePath: (0, path_1.safeJoin)(laneRunDir, 'state.json'),
|
|
362
|
+
laneIndex
|
|
363
|
+
}
|
|
364
|
+
};
|
|
231
365
|
}
|
|
232
|
-
return { child, logPath, logManager };
|
|
233
366
|
}
|
|
234
367
|
/**
|
|
235
368
|
* Wait for child process to exit
|
|
@@ -245,7 +378,7 @@ function waitChild(proc) {
|
|
|
245
378
|
});
|
|
246
379
|
}
|
|
247
380
|
/**
|
|
248
|
-
* List lane task files in directory
|
|
381
|
+
* List lane task files in directory
|
|
249
382
|
*/
|
|
250
383
|
function listLaneFiles(tasksDir) {
|
|
251
384
|
if (!fs.existsSync(tasksDir)) {
|
|
@@ -258,18 +391,9 @@ function listLaneFiles(tasksDir) {
|
|
|
258
391
|
.map(f => {
|
|
259
392
|
const filePath = (0, path_1.safeJoin)(tasksDir, f);
|
|
260
393
|
const name = path.basename(f, '.json');
|
|
261
|
-
let dependsOn = [];
|
|
262
|
-
try {
|
|
263
|
-
const config = JSON.parse(fs.readFileSync(filePath, 'utf8'));
|
|
264
|
-
dependsOn = config.dependsOn || [];
|
|
265
|
-
}
|
|
266
|
-
catch (e) {
|
|
267
|
-
logger.warn(`Failed to parse config for lane ${name}: ${e}`);
|
|
268
|
-
}
|
|
269
394
|
return {
|
|
270
395
|
name,
|
|
271
396
|
path: filePath,
|
|
272
|
-
dependsOn,
|
|
273
397
|
};
|
|
274
398
|
});
|
|
275
399
|
}
|
|
@@ -284,8 +408,7 @@ function printLaneStatus(lanes, laneRunDirs) {
|
|
|
284
408
|
const statePath = (0, path_1.safeJoin)(dir, 'state.json');
|
|
285
409
|
const state = (0, state_1.loadState)(statePath);
|
|
286
410
|
if (!state) {
|
|
287
|
-
|
|
288
|
-
return { lane: lane.name, status: isWaiting ? 'waiting' : 'pending', task: '-' };
|
|
411
|
+
return { lane: lane.name, status: 'pending', task: '-' };
|
|
289
412
|
}
|
|
290
413
|
const idx = (state.currentTaskIndex || 0) + 1;
|
|
291
414
|
return {
|
|
@@ -322,11 +445,11 @@ async function resolveAllDependencies(blockedLanes, allLanes, laneRunDirs, pipel
|
|
|
322
445
|
const state = (0, state_1.loadState)(statePath);
|
|
323
446
|
const worktreeDir = state?.worktreeDir || (0, path_1.safeJoin)(runRoot, 'resolution-worktree');
|
|
324
447
|
if (!fs.existsSync(worktreeDir)) {
|
|
325
|
-
logger.info(
|
|
448
|
+
logger.info(`🏗️ Creating resolution worktree at ${worktreeDir}`);
|
|
326
449
|
git.createWorktree(worktreeDir, pipelineBranch, { baseBranch: git.getCurrentBranch() });
|
|
327
450
|
}
|
|
328
451
|
// 3. Resolve on pipeline branch
|
|
329
|
-
logger.info(
|
|
452
|
+
logger.info(`🔄 Resolving dependencies on branch ${pipelineBranch}`);
|
|
330
453
|
git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });
|
|
331
454
|
for (const cmd of uniqueCommands) {
|
|
332
455
|
logger.info(`Running: ${cmd}`);
|
|
@@ -416,28 +539,9 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
416
539
|
}
|
|
417
540
|
logger.success('✓ Preflight checks passed');
|
|
418
541
|
}
|
|
419
|
-
// Validate dependencies and detect cycles
|
|
420
|
-
logger.section('📊 Dependency Analysis');
|
|
421
|
-
const depInfos = lanes.map(l => ({
|
|
422
|
-
name: l.name,
|
|
423
|
-
dependsOn: l.dependsOn,
|
|
424
|
-
}));
|
|
425
|
-
const depValidation = (0, dependency_1.validateDependencies)(depInfos);
|
|
426
|
-
if (!depValidation.valid) {
|
|
427
|
-
logger.error('❌ Dependency validation failed:');
|
|
428
|
-
for (const err of depValidation.errors) {
|
|
429
|
-
logger.error(` • ${err}`);
|
|
430
|
-
}
|
|
431
|
-
throw new Error('Invalid dependency configuration');
|
|
432
|
-
}
|
|
433
|
-
if (depValidation.warnings.length > 0) {
|
|
434
|
-
for (const warn of depValidation.warnings) {
|
|
435
|
-
logger.warn(`⚠️ ${warn}`);
|
|
436
|
-
}
|
|
437
|
-
}
|
|
438
|
-
// Print dependency graph
|
|
439
|
-
(0, dependency_1.printDependencyGraph)(depInfos);
|
|
440
542
|
const config = (0, config_1.loadConfig)();
|
|
543
|
+
// Set verbose git logging from config
|
|
544
|
+
git.setVerboseGit(config.verboseGit || false);
|
|
441
545
|
const logsDir = (0, config_1.getLogsDir)(config);
|
|
442
546
|
const runId = `run-${Date.now()}`;
|
|
443
547
|
// Use absolute path for runRoot to avoid issues with subfolders
|
|
@@ -458,16 +562,11 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
458
562
|
}
|
|
459
563
|
const randomSuffix = Math.random().toString(36).substring(2, 7);
|
|
460
564
|
const pipelineBranch = `cursorflow/run-${Date.now().toString(36)}-${randomSuffix}`;
|
|
461
|
-
//
|
|
462
|
-
const
|
|
565
|
+
// Initialize unified stall detection service (Single Source of Truth)
|
|
566
|
+
const stallService = (0, stall_detection_1.getStallService)({
|
|
463
567
|
...DEFAULT_ORCHESTRATOR_STALL_CONFIG,
|
|
464
568
|
...options.stallConfig,
|
|
465
|
-
|
|
466
|
-
// Initialize auto-recovery manager
|
|
467
|
-
const autoRecoveryManager = (0, auto_recovery_1.getAutoRecoveryManager)({
|
|
468
|
-
...auto_recovery_1.DEFAULT_AUTO_RECOVERY_CONFIG,
|
|
469
|
-
idleTimeoutMs: stallConfig.idleTimeoutMs, // Sync with stall config
|
|
470
|
-
...options.autoRecoveryConfig,
|
|
569
|
+
verbose: process.env['DEBUG_STALL'] === 'true',
|
|
471
570
|
});
|
|
472
571
|
// Initialize event system
|
|
473
572
|
events_1.events.setRunId(runId);
|
|
@@ -512,6 +611,7 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
512
611
|
fs.mkdirSync(worktreeParent, { recursive: true });
|
|
513
612
|
}
|
|
514
613
|
laneWorktreeDirs[lane.name] = laneWorktreeDir;
|
|
614
|
+
logger.info(`🏗️ Initializing lane ${lane.name}: branch=${lanePipelineBranch}`);
|
|
515
615
|
const initialState = (0, state_1.createLaneState)(lane.name, taskConfig, lane.path, {
|
|
516
616
|
pipelineBranch: lanePipelineBranch,
|
|
517
617
|
worktreeDir: laneWorktreeDir
|
|
@@ -526,19 +626,6 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
526
626
|
logger.info(`Tasks directory: ${tasksDir}`);
|
|
527
627
|
logger.info(`Run directory: ${runRoot}`);
|
|
528
628
|
logger.info(`Lanes: ${lanes.length}`);
|
|
529
|
-
// Display dependency graph
|
|
530
|
-
logger.info('\n📊 Dependency Graph:');
|
|
531
|
-
for (const lane of lanes) {
|
|
532
|
-
const deps = lane.dependsOn.length > 0 ? ` [depends on: ${lane.dependsOn.join(', ')}]` : '';
|
|
533
|
-
console.log(` ${logger.COLORS.cyan}${lane.name}${logger.COLORS.reset}${deps}`);
|
|
534
|
-
// Simple tree-like visualization for deep dependencies
|
|
535
|
-
if (lane.dependsOn.length > 0) {
|
|
536
|
-
for (const dep of lane.dependsOn) {
|
|
537
|
-
console.log(` └─ ${dep}`);
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
}
|
|
541
|
-
console.log('');
|
|
542
629
|
// Disable auto-resolve when noGit mode is enabled
|
|
543
630
|
const autoResolve = !options.noGit && options.autoResolveDependencies !== false;
|
|
544
631
|
if (options.noGit) {
|
|
@@ -568,28 +655,12 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
568
655
|
let lastStallCheck = Date.now();
|
|
569
656
|
try {
|
|
570
657
|
while (completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length || (blockedLanes.size > 0 && running.size === 0)) {
|
|
571
|
-
// 1. Identify lanes ready to start
|
|
658
|
+
// 1. Identify lanes ready to start (all lanes can start immediately - no lane-level dependencies)
|
|
572
659
|
const readyToStart = lanes.filter(lane => {
|
|
573
660
|
// Not already running or completed or failed or blocked
|
|
574
661
|
if (running.has(lane.name) || completedLanes.has(lane.name) || failedLanes.has(lane.name) || blockedLanes.has(lane.name)) {
|
|
575
662
|
return false;
|
|
576
663
|
}
|
|
577
|
-
// Check dependencies
|
|
578
|
-
for (const dep of lane.dependsOn) {
|
|
579
|
-
if (failedLanes.has(dep)) {
|
|
580
|
-
logger.error(`Lane ${lane.name} will not start because dependency ${dep} failed`);
|
|
581
|
-
failedLanes.add(lane.name);
|
|
582
|
-
exitCodes[lane.name] = 1;
|
|
583
|
-
return false;
|
|
584
|
-
}
|
|
585
|
-
if (blockedLanes.has(dep)) {
|
|
586
|
-
// If a dependency is blocked, wait
|
|
587
|
-
return false;
|
|
588
|
-
}
|
|
589
|
-
if (!completedLanes.has(dep)) {
|
|
590
|
-
return false;
|
|
591
|
-
}
|
|
592
|
-
}
|
|
593
664
|
return true;
|
|
594
665
|
});
|
|
595
666
|
// 2. Spawn ready lanes up to maxConcurrent
|
|
@@ -604,22 +675,19 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
604
675
|
}
|
|
605
676
|
logger.info(`Lane started: ${lane.name}${lane.startIndex ? ` (resuming from ${lane.startIndex})` : ''}`);
|
|
606
677
|
const now = Date.now();
|
|
607
|
-
//
|
|
678
|
+
// Register lane with unified stall detection service FIRST
|
|
679
|
+
stallService.registerLane(lane.name, {
|
|
680
|
+
laneRunDir: laneRunDirs[lane.name],
|
|
681
|
+
});
|
|
682
|
+
const laneIdx = lanes.findIndex(l => l.name === lane.name);
|
|
683
|
+
// Pre-register lane in running map
|
|
608
684
|
running.set(lane.name, {
|
|
609
685
|
child: {}, // Placeholder, will be replaced below
|
|
610
686
|
logManager: undefined,
|
|
611
687
|
logPath: '',
|
|
612
|
-
lastActivity: now,
|
|
613
|
-
lastStateUpdate: now,
|
|
614
|
-
stallPhase: 0,
|
|
615
|
-
taskStartTime: now,
|
|
616
|
-
lastOutput: '',
|
|
617
688
|
statePath: laneStatePath,
|
|
618
|
-
|
|
619
|
-
lastBytesCheck: 0,
|
|
620
|
-
continueSignalsSent: 0,
|
|
689
|
+
laneIndex: laneIdx >= 0 ? laneIdx : 0,
|
|
621
690
|
});
|
|
622
|
-
let lastOutput = '';
|
|
623
691
|
const spawnResult = spawnLane({
|
|
624
692
|
laneName: lane.name,
|
|
625
693
|
tasksFile: lane.path,
|
|
@@ -630,48 +698,35 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
630
698
|
worktreeDir: laneWorktreeDirs[lane.name],
|
|
631
699
|
enhancedLogConfig: options.enhancedLogging,
|
|
632
700
|
noGit: options.noGit,
|
|
701
|
+
laneIndex: laneIdx >= 0 ? laneIdx : 0,
|
|
633
702
|
onActivity: () => {
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
const actNow = Date.now();
|
|
637
|
-
info.lastActivity = actNow;
|
|
638
|
-
info.lastStateUpdate = actNow;
|
|
639
|
-
info.stallPhase = 0;
|
|
640
|
-
}
|
|
703
|
+
// Record state file update activity
|
|
704
|
+
stallService.recordStateUpdate(lane.name);
|
|
641
705
|
}
|
|
642
706
|
});
|
|
643
707
|
// Update with actual spawn result
|
|
644
708
|
const existingInfo = running.get(lane.name);
|
|
645
|
-
Object.assign(existingInfo, spawnResult);
|
|
646
|
-
//
|
|
709
|
+
Object.assign(existingInfo, spawnResult.info);
|
|
710
|
+
// Update stall service with child process reference
|
|
711
|
+
stallService.setChildProcess(lane.name, spawnResult.child);
|
|
712
|
+
// Track stdout for activity detection - delegate to StallDetectionService
|
|
647
713
|
if (spawnResult.child.stdout) {
|
|
648
714
|
spawnResult.child.stdout.on('data', (data) => {
|
|
649
|
-
const
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
info.lastOutput = lastRealLine;
|
|
662
|
-
info.bytesReceived += data.length;
|
|
663
|
-
// Update auto-recovery manager with real activity
|
|
664
|
-
autoRecoveryManager.recordActivity(lane.name, data.length, info.lastOutput);
|
|
665
|
-
}
|
|
666
|
-
else if (lines.length > 0) {
|
|
667
|
-
// Only heartbeats received - do NOT update lastActivity (keep stall timer running)
|
|
668
|
-
autoRecoveryManager.recordActivity(lane.name, 0, info.lastOutput);
|
|
669
|
-
}
|
|
715
|
+
const output = data.toString();
|
|
716
|
+
const lines = output.split('\n').filter(l => l.trim());
|
|
717
|
+
// Filter out heartbeats from activity tracking
|
|
718
|
+
const realLines = lines.filter(line => !(line.includes('Heartbeat') && line.includes('bytes received')));
|
|
719
|
+
if (realLines.length > 0) {
|
|
720
|
+
// Real activity - record with bytes
|
|
721
|
+
const lastRealLine = realLines[realLines.length - 1];
|
|
722
|
+
stallService.recordActivity(lane.name, data.length, lastRealLine);
|
|
723
|
+
}
|
|
724
|
+
else if (lines.length > 0) {
|
|
725
|
+
// Heartbeat only - record with 0 bytes (won't reset timer)
|
|
726
|
+
stallService.recordActivity(lane.name, 0);
|
|
670
727
|
}
|
|
671
728
|
});
|
|
672
729
|
}
|
|
673
|
-
// Register lane with auto-recovery manager
|
|
674
|
-
autoRecoveryManager.registerLane(lane.name);
|
|
675
730
|
// Update lane tracking
|
|
676
731
|
lane.taskStartTime = now;
|
|
677
732
|
events_1.events.emit('lane.started', {
|
|
@@ -697,212 +752,35 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
697
752
|
const now = Date.now();
|
|
698
753
|
if (result.name === '__poll__' || (now - lastStallCheck >= 10000)) {
|
|
699
754
|
lastStallCheck = now;
|
|
700
|
-
// Periodic stall check
|
|
755
|
+
// Periodic stall check using unified StallDetectionService
|
|
701
756
|
for (const [laneName, info] of running.entries()) {
|
|
702
|
-
const idleTime = now - info.lastActivity;
|
|
703
757
|
const lane = lanes.find(l => l.name === laneName);
|
|
704
|
-
if (process.env['DEBUG_STALL']) {
|
|
705
|
-
logger.debug(`[${laneName}] Stall check: idle=${Math.round(idleTime / 1000)}s, bytesDelta=${info.bytesReceived - info.lastBytesCheck}, phase=${info.stallPhase}`);
|
|
706
|
-
}
|
|
707
758
|
// Check state file for progress updates
|
|
708
|
-
let progressTime = 0;
|
|
709
759
|
try {
|
|
710
760
|
const stateStat = fs.statSync(info.statePath);
|
|
711
|
-
const
|
|
712
|
-
if (
|
|
713
|
-
|
|
761
|
+
const stallState = stallService.getState(laneName);
|
|
762
|
+
if (stallState && stateStat.mtimeMs > stallState.lastStateUpdateTime) {
|
|
763
|
+
stallService.recordStateUpdate(laneName);
|
|
714
764
|
}
|
|
715
|
-
progressTime = now - info.lastStateUpdate;
|
|
716
765
|
}
|
|
717
766
|
catch {
|
|
718
767
|
// State file might not exist yet
|
|
719
768
|
}
|
|
720
|
-
//
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
lastOutput: info.lastOutput,
|
|
729
|
-
restartCount: lane.restartCount || 0,
|
|
730
|
-
taskStartTimeMs: info.taskStartTime,
|
|
731
|
-
bytesReceived: bytesDelta, // Bytes since last check
|
|
732
|
-
continueSignalsSent: info.continueSignalsSent,
|
|
733
|
-
}, stallConfig);
|
|
734
|
-
// Only act if action is not NONE
|
|
735
|
-
if (analysis.action !== failure_policy_1.RecoveryAction.NONE) {
|
|
736
|
-
(0, failure_policy_1.logFailure)(laneName, analysis);
|
|
769
|
+
// Debug logging
|
|
770
|
+
if (process.env['DEBUG_STALL']) {
|
|
771
|
+
logger.debug(`[${laneName}] ${stallService.dumpState(laneName)}`);
|
|
772
|
+
}
|
|
773
|
+
// Run stall analysis and recovery (all logic is in StallDetectionService)
|
|
774
|
+
const analysis = stallService.checkAndRecover(laneName);
|
|
775
|
+
// Log to lane log manager if there was an action
|
|
776
|
+
if (analysis.action !== stall_detection_1.RecoveryAction.NONE) {
|
|
737
777
|
info.logManager?.log('error', analysis.message);
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
fs.writeFileSync(interventionPath, 'continue');
|
|
742
|
-
info.stallPhase = 1;
|
|
743
|
-
info.lastActivity = now;
|
|
744
|
-
info.continueSignalsSent++;
|
|
745
|
-
logger.info(`[${laneName}] Sent continue signal (#${info.continueSignalsSent})`);
|
|
746
|
-
events_1.events.emit('recovery.continue_signal', {
|
|
747
|
-
laneName,
|
|
748
|
-
idleSeconds: Math.round(idleTime / 1000),
|
|
749
|
-
signalCount: info.continueSignalsSent,
|
|
750
|
-
});
|
|
751
|
-
}
|
|
752
|
-
catch (e) {
|
|
753
|
-
logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
|
|
754
|
-
}
|
|
755
|
-
}
|
|
756
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.STRONGER_PROMPT) {
|
|
757
|
-
const interventionPath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'intervention.txt');
|
|
758
|
-
const strongerPrompt = `[SYSTEM INTERVENTION] You seem to be stuck. Please continue with your current task immediately. If you're waiting for something, explain what you need and proceed with what you can do now. If you've completed the task, summarize your work and finish.`;
|
|
759
|
-
try {
|
|
760
|
-
fs.writeFileSync(interventionPath, strongerPrompt);
|
|
761
|
-
info.stallPhase = 2;
|
|
762
|
-
info.lastActivity = now;
|
|
763
|
-
logger.warn(`[${laneName}] Sent stronger prompt after continue signal failed`);
|
|
764
|
-
events_1.events.emit('recovery.stronger_prompt', { laneName });
|
|
765
|
-
}
|
|
766
|
-
catch (e) {
|
|
767
|
-
logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
|
|
768
|
-
}
|
|
769
|
-
}
|
|
770
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.KILL_AND_RESTART ||
|
|
771
|
-
analysis.action === failure_policy_1.RecoveryAction.RESTART_LANE ||
|
|
772
|
-
analysis.action === failure_policy_1.RecoveryAction.RESTART_LANE_FROM_CHECKPOINT) {
|
|
773
|
-
lane.restartCount = (lane.restartCount || 0) + 1;
|
|
774
|
-
info.stallPhase = 3;
|
|
775
|
-
// Try to get checkpoint info
|
|
776
|
-
const checkpoint = (0, checkpoint_1.getLatestCheckpoint)(laneRunDirs[laneName]);
|
|
777
|
-
if (checkpoint) {
|
|
778
|
-
logger.info(`[${laneName}] Checkpoint available: ${checkpoint.id} (task ${checkpoint.taskIndex})`);
|
|
779
|
-
}
|
|
780
|
-
// Kill the process
|
|
781
|
-
try {
|
|
782
|
-
info.child.kill('SIGKILL');
|
|
783
|
-
}
|
|
784
|
-
catch {
|
|
785
|
-
// Process might already be dead
|
|
786
|
-
}
|
|
787
|
-
logger.warn(`[${laneName}] Killing and restarting lane (restart #${lane.restartCount})`);
|
|
788
|
-
events_1.events.emit('recovery.restart', {
|
|
789
|
-
laneName,
|
|
790
|
-
restartCount: lane.restartCount,
|
|
791
|
-
maxRestarts: stallConfig.maxRestarts,
|
|
792
|
-
});
|
|
793
|
-
}
|
|
794
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.RUN_DOCTOR) {
|
|
795
|
-
info.stallPhase = 4;
|
|
796
|
-
// Run diagnostics
|
|
797
|
-
logger.error(`[${laneName}] Running diagnostics due to persistent failures...`);
|
|
798
|
-
// Import health check dynamically to avoid circular dependency
|
|
799
|
-
const { checkAgentHealth, checkAuthHealth } = await Promise.resolve().then(() => __importStar(require('../utils/health')));
|
|
800
|
-
const [agentHealth, authHealth] = await Promise.all([
|
|
801
|
-
checkAgentHealth(),
|
|
802
|
-
checkAuthHealth(),
|
|
803
|
-
]);
|
|
804
|
-
const issues = [];
|
|
805
|
-
if (!agentHealth.ok)
|
|
806
|
-
issues.push(`Agent: ${agentHealth.message}`);
|
|
807
|
-
if (!authHealth.ok)
|
|
808
|
-
issues.push(`Auth: ${authHealth.message}`);
|
|
809
|
-
if (issues.length > 0) {
|
|
810
|
-
logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
|
|
811
|
-
}
|
|
812
|
-
else {
|
|
813
|
-
logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
|
|
814
|
-
}
|
|
815
|
-
// Save diagnostic to file
|
|
816
|
-
const diagnosticPath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'diagnostic.json');
|
|
817
|
-
fs.writeFileSync(diagnosticPath, JSON.stringify({
|
|
818
|
-
timestamp: Date.now(),
|
|
819
|
-
agentHealthy: agentHealth.ok,
|
|
820
|
-
authHealthy: authHealth.ok,
|
|
821
|
-
issues,
|
|
822
|
-
analysis,
|
|
823
|
-
}, null, 2));
|
|
824
|
-
// Kill the process
|
|
825
|
-
try {
|
|
826
|
-
info.child.kill('SIGKILL');
|
|
827
|
-
}
|
|
828
|
-
catch {
|
|
829
|
-
// Process might already be dead
|
|
830
|
-
}
|
|
831
|
-
logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
|
|
832
|
-
// Save POF for failed recovery
|
|
833
|
-
const recoveryState = autoRecoveryManager.getState(laneName);
|
|
834
|
-
if (recoveryState) {
|
|
835
|
-
try {
|
|
836
|
-
const laneStatePath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'state.json');
|
|
837
|
-
const laneState = (0, state_1.loadState)(laneStatePath);
|
|
838
|
-
const pofDir = (0, path_1.safeJoin)(runRoot, '..', '..', 'pof');
|
|
839
|
-
const diagnosticInfo = {
|
|
840
|
-
timestamp: Date.now(),
|
|
841
|
-
agentHealthy: agentHealth.ok,
|
|
842
|
-
authHealthy: authHealth.ok,
|
|
843
|
-
systemHealthy: true,
|
|
844
|
-
suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
|
|
845
|
-
details: issues.join('\n') || 'No obvious issues found',
|
|
846
|
-
};
|
|
847
|
-
const pofEntry = (0, auto_recovery_1.createPOFFromRecoveryState)(runId, runRoot, laneName, recoveryState, laneState, diagnosticInfo);
|
|
848
|
-
(0, auto_recovery_1.savePOF)(runId, pofDir, pofEntry);
|
|
849
|
-
}
|
|
850
|
-
catch (pofError) {
|
|
851
|
-
logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
|
|
852
|
-
}
|
|
853
|
-
}
|
|
854
|
-
events_1.events.emit('recovery.diagnosed', {
|
|
855
|
-
laneName,
|
|
856
|
-
diagnostic: { agentHealthy: agentHealth.ok, authHealthy: authHealth.ok, issues },
|
|
857
|
-
});
|
|
858
|
-
}
|
|
859
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.ABORT_LANE) {
|
|
860
|
-
info.stallPhase = 5;
|
|
861
|
-
try {
|
|
862
|
-
info.child.kill('SIGKILL');
|
|
863
|
-
}
|
|
864
|
-
catch {
|
|
865
|
-
// Process might already be dead
|
|
866
|
-
}
|
|
867
|
-
logger.error(`[${laneName}] Aborting lane due to repeated stalls`);
|
|
868
|
-
// Save POF for failed recovery
|
|
869
|
-
const recoveryState = autoRecoveryManager.getState(laneName);
|
|
870
|
-
if (recoveryState) {
|
|
871
|
-
try {
|
|
872
|
-
const laneStatePath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'state.json');
|
|
873
|
-
const laneState = (0, state_1.loadState)(laneStatePath);
|
|
874
|
-
const pofDir = (0, path_1.safeJoin)(runRoot, '..', '..', 'pof');
|
|
875
|
-
const pofEntry = (0, auto_recovery_1.createPOFFromRecoveryState)(runId, runRoot, laneName, recoveryState, laneState, recoveryState.diagnosticInfo);
|
|
876
|
-
(0, auto_recovery_1.savePOF)(runId, pofDir, pofEntry);
|
|
877
|
-
}
|
|
878
|
-
catch (pofError) {
|
|
879
|
-
logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
|
|
880
|
-
}
|
|
881
|
-
}
|
|
882
|
-
}
|
|
883
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.SEND_GIT_GUIDANCE) {
|
|
884
|
-
// Send guidance message to agent for git issues
|
|
885
|
-
const interventionPath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'intervention.txt');
|
|
886
|
-
// Determine which guidance to send based on the failure type
|
|
887
|
-
let guidance;
|
|
888
|
-
if (analysis.type === failure_policy_1.FailureType.GIT_PUSH_REJECTED) {
|
|
889
|
-
guidance = (0, auto_recovery_1.getGitPushFailureGuidance)();
|
|
890
|
-
}
|
|
891
|
-
else if (analysis.type === failure_policy_1.FailureType.MERGE_CONFLICT) {
|
|
892
|
-
guidance = (0, auto_recovery_1.getMergeConflictGuidance)();
|
|
893
|
-
}
|
|
894
|
-
else {
|
|
895
|
-
guidance = (0, auto_recovery_1.getGitErrorGuidance)(analysis.message);
|
|
896
|
-
}
|
|
897
|
-
try {
|
|
898
|
-
fs.writeFileSync(interventionPath, guidance);
|
|
899
|
-
info.lastActivity = now;
|
|
900
|
-
logger.info(`[${laneName}] Sent git issue guidance to agent`);
|
|
901
|
-
}
|
|
902
|
-
catch (e) {
|
|
903
|
-
logger.error(`[${laneName}] Failed to send guidance: ${e.message}`);
|
|
904
|
-
}
|
|
778
|
+
// Handle special case: RUN_DOCTOR needs async operations
|
|
779
|
+
if (analysis.action === stall_detection_1.RecoveryAction.RUN_DOCTOR) {
|
|
780
|
+
await handleDoctorDiagnostics(laneName, laneRunDirs[laneName], runId, runRoot, stallService, info.child);
|
|
905
781
|
}
|
|
782
|
+
// Sync restartCount back to lane info (for restart logic in process exit handler)
|
|
783
|
+
lane.restartCount = stallService.getRestartCount(laneName);
|
|
906
784
|
}
|
|
907
785
|
}
|
|
908
786
|
continue;
|
|
@@ -912,8 +790,10 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
912
790
|
const info = running.get(finished.name);
|
|
913
791
|
running.delete(finished.name);
|
|
914
792
|
exitCodes[finished.name] = finished.code;
|
|
915
|
-
//
|
|
916
|
-
|
|
793
|
+
// Get stall state before unregistering
|
|
794
|
+
const stallPhase = stallService.getPhase(finished.name);
|
|
795
|
+
// Unregister from stall detection service
|
|
796
|
+
stallService.unregisterLane(finished.name);
|
|
917
797
|
if (finished.code === 0) {
|
|
918
798
|
completedLanes.add(finished.name);
|
|
919
799
|
events_1.events.emit('lane.completed', {
|
|
@@ -943,8 +823,8 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
943
823
|
}
|
|
944
824
|
}
|
|
945
825
|
else {
|
|
946
|
-
// Check if it was a restart request
|
|
947
|
-
if (
|
|
826
|
+
// Check if it was a restart request (RESTART_REQUESTED phase)
|
|
827
|
+
if (stallPhase === stall_detection_1.StallPhase.RESTART_REQUESTED) {
|
|
948
828
|
logger.info(`🔄 Lane ${finished.name} is being restarted due to stall...`);
|
|
949
829
|
// Update startIndex from current state to resume from the same task
|
|
950
830
|
const statePath = (0, path_1.safeJoin)(laneRunDirs[finished.name], 'state.json');
|
|
@@ -961,7 +841,7 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
961
841
|
}
|
|
962
842
|
failedLanes.add(finished.name);
|
|
963
843
|
let errorMsg = 'Process exited with non-zero code';
|
|
964
|
-
if (
|
|
844
|
+
if (stallPhase >= stall_detection_1.StallPhase.DIAGNOSED) {
|
|
965
845
|
errorMsg = 'Stopped due to repeated stall';
|
|
966
846
|
}
|
|
967
847
|
else if (info.logManager) {
|