@litmers/cursorflow-orchestrator 0.1.30 → 0.1.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -52
- package/commands/cursorflow-add.md +159 -0
- package/commands/cursorflow-monitor.md +23 -2
- package/commands/cursorflow-new.md +87 -0
- package/dist/cli/add.d.ts +7 -0
- package/dist/cli/add.js +377 -0
- package/dist/cli/add.js.map +1 -0
- package/dist/cli/clean.js +1 -0
- package/dist/cli/clean.js.map +1 -1
- package/dist/cli/config.d.ts +7 -0
- package/dist/cli/config.js +181 -0
- package/dist/cli/config.js.map +1 -0
- package/dist/cli/index.js +34 -30
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/logs.js +7 -33
- package/dist/cli/logs.js.map +1 -1
- package/dist/cli/monitor.js +51 -62
- package/dist/cli/monitor.js.map +1 -1
- package/dist/cli/new.d.ts +7 -0
- package/dist/cli/new.js +232 -0
- package/dist/cli/new.js.map +1 -0
- package/dist/cli/prepare.js +95 -193
- package/dist/cli/prepare.js.map +1 -1
- package/dist/cli/resume.js +11 -47
- package/dist/cli/resume.js.map +1 -1
- package/dist/cli/run.js +27 -22
- package/dist/cli/run.js.map +1 -1
- package/dist/cli/tasks.js +1 -2
- package/dist/cli/tasks.js.map +1 -1
- package/dist/core/failure-policy.d.ts +9 -0
- package/dist/core/failure-policy.js +9 -0
- package/dist/core/failure-policy.js.map +1 -1
- package/dist/core/orchestrator.d.ts +20 -6
- package/dist/core/orchestrator.js +217 -331
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/runner/agent.d.ts +27 -0
- package/dist/core/runner/agent.js +294 -0
- package/dist/core/runner/agent.js.map +1 -0
- package/dist/core/runner/index.d.ts +5 -0
- package/dist/core/runner/index.js +22 -0
- package/dist/core/runner/index.js.map +1 -0
- package/dist/core/runner/pipeline.d.ts +9 -0
- package/dist/core/runner/pipeline.js +539 -0
- package/dist/core/runner/pipeline.js.map +1 -0
- package/dist/core/runner/prompt.d.ts +25 -0
- package/dist/core/runner/prompt.js +175 -0
- package/dist/core/runner/prompt.js.map +1 -0
- package/dist/core/runner/task.d.ts +26 -0
- package/dist/core/runner/task.js +283 -0
- package/dist/core/runner/task.js.map +1 -0
- package/dist/core/runner/utils.d.ts +37 -0
- package/dist/core/runner/utils.js +161 -0
- package/dist/core/runner/utils.js.map +1 -0
- package/dist/core/runner.d.ts +2 -96
- package/dist/core/runner.js +11 -1136
- package/dist/core/runner.js.map +1 -1
- package/dist/core/stall-detection.d.ts +326 -0
- package/dist/core/stall-detection.js +781 -0
- package/dist/core/stall-detection.js.map +1 -0
- package/dist/types/config.d.ts +6 -6
- package/dist/types/flow.d.ts +84 -0
- package/dist/types/flow.js +10 -0
- package/dist/types/flow.js.map +1 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +3 -3
- package/dist/types/index.js.map +1 -1
- package/dist/types/lane.d.ts +0 -2
- package/dist/types/logging.d.ts +5 -1
- package/dist/types/task.d.ts +7 -11
- package/dist/utils/config.js +7 -15
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/dependency.d.ts +36 -1
- package/dist/utils/dependency.js +256 -1
- package/dist/utils/dependency.js.map +1 -1
- package/dist/utils/enhanced-logger.d.ts +45 -82
- package/dist/utils/enhanced-logger.js +238 -844
- package/dist/utils/enhanced-logger.js.map +1 -1
- package/dist/utils/git.d.ts +29 -0
- package/dist/utils/git.js +115 -5
- package/dist/utils/git.js.map +1 -1
- package/dist/utils/state.js +0 -2
- package/dist/utils/state.js.map +1 -1
- package/dist/utils/task-service.d.ts +2 -2
- package/dist/utils/task-service.js +40 -31
- package/dist/utils/task-service.js.map +1 -1
- package/package.json +4 -3
- package/src/cli/add.ts +397 -0
- package/src/cli/clean.ts +1 -0
- package/src/cli/config.ts +177 -0
- package/src/cli/index.ts +36 -32
- package/src/cli/logs.ts +7 -31
- package/src/cli/monitor.ts +55 -71
- package/src/cli/new.ts +235 -0
- package/src/cli/prepare.ts +98 -205
- package/src/cli/resume.ts +13 -56
- package/src/cli/run.ts +311 -306
- package/src/cli/tasks.ts +1 -2
- package/src/core/failure-policy.ts +9 -0
- package/src/core/orchestrator.ts +281 -375
- package/src/core/runner/agent.ts +314 -0
- package/src/core/runner/index.ts +6 -0
- package/src/core/runner/pipeline.ts +567 -0
- package/src/core/runner/prompt.ts +174 -0
- package/src/core/runner/task.ts +320 -0
- package/src/core/runner/utils.ts +142 -0
- package/src/core/runner.ts +8 -1347
- package/src/core/stall-detection.ts +936 -0
- package/src/types/config.ts +6 -6
- package/src/types/flow.ts +91 -0
- package/src/types/index.ts +15 -3
- package/src/types/lane.ts +0 -2
- package/src/types/logging.ts +5 -1
- package/src/types/task.ts +7 -11
- package/src/utils/config.ts +8 -16
- package/src/utils/dependency.ts +311 -2
- package/src/utils/enhanced-logger.ts +263 -927
- package/src/utils/git.ts +145 -5
- package/src/utils/state.ts +0 -2
- package/src/utils/task-service.ts +48 -40
- package/commands/cursorflow-review.md +0 -56
- package/commands/cursorflow-runs.md +0 -59
- package/dist/cli/runs.d.ts +0 -5
- package/dist/cli/runs.js +0 -214
- package/dist/cli/runs.js.map +0 -1
- package/dist/core/reviewer.d.ts +0 -66
- package/dist/core/reviewer.js +0 -265
- package/dist/core/reviewer.js.map +0 -1
- package/src/cli/runs.ts +0 -212
- package/src/core/reviewer.ts +0 -285
|
@@ -60,15 +60,12 @@ const child_process_2 = require("child_process");
|
|
|
60
60
|
const path_1 = require("../utils/path");
|
|
61
61
|
const enhanced_logger_1 = require("../utils/enhanced-logger");
|
|
62
62
|
const log_formatter_1 = require("../utils/log-formatter");
|
|
63
|
-
const failure_policy_1 = require("./failure-policy");
|
|
64
63
|
const auto_recovery_1 = require("./auto-recovery");
|
|
65
|
-
const
|
|
64
|
+
const stall_detection_1 = require("./stall-detection");
|
|
66
65
|
const health_1 = require("../utils/health");
|
|
67
|
-
const checkpoint_1 = require("../utils/checkpoint");
|
|
68
66
|
const lock_1 = require("../utils/lock");
|
|
69
67
|
/** Default stall detection configuration - 2 minute idle timeout for recovery */
|
|
70
68
|
const DEFAULT_ORCHESTRATOR_STALL_CONFIG = {
|
|
71
|
-
...failure_policy_1.DEFAULT_STALL_CONFIG,
|
|
72
69
|
idleTimeoutMs: 2 * 60 * 1000, // 2 minutes (idle detection for continue signal)
|
|
73
70
|
progressTimeoutMs: 10 * 60 * 1000, // 10 minutes (only triggers if no activity at all)
|
|
74
71
|
maxRestarts: 2,
|
|
@@ -94,10 +91,93 @@ function logFileTail(filePath, lines = 10) {
|
|
|
94
91
|
// Ignore log reading errors
|
|
95
92
|
}
|
|
96
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* Handle RUN_DOCTOR action - runs async health diagnostics
|
|
96
|
+
*/
|
|
97
|
+
async function handleDoctorDiagnostics(laneName, laneRunDir, runId, runRoot, stallService, child) {
|
|
98
|
+
// Import health check dynamically to avoid circular dependency
|
|
99
|
+
const { checkAgentHealth, checkAuthHealth } = await Promise.resolve().then(() => __importStar(require('../utils/health')));
|
|
100
|
+
const [agentHealth, authHealth] = await Promise.all([
|
|
101
|
+
checkAgentHealth(),
|
|
102
|
+
checkAuthHealth(),
|
|
103
|
+
]);
|
|
104
|
+
const issues = [];
|
|
105
|
+
if (!agentHealth.ok)
|
|
106
|
+
issues.push(`Agent: ${agentHealth.message}`);
|
|
107
|
+
if (!authHealth.ok)
|
|
108
|
+
issues.push(`Auth: ${authHealth.message}`);
|
|
109
|
+
if (issues.length > 0) {
|
|
110
|
+
logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
|
|
111
|
+
}
|
|
112
|
+
else {
|
|
113
|
+
logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
|
|
114
|
+
}
|
|
115
|
+
// Save diagnostic to file
|
|
116
|
+
const diagnosticPath = (0, path_1.safeJoin)(laneRunDir, 'diagnostic.json');
|
|
117
|
+
fs.writeFileSync(diagnosticPath, JSON.stringify({
|
|
118
|
+
timestamp: Date.now(),
|
|
119
|
+
agentHealthy: agentHealth.ok,
|
|
120
|
+
authHealthy: authHealth.ok,
|
|
121
|
+
issues,
|
|
122
|
+
}, null, 2));
|
|
123
|
+
// Kill the process
|
|
124
|
+
try {
|
|
125
|
+
child.kill('SIGKILL');
|
|
126
|
+
}
|
|
127
|
+
catch {
|
|
128
|
+
// Process might already be dead
|
|
129
|
+
}
|
|
130
|
+
logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
|
|
131
|
+
// Save POF for failed recovery
|
|
132
|
+
const stallState = stallService.getState(laneName);
|
|
133
|
+
if (stallState) {
|
|
134
|
+
try {
|
|
135
|
+
const laneStatePath = (0, path_1.safeJoin)(laneRunDir, 'state.json');
|
|
136
|
+
const laneState = (0, state_1.loadState)(laneStatePath);
|
|
137
|
+
const pofDir = (0, path_1.safeJoin)(runRoot, '..', '..', 'pof');
|
|
138
|
+
// Convert stall state to recovery state format for POF
|
|
139
|
+
// Note: StallPhase and RecoveryStage have compatible numeric values (0-5)
|
|
140
|
+
const recoveryState = {
|
|
141
|
+
laneName,
|
|
142
|
+
stage: stallState.phase, // Both enums use 0-5
|
|
143
|
+
lastActivityTime: stallState.lastRealActivityTime,
|
|
144
|
+
lastBytesReceived: stallState.bytesSinceLastCheck,
|
|
145
|
+
totalBytesReceived: stallState.totalBytesReceived,
|
|
146
|
+
lastOutput: stallState.lastOutput,
|
|
147
|
+
restartCount: stallState.restartCount,
|
|
148
|
+
continueSignalsSent: stallState.continueSignalCount,
|
|
149
|
+
lastStageChangeTime: stallState.lastPhaseChangeTime,
|
|
150
|
+
isLongOperation: stallState.isLongOperation,
|
|
151
|
+
failureHistory: stallState.failureHistory.map(f => ({
|
|
152
|
+
timestamp: f.timestamp,
|
|
153
|
+
stage: f.phase, // Both enums use 0-5
|
|
154
|
+
action: f.action,
|
|
155
|
+
message: f.message,
|
|
156
|
+
idleTimeMs: f.idleTimeMs,
|
|
157
|
+
bytesReceived: f.bytesReceived,
|
|
158
|
+
lastOutput: f.lastOutput,
|
|
159
|
+
})),
|
|
160
|
+
};
|
|
161
|
+
const diagnosticInfo = {
|
|
162
|
+
timestamp: Date.now(),
|
|
163
|
+
agentHealthy: agentHealth.ok,
|
|
164
|
+
authHealthy: authHealth.ok,
|
|
165
|
+
systemHealthy: true,
|
|
166
|
+
suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
|
|
167
|
+
details: issues.join('\n') || 'No obvious issues found',
|
|
168
|
+
};
|
|
169
|
+
const pofEntry = (0, auto_recovery_1.createPOFFromRecoveryState)(runId, runRoot, laneName, recoveryState, laneState, diagnosticInfo);
|
|
170
|
+
(0, auto_recovery_1.savePOF)(runId, pofDir, pofEntry);
|
|
171
|
+
}
|
|
172
|
+
catch (pofError) {
|
|
173
|
+
logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
97
177
|
/**
|
|
98
178
|
* Spawn a lane process
|
|
99
179
|
*/
|
|
100
|
-
function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0, pipelineBranch, worktreeDir, enhancedLogConfig, noGit = false, onActivity, }) {
|
|
180
|
+
function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0, pipelineBranch, worktreeDir, enhancedLogConfig, noGit = false, onActivity, laneIndex = 0, }) {
|
|
101
181
|
fs.mkdirSync(laneRunDir, { recursive: true });
|
|
102
182
|
// Use extension-less resolve to handle both .ts (dev) and .js (dist)
|
|
103
183
|
const runnerPath = require.resolve('./runner');
|
|
@@ -127,17 +207,23 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
127
207
|
...process.env,
|
|
128
208
|
};
|
|
129
209
|
if (logConfig.enabled) {
|
|
210
|
+
// Helper to get dynamic lane label like [L01-T01-laneName]
|
|
211
|
+
const getDynamicLabel = () => {
|
|
212
|
+
const laneNum = `L${(laneIndex + 1).toString().padStart(2, '0')}`;
|
|
213
|
+
const taskPart = info.currentTaskIndex ? `-T${info.currentTaskIndex.toString().padStart(2, '0')}` : '';
|
|
214
|
+
return `[${laneNum}${taskPart}-${laneName}]`;
|
|
215
|
+
};
|
|
130
216
|
// Create callback for clean console output
|
|
131
217
|
const onParsedMessage = (msg) => {
|
|
132
218
|
if (onActivity)
|
|
133
219
|
onActivity();
|
|
134
220
|
const formatted = (0, log_formatter_1.formatMessageForConsole)(msg, {
|
|
135
|
-
laneLabel:
|
|
221
|
+
laneLabel: getDynamicLabel(),
|
|
136
222
|
includeTimestamp: true
|
|
137
223
|
});
|
|
138
224
|
process.stdout.write(formatted + '\n');
|
|
139
225
|
};
|
|
140
|
-
logManager = (0, enhanced_logger_1.createLogManager)(laneRunDir, laneName, logConfig, onParsedMessage);
|
|
226
|
+
logManager = (0, enhanced_logger_1.createLogManager)(laneRunDir, laneName, logConfig, onParsedMessage, laneIndex);
|
|
141
227
|
logPath = logManager.getLogPaths().clean;
|
|
142
228
|
// Spawn with pipe for enhanced logging
|
|
143
229
|
child = (0, child_process_1.spawn)('node', args, {
|
|
@@ -145,6 +231,15 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
145
231
|
env: childEnv,
|
|
146
232
|
detached: false,
|
|
147
233
|
});
|
|
234
|
+
// Initialize info object for stdout handler to use
|
|
235
|
+
const info = {
|
|
236
|
+
child,
|
|
237
|
+
logManager,
|
|
238
|
+
logPath,
|
|
239
|
+
statePath: (0, path_1.safeJoin)(laneRunDir, 'state.json'),
|
|
240
|
+
laneIndex,
|
|
241
|
+
currentTaskIndex: startIndex > 0 ? startIndex + 1 : 0
|
|
242
|
+
};
|
|
148
243
|
// Buffer for non-JSON lines
|
|
149
244
|
let lineBuffer = '';
|
|
150
245
|
// Pipe stdout and stderr through enhanced logger
|
|
@@ -158,22 +253,50 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
158
253
|
lineBuffer = lines.pop() || '';
|
|
159
254
|
for (const line of lines) {
|
|
160
255
|
const trimmed = line.trim();
|
|
256
|
+
if (!trimmed)
|
|
257
|
+
continue;
|
|
258
|
+
// Detect task start/progress to update label
|
|
259
|
+
// Example: [1/1] hello-task
|
|
260
|
+
const cleanLine = (0, enhanced_logger_1.stripAnsi)(trimmed);
|
|
261
|
+
const taskMatch = cleanLine.match(/^\s*\[(\d+)\/(\d+)\]\s+(.+)$/);
|
|
262
|
+
if (taskMatch) {
|
|
263
|
+
info.currentTaskIndex = parseInt(taskMatch[1]);
|
|
264
|
+
// Update log manager's task index to keep it in sync for readable log
|
|
265
|
+
if (logManager) {
|
|
266
|
+
logManager.setTask(taskMatch[3].trim(), undefined, info.currentTaskIndex - 1);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
161
269
|
// Show if it's a timestamped log line (starts with [YYYY-MM-DD... or [HH:MM:SS])
|
|
162
270
|
// or if it's NOT a noisy JSON line
|
|
163
|
-
const hasTimestamp = /^\[\d{4}-\d{2}-\d{2}T|\^\[\d{2}:\d{2}:\d{2}\]/.test(trimmed);
|
|
164
271
|
const isJson = trimmed.startsWith('{') || trimmed.includes('{"type"');
|
|
165
|
-
|
|
166
|
-
|
|
272
|
+
// Filter out heartbeats - they should NOT reset the idle timer
|
|
273
|
+
const isHeartbeat = trimmed.includes('Heartbeat') && trimmed.includes('bytes received');
|
|
274
|
+
if (!isJson) {
|
|
275
|
+
// Only trigger activity for non-heartbeat lines
|
|
276
|
+
if (onActivity && !isHeartbeat)
|
|
167
277
|
onActivity();
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
278
|
+
const currentLabel = getDynamicLabel();
|
|
279
|
+
const coloredLabel = `${logger.COLORS.magenta}${currentLabel}${logger.COLORS.reset}`;
|
|
280
|
+
// Regex that matches timestamp even if it has ANSI color codes
|
|
281
|
+
// Matches: [24:39:14] or \x1b[90m[24:39:14]\x1b[0m
|
|
282
|
+
const timestampRegex = /^((?:\x1b\[[0-9;]*m)*)\[(\d{4}-\d{2}-\d{2}T|\d{2}:\d{2}:\d{2})\]/;
|
|
283
|
+
const tsMatch = trimmed.match(timestampRegex);
|
|
284
|
+
if (tsMatch) {
|
|
285
|
+
// If line already has timestamp format, just add lane prefix
|
|
286
|
+
// Check if lane label is already present to avoid triple duplication
|
|
287
|
+
if (!trimmed.includes(currentLabel)) {
|
|
288
|
+
// Insert label after the timestamp part
|
|
289
|
+
const tsPart = tsMatch[0];
|
|
290
|
+
const formatted = trimmed.replace(tsPart, `${tsPart} ${coloredLabel}`);
|
|
291
|
+
process.stdout.write(formatted + '\n');
|
|
292
|
+
}
|
|
293
|
+
else {
|
|
294
|
+
process.stdout.write(trimmed + '\n');
|
|
295
|
+
}
|
|
173
296
|
}
|
|
174
297
|
else {
|
|
175
298
|
// Add full prefix: timestamp + lane
|
|
176
|
-
process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${
|
|
299
|
+
process.stdout.write(`${logger.COLORS.gray}[${new Date().toLocaleTimeString('en-US', { hour12: false })}]${logger.COLORS.reset} ${coloredLabel} ${line}\n`);
|
|
177
300
|
}
|
|
178
301
|
}
|
|
179
302
|
}
|
|
@@ -193,13 +316,15 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
193
316
|
trimmed.startsWith('HEAD is now at') ||
|
|
194
317
|
trimmed.includes('actual output');
|
|
195
318
|
const ts = new Date().toLocaleTimeString('en-US', { hour12: false });
|
|
319
|
+
const currentLabel = getDynamicLabel();
|
|
320
|
+
const coloredLabel = `${logger.COLORS.magenta}${currentLabel}${logger.COLORS.reset}`;
|
|
196
321
|
if (isStatus) {
|
|
197
|
-
process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${
|
|
322
|
+
process.stdout.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${coloredLabel} ${trimmed}\n`);
|
|
198
323
|
}
|
|
199
324
|
else {
|
|
200
325
|
if (onActivity)
|
|
201
326
|
onActivity();
|
|
202
|
-
process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${
|
|
327
|
+
process.stderr.write(`${logger.COLORS.gray}[${ts}]${logger.COLORS.reset} ${coloredLabel} ${logger.COLORS.red}❌ ERR ${trimmed}${logger.COLORS.reset}\n`);
|
|
203
328
|
}
|
|
204
329
|
}
|
|
205
330
|
}
|
|
@@ -209,10 +334,11 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
209
334
|
child.on('exit', () => {
|
|
210
335
|
logManager?.close();
|
|
211
336
|
});
|
|
337
|
+
return { child, logPath, logManager, info };
|
|
212
338
|
}
|
|
213
339
|
else {
|
|
214
340
|
// Fallback to simple file logging
|
|
215
|
-
logPath = (0, path_1.safeJoin)(laneRunDir, 'terminal.log');
|
|
341
|
+
logPath = (0, path_1.safeJoin)(laneRunDir, 'terminal-readable.log');
|
|
216
342
|
const logFd = fs.openSync(logPath, 'a');
|
|
217
343
|
child = (0, child_process_1.spawn)('node', args, {
|
|
218
344
|
stdio: ['ignore', logFd, logFd],
|
|
@@ -225,8 +351,18 @@ function spawnLane({ laneName, tasksFile, laneRunDir, executor, startIndex = 0,
|
|
|
225
351
|
catch {
|
|
226
352
|
// Ignore
|
|
227
353
|
}
|
|
354
|
+
return {
|
|
355
|
+
child,
|
|
356
|
+
logPath,
|
|
357
|
+
logManager,
|
|
358
|
+
info: {
|
|
359
|
+
child,
|
|
360
|
+
logPath,
|
|
361
|
+
statePath: (0, path_1.safeJoin)(laneRunDir, 'state.json'),
|
|
362
|
+
laneIndex
|
|
363
|
+
}
|
|
364
|
+
};
|
|
228
365
|
}
|
|
229
|
-
return { child, logPath, logManager };
|
|
230
366
|
}
|
|
231
367
|
/**
|
|
232
368
|
* Wait for child process to exit
|
|
@@ -242,7 +378,7 @@ function waitChild(proc) {
|
|
|
242
378
|
});
|
|
243
379
|
}
|
|
244
380
|
/**
|
|
245
|
-
* List lane task files in directory
|
|
381
|
+
* List lane task files in directory
|
|
246
382
|
*/
|
|
247
383
|
function listLaneFiles(tasksDir) {
|
|
248
384
|
if (!fs.existsSync(tasksDir)) {
|
|
@@ -255,18 +391,9 @@ function listLaneFiles(tasksDir) {
|
|
|
255
391
|
.map(f => {
|
|
256
392
|
const filePath = (0, path_1.safeJoin)(tasksDir, f);
|
|
257
393
|
const name = path.basename(f, '.json');
|
|
258
|
-
let dependsOn = [];
|
|
259
|
-
try {
|
|
260
|
-
const config = JSON.parse(fs.readFileSync(filePath, 'utf8'));
|
|
261
|
-
dependsOn = config.dependsOn || [];
|
|
262
|
-
}
|
|
263
|
-
catch (e) {
|
|
264
|
-
logger.warn(`Failed to parse config for lane ${name}: ${e}`);
|
|
265
|
-
}
|
|
266
394
|
return {
|
|
267
395
|
name,
|
|
268
396
|
path: filePath,
|
|
269
|
-
dependsOn,
|
|
270
397
|
};
|
|
271
398
|
});
|
|
272
399
|
}
|
|
@@ -281,8 +408,7 @@ function printLaneStatus(lanes, laneRunDirs) {
|
|
|
281
408
|
const statePath = (0, path_1.safeJoin)(dir, 'state.json');
|
|
282
409
|
const state = (0, state_1.loadState)(statePath);
|
|
283
410
|
if (!state) {
|
|
284
|
-
|
|
285
|
-
return { lane: lane.name, status: isWaiting ? 'waiting' : 'pending', task: '-' };
|
|
411
|
+
return { lane: lane.name, status: 'pending', task: '-' };
|
|
286
412
|
}
|
|
287
413
|
const idx = (state.currentTaskIndex || 0) + 1;
|
|
288
414
|
return {
|
|
@@ -319,11 +445,11 @@ async function resolveAllDependencies(blockedLanes, allLanes, laneRunDirs, pipel
|
|
|
319
445
|
const state = (0, state_1.loadState)(statePath);
|
|
320
446
|
const worktreeDir = state?.worktreeDir || (0, path_1.safeJoin)(runRoot, 'resolution-worktree');
|
|
321
447
|
if (!fs.existsSync(worktreeDir)) {
|
|
322
|
-
logger.info(
|
|
448
|
+
logger.info(`🏗️ Creating resolution worktree at ${worktreeDir}`);
|
|
323
449
|
git.createWorktree(worktreeDir, pipelineBranch, { baseBranch: git.getCurrentBranch() });
|
|
324
450
|
}
|
|
325
451
|
// 3. Resolve on pipeline branch
|
|
326
|
-
logger.info(
|
|
452
|
+
logger.info(`🔄 Resolving dependencies on branch ${pipelineBranch}`);
|
|
327
453
|
git.runGit(['checkout', pipelineBranch], { cwd: worktreeDir });
|
|
328
454
|
for (const cmd of uniqueCommands) {
|
|
329
455
|
logger.info(`Running: ${cmd}`);
|
|
@@ -413,28 +539,9 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
413
539
|
}
|
|
414
540
|
logger.success('✓ Preflight checks passed');
|
|
415
541
|
}
|
|
416
|
-
// Validate dependencies and detect cycles
|
|
417
|
-
logger.section('📊 Dependency Analysis');
|
|
418
|
-
const depInfos = lanes.map(l => ({
|
|
419
|
-
name: l.name,
|
|
420
|
-
dependsOn: l.dependsOn,
|
|
421
|
-
}));
|
|
422
|
-
const depValidation = (0, dependency_1.validateDependencies)(depInfos);
|
|
423
|
-
if (!depValidation.valid) {
|
|
424
|
-
logger.error('❌ Dependency validation failed:');
|
|
425
|
-
for (const err of depValidation.errors) {
|
|
426
|
-
logger.error(` • ${err}`);
|
|
427
|
-
}
|
|
428
|
-
throw new Error('Invalid dependency configuration');
|
|
429
|
-
}
|
|
430
|
-
if (depValidation.warnings.length > 0) {
|
|
431
|
-
for (const warn of depValidation.warnings) {
|
|
432
|
-
logger.warn(`⚠️ ${warn}`);
|
|
433
|
-
}
|
|
434
|
-
}
|
|
435
|
-
// Print dependency graph
|
|
436
|
-
(0, dependency_1.printDependencyGraph)(depInfos);
|
|
437
542
|
const config = (0, config_1.loadConfig)();
|
|
543
|
+
// Set verbose git logging from config
|
|
544
|
+
git.setVerboseGit(config.verboseGit || false);
|
|
438
545
|
const logsDir = (0, config_1.getLogsDir)(config);
|
|
439
546
|
const runId = `run-${Date.now()}`;
|
|
440
547
|
// Use absolute path for runRoot to avoid issues with subfolders
|
|
@@ -455,16 +562,11 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
455
562
|
}
|
|
456
563
|
const randomSuffix = Math.random().toString(36).substring(2, 7);
|
|
457
564
|
const pipelineBranch = `cursorflow/run-${Date.now().toString(36)}-${randomSuffix}`;
|
|
458
|
-
//
|
|
459
|
-
const
|
|
565
|
+
// Initialize unified stall detection service (Single Source of Truth)
|
|
566
|
+
const stallService = (0, stall_detection_1.getStallService)({
|
|
460
567
|
...DEFAULT_ORCHESTRATOR_STALL_CONFIG,
|
|
461
568
|
...options.stallConfig,
|
|
462
|
-
|
|
463
|
-
// Initialize auto-recovery manager
|
|
464
|
-
const autoRecoveryManager = (0, auto_recovery_1.getAutoRecoveryManager)({
|
|
465
|
-
...auto_recovery_1.DEFAULT_AUTO_RECOVERY_CONFIG,
|
|
466
|
-
idleTimeoutMs: stallConfig.idleTimeoutMs, // Sync with stall config
|
|
467
|
-
...options.autoRecoveryConfig,
|
|
569
|
+
verbose: process.env['DEBUG_STALL'] === 'true',
|
|
468
570
|
});
|
|
469
571
|
// Initialize event system
|
|
470
572
|
events_1.events.setRunId(runId);
|
|
@@ -509,6 +611,7 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
509
611
|
fs.mkdirSync(worktreeParent, { recursive: true });
|
|
510
612
|
}
|
|
511
613
|
laneWorktreeDirs[lane.name] = laneWorktreeDir;
|
|
614
|
+
logger.info(`🏗️ Initializing lane ${lane.name}: branch=${lanePipelineBranch}`);
|
|
512
615
|
const initialState = (0, state_1.createLaneState)(lane.name, taskConfig, lane.path, {
|
|
513
616
|
pipelineBranch: lanePipelineBranch,
|
|
514
617
|
worktreeDir: laneWorktreeDir
|
|
@@ -523,19 +626,6 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
523
626
|
logger.info(`Tasks directory: ${tasksDir}`);
|
|
524
627
|
logger.info(`Run directory: ${runRoot}`);
|
|
525
628
|
logger.info(`Lanes: ${lanes.length}`);
|
|
526
|
-
// Display dependency graph
|
|
527
|
-
logger.info('\n📊 Dependency Graph:');
|
|
528
|
-
for (const lane of lanes) {
|
|
529
|
-
const deps = lane.dependsOn.length > 0 ? ` [depends on: ${lane.dependsOn.join(', ')}]` : '';
|
|
530
|
-
console.log(` ${logger.COLORS.cyan}${lane.name}${logger.COLORS.reset}${deps}`);
|
|
531
|
-
// Simple tree-like visualization for deep dependencies
|
|
532
|
-
if (lane.dependsOn.length > 0) {
|
|
533
|
-
for (const dep of lane.dependsOn) {
|
|
534
|
-
console.log(` └─ ${dep}`);
|
|
535
|
-
}
|
|
536
|
-
}
|
|
537
|
-
}
|
|
538
|
-
console.log('');
|
|
539
629
|
// Disable auto-resolve when noGit mode is enabled
|
|
540
630
|
const autoResolve = !options.noGit && options.autoResolveDependencies !== false;
|
|
541
631
|
if (options.noGit) {
|
|
@@ -565,28 +655,12 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
565
655
|
let lastStallCheck = Date.now();
|
|
566
656
|
try {
|
|
567
657
|
while (completedLanes.size + failedLanes.size + blockedLanes.size < lanes.length || (blockedLanes.size > 0 && running.size === 0)) {
|
|
568
|
-
// 1. Identify lanes ready to start
|
|
658
|
+
// 1. Identify lanes ready to start (all lanes can start immediately - no lane-level dependencies)
|
|
569
659
|
const readyToStart = lanes.filter(lane => {
|
|
570
660
|
// Not already running or completed or failed or blocked
|
|
571
661
|
if (running.has(lane.name) || completedLanes.has(lane.name) || failedLanes.has(lane.name) || blockedLanes.has(lane.name)) {
|
|
572
662
|
return false;
|
|
573
663
|
}
|
|
574
|
-
// Check dependencies
|
|
575
|
-
for (const dep of lane.dependsOn) {
|
|
576
|
-
if (failedLanes.has(dep)) {
|
|
577
|
-
logger.error(`Lane ${lane.name} will not start because dependency ${dep} failed`);
|
|
578
|
-
failedLanes.add(lane.name);
|
|
579
|
-
exitCodes[lane.name] = 1;
|
|
580
|
-
return false;
|
|
581
|
-
}
|
|
582
|
-
if (blockedLanes.has(dep)) {
|
|
583
|
-
// If a dependency is blocked, wait
|
|
584
|
-
return false;
|
|
585
|
-
}
|
|
586
|
-
if (!completedLanes.has(dep)) {
|
|
587
|
-
return false;
|
|
588
|
-
}
|
|
589
|
-
}
|
|
590
664
|
return true;
|
|
591
665
|
});
|
|
592
666
|
// 2. Spawn ready lanes up to maxConcurrent
|
|
@@ -601,22 +675,19 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
601
675
|
}
|
|
602
676
|
logger.info(`Lane started: ${lane.name}${lane.startIndex ? ` (resuming from ${lane.startIndex})` : ''}`);
|
|
603
677
|
const now = Date.now();
|
|
604
|
-
//
|
|
678
|
+
// Register lane with unified stall detection service FIRST
|
|
679
|
+
stallService.registerLane(lane.name, {
|
|
680
|
+
laneRunDir: laneRunDirs[lane.name],
|
|
681
|
+
});
|
|
682
|
+
const laneIdx = lanes.findIndex(l => l.name === lane.name);
|
|
683
|
+
// Pre-register lane in running map
|
|
605
684
|
running.set(lane.name, {
|
|
606
685
|
child: {}, // Placeholder, will be replaced below
|
|
607
686
|
logManager: undefined,
|
|
608
687
|
logPath: '',
|
|
609
|
-
lastActivity: now,
|
|
610
|
-
lastStateUpdate: now,
|
|
611
|
-
stallPhase: 0,
|
|
612
|
-
taskStartTime: now,
|
|
613
|
-
lastOutput: '',
|
|
614
688
|
statePath: laneStatePath,
|
|
615
|
-
|
|
616
|
-
lastBytesCheck: 0,
|
|
617
|
-
continueSignalsSent: 0,
|
|
689
|
+
laneIndex: laneIdx >= 0 ? laneIdx : 0,
|
|
618
690
|
});
|
|
619
|
-
let lastOutput = '';
|
|
620
691
|
const spawnResult = spawnLane({
|
|
621
692
|
laneName: lane.name,
|
|
622
693
|
tasksFile: lane.path,
|
|
@@ -627,45 +698,35 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
627
698
|
worktreeDir: laneWorktreeDirs[lane.name],
|
|
628
699
|
enhancedLogConfig: options.enhancedLogging,
|
|
629
700
|
noGit: options.noGit,
|
|
701
|
+
laneIndex: laneIdx >= 0 ? laneIdx : 0,
|
|
630
702
|
onActivity: () => {
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
const actNow = Date.now();
|
|
634
|
-
info.lastActivity = actNow;
|
|
635
|
-
info.lastStateUpdate = actNow;
|
|
636
|
-
info.stallPhase = 0;
|
|
637
|
-
}
|
|
703
|
+
// Record state file update activity
|
|
704
|
+
stallService.recordStateUpdate(lane.name);
|
|
638
705
|
}
|
|
639
706
|
});
|
|
640
707
|
// Update with actual spawn result
|
|
641
708
|
const existingInfo = running.get(lane.name);
|
|
642
|
-
Object.assign(existingInfo, spawnResult);
|
|
643
|
-
//
|
|
709
|
+
Object.assign(existingInfo, spawnResult.info);
|
|
710
|
+
// Update stall service with child process reference
|
|
711
|
+
stallService.setChildProcess(lane.name, spawnResult.child);
|
|
712
|
+
// Track stdout for activity detection - delegate to StallDetectionService
|
|
644
713
|
if (spawnResult.child.stdout) {
|
|
645
714
|
spawnResult.child.stdout.on('data', (data) => {
|
|
646
|
-
const
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
autoRecoveryManager.recordActivity(lane.name, data.length, info.lastOutput);
|
|
659
|
-
}
|
|
660
|
-
else if (lines.length > 0) {
|
|
661
|
-
// Only heartbeats received - update auto-recovery manager with 0 bytes to avoid resetting idle timer
|
|
662
|
-
autoRecoveryManager.recordActivity(lane.name, 0, info.lastOutput);
|
|
663
|
-
}
|
|
715
|
+
const output = data.toString();
|
|
716
|
+
const lines = output.split('\n').filter(l => l.trim());
|
|
717
|
+
// Filter out heartbeats from activity tracking
|
|
718
|
+
const realLines = lines.filter(line => !(line.includes('Heartbeat') && line.includes('bytes received')));
|
|
719
|
+
if (realLines.length > 0) {
|
|
720
|
+
// Real activity - record with bytes
|
|
721
|
+
const lastRealLine = realLines[realLines.length - 1];
|
|
722
|
+
stallService.recordActivity(lane.name, data.length, lastRealLine);
|
|
723
|
+
}
|
|
724
|
+
else if (lines.length > 0) {
|
|
725
|
+
// Heartbeat only - record with 0 bytes (won't reset timer)
|
|
726
|
+
stallService.recordActivity(lane.name, 0);
|
|
664
727
|
}
|
|
665
728
|
});
|
|
666
729
|
}
|
|
667
|
-
// Register lane with auto-recovery manager
|
|
668
|
-
autoRecoveryManager.registerLane(lane.name);
|
|
669
730
|
// Update lane tracking
|
|
670
731
|
lane.taskStartTime = now;
|
|
671
732
|
events_1.events.emit('lane.started', {
|
|
@@ -691,212 +752,35 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
691
752
|
const now = Date.now();
|
|
692
753
|
if (result.name === '__poll__' || (now - lastStallCheck >= 10000)) {
|
|
693
754
|
lastStallCheck = now;
|
|
694
|
-
// Periodic stall check
|
|
755
|
+
// Periodic stall check using unified StallDetectionService
|
|
695
756
|
for (const [laneName, info] of running.entries()) {
|
|
696
|
-
const idleTime = now - info.lastActivity;
|
|
697
757
|
const lane = lanes.find(l => l.name === laneName);
|
|
698
|
-
if (process.env['DEBUG_STALL']) {
|
|
699
|
-
logger.debug(`[${laneName}] Stall check: idle=${Math.round(idleTime / 1000)}s, bytesDelta=${info.bytesReceived - info.lastBytesCheck}, phase=${info.stallPhase}`);
|
|
700
|
-
}
|
|
701
758
|
// Check state file for progress updates
|
|
702
|
-
let progressTime = 0;
|
|
703
759
|
try {
|
|
704
760
|
const stateStat = fs.statSync(info.statePath);
|
|
705
|
-
const
|
|
706
|
-
if (
|
|
707
|
-
|
|
761
|
+
const stallState = stallService.getState(laneName);
|
|
762
|
+
if (stallState && stateStat.mtimeMs > stallState.lastStateUpdateTime) {
|
|
763
|
+
stallService.recordStateUpdate(laneName);
|
|
708
764
|
}
|
|
709
|
-
progressTime = now - info.lastStateUpdate;
|
|
710
765
|
}
|
|
711
766
|
catch {
|
|
712
767
|
// State file might not exist yet
|
|
713
768
|
}
|
|
714
|
-
//
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
lastOutput: info.lastOutput,
|
|
723
|
-
restartCount: lane.restartCount || 0,
|
|
724
|
-
taskStartTimeMs: info.taskStartTime,
|
|
725
|
-
bytesReceived: bytesDelta, // Bytes since last check
|
|
726
|
-
continueSignalsSent: info.continueSignalsSent,
|
|
727
|
-
}, stallConfig);
|
|
728
|
-
// Only act if action is not NONE
|
|
729
|
-
if (analysis.action !== failure_policy_1.RecoveryAction.NONE) {
|
|
730
|
-
(0, failure_policy_1.logFailure)(laneName, analysis);
|
|
769
|
+
// Debug logging
|
|
770
|
+
if (process.env['DEBUG_STALL']) {
|
|
771
|
+
logger.debug(`[${laneName}] ${stallService.dumpState(laneName)}`);
|
|
772
|
+
}
|
|
773
|
+
// Run stall analysis and recovery (all logic is in StallDetectionService)
|
|
774
|
+
const analysis = stallService.checkAndRecover(laneName);
|
|
775
|
+
// Log to lane log manager if there was an action
|
|
776
|
+
if (analysis.action !== stall_detection_1.RecoveryAction.NONE) {
|
|
731
777
|
info.logManager?.log('error', analysis.message);
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
fs.writeFileSync(interventionPath, 'continue');
|
|
736
|
-
info.stallPhase = 1;
|
|
737
|
-
info.lastActivity = now;
|
|
738
|
-
info.continueSignalsSent++;
|
|
739
|
-
logger.info(`[${laneName}] Sent continue signal (#${info.continueSignalsSent})`);
|
|
740
|
-
events_1.events.emit('recovery.continue_signal', {
|
|
741
|
-
laneName,
|
|
742
|
-
idleSeconds: Math.round(idleTime / 1000),
|
|
743
|
-
signalCount: info.continueSignalsSent,
|
|
744
|
-
});
|
|
745
|
-
}
|
|
746
|
-
catch (e) {
|
|
747
|
-
logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
|
|
748
|
-
}
|
|
749
|
-
}
|
|
750
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.STRONGER_PROMPT) {
|
|
751
|
-
const interventionPath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'intervention.txt');
|
|
752
|
-
const strongerPrompt = `[SYSTEM INTERVENTION] You seem to be stuck. Please continue with your current task immediately. If you're waiting for something, explain what you need and proceed with what you can do now. If you've completed the task, summarize your work and finish.`;
|
|
753
|
-
try {
|
|
754
|
-
fs.writeFileSync(interventionPath, strongerPrompt);
|
|
755
|
-
info.stallPhase = 2;
|
|
756
|
-
info.lastActivity = now;
|
|
757
|
-
logger.warn(`[${laneName}] Sent stronger prompt after continue signal failed`);
|
|
758
|
-
events_1.events.emit('recovery.stronger_prompt', { laneName });
|
|
759
|
-
}
|
|
760
|
-
catch (e) {
|
|
761
|
-
logger.error(`Failed to write intervention file for ${laneName}: ${e}`);
|
|
762
|
-
}
|
|
763
|
-
}
|
|
764
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.KILL_AND_RESTART ||
|
|
765
|
-
analysis.action === failure_policy_1.RecoveryAction.RESTART_LANE ||
|
|
766
|
-
analysis.action === failure_policy_1.RecoveryAction.RESTART_LANE_FROM_CHECKPOINT) {
|
|
767
|
-
lane.restartCount = (lane.restartCount || 0) + 1;
|
|
768
|
-
info.stallPhase = 3;
|
|
769
|
-
// Try to get checkpoint info
|
|
770
|
-
const checkpoint = (0, checkpoint_1.getLatestCheckpoint)(laneRunDirs[laneName]);
|
|
771
|
-
if (checkpoint) {
|
|
772
|
-
logger.info(`[${laneName}] Checkpoint available: ${checkpoint.id} (task ${checkpoint.taskIndex})`);
|
|
773
|
-
}
|
|
774
|
-
// Kill the process
|
|
775
|
-
try {
|
|
776
|
-
info.child.kill('SIGKILL');
|
|
777
|
-
}
|
|
778
|
-
catch {
|
|
779
|
-
// Process might already be dead
|
|
780
|
-
}
|
|
781
|
-
logger.warn(`[${laneName}] Killing and restarting lane (restart #${lane.restartCount})`);
|
|
782
|
-
events_1.events.emit('recovery.restart', {
|
|
783
|
-
laneName,
|
|
784
|
-
restartCount: lane.restartCount,
|
|
785
|
-
maxRestarts: stallConfig.maxRestarts,
|
|
786
|
-
});
|
|
787
|
-
}
|
|
788
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.RUN_DOCTOR) {
|
|
789
|
-
info.stallPhase = 4;
|
|
790
|
-
// Run diagnostics
|
|
791
|
-
logger.error(`[${laneName}] Running diagnostics due to persistent failures...`);
|
|
792
|
-
// Import health check dynamically to avoid circular dependency
|
|
793
|
-
const { checkAgentHealth, checkAuthHealth } = await Promise.resolve().then(() => __importStar(require('../utils/health')));
|
|
794
|
-
const [agentHealth, authHealth] = await Promise.all([
|
|
795
|
-
checkAgentHealth(),
|
|
796
|
-
checkAuthHealth(),
|
|
797
|
-
]);
|
|
798
|
-
const issues = [];
|
|
799
|
-
if (!agentHealth.ok)
|
|
800
|
-
issues.push(`Agent: ${agentHealth.message}`);
|
|
801
|
-
if (!authHealth.ok)
|
|
802
|
-
issues.push(`Auth: ${authHealth.message}`);
|
|
803
|
-
if (issues.length > 0) {
|
|
804
|
-
logger.error(`[${laneName}] Diagnostic issues found:\n ${issues.join('\n ')}`);
|
|
805
|
-
}
|
|
806
|
-
else {
|
|
807
|
-
logger.warn(`[${laneName}] No obvious issues found. The problem may be with the AI model or network.`);
|
|
808
|
-
}
|
|
809
|
-
// Save diagnostic to file
|
|
810
|
-
const diagnosticPath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'diagnostic.json');
|
|
811
|
-
fs.writeFileSync(diagnosticPath, JSON.stringify({
|
|
812
|
-
timestamp: Date.now(),
|
|
813
|
-
agentHealthy: agentHealth.ok,
|
|
814
|
-
authHealthy: authHealth.ok,
|
|
815
|
-
issues,
|
|
816
|
-
analysis,
|
|
817
|
-
}, null, 2));
|
|
818
|
-
// Kill the process
|
|
819
|
-
try {
|
|
820
|
-
info.child.kill('SIGKILL');
|
|
821
|
-
}
|
|
822
|
-
catch {
|
|
823
|
-
// Process might already be dead
|
|
824
|
-
}
|
|
825
|
-
logger.error(`[${laneName}] Aborting lane after diagnostic. Check ${diagnosticPath} for details.`);
|
|
826
|
-
// Save POF for failed recovery
|
|
827
|
-
const recoveryState = autoRecoveryManager.getState(laneName);
|
|
828
|
-
if (recoveryState) {
|
|
829
|
-
try {
|
|
830
|
-
const laneStatePath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'state.json');
|
|
831
|
-
const laneState = (0, state_1.loadState)(laneStatePath);
|
|
832
|
-
const pofDir = (0, path_1.safeJoin)(runRoot, '..', '..', 'pof');
|
|
833
|
-
const diagnosticInfo = {
|
|
834
|
-
timestamp: Date.now(),
|
|
835
|
-
agentHealthy: agentHealth.ok,
|
|
836
|
-
authHealthy: authHealth.ok,
|
|
837
|
-
systemHealthy: true,
|
|
838
|
-
suggestedAction: issues.length > 0 ? 'Fix the issues above and retry' : 'Try with a different model',
|
|
839
|
-
details: issues.join('\n') || 'No obvious issues found',
|
|
840
|
-
};
|
|
841
|
-
const pofEntry = (0, auto_recovery_1.createPOFFromRecoveryState)(runId, runRoot, laneName, recoveryState, laneState, diagnosticInfo);
|
|
842
|
-
(0, auto_recovery_1.savePOF)(runId, pofDir, pofEntry);
|
|
843
|
-
}
|
|
844
|
-
catch (pofError) {
|
|
845
|
-
logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
|
|
846
|
-
}
|
|
847
|
-
}
|
|
848
|
-
events_1.events.emit('recovery.diagnosed', {
|
|
849
|
-
laneName,
|
|
850
|
-
diagnostic: { agentHealthy: agentHealth.ok, authHealthy: authHealth.ok, issues },
|
|
851
|
-
});
|
|
852
|
-
}
|
|
853
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.ABORT_LANE) {
|
|
854
|
-
info.stallPhase = 5;
|
|
855
|
-
try {
|
|
856
|
-
info.child.kill('SIGKILL');
|
|
857
|
-
}
|
|
858
|
-
catch {
|
|
859
|
-
// Process might already be dead
|
|
860
|
-
}
|
|
861
|
-
logger.error(`[${laneName}] Aborting lane due to repeated stalls`);
|
|
862
|
-
// Save POF for failed recovery
|
|
863
|
-
const recoveryState = autoRecoveryManager.getState(laneName);
|
|
864
|
-
if (recoveryState) {
|
|
865
|
-
try {
|
|
866
|
-
const laneStatePath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'state.json');
|
|
867
|
-
const laneState = (0, state_1.loadState)(laneStatePath);
|
|
868
|
-
const pofDir = (0, path_1.safeJoin)(runRoot, '..', '..', 'pof');
|
|
869
|
-
const pofEntry = (0, auto_recovery_1.createPOFFromRecoveryState)(runId, runRoot, laneName, recoveryState, laneState, recoveryState.diagnosticInfo);
|
|
870
|
-
(0, auto_recovery_1.savePOF)(runId, pofDir, pofEntry);
|
|
871
|
-
}
|
|
872
|
-
catch (pofError) {
|
|
873
|
-
logger.warn(`[${laneName}] Failed to save POF: ${pofError.message}`);
|
|
874
|
-
}
|
|
875
|
-
}
|
|
876
|
-
}
|
|
877
|
-
else if (analysis.action === failure_policy_1.RecoveryAction.SEND_GIT_GUIDANCE) {
|
|
878
|
-
// Send guidance message to agent for git issues
|
|
879
|
-
const interventionPath = (0, path_1.safeJoin)(laneRunDirs[laneName], 'intervention.txt');
|
|
880
|
-
// Determine which guidance to send based on the failure type
|
|
881
|
-
let guidance;
|
|
882
|
-
if (analysis.type === failure_policy_1.FailureType.GIT_PUSH_REJECTED) {
|
|
883
|
-
guidance = (0, auto_recovery_1.getGitPushFailureGuidance)();
|
|
884
|
-
}
|
|
885
|
-
else if (analysis.type === failure_policy_1.FailureType.MERGE_CONFLICT) {
|
|
886
|
-
guidance = (0, auto_recovery_1.getMergeConflictGuidance)();
|
|
887
|
-
}
|
|
888
|
-
else {
|
|
889
|
-
guidance = (0, auto_recovery_1.getGitErrorGuidance)(analysis.message);
|
|
890
|
-
}
|
|
891
|
-
try {
|
|
892
|
-
fs.writeFileSync(interventionPath, guidance);
|
|
893
|
-
info.lastActivity = now;
|
|
894
|
-
logger.info(`[${laneName}] Sent git issue guidance to agent`);
|
|
895
|
-
}
|
|
896
|
-
catch (e) {
|
|
897
|
-
logger.error(`[${laneName}] Failed to send guidance: ${e.message}`);
|
|
898
|
-
}
|
|
778
|
+
// Handle special case: RUN_DOCTOR needs async operations
|
|
779
|
+
if (analysis.action === stall_detection_1.RecoveryAction.RUN_DOCTOR) {
|
|
780
|
+
await handleDoctorDiagnostics(laneName, laneRunDirs[laneName], runId, runRoot, stallService, info.child);
|
|
899
781
|
}
|
|
782
|
+
// Sync restartCount back to lane info (for restart logic in process exit handler)
|
|
783
|
+
lane.restartCount = stallService.getRestartCount(laneName);
|
|
900
784
|
}
|
|
901
785
|
}
|
|
902
786
|
continue;
|
|
@@ -906,8 +790,10 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
906
790
|
const info = running.get(finished.name);
|
|
907
791
|
running.delete(finished.name);
|
|
908
792
|
exitCodes[finished.name] = finished.code;
|
|
909
|
-
//
|
|
910
|
-
|
|
793
|
+
// Get stall state before unregistering
|
|
794
|
+
const stallPhase = stallService.getPhase(finished.name);
|
|
795
|
+
// Unregister from stall detection service
|
|
796
|
+
stallService.unregisterLane(finished.name);
|
|
911
797
|
if (finished.code === 0) {
|
|
912
798
|
completedLanes.add(finished.name);
|
|
913
799
|
events_1.events.emit('lane.completed', {
|
|
@@ -937,8 +823,8 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
937
823
|
}
|
|
938
824
|
}
|
|
939
825
|
else {
|
|
940
|
-
// Check if it was a restart request
|
|
941
|
-
if (
|
|
826
|
+
// Check if it was a restart request (RESTART_REQUESTED phase)
|
|
827
|
+
if (stallPhase === stall_detection_1.StallPhase.RESTART_REQUESTED) {
|
|
942
828
|
logger.info(`🔄 Lane ${finished.name} is being restarted due to stall...`);
|
|
943
829
|
// Update startIndex from current state to resume from the same task
|
|
944
830
|
const statePath = (0, path_1.safeJoin)(laneRunDirs[finished.name], 'state.json');
|
|
@@ -955,7 +841,7 @@ async function orchestrate(tasksDir, options = {}) {
|
|
|
955
841
|
}
|
|
956
842
|
failedLanes.add(finished.name);
|
|
957
843
|
let errorMsg = 'Process exited with non-zero code';
|
|
958
|
-
if (
|
|
844
|
+
if (stallPhase >= stall_detection_1.StallPhase.DIAGNOSED) {
|
|
959
845
|
errorMsg = 'Stopped due to repeated stall';
|
|
960
846
|
}
|
|
961
847
|
else if (info.logManager) {
|