@litmers/cursorflow-orchestrator 0.1.40 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +0 -2
- package/README.md +8 -3
- package/commands/cursorflow-init.md +0 -4
- package/dist/cli/index.js +0 -6
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/logs.js +108 -9
- package/dist/cli/logs.js.map +1 -1
- package/dist/cli/models.js +20 -3
- package/dist/cli/models.js.map +1 -1
- package/dist/cli/monitor.d.ts +7 -10
- package/dist/cli/monitor.js +1103 -1239
- package/dist/cli/monitor.js.map +1 -1
- package/dist/cli/resume.js +21 -1
- package/dist/cli/resume.js.map +1 -1
- package/dist/cli/run.js +28 -9
- package/dist/cli/run.js.map +1 -1
- package/dist/cli/signal.d.ts +6 -1
- package/dist/cli/signal.js +99 -13
- package/dist/cli/signal.js.map +1 -1
- package/dist/cli/tasks.js +3 -46
- package/dist/cli/tasks.js.map +1 -1
- package/dist/core/agent-supervisor.d.ts +23 -0
- package/dist/core/agent-supervisor.js +42 -0
- package/dist/core/agent-supervisor.js.map +1 -0
- package/dist/core/auto-recovery.d.ts +3 -117
- package/dist/core/auto-recovery.js +4 -482
- package/dist/core/auto-recovery.js.map +1 -1
- package/dist/core/failure-policy.d.ts +0 -53
- package/dist/core/failure-policy.js +7 -175
- package/dist/core/failure-policy.js.map +1 -1
- package/dist/core/git-lifecycle-manager.d.ts +284 -0
- package/dist/core/git-lifecycle-manager.js +778 -0
- package/dist/core/git-lifecycle-manager.js.map +1 -0
- package/dist/core/git-pipeline-coordinator.d.ts +21 -0
- package/dist/core/git-pipeline-coordinator.js +205 -0
- package/dist/core/git-pipeline-coordinator.js.map +1 -0
- package/dist/core/intervention.d.ts +170 -0
- package/dist/core/intervention.js +408 -0
- package/dist/core/intervention.js.map +1 -0
- package/dist/core/lane-state-machine.d.ts +423 -0
- package/dist/core/lane-state-machine.js +890 -0
- package/dist/core/lane-state-machine.js.map +1 -0
- package/dist/core/orchestrator.d.ts +4 -1
- package/dist/core/orchestrator.js +39 -65
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/runner/agent.d.ts +7 -1
- package/dist/core/runner/agent.js +54 -36
- package/dist/core/runner/agent.js.map +1 -1
- package/dist/core/runner/pipeline.js +283 -123
- package/dist/core/runner/pipeline.js.map +1 -1
- package/dist/core/runner/task.d.ts +4 -5
- package/dist/core/runner/task.js +6 -80
- package/dist/core/runner/task.js.map +1 -1
- package/dist/core/runner.js +8 -2
- package/dist/core/runner.js.map +1 -1
- package/dist/core/stall-detection.d.ts +11 -4
- package/dist/core/stall-detection.js +64 -27
- package/dist/core/stall-detection.js.map +1 -1
- package/dist/hooks/contexts/index.d.ts +104 -0
- package/dist/hooks/contexts/index.js +134 -0
- package/dist/hooks/contexts/index.js.map +1 -0
- package/dist/hooks/data-accessor.d.ts +86 -0
- package/dist/hooks/data-accessor.js +410 -0
- package/dist/hooks/data-accessor.js.map +1 -0
- package/dist/hooks/flow-controller.d.ts +136 -0
- package/dist/hooks/flow-controller.js +351 -0
- package/dist/hooks/flow-controller.js.map +1 -0
- package/dist/hooks/index.d.ts +68 -0
- package/dist/hooks/index.js +105 -0
- package/dist/hooks/index.js.map +1 -0
- package/dist/hooks/manager.d.ts +129 -0
- package/dist/hooks/manager.js +389 -0
- package/dist/hooks/manager.js.map +1 -0
- package/dist/hooks/types.d.ts +463 -0
- package/dist/hooks/types.js +45 -0
- package/dist/hooks/types.js.map +1 -0
- package/dist/services/logging/buffer.d.ts +2 -2
- package/dist/services/logging/buffer.js +95 -42
- package/dist/services/logging/buffer.js.map +1 -1
- package/dist/services/logging/console.js +6 -1
- package/dist/services/logging/console.js.map +1 -1
- package/dist/services/logging/formatter.d.ts +9 -4
- package/dist/services/logging/formatter.js +64 -18
- package/dist/services/logging/formatter.js.map +1 -1
- package/dist/services/logging/index.d.ts +0 -1
- package/dist/services/logging/index.js +0 -1
- package/dist/services/logging/index.js.map +1 -1
- package/dist/services/logging/paths.d.ts +8 -0
- package/dist/services/logging/paths.js +48 -0
- package/dist/services/logging/paths.js.map +1 -0
- package/dist/services/logging/raw-log.d.ts +6 -0
- package/dist/services/logging/raw-log.js +37 -0
- package/dist/services/logging/raw-log.js.map +1 -0
- package/dist/services/process/index.js +1 -1
- package/dist/services/process/index.js.map +1 -1
- package/dist/types/agent.d.ts +15 -0
- package/dist/types/config.d.ts +22 -1
- package/dist/types/event-categories.d.ts +601 -0
- package/dist/types/event-categories.js +233 -0
- package/dist/types/event-categories.js.map +1 -0
- package/dist/types/events.d.ts +0 -20
- package/dist/types/flow.d.ts +10 -6
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.js +17 -3
- package/dist/types/index.js.map +1 -1
- package/dist/types/lane.d.ts +1 -1
- package/dist/types/logging.d.ts +1 -1
- package/dist/types/task.d.ts +12 -1
- package/dist/ui/log-viewer.d.ts +3 -0
- package/dist/ui/log-viewer.js +3 -0
- package/dist/ui/log-viewer.js.map +1 -1
- package/dist/utils/config.js +10 -1
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/cursor-agent.d.ts +11 -1
- package/dist/utils/cursor-agent.js +63 -16
- package/dist/utils/cursor-agent.js.map +1 -1
- package/dist/utils/enhanced-logger.d.ts +5 -1
- package/dist/utils/enhanced-logger.js +98 -19
- package/dist/utils/enhanced-logger.js.map +1 -1
- package/dist/utils/event-registry.d.ts +222 -0
- package/dist/utils/event-registry.js +463 -0
- package/dist/utils/event-registry.js.map +1 -0
- package/dist/utils/events.d.ts +1 -13
- package/dist/utils/events.js.map +1 -1
- package/dist/utils/flow.d.ts +10 -0
- package/dist/utils/flow.js +75 -0
- package/dist/utils/flow.js.map +1 -1
- package/dist/utils/log-constants.d.ts +1 -0
- package/dist/utils/log-constants.js +2 -1
- package/dist/utils/log-constants.js.map +1 -1
- package/dist/utils/log-formatter.d.ts +2 -1
- package/dist/utils/log-formatter.js +10 -10
- package/dist/utils/log-formatter.js.map +1 -1
- package/dist/utils/logger.d.ts +11 -0
- package/dist/utils/logger.js +82 -3
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/repro-thinking-logs.js +0 -13
- package/dist/utils/repro-thinking-logs.js.map +1 -1
- package/dist/utils/run-service.js +1 -1
- package/dist/utils/run-service.js.map +1 -1
- package/examples/README.md +0 -2
- package/examples/demo-project/README.md +1 -2
- package/package.json +13 -34
- package/scripts/setup-security.sh +0 -1
- package/scripts/test-log-parser.ts +171 -0
- package/scripts/verify-change.sh +272 -0
- package/src/cli/index.ts +0 -6
- package/src/cli/logs.ts +121 -10
- package/src/cli/models.ts +20 -3
- package/src/cli/monitor.ts +1273 -1342
- package/src/cli/resume.ts +27 -1
- package/src/cli/run.ts +29 -11
- package/src/cli/signal.ts +120 -18
- package/src/cli/tasks.ts +2 -59
- package/src/core/agent-supervisor.ts +64 -0
- package/src/core/auto-recovery.ts +14 -590
- package/src/core/failure-policy.ts +7 -229
- package/src/core/git-lifecycle-manager.ts +1011 -0
- package/src/core/git-pipeline-coordinator.ts +221 -0
- package/src/core/intervention.ts +463 -0
- package/src/core/lane-state-machine.ts +1097 -0
- package/src/core/orchestrator.ts +48 -64
- package/src/core/runner/agent.ts +77 -39
- package/src/core/runner/pipeline.ts +318 -138
- package/src/core/runner/task.ts +12 -97
- package/src/core/runner.ts +8 -2
- package/src/core/stall-detection.ts +74 -27
- package/src/hooks/contexts/index.ts +256 -0
- package/src/hooks/data-accessor.ts +488 -0
- package/src/hooks/flow-controller.ts +425 -0
- package/src/hooks/index.ts +154 -0
- package/src/hooks/manager.ts +434 -0
- package/src/hooks/types.ts +544 -0
- package/src/services/logging/buffer.ts +104 -43
- package/src/services/logging/console.ts +7 -1
- package/src/services/logging/formatter.ts +74 -18
- package/src/services/logging/index.ts +0 -2
- package/src/services/logging/paths.ts +14 -0
- package/src/services/logging/raw-log.ts +43 -0
- package/src/services/process/index.ts +1 -1
- package/src/types/agent.ts +15 -0
- package/src/types/config.ts +23 -1
- package/src/types/event-categories.ts +663 -0
- package/src/types/events.ts +0 -25
- package/src/types/flow.ts +10 -6
- package/src/types/index.ts +50 -4
- package/src/types/lane.ts +1 -2
- package/src/types/logging.ts +2 -1
- package/src/types/task.ts +12 -1
- package/src/ui/log-viewer.ts +3 -0
- package/src/utils/config.ts +11 -1
- package/src/utils/cursor-agent.ts +68 -16
- package/src/utils/enhanced-logger.ts +105 -19
- package/src/utils/event-registry.ts +595 -0
- package/src/utils/events.ts +0 -16
- package/src/utils/flow.ts +83 -0
- package/src/utils/log-constants.ts +2 -1
- package/src/utils/log-formatter.ts +10 -11
- package/src/utils/logger.ts +49 -3
- package/src/utils/repro-thinking-logs.ts +0 -15
- package/src/utils/run-service.ts +1 -1
- package/dist/cli/prepare.d.ts +0 -7
- package/dist/cli/prepare.js +0 -690
- package/dist/cli/prepare.js.map +0 -1
- package/dist/services/logging/file-writer.d.ts +0 -71
- package/dist/services/logging/file-writer.js +0 -516
- package/dist/services/logging/file-writer.js.map +0 -1
- package/dist/types/review.d.ts +0 -17
- package/dist/types/review.js +0 -6
- package/dist/types/review.js.map +0 -1
- package/scripts/ai-security-check.js +0 -233
- package/src/cli/prepare.ts +0 -777
- package/src/services/logging/file-writer.ts +0 -526
- package/src/types/review.ts +0 -20
|
@@ -17,74 +17,23 @@ import { LaneState } from '../utils/types';
|
|
|
17
17
|
import { events } from '../utils/events';
|
|
18
18
|
import { safeJoin } from '../utils/path';
|
|
19
19
|
import { runHealthCheck, checkAgentHealth, checkAuthHealth } from '../utils/health';
|
|
20
|
+
import {
|
|
21
|
+
createInterventionRequest,
|
|
22
|
+
InterventionType,
|
|
23
|
+
createContinueMessage,
|
|
24
|
+
createStrongerPromptMessage,
|
|
25
|
+
createRestartMessage,
|
|
26
|
+
} from './intervention';
|
|
20
27
|
|
|
21
28
|
// ============================================================================
|
|
22
29
|
// Types & Constants
|
|
23
30
|
// ============================================================================
|
|
24
31
|
|
|
25
|
-
/** Recovery stages for escalating interventions */
|
|
26
|
-
export enum RecoveryStage {
|
|
27
|
-
/** Normal operation - monitoring */
|
|
28
|
-
NORMAL = 0,
|
|
29
|
-
/** First intervention - send continue signal */
|
|
30
|
-
CONTINUE_SIGNAL = 1,
|
|
31
|
-
/** Second intervention - send stronger prompt */
|
|
32
|
-
STRONGER_PROMPT = 2,
|
|
33
|
-
/** Third intervention - kill and restart process */
|
|
34
|
-
RESTART_PROCESS = 3,
|
|
35
|
-
/** Final stage - run doctor and report */
|
|
36
|
-
DIAGNOSE = 4,
|
|
37
|
-
/** No more recovery possible */
|
|
38
|
-
ABORT = 5,
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
/** Configuration for auto-recovery behavior */
|
|
42
|
-
export interface AutoRecoveryConfig {
|
|
43
|
-
/** Time without activity before sending continue signal (default: 2 minutes) */
|
|
44
|
-
idleTimeoutMs: number;
|
|
45
|
-
/** Time to wait after continue signal before escalating (default: 2 minutes) */
|
|
46
|
-
continueGraceMs: number;
|
|
47
|
-
/** Time to wait after stronger prompt before escalating (default: 2 minutes) */
|
|
48
|
-
strongerPromptGraceMs: number;
|
|
49
|
-
/** Maximum number of restarts before aborting (default: 2) */
|
|
50
|
-
maxRestarts: number;
|
|
51
|
-
/** Whether to run doctor on persistent failures (default: true) */
|
|
52
|
-
runDoctorOnFailure: boolean;
|
|
53
|
-
/** Patterns indicating long-running operations (won't trigger idle) */
|
|
54
|
-
longOperationPatterns: RegExp[];
|
|
55
|
-
/** Grace period for long operations (default: 10 minutes) */
|
|
56
|
-
longOperationGraceMs: number;
|
|
57
|
-
/** Enable verbose logging */
|
|
58
|
-
verbose: boolean;
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
/** Default auto-recovery configuration */
|
|
62
|
-
export const DEFAULT_AUTO_RECOVERY_CONFIG: AutoRecoveryConfig = {
|
|
63
|
-
idleTimeoutMs: 2 * 60 * 1000, // 2 minutes - idle detection
|
|
64
|
-
continueGraceMs: 2 * 60 * 1000, // 2 minutes after continue
|
|
65
|
-
strongerPromptGraceMs: 2 * 60 * 1000, // 2 minutes after stronger prompt
|
|
66
|
-
maxRestarts: 2,
|
|
67
|
-
runDoctorOnFailure: true,
|
|
68
|
-
longOperationPatterns: [
|
|
69
|
-
/installing\s+dependencies/i,
|
|
70
|
-
/npm\s+(i|install|ci)/i,
|
|
71
|
-
/pnpm\s+(i|install)/i,
|
|
72
|
-
/yarn\s+(install)?/i,
|
|
73
|
-
/building/i,
|
|
74
|
-
/compiling/i,
|
|
75
|
-
/bundling/i,
|
|
76
|
-
/downloading/i,
|
|
77
|
-
/fetching/i,
|
|
78
|
-
/cloning/i,
|
|
79
|
-
],
|
|
80
|
-
longOperationGraceMs: 10 * 60 * 1000, // 10 minutes for long ops
|
|
81
|
-
verbose: false,
|
|
82
|
-
};
|
|
83
|
-
|
|
84
32
|
/** State tracking for a single lane's recovery */
|
|
85
33
|
export interface LaneRecoveryState {
|
|
86
34
|
laneName: string;
|
|
87
|
-
|
|
35
|
+
runId: string;
|
|
36
|
+
stage: number;
|
|
88
37
|
lastActivityTime: number;
|
|
89
38
|
lastBytesReceived: number;
|
|
90
39
|
totalBytesReceived: number;
|
|
@@ -107,20 +56,10 @@ export interface DiagnosticInfo {
|
|
|
107
56
|
details: string;
|
|
108
57
|
}
|
|
109
58
|
|
|
110
|
-
/** Recovery action result */
|
|
111
|
-
export interface RecoveryActionResult {
|
|
112
|
-
success: boolean;
|
|
113
|
-
action: string;
|
|
114
|
-
message: string;
|
|
115
|
-
shouldContinue: boolean;
|
|
116
|
-
nextStage?: RecoveryStage;
|
|
117
|
-
diagnostic?: DiagnosticInfo;
|
|
118
|
-
}
|
|
119
|
-
|
|
120
59
|
/** Record of a failure for POF */
|
|
121
60
|
export interface FailureRecord {
|
|
122
61
|
timestamp: number;
|
|
123
|
-
stage:
|
|
62
|
+
stage: number;
|
|
124
63
|
action: string;
|
|
125
64
|
message: string;
|
|
126
65
|
idleTimeMs: number;
|
|
@@ -224,505 +163,7 @@ ${errorMessage}
|
|
|
224
163
|
}
|
|
225
164
|
|
|
226
165
|
// ============================================================================
|
|
227
|
-
//
|
|
228
|
-
// ============================================================================
|
|
229
|
-
|
|
230
|
-
/**
|
|
231
|
-
* Manages recovery state for all lanes
|
|
232
|
-
*/
|
|
233
|
-
export class AutoRecoveryManager {
|
|
234
|
-
private config: AutoRecoveryConfig;
|
|
235
|
-
private laneStates: Map<string, LaneRecoveryState> = new Map();
|
|
236
|
-
private eventHandlers: Map<string, () => void> = new Map();
|
|
237
|
-
|
|
238
|
-
constructor(config: Partial<AutoRecoveryConfig> = {}) {
|
|
239
|
-
this.config = { ...DEFAULT_AUTO_RECOVERY_CONFIG, ...config };
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
/**
|
|
243
|
-
* Register a lane for recovery monitoring
|
|
244
|
-
*/
|
|
245
|
-
registerLane(laneName: string): void {
|
|
246
|
-
const now = Date.now();
|
|
247
|
-
this.laneStates.set(laneName, {
|
|
248
|
-
laneName,
|
|
249
|
-
stage: RecoveryStage.NORMAL,
|
|
250
|
-
lastActivityTime: now,
|
|
251
|
-
lastBytesReceived: 0,
|
|
252
|
-
totalBytesReceived: 0,
|
|
253
|
-
lastOutput: '',
|
|
254
|
-
restartCount: 0,
|
|
255
|
-
continueSignalsSent: 0,
|
|
256
|
-
lastStageChangeTime: now,
|
|
257
|
-
isLongOperation: false,
|
|
258
|
-
failureHistory: [],
|
|
259
|
-
});
|
|
260
|
-
|
|
261
|
-
if (this.config.verbose) {
|
|
262
|
-
logger.info(`[AutoRecovery] Registered lane: ${laneName}`);
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
/**
|
|
267
|
-
* Unregister a lane from recovery monitoring
|
|
268
|
-
*/
|
|
269
|
-
unregisterLane(laneName: string): void {
|
|
270
|
-
this.laneStates.delete(laneName);
|
|
271
|
-
|
|
272
|
-
const handler = this.eventHandlers.get(laneName);
|
|
273
|
-
if (handler) {
|
|
274
|
-
this.eventHandlers.delete(laneName);
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
/**
|
|
279
|
-
* Record activity for a lane
|
|
280
|
-
*/
|
|
281
|
-
recordActivity(laneName: string, bytesReceived: number = 0, output?: string): void {
|
|
282
|
-
const state = this.laneStates.get(laneName);
|
|
283
|
-
if (!state) return;
|
|
284
|
-
|
|
285
|
-
const now = Date.now();
|
|
286
|
-
|
|
287
|
-
// Only update activity time if we actually received bytes
|
|
288
|
-
// This allows heartbeats to be recorded (for logs/bytes) without resetting the idle timer
|
|
289
|
-
if (bytesReceived > 0) {
|
|
290
|
-
state.lastActivityTime = now;
|
|
291
|
-
state.lastBytesReceived = bytesReceived;
|
|
292
|
-
state.totalBytesReceived += bytesReceived;
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
if (output) {
|
|
296
|
-
state.lastOutput = output;
|
|
297
|
-
// Check if this is a long operation
|
|
298
|
-
state.isLongOperation = this.config.longOperationPatterns.some(p => p.test(output));
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
// Reset stage if we got meaningful activity
|
|
302
|
-
if (bytesReceived > 0 && state.stage !== RecoveryStage.NORMAL) {
|
|
303
|
-
if (this.config.verbose) {
|
|
304
|
-
logger.info(`[AutoRecovery] [${laneName}] Activity detected, resetting to NORMAL stage`);
|
|
305
|
-
}
|
|
306
|
-
state.stage = RecoveryStage.NORMAL;
|
|
307
|
-
state.lastStageChangeTime = now;
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
/**
|
|
312
|
-
* Get current recovery state for a lane
|
|
313
|
-
*/
|
|
314
|
-
getState(laneName: string): LaneRecoveryState | undefined {
|
|
315
|
-
return this.laneStates.get(laneName);
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
/**
|
|
319
|
-
* Check if a lane needs recovery intervention
|
|
320
|
-
*/
|
|
321
|
-
needsIntervention(laneName: string): boolean {
|
|
322
|
-
const state = this.laneStates.get(laneName);
|
|
323
|
-
if (!state) return false;
|
|
324
|
-
|
|
325
|
-
const now = Date.now();
|
|
326
|
-
const idleTime = now - state.lastActivityTime;
|
|
327
|
-
|
|
328
|
-
// Use longer timeout for long operations
|
|
329
|
-
const effectiveTimeout = state.isLongOperation
|
|
330
|
-
? this.config.longOperationGraceMs
|
|
331
|
-
: this.config.idleTimeoutMs;
|
|
332
|
-
|
|
333
|
-
// Check based on current stage
|
|
334
|
-
switch (state.stage) {
|
|
335
|
-
case RecoveryStage.NORMAL:
|
|
336
|
-
return idleTime > effectiveTimeout;
|
|
337
|
-
|
|
338
|
-
case RecoveryStage.CONTINUE_SIGNAL:
|
|
339
|
-
return (now - state.lastStageChangeTime) > this.config.continueGraceMs;
|
|
340
|
-
|
|
341
|
-
case RecoveryStage.STRONGER_PROMPT:
|
|
342
|
-
return (now - state.lastStageChangeTime) > this.config.strongerPromptGraceMs;
|
|
343
|
-
|
|
344
|
-
case RecoveryStage.RESTART_PROCESS:
|
|
345
|
-
// After restart, use normal timeout to detect if it's working
|
|
346
|
-
return idleTime > effectiveTimeout;
|
|
347
|
-
|
|
348
|
-
case RecoveryStage.DIAGNOSE:
|
|
349
|
-
case RecoveryStage.ABORT:
|
|
350
|
-
return false; // No more interventions
|
|
351
|
-
|
|
352
|
-
default:
|
|
353
|
-
return false;
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
/**
|
|
358
|
-
* Get the next recovery action for a lane
|
|
359
|
-
*/
|
|
360
|
-
async getRecoveryAction(
|
|
361
|
-
laneName: string,
|
|
362
|
-
laneRunDir: string,
|
|
363
|
-
child?: ChildProcess
|
|
364
|
-
): Promise<RecoveryActionResult> {
|
|
365
|
-
const state = this.laneStates.get(laneName);
|
|
366
|
-
if (!state) {
|
|
367
|
-
return {
|
|
368
|
-
success: false,
|
|
369
|
-
action: 'none',
|
|
370
|
-
message: 'Lane not registered',
|
|
371
|
-
shouldContinue: false,
|
|
372
|
-
};
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
const now = Date.now();
|
|
376
|
-
const idleTime = now - state.lastActivityTime;
|
|
377
|
-
const idleSeconds = Math.round(idleTime / 1000);
|
|
378
|
-
|
|
379
|
-
switch (state.stage) {
|
|
380
|
-
case RecoveryStage.NORMAL:
|
|
381
|
-
// Escalate to CONTINUE_SIGNAL
|
|
382
|
-
return await this.sendContinueSignal(laneName, laneRunDir, state, idleSeconds);
|
|
383
|
-
|
|
384
|
-
case RecoveryStage.CONTINUE_SIGNAL:
|
|
385
|
-
// Try a stronger prompt
|
|
386
|
-
return await this.sendStrongerPrompt(laneName, laneRunDir, state);
|
|
387
|
-
|
|
388
|
-
case RecoveryStage.STRONGER_PROMPT:
|
|
389
|
-
// Try restarting the process
|
|
390
|
-
if (state.restartCount < this.config.maxRestarts) {
|
|
391
|
-
return await this.requestRestart(laneName, state, child);
|
|
392
|
-
}
|
|
393
|
-
// Fall through to diagnose
|
|
394
|
-
state.stage = RecoveryStage.DIAGNOSE;
|
|
395
|
-
state.lastStageChangeTime = now;
|
|
396
|
-
return await this.runDiagnosis(laneName, laneRunDir, state);
|
|
397
|
-
|
|
398
|
-
case RecoveryStage.RESTART_PROCESS:
|
|
399
|
-
// After restart, if still no response, diagnose
|
|
400
|
-
if (state.restartCount >= this.config.maxRestarts) {
|
|
401
|
-
state.stage = RecoveryStage.DIAGNOSE;
|
|
402
|
-
state.lastStageChangeTime = now;
|
|
403
|
-
return await this.runDiagnosis(laneName, laneRunDir, state);
|
|
404
|
-
}
|
|
405
|
-
// Try continue signal again after restart
|
|
406
|
-
return await this.sendContinueSignal(laneName, laneRunDir, state, idleSeconds);
|
|
407
|
-
|
|
408
|
-
case RecoveryStage.DIAGNOSE:
|
|
409
|
-
// Final stage - abort
|
|
410
|
-
state.stage = RecoveryStage.ABORT;
|
|
411
|
-
state.lastStageChangeTime = now;
|
|
412
|
-
return {
|
|
413
|
-
success: false,
|
|
414
|
-
action: 'abort',
|
|
415
|
-
message: `Lane ${laneName} failed after all recovery attempts`,
|
|
416
|
-
shouldContinue: false,
|
|
417
|
-
nextStage: RecoveryStage.ABORT,
|
|
418
|
-
diagnostic: state.diagnosticInfo,
|
|
419
|
-
};
|
|
420
|
-
|
|
421
|
-
default:
|
|
422
|
-
return {
|
|
423
|
-
success: false,
|
|
424
|
-
action: 'abort',
|
|
425
|
-
message: 'Recovery exhausted',
|
|
426
|
-
shouldContinue: false,
|
|
427
|
-
};
|
|
428
|
-
}
|
|
429
|
-
}
|
|
430
|
-
|
|
431
|
-
/**
|
|
432
|
-
* Send a continue signal to the lane
|
|
433
|
-
*/
|
|
434
|
-
private async sendContinueSignal(
|
|
435
|
-
laneName: string,
|
|
436
|
-
laneRunDir: string,
|
|
437
|
-
state: LaneRecoveryState,
|
|
438
|
-
idleSeconds: number
|
|
439
|
-
): Promise<RecoveryActionResult> {
|
|
440
|
-
const interventionPath = safeJoin(laneRunDir, 'intervention.txt');
|
|
441
|
-
|
|
442
|
-
try {
|
|
443
|
-
fs.writeFileSync(interventionPath, 'continue');
|
|
444
|
-
|
|
445
|
-
state.stage = RecoveryStage.CONTINUE_SIGNAL;
|
|
446
|
-
state.lastStageChangeTime = Date.now();
|
|
447
|
-
state.continueSignalsSent++;
|
|
448
|
-
|
|
449
|
-
// Record failure history
|
|
450
|
-
state.failureHistory.push({
|
|
451
|
-
timestamp: Date.now(),
|
|
452
|
-
stage: RecoveryStage.CONTINUE_SIGNAL,
|
|
453
|
-
action: 'continue_signal',
|
|
454
|
-
message: `Idle for ${idleSeconds}s`,
|
|
455
|
-
idleTimeMs: idleSeconds * 1000,
|
|
456
|
-
bytesReceived: state.totalBytesReceived,
|
|
457
|
-
lastOutput: state.lastOutput,
|
|
458
|
-
});
|
|
459
|
-
|
|
460
|
-
const message = `[${laneName}] Idle for ${idleSeconds}s - sent continue signal (#${state.continueSignalsSent})`;
|
|
461
|
-
logger.warn(message);
|
|
462
|
-
|
|
463
|
-
events.emit('recovery.continue_signal', {
|
|
464
|
-
laneName,
|
|
465
|
-
idleSeconds,
|
|
466
|
-
signalCount: state.continueSignalsSent,
|
|
467
|
-
});
|
|
468
|
-
|
|
469
|
-
return {
|
|
470
|
-
success: true,
|
|
471
|
-
action: 'continue_signal',
|
|
472
|
-
message,
|
|
473
|
-
shouldContinue: true,
|
|
474
|
-
nextStage: RecoveryStage.CONTINUE_SIGNAL,
|
|
475
|
-
};
|
|
476
|
-
} catch (error: any) {
|
|
477
|
-
logger.error(`[AutoRecovery] Failed to send continue signal to ${laneName}: ${error.message}`);
|
|
478
|
-
return {
|
|
479
|
-
success: false,
|
|
480
|
-
action: 'continue_signal',
|
|
481
|
-
message: `Failed to send continue signal: ${error.message}`,
|
|
482
|
-
shouldContinue: true,
|
|
483
|
-
};
|
|
484
|
-
}
|
|
485
|
-
}
|
|
486
|
-
|
|
487
|
-
/**
|
|
488
|
-
* Send a stronger prompt to nudge the agent
|
|
489
|
-
*/
|
|
490
|
-
private async sendStrongerPrompt(
|
|
491
|
-
laneName: string,
|
|
492
|
-
laneRunDir: string,
|
|
493
|
-
state: LaneRecoveryState
|
|
494
|
-
): Promise<RecoveryActionResult> {
|
|
495
|
-
const interventionPath = safeJoin(laneRunDir, 'intervention.txt');
|
|
496
|
-
|
|
497
|
-
const strongerPrompt = `[SYSTEM INTERVENTION] You seem to be stuck or waiting.
|
|
498
|
-
Please continue with your current task immediately.
|
|
499
|
-
If you're waiting for something, explain what you need and proceed with what you can do now.
|
|
500
|
-
If you've completed the task, please summarize your work and finish.
|
|
501
|
-
If you encountered a git error, resolve it and continue.`;
|
|
502
|
-
|
|
503
|
-
try {
|
|
504
|
-
fs.writeFileSync(interventionPath, strongerPrompt);
|
|
505
|
-
|
|
506
|
-
state.stage = RecoveryStage.STRONGER_PROMPT;
|
|
507
|
-
state.lastStageChangeTime = Date.now();
|
|
508
|
-
|
|
509
|
-
// Record failure history
|
|
510
|
-
state.failureHistory.push({
|
|
511
|
-
timestamp: Date.now(),
|
|
512
|
-
stage: RecoveryStage.STRONGER_PROMPT,
|
|
513
|
-
action: 'stronger_prompt',
|
|
514
|
-
message: 'Still idle after continue signal',
|
|
515
|
-
idleTimeMs: Date.now() - state.lastActivityTime,
|
|
516
|
-
bytesReceived: state.totalBytesReceived,
|
|
517
|
-
lastOutput: state.lastOutput,
|
|
518
|
-
});
|
|
519
|
-
|
|
520
|
-
const message = `[${laneName}] Still idle after continue signal - sent stronger prompt`;
|
|
521
|
-
logger.warn(message);
|
|
522
|
-
|
|
523
|
-
events.emit('recovery.stronger_prompt', {
|
|
524
|
-
laneName,
|
|
525
|
-
prompt: strongerPrompt,
|
|
526
|
-
});
|
|
527
|
-
|
|
528
|
-
return {
|
|
529
|
-
success: true,
|
|
530
|
-
action: 'stronger_prompt',
|
|
531
|
-
message,
|
|
532
|
-
shouldContinue: true,
|
|
533
|
-
nextStage: RecoveryStage.STRONGER_PROMPT,
|
|
534
|
-
};
|
|
535
|
-
} catch (error: any) {
|
|
536
|
-
logger.error(`[AutoRecovery] Failed to send stronger prompt to ${laneName}: ${error.message}`);
|
|
537
|
-
return {
|
|
538
|
-
success: false,
|
|
539
|
-
action: 'stronger_prompt',
|
|
540
|
-
message: `Failed to send stronger prompt: ${error.message}`,
|
|
541
|
-
shouldContinue: true,
|
|
542
|
-
};
|
|
543
|
-
}
|
|
544
|
-
}
|
|
545
|
-
|
|
546
|
-
/**
|
|
547
|
-
* Request process restart
|
|
548
|
-
*/
|
|
549
|
-
private async requestRestart(
|
|
550
|
-
laneName: string,
|
|
551
|
-
state: LaneRecoveryState,
|
|
552
|
-
child?: ChildProcess
|
|
553
|
-
): Promise<RecoveryActionResult> {
|
|
554
|
-
state.restartCount++;
|
|
555
|
-
state.stage = RecoveryStage.RESTART_PROCESS;
|
|
556
|
-
state.lastStageChangeTime = Date.now();
|
|
557
|
-
|
|
558
|
-
// Record failure history
|
|
559
|
-
state.failureHistory.push({
|
|
560
|
-
timestamp: Date.now(),
|
|
561
|
-
stage: RecoveryStage.RESTART_PROCESS,
|
|
562
|
-
action: 'restart',
|
|
563
|
-
message: `Restart attempt ${state.restartCount}/${this.config.maxRestarts}`,
|
|
564
|
-
idleTimeMs: Date.now() - state.lastActivityTime,
|
|
565
|
-
bytesReceived: state.totalBytesReceived,
|
|
566
|
-
lastOutput: state.lastOutput,
|
|
567
|
-
});
|
|
568
|
-
|
|
569
|
-
// Kill the current process if provided
|
|
570
|
-
if (child && child.pid && !child.killed) {
|
|
571
|
-
try {
|
|
572
|
-
child.kill('SIGKILL');
|
|
573
|
-
logger.info(`[AutoRecovery] [${laneName}] Killed process ${child.pid}`);
|
|
574
|
-
} catch (error: any) {
|
|
575
|
-
logger.warn(`[AutoRecovery] [${laneName}] Failed to kill process: ${error.message}`);
|
|
576
|
-
}
|
|
577
|
-
}
|
|
578
|
-
|
|
579
|
-
const message = `[${laneName}] Restarting lane (attempt ${state.restartCount}/${this.config.maxRestarts})`;
|
|
580
|
-
logger.warn(message);
|
|
581
|
-
|
|
582
|
-
events.emit('recovery.restart', {
|
|
583
|
-
laneName,
|
|
584
|
-
restartCount: state.restartCount,
|
|
585
|
-
maxRestarts: this.config.maxRestarts,
|
|
586
|
-
});
|
|
587
|
-
|
|
588
|
-
return {
|
|
589
|
-
success: true,
|
|
590
|
-
action: 'restart',
|
|
591
|
-
message,
|
|
592
|
-
shouldContinue: true,
|
|
593
|
-
nextStage: RecoveryStage.RESTART_PROCESS,
|
|
594
|
-
};
|
|
595
|
-
}
|
|
596
|
-
|
|
597
|
-
/**
|
|
598
|
-
* Run diagnostic checks
|
|
599
|
-
*/
|
|
600
|
-
private async runDiagnosis(
|
|
601
|
-
laneName: string,
|
|
602
|
-
laneRunDir: string,
|
|
603
|
-
state: LaneRecoveryState
|
|
604
|
-
): Promise<RecoveryActionResult> {
|
|
605
|
-
if (!this.config.runDoctorOnFailure) {
|
|
606
|
-
return {
|
|
607
|
-
success: false,
|
|
608
|
-
action: 'diagnose',
|
|
609
|
-
message: 'Diagnosis skipped (disabled in config)',
|
|
610
|
-
shouldContinue: false,
|
|
611
|
-
};
|
|
612
|
-
}
|
|
613
|
-
|
|
614
|
-
logger.info(`[AutoRecovery] [${laneName}] Running diagnostic checks...`);
|
|
615
|
-
|
|
616
|
-
try {
|
|
617
|
-
// Run health checks
|
|
618
|
-
const [agentHealth, authHealth] = await Promise.all([
|
|
619
|
-
checkAgentHealth(),
|
|
620
|
-
checkAuthHealth(),
|
|
621
|
-
]);
|
|
622
|
-
|
|
623
|
-
const systemHealth = await runHealthCheck({ skipRemote: true, skipAuth: true });
|
|
624
|
-
|
|
625
|
-
const diagnostic: DiagnosticInfo = {
|
|
626
|
-
timestamp: Date.now(),
|
|
627
|
-
agentHealthy: agentHealth.ok,
|
|
628
|
-
authHealthy: authHealth.ok,
|
|
629
|
-
systemHealthy: systemHealth.healthy,
|
|
630
|
-
suggestedAction: '',
|
|
631
|
-
details: '',
|
|
632
|
-
};
|
|
633
|
-
|
|
634
|
-
// Analyze and suggest action
|
|
635
|
-
const issues: string[] = [];
|
|
636
|
-
|
|
637
|
-
if (!agentHealth.ok) {
|
|
638
|
-
issues.push(`Agent: ${agentHealth.message}`);
|
|
639
|
-
}
|
|
640
|
-
|
|
641
|
-
if (!authHealth.ok) {
|
|
642
|
-
issues.push(`Auth: ${authHealth.message}`);
|
|
643
|
-
diagnostic.suggestedAction = 'Please sign in to Cursor IDE and verify authentication';
|
|
644
|
-
}
|
|
645
|
-
|
|
646
|
-
if (!systemHealth.healthy) {
|
|
647
|
-
const failedChecks = systemHealth.checks.filter(c => !c.ok);
|
|
648
|
-
issues.push(`System: ${failedChecks.map(c => c.message).join(', ')}`);
|
|
649
|
-
}
|
|
650
|
-
|
|
651
|
-
if (issues.length === 0) {
|
|
652
|
-
diagnostic.details = 'All health checks passed. The issue may be with the AI model or network.';
|
|
653
|
-
diagnostic.suggestedAction = 'Try resuming with a different model or wait and retry.';
|
|
654
|
-
} else {
|
|
655
|
-
diagnostic.details = issues.join('\n');
|
|
656
|
-
}
|
|
657
|
-
|
|
658
|
-
state.diagnosticInfo = diagnostic;
|
|
659
|
-
|
|
660
|
-
// Record failure history
|
|
661
|
-
state.failureHistory.push({
|
|
662
|
-
timestamp: Date.now(),
|
|
663
|
-
stage: RecoveryStage.DIAGNOSE,
|
|
664
|
-
action: 'diagnose',
|
|
665
|
-
message: diagnostic.details,
|
|
666
|
-
idleTimeMs: Date.now() - state.lastActivityTime,
|
|
667
|
-
bytesReceived: state.totalBytesReceived,
|
|
668
|
-
lastOutput: state.lastOutput,
|
|
669
|
-
});
|
|
670
|
-
|
|
671
|
-
// Save diagnostic to file
|
|
672
|
-
const diagnosticPath = safeJoin(laneRunDir, 'diagnostic.json');
|
|
673
|
-
fs.writeFileSync(diagnosticPath, JSON.stringify(diagnostic, null, 2));
|
|
674
|
-
|
|
675
|
-
const message = `[${laneName}] Diagnostic complete:\n${diagnostic.details}\nSuggested action: ${diagnostic.suggestedAction}`;
|
|
676
|
-
logger.error(message);
|
|
677
|
-
|
|
678
|
-
events.emit('recovery.diagnosed', {
|
|
679
|
-
laneName,
|
|
680
|
-
diagnostic,
|
|
681
|
-
});
|
|
682
|
-
|
|
683
|
-
return {
|
|
684
|
-
success: true,
|
|
685
|
-
action: 'diagnose',
|
|
686
|
-
message,
|
|
687
|
-
shouldContinue: false,
|
|
688
|
-
diagnostic,
|
|
689
|
-
};
|
|
690
|
-
} catch (error: any) {
|
|
691
|
-
logger.error(`[AutoRecovery] Diagnostic failed: ${error.message}`);
|
|
692
|
-
return {
|
|
693
|
-
success: false,
|
|
694
|
-
action: 'diagnose',
|
|
695
|
-
message: `Diagnostic failed: ${error.message}`,
|
|
696
|
-
shouldContinue: false,
|
|
697
|
-
};
|
|
698
|
-
}
|
|
699
|
-
}
|
|
700
|
-
|
|
701
|
-
/**
|
|
702
|
-
* Get failure history for a lane
|
|
703
|
-
*/
|
|
704
|
-
getFailureHistory(laneName: string): FailureRecord[] {
|
|
705
|
-
const state = this.laneStates.get(laneName);
|
|
706
|
-
return state?.failureHistory || [];
|
|
707
|
-
}
|
|
708
|
-
|
|
709
|
-
/**
|
|
710
|
-
* Get configuration
|
|
711
|
-
*/
|
|
712
|
-
getConfig(): AutoRecoveryConfig {
|
|
713
|
-
return { ...this.config };
|
|
714
|
-
}
|
|
715
|
-
|
|
716
|
-
/**
|
|
717
|
-
* Update configuration
|
|
718
|
-
*/
|
|
719
|
-
updateConfig(config: Partial<AutoRecoveryConfig>): void {
|
|
720
|
-
this.config = { ...this.config, ...config };
|
|
721
|
-
}
|
|
722
|
-
}
|
|
723
|
-
|
|
724
|
-
// ============================================================================
|
|
725
|
-
// POF (Post-mortem of Failure) Management
|
|
166
|
+
// Post-Mortem of Failure (POF) Management
|
|
726
167
|
// ============================================================================
|
|
727
168
|
|
|
728
169
|
/**
|
|
@@ -888,24 +329,7 @@ export function listPOFs(pofDir: string): string[] {
|
|
|
888
329
|
// Exports
|
|
889
330
|
// ============================================================================
|
|
890
331
|
|
|
891
|
-
|
|
892
|
-
|
|
332
|
+
// AutoRecoveryManager class removed. All stall detection and recovery logic
|
|
333
|
+
// has been moved to StallDetectionService in ./stall-detection.ts.
|
|
334
|
+
// Utility functions for POF and git guidance are kept below.
|
|
893
335
|
|
|
894
|
-
/**
|
|
895
|
-
* Get or create the default auto-recovery manager
|
|
896
|
-
*/
|
|
897
|
-
export function getAutoRecoveryManager(config?: Partial<AutoRecoveryConfig>): AutoRecoveryManager {
|
|
898
|
-
if (!defaultManager) {
|
|
899
|
-
defaultManager = new AutoRecoveryManager(config);
|
|
900
|
-
} else if (config) {
|
|
901
|
-
defaultManager.updateConfig(config);
|
|
902
|
-
}
|
|
903
|
-
return defaultManager;
|
|
904
|
-
}
|
|
905
|
-
|
|
906
|
-
/**
|
|
907
|
-
* Reset the default manager (for testing)
|
|
908
|
-
*/
|
|
909
|
-
export function resetAutoRecoveryManager(): void {
|
|
910
|
-
defaultManager = null;
|
|
911
|
-
}
|