steroids-cli 0.9.41 → 0.9.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/dist/commands/loop-phases.d.ts +12 -0
  2. package/dist/commands/loop-phases.d.ts.map +1 -1
  3. package/dist/commands/loop-phases.js +89 -4
  4. package/dist/commands/loop-phases.js.map +1 -1
  5. package/dist/commands/tasks-reset.d.ts +3 -0
  6. package/dist/commands/tasks-reset.d.ts.map +1 -0
  7. package/dist/commands/tasks-reset.js +203 -0
  8. package/dist/commands/tasks-reset.js.map +1 -0
  9. package/dist/commands/tasks.d.ts.map +1 -1
  10. package/dist/commands/tasks.js +8 -0
  11. package/dist/commands/tasks.js.map +1 -1
  12. package/dist/database/connection.d.ts +3 -1
  13. package/dist/database/connection.d.ts.map +1 -1
  14. package/dist/database/connection.js +5 -3
  15. package/dist/database/connection.js.map +1 -1
  16. package/dist/database/queries.d.ts +9 -0
  17. package/dist/database/queries.d.ts.map +1 -1
  18. package/dist/database/queries.js +36 -2
  19. package/dist/database/queries.js.map +1 -1
  20. package/dist/database/schema.d.ts +2 -2
  21. package/dist/database/schema.d.ts.map +1 -1
  22. package/dist/database/schema.js +2 -0
  23. package/dist/database/schema.js.map +1 -1
  24. package/dist/orchestrator/coder.d.ts +1 -1
  25. package/dist/orchestrator/coder.d.ts.map +1 -1
  26. package/dist/orchestrator/coder.js +95 -11
  27. package/dist/orchestrator/coder.js.map +1 -1
  28. package/dist/orchestrator/reviewer.d.ts +2 -2
  29. package/dist/orchestrator/reviewer.d.ts.map +1 -1
  30. package/dist/orchestrator/reviewer.js +94 -13
  31. package/dist/orchestrator/reviewer.js.map +1 -1
  32. package/dist/providers/claude.d.ts.map +1 -1
  33. package/dist/providers/claude.js +9 -1
  34. package/dist/providers/claude.js.map +1 -1
  35. package/dist/providers/codex.d.ts.map +1 -1
  36. package/dist/providers/codex.js +10 -1
  37. package/dist/providers/codex.js.map +1 -1
  38. package/dist/providers/gemini.d.ts +9 -48
  39. package/dist/providers/gemini.d.ts.map +1 -1
  40. package/dist/providers/gemini.js +133 -101
  41. package/dist/providers/gemini.js.map +1 -1
  42. package/dist/providers/interface.d.ts +9 -1
  43. package/dist/providers/interface.d.ts.map +1 -1
  44. package/dist/providers/interface.js +11 -1
  45. package/dist/providers/interface.js.map +1 -1
  46. package/dist/providers/invocation-logger.d.ts +2 -0
  47. package/dist/providers/invocation-logger.d.ts.map +1 -1
  48. package/dist/providers/invocation-logger.js +3 -3
  49. package/dist/providers/invocation-logger.js.map +1 -1
  50. package/dist/providers/mistral.d.ts.map +1 -1
  51. package/dist/providers/mistral.js +12 -1
  52. package/dist/providers/mistral.js.map +1 -1
  53. package/dist/providers/ping.d.ts +5 -0
  54. package/dist/providers/ping.d.ts.map +1 -0
  55. package/dist/providers/ping.js +35 -0
  56. package/dist/providers/ping.js.map +1 -0
  57. package/dist/providers/registry.d.ts +4 -0
  58. package/dist/providers/registry.d.ts.map +1 -1
  59. package/dist/providers/registry.js +18 -0
  60. package/dist/providers/registry.js.map +1 -1
  61. package/dist/runners/credit-pause.d.ts +2 -3
  62. package/dist/runners/credit-pause.d.ts.map +1 -1
  63. package/dist/runners/credit-pause.js +28 -40
  64. package/dist/runners/credit-pause.js.map +1 -1
  65. package/dist/runners/daemon.d.ts +0 -5
  66. package/dist/runners/daemon.d.ts.map +1 -1
  67. package/dist/runners/daemon.js +0 -26
  68. package/dist/runners/daemon.js.map +1 -1
  69. package/dist/runners/global-db.d.ts +8 -0
  70. package/dist/runners/global-db.d.ts.map +1 -1
  71. package/dist/runners/global-db.js +49 -1
  72. package/dist/runners/global-db.js.map +1 -1
  73. package/dist/runners/projects.d.ts +10 -22
  74. package/dist/runners/projects.d.ts.map +1 -1
  75. package/dist/runners/projects.js +34 -36
  76. package/dist/runners/projects.js.map +1 -1
  77. package/dist/runners/wakeup-checks.d.ts.map +1 -1
  78. package/dist/runners/wakeup-checks.js +14 -10
  79. package/dist/runners/wakeup-checks.js.map +1 -1
  80. package/dist/runners/wakeup.d.ts +1 -1
  81. package/dist/runners/wakeup.d.ts.map +1 -1
  82. package/dist/runners/wakeup.js +415 -348
  83. package/dist/runners/wakeup.js.map +1 -1
  84. package/dist/utils/tokens.d.ts +14 -0
  85. package/dist/utils/tokens.d.ts.map +1 -0
  86. package/dist/utils/tokens.js +62 -0
  87. package/dist/utils/tokens.js.map +1 -0
  88. package/migrations/017_add_invocation_runner_id.sql +8 -0
  89. package/migrations/manifest.json +9 -1
  90. package/package.json +2 -1
@@ -24,6 +24,10 @@ Object.defineProperty(exports, "hasActiveParallelSessionForProject", { enumerabl
24
24
  const wakeup_runner_js_1 = require("./wakeup-runner.js");
25
25
  const wakeup_timing_js_1 = require("./wakeup-timing.js");
26
26
  Object.defineProperty(exports, "getLastWakeupTime", { enumerable: true, get: function () { return wakeup_timing_js_1.getLastWakeupTime; } });
27
+ const projects_js_2 = require("./projects.js");
28
+ const ping_js_1 = require("../providers/ping.js");
29
+ // In-memory mutex to prevent concurrent wakeup cycles in the same process
30
+ let isWakeupRunning = false;
27
31
  /**
28
32
  * Main wake-up function
29
33
  * Called by cron every minute to ensure runners are healthy
@@ -37,216 +41,275 @@ async function wakeup(options = {}) {
37
41
  if (!quiet)
38
42
  console.log(msg);
39
43
  };
40
- // Record wakeup invocation time (even for dry runs)
41
- if (!dryRun) {
42
- (0, wakeup_timing_js_1.recordWakeupTime)();
44
+ if (isWakeupRunning) {
45
+ log('Wakeup cycle already running (in-memory lock), skipping.');
46
+ return [{ action: 'skipped', reason: 'Wakeup cycle already running' }];
43
47
  }
44
- // Step 1: Clean up stale runners first
45
- const global = (0, global_db_js_1.openGlobalDatabase)();
48
+ isWakeupRunning = true;
46
49
  try {
50
+ if (!(0, global_db_js_1.getDaemonActiveStatus)()) {
51
+ log('Daemon paused (is_active=false), skipping wakeup logic.');
52
+ return [{ action: 'skipped', reason: 'Daemon is paused' }];
53
+ }
54
+ // Record wakeup invocation time (even for dry runs)
55
+ if (!dryRun) {
56
+ (0, wakeup_timing_js_1.recordWakeupTime)();
57
+ }
58
+ // Step 1: Clean up stale runners first
59
+ const global = (0, global_db_js_1.openGlobalDatabase)();
47
60
  try {
48
- const staleRunners = (0, heartbeat_js_1.findStaleRunners)(global.db);
49
- if (staleRunners.length > 0) {
50
- log(`Found ${staleRunners.length} stale runner(s), cleaning up...`);
51
- if (!dryRun) {
52
- for (const runner of staleRunners) {
53
- if (runner.pid) {
54
- (0, wakeup_runner_js_1.killProcess)(runner.pid);
55
- }
56
- global.db.prepare(`UPDATE workstreams
61
+ try {
62
+ const staleRunners = (0, heartbeat_js_1.findStaleRunners)(global.db);
63
+ if (staleRunners.length > 0) {
64
+ log(`Found ${staleRunners.length} stale runner(s), cleaning up...`);
65
+ if (!dryRun) {
66
+ for (const runner of staleRunners) {
67
+ if (runner.pid) {
68
+ (0, wakeup_runner_js_1.killProcess)(runner.pid);
69
+ }
70
+ global.db.prepare(`UPDATE workstreams
57
71
  SET runner_id = NULL,
58
72
  lease_expires_at = datetime('now')
59
73
  WHERE runner_id = ?`).run(runner.id);
60
- global.db.prepare('DELETE FROM runners WHERE id = ?').run(runner.id);
74
+ global.db.prepare('DELETE FROM runners WHERE id = ?').run(runner.id);
75
+ }
61
76
  }
77
+ results.push({
78
+ action: 'cleaned',
79
+ reason: `Cleaned ${staleRunners.length} stale runner(s)`,
80
+ staleRunners: staleRunners.length,
81
+ });
62
82
  }
63
- results.push({
64
- action: 'cleaned',
65
- reason: `Cleaned ${staleRunners.length} stale runner(s)`,
66
- staleRunners: staleRunners.length,
67
- });
68
83
  }
69
- }
70
- catch {
71
- // ignore global DB issues; wakeup will still attempt per-project checks
72
- }
73
- try {
74
- const releasedLeases = global.db.prepare(`UPDATE workstreams
84
+ catch {
85
+ // ignore global DB issues; wakeup will still attempt per-project checks
86
+ }
87
+ try {
88
+ const releasedLeases = global.db.prepare(`UPDATE workstreams
75
89
  SET runner_id = NULL,
76
90
  lease_expires_at = NULL
77
91
  WHERE status = 'running'
78
92
  AND lease_expires_at IS NOT NULL
79
93
  AND lease_expires_at <= datetime('now')`).run().changes;
80
- if (releasedLeases > 0) {
81
- log(`Released ${releasedLeases} expired workstream lease(s)`);
94
+ if (releasedLeases > 0) {
95
+ log(`Released ${releasedLeases} expired workstream lease(s)`);
96
+ }
82
97
  }
83
- }
84
- catch {
85
- // ignore lease cleanup issues in wakeup
86
- }
87
- // Step 2: Clean zombie lock if present
88
- const lockStatus = (0, lock_js_1.checkLockStatus)();
89
- if (lockStatus.isZombie && lockStatus.pid) {
90
- log(`Found zombie lock (PID: ${lockStatus.pid}), cleaning...`);
91
- if (!dryRun) {
92
- (0, lock_js_1.removeLock)();
98
+ catch {
99
+ // ignore lease cleanup issues in wakeup
93
100
  }
94
- }
95
- // Step 3: Get all registered projects from global registry
96
- const registeredProjects = (0, projects_js_1.getRegisteredProjects)(false); // enabled only
97
- if (registeredProjects.length === 0) {
98
- log('No registered projects found');
99
- log('Run "steroids projects add <path>" to register a project');
100
- results.push({
101
- action: 'none',
102
- reason: 'No registered projects',
103
- pendingTasks: 0,
104
- });
105
- return results;
106
- }
107
- log(`Checking ${registeredProjects.length} registered project(s)...`);
108
- // Step 4: Check each project and start runners as needed
109
- for (const project of registeredProjects) {
110
- // Skip if project directory doesn't exist
111
- if (!(0, node_fs_1.existsSync)(project.path)) {
112
- log(`Skipping ${project.path}: directory not found`);
101
+ // Step 2: Clean zombie lock if present
102
+ const lockStatus = (0, lock_js_1.checkLockStatus)();
103
+ if (lockStatus.isZombie && lockStatus.pid) {
104
+ log(`Found zombie lock (PID: ${lockStatus.pid}), cleaning...`);
105
+ if (!dryRun) {
106
+ (0, lock_js_1.removeLock)();
107
+ }
108
+ }
109
+ // Step 3: Get all registered projects from global registry
110
+ const registeredProjects = (0, projects_js_1.getRegisteredProjects)(false); // enabled only
111
+ if (registeredProjects.length === 0) {
112
+ log('No registered projects found');
113
+ log('Run "steroids projects add <path>" to register a project');
113
114
  results.push({
114
115
  action: 'none',
115
- reason: 'Directory not found',
116
- projectPath: project.path,
116
+ reason: 'No registered projects',
117
+ pendingTasks: 0,
117
118
  });
118
- continue;
119
+ return results;
119
120
  }
120
- // Phase 6 (live monitoring): best-effort retention cleanup of invocation activity logs.
121
- // This is safe to run even if the project has no pending tasks.
122
- let deletedInvocationLogs = 0;
123
- try {
124
- const cleanup = (0, invocation_logs_js_1.cleanupInvocationLogs)(project.path, { retentionDays: 7, dryRun });
125
- deletedInvocationLogs = cleanup.deletedFiles;
126
- if (cleanup.deletedFiles > 0 && !quiet) {
127
- log(`Cleaned ${cleanup.deletedFiles} old invocation log(s) in ${project.path}`);
121
+ log(`Checking ${registeredProjects.length} registered project(s)...`);
122
+ // Step 4: Check each project and start runners as needed
123
+ for (const project of registeredProjects) {
124
+ // Skip if project directory doesn't exist
125
+ if (!(0, node_fs_1.existsSync)(project.path)) {
126
+ log(`Skipping ${project.path}: directory not found`);
127
+ results.push({
128
+ action: 'none',
129
+ reason: 'Directory not found',
130
+ projectPath: project.path,
131
+ });
132
+ continue;
128
133
  }
129
- }
130
- catch {
131
- // Ignore cleanup errors; wakeup must remain robust.
132
- }
133
- let recoveredActions = 0;
134
- let skippedRecoveryDueToSafetyLimit = false;
135
- let sanitisedActions = 0;
136
- try {
137
- const { db: projectDb, close: closeProjectDb } = (0, connection_js_1.openDatabase)(project.path);
134
+ // Phase 6 (live monitoring): best-effort retention cleanup of invocation activity logs.
135
+ // This is safe to run even if the project has no pending tasks.
136
+ let deletedInvocationLogs = 0;
138
137
  try {
139
- const sanitiseSummary = (0, wakeup_sanitise_js_1.runPeriodicSanitiseForProject)(global.db, projectDb, project.path, dryRun);
140
- sanitisedActions = (0, wakeup_sanitise_js_1.sanitisedActionCount)(sanitiseSummary);
141
- if (sanitisedActions > 0 && !quiet) {
142
- log(`Sanitised ${sanitisedActions} stale item(s) in ${project.path}`);
138
+ const cleanup = (0, invocation_logs_js_1.cleanupInvocationLogs)(project.path, { retentionDays: 7, dryRun });
139
+ deletedInvocationLogs = cleanup.deletedFiles;
140
+ if (cleanup.deletedFiles > 0 && !quiet) {
141
+ log(`Cleaned ${cleanup.deletedFiles} old invocation log(s) in ${project.path}`);
143
142
  }
144
- // Step 4a: Recover stuck tasks (best-effort) before deciding whether to (re)start a runner.
145
- // This is what unblocks orphaned/infinite-hang scenarios without manual intervention.
146
- const config = (0, loader_js_1.loadConfig)(project.path);
147
- const recovery = await (0, stuck_task_recovery_js_1.recoverStuckTasks)({
148
- projectPath: project.path,
149
- projectDb,
150
- globalDb: global.db,
151
- config,
152
- dryRun,
153
- });
154
- recoveredActions = recovery.actions.length;
155
- skippedRecoveryDueToSafetyLimit = recovery.skippedDueToSafetyLimit;
156
- if (recoveredActions > 0 && !quiet) {
157
- log(`Recovered ${recoveredActions} stuck item(s) in ${project.path}`);
143
+ }
144
+ catch {
145
+ // Ignore cleanup errors; wakeup must remain robust.
146
+ }
147
+ let recoveredActions = 0;
148
+ let skippedRecoveryDueToSafetyLimit = false;
149
+ let sanitisedActions = 0;
150
+ try {
151
+ const { db: projectDb, close: closeProjectDb } = (0, connection_js_1.openDatabase)(project.path);
152
+ try {
153
+ const sanitiseSummary = (0, wakeup_sanitise_js_1.runPeriodicSanitiseForProject)(global.db, projectDb, project.path, dryRun);
154
+ sanitisedActions = (0, wakeup_sanitise_js_1.sanitisedActionCount)(sanitiseSummary);
155
+ if (sanitisedActions > 0 && !quiet) {
156
+ log(`Sanitised ${sanitisedActions} stale item(s) in ${project.path}`);
157
+ }
158
+ // Step 4a: Recover stuck tasks (best-effort) before deciding whether to (re)start a runner.
159
+ // This is what unblocks orphaned/infinite-hang scenarios without manual intervention.
160
+ const config = (0, loader_js_1.loadConfig)(project.path);
161
+ const recovery = await (0, stuck_task_recovery_js_1.recoverStuckTasks)({
162
+ projectPath: project.path,
163
+ projectDb,
164
+ globalDb: global.db,
165
+ config,
166
+ dryRun,
167
+ });
168
+ recoveredActions = recovery.actions.length;
169
+ skippedRecoveryDueToSafetyLimit = recovery.skippedDueToSafetyLimit;
170
+ if (recoveredActions > 0 && !quiet) {
171
+ log(`Recovered ${recoveredActions} stuck item(s) in ${project.path}`);
172
+ }
173
+ if (skippedRecoveryDueToSafetyLimit && !quiet) {
174
+ log(`Skipping auto-recovery in ${project.path}: safety limit hit (maxIncidentsPerHour)`);
175
+ }
158
176
  }
159
- if (skippedRecoveryDueToSafetyLimit && !quiet) {
160
- log(`Skipping auto-recovery in ${project.path}: safety limit hit (maxIncidentsPerHour)`);
177
+ finally {
178
+ closeProjectDb();
161
179
  }
162
180
  }
163
- finally {
164
- closeProjectDb();
181
+ catch {
182
+ // If sanitise/recovery can't run (DB missing/corrupt), we still proceed with runner checks.
165
183
  }
166
- }
167
- catch {
168
- // If sanitise/recovery can't run (DB missing/corrupt), we still proceed with runner checks.
169
- }
170
- // Check for pending work after sanitise/recovery
171
- const hasWork = await (0, wakeup_checks_js_1.projectHasPendingWork)(project.path);
172
- if (!hasWork) {
173
- const noWorkReason = sanitisedActions > 0
174
- ? `No pending tasks after sanitise (${sanitisedActions} action(s))`
175
- : 'No pending tasks';
176
- log(`Skipping ${project.path}: ${noWorkReason.toLowerCase()}`);
177
- results.push({
178
- action: 'none',
179
- reason: noWorkReason,
180
- projectPath: project.path,
181
- recoveredActions,
182
- skippedRecoveryDueToSafetyLimit,
183
- deletedInvocationLogs,
184
- sanitisedActions,
185
- });
186
- continue;
187
- }
188
- const projectConfig = (0, loader_js_1.loadConfig)(project.path);
189
- const parallelEnabled = projectConfig.runners?.parallel?.enabled === true;
190
- const configuredMaxClonesRaw = Number(projectConfig.runners?.parallel?.maxClones);
191
- const configuredMaxClones = Number.isFinite(configuredMaxClonesRaw) && configuredMaxClonesRaw > 0
192
- ? configuredMaxClonesRaw
193
- : 3;
194
- // Skip projects currently executing a parallel session before attempting recovery/startup.
195
- // This prevents parallel runners from being interfered with by a cron-managed runner.
196
- if ((0, wakeup_checks_js_1.hasActiveParallelSessionForProject)(project.path)) {
197
- let retrySummary = '';
198
- let skipForParallelSession = true;
199
- let scaledDown = 0;
200
- let resumed = 0;
201
- let wouldScaleDown = 0;
202
- let wouldResume = 0;
203
- const activeSessions = global.db
204
- .prepare(`SELECT id
205
- FROM parallel_sessions
206
- WHERE project_path = ?
207
- AND status NOT IN ('completed', 'failed', 'aborted', 'blocked_validation', 'blocked_recovery')`)
208
- .all(project.path);
209
- // Config-aware mode reconciliation (parallel -> single):
210
- // if parallel is disabled, convert only when the active parallel runners
211
- // are idle to avoid interrupting in-flight tasks.
212
- if (!parallelEnabled && activeSessions.length > 0) {
213
- const sessionRunners = activeSessions.flatMap((session) => global.db
214
- .prepare(`SELECT id, pid, status, current_task_id
215
- FROM runners
216
- WHERE parallel_session_id = ?
217
- AND status != 'stopped'
218
- AND heartbeat_at > datetime('now', '-5 minutes')`)
219
- .all(session.id));
220
- const hasBusyRunner = sessionRunners.some((runner) => (runner.status ?? '').toLowerCase() !== 'idle' || !!runner.current_task_id);
221
- if (hasBusyRunner) {
222
- const reason = 'Parallel->single mode switch pending (active workstream runner busy)';
223
- log(`Skipping ${project.path}: ${reason.toLowerCase()}`);
184
+ // Check for pending work after sanitise/recovery
185
+ const hasWork = await (0, wakeup_checks_js_1.projectHasPendingWork)(project.path);
186
+ if (!hasWork) {
187
+ const noWorkReason = sanitisedActions > 0
188
+ ? `No pending tasks after sanitise (${sanitisedActions} action(s))`
189
+ : 'No pending tasks';
190
+ log(`Skipping ${project.path}: ${noWorkReason.toLowerCase()}`);
191
+ results.push({
192
+ action: 'none',
193
+ reason: noWorkReason,
194
+ projectPath: project.path,
195
+ recoveredActions,
196
+ skippedRecoveryDueToSafetyLimit,
197
+ deletedInvocationLogs,
198
+ sanitisedActions,
199
+ });
200
+ continue;
201
+ }
202
+ const projectConfig = (0, loader_js_1.loadConfig)(project.path);
203
+ // Check if project is hibernating
204
+ if (project.hibernating_until) {
205
+ const untilMs = new Date(project.hibernating_until).getTime();
206
+ if (Date.now() < untilMs) {
207
+ log(`Skipping ${project.path}: Project is hibernating (tier ${project.hibernation_tier}) until ${project.hibernating_until}`);
224
208
  results.push({
225
- action: dryRun ? 'would_start' : 'none',
226
- reason,
209
+ action: 'skipped',
210
+ reason: `Hibernating until ${project.hibernating_until}`,
227
211
  projectPath: project.path,
228
- deletedInvocationLogs,
229
212
  });
230
213
  continue;
231
214
  }
232
- if (dryRun) {
233
- const reason = 'Would recycle idle parallel session to apply single-runner mode';
234
- log(`Would reconcile ${project.path}: ${reason.toLowerCase()}`);
215
+ // Hibernation timer expired, execute lightweight ping
216
+ log(`Hibernation timer expired for ${project.path}. Executing lightweight ping...`);
217
+ let pingSuccess = false;
218
+ if (!dryRun) {
219
+ // We use the orchestrator provider to ping, as it's the general "health" proxy
220
+ const orchestratorProvider = projectConfig.ai?.orchestrator?.provider;
221
+ const orchestratorModel = projectConfig.ai?.orchestrator?.model;
222
+ if (orchestratorProvider && orchestratorModel) {
223
+ pingSuccess = await (0, ping_js_1.pingProvider)(orchestratorProvider, orchestratorModel);
224
+ }
225
+ }
226
+ else {
227
+ pingSuccess = true;
228
+ }
229
+ if (pingSuccess) {
230
+ log(`Ping successful for ${project.path}. Clearing hibernation state.`);
231
+ if (!dryRun)
232
+ (0, projects_js_2.clearProjectHibernation)(project.path);
233
+ }
234
+ else {
235
+ // Ping failed, extend hibernation
236
+ const nextTier = (project.hibernation_tier ?? 1) + 1;
237
+ const backoffMinutes = 30; // 30 minutes for tier 2+
238
+ const newUntilISO = new Date(Date.now() + (backoffMinutes * 60 * 1000)).toISOString();
239
+ log(`Ping failed for ${project.path}. Extending hibernation to tier ${nextTier} until ${newUntilISO}.`);
240
+ if (!dryRun) {
241
+ const { setProjectHibernation } = await import('./projects.js');
242
+ setProjectHibernation(project.path, nextTier, newUntilISO);
243
+ }
235
244
  results.push({
236
- action: 'would_start',
237
- reason,
245
+ action: 'skipped',
246
+ reason: `Ping failed, extended hibernation until ${newUntilISO}`,
238
247
  projectPath: project.path,
239
- deletedInvocationLogs,
240
248
  });
241
249
  continue;
242
250
  }
243
- for (const runner of sessionRunners) {
244
- if (runner.pid)
245
- (0, wakeup_runner_js_1.killProcess)(runner.pid);
246
- global.db.prepare('DELETE FROM runners WHERE id = ?').run(runner.id);
247
- }
248
- for (const session of activeSessions) {
249
- global.db.prepare(`UPDATE workstreams
251
+ }
252
+ const parallelEnabled = projectConfig.runners?.parallel?.enabled === true;
253
+ const configuredMaxClonesRaw = Number(projectConfig.runners?.parallel?.maxClones);
254
+ const configuredMaxClones = Number.isFinite(configuredMaxClonesRaw) && configuredMaxClonesRaw > 0
255
+ ? configuredMaxClonesRaw
256
+ : 3;
257
+ // Skip projects currently executing a parallel session before attempting recovery/startup.
258
+ // This prevents parallel runners from being interfered with by a cron-managed runner.
259
+ if ((0, wakeup_checks_js_1.hasActiveParallelSessionForProject)(project.path)) {
260
+ let retrySummary = '';
261
+ let skipForParallelSession = true;
262
+ let scaledDown = 0;
263
+ let resumed = 0;
264
+ let wouldScaleDown = 0;
265
+ let wouldResume = 0;
266
+ const activeSessions = global.db
267
+ .prepare(`SELECT id
268
+ FROM parallel_sessions
269
+ WHERE project_path = ?
270
+ AND status NOT IN ('completed', 'failed', 'aborted', 'blocked_validation', 'blocked_recovery')`)
271
+ .all(project.path);
272
+ // Config-aware mode reconciliation (parallel -> single):
273
+ // if parallel is disabled, convert only when the active parallel runners
274
+ // are idle to avoid interrupting in-flight tasks.
275
+ if (!parallelEnabled && activeSessions.length > 0) {
276
+ const sessionRunners = activeSessions.flatMap((session) => global.db
277
+ .prepare(`SELECT id, pid, status, current_task_id
278
+ FROM runners
279
+ WHERE parallel_session_id = ?
280
+ AND status != 'stopped'
281
+ AND heartbeat_at > datetime('now', '-5 minutes')`)
282
+ .all(session.id));
283
+ const hasBusyRunner = sessionRunners.some((runner) => (runner.status ?? '').toLowerCase() !== 'idle' || !!runner.current_task_id);
284
+ if (hasBusyRunner) {
285
+ const reason = 'Parallel->single mode switch pending (active workstream runner busy)';
286
+ log(`Skipping ${project.path}: ${reason.toLowerCase()}`);
287
+ results.push({
288
+ action: dryRun ? 'would_start' : 'none',
289
+ reason,
290
+ projectPath: project.path,
291
+ deletedInvocationLogs,
292
+ });
293
+ continue;
294
+ }
295
+ if (dryRun) {
296
+ const reason = 'Would recycle idle parallel session to apply single-runner mode';
297
+ log(`Would reconcile ${project.path}: ${reason.toLowerCase()}`);
298
+ results.push({
299
+ action: 'would_start',
300
+ reason,
301
+ projectPath: project.path,
302
+ deletedInvocationLogs,
303
+ });
304
+ continue;
305
+ }
306
+ for (const runner of sessionRunners) {
307
+ if (runner.pid)
308
+ (0, wakeup_runner_js_1.killProcess)(runner.pid);
309
+ global.db.prepare('DELETE FROM runners WHERE id = ?').run(runner.id);
310
+ }
311
+ for (const session of activeSessions) {
312
+ global.db.prepare(`UPDATE workstreams
250
313
  SET status = 'aborted',
251
314
  runner_id = NULL,
252
315
  lease_expires_at = NULL,
@@ -256,34 +319,34 @@ async function wakeup(options = {}) {
256
319
  completed_at = COALESCE(completed_at, datetime('now'))
257
320
  WHERE session_id = ?
258
321
  AND status NOT IN ('completed', 'failed', 'aborted')`).run(session.id);
259
- global.db.prepare(`UPDATE parallel_sessions
322
+ global.db.prepare(`UPDATE parallel_sessions
260
323
  SET status = 'aborted',
261
324
  completed_at = COALESCE(completed_at, datetime('now'))
262
325
  WHERE id = ?`).run(session.id);
326
+ }
327
+ skipForParallelSession = false;
328
+ retrySummary = ', recycled idle parallel session to apply single-runner mode';
263
329
  }
264
- skipForParallelSession = false;
265
- retrySummary = ', recycled idle parallel session to apply single-runner mode';
266
- }
267
- for (const session of activeSessions) {
268
- const sessionRunners = global.db
269
- .prepare(`SELECT id, pid, status, current_task_id
330
+ for (const session of activeSessions) {
331
+ const sessionRunners = global.db
332
+ .prepare(`SELECT id, pid, status, current_task_id
270
333
  FROM runners
271
334
  WHERE parallel_session_id = ?
272
335
  AND status != 'stopped'
273
336
  AND heartbeat_at > datetime('now', '-5 minutes')
274
337
  ORDER BY started_at DESC, heartbeat_at DESC`)
275
- .all(session.id);
276
- if (sessionRunners.length > configuredMaxClones) {
277
- const idleCandidate = sessionRunners.find((r) => (r.status ?? '').toLowerCase() === 'idle' && !r.current_task_id);
278
- if (idleCandidate) {
279
- if (dryRun) {
280
- wouldScaleDown += 1;
281
- }
282
- else {
283
- if (idleCandidate.pid) {
284
- (0, wakeup_runner_js_1.killProcess)(idleCandidate.pid);
338
+ .all(session.id);
339
+ if (sessionRunners.length > configuredMaxClones) {
340
+ const idleCandidate = sessionRunners.find((r) => (r.status ?? '').toLowerCase() === 'idle' && !r.current_task_id);
341
+ if (idleCandidate) {
342
+ if (dryRun) {
343
+ wouldScaleDown += 1;
285
344
  }
286
- global.db.prepare(`UPDATE workstreams
345
+ else {
346
+ if (idleCandidate.pid) {
347
+ (0, wakeup_runner_js_1.killProcess)(idleCandidate.pid);
348
+ }
349
+ global.db.prepare(`UPDATE workstreams
287
350
  SET runner_id = NULL,
288
351
  lease_expires_at = datetime('now', '+5 minutes'),
289
352
  next_retry_at = datetime('now', '+5 minutes'),
@@ -291,14 +354,14 @@ async function wakeup(options = {}) {
291
354
  last_reconciled_at = datetime('now')
292
355
  WHERE session_id = ?
293
356
  AND runner_id = ?`).run(session.id, idleCandidate.id);
294
- global.db.prepare('DELETE FROM runners WHERE id = ?').run(idleCandidate.id);
295
- scaledDown += 1;
357
+ global.db.prepare('DELETE FROM runners WHERE id = ?').run(idleCandidate.id);
358
+ scaledDown += 1;
359
+ }
296
360
  }
297
361
  }
298
- }
299
- else if (sessionRunners.length < configuredMaxClones) {
300
- const throttled = global.db
301
- .prepare(`SELECT id
362
+ else if (sessionRunners.length < configuredMaxClones) {
363
+ const throttled = global.db
364
+ .prepare(`SELECT id
302
365
  FROM workstreams
303
366
  WHERE session_id = ?
304
367
  AND status = 'running'
@@ -307,79 +370,79 @@ async function wakeup(options = {}) {
307
370
  AND last_reconcile_action = 'concurrency_throttle'
308
371
  ORDER BY last_reconciled_at ASC
309
372
  LIMIT 1`)
310
- .get(session.id);
311
- if (throttled) {
312
- if (dryRun) {
313
- wouldResume += 1;
314
- }
315
- else {
316
- global.db.prepare(`UPDATE workstreams
373
+ .get(session.id);
374
+ if (throttled) {
375
+ if (dryRun) {
376
+ wouldResume += 1;
377
+ }
378
+ else {
379
+ global.db.prepare(`UPDATE workstreams
317
380
  SET lease_expires_at = datetime('now'),
318
381
  next_retry_at = datetime('now'),
319
382
  last_reconcile_action = 'concurrency_resume',
320
383
  last_reconciled_at = datetime('now')
321
384
  WHERE id = ?`).run(throttled.id);
322
- resumed += 1;
385
+ resumed += 1;
386
+ }
323
387
  }
324
388
  }
325
389
  }
326
- }
327
- if (!dryRun) {
328
- const recovery = (0, wakeup_reconcile_js_1.reconcileParallelSessionRecovery)(global.db, project.path);
329
- if (recovery.workstreamsToRestart.length > 0) {
330
- for (const ws of recovery.workstreamsToRestart) {
331
- (0, wakeup_runner_js_1.restartWorkstreamRunner)(ws);
390
+ if (!dryRun) {
391
+ const recovery = (0, wakeup_reconcile_js_1.reconcileParallelSessionRecovery)(global.db, project.path);
392
+ if (recovery.workstreamsToRestart.length > 0) {
393
+ for (const ws of recovery.workstreamsToRestart) {
394
+ (0, wakeup_runner_js_1.restartWorkstreamRunner)(ws);
395
+ }
396
+ retrySummary += `, restarted ${recovery.workstreamsToRestart.length} workstream runner(s)`;
397
+ }
398
+ if (recovery.blockedWorkstreams > 0) {
399
+ retrySummary += `, blocked ${recovery.blockedWorkstreams} workstream(s)`;
400
+ }
401
+ if (scaledDown > 0) {
402
+ retrySummary += `, scaled down ${scaledDown} idle runner(s) to maxClones=${configuredMaxClones}`;
403
+ }
404
+ if (resumed > 0) {
405
+ retrySummary += `, resumed ${resumed} throttled workstream(s)`;
406
+ }
407
+ // Re-check activity after recovery. If reconciliation cleared stale
408
+ // session state for this project, continue to normal startup logic.
409
+ if (!(0, wakeup_checks_js_1.hasActiveParallelSessionForProject)(project.path)) {
410
+ skipForParallelSession = false;
411
+ if (retrySummary.length > 0) {
412
+ retrySummary += ', session state reconciled';
413
+ }
414
+ else {
415
+ retrySummary = ', session state reconciled';
416
+ }
332
417
  }
333
- retrySummary += `, restarted ${recovery.workstreamsToRestart.length} workstream runner(s)`;
334
- }
335
- if (recovery.blockedWorkstreams > 0) {
336
- retrySummary += `, blocked ${recovery.blockedWorkstreams} workstream(s)`;
337
- }
338
- if (scaledDown > 0) {
339
- retrySummary += `, scaled down ${scaledDown} idle runner(s) to maxClones=${configuredMaxClones}`;
340
- }
341
- if (resumed > 0) {
342
- retrySummary += `, resumed ${resumed} throttled workstream(s)`;
343
418
  }
344
- // Re-check activity after recovery. If reconciliation cleared stale
345
- // session state for this project, continue to normal startup logic.
346
- if (!(0, wakeup_checks_js_1.hasActiveParallelSessionForProject)(project.path)) {
347
- skipForParallelSession = false;
348
- if (retrySummary.length > 0) {
349
- retrySummary += ', session state reconciled';
419
+ else {
420
+ if (wouldScaleDown > 0) {
421
+ retrySummary += `, would scale down ${wouldScaleDown} idle runner(s) to maxClones=${configuredMaxClones}`;
350
422
  }
351
- else {
352
- retrySummary = ', session state reconciled';
423
+ if (wouldResume > 0) {
424
+ retrySummary += `, would resume ${wouldResume} throttled workstream(s)`;
353
425
  }
354
426
  }
355
- }
356
- else {
357
- if (wouldScaleDown > 0) {
358
- retrySummary += `, would scale down ${wouldScaleDown} idle runner(s) to maxClones=${configuredMaxClones}`;
427
+ if (!skipForParallelSession) {
428
+ log(`Reconciled stale parallel session for ${project.path}; proceeding with startup`);
359
429
  }
360
- if (wouldResume > 0) {
361
- retrySummary += `, would resume ${wouldResume} throttled workstream(s)`;
430
+ else {
431
+ log(`Skipping ${project.path}: active parallel session in progress${retrySummary}`);
432
+ results.push({
433
+ action: 'none',
434
+ reason: `Parallel session already running${retrySummary}`,
435
+ projectPath: project.path,
436
+ deletedInvocationLogs,
437
+ });
438
+ continue;
362
439
  }
363
440
  }
364
- if (!skipForParallelSession) {
365
- log(`Reconciled stale parallel session for ${project.path}; proceeding with startup`);
366
- }
367
- else {
368
- log(`Skipping ${project.path}: active parallel session in progress${retrySummary}`);
369
- results.push({
370
- action: 'none',
371
- reason: `Parallel session already running${retrySummary}`,
372
- projectPath: project.path,
373
- deletedInvocationLogs,
374
- });
375
- continue;
376
- }
377
- }
378
- // Config-aware mode reconciliation:
379
- // if parallel is enabled but an idle standalone runner is active, recycle it
380
- // so wakeup applies current parallel settings without manual restart.
381
- const activeStandaloneRunner = global.db
382
- .prepare(`SELECT id, pid, status, current_task_id
441
+ // Config-aware mode reconciliation:
442
+ // if parallel is enabled but an idle standalone runner is active, recycle it
443
+ // so wakeup applies current parallel settings without manual restart.
444
+ const activeStandaloneRunner = global.db
445
+ .prepare(`SELECT id, pid, status, current_task_id
383
446
  FROM runners
384
447
  WHERE project_path = ?
385
448
  AND parallel_session_id IS NULL
@@ -387,113 +450,117 @@ async function wakeup(options = {}) {
387
450
  AND heartbeat_at > datetime('now', '-5 minutes')
388
451
  ORDER BY heartbeat_at DESC
389
452
  LIMIT 1`)
390
- .get(project.path);
391
- if (activeStandaloneRunner && parallelEnabled) {
392
- const isIdle = (activeStandaloneRunner.status ?? '').toLowerCase() === 'idle' && !activeStandaloneRunner.current_task_id;
393
- if (isIdle) {
394
- if (dryRun) {
395
- log(`Would recycle idle standalone runner for ${project.path} to apply parallel mode`);
396
- results.push({
397
- action: 'would_start',
398
- reason: 'Would restart idle runner to apply parallel mode',
399
- projectPath: project.path,
400
- deletedInvocationLogs,
401
- });
453
+ .get(project.path);
454
+ if (activeStandaloneRunner && parallelEnabled) {
455
+ const isIdle = (activeStandaloneRunner.status ?? '').toLowerCase() === 'idle' && !activeStandaloneRunner.current_task_id;
456
+ if (isIdle) {
457
+ if (dryRun) {
458
+ log(`Would recycle idle standalone runner for ${project.path} to apply parallel mode`);
459
+ results.push({
460
+ action: 'would_start',
461
+ reason: 'Would restart idle runner to apply parallel mode',
462
+ projectPath: project.path,
463
+ deletedInvocationLogs,
464
+ });
465
+ continue;
466
+ }
467
+ if (activeStandaloneRunner.pid) {
468
+ (0, wakeup_runner_js_1.killProcess)(activeStandaloneRunner.pid);
469
+ }
470
+ global.db.prepare('DELETE FROM runners WHERE id = ?').run(activeStandaloneRunner.id);
471
+ const restartResult = (0, wakeup_runner_js_1.startRunner)(project.path);
472
+ if (restartResult) {
473
+ results.push({
474
+ action: 'restarted',
475
+ reason: 'Restarted idle runner to apply parallel mode',
476
+ pid: restartResult.pid,
477
+ projectPath: project.path,
478
+ deletedInvocationLogs,
479
+ });
480
+ }
481
+ else {
482
+ results.push({
483
+ action: 'none',
484
+ reason: 'Failed to restart idle runner for parallel mode',
485
+ projectPath: project.path,
486
+ deletedInvocationLogs,
487
+ });
488
+ }
402
489
  continue;
403
490
  }
404
- if (activeStandaloneRunner.pid) {
405
- (0, wakeup_runner_js_1.killProcess)(activeStandaloneRunner.pid);
406
- }
407
- global.db.prepare('DELETE FROM runners WHERE id = ?').run(activeStandaloneRunner.id);
408
- const restartResult = (0, wakeup_runner_js_1.startRunner)(project.path);
409
- if (restartResult) {
410
- results.push({
411
- action: 'restarted',
412
- reason: 'Restarted idle runner to apply parallel mode',
413
- pid: restartResult.pid,
414
- projectPath: project.path,
415
- deletedInvocationLogs,
416
- });
417
- }
418
- else {
419
- results.push({
420
- action: 'none',
421
- reason: 'Failed to restart idle runner for parallel mode',
422
- projectPath: project.path,
423
- deletedInvocationLogs,
424
- });
425
- }
491
+ }
492
+ // Skip if project already has an active runner (after recovery, which may have killed/removed it).
493
+ if ((0, wakeup_checks_js_1.hasActiveRunnerForProject)(project.path)) {
494
+ log(`Skipping ${project.path}: runner already active`);
495
+ results.push({
496
+ action: 'none',
497
+ reason: recoveredActions > 0
498
+ ? `Runner already active (recovered ${recoveredActions} stuck item(s))`
499
+ : 'Runner already active',
500
+ projectPath: project.path,
501
+ recoveredActions,
502
+ skippedRecoveryDueToSafetyLimit,
503
+ deletedInvocationLogs,
504
+ sanitisedActions,
505
+ });
426
506
  continue;
427
507
  }
508
+ // Start runner for this project
509
+ const willParallel = parallelEnabled;
510
+ log(`Starting ${willParallel ? 'parallel session' : 'runner'} for: ${project.path}`);
511
+ if (dryRun) {
512
+ results.push({
513
+ action: 'would_start',
514
+ reason: recoveredActions > 0 ? `Recovered ${recoveredActions} stuck item(s); would start runner (dry-run)` : `Would start runner (dry-run)`,
515
+ projectPath: project.path,
516
+ recoveredActions,
517
+ skippedRecoveryDueToSafetyLimit,
518
+ deletedInvocationLogs,
519
+ sanitisedActions,
520
+ });
521
+ continue;
522
+ }
523
+ const startResult = (0, wakeup_runner_js_1.startRunner)(project.path);
524
+ if (startResult) {
525
+ const mode = startResult.parallel ? 'parallel session' : 'runner';
526
+ results.push({
527
+ action: 'started',
528
+ reason: recoveredActions > 0 ? `Recovered ${recoveredActions} stuck item(s); started ${mode}` : `Started ${mode}`,
529
+ pid: startResult.pid,
530
+ projectPath: project.path,
531
+ recoveredActions,
532
+ skippedRecoveryDueToSafetyLimit,
533
+ deletedInvocationLogs,
534
+ sanitisedActions,
535
+ });
536
+ }
537
+ else {
538
+ results.push({
539
+ action: 'none',
540
+ reason: recoveredActions > 0 ? `Recovered ${recoveredActions} stuck item(s); failed to start runner` : 'Failed to start runner',
541
+ projectPath: project.path,
542
+ recoveredActions,
543
+ skippedRecoveryDueToSafetyLimit,
544
+ deletedInvocationLogs,
545
+ sanitisedActions,
546
+ });
547
+ }
428
548
  }
429
- // Skip if project already has an active runner (after recovery, which may have killed/removed it).
430
- if ((0, wakeup_checks_js_1.hasActiveRunnerForProject)(project.path)) {
431
- log(`Skipping ${project.path}: runner already active`);
432
- results.push({
433
- action: 'none',
434
- reason: recoveredActions > 0
435
- ? `Runner already active (recovered ${recoveredActions} stuck item(s))`
436
- : 'Runner already active',
437
- projectPath: project.path,
438
- recoveredActions,
439
- skippedRecoveryDueToSafetyLimit,
440
- deletedInvocationLogs,
441
- sanitisedActions,
442
- });
443
- continue;
444
- }
445
- // Start runner for this project
446
- const willParallel = parallelEnabled;
447
- log(`Starting ${willParallel ? 'parallel session' : 'runner'} for: ${project.path}`);
448
- if (dryRun) {
449
- results.push({
450
- action: 'would_start',
451
- reason: recoveredActions > 0 ? `Recovered ${recoveredActions} stuck item(s); would start runner (dry-run)` : `Would start runner (dry-run)`,
452
- projectPath: project.path,
453
- recoveredActions,
454
- skippedRecoveryDueToSafetyLimit,
455
- deletedInvocationLogs,
456
- sanitisedActions,
457
- });
458
- continue;
459
- }
460
- const startResult = (0, wakeup_runner_js_1.startRunner)(project.path);
461
- if (startResult) {
462
- const mode = startResult.parallel ? 'parallel session' : 'runner';
463
- results.push({
464
- action: 'started',
465
- reason: recoveredActions > 0 ? `Recovered ${recoveredActions} stuck item(s); started ${mode}` : `Started ${mode}`,
466
- pid: startResult.pid,
467
- projectPath: project.path,
468
- recoveredActions,
469
- skippedRecoveryDueToSafetyLimit,
470
- deletedInvocationLogs,
471
- sanitisedActions,
472
- });
473
- }
474
- else {
549
+ // If no specific results, add a summary
550
+ if (results.length === 0) {
475
551
  results.push({
476
552
  action: 'none',
477
- reason: recoveredActions > 0 ? `Recovered ${recoveredActions} stuck item(s); failed to start runner` : 'Failed to start runner',
478
- projectPath: project.path,
479
- recoveredActions,
480
- skippedRecoveryDueToSafetyLimit,
481
- deletedInvocationLogs,
482
- sanitisedActions,
553
+ reason: 'No action needed',
483
554
  });
484
555
  }
556
+ return results;
485
557
  }
486
- // If no specific results, add a summary
487
- if (results.length === 0) {
488
- results.push({
489
- action: 'none',
490
- reason: 'No action needed',
491
- });
558
+ finally {
559
+ global.close();
492
560
  }
493
- return results;
494
561
  }
495
562
  finally {
496
- global.close();
563
+ isWakeupRunning = false;
497
564
  }
498
565
  }
499
566
  /**