@coralai/sps-cli 0.15.12 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +65 -0
- package/dist/commands/monitorTick.d.ts.map +1 -1
- package/dist/commands/monitorTick.js +3 -1
- package/dist/commands/monitorTick.js.map +1 -1
- package/dist/commands/pipelineTick.d.ts.map +1 -1
- package/dist/commands/pipelineTick.js +16 -3
- package/dist/commands/pipelineTick.js.map +1 -1
- package/dist/commands/tick.d.ts +1 -0
- package/dist/commands/tick.d.ts.map +1 -1
- package/dist/commands/tick.js +73 -8
- package/dist/commands/tick.js.map +1 -1
- package/dist/commands/workerLaunch.d.ts.map +1 -1
- package/dist/commands/workerLaunch.js +16 -3
- package/dist/commands/workerLaunch.js.map +1 -1
- package/dist/engines/ExecutionEngine.d.ts +29 -32
- package/dist/engines/ExecutionEngine.d.ts.map +1 -1
- package/dist/engines/ExecutionEngine.js +247 -527
- package/dist/engines/ExecutionEngine.js.map +1 -1
- package/dist/engines/MonitorEngine.d.ts +14 -27
- package/dist/engines/MonitorEngine.d.ts.map +1 -1
- package/dist/engines/MonitorEngine.js +91 -313
- package/dist/engines/MonitorEngine.js.map +1 -1
- package/dist/main.js +0 -0
- package/dist/manager/completion-judge.d.ts +27 -0
- package/dist/manager/completion-judge.d.ts.map +1 -0
- package/dist/manager/completion-judge.js +94 -0
- package/dist/manager/completion-judge.js.map +1 -0
- package/dist/manager/pm-client.d.ts +10 -0
- package/dist/manager/pm-client.d.ts.map +1 -0
- package/dist/manager/pm-client.js +253 -0
- package/dist/manager/pm-client.js.map +1 -0
- package/dist/manager/post-actions.d.ts +60 -0
- package/dist/manager/post-actions.d.ts.map +1 -0
- package/dist/manager/post-actions.js +338 -0
- package/dist/manager/post-actions.js.map +1 -0
- package/dist/manager/recovery.d.ts +39 -0
- package/dist/manager/recovery.d.ts.map +1 -0
- package/dist/manager/recovery.js +135 -0
- package/dist/manager/recovery.js.map +1 -0
- package/dist/manager/resource-limiter.d.ts +44 -0
- package/dist/manager/resource-limiter.d.ts.map +1 -0
- package/dist/manager/resource-limiter.js +79 -0
- package/dist/manager/resource-limiter.js.map +1 -0
- package/dist/manager/supervisor.d.ts +88 -0
- package/dist/manager/supervisor.d.ts.map +1 -0
- package/dist/manager/supervisor.js +267 -0
- package/dist/manager/supervisor.js.map +1 -0
- package/package.json +1 -1
|
@@ -2,20 +2,27 @@ import { writeFileSync, readFileSync, mkdirSync, existsSync } from 'node:fs';
|
|
|
2
2
|
import { resolve } from 'node:path';
|
|
3
3
|
import { readState, writeState } from '../core/state.js';
|
|
4
4
|
import { resolveWorktreePath } from '../core/paths.js';
|
|
5
|
+
import { readQueue } from '../core/queue.js';
|
|
5
6
|
import { Logger } from '../core/logger.js';
|
|
6
7
|
const SKIP_LABELS = ['BLOCKED', 'NEEDS-FIX', 'CONFLICT', 'WAITING-CONFIRMATION', 'STALE-RUNTIME'];
|
|
7
8
|
export class ExecutionEngine {
|
|
8
9
|
ctx;
|
|
9
10
|
taskBackend;
|
|
10
|
-
workerProvider;
|
|
11
11
|
repoBackend;
|
|
12
|
+
supervisor;
|
|
13
|
+
completionJudge;
|
|
14
|
+
postActions;
|
|
15
|
+
resourceLimiter;
|
|
12
16
|
notifier;
|
|
13
17
|
log;
|
|
14
|
-
constructor(ctx, taskBackend,
|
|
18
|
+
constructor(ctx, taskBackend, repoBackend, supervisor, completionJudge, postActions, resourceLimiter, notifier) {
|
|
15
19
|
this.ctx = ctx;
|
|
16
20
|
this.taskBackend = taskBackend;
|
|
17
|
-
this.workerProvider = workerProvider;
|
|
18
21
|
this.repoBackend = repoBackend;
|
|
22
|
+
this.supervisor = supervisor;
|
|
23
|
+
this.completionJudge = completionJudge;
|
|
24
|
+
this.postActions = postActions;
|
|
25
|
+
this.resourceLimiter = resourceLimiter;
|
|
19
26
|
this.notifier = notifier;
|
|
20
27
|
this.log = new Logger('pipeline', ctx.projectName, ctx.paths.logsDir);
|
|
21
28
|
}
|
|
@@ -67,8 +74,23 @@ export class ExecutionEngine {
|
|
|
67
74
|
// 3. Process Todo cards (launch: claim + context + worker + move to Inprogress)
|
|
68
75
|
// This is the only step that consumes action quota — it starts
|
|
69
76
|
// resource-intensive AI workers that need system capacity.
|
|
70
|
-
//
|
|
71
|
-
|
|
77
|
+
// Sort by pipeline_order to respect card priority (#5 skip bug fix).
|
|
78
|
+
let todoCards = await this.taskBackend.listByState('Todo');
|
|
79
|
+
const pipelineOrder = readQueue(this.ctx.paths.pipelineOrderFile);
|
|
80
|
+
if (pipelineOrder.length > 0) {
|
|
81
|
+
todoCards = todoCards.sort((a, b) => {
|
|
82
|
+
const aIdx = pipelineOrder.indexOf(parseInt(a.seq, 10));
|
|
83
|
+
const bIdx = pipelineOrder.indexOf(parseInt(b.seq, 10));
|
|
84
|
+
// Cards in pipeline_order come first, in order; others after
|
|
85
|
+
if (aIdx >= 0 && bIdx >= 0)
|
|
86
|
+
return aIdx - bIdx;
|
|
87
|
+
if (aIdx >= 0)
|
|
88
|
+
return -1;
|
|
89
|
+
if (bIdx >= 0)
|
|
90
|
+
return 1;
|
|
91
|
+
return parseInt(a.seq, 10) - parseInt(b.seq, 10);
|
|
92
|
+
});
|
|
93
|
+
}
|
|
72
94
|
let launchedThisTick = 0;
|
|
73
95
|
const failedSlots = new Set(); // track slots that failed launch this tick
|
|
74
96
|
for (const card of todoCards) {
|
|
@@ -78,12 +100,7 @@ export class ExecutionEngine {
|
|
|
78
100
|
actions.push({ action: 'skip', entity: `seq:${card.seq}`, result: 'skip', message: 'Has auxiliary state label' });
|
|
79
101
|
continue;
|
|
80
102
|
}
|
|
81
|
-
// Stagger
|
|
82
|
-
if (launchedThisTick > 0) {
|
|
83
|
-
const delay = this.ctx.config.WORKER_MODE === 'print' ? 2_000 : 10_000;
|
|
84
|
-
this.log.info(`Waiting ${delay / 1000}s before next worker launch...`);
|
|
85
|
-
await new Promise((r) => setTimeout(r, delay));
|
|
86
|
-
}
|
|
103
|
+
// Stagger is handled by ResourceLimiter.enforceStagger() inside launchCard
|
|
87
104
|
const launchResult = await this.launchCard(card, opts, failedSlots);
|
|
88
105
|
actions.push(launchResult);
|
|
89
106
|
if (launchResult.result === 'ok') {
|
|
@@ -162,245 +179,58 @@ export class ExecutionEngine {
|
|
|
162
179
|
shouldSkip(card) {
|
|
163
180
|
return SKIP_LABELS.some((label) => card.labels.includes(label));
|
|
164
181
|
}
|
|
165
|
-
// ─── Inprogress Phase (detect completion →
|
|
182
|
+
// ─── Inprogress Phase (detect completion → Done) ────────────────
|
|
166
183
|
/**
|
|
167
|
-
* Check an Inprogress card:
|
|
168
|
-
* This is the critical Inprogress → QA bridge (01 §10.2).
|
|
184
|
+
* Check an Inprogress card: verify worker is still running or handled by exit callback.
|
|
169
185
|
*
|
|
170
|
-
*
|
|
171
|
-
*
|
|
172
|
-
*
|
|
173
|
-
*
|
|
174
|
-
* BLOCKED → mark BLOCKED
|
|
175
|
-
* ALIVE → no action (worker still working)
|
|
176
|
-
* DEAD → mark STALE-RUNTIME (handled by MonitorEngine)
|
|
177
|
-
* DEAD_EXCEEDED → mark STALE-RUNTIME, notify
|
|
186
|
+
* The Supervisor exit callback triggers CompletionJudge → PostActions automatically,
|
|
187
|
+
* so this method only needs to:
|
|
188
|
+
* - Update heartbeat if worker is still running
|
|
189
|
+
* - Confirm completion if PostActions already processed it
|
|
178
190
|
*/
|
|
179
191
|
async checkInprogressCard(card, opts) {
|
|
180
192
|
const seq = card.seq;
|
|
181
193
|
const state = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
182
|
-
// Find this card's worker slot
|
|
183
194
|
const slotEntry = Object.entries(state.workers).find(([, w]) => w.seq === parseInt(seq, 10) && w.status === 'active');
|
|
184
195
|
if (!slotEntry) {
|
|
185
|
-
//
|
|
196
|
+
// Slot already released (PostActions handled it via exit callback)
|
|
186
197
|
return null;
|
|
187
198
|
}
|
|
188
|
-
const [slotName
|
|
189
|
-
const
|
|
190
|
-
|
|
199
|
+
const [slotName] = slotEntry;
|
|
200
|
+
const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
|
|
201
|
+
const handle = this.supervisor.get(workerId);
|
|
202
|
+
if (handle && handle.exitCode === null) {
|
|
203
|
+
// Worker still running — update heartbeat
|
|
204
|
+
try {
|
|
205
|
+
const freshState = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
206
|
+
if (freshState.workers[slotName]) {
|
|
207
|
+
freshState.workers[slotName].lastHeartbeat = new Date().toISOString();
|
|
208
|
+
writeState(this.ctx.paths.stateFile, freshState, 'pipeline-heartbeat');
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
catch { /* non-fatal */ }
|
|
191
212
|
return null;
|
|
192
|
-
// Determine logDir for completion marker detection
|
|
193
|
-
const logDir = this.ctx.paths.logsDir;
|
|
194
|
-
const branch = slotState.branch || this.buildBranchName(card);
|
|
195
|
-
let workerStatus;
|
|
196
|
-
try {
|
|
197
|
-
workerStatus = await this.workerProvider.detectCompleted(session, logDir, branch);
|
|
198
213
|
}
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
214
|
+
if (handle && handle.exitCode !== null) {
|
|
215
|
+
// Worker exited but PostActions hasn't finished yet (or just finished)
|
|
216
|
+
// Check if slot is now idle
|
|
217
|
+
const freshState = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
218
|
+
if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
|
|
219
|
+
this.log.ok(`seq ${seq}: Completed (handled by exit callback)`);
|
|
220
|
+
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via exit callback' };
|
|
221
|
+
}
|
|
222
|
+
// PostActions still processing, wait for next tick
|
|
202
223
|
return null;
|
|
203
224
|
}
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
if (this.ctx.mrMode === 'none') {
|
|
211
|
-
// ── MR_MODE=none: worker merges directly to target branch ──
|
|
212
|
-
// Check if feature branch is merged into target
|
|
213
|
-
const worktree = slotState.worktree;
|
|
214
|
-
let isMerged = false;
|
|
215
|
-
if (worktree) {
|
|
216
|
-
try {
|
|
217
|
-
// Fetch latest and check if feature commits are in target
|
|
218
|
-
const { execFileSync } = await import('node:child_process');
|
|
219
|
-
try {
|
|
220
|
-
execFileSync('git', ['-C', worktree, 'fetch', 'origin', '--quiet'], { timeout: 10_000, stdio: ['ignore', 'pipe', 'pipe'] });
|
|
221
|
-
}
|
|
222
|
-
catch { /* offline ok */ }
|
|
223
|
-
const unmerged = execFileSync('git', ['-C', worktree, 'rev-list', '--count', `origin/${this.ctx.mergeBranch}..${branch}`], { encoding: 'utf-8', timeout: 5_000, stdio: ['ignore', 'pipe', 'pipe'] }).trim();
|
|
224
|
-
isMerged = parseInt(unmerged, 10) === 0;
|
|
225
|
-
}
|
|
226
|
-
catch { /* git error, fall through */ }
|
|
227
|
-
}
|
|
228
|
-
if (!isMerged) {
|
|
229
|
-
// Worker pushed but didn't merge to target yet — resume it
|
|
230
|
-
this.log.info(`seq ${seq}: Branch not merged into ${this.ctx.mergeBranch}, resuming worker`);
|
|
231
|
-
const isPrintMode = slotState.mode === 'print';
|
|
232
|
-
if (isPrintMode && slotState.sessionId) {
|
|
233
|
-
const resumeResult = await this.attemptMergeResume(seq, slotName, slotState, card);
|
|
234
|
-
if (resumeResult)
|
|
235
|
-
return resumeResult;
|
|
236
|
-
}
|
|
237
|
-
// Resume not possible — system merges directly as fallback
|
|
238
|
-
this.log.info(`seq ${seq}: Resume not possible, system merging directly`);
|
|
239
|
-
let mergeFailed = true;
|
|
240
|
-
if (worktree) {
|
|
241
|
-
try {
|
|
242
|
-
await this.repoBackend.rebase(worktree, this.ctx.mergeBranch);
|
|
243
|
-
await this.repoBackend.push(worktree, branch, true);
|
|
244
|
-
const { execFileSync } = await import('node:child_process');
|
|
245
|
-
execFileSync('git', ['-C', worktree, 'checkout', this.ctx.mergeBranch], { timeout: 10_000, stdio: ['ignore', 'pipe', 'pipe'] });
|
|
246
|
-
execFileSync('git', ['-C', worktree, 'merge', '--no-ff', branch, '-m', `Merge ${branch} into ${this.ctx.mergeBranch}`], { timeout: 10_000, stdio: ['ignore', 'pipe', 'pipe'] });
|
|
247
|
-
execFileSync('git', ['-C', worktree, 'push', 'origin', this.ctx.mergeBranch], { timeout: 30_000, stdio: ['ignore', 'pipe', 'pipe'] });
|
|
248
|
-
this.log.ok(`seq ${seq}: System fallback merged ${branch} into ${this.ctx.mergeBranch}`);
|
|
249
|
-
mergeFailed = false;
|
|
250
|
-
}
|
|
251
|
-
catch (mergeErr) {
|
|
252
|
-
const msg = mergeErr instanceof Error ? mergeErr.message : String(mergeErr);
|
|
253
|
-
this.log.error(`seq ${seq}: System fallback merge failed: ${msg}`);
|
|
254
|
-
}
|
|
255
|
-
}
|
|
256
|
-
if (mergeFailed) {
|
|
257
|
-
// Merge failed — mark NEEDS-FIX, don't move to Done
|
|
258
|
-
try {
|
|
259
|
-
await this.taskBackend.addLabel(seq, 'NEEDS-FIX');
|
|
260
|
-
}
|
|
261
|
-
catch { /* best effort */ }
|
|
262
|
-
try {
|
|
263
|
-
await this.taskBackend.comment(seq, `Branch pushed but merge to ${this.ctx.mergeBranch} failed. Manual merge needed.`);
|
|
264
|
-
}
|
|
265
|
-
catch { /* best effort */ }
|
|
266
|
-
this.logEvent('merge-failed', seq, 'fail');
|
|
267
|
-
return { action: 'mark-needs-fix', entity: `seq:${seq}`, result: 'ok', message: `System merge to ${this.ctx.mergeBranch} failed — NEEDS-FIX` };
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
// Merge confirmed — move to Done + release resources + cleanup worktree
|
|
271
|
-
return await this.completeAndRelease(card, slotName, slotState);
|
|
272
|
-
}
|
|
273
|
-
else {
|
|
274
|
-
// ── MR_MODE=create: worker creates MR, task is done ──
|
|
275
|
-
let mrExists = false;
|
|
276
|
-
try {
|
|
277
|
-
const mrStatus = await this.repoBackend.getMrStatus(branch);
|
|
278
|
-
mrExists = mrStatus.exists;
|
|
279
|
-
}
|
|
280
|
-
catch { /* can't check */ }
|
|
281
|
-
if (!mrExists) {
|
|
282
|
-
// MR not found — try resume worker to create it
|
|
283
|
-
this.log.info(`seq ${seq}: MR not found, resuming worker to create it`);
|
|
284
|
-
const isPrintMode = slotState.mode === 'print';
|
|
285
|
-
if (isPrintMode && slotState.sessionId) {
|
|
286
|
-
const resumeResult = await this.attemptMergeResume(seq, slotName, slotState, card);
|
|
287
|
-
if (resumeResult)
|
|
288
|
-
return resumeResult;
|
|
289
|
-
}
|
|
290
|
-
// Fallback: system creates MR
|
|
291
|
-
this.log.info(`seq ${seq}: Resume not possible, system creating MR`);
|
|
292
|
-
try {
|
|
293
|
-
await this.repoBackend.createOrUpdateMr(branch, `${card.seq}: ${card.name}`, `Auto-created by pipeline for seq:${card.seq}.\n\nBranch: ${branch}`);
|
|
294
|
-
this.log.ok(`seq ${seq}: System created MR for branch ${branch}`);
|
|
295
|
-
mrExists = true;
|
|
296
|
-
}
|
|
297
|
-
catch (mrErr) {
|
|
298
|
-
const mrMsg = mrErr instanceof Error ? mrErr.message : String(mrErr);
|
|
299
|
-
this.log.error(`seq ${seq}: System MR creation failed: ${mrMsg}`);
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
if (!mrExists) {
|
|
303
|
-
// MR creation failed — mark NEEDS-FIX
|
|
304
|
-
this.log.error(`seq ${seq}: MR creation failed after all attempts`);
|
|
305
|
-
try {
|
|
306
|
-
await this.taskBackend.addLabel(seq, 'NEEDS-FIX');
|
|
307
|
-
}
|
|
308
|
-
catch { /* best effort */ }
|
|
309
|
-
try {
|
|
310
|
-
await this.taskBackend.comment(seq, 'Branch pushed but MR creation failed. Manual MR needed.');
|
|
311
|
-
}
|
|
312
|
-
catch { /* best effort */ }
|
|
313
|
-
this.logEvent('mr-creation-failed', seq, 'fail');
|
|
314
|
-
return { action: 'mark-needs-fix', entity: `seq:${seq}`, result: 'ok', message: 'MR creation failed — NEEDS-FIX' };
|
|
315
|
-
}
|
|
316
|
-
// MR confirmed — task is done, release resources
|
|
317
|
-
return await this.completeAndRelease(card, slotName, slotState);
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
case 'AUTO_CONFIRM': {
|
|
321
|
-
// Non-destructive confirmation prompt → auto-confirm
|
|
322
|
-
this.log.info(`seq ${seq}: Worker waiting for non-destructive confirmation, auto-confirming`);
|
|
323
|
-
try {
|
|
324
|
-
await this.workerProvider.sendFix(session, 'y');
|
|
325
|
-
this.logEvent('auto-confirm', seq, 'ok');
|
|
326
|
-
if (this.notifier) {
|
|
327
|
-
await this.notifier.send(`[${this.ctx.projectName}] seq:${seq} auto-confirmed`, 'info').catch(() => { });
|
|
328
|
-
}
|
|
329
|
-
}
|
|
330
|
-
catch {
|
|
331
|
-
this.log.warn(`seq ${seq}: Auto-confirm failed`);
|
|
332
|
-
}
|
|
333
|
-
return { action: 'auto-confirm', entity: `seq:${seq}`, result: 'ok', message: 'Auto-confirmed non-destructive prompt' };
|
|
334
|
-
}
|
|
335
|
-
case 'NEEDS_INPUT': {
|
|
336
|
-
// Destructive confirmation → mark WAITING-CONFIRMATION, notify Boss
|
|
337
|
-
this.log.warn(`seq ${seq}: Worker waiting for destructive confirmation`);
|
|
338
|
-
try {
|
|
339
|
-
await this.taskBackend.addLabel(seq, 'WAITING-CONFIRMATION');
|
|
340
|
-
}
|
|
341
|
-
catch { /* best effort */ }
|
|
342
|
-
if (this.notifier) {
|
|
343
|
-
await this.notifier.sendWarning(`[${this.ctx.projectName}] seq:${seq} worker waiting for destructive confirmation`).catch(() => { });
|
|
344
|
-
}
|
|
345
|
-
this.logEvent('waiting-destructive', seq, 'ok');
|
|
346
|
-
return { action: 'mark-waiting', entity: `seq:${seq}`, result: 'ok', message: 'Destructive confirmation — waiting for human' };
|
|
347
|
-
}
|
|
348
|
-
case 'BLOCKED': {
|
|
349
|
-
this.log.warn(`seq ${seq}: Worker appears blocked`);
|
|
350
|
-
try {
|
|
351
|
-
await this.taskBackend.addLabel(seq, 'BLOCKED');
|
|
352
|
-
}
|
|
353
|
-
catch { /* best effort */ }
|
|
354
|
-
this.logEvent('blocked', seq, 'ok');
|
|
355
|
-
return { action: 'mark-blocked', entity: `seq:${seq}`, result: 'ok', message: 'Worker blocked' };
|
|
356
|
-
}
|
|
357
|
-
case 'EXITED_INCOMPLETE':
|
|
358
|
-
case 'DEAD':
|
|
359
|
-
case 'DEAD_EXCEEDED': {
|
|
360
|
-
// Worker exited without completing. Attempt auto-resume if:
|
|
361
|
-
// - Print mode (can --resume to continue context)
|
|
362
|
-
// - Retry limit not exhausted
|
|
363
|
-
// Otherwise mark NEEDS-FIX.
|
|
364
|
-
const isPrintMode = slotState.mode === 'print';
|
|
365
|
-
const reason = workerStatus === 'EXITED_INCOMPLETE'
|
|
366
|
-
? 'exited without artifacts (token limit / gave up)'
|
|
367
|
-
: `process died (${workerStatus})`;
|
|
368
|
-
this.log.warn(`seq ${seq}: Worker ${reason}`);
|
|
369
|
-
if (isPrintMode && slotState.sessionId) {
|
|
370
|
-
const retryResult = await this.attemptResume(seq, slotName, slotState, card, reason);
|
|
371
|
-
if (retryResult)
|
|
372
|
-
return retryResult;
|
|
373
|
-
}
|
|
374
|
-
// No resume possible or retries exhausted → NEEDS-FIX
|
|
375
|
-
if (workerStatus === 'DEAD' || workerStatus === 'DEAD_EXCEEDED') {
|
|
376
|
-
// Also defer to MonitorEngine for STALE-RUNTIME marking
|
|
377
|
-
return null;
|
|
378
|
-
}
|
|
379
|
-
try {
|
|
380
|
-
await this.taskBackend.addLabel(seq, 'NEEDS-FIX');
|
|
381
|
-
await this.taskBackend.comment(seq, `Worker ${reason}. Resume retries exhausted.`);
|
|
382
|
-
}
|
|
383
|
-
catch { /* best effort */ }
|
|
384
|
-
if (this.notifier) {
|
|
385
|
-
await this.notifier.sendWarning(`[${this.ctx.projectName}] seq:${seq} worker ${reason} — retries exhausted, NEEDS-FIX`).catch(() => { });
|
|
386
|
-
}
|
|
387
|
-
this.logEvent('exited-incomplete-final', seq, 'ok');
|
|
388
|
-
return { action: 'mark-needs-fix', entity: `seq:${seq}`, result: 'ok', message: `Worker ${reason}, retries exhausted (NEEDS-FIX)` };
|
|
389
|
-
}
|
|
390
|
-
case 'ALIVE':
|
|
391
|
-
default:
|
|
392
|
-
// Worker still running — no action needed
|
|
393
|
-
// Update heartbeat
|
|
394
|
-
try {
|
|
395
|
-
const freshState = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
396
|
-
if (freshState.workers[slotName]) {
|
|
397
|
-
freshState.workers[slotName].lastHeartbeat = new Date().toISOString();
|
|
398
|
-
writeState(this.ctx.paths.stateFile, freshState, 'pipeline-heartbeat');
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
catch { /* non-fatal */ }
|
|
402
|
-
return null;
|
|
225
|
+
// Handle not found in Supervisor — PostActions already removed it, or after tick restart
|
|
226
|
+
// Re-read state to check if PostActions already completed
|
|
227
|
+
const freshState = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
228
|
+
if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
|
|
229
|
+
this.log.ok(`seq ${seq}: Completed (PostActions already processed)`);
|
|
230
|
+
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (PostActions processed)' };
|
|
403
231
|
}
|
|
232
|
+
// Still active in state but not in Supervisor — MonitorEngine/Recovery handles
|
|
233
|
+
return null;
|
|
404
234
|
}
|
|
405
235
|
// ─── Prepare Phase (Backlog → Todo) ─────────────────────────────
|
|
406
236
|
/**
|
|
@@ -478,25 +308,7 @@ export class ExecutionEngine {
|
|
|
478
308
|
this.log.warn(`No idle worker slot available for seq ${seq}`);
|
|
479
309
|
return { action: 'launch', entity: `seq:${seq}`, result: 'skip', message: 'No idle worker slot' };
|
|
480
310
|
}
|
|
481
|
-
|
|
482
|
-
// Only applies to interactive (tmux) mode — print mode workers are one-shot processes
|
|
483
|
-
let slotEntry = idleSlots[0];
|
|
484
|
-
if (this.ctx.config.WORKER_SESSION_REUSE && this.ctx.config.WORKER_MODE !== 'print') {
|
|
485
|
-
for (const entry of idleSlots) {
|
|
486
|
-
const [name] = entry;
|
|
487
|
-
const sessionName = `${this.ctx.projectName}-${name}`;
|
|
488
|
-
try {
|
|
489
|
-
const inspection = await this.workerProvider.inspect(sessionName);
|
|
490
|
-
if (inspection.alive) {
|
|
491
|
-
slotEntry = entry;
|
|
492
|
-
this.log.info(`Preferring slot ${name} with live session`);
|
|
493
|
-
break;
|
|
494
|
-
}
|
|
495
|
-
}
|
|
496
|
-
catch { /* ignore */ }
|
|
497
|
-
}
|
|
498
|
-
}
|
|
499
|
-
const [slotName] = slotEntry;
|
|
311
|
+
const [slotName] = idleSlots[0];
|
|
500
312
|
const sessionName = `${this.ctx.projectName}-${slotName}`;
|
|
501
313
|
// Claim slot in state.json
|
|
502
314
|
state.workers[slotName] = {
|
|
@@ -554,25 +366,45 @@ export class ExecutionEngine {
|
|
|
554
366
|
this.logEvent('launch-context', seq, 'fail', { error: msg });
|
|
555
367
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Context build failed: ${msg}` };
|
|
556
368
|
}
|
|
557
|
-
// Step 6: Launch worker
|
|
369
|
+
// Step 6: Launch worker via Supervisor
|
|
558
370
|
try {
|
|
559
371
|
const promptFile = resolve(worktreePath, '.jarvis_task_prompt.txt');
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
this.
|
|
372
|
+
// Check global resource limit
|
|
373
|
+
if (!this.resourceLimiter.tryAcquire()) {
|
|
374
|
+
this.log.warn(`Global worker limit reached, skipping seq ${seq}`);
|
|
375
|
+
// Rollback: release slot
|
|
376
|
+
this.releaseSlot(slotName, seq);
|
|
377
|
+
return { action: 'launch', entity: `seq:${seq}`, result: 'skip', message: 'Global worker limit reached' };
|
|
378
|
+
}
|
|
379
|
+
await this.resourceLimiter.enforceStagger();
|
|
380
|
+
const prompt = readFileSync(promptFile, 'utf-8').trim();
|
|
381
|
+
const outputFile = resolve(this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`, `${sessionName}-${Date.now()}.jsonl`);
|
|
382
|
+
const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
|
|
383
|
+
const workerHandle = this.supervisor.spawn({
|
|
384
|
+
id: workerId,
|
|
385
|
+
project: this.ctx.projectName,
|
|
386
|
+
seq: card.seq,
|
|
387
|
+
slot: slotName,
|
|
388
|
+
worktree: worktreePath,
|
|
389
|
+
branch: branchName,
|
|
390
|
+
prompt,
|
|
391
|
+
outputFile,
|
|
392
|
+
tool: this.ctx.config.WORKER_TOOL,
|
|
393
|
+
onExit: (exitCode) => {
|
|
394
|
+
this.onWorkerExit(workerId, card, slotName, worktreePath, branchName, exitCode);
|
|
395
|
+
},
|
|
396
|
+
});
|
|
397
|
+
// Store process info in state
|
|
398
|
+
const freshState = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
399
|
+
if (freshState.workers[slotName]) {
|
|
400
|
+
freshState.workers[slotName].mode = 'print';
|
|
401
|
+
freshState.workers[slotName].pid = workerHandle.pid;
|
|
402
|
+
freshState.workers[slotName].outputFile = workerHandle.outputFile;
|
|
403
|
+
freshState.workers[slotName].sessionId = workerHandle.sessionId || null;
|
|
404
|
+
freshState.workers[slotName].exitCode = null;
|
|
405
|
+
writeState(this.ctx.paths.stateFile, freshState, 'pipeline-launch-print');
|
|
574
406
|
}
|
|
575
|
-
this.log.ok(`Step 6: Worker launched
|
|
407
|
+
this.log.ok(`Step 6: Worker launched for seq ${seq} (pid=${workerHandle.pid})`);
|
|
576
408
|
if (this.notifier) {
|
|
577
409
|
await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
|
|
578
410
|
}
|
|
@@ -581,6 +413,7 @@ export class ExecutionEngine {
|
|
|
581
413
|
const msg = err instanceof Error ? err.message : String(err);
|
|
582
414
|
this.log.error(`Step 6 failed (worker launch) for seq ${seq}: ${msg}`);
|
|
583
415
|
failedSlots.add(slotName);
|
|
416
|
+
this.resourceLimiter.release();
|
|
584
417
|
this.releaseSlot(slotName, seq);
|
|
585
418
|
this.logEvent('launch-worker', seq, 'fail', { error: msg });
|
|
586
419
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Worker launch failed: ${msg}` };
|
|
@@ -601,16 +434,71 @@ export class ExecutionEngine {
|
|
|
601
434
|
catch (err) {
|
|
602
435
|
const msg = err instanceof Error ? err.message : String(err);
|
|
603
436
|
this.log.error(`Step 7 failed (move) for seq ${seq}: ${msg}`);
|
|
604
|
-
// Rollback:
|
|
437
|
+
// Rollback: kill worker, release slot
|
|
438
|
+
const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
|
|
605
439
|
try {
|
|
606
|
-
await this.
|
|
440
|
+
await this.supervisor.kill(workerId);
|
|
607
441
|
}
|
|
608
442
|
catch { /* best effort */ }
|
|
443
|
+
this.resourceLimiter.release();
|
|
609
444
|
this.releaseSlot(slotName, seq);
|
|
610
445
|
this.logEvent('launch-move', seq, 'fail', { error: msg });
|
|
611
446
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Move to Inprogress failed: ${msg}` };
|
|
612
447
|
}
|
|
613
448
|
}
|
|
449
|
+
// ─── Worker Exit Callback ───────────────────────────────────────
|
|
450
|
+
/**
|
|
451
|
+
* Called by Supervisor when a worker process exits.
|
|
452
|
+
* Wires CompletionJudge → PostActions to handle completion or failure.
|
|
453
|
+
*/
|
|
454
|
+
async onWorkerExit(workerId, card, slotName, worktree, branch, exitCode) {
|
|
455
|
+
const handle = this.supervisor.get(workerId);
|
|
456
|
+
const completion = this.completionJudge.judge({
|
|
457
|
+
worktree,
|
|
458
|
+
branch,
|
|
459
|
+
baseBranch: this.ctx.mergeBranch,
|
|
460
|
+
outputFile: handle?.outputFile || null,
|
|
461
|
+
exitCode,
|
|
462
|
+
logsDir: this.ctx.paths.logsDir,
|
|
463
|
+
});
|
|
464
|
+
const ctx = {
|
|
465
|
+
project: this.ctx.projectName,
|
|
466
|
+
seq: card.seq,
|
|
467
|
+
slot: slotName,
|
|
468
|
+
branch,
|
|
469
|
+
worktree,
|
|
470
|
+
baseBranch: this.ctx.mergeBranch,
|
|
471
|
+
stateFile: this.ctx.paths.stateFile,
|
|
472
|
+
maxWorkers: this.ctx.maxWorkers,
|
|
473
|
+
mrMode: this.ctx.mrMode,
|
|
474
|
+
gitlabProjectId: this.ctx.config.GITLAB_PROJECT_ID,
|
|
475
|
+
gitlabUrl: this.ctx.config.raw.GITLAB_URL || process.env.GITLAB_URL || '',
|
|
476
|
+
gitlabToken: this.ctx.config.raw.GITLAB_TOKEN || process.env.GITLAB_TOKEN || '',
|
|
477
|
+
doneStateId: this.ctx.config.raw.PLANE_STATE_DONE || this.ctx.config.raw.TRELLO_DONE_LIST_ID || '',
|
|
478
|
+
maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
|
|
479
|
+
logsDir: this.ctx.paths.logsDir,
|
|
480
|
+
tool: this.ctx.config.WORKER_TOOL,
|
|
481
|
+
};
|
|
482
|
+
const state = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
483
|
+
const activeCard = state.activeCards[card.seq];
|
|
484
|
+
const retryCount = activeCard?.retryCount ?? 0;
|
|
485
|
+
try {
|
|
486
|
+
if (completion.status === 'completed') {
|
|
487
|
+
const results = await this.postActions.executeCompletion(ctx, completion, handle?.sessionId || null);
|
|
488
|
+
const allOk = results.every(r => r.ok);
|
|
489
|
+
this.log.ok(`seq ${card.seq}: PostActions completed (${allOk ? 'all ok' : 'some failures'})`);
|
|
490
|
+
}
|
|
491
|
+
else {
|
|
492
|
+
await this.postActions.executeFailure(ctx, completion, exitCode, handle?.sessionId || null, retryCount, {
|
|
493
|
+
onExit: (code) => this.onWorkerExit(workerId, card, slotName, worktree, branch, code),
|
|
494
|
+
});
|
|
495
|
+
this.log.info(`seq ${card.seq}: Failure handling done`);
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
catch (err) {
|
|
499
|
+
this.log.error(`seq ${card.seq}: PostActions error: ${err}`);
|
|
500
|
+
}
|
|
501
|
+
}
|
|
614
502
|
// ─── Helpers ─────────────────────────────────────────────────────
|
|
615
503
|
/**
|
|
616
504
|
* Build branch name from card: feature/<seq>-<slug>
|
|
@@ -640,7 +528,9 @@ export class ExecutionEngine {
|
|
|
640
528
|
mkdirSync(worktreePath, { recursive: true });
|
|
641
529
|
}
|
|
642
530
|
const branchName = this.buildBranchName(card);
|
|
643
|
-
//
|
|
531
|
+
// ── 1. Skill Profiles (label-driven) ──
|
|
532
|
+
const skillContent = this.loadSkillProfiles(card);
|
|
533
|
+
// ── 2. Project Rules (CLAUDE.md + AGENTS.md) ──
|
|
644
534
|
const claudeMdPath = resolve(worktreePath, 'CLAUDE.md');
|
|
645
535
|
const agentsMdPath = resolve(worktreePath, 'AGENTS.md');
|
|
646
536
|
let projectRules = '';
|
|
@@ -654,27 +544,31 @@ export class ExecutionEngine {
|
|
|
654
544
|
const agentsRules = readFileSync(agentsMdPath, 'utf-8').trim();
|
|
655
545
|
projectRules = projectRules ? `${projectRules}\n\n${agentsRules}` : agentsRules;
|
|
656
546
|
}
|
|
657
|
-
// .
|
|
658
|
-
|
|
659
|
-
//
|
|
547
|
+
// ── 3. Project Knowledge (truncated) ──
|
|
548
|
+
const knowledge = this.loadProjectKnowledge(worktreePath);
|
|
549
|
+
// ── Assemble prompt ──
|
|
660
550
|
const sections = [];
|
|
551
|
+
if (skillContent) {
|
|
552
|
+
sections.push(skillContent);
|
|
553
|
+
sections.push('---');
|
|
554
|
+
}
|
|
661
555
|
if (projectRules) {
|
|
662
556
|
sections.push(projectRules);
|
|
663
557
|
sections.push('---');
|
|
664
558
|
}
|
|
559
|
+
if (knowledge) {
|
|
560
|
+
sections.push(knowledge);
|
|
561
|
+
sections.push('---');
|
|
562
|
+
}
|
|
665
563
|
// Build requirements based on MR mode
|
|
666
564
|
const mrMode = this.ctx.mrMode; // 'none' | 'create'
|
|
667
565
|
const createMR = mrMode === 'create';
|
|
668
|
-
// Generate .jarvis/merge.sh
|
|
566
|
+
// Generate .jarvis/merge.sh
|
|
669
567
|
this.writeMergeScript(worktreePath, branchName, card, createMR);
|
|
670
568
|
const mergeStepDesc = createMR
|
|
671
569
|
? 'Create the Merge Request'
|
|
672
570
|
: `Merge your changes into ${this.ctx.mergeBranch}`;
|
|
673
571
|
const requirements = [
|
|
674
|
-
'0. BEFORE coding, read these files if they exist (project knowledge from previous tasks):',
|
|
675
|
-
' - docs/DECISIONS.md — architecture decisions and tech choices',
|
|
676
|
-
' - docs/CHANGELOG.md — recent changes by previous workers',
|
|
677
|
-
'',
|
|
678
572
|
'1. Implement the changes described above',
|
|
679
573
|
'2. Self-test your changes (run existing tests if any, ensure no regressions)',
|
|
680
574
|
'3. Update project knowledge (create docs/ dir if needed):',
|
|
@@ -809,83 +703,11 @@ ${requirements.join('\n')}`);
|
|
|
809
703
|
writeFileSync(resolve(jarvisDir, 'merge.sh'), lines.join('\n') + '\n', { mode: 0o755 });
|
|
810
704
|
}
|
|
811
705
|
/**
|
|
812
|
-
*
|
|
813
|
-
* Used for
|
|
814
|
-
*/
|
|
815
|
-
async completeAndRelease(card, slotName, slotState) {
|
|
816
|
-
const seq = card.seq;
|
|
817
|
-
const errors = [];
|
|
818
|
-
// 1. Move card to Done — if this fails, abort (don't release slot)
|
|
819
|
-
try {
|
|
820
|
-
await this.taskBackend.move(seq, 'Done');
|
|
821
|
-
this.log.ok(`seq ${seq}: Moved Inprogress → Done`);
|
|
822
|
-
}
|
|
823
|
-
catch (err) {
|
|
824
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
825
|
-
this.log.error(`seq ${seq}: Failed to move to Done: ${msg}. Slot NOT released.`);
|
|
826
|
-
return { action: 'complete-direct', entity: `seq:${seq}`, result: 'fail', message: `Move to Done failed: ${msg}` };
|
|
827
|
-
}
|
|
828
|
-
// 2. Release claim
|
|
829
|
-
try {
|
|
830
|
-
await this.taskBackend.releaseClaim(seq);
|
|
831
|
-
}
|
|
832
|
-
catch { /* best effort */ }
|
|
833
|
-
// 3. Release worker slot (only after Done confirmed)
|
|
834
|
-
try {
|
|
835
|
-
const state = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
836
|
-
if (state.workers[slotName]) {
|
|
837
|
-
const sessionName = state.workers[slotName].tmuxSession;
|
|
838
|
-
state.workers[slotName] = {
|
|
839
|
-
status: 'idle', seq: null, branch: null, worktree: null,
|
|
840
|
-
tmuxSession: null, claimedAt: null, lastHeartbeat: null,
|
|
841
|
-
mode: null, sessionId: null, pid: null, outputFile: null, exitCode: null,
|
|
842
|
-
};
|
|
843
|
-
delete state.activeCards[seq];
|
|
844
|
-
// 4. Mark worktree for cleanup
|
|
845
|
-
const branchName = slotState.branch || this.buildBranchName(card);
|
|
846
|
-
const worktreePath = slotState.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR);
|
|
847
|
-
const cleanup = state.worktreeCleanup ?? [];
|
|
848
|
-
if (!cleanup.some((e) => e.branch === branchName)) {
|
|
849
|
-
cleanup.push({ branch: branchName, worktreePath, markedAt: new Date().toISOString() });
|
|
850
|
-
state.worktreeCleanup = cleanup;
|
|
851
|
-
}
|
|
852
|
-
writeState(this.ctx.paths.stateFile, state, 'pipeline-complete-release');
|
|
853
|
-
this.log.ok(`seq ${seq}: Slot ${slotName} released, worktree marked for cleanup`);
|
|
854
|
-
// 5. Stop worker session
|
|
855
|
-
if (sessionName) {
|
|
856
|
-
this.workerProvider.stop(sessionName).catch(() => { });
|
|
857
|
-
}
|
|
858
|
-
}
|
|
859
|
-
}
|
|
860
|
-
catch (err) {
|
|
861
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
862
|
-
this.log.error(`seq ${seq}: Failed to release resources: ${msg}`);
|
|
863
|
-
errors.push(`release: ${msg}`);
|
|
864
|
-
}
|
|
865
|
-
this.logEvent('complete-direct', seq, errors.length === 0 ? 'ok' : 'fail', { slotName });
|
|
866
|
-
if (this.notifier) {
|
|
867
|
-
const statusMsg = errors.length === 0
|
|
868
|
-
? `seq:${seq} completed — merged to ${this.ctx.mergeBranch}, resources released`
|
|
869
|
-
: `seq:${seq} completed with errors: ${errors.join('; ')}`;
|
|
870
|
-
await this.notifier.sendSuccess(`[${this.ctx.projectName}] ${statusMsg}`).catch(() => { });
|
|
871
|
-
}
|
|
872
|
-
return {
|
|
873
|
-
action: 'complete-direct',
|
|
874
|
-
entity: `seq:${seq}`,
|
|
875
|
-
result: errors.length === 0 ? 'ok' : 'fail',
|
|
876
|
-
message: errors.length === 0
|
|
877
|
-
? `Inprogress → Done (merged to ${this.ctx.mergeBranch}, resources released)`
|
|
878
|
-
: `Completed with errors: ${errors.join('; ')}`,
|
|
879
|
-
};
|
|
880
|
-
}
|
|
881
|
-
/**
|
|
882
|
-
* Release a worker slot, cleanup tmux session, remove card from active cards.
|
|
706
|
+
* Release a worker slot and remove card from active cards.
|
|
707
|
+
* Used for launch failure rollback.
|
|
883
708
|
*/
|
|
884
709
|
releaseSlot(slotName, seq) {
|
|
885
710
|
try {
|
|
886
|
-
// Kill tmux session if it exists (cleanup from failed launch)
|
|
887
|
-
const sessionName = `${this.ctx.projectName}-${slotName}`;
|
|
888
|
-
this.workerProvider.stop(sessionName).catch(() => { });
|
|
889
711
|
const state = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
890
712
|
if (state.workers[slotName]) {
|
|
891
713
|
state.workers[slotName] = {
|
|
@@ -911,192 +733,90 @@ ${requirements.join('\n')}`);
|
|
|
911
733
|
this.log.warn(`Failed to release slot ${slotName} for seq ${seq}`);
|
|
912
734
|
}
|
|
913
735
|
}
|
|
736
|
+
// ─── Skill Profile Loading (label-driven) ─────────────────────
|
|
914
737
|
/**
|
|
915
|
-
*
|
|
916
|
-
*
|
|
738
|
+
* Load skill profiles based on card labels (skill:xxx) or project default.
|
|
739
|
+
* Returns combined profile content for prompt injection.
|
|
917
740
|
*/
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
const sid = parser(launchResult.outputFile);
|
|
929
|
-
if (sid) {
|
|
930
|
-
const state = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
931
|
-
if (state.workers[slotName]?.pid === launchResult.pid) {
|
|
932
|
-
state.workers[slotName].sessionId = sid;
|
|
933
|
-
writeState(this.ctx.paths.stateFile, state, 'pipeline-session-id');
|
|
934
|
-
this.log.info(`Extracted session ID for ${sessionName}: ${sid.slice(0, 8)}...`);
|
|
935
|
-
}
|
|
936
|
-
}
|
|
741
|
+
loadSkillProfiles(card) {
|
|
742
|
+
// 1. Extract skill:xxx labels from card
|
|
743
|
+
let skills = card.labels
|
|
744
|
+
.filter(l => l.startsWith('skill:'))
|
|
745
|
+
.map(l => l.slice('skill:'.length));
|
|
746
|
+
// 2. Fallback to project default
|
|
747
|
+
if (skills.length === 0) {
|
|
748
|
+
const defaultSkills = this.ctx.config.raw.DEFAULT_WORKER_SKILLS;
|
|
749
|
+
if (defaultSkills) {
|
|
750
|
+
skills = defaultSkills.split(',').map(s => s.trim()).filter(Boolean);
|
|
937
751
|
}
|
|
938
|
-
catch { /* non-fatal */ }
|
|
939
|
-
}, 5_000);
|
|
940
|
-
}
|
|
941
|
-
/**
|
|
942
|
-
* Attempt to resume a failed/incomplete worker via --resume.
|
|
943
|
-
*
|
|
944
|
-
* Uses metaRead/metaWrite to track resumeAttempts per card.
|
|
945
|
-
* Max retries = WORKER_RESTART_LIMIT (default 2).
|
|
946
|
-
*
|
|
947
|
-
* Returns an ActionRecord if resume was initiated, or null if retries exhausted.
|
|
948
|
-
*/
|
|
949
|
-
async attemptResume(seq, slotName, slotState, card, reason) {
|
|
950
|
-
const maxRetries = this.ctx.config.WORKER_RESTART_LIMIT;
|
|
951
|
-
let meta;
|
|
952
|
-
try {
|
|
953
|
-
meta = await this.taskBackend.metaRead(seq);
|
|
954
|
-
}
|
|
955
|
-
catch {
|
|
956
|
-
meta = {};
|
|
957
|
-
}
|
|
958
|
-
const resumeAttempts = typeof meta.resumeAttempts === 'number' ? meta.resumeAttempts : 0;
|
|
959
|
-
if (resumeAttempts >= maxRetries) {
|
|
960
|
-
this.log.warn(`seq ${seq}: Resume retries exhausted (${resumeAttempts}/${maxRetries})`);
|
|
961
|
-
return null; // caller handles NEEDS-FIX
|
|
962
|
-
}
|
|
963
|
-
const session = slotState.tmuxSession;
|
|
964
|
-
const sessionId = slotState.sessionId;
|
|
965
|
-
if (!session || !sessionId) {
|
|
966
|
-
this.log.warn(`seq ${seq}: No session ID for resume`);
|
|
967
|
-
return null;
|
|
968
752
|
}
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
''
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
`4. git add, commit, and push to branch ${branch}`,
|
|
985
|
-
'5. Create and merge the MR by running: bash .jarvis/merge.sh',
|
|
986
|
-
'6. Say "done" when finished',
|
|
987
|
-
'',
|
|
988
|
-
'IMPORTANT: Step 5 (bash .jarvis/merge.sh) is MANDATORY. Do NOT skip it.',
|
|
989
|
-
].join('\n');
|
|
990
|
-
const resumeResult = await this.workerProvider.sendFix(session, continuePrompt, sessionId);
|
|
991
|
-
// Update state with new process info
|
|
992
|
-
if (resumeResult && typeof resumeResult === 'object' && 'pid' in resumeResult) {
|
|
993
|
-
const freshState = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
994
|
-
if (freshState.workers[slotName]) {
|
|
995
|
-
freshState.workers[slotName].pid = resumeResult.pid;
|
|
996
|
-
freshState.workers[slotName].outputFile = resumeResult.outputFile;
|
|
997
|
-
if (resumeResult.sessionId) {
|
|
998
|
-
freshState.workers[slotName].sessionId = resumeResult.sessionId;
|
|
999
|
-
}
|
|
1000
|
-
freshState.workers[slotName].exitCode = null;
|
|
1001
|
-
freshState.workers[slotName].lastHeartbeat = new Date().toISOString();
|
|
1002
|
-
writeState(this.ctx.paths.stateFile, freshState, 'pipeline-resume');
|
|
1003
|
-
}
|
|
753
|
+
if (skills.length === 0)
|
|
754
|
+
return '';
|
|
755
|
+
// 3. Load profile files
|
|
756
|
+
const frameworkDir = this.ctx.config.raw.FRAMEWORK_DIR
|
|
757
|
+
|| resolve(process.env.HOME || '~', 'jarvis-skills');
|
|
758
|
+
const profilesDir = resolve(frameworkDir, 'skills', 'worker-profiles');
|
|
759
|
+
const sections = ['# Skill Profiles'];
|
|
760
|
+
for (const skill of skills) {
|
|
761
|
+
const filePath = resolve(profilesDir, `${skill}.md`);
|
|
762
|
+
if (existsSync(filePath)) {
|
|
763
|
+
const content = readFileSync(filePath, 'utf-8').trim();
|
|
764
|
+
// Strip YAML frontmatter
|
|
765
|
+
const body = content.replace(/^---[\s\S]*?---\s*/, '');
|
|
766
|
+
sections.push(body);
|
|
767
|
+
this.log.ok(`Loaded skill profile: ${skill}`);
|
|
1004
768
|
}
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
...meta,
|
|
1008
|
-
resumeAttempts: resumeAttempts + 1,
|
|
1009
|
-
});
|
|
1010
|
-
this.log.info(`seq ${seq}: Resumed worker (attempt ${resumeAttempts + 1}/${maxRetries}), reason: ${reason}`);
|
|
1011
|
-
if (this.notifier) {
|
|
1012
|
-
await this.notifier.send(`[${this.ctx.projectName}] seq:${seq} worker resumed (${resumeAttempts + 1}/${maxRetries}): ${reason}`, 'info').catch(() => { });
|
|
769
|
+
else {
|
|
770
|
+
this.log.warn(`Skill profile not found: ${filePath}`);
|
|
1013
771
|
}
|
|
1014
|
-
this.logEvent('resume', seq, 'ok', { attempt: resumeAttempts + 1, max: maxRetries, reason });
|
|
1015
|
-
return {
|
|
1016
|
-
action: 'resume',
|
|
1017
|
-
entity: `seq:${seq}`,
|
|
1018
|
-
result: 'ok',
|
|
1019
|
-
message: `Worker resumed (${resumeAttempts + 1}/${maxRetries}): ${reason}`,
|
|
1020
|
-
};
|
|
1021
|
-
}
|
|
1022
|
-
catch (err) {
|
|
1023
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
1024
|
-
this.log.error(`seq ${seq}: Resume failed: ${msg}`);
|
|
1025
|
-
return null; // caller handles NEEDS-FIX
|
|
1026
772
|
}
|
|
773
|
+
return sections.length > 1 ? sections.join('\n\n') : '';
|
|
1027
774
|
}
|
|
775
|
+
// ─── Project Knowledge Loading (truncated) ────────────────────
|
|
1028
776
|
/**
|
|
1029
|
-
*
|
|
1030
|
-
*
|
|
777
|
+
* Load recent project knowledge from docs/DECISIONS.md and docs/CHANGELOG.md.
|
|
778
|
+
* Truncates to recent entries to keep prompt size manageable.
|
|
1031
779
|
*/
|
|
1032
|
-
|
|
1033
|
-
const
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
meta = {};
|
|
1045
|
-
}
|
|
1046
|
-
const mergeAttempts = typeof meta.mergeResumeAttempts === 'number' ? meta.mergeResumeAttempts : 0;
|
|
1047
|
-
if (mergeAttempts >= maxRetries) {
|
|
1048
|
-
this.log.warn(`seq ${seq}: Merge resume retries exhausted (${mergeAttempts}/${maxRetries})`);
|
|
1049
|
-
return null; // fall through to system fallback
|
|
780
|
+
loadProjectKnowledge(worktreePath) {
|
|
781
|
+
const sections = ['# Project Knowledge (from previous tasks)'];
|
|
782
|
+
let hasContent = false;
|
|
783
|
+
// Recent decisions (last 10 sections)
|
|
784
|
+
const decisionsPath = resolve(worktreePath, 'docs', 'DECISIONS.md');
|
|
785
|
+
if (existsSync(decisionsPath)) {
|
|
786
|
+
const content = readFileSync(decisionsPath, 'utf-8');
|
|
787
|
+
const recent = this.extractRecentSections(content, 10);
|
|
788
|
+
if (recent) {
|
|
789
|
+
sections.push('## Recent Decisions\n' + recent);
|
|
790
|
+
hasContent = true;
|
|
791
|
+
}
|
|
1050
792
|
}
|
|
1051
|
-
|
|
1052
|
-
const
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
'',
|
|
1060
|
-
'Run this ONE command to create and merge the MR:',
|
|
1061
|
-
'',
|
|
1062
|
-
' bash .jarvis/merge.sh',
|
|
1063
|
-
'',
|
|
1064
|
-
'Then say "done".',
|
|
1065
|
-
].join('\n');
|
|
1066
|
-
try {
|
|
1067
|
-
const resumeResult = await this.workerProvider.sendFix(session, mergePrompt, sessionId);
|
|
1068
|
-
if (resumeResult && typeof resumeResult === 'object' && 'pid' in resumeResult) {
|
|
1069
|
-
const freshState = readState(this.ctx.paths.stateFile, this.ctx.maxWorkers);
|
|
1070
|
-
if (freshState.workers[slotName]) {
|
|
1071
|
-
freshState.workers[slotName].pid = resumeResult.pid;
|
|
1072
|
-
freshState.workers[slotName].outputFile = resumeResult.outputFile;
|
|
1073
|
-
if (resumeResult.sessionId) {
|
|
1074
|
-
freshState.workers[slotName].sessionId = resumeResult.sessionId;
|
|
1075
|
-
}
|
|
1076
|
-
freshState.workers[slotName].exitCode = null;
|
|
1077
|
-
freshState.workers[slotName].lastHeartbeat = new Date().toISOString();
|
|
1078
|
-
writeState(this.ctx.paths.stateFile, freshState, 'pipeline-merge-resume');
|
|
1079
|
-
}
|
|
793
|
+
// Recent changelog (last 5 sections)
|
|
794
|
+
const changelogPath = resolve(worktreePath, 'docs', 'CHANGELOG.md');
|
|
795
|
+
if (existsSync(changelogPath)) {
|
|
796
|
+
const content = readFileSync(changelogPath, 'utf-8');
|
|
797
|
+
const recent = this.extractRecentSections(content, 5);
|
|
798
|
+
if (recent) {
|
|
799
|
+
sections.push('## Recent Changes\n' + recent);
|
|
800
|
+
hasContent = true;
|
|
1080
801
|
}
|
|
1081
|
-
// Increment merge resume counter
|
|
1082
|
-
await this.taskBackend.metaWrite(seq, {
|
|
1083
|
-
...meta,
|
|
1084
|
-
mergeResumeAttempts: mergeAttempts + 1,
|
|
1085
|
-
});
|
|
1086
|
-
this.log.info(`seq ${seq}: Resumed worker to create/merge MR (attempt ${mergeAttempts + 1}/${maxRetries})`);
|
|
1087
|
-
this.logEvent('merge-resume', seq, 'ok', { branch, attempt: mergeAttempts + 1 });
|
|
1088
|
-
return {
|
|
1089
|
-
action: 'merge-resume',
|
|
1090
|
-
entity: `seq:${seq}`,
|
|
1091
|
-
result: 'ok',
|
|
1092
|
-
message: `Worker resumed to create/merge MR (${mergeAttempts + 1}/${maxRetries})`,
|
|
1093
|
-
};
|
|
1094
802
|
}
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
803
|
+
return hasContent ? sections.join('\n\n') : '';
|
|
804
|
+
}
|
|
805
|
+
/**
|
|
806
|
+
* Extract the last N ## sections from a markdown file.
|
|
807
|
+
*/
|
|
808
|
+
extractRecentSections(content, maxSections) {
|
|
809
|
+
const lines = content.split('\n');
|
|
810
|
+
const sectionStarts = [];
|
|
811
|
+
for (let i = 0; i < lines.length; i++) {
|
|
812
|
+
if (lines[i].startsWith('## ')) {
|
|
813
|
+
sectionStarts.push(i);
|
|
814
|
+
}
|
|
1099
815
|
}
|
|
816
|
+
if (sectionStarts.length === 0)
|
|
817
|
+
return content.trim();
|
|
818
|
+
const start = sectionStarts[Math.max(0, sectionStarts.length - maxSections)];
|
|
819
|
+
return lines.slice(start).join('\n').trim();
|
|
1100
820
|
}
|
|
1101
821
|
logEvent(action, seq, result, meta) {
|
|
1102
822
|
this.log.event({
|