@coralai/sps-cli 0.23.22 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -2
- package/dist/commands/cardDashboard.js +3 -3
- package/dist/commands/cardDashboard.js.map +1 -1
- package/dist/commands/pipelineTick.d.ts.map +1 -1
- package/dist/commands/pipelineTick.js +17 -5
- package/dist/commands/pipelineTick.js.map +1 -1
- package/dist/commands/qaTick.d.ts.map +1 -1
- package/dist/commands/qaTick.js +31 -3
- package/dist/commands/qaTick.js.map +1 -1
- package/dist/commands/status.d.ts.map +1 -1
- package/dist/commands/status.js +2 -5
- package/dist/commands/status.js.map +1 -1
- package/dist/commands/tick.d.ts.map +1 -1
- package/dist/commands/tick.js +50 -33
- package/dist/commands/tick.js.map +1 -1
- package/dist/commands/workerDashboard.d.ts.map +1 -1
- package/dist/commands/workerDashboard.js +9 -9
- package/dist/commands/workerDashboard.js.map +1 -1
- package/dist/commands/workerLaunch.d.ts.map +1 -1
- package/dist/commands/workerLaunch.js +17 -5
- package/dist/commands/workerLaunch.js.map +1 -1
- package/dist/core/acpState.js +1 -1
- package/dist/core/acpState.js.map +1 -1
- package/dist/core/runtimeSnapshot.d.ts +1 -0
- package/dist/core/runtimeSnapshot.d.ts.map +1 -1
- package/dist/core/runtimeSnapshot.js +6 -6
- package/dist/core/runtimeSnapshot.js.map +1 -1
- package/dist/core/runtimeStore.d.ts +23 -1
- package/dist/core/runtimeStore.d.ts.map +1 -1
- package/dist/core/runtimeStore.js +71 -32
- package/dist/core/runtimeStore.js.map +1 -1
- package/dist/core/state.d.ts +33 -0
- package/dist/core/state.d.ts.map +1 -1
- package/dist/core/state.js +6 -0
- package/dist/core/state.js.map +1 -1
- package/dist/core/workerRuntimeSummary.d.ts +1 -2
- package/dist/core/workerRuntimeSummary.d.ts.map +1 -1
- package/dist/core/workerRuntimeSummary.js +2 -2
- package/dist/core/workerRuntimeSummary.js.map +1 -1
- package/dist/engines/CloseoutEngine.d.ts +3 -6
- package/dist/engines/CloseoutEngine.d.ts.map +1 -1
- package/dist/engines/CloseoutEngine.js +112 -288
- package/dist/engines/CloseoutEngine.js.map +1 -1
- package/dist/engines/EventHandler.d.ts +57 -0
- package/dist/engines/EventHandler.d.ts.map +1 -0
- package/dist/engines/EventHandler.js +210 -0
- package/dist/engines/EventHandler.js.map +1 -0
- package/dist/engines/ExecutionEngine.d.ts +5 -17
- package/dist/engines/ExecutionEngine.d.ts.map +1 -1
- package/dist/engines/ExecutionEngine.js +108 -367
- package/dist/engines/ExecutionEngine.js.map +1 -1
- package/dist/engines/MonitorEngine.d.ts.map +1 -1
- package/dist/engines/MonitorEngine.js +8 -9
- package/dist/engines/MonitorEngine.js.map +1 -1
- package/dist/manager/integration-queue.d.ts +65 -0
- package/dist/manager/integration-queue.d.ts.map +1 -0
- package/dist/manager/integration-queue.js +123 -0
- package/dist/manager/integration-queue.js.map +1 -0
- package/dist/manager/runtime-coordinator.d.ts +1 -3
- package/dist/manager/runtime-coordinator.d.ts.map +1 -1
- package/dist/manager/runtime-coordinator.js +13 -15
- package/dist/manager/runtime-coordinator.js.map +1 -1
- package/dist/manager/worker-manager-impl.d.ts +81 -0
- package/dist/manager/worker-manager-impl.d.ts.map +1 -0
- package/dist/manager/worker-manager-impl.js +648 -0
- package/dist/manager/worker-manager-impl.js.map +1 -0
- package/dist/manager/worker-manager.d.ts +176 -0
- package/dist/manager/worker-manager.d.ts.map +1 -0
- package/dist/manager/worker-manager.js +12 -0
- package/dist/manager/worker-manager.js.map +1 -0
- package/dist/models/acp.d.ts +4 -0
- package/dist/models/acp.d.ts.map +1 -1
- package/package.json +1 -1
|
@@ -4,29 +4,23 @@ import { RuntimeStore } from '../core/runtimeStore.js';
|
|
|
4
4
|
import { resolveGitlabProjectId, resolveWorkflowTransport } from '../core/config.js';
|
|
5
5
|
import { resolveWorktreePath } from '../core/paths.js';
|
|
6
6
|
import { readQueue } from '../core/queue.js';
|
|
7
|
-
import { buildPhasePrompt, DEVELOPMENT_PROMPT_FILE, INTEGRATION_PROMPT_FILE, LEGACY_TASK_PROMPT_FILE,
|
|
7
|
+
import { buildPhasePrompt, DEVELOPMENT_PROMPT_FILE, INTEGRATION_PROMPT_FILE, LEGACY_TASK_PROMPT_FILE, } from '../core/taskPrompts.js';
|
|
8
8
|
import { Logger } from '../core/logger.js';
|
|
9
9
|
const SKIP_LABELS = ['BLOCKED', 'NEEDS-FIX', 'CONFLICT', 'WAITING-CONFIRMATION', 'STALE-RUNTIME'];
|
|
10
10
|
export class ExecutionEngine {
|
|
11
11
|
ctx;
|
|
12
12
|
taskBackend;
|
|
13
13
|
repoBackend;
|
|
14
|
-
|
|
15
|
-
completionJudge;
|
|
16
|
-
postActions;
|
|
17
|
-
resourceLimiter;
|
|
14
|
+
workerManager;
|
|
18
15
|
notifier;
|
|
19
16
|
agentRuntime;
|
|
20
17
|
log;
|
|
21
18
|
runtimeStore;
|
|
22
|
-
constructor(ctx, taskBackend, repoBackend,
|
|
19
|
+
constructor(ctx, taskBackend, repoBackend, workerManager, notifier, agentRuntime) {
|
|
23
20
|
this.ctx = ctx;
|
|
24
21
|
this.taskBackend = taskBackend;
|
|
25
22
|
this.repoBackend = repoBackend;
|
|
26
|
-
this.
|
|
27
|
-
this.completionJudge = completionJudge;
|
|
28
|
-
this.postActions = postActions;
|
|
29
|
-
this.resourceLimiter = resourceLimiter;
|
|
23
|
+
this.workerManager = workerManager;
|
|
30
24
|
this.notifier = notifier;
|
|
31
25
|
this.agentRuntime = agentRuntime;
|
|
32
26
|
this.log = new Logger('pipeline', ctx.projectName, ctx.paths.logsDir);
|
|
@@ -298,15 +292,10 @@ export class ExecutionEngine {
|
|
|
298
292
|
// Slot already released (PostActions handled it via exit callback)
|
|
299
293
|
return null;
|
|
300
294
|
}
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
return this.checkAcpInprogressCard(card, slotName);
|
|
306
|
-
}
|
|
307
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
|
|
308
|
-
const handle = this.supervisor.get(workerId);
|
|
309
|
-
if (handle && handle.exitCode === null) {
|
|
295
|
+
// Use WorkerManager.inspect() to check worker state
|
|
296
|
+
const snapshots = this.workerManager.inspect({ project: this.ctx.projectName, taskId: seq });
|
|
297
|
+
const snapshot = snapshots[0];
|
|
298
|
+
if (snapshot && (snapshot.state === 'running' || snapshot.state === 'starting')) {
|
|
310
299
|
// Worker still running — update heartbeat
|
|
311
300
|
try {
|
|
312
301
|
this.runtimeStore.updateState('pipeline-heartbeat', (freshState) => {
|
|
@@ -318,25 +307,28 @@ export class ExecutionEngine {
|
|
|
318
307
|
catch { /* non-fatal */ }
|
|
319
308
|
return null;
|
|
320
309
|
}
|
|
321
|
-
if (
|
|
322
|
-
// Worker
|
|
323
|
-
|
|
324
|
-
const freshState = this.runtimeStore.readState();
|
|
325
|
-
if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
|
|
326
|
-
this.log.ok(`seq ${seq}: Completed (handled by exit callback)`);
|
|
327
|
-
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via exit callback' };
|
|
328
|
-
}
|
|
329
|
-
// PostActions still processing, wait for next tick
|
|
310
|
+
if (snapshot && (snapshot.state === 'waiting_input' || snapshot.state === 'needs_confirmation')) {
|
|
311
|
+
// Worker waiting for input — log and wait
|
|
312
|
+
this.log.info(`seq ${seq}: worker in state ${snapshot.state}`);
|
|
330
313
|
return null;
|
|
331
314
|
}
|
|
332
|
-
|
|
333
|
-
|
|
315
|
+
if (snapshot && snapshot.state === 'completed') {
|
|
316
|
+
// WM exit callback handled completion
|
|
317
|
+
this.log.ok(`seq ${seq}: Completed (handled by WM exit callback)`);
|
|
318
|
+
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via WM exit callback' };
|
|
319
|
+
}
|
|
320
|
+
if (snapshot && snapshot.state === 'failed') {
|
|
321
|
+
// WM exit callback handled failure
|
|
322
|
+
this.log.info(`seq ${seq}: Failed (handled by WM exit callback)`);
|
|
323
|
+
return { action: 'complete', entity: `seq:${seq}`, result: 'fail', message: 'Failed via WM exit callback' };
|
|
324
|
+
}
|
|
325
|
+
// No snapshot found — WM already processed and released the slot
|
|
334
326
|
const freshState = this.runtimeStore.readState();
|
|
335
327
|
if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
|
|
336
|
-
this.log.ok(`seq ${seq}: Completed (
|
|
337
|
-
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (
|
|
328
|
+
this.log.ok(`seq ${seq}: Completed (WM already processed)`);
|
|
329
|
+
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (WM processed)' };
|
|
338
330
|
}
|
|
339
|
-
// Still active in state but
|
|
331
|
+
// Still active in state but no snapshot — MonitorEngine/Recovery handles
|
|
340
332
|
return null;
|
|
341
333
|
}
|
|
342
334
|
// ─── Prepare Phase (Backlog → Todo) ─────────────────────────────
|
|
@@ -407,89 +399,15 @@ export class ExecutionEngine {
|
|
|
407
399
|
this.log.info(`[dry-run] Would launch seq ${seq}`);
|
|
408
400
|
return { action: 'launch', entity: `seq:${seq}`, result: 'ok', message: 'dry-run' };
|
|
409
401
|
}
|
|
410
|
-
// Step
|
|
411
|
-
// Exclude slots that failed launch this tick to prevent repeated failures
|
|
412
|
-
const state = this.runtimeStore.readState();
|
|
413
|
-
const idleSlots = Object.entries(state.workers)
|
|
414
|
-
.filter(([name, w]) => w.status === 'idle' && !failedSlots.has(name));
|
|
415
|
-
if (idleSlots.length === 0) {
|
|
416
|
-
this.log.warn(`No idle worker slot available for seq ${seq}`);
|
|
417
|
-
return { action: 'launch', entity: `seq:${seq}`, result: 'skip', message: 'No idle worker slot' };
|
|
418
|
-
}
|
|
419
|
-
const [slotName] = idleSlots[0];
|
|
420
|
-
const sessionName = `${this.ctx.projectName}-${slotName}`;
|
|
421
|
-
// Claim slot in state.json
|
|
422
|
-
state.workers[slotName] = {
|
|
423
|
-
status: 'active',
|
|
424
|
-
seq: parseInt(seq, 10),
|
|
425
|
-
branch: branchName,
|
|
426
|
-
worktree: worktreePath,
|
|
427
|
-
tmuxSession: sessionName,
|
|
428
|
-
claimedAt: new Date().toISOString(),
|
|
429
|
-
lastHeartbeat: new Date().toISOString(),
|
|
430
|
-
mode: workflowTransport === 'proc'
|
|
431
|
-
? this.ctx.config.WORKER_MODE
|
|
432
|
-
: workflowTransport,
|
|
433
|
-
transport: workflowTransport,
|
|
434
|
-
agent: (this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL),
|
|
435
|
-
sessionId: null,
|
|
436
|
-
runId: null,
|
|
437
|
-
sessionState: null,
|
|
438
|
-
remoteStatus: null,
|
|
439
|
-
lastEventAt: null,
|
|
440
|
-
pid: null,
|
|
441
|
-
outputFile: null,
|
|
442
|
-
exitCode: null,
|
|
443
|
-
mergeRetries: 0,
|
|
444
|
-
completedAt: null,
|
|
445
|
-
};
|
|
446
|
-
// Add to active cards
|
|
447
|
-
const conflictDomains = card.labels
|
|
448
|
-
.filter((l) => l.startsWith('conflict:'))
|
|
449
|
-
.map((l) => l.slice('conflict:'.length));
|
|
450
|
-
state.activeCards[seq] = {
|
|
451
|
-
seq: parseInt(seq, 10),
|
|
452
|
-
state: 'Todo',
|
|
453
|
-
worker: slotName,
|
|
454
|
-
mrUrl: null,
|
|
455
|
-
conflictDomains,
|
|
456
|
-
startedAt: new Date().toISOString(),
|
|
457
|
-
};
|
|
458
|
-
state.leases[seq] = {
|
|
459
|
-
seq: parseInt(seq, 10),
|
|
460
|
-
pmStateObserved: card.state,
|
|
461
|
-
phase: 'preparing',
|
|
462
|
-
slot: slotName,
|
|
463
|
-
branch: branchName,
|
|
464
|
-
worktree: worktreePath,
|
|
465
|
-
sessionId: null,
|
|
466
|
-
runId: null,
|
|
467
|
-
claimedAt: state.workers[slotName].claimedAt,
|
|
468
|
-
retryCount: 0,
|
|
469
|
-
lastTransitionAt: new Date().toISOString(),
|
|
470
|
-
};
|
|
471
|
-
try {
|
|
472
|
-
this.runtimeStore.updateState('pipeline-launch', (draft) => {
|
|
473
|
-
draft.workers[slotName] = state.workers[slotName];
|
|
474
|
-
draft.activeCards[seq] = state.activeCards[seq];
|
|
475
|
-
draft.leases[seq] = state.leases[seq];
|
|
476
|
-
});
|
|
477
|
-
this.log.ok(`Step 4: Claimed slot ${slotName} for seq ${seq}`);
|
|
478
|
-
}
|
|
479
|
-
catch (err) {
|
|
480
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
481
|
-
this.log.error(`Step 4 failed (claim) for seq ${seq}: ${msg}`);
|
|
482
|
-
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Claim slot failed: ${msg}` };
|
|
483
|
-
}
|
|
484
|
-
// Also claim in PM backend
|
|
402
|
+
// Step 5: PM claim (kept in Engine — PM backend awareness)
|
|
485
403
|
try {
|
|
486
|
-
await this.taskBackend.claim(seq,
|
|
404
|
+
await this.taskBackend.claim(seq, `pending-wm`);
|
|
487
405
|
}
|
|
488
406
|
catch (err) {
|
|
489
407
|
const msg = err instanceof Error ? err.message : String(err);
|
|
490
408
|
this.log.warn(`PM claim for seq ${seq} failed (non-fatal): ${msg}`);
|
|
491
409
|
}
|
|
492
|
-
// Step
|
|
410
|
+
// Step 5b: Build task context (.sps/development_prompt.txt + .sps/integration_prompt.txt)
|
|
493
411
|
try {
|
|
494
412
|
this.buildTaskContext(card, worktreePath);
|
|
495
413
|
this.log.ok(`Step 5: Task context built for seq ${seq}`);
|
|
@@ -497,117 +415,61 @@ export class ExecutionEngine {
|
|
|
497
415
|
catch (err) {
|
|
498
416
|
const msg = err instanceof Error ? err.message : String(err);
|
|
499
417
|
this.log.error(`Step 5 failed (context) for seq ${seq}: ${msg}`);
|
|
500
|
-
this.releaseSlot(slotName, seq);
|
|
501
418
|
this.logEvent('launch-context', seq, 'fail', { error: msg });
|
|
502
419
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Context build failed: ${msg}` };
|
|
503
420
|
}
|
|
504
|
-
// Step 6: Launch worker via
|
|
421
|
+
// Step 6: Launch worker via WorkerManager.run()
|
|
422
|
+
const logsDir = this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`;
|
|
423
|
+
const promptFile = resolve(worktreePath, '.sps', LEGACY_TASK_PROMPT_FILE);
|
|
424
|
+
let prompt;
|
|
505
425
|
try {
|
|
506
|
-
|
|
507
|
-
// Check global resource limit
|
|
508
|
-
const acquire = this.resourceLimiter.tryAcquireDetailed();
|
|
509
|
-
if (!acquire.acquired) {
|
|
510
|
-
const reason = this.resourceLimiter.formatBlockReason(acquire.stats);
|
|
511
|
-
this.log.warn(`Global resource limit reached, skipping seq ${seq}: ${reason}`);
|
|
512
|
-
// Rollback: release slot
|
|
513
|
-
this.releaseSlot(slotName, seq);
|
|
514
|
-
return {
|
|
515
|
-
action: 'launch',
|
|
516
|
-
entity: `seq:${seq}`,
|
|
517
|
-
result: 'skip',
|
|
518
|
-
message: `Global resource limit reached: ${reason}`,
|
|
519
|
-
};
|
|
520
|
-
}
|
|
521
|
-
await this.resourceLimiter.enforceStagger();
|
|
522
|
-
const prompt = readFileSync(promptFile, 'utf-8').trim();
|
|
523
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
|
|
524
|
-
if (workflowTransport !== 'proc') {
|
|
525
|
-
const runtime = this.requireAgentRuntime();
|
|
526
|
-
const session = await runtime.startRun(slotName, prompt, (this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL), worktreePath);
|
|
527
|
-
this.runtimeStore.updateState('pipeline-launch-acp', (freshState) => {
|
|
528
|
-
if (freshState.workers[slotName]) {
|
|
529
|
-
this.applyAcpSessionToSlot(freshState.workers[slotName], session);
|
|
530
|
-
if (freshState.leases[seq]) {
|
|
531
|
-
freshState.leases[seq].sessionId = session.sessionId;
|
|
532
|
-
freshState.leases[seq].runId = session.currentRun?.runId || null;
|
|
533
|
-
freshState.leases[seq].phase = session.pendingInput ? 'waiting_confirmation' : 'coding';
|
|
534
|
-
freshState.leases[seq].lastTransitionAt = new Date().toISOString();
|
|
535
|
-
}
|
|
536
|
-
}
|
|
537
|
-
});
|
|
538
|
-
this.supervisor.registerAcpHandle({
|
|
539
|
-
id: workerId,
|
|
540
|
-
pid: null,
|
|
541
|
-
outputFile: null,
|
|
542
|
-
project: this.ctx.projectName,
|
|
543
|
-
seq: card.seq,
|
|
544
|
-
slot: slotName,
|
|
545
|
-
branch: branchName,
|
|
546
|
-
worktree: worktreePath,
|
|
547
|
-
tool: session.tool,
|
|
548
|
-
exitCode: null,
|
|
549
|
-
sessionId: session.sessionId,
|
|
550
|
-
runId: session.currentRun?.runId || null,
|
|
551
|
-
sessionState: session.sessionState,
|
|
552
|
-
remoteStatus: session.currentRun?.status || null,
|
|
553
|
-
lastEventAt: session.lastSeenAt,
|
|
554
|
-
startedAt: new Date().toISOString(),
|
|
555
|
-
exitedAt: null,
|
|
556
|
-
});
|
|
557
|
-
this.log.ok(`Step 6: ${workflowTransport.toUpperCase()} worker launched for seq ${seq} ` +
|
|
558
|
-
`(session=${session.sessionId}, run=${session.currentRun?.runId || 'none'})`);
|
|
559
|
-
}
|
|
560
|
-
else {
|
|
561
|
-
const outputFile = resolve(this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`, `${sessionName}-${Date.now()}.jsonl`);
|
|
562
|
-
const workerHandle = this.supervisor.spawn({
|
|
563
|
-
id: workerId,
|
|
564
|
-
project: this.ctx.projectName,
|
|
565
|
-
seq: card.seq,
|
|
566
|
-
slot: slotName,
|
|
567
|
-
worktree: worktreePath,
|
|
568
|
-
branch: branchName,
|
|
569
|
-
prompt,
|
|
570
|
-
outputFile,
|
|
571
|
-
tool: this.ctx.config.WORKER_TOOL,
|
|
572
|
-
onExit: (exitCode) => {
|
|
573
|
-
this.onWorkerExit(workerId, card, slotName, worktreePath, branchName, exitCode);
|
|
574
|
-
},
|
|
575
|
-
});
|
|
576
|
-
// Store process info in state
|
|
577
|
-
this.runtimeStore.updateState('pipeline-launch-print', (freshState) => {
|
|
578
|
-
if (freshState.workers[slotName]) {
|
|
579
|
-
freshState.workers[slotName].mode = 'print';
|
|
580
|
-
freshState.workers[slotName].transport = 'proc';
|
|
581
|
-
freshState.workers[slotName].agent = this.ctx.config.WORKER_TOOL;
|
|
582
|
-
freshState.workers[slotName].pid = workerHandle.pid;
|
|
583
|
-
freshState.workers[slotName].outputFile = workerHandle.outputFile;
|
|
584
|
-
freshState.workers[slotName].sessionId = workerHandle.sessionId || null;
|
|
585
|
-
freshState.workers[slotName].runId = null;
|
|
586
|
-
freshState.workers[slotName].sessionState = null;
|
|
587
|
-
freshState.workers[slotName].remoteStatus = null;
|
|
588
|
-
freshState.workers[slotName].lastEventAt = null;
|
|
589
|
-
freshState.workers[slotName].exitCode = null;
|
|
590
|
-
if (freshState.leases[seq]) {
|
|
591
|
-
freshState.leases[seq].phase = 'coding';
|
|
592
|
-
freshState.leases[seq].lastTransitionAt = new Date().toISOString();
|
|
593
|
-
}
|
|
594
|
-
}
|
|
595
|
-
});
|
|
596
|
-
this.log.ok(`Step 6: Worker launched for seq ${seq} (pid=${workerHandle.pid})`);
|
|
597
|
-
}
|
|
598
|
-
if (this.notifier) {
|
|
599
|
-
await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
|
|
600
|
-
}
|
|
426
|
+
prompt = readFileSync(promptFile, 'utf-8').trim();
|
|
601
427
|
}
|
|
602
428
|
catch (err) {
|
|
603
429
|
const msg = err instanceof Error ? err.message : String(err);
|
|
604
|
-
this.log.error(`
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
430
|
+
this.log.error(`Failed to read prompt file for seq ${seq}: ${msg}`);
|
|
431
|
+
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Prompt file read failed: ${msg}` };
|
|
432
|
+
}
|
|
433
|
+
const runRequest = {
|
|
434
|
+
taskId: String(card.seq),
|
|
435
|
+
cardId: String(card.seq),
|
|
436
|
+
project: this.ctx.projectName,
|
|
437
|
+
phase: 'development',
|
|
438
|
+
prompt,
|
|
439
|
+
cwd: worktreePath,
|
|
440
|
+
branch: branchName,
|
|
441
|
+
targetBranch: this.ctx.mergeBranch,
|
|
442
|
+
tool: this.ctx.config.WORKER_TOOL,
|
|
443
|
+
transport: workflowTransport,
|
|
444
|
+
outputFile: resolve(logsDir, `${this.ctx.projectName}-worker-${card.seq}-${Date.now()}.jsonl`),
|
|
445
|
+
timeoutSec: this.ctx.config.WORKER_LAUNCH_TIMEOUT_S,
|
|
446
|
+
maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
|
|
447
|
+
};
|
|
448
|
+
let response;
|
|
449
|
+
try {
|
|
450
|
+
response = await this.workerManager.run(runRequest);
|
|
451
|
+
}
|
|
452
|
+
catch (err) {
|
|
453
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
454
|
+
this.log.error(`Step 6 failed (WM.run) for seq ${seq}: ${msg}`);
|
|
455
|
+
failedSlots.add(`wm-error-${seq}`);
|
|
608
456
|
this.logEvent('launch-worker', seq, 'fail', { error: msg });
|
|
609
457
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Worker launch failed: ${msg}` };
|
|
610
458
|
}
|
|
459
|
+
if (!response.accepted) {
|
|
460
|
+
this.log.warn(`WM rejected seq ${seq}: ${response.rejectReason}`);
|
|
461
|
+
return {
|
|
462
|
+
action: 'launch',
|
|
463
|
+
entity: `seq:${seq}`,
|
|
464
|
+
result: response.rejectReason === 'resource_exhausted' ? 'skip' : 'fail',
|
|
465
|
+
message: `WM rejected: ${response.rejectReason}`,
|
|
466
|
+
};
|
|
467
|
+
}
|
|
468
|
+
const slotName = response.slot;
|
|
469
|
+
this.log.ok(`Step 6: WM launched worker for seq ${seq} (slot=${slotName}, pid=${response.pid ?? 'n/a'})`);
|
|
470
|
+
if (this.notifier) {
|
|
471
|
+
await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
|
|
472
|
+
}
|
|
611
473
|
// Step 7: Move card to Inprogress
|
|
612
474
|
try {
|
|
613
475
|
await this.taskBackend.move(seq, 'Inprogress');
|
|
@@ -625,107 +487,72 @@ export class ExecutionEngine {
|
|
|
625
487
|
}
|
|
626
488
|
});
|
|
627
489
|
this.log.ok(`Step 7: Moved seq ${seq} Todo → Inprogress`);
|
|
628
|
-
this.logEvent('launch', seq, 'ok', { worker: slotName
|
|
490
|
+
this.logEvent('launch', seq, 'ok', { worker: slotName });
|
|
629
491
|
return { action: 'launch', entity: `seq:${seq}`, result: 'ok', message: `Todo → Inprogress (${slotName})` };
|
|
630
492
|
}
|
|
631
493
|
catch (err) {
|
|
632
494
|
const msg = err instanceof Error ? err.message : String(err);
|
|
633
495
|
this.log.error(`Step 7 failed (move) for seq ${seq}: ${msg}`);
|
|
634
|
-
// Rollback:
|
|
635
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
|
|
496
|
+
// Rollback: cancel worker via WM (handles kill + resource release)
|
|
636
497
|
try {
|
|
637
|
-
|
|
638
|
-
await this.agentRuntime.stopSession(slotName);
|
|
639
|
-
}
|
|
640
|
-
else {
|
|
641
|
-
await this.supervisor.kill(workerId);
|
|
642
|
-
}
|
|
498
|
+
await this.workerManager.cancel({ taskId: String(card.seq), project: this.ctx.projectName, reason: 'anomaly' });
|
|
643
499
|
}
|
|
644
500
|
catch { /* best effort */ }
|
|
645
|
-
this.supervisor.remove(workerId);
|
|
646
|
-
this.resourceLimiter.release();
|
|
647
501
|
this.releaseSlot(slotName, seq);
|
|
648
502
|
this.logEvent('launch-move', seq, 'fail', { error: msg });
|
|
649
503
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Move to Inprogress failed: ${msg}` };
|
|
650
504
|
}
|
|
651
505
|
}
|
|
652
|
-
// ─── Worker Exit Callback ───────────────────────────────────────
|
|
653
506
|
/**
|
|
654
|
-
*
|
|
655
|
-
*
|
|
507
|
+
* @deprecated Phase 1 transitional — WM's internal exit callback handles ACP inspection.
|
|
508
|
+
* Kept for edge-case fallback; will be removed when WM fully owns ACP lifecycle.
|
|
656
509
|
*/
|
|
657
|
-
async onWorkerExit(workerId, card, slotName, worktree, branch, exitCode) {
|
|
658
|
-
const handle = this.supervisor.get(workerId);
|
|
659
|
-
await this.handleWorkerFinalization(card, slotName, worktree, branch, exitCode, handle || null, 'proc');
|
|
660
|
-
}
|
|
661
510
|
async checkAcpInprogressCard(card, slotName) {
|
|
662
|
-
const runtime = this.requireAgentRuntime();
|
|
663
511
|
const seq = card.seq;
|
|
664
|
-
const inspected = await runtime.inspect(slotName);
|
|
665
|
-
const session = inspected.sessions[slotName];
|
|
666
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
|
|
667
512
|
const state = this.runtimeStore.readState();
|
|
668
513
|
const slot = state.workers[slotName];
|
|
669
514
|
if (!slot)
|
|
670
515
|
return null;
|
|
671
|
-
|
|
516
|
+
// Use WorkerManager.inspect() for normalized worker state
|
|
517
|
+
const snapshots = this.workerManager.inspect({ project: this.ctx.projectName, taskId: seq });
|
|
518
|
+
const snapshot = snapshots[0];
|
|
519
|
+
if (snapshot && (snapshot.state === 'running' || snapshot.state === 'starting')) {
|
|
520
|
+
// Worker still active — update heartbeat
|
|
672
521
|
this.runtimeStore.updateState('pipeline-acp-heartbeat', (freshState) => {
|
|
673
522
|
const freshSlot = freshState.workers[slotName];
|
|
674
523
|
if (freshSlot) {
|
|
675
|
-
this.applyAcpSessionToSlot(freshSlot, session);
|
|
676
524
|
freshSlot.lastHeartbeat = new Date().toISOString();
|
|
677
525
|
}
|
|
678
526
|
});
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
this.log.info(`seq ${seq}: worker waiting for input — ${session.pendingInput?.prompt || 'input required'}`);
|
|
701
|
-
}
|
|
702
|
-
else if (session.currentRun.status === 'needs_confirmation') {
|
|
703
|
-
this.log.warn(`seq ${seq}: worker needs confirmation — ${session.pendingInput?.prompt || 'confirmation required'}`);
|
|
704
|
-
}
|
|
705
|
-
else if (session.currentRun.status === 'stalled_submit') {
|
|
706
|
-
this.log.warn(`seq ${seq}: worker prompt submission stalled — ${session.stalledReason || 'auto-repair pending'}`);
|
|
707
|
-
}
|
|
708
|
-
}
|
|
709
|
-
if (!session.currentRun || this.isAcpRunActive(session.currentRun.status)) {
|
|
710
|
-
return null;
|
|
711
|
-
}
|
|
712
|
-
const handle = this.supervisor.updateAcpHandle(workerId, {
|
|
713
|
-
exitCode: this.acpRunExitCode(session.currentRun.status),
|
|
714
|
-
exitedAt: new Date().toISOString(),
|
|
715
|
-
sessionId: session.sessionId,
|
|
716
|
-
runId: session.currentRun.runId,
|
|
717
|
-
sessionState: session.sessionState,
|
|
718
|
-
remoteStatus: session.currentRun.status,
|
|
719
|
-
lastEventAt: session.lastSeenAt,
|
|
720
|
-
}) || this.supervisor.get(workerId) || null;
|
|
721
|
-
await this.handleWorkerFinalization(card, slotName, slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR), slot.branch || this.buildBranchName(card), this.acpRunExitCode(session.currentRun.status), handle, resolveWorkflowTransport(this.ctx.config) === 'pty' ? 'pty' : 'acp');
|
|
527
|
+
return null;
|
|
528
|
+
}
|
|
529
|
+
if (snapshot && snapshot.state === 'waiting_input') {
|
|
530
|
+
this.log.info(`seq ${seq}: worker waiting for input`);
|
|
531
|
+
return null;
|
|
532
|
+
}
|
|
533
|
+
if (snapshot && snapshot.state === 'needs_confirmation') {
|
|
534
|
+
this.log.warn(`seq ${seq}: worker needs confirmation`);
|
|
535
|
+
return null;
|
|
536
|
+
}
|
|
537
|
+
if (snapshot && snapshot.state === 'completed') {
|
|
538
|
+
this.log.ok(`seq ${seq}: ACP/PTY run completed (via WM)`);
|
|
539
|
+
return {
|
|
540
|
+
action: 'complete',
|
|
541
|
+
entity: `seq:${seq}`,
|
|
542
|
+
result: 'ok',
|
|
543
|
+
message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run completed`,
|
|
544
|
+
};
|
|
545
|
+
}
|
|
546
|
+
if (snapshot && snapshot.state === 'failed') {
|
|
547
|
+
this.log.info(`seq ${seq}: ACP/PTY run failed (via WM)`);
|
|
722
548
|
return {
|
|
723
549
|
action: 'complete',
|
|
724
550
|
entity: `seq:${seq}`,
|
|
725
|
-
result:
|
|
726
|
-
message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run
|
|
551
|
+
result: 'fail',
|
|
552
|
+
message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run failed`,
|
|
727
553
|
};
|
|
728
554
|
}
|
|
555
|
+
// No snapshot — session lost or already cleaned up
|
|
729
556
|
this.runtimeStore.updateState('pipeline-acp-lost', (freshState) => {
|
|
730
557
|
const lostSlot = freshState.workers[slotName];
|
|
731
558
|
if (lostSlot) {
|
|
@@ -735,14 +562,7 @@ export class ExecutionEngine {
|
|
|
735
562
|
lostSlot.lastHeartbeat = new Date().toISOString();
|
|
736
563
|
}
|
|
737
564
|
});
|
|
738
|
-
|
|
739
|
-
exitCode: 1,
|
|
740
|
-
exitedAt: new Date().toISOString(),
|
|
741
|
-
sessionState: 'offline',
|
|
742
|
-
remoteStatus: 'lost',
|
|
743
|
-
lastEventAt: new Date().toISOString(),
|
|
744
|
-
}) || this.supervisor.get(workerId) || null;
|
|
745
|
-
await this.handleWorkerFinalization(card, slotName, slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR), slot.branch || this.buildBranchName(card), 1, handle, resolveWorkflowTransport(this.ctx.config) === 'pty' ? 'pty' : 'acp');
|
|
565
|
+
this.log.warn(`seq ${seq}: ACP session lost — no WM snapshot found`);
|
|
746
566
|
return {
|
|
747
567
|
action: 'complete',
|
|
748
568
|
entity: `seq:${seq}`,
|
|
@@ -750,85 +570,6 @@ export class ExecutionEngine {
|
|
|
750
570
|
message: 'ACP session lost',
|
|
751
571
|
};
|
|
752
572
|
}
|
|
753
|
-
async handleWorkerFinalization(card, slotName, worktree, branch, exitCode, handle, transport) {
|
|
754
|
-
const completion = this.completionJudge.judge({
|
|
755
|
-
worktree,
|
|
756
|
-
branch,
|
|
757
|
-
baseBranch: this.ctx.mergeBranch,
|
|
758
|
-
outputFile: handle?.outputFile || null,
|
|
759
|
-
exitCode,
|
|
760
|
-
logsDir: this.ctx.paths.logsDir,
|
|
761
|
-
phase: selectWorkerPhase(card.state, this.runtimeStore.readState().leases[card.seq]?.phase),
|
|
762
|
-
});
|
|
763
|
-
const ctx = {
|
|
764
|
-
project: this.ctx.projectName,
|
|
765
|
-
seq: card.seq,
|
|
766
|
-
slot: slotName,
|
|
767
|
-
transport,
|
|
768
|
-
branch,
|
|
769
|
-
worktree,
|
|
770
|
-
baseBranch: this.ctx.mergeBranch,
|
|
771
|
-
stateFile: this.ctx.paths.stateFile,
|
|
772
|
-
maxWorkers: this.ctx.maxWorkers,
|
|
773
|
-
mrMode: this.ctx.mrMode,
|
|
774
|
-
gitlabProjectId: resolveGitlabProjectId(this.ctx.config),
|
|
775
|
-
gitlabUrl: this.ctx.config.raw.GITLAB_URL || process.env.GITLAB_URL || '',
|
|
776
|
-
gitlabToken: this.ctx.config.raw.GITLAB_TOKEN || process.env.GITLAB_TOKEN || '',
|
|
777
|
-
qaStateId: this.ctx.config.raw.PLANE_STATE_QA || this.ctx.config.raw.TRELLO_QA_LIST_ID || 'QA',
|
|
778
|
-
doneStateId: this.ctx.config.raw.PLANE_STATE_DONE || this.ctx.config.raw.TRELLO_DONE_LIST_ID || '',
|
|
779
|
-
maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
|
|
780
|
-
logsDir: this.ctx.paths.logsDir,
|
|
781
|
-
tool: handle?.tool || this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL,
|
|
782
|
-
pmStateObserved: card.state,
|
|
783
|
-
};
|
|
784
|
-
const state = this.runtimeStore.readState();
|
|
785
|
-
const retryCount = this.getRetryCount(state, card.seq);
|
|
786
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
|
|
787
|
-
try {
|
|
788
|
-
if (completion.status === 'completed') {
|
|
789
|
-
const results = await this.postActions.executeCompletion(ctx, completion, handle?.sessionId || null);
|
|
790
|
-
const allOk = results.every(r => r.ok);
|
|
791
|
-
this.log.ok(`seq ${card.seq}: PostActions completed (${allOk ? 'all ok' : 'some failures'})`);
|
|
792
|
-
}
|
|
793
|
-
else {
|
|
794
|
-
const retrySessionId = transport === 'proc' ? (handle?.sessionId || null) : null;
|
|
795
|
-
await this.postActions.executeFailure(ctx, completion, exitCode, retrySessionId, retryCount, {
|
|
796
|
-
onExit: (code) => this.onWorkerExit(workerId, card, slotName, worktree, branch, code),
|
|
797
|
-
});
|
|
798
|
-
this.log.info(`seq ${card.seq}: Failure handling done`);
|
|
799
|
-
}
|
|
800
|
-
}
|
|
801
|
-
catch (err) {
|
|
802
|
-
this.log.error(`seq ${card.seq}: PostActions error: ${err}`);
|
|
803
|
-
}
|
|
804
|
-
}
|
|
805
|
-
requireAgentRuntime() {
|
|
806
|
-
if (!this.agentRuntime) {
|
|
807
|
-
throw new Error('ACP transport requested but AgentRuntime is not configured');
|
|
808
|
-
}
|
|
809
|
-
return this.agentRuntime;
|
|
810
|
-
}
|
|
811
|
-
applyAcpSessionToSlot(slot, session) {
|
|
812
|
-
const transport = resolveWorkflowTransport(this.ctx.config) === 'pty' ? 'pty' : 'acp';
|
|
813
|
-
slot.mode = transport;
|
|
814
|
-
slot.transport = transport;
|
|
815
|
-
slot.agent = session.tool;
|
|
816
|
-
slot.tmuxSession = session.sessionName;
|
|
817
|
-
slot.sessionId = session.sessionId;
|
|
818
|
-
slot.runId = session.currentRun?.runId || null;
|
|
819
|
-
slot.sessionState = session.sessionState;
|
|
820
|
-
slot.remoteStatus = session.currentRun?.status || null;
|
|
821
|
-
slot.lastEventAt = session.lastSeenAt;
|
|
822
|
-
slot.pid = null;
|
|
823
|
-
slot.outputFile = null;
|
|
824
|
-
slot.exitCode = null;
|
|
825
|
-
}
|
|
826
|
-
isAcpRunActive(status) {
|
|
827
|
-
return ['submitted', 'running', 'waiting_input', 'needs_confirmation', 'stalled_submit'].includes(status);
|
|
828
|
-
}
|
|
829
|
-
acpRunExitCode(status) {
|
|
830
|
-
return status === 'completed' ? 0 : 1;
|
|
831
|
-
}
|
|
832
573
|
findRuntimeSlotName(state, seq, lease) {
|
|
833
574
|
if (lease?.slot && state.workers[lease.slot])
|
|
834
575
|
return lease.slot;
|