@coralai/sps-cli 0.23.22 → 0.24.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/README.md +11 -2
  2. package/dist/commands/cardDashboard.js +3 -3
  3. package/dist/commands/cardDashboard.js.map +1 -1
  4. package/dist/commands/pipelineTick.d.ts.map +1 -1
  5. package/dist/commands/pipelineTick.js +17 -5
  6. package/dist/commands/pipelineTick.js.map +1 -1
  7. package/dist/commands/qaTick.d.ts.map +1 -1
  8. package/dist/commands/qaTick.js +31 -3
  9. package/dist/commands/qaTick.js.map +1 -1
  10. package/dist/commands/status.d.ts.map +1 -1
  11. package/dist/commands/status.js +2 -5
  12. package/dist/commands/status.js.map +1 -1
  13. package/dist/commands/tick.d.ts.map +1 -1
  14. package/dist/commands/tick.js +50 -33
  15. package/dist/commands/tick.js.map +1 -1
  16. package/dist/commands/workerDashboard.d.ts.map +1 -1
  17. package/dist/commands/workerDashboard.js +9 -9
  18. package/dist/commands/workerDashboard.js.map +1 -1
  19. package/dist/commands/workerLaunch.d.ts.map +1 -1
  20. package/dist/commands/workerLaunch.js +17 -5
  21. package/dist/commands/workerLaunch.js.map +1 -1
  22. package/dist/core/acpState.js +1 -1
  23. package/dist/core/acpState.js.map +1 -1
  24. package/dist/core/runtimeSnapshot.d.ts +1 -0
  25. package/dist/core/runtimeSnapshot.d.ts.map +1 -1
  26. package/dist/core/runtimeSnapshot.js +6 -6
  27. package/dist/core/runtimeSnapshot.js.map +1 -1
  28. package/dist/core/runtimeStore.d.ts +23 -1
  29. package/dist/core/runtimeStore.d.ts.map +1 -1
  30. package/dist/core/runtimeStore.js +71 -32
  31. package/dist/core/runtimeStore.js.map +1 -1
  32. package/dist/core/state.d.ts +33 -0
  33. package/dist/core/state.d.ts.map +1 -1
  34. package/dist/core/state.js +6 -0
  35. package/dist/core/state.js.map +1 -1
  36. package/dist/core/workerRuntimeSummary.d.ts +1 -2
  37. package/dist/core/workerRuntimeSummary.d.ts.map +1 -1
  38. package/dist/core/workerRuntimeSummary.js +2 -2
  39. package/dist/core/workerRuntimeSummary.js.map +1 -1
  40. package/dist/engines/CloseoutEngine.d.ts +3 -6
  41. package/dist/engines/CloseoutEngine.d.ts.map +1 -1
  42. package/dist/engines/CloseoutEngine.js +112 -288
  43. package/dist/engines/CloseoutEngine.js.map +1 -1
  44. package/dist/engines/EventHandler.d.ts +57 -0
  45. package/dist/engines/EventHandler.d.ts.map +1 -0
  46. package/dist/engines/EventHandler.js +210 -0
  47. package/dist/engines/EventHandler.js.map +1 -0
  48. package/dist/engines/ExecutionEngine.d.ts +5 -17
  49. package/dist/engines/ExecutionEngine.d.ts.map +1 -1
  50. package/dist/engines/ExecutionEngine.js +108 -367
  51. package/dist/engines/ExecutionEngine.js.map +1 -1
  52. package/dist/engines/MonitorEngine.d.ts.map +1 -1
  53. package/dist/engines/MonitorEngine.js +8 -9
  54. package/dist/engines/MonitorEngine.js.map +1 -1
  55. package/dist/manager/integration-queue.d.ts +65 -0
  56. package/dist/manager/integration-queue.d.ts.map +1 -0
  57. package/dist/manager/integration-queue.js +123 -0
  58. package/dist/manager/integration-queue.js.map +1 -0
  59. package/dist/manager/runtime-coordinator.d.ts +1 -3
  60. package/dist/manager/runtime-coordinator.d.ts.map +1 -1
  61. package/dist/manager/runtime-coordinator.js +13 -15
  62. package/dist/manager/runtime-coordinator.js.map +1 -1
  63. package/dist/manager/worker-manager-impl.d.ts +81 -0
  64. package/dist/manager/worker-manager-impl.d.ts.map +1 -0
  65. package/dist/manager/worker-manager-impl.js +648 -0
  66. package/dist/manager/worker-manager-impl.js.map +1 -0
  67. package/dist/manager/worker-manager.d.ts +176 -0
  68. package/dist/manager/worker-manager.d.ts.map +1 -0
  69. package/dist/manager/worker-manager.js +12 -0
  70. package/dist/manager/worker-manager.js.map +1 -0
  71. package/dist/models/acp.d.ts +4 -0
  72. package/dist/models/acp.d.ts.map +1 -1
  73. package/package.json +1 -1
@@ -4,29 +4,23 @@ import { RuntimeStore } from '../core/runtimeStore.js';
4
4
  import { resolveGitlabProjectId, resolveWorkflowTransport } from '../core/config.js';
5
5
  import { resolveWorktreePath } from '../core/paths.js';
6
6
  import { readQueue } from '../core/queue.js';
7
- import { buildPhasePrompt, DEVELOPMENT_PROMPT_FILE, INTEGRATION_PROMPT_FILE, LEGACY_TASK_PROMPT_FILE, selectWorkerPhase, } from '../core/taskPrompts.js';
7
+ import { buildPhasePrompt, DEVELOPMENT_PROMPT_FILE, INTEGRATION_PROMPT_FILE, LEGACY_TASK_PROMPT_FILE, } from '../core/taskPrompts.js';
8
8
  import { Logger } from '../core/logger.js';
9
9
  const SKIP_LABELS = ['BLOCKED', 'NEEDS-FIX', 'CONFLICT', 'WAITING-CONFIRMATION', 'STALE-RUNTIME'];
10
10
  export class ExecutionEngine {
11
11
  ctx;
12
12
  taskBackend;
13
13
  repoBackend;
14
- supervisor;
15
- completionJudge;
16
- postActions;
17
- resourceLimiter;
14
+ workerManager;
18
15
  notifier;
19
16
  agentRuntime;
20
17
  log;
21
18
  runtimeStore;
22
- constructor(ctx, taskBackend, repoBackend, supervisor, completionJudge, postActions, resourceLimiter, notifier, agentRuntime) {
19
+ constructor(ctx, taskBackend, repoBackend, workerManager, notifier, agentRuntime) {
23
20
  this.ctx = ctx;
24
21
  this.taskBackend = taskBackend;
25
22
  this.repoBackend = repoBackend;
26
- this.supervisor = supervisor;
27
- this.completionJudge = completionJudge;
28
- this.postActions = postActions;
29
- this.resourceLimiter = resourceLimiter;
23
+ this.workerManager = workerManager;
30
24
  this.notifier = notifier;
31
25
  this.agentRuntime = agentRuntime;
32
26
  this.log = new Logger('pipeline', ctx.projectName, ctx.paths.logsDir);
@@ -298,15 +292,10 @@ export class ExecutionEngine {
298
292
  // Slot already released (PostActions handled it via exit callback)
299
293
  return null;
300
294
  }
301
- if (state.workers[slotName]?.transport === 'acp' ||
302
- state.workers[slotName]?.transport === 'pty' ||
303
- state.workers[slotName]?.mode === 'acp' ||
304
- state.workers[slotName]?.mode === 'pty') {
305
- return this.checkAcpInprogressCard(card, slotName);
306
- }
307
- const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
308
- const handle = this.supervisor.get(workerId);
309
- if (handle && handle.exitCode === null) {
295
+ // Use WorkerManager.inspect() to check worker state
296
+ const snapshots = this.workerManager.inspect({ project: this.ctx.projectName, taskId: seq });
297
+ const snapshot = snapshots[0];
298
+ if (snapshot && (snapshot.state === 'running' || snapshot.state === 'starting')) {
310
299
  // Worker still running — update heartbeat
311
300
  try {
312
301
  this.runtimeStore.updateState('pipeline-heartbeat', (freshState) => {
@@ -318,25 +307,28 @@ export class ExecutionEngine {
318
307
  catch { /* non-fatal */ }
319
308
  return null;
320
309
  }
321
- if (handle && handle.exitCode !== null) {
322
- // Worker exited but PostActions hasn't finished yet (or just finished)
323
- // Check if slot is now idle
324
- const freshState = this.runtimeStore.readState();
325
- if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
326
- this.log.ok(`seq ${seq}: Completed (handled by exit callback)`);
327
- return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via exit callback' };
328
- }
329
- // PostActions still processing, wait for next tick
310
+ if (snapshot && (snapshot.state === 'waiting_input' || snapshot.state === 'needs_confirmation')) {
311
+ // Worker waiting for input log and wait
312
+ this.log.info(`seq ${seq}: worker in state ${snapshot.state}`);
330
313
  return null;
331
314
  }
332
- // Handle not found in Supervisor — PostActions already removed it, or after tick restart
333
- // Re-read state to check if PostActions already completed
315
+ if (snapshot && snapshot.state === 'completed') {
316
+ // WM exit callback handled completion
317
+ this.log.ok(`seq ${seq}: Completed (handled by WM exit callback)`);
318
+ return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via WM exit callback' };
319
+ }
320
+ if (snapshot && snapshot.state === 'failed') {
321
+ // WM exit callback handled failure
322
+ this.log.info(`seq ${seq}: Failed (handled by WM exit callback)`);
323
+ return { action: 'complete', entity: `seq:${seq}`, result: 'fail', message: 'Failed via WM exit callback' };
324
+ }
325
+ // No snapshot found — WM already processed and released the slot
334
326
  const freshState = this.runtimeStore.readState();
335
327
  if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
336
- this.log.ok(`seq ${seq}: Completed (PostActions already processed)`);
337
- return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (PostActions processed)' };
328
+ this.log.ok(`seq ${seq}: Completed (WM already processed)`);
329
+ return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (WM processed)' };
338
330
  }
339
- // Still active in state but not in Supervisor — MonitorEngine/Recovery handles
331
+ // Still active in state but no snapshot — MonitorEngine/Recovery handles
340
332
  return null;
341
333
  }
342
334
  // ─── Prepare Phase (Backlog → Todo) ─────────────────────────────
@@ -407,89 +399,15 @@ export class ExecutionEngine {
407
399
  this.log.info(`[dry-run] Would launch seq ${seq}`);
408
400
  return { action: 'launch', entity: `seq:${seq}`, result: 'ok', message: 'dry-run' };
409
401
  }
410
- // Step 4: Claim worker slot
411
- // Exclude slots that failed launch this tick to prevent repeated failures
412
- const state = this.runtimeStore.readState();
413
- const idleSlots = Object.entries(state.workers)
414
- .filter(([name, w]) => w.status === 'idle' && !failedSlots.has(name));
415
- if (idleSlots.length === 0) {
416
- this.log.warn(`No idle worker slot available for seq ${seq}`);
417
- return { action: 'launch', entity: `seq:${seq}`, result: 'skip', message: 'No idle worker slot' };
418
- }
419
- const [slotName] = idleSlots[0];
420
- const sessionName = `${this.ctx.projectName}-${slotName}`;
421
- // Claim slot in state.json
422
- state.workers[slotName] = {
423
- status: 'active',
424
- seq: parseInt(seq, 10),
425
- branch: branchName,
426
- worktree: worktreePath,
427
- tmuxSession: sessionName,
428
- claimedAt: new Date().toISOString(),
429
- lastHeartbeat: new Date().toISOString(),
430
- mode: workflowTransport === 'proc'
431
- ? this.ctx.config.WORKER_MODE
432
- : workflowTransport,
433
- transport: workflowTransport,
434
- agent: (this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL),
435
- sessionId: null,
436
- runId: null,
437
- sessionState: null,
438
- remoteStatus: null,
439
- lastEventAt: null,
440
- pid: null,
441
- outputFile: null,
442
- exitCode: null,
443
- mergeRetries: 0,
444
- completedAt: null,
445
- };
446
- // Add to active cards
447
- const conflictDomains = card.labels
448
- .filter((l) => l.startsWith('conflict:'))
449
- .map((l) => l.slice('conflict:'.length));
450
- state.activeCards[seq] = {
451
- seq: parseInt(seq, 10),
452
- state: 'Todo',
453
- worker: slotName,
454
- mrUrl: null,
455
- conflictDomains,
456
- startedAt: new Date().toISOString(),
457
- };
458
- state.leases[seq] = {
459
- seq: parseInt(seq, 10),
460
- pmStateObserved: card.state,
461
- phase: 'preparing',
462
- slot: slotName,
463
- branch: branchName,
464
- worktree: worktreePath,
465
- sessionId: null,
466
- runId: null,
467
- claimedAt: state.workers[slotName].claimedAt,
468
- retryCount: 0,
469
- lastTransitionAt: new Date().toISOString(),
470
- };
471
- try {
472
- this.runtimeStore.updateState('pipeline-launch', (draft) => {
473
- draft.workers[slotName] = state.workers[slotName];
474
- draft.activeCards[seq] = state.activeCards[seq];
475
- draft.leases[seq] = state.leases[seq];
476
- });
477
- this.log.ok(`Step 4: Claimed slot ${slotName} for seq ${seq}`);
478
- }
479
- catch (err) {
480
- const msg = err instanceof Error ? err.message : String(err);
481
- this.log.error(`Step 4 failed (claim) for seq ${seq}: ${msg}`);
482
- return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Claim slot failed: ${msg}` };
483
- }
484
- // Also claim in PM backend
402
+ // Step 5: PM claim (kept in Engine — PM backend awareness)
485
403
  try {
486
- await this.taskBackend.claim(seq, slotName);
404
+ await this.taskBackend.claim(seq, `pending-wm`);
487
405
  }
488
406
  catch (err) {
489
407
  const msg = err instanceof Error ? err.message : String(err);
490
408
  this.log.warn(`PM claim for seq ${seq} failed (non-fatal): ${msg}`);
491
409
  }
492
- // Step 5: Build task context (.sps/development_prompt.txt + .sps/integration_prompt.txt)
410
+ // Step 5b: Build task context (.sps/development_prompt.txt + .sps/integration_prompt.txt)
493
411
  try {
494
412
  this.buildTaskContext(card, worktreePath);
495
413
  this.log.ok(`Step 5: Task context built for seq ${seq}`);
@@ -497,117 +415,61 @@ export class ExecutionEngine {
497
415
  catch (err) {
498
416
  const msg = err instanceof Error ? err.message : String(err);
499
417
  this.log.error(`Step 5 failed (context) for seq ${seq}: ${msg}`);
500
- this.releaseSlot(slotName, seq);
501
418
  this.logEvent('launch-context', seq, 'fail', { error: msg });
502
419
  return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Context build failed: ${msg}` };
503
420
  }
504
- // Step 6: Launch worker via Supervisor
421
+ // Step 6: Launch worker via WorkerManager.run()
422
+ const logsDir = this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`;
423
+ const promptFile = resolve(worktreePath, '.sps', LEGACY_TASK_PROMPT_FILE);
424
+ let prompt;
505
425
  try {
506
- const promptFile = resolve(worktreePath, '.sps', LEGACY_TASK_PROMPT_FILE);
507
- // Check global resource limit
508
- const acquire = this.resourceLimiter.tryAcquireDetailed();
509
- if (!acquire.acquired) {
510
- const reason = this.resourceLimiter.formatBlockReason(acquire.stats);
511
- this.log.warn(`Global resource limit reached, skipping seq ${seq}: ${reason}`);
512
- // Rollback: release slot
513
- this.releaseSlot(slotName, seq);
514
- return {
515
- action: 'launch',
516
- entity: `seq:${seq}`,
517
- result: 'skip',
518
- message: `Global resource limit reached: ${reason}`,
519
- };
520
- }
521
- await this.resourceLimiter.enforceStagger();
522
- const prompt = readFileSync(promptFile, 'utf-8').trim();
523
- const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
524
- if (workflowTransport !== 'proc') {
525
- const runtime = this.requireAgentRuntime();
526
- const session = await runtime.startRun(slotName, prompt, (this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL), worktreePath);
527
- this.runtimeStore.updateState('pipeline-launch-acp', (freshState) => {
528
- if (freshState.workers[slotName]) {
529
- this.applyAcpSessionToSlot(freshState.workers[slotName], session);
530
- if (freshState.leases[seq]) {
531
- freshState.leases[seq].sessionId = session.sessionId;
532
- freshState.leases[seq].runId = session.currentRun?.runId || null;
533
- freshState.leases[seq].phase = session.pendingInput ? 'waiting_confirmation' : 'coding';
534
- freshState.leases[seq].lastTransitionAt = new Date().toISOString();
535
- }
536
- }
537
- });
538
- this.supervisor.registerAcpHandle({
539
- id: workerId,
540
- pid: null,
541
- outputFile: null,
542
- project: this.ctx.projectName,
543
- seq: card.seq,
544
- slot: slotName,
545
- branch: branchName,
546
- worktree: worktreePath,
547
- tool: session.tool,
548
- exitCode: null,
549
- sessionId: session.sessionId,
550
- runId: session.currentRun?.runId || null,
551
- sessionState: session.sessionState,
552
- remoteStatus: session.currentRun?.status || null,
553
- lastEventAt: session.lastSeenAt,
554
- startedAt: new Date().toISOString(),
555
- exitedAt: null,
556
- });
557
- this.log.ok(`Step 6: ${workflowTransport.toUpperCase()} worker launched for seq ${seq} ` +
558
- `(session=${session.sessionId}, run=${session.currentRun?.runId || 'none'})`);
559
- }
560
- else {
561
- const outputFile = resolve(this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`, `${sessionName}-${Date.now()}.jsonl`);
562
- const workerHandle = this.supervisor.spawn({
563
- id: workerId,
564
- project: this.ctx.projectName,
565
- seq: card.seq,
566
- slot: slotName,
567
- worktree: worktreePath,
568
- branch: branchName,
569
- prompt,
570
- outputFile,
571
- tool: this.ctx.config.WORKER_TOOL,
572
- onExit: (exitCode) => {
573
- this.onWorkerExit(workerId, card, slotName, worktreePath, branchName, exitCode);
574
- },
575
- });
576
- // Store process info in state
577
- this.runtimeStore.updateState('pipeline-launch-print', (freshState) => {
578
- if (freshState.workers[slotName]) {
579
- freshState.workers[slotName].mode = 'print';
580
- freshState.workers[slotName].transport = 'proc';
581
- freshState.workers[slotName].agent = this.ctx.config.WORKER_TOOL;
582
- freshState.workers[slotName].pid = workerHandle.pid;
583
- freshState.workers[slotName].outputFile = workerHandle.outputFile;
584
- freshState.workers[slotName].sessionId = workerHandle.sessionId || null;
585
- freshState.workers[slotName].runId = null;
586
- freshState.workers[slotName].sessionState = null;
587
- freshState.workers[slotName].remoteStatus = null;
588
- freshState.workers[slotName].lastEventAt = null;
589
- freshState.workers[slotName].exitCode = null;
590
- if (freshState.leases[seq]) {
591
- freshState.leases[seq].phase = 'coding';
592
- freshState.leases[seq].lastTransitionAt = new Date().toISOString();
593
- }
594
- }
595
- });
596
- this.log.ok(`Step 6: Worker launched for seq ${seq} (pid=${workerHandle.pid})`);
597
- }
598
- if (this.notifier) {
599
- await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
600
- }
426
+ prompt = readFileSync(promptFile, 'utf-8').trim();
601
427
  }
602
428
  catch (err) {
603
429
  const msg = err instanceof Error ? err.message : String(err);
604
- this.log.error(`Step 6 failed (worker launch) for seq ${seq}: ${msg}`);
605
- failedSlots.add(slotName);
606
- this.resourceLimiter.release();
607
- this.releaseSlot(slotName, seq);
430
+ this.log.error(`Failed to read prompt file for seq ${seq}: ${msg}`);
431
+ return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Prompt file read failed: ${msg}` };
432
+ }
433
+ const runRequest = {
434
+ taskId: String(card.seq),
435
+ cardId: String(card.seq),
436
+ project: this.ctx.projectName,
437
+ phase: 'development',
438
+ prompt,
439
+ cwd: worktreePath,
440
+ branch: branchName,
441
+ targetBranch: this.ctx.mergeBranch,
442
+ tool: this.ctx.config.WORKER_TOOL,
443
+ transport: workflowTransport,
444
+ outputFile: resolve(logsDir, `${this.ctx.projectName}-worker-${card.seq}-${Date.now()}.jsonl`),
445
+ timeoutSec: this.ctx.config.WORKER_LAUNCH_TIMEOUT_S,
446
+ maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
447
+ };
448
+ let response;
449
+ try {
450
+ response = await this.workerManager.run(runRequest);
451
+ }
452
+ catch (err) {
453
+ const msg = err instanceof Error ? err.message : String(err);
454
+ this.log.error(`Step 6 failed (WM.run) for seq ${seq}: ${msg}`);
455
+ failedSlots.add(`wm-error-${seq}`);
608
456
  this.logEvent('launch-worker', seq, 'fail', { error: msg });
609
457
  return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Worker launch failed: ${msg}` };
610
458
  }
459
+ if (!response.accepted) {
460
+ this.log.warn(`WM rejected seq ${seq}: ${response.rejectReason}`);
461
+ return {
462
+ action: 'launch',
463
+ entity: `seq:${seq}`,
464
+ result: response.rejectReason === 'resource_exhausted' ? 'skip' : 'fail',
465
+ message: `WM rejected: ${response.rejectReason}`,
466
+ };
467
+ }
468
+ const slotName = response.slot;
469
+ this.log.ok(`Step 6: WM launched worker for seq ${seq} (slot=${slotName}, pid=${response.pid ?? 'n/a'})`);
470
+ if (this.notifier) {
471
+ await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
472
+ }
611
473
  // Step 7: Move card to Inprogress
612
474
  try {
613
475
  await this.taskBackend.move(seq, 'Inprogress');
@@ -625,107 +487,72 @@ export class ExecutionEngine {
625
487
  }
626
488
  });
627
489
  this.log.ok(`Step 7: Moved seq ${seq} Todo → Inprogress`);
628
- this.logEvent('launch', seq, 'ok', { worker: slotName, session: sessionName });
490
+ this.logEvent('launch', seq, 'ok', { worker: slotName });
629
491
  return { action: 'launch', entity: `seq:${seq}`, result: 'ok', message: `Todo → Inprogress (${slotName})` };
630
492
  }
631
493
  catch (err) {
632
494
  const msg = err instanceof Error ? err.message : String(err);
633
495
  this.log.error(`Step 7 failed (move) for seq ${seq}: ${msg}`);
634
- // Rollback: kill worker, release slot
635
- const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
496
+ // Rollback: cancel worker via WM (handles kill + resource release)
636
497
  try {
637
- if (workflowTransport !== 'proc' && this.agentRuntime) {
638
- await this.agentRuntime.stopSession(slotName);
639
- }
640
- else {
641
- await this.supervisor.kill(workerId);
642
- }
498
+ await this.workerManager.cancel({ taskId: String(card.seq), project: this.ctx.projectName, reason: 'anomaly' });
643
499
  }
644
500
  catch { /* best effort */ }
645
- this.supervisor.remove(workerId);
646
- this.resourceLimiter.release();
647
501
  this.releaseSlot(slotName, seq);
648
502
  this.logEvent('launch-move', seq, 'fail', { error: msg });
649
503
  return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Move to Inprogress failed: ${msg}` };
650
504
  }
651
505
  }
652
- // ─── Worker Exit Callback ───────────────────────────────────────
653
506
  /**
654
- * Called by Supervisor when a worker process exits.
655
- * Wires CompletionJudge PostActions to handle completion or failure.
507
+ * @deprecated Phase 1 transitional WM's internal exit callback handles ACP inspection.
508
+ * Kept for edge-case fallback; will be removed when WM fully owns ACP lifecycle.
656
509
  */
657
- async onWorkerExit(workerId, card, slotName, worktree, branch, exitCode) {
658
- const handle = this.supervisor.get(workerId);
659
- await this.handleWorkerFinalization(card, slotName, worktree, branch, exitCode, handle || null, 'proc');
660
- }
661
510
  async checkAcpInprogressCard(card, slotName) {
662
- const runtime = this.requireAgentRuntime();
663
511
  const seq = card.seq;
664
- const inspected = await runtime.inspect(slotName);
665
- const session = inspected.sessions[slotName];
666
- const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
667
512
  const state = this.runtimeStore.readState();
668
513
  const slot = state.workers[slotName];
669
514
  if (!slot)
670
515
  return null;
671
- if (session) {
516
+ // Use WorkerManager.inspect() for normalized worker state
517
+ const snapshots = this.workerManager.inspect({ project: this.ctx.projectName, taskId: seq });
518
+ const snapshot = snapshots[0];
519
+ if (snapshot && (snapshot.state === 'running' || snapshot.state === 'starting')) {
520
+ // Worker still active — update heartbeat
672
521
  this.runtimeStore.updateState('pipeline-acp-heartbeat', (freshState) => {
673
522
  const freshSlot = freshState.workers[slotName];
674
523
  if (freshSlot) {
675
- this.applyAcpSessionToSlot(freshSlot, session);
676
524
  freshSlot.lastHeartbeat = new Date().toISOString();
677
525
  }
678
526
  });
679
- this.supervisor.registerAcpHandle({
680
- id: workerId,
681
- pid: null,
682
- outputFile: null,
683
- project: this.ctx.projectName,
684
- seq,
685
- slot: slotName,
686
- branch: slot.branch || this.buildBranchName(card),
687
- worktree: slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR),
688
- tool: session.tool,
689
- exitCode: null,
690
- sessionId: session.sessionId,
691
- runId: session.currentRun?.runId || null,
692
- sessionState: session.sessionState,
693
- remoteStatus: session.currentRun?.status || null,
694
- lastEventAt: session.lastSeenAt,
695
- startedAt: slot.claimedAt || new Date().toISOString(),
696
- exitedAt: null,
697
- });
698
- if (session.currentRun?.status && session.currentRun.status !== slot.remoteStatus) {
699
- if (session.currentRun.status === 'waiting_input') {
700
- this.log.info(`seq ${seq}: worker waiting for input — ${session.pendingInput?.prompt || 'input required'}`);
701
- }
702
- else if (session.currentRun.status === 'needs_confirmation') {
703
- this.log.warn(`seq ${seq}: worker needs confirmation — ${session.pendingInput?.prompt || 'confirmation required'}`);
704
- }
705
- else if (session.currentRun.status === 'stalled_submit') {
706
- this.log.warn(`seq ${seq}: worker prompt submission stalled — ${session.stalledReason || 'auto-repair pending'}`);
707
- }
708
- }
709
- if (!session.currentRun || this.isAcpRunActive(session.currentRun.status)) {
710
- return null;
711
- }
712
- const handle = this.supervisor.updateAcpHandle(workerId, {
713
- exitCode: this.acpRunExitCode(session.currentRun.status),
714
- exitedAt: new Date().toISOString(),
715
- sessionId: session.sessionId,
716
- runId: session.currentRun.runId,
717
- sessionState: session.sessionState,
718
- remoteStatus: session.currentRun.status,
719
- lastEventAt: session.lastSeenAt,
720
- }) || this.supervisor.get(workerId) || null;
721
- await this.handleWorkerFinalization(card, slotName, slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR), slot.branch || this.buildBranchName(card), this.acpRunExitCode(session.currentRun.status), handle, resolveWorkflowTransport(this.ctx.config) === 'pty' ? 'pty' : 'acp');
527
+ return null;
528
+ }
529
+ if (snapshot && snapshot.state === 'waiting_input') {
530
+ this.log.info(`seq ${seq}: worker waiting for input`);
531
+ return null;
532
+ }
533
+ if (snapshot && snapshot.state === 'needs_confirmation') {
534
+ this.log.warn(`seq ${seq}: worker needs confirmation`);
535
+ return null;
536
+ }
537
+ if (snapshot && snapshot.state === 'completed') {
538
+ this.log.ok(`seq ${seq}: ACP/PTY run completed (via WM)`);
539
+ return {
540
+ action: 'complete',
541
+ entity: `seq:${seq}`,
542
+ result: 'ok',
543
+ message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run completed`,
544
+ };
545
+ }
546
+ if (snapshot && snapshot.state === 'failed') {
547
+ this.log.info(`seq ${seq}: ACP/PTY run failed (via WM)`);
722
548
  return {
723
549
  action: 'complete',
724
550
  entity: `seq:${seq}`,
725
- result: session.currentRun.status === 'completed' ? 'ok' : 'fail',
726
- message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run ${session.currentRun.status}`,
551
+ result: 'fail',
552
+ message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run failed`,
727
553
  };
728
554
  }
555
+ // No snapshot — session lost or already cleaned up
729
556
  this.runtimeStore.updateState('pipeline-acp-lost', (freshState) => {
730
557
  const lostSlot = freshState.workers[slotName];
731
558
  if (lostSlot) {
@@ -735,14 +562,7 @@ export class ExecutionEngine {
735
562
  lostSlot.lastHeartbeat = new Date().toISOString();
736
563
  }
737
564
  });
738
- const handle = this.supervisor.updateAcpHandle(workerId, {
739
- exitCode: 1,
740
- exitedAt: new Date().toISOString(),
741
- sessionState: 'offline',
742
- remoteStatus: 'lost',
743
- lastEventAt: new Date().toISOString(),
744
- }) || this.supervisor.get(workerId) || null;
745
- await this.handleWorkerFinalization(card, slotName, slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR), slot.branch || this.buildBranchName(card), 1, handle, resolveWorkflowTransport(this.ctx.config) === 'pty' ? 'pty' : 'acp');
565
+ this.log.warn(`seq ${seq}: ACP session lost — no WM snapshot found`);
746
566
  return {
747
567
  action: 'complete',
748
568
  entity: `seq:${seq}`,
@@ -750,85 +570,6 @@ export class ExecutionEngine {
750
570
  message: 'ACP session lost',
751
571
  };
752
572
  }
753
- async handleWorkerFinalization(card, slotName, worktree, branch, exitCode, handle, transport) {
754
- const completion = this.completionJudge.judge({
755
- worktree,
756
- branch,
757
- baseBranch: this.ctx.mergeBranch,
758
- outputFile: handle?.outputFile || null,
759
- exitCode,
760
- logsDir: this.ctx.paths.logsDir,
761
- phase: selectWorkerPhase(card.state, this.runtimeStore.readState().leases[card.seq]?.phase),
762
- });
763
- const ctx = {
764
- project: this.ctx.projectName,
765
- seq: card.seq,
766
- slot: slotName,
767
- transport,
768
- branch,
769
- worktree,
770
- baseBranch: this.ctx.mergeBranch,
771
- stateFile: this.ctx.paths.stateFile,
772
- maxWorkers: this.ctx.maxWorkers,
773
- mrMode: this.ctx.mrMode,
774
- gitlabProjectId: resolveGitlabProjectId(this.ctx.config),
775
- gitlabUrl: this.ctx.config.raw.GITLAB_URL || process.env.GITLAB_URL || '',
776
- gitlabToken: this.ctx.config.raw.GITLAB_TOKEN || process.env.GITLAB_TOKEN || '',
777
- qaStateId: this.ctx.config.raw.PLANE_STATE_QA || this.ctx.config.raw.TRELLO_QA_LIST_ID || 'QA',
778
- doneStateId: this.ctx.config.raw.PLANE_STATE_DONE || this.ctx.config.raw.TRELLO_DONE_LIST_ID || '',
779
- maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
780
- logsDir: this.ctx.paths.logsDir,
781
- tool: handle?.tool || this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL,
782
- pmStateObserved: card.state,
783
- };
784
- const state = this.runtimeStore.readState();
785
- const retryCount = this.getRetryCount(state, card.seq);
786
- const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
787
- try {
788
- if (completion.status === 'completed') {
789
- const results = await this.postActions.executeCompletion(ctx, completion, handle?.sessionId || null);
790
- const allOk = results.every(r => r.ok);
791
- this.log.ok(`seq ${card.seq}: PostActions completed (${allOk ? 'all ok' : 'some failures'})`);
792
- }
793
- else {
794
- const retrySessionId = transport === 'proc' ? (handle?.sessionId || null) : null;
795
- await this.postActions.executeFailure(ctx, completion, exitCode, retrySessionId, retryCount, {
796
- onExit: (code) => this.onWorkerExit(workerId, card, slotName, worktree, branch, code),
797
- });
798
- this.log.info(`seq ${card.seq}: Failure handling done`);
799
- }
800
- }
801
- catch (err) {
802
- this.log.error(`seq ${card.seq}: PostActions error: ${err}`);
803
- }
804
- }
805
- requireAgentRuntime() {
806
- if (!this.agentRuntime) {
807
- throw new Error('ACP transport requested but AgentRuntime is not configured');
808
- }
809
- return this.agentRuntime;
810
- }
811
- applyAcpSessionToSlot(slot, session) {
812
- const transport = resolveWorkflowTransport(this.ctx.config) === 'pty' ? 'pty' : 'acp';
813
- slot.mode = transport;
814
- slot.transport = transport;
815
- slot.agent = session.tool;
816
- slot.tmuxSession = session.sessionName;
817
- slot.sessionId = session.sessionId;
818
- slot.runId = session.currentRun?.runId || null;
819
- slot.sessionState = session.sessionState;
820
- slot.remoteStatus = session.currentRun?.status || null;
821
- slot.lastEventAt = session.lastSeenAt;
822
- slot.pid = null;
823
- slot.outputFile = null;
824
- slot.exitCode = null;
825
- }
826
- isAcpRunActive(status) {
827
- return ['submitted', 'running', 'waiting_input', 'needs_confirmation', 'stalled_submit'].includes(status);
828
- }
829
- acpRunExitCode(status) {
830
- return status === 'completed' ? 0 : 1;
831
- }
832
573
  findRuntimeSlotName(state, seq, lease) {
833
574
  if (lease?.slot && state.workers[lease.slot])
834
575
  return lease.slot;