@coralai/sps-cli 0.23.21 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/README.md +16 -7
  2. package/dist/commands/cardDashboard.js +3 -3
  3. package/dist/commands/cardDashboard.js.map +1 -1
  4. package/dist/commands/pipelineTick.d.ts.map +1 -1
  5. package/dist/commands/pipelineTick.js +19 -6
  6. package/dist/commands/pipelineTick.js.map +1 -1
  7. package/dist/commands/qaTick.d.ts.map +1 -1
  8. package/dist/commands/qaTick.js +33 -4
  9. package/dist/commands/qaTick.js.map +1 -1
  10. package/dist/commands/status.d.ts.map +1 -1
  11. package/dist/commands/status.js +2 -5
  12. package/dist/commands/status.js.map +1 -1
  13. package/dist/commands/tick.d.ts.map +1 -1
  14. package/dist/commands/tick.js +56 -35
  15. package/dist/commands/tick.js.map +1 -1
  16. package/dist/commands/workerDashboard.d.ts.map +1 -1
  17. package/dist/commands/workerDashboard.js +9 -9
  18. package/dist/commands/workerDashboard.js.map +1 -1
  19. package/dist/commands/workerLaunch.d.ts.map +1 -1
  20. package/dist/commands/workerLaunch.js +19 -6
  21. package/dist/commands/workerLaunch.js.map +1 -1
  22. package/dist/core/acpState.js +1 -1
  23. package/dist/core/acpState.js.map +1 -1
  24. package/dist/core/config.d.ts +9 -0
  25. package/dist/core/config.d.ts.map +1 -1
  26. package/dist/core/config.js +13 -0
  27. package/dist/core/config.js.map +1 -1
  28. package/dist/core/runtimeSnapshot.d.ts +1 -0
  29. package/dist/core/runtimeSnapshot.d.ts.map +1 -1
  30. package/dist/core/runtimeSnapshot.js +6 -6
  31. package/dist/core/runtimeSnapshot.js.map +1 -1
  32. package/dist/core/runtimeStore.d.ts +23 -1
  33. package/dist/core/runtimeStore.d.ts.map +1 -1
  34. package/dist/core/runtimeStore.js +71 -32
  35. package/dist/core/runtimeStore.js.map +1 -1
  36. package/dist/core/state.d.ts +33 -0
  37. package/dist/core/state.d.ts.map +1 -1
  38. package/dist/core/state.js +6 -0
  39. package/dist/core/state.js.map +1 -1
  40. package/dist/core/taskPrompts.d.ts.map +1 -1
  41. package/dist/core/taskPrompts.js +13 -9
  42. package/dist/core/taskPrompts.js.map +1 -1
  43. package/dist/core/workerRuntimeSummary.d.ts +1 -2
  44. package/dist/core/workerRuntimeSummary.d.ts.map +1 -1
  45. package/dist/core/workerRuntimeSummary.js +2 -2
  46. package/dist/core/workerRuntimeSummary.js.map +1 -1
  47. package/dist/engines/CloseoutEngine.d.ts +3 -6
  48. package/dist/engines/CloseoutEngine.d.ts.map +1 -1
  49. package/dist/engines/CloseoutEngine.js +113 -285
  50. package/dist/engines/CloseoutEngine.js.map +1 -1
  51. package/dist/engines/EventHandler.d.ts +57 -0
  52. package/dist/engines/EventHandler.d.ts.map +1 -0
  53. package/dist/engines/EventHandler.js +210 -0
  54. package/dist/engines/EventHandler.js.map +1 -0
  55. package/dist/engines/ExecutionEngine.d.ts +5 -17
  56. package/dist/engines/ExecutionEngine.d.ts.map +1 -1
  57. package/dist/engines/ExecutionEngine.js +110 -368
  58. package/dist/engines/ExecutionEngine.js.map +1 -1
  59. package/dist/engines/MonitorEngine.d.ts.map +1 -1
  60. package/dist/engines/MonitorEngine.js +8 -9
  61. package/dist/engines/MonitorEngine.js.map +1 -1
  62. package/dist/manager/integration-queue.d.ts +65 -0
  63. package/dist/manager/integration-queue.d.ts.map +1 -0
  64. package/dist/manager/integration-queue.js +123 -0
  65. package/dist/manager/integration-queue.js.map +1 -0
  66. package/dist/manager/recovery.d.ts.map +1 -1
  67. package/dist/manager/recovery.js +10 -9
  68. package/dist/manager/recovery.js.map +1 -1
  69. package/dist/manager/runtime-coordinator.d.ts +1 -3
  70. package/dist/manager/runtime-coordinator.d.ts.map +1 -1
  71. package/dist/manager/runtime-coordinator.js +13 -15
  72. package/dist/manager/runtime-coordinator.js.map +1 -1
  73. package/dist/manager/worker-manager-impl.d.ts +81 -0
  74. package/dist/manager/worker-manager-impl.d.ts.map +1 -0
  75. package/dist/manager/worker-manager-impl.js +648 -0
  76. package/dist/manager/worker-manager-impl.js.map +1 -0
  77. package/dist/manager/worker-manager.d.ts +176 -0
  78. package/dist/manager/worker-manager.d.ts.map +1 -0
  79. package/dist/manager/worker-manager.js +12 -0
  80. package/dist/manager/worker-manager.js.map +1 -0
  81. package/dist/models/acp.d.ts +4 -0
  82. package/dist/models/acp.d.ts.map +1 -1
  83. package/package.json +1 -1
@@ -1,32 +1,26 @@
1
1
  import { writeFileSync, readFileSync, mkdirSync, existsSync } from 'node:fs';
2
2
  import { resolve } from 'node:path';
3
3
  import { RuntimeStore } from '../core/runtimeStore.js';
4
- import { resolveGitlabProjectId } from '../core/config.js';
4
+ import { resolveGitlabProjectId, resolveWorkflowTransport } from '../core/config.js';
5
5
  import { resolveWorktreePath } from '../core/paths.js';
6
6
  import { readQueue } from '../core/queue.js';
7
- import { buildPhasePrompt, DEVELOPMENT_PROMPT_FILE, INTEGRATION_PROMPT_FILE, LEGACY_TASK_PROMPT_FILE, selectWorkerPhase, } from '../core/taskPrompts.js';
7
+ import { buildPhasePrompt, DEVELOPMENT_PROMPT_FILE, INTEGRATION_PROMPT_FILE, LEGACY_TASK_PROMPT_FILE, } from '../core/taskPrompts.js';
8
8
  import { Logger } from '../core/logger.js';
9
9
  const SKIP_LABELS = ['BLOCKED', 'NEEDS-FIX', 'CONFLICT', 'WAITING-CONFIRMATION', 'STALE-RUNTIME'];
10
10
  export class ExecutionEngine {
11
11
  ctx;
12
12
  taskBackend;
13
13
  repoBackend;
14
- supervisor;
15
- completionJudge;
16
- postActions;
17
- resourceLimiter;
14
+ workerManager;
18
15
  notifier;
19
16
  agentRuntime;
20
17
  log;
21
18
  runtimeStore;
22
- constructor(ctx, taskBackend, repoBackend, supervisor, completionJudge, postActions, resourceLimiter, notifier, agentRuntime) {
19
+ constructor(ctx, taskBackend, repoBackend, workerManager, notifier, agentRuntime) {
23
20
  this.ctx = ctx;
24
21
  this.taskBackend = taskBackend;
25
22
  this.repoBackend = repoBackend;
26
- this.supervisor = supervisor;
27
- this.completionJudge = completionJudge;
28
- this.postActions = postActions;
29
- this.resourceLimiter = resourceLimiter;
23
+ this.workerManager = workerManager;
30
24
  this.notifier = notifier;
31
25
  this.agentRuntime = agentRuntime;
32
26
  this.log = new Logger('pipeline', ctx.projectName, ctx.paths.logsDir);
@@ -298,15 +292,10 @@ export class ExecutionEngine {
298
292
  // Slot already released (PostActions handled it via exit callback)
299
293
  return null;
300
294
  }
301
- if (state.workers[slotName]?.transport === 'acp' ||
302
- state.workers[slotName]?.transport === 'pty' ||
303
- state.workers[slotName]?.mode === 'acp' ||
304
- state.workers[slotName]?.mode === 'pty') {
305
- return this.checkAcpInprogressCard(card, slotName);
306
- }
307
- const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
308
- const handle = this.supervisor.get(workerId);
309
- if (handle && handle.exitCode === null) {
295
+ // Use WorkerManager.inspect() to check worker state
296
+ const snapshots = this.workerManager.inspect({ project: this.ctx.projectName, taskId: seq });
297
+ const snapshot = snapshots[0];
298
+ if (snapshot && (snapshot.state === 'running' || snapshot.state === 'starting')) {
310
299
  // Worker still running — update heartbeat
311
300
  try {
312
301
  this.runtimeStore.updateState('pipeline-heartbeat', (freshState) => {
@@ -318,25 +307,28 @@ export class ExecutionEngine {
318
307
  catch { /* non-fatal */ }
319
308
  return null;
320
309
  }
321
- if (handle && handle.exitCode !== null) {
322
- // Worker exited but PostActions hasn't finished yet (or just finished)
323
- // Check if slot is now idle
324
- const freshState = this.runtimeStore.readState();
325
- if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
326
- this.log.ok(`seq ${seq}: Completed (handled by exit callback)`);
327
- return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via exit callback' };
328
- }
329
- // PostActions still processing, wait for next tick
310
+ if (snapshot && (snapshot.state === 'waiting_input' || snapshot.state === 'needs_confirmation')) {
311
+ // Worker waiting for input — log and wait
312
+ this.log.info(`seq ${seq}: worker in state ${snapshot.state}`);
330
313
  return null;
331
314
  }
332
- // Handle not found in Supervisor — PostActions already removed it, or after tick restart
333
- // Re-read state to check if PostActions already completed
315
+ if (snapshot && snapshot.state === 'completed') {
316
+ // WM exit callback handled completion
317
+ this.log.ok(`seq ${seq}: Completed (handled by WM exit callback)`);
318
+ return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via WM exit callback' };
319
+ }
320
+ if (snapshot && snapshot.state === 'failed') {
321
+ // WM exit callback handled failure
322
+ this.log.info(`seq ${seq}: Failed (handled by WM exit callback)`);
323
+ return { action: 'complete', entity: `seq:${seq}`, result: 'fail', message: 'Failed via WM exit callback' };
324
+ }
325
+ // No snapshot found — WM already processed and released the slot
334
326
  const freshState = this.runtimeStore.readState();
335
327
  if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
336
- this.log.ok(`seq ${seq}: Completed (PostActions already processed)`);
337
- return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (PostActions processed)' };
328
+ this.log.ok(`seq ${seq}: Completed (WM already processed)`);
329
+ return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (WM processed)' };
338
330
  }
339
- // Still active in state but not in Supervisor — MonitorEngine/Recovery handles
331
+ // Still active in state but no snapshot — MonitorEngine/Recovery handles
340
332
  return null;
341
333
  }
342
334
  // ─── Prepare Phase (Backlog → Todo) ─────────────────────────────
@@ -402,93 +394,20 @@ export class ExecutionEngine {
402
394
  const seq = card.seq;
403
395
  const branchName = this.buildBranchName(card);
404
396
  const worktreePath = resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR);
397
+ const workflowTransport = resolveWorkflowTransport(this.ctx.config);
405
398
  if (opts.dryRun) {
406
399
  this.log.info(`[dry-run] Would launch seq ${seq}`);
407
400
  return { action: 'launch', entity: `seq:${seq}`, result: 'ok', message: 'dry-run' };
408
401
  }
409
- // Step 4: Claim worker slot
410
- // Exclude slots that failed launch this tick to prevent repeated failures
411
- const state = this.runtimeStore.readState();
412
- const idleSlots = Object.entries(state.workers)
413
- .filter(([name, w]) => w.status === 'idle' && !failedSlots.has(name));
414
- if (idleSlots.length === 0) {
415
- this.log.warn(`No idle worker slot available for seq ${seq}`);
416
- return { action: 'launch', entity: `seq:${seq}`, result: 'skip', message: 'No idle worker slot' };
417
- }
418
- const [slotName] = idleSlots[0];
419
- const sessionName = `${this.ctx.projectName}-${slotName}`;
420
- // Claim slot in state.json
421
- state.workers[slotName] = {
422
- status: 'active',
423
- seq: parseInt(seq, 10),
424
- branch: branchName,
425
- worktree: worktreePath,
426
- tmuxSession: sessionName,
427
- claimedAt: new Date().toISOString(),
428
- lastHeartbeat: new Date().toISOString(),
429
- mode: this.ctx.config.WORKER_TRANSPORT === 'proc'
430
- ? this.ctx.config.WORKER_MODE
431
- : this.ctx.config.WORKER_TRANSPORT,
432
- transport: this.ctx.config.WORKER_TRANSPORT,
433
- agent: (this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL),
434
- sessionId: null,
435
- runId: null,
436
- sessionState: null,
437
- remoteStatus: null,
438
- lastEventAt: null,
439
- pid: null,
440
- outputFile: null,
441
- exitCode: null,
442
- mergeRetries: 0,
443
- completedAt: null,
444
- };
445
- // Add to active cards
446
- const conflictDomains = card.labels
447
- .filter((l) => l.startsWith('conflict:'))
448
- .map((l) => l.slice('conflict:'.length));
449
- state.activeCards[seq] = {
450
- seq: parseInt(seq, 10),
451
- state: 'Todo',
452
- worker: slotName,
453
- mrUrl: null,
454
- conflictDomains,
455
- startedAt: new Date().toISOString(),
456
- };
457
- state.leases[seq] = {
458
- seq: parseInt(seq, 10),
459
- pmStateObserved: card.state,
460
- phase: 'preparing',
461
- slot: slotName,
462
- branch: branchName,
463
- worktree: worktreePath,
464
- sessionId: null,
465
- runId: null,
466
- claimedAt: state.workers[slotName].claimedAt,
467
- retryCount: 0,
468
- lastTransitionAt: new Date().toISOString(),
469
- };
470
- try {
471
- this.runtimeStore.updateState('pipeline-launch', (draft) => {
472
- draft.workers[slotName] = state.workers[slotName];
473
- draft.activeCards[seq] = state.activeCards[seq];
474
- draft.leases[seq] = state.leases[seq];
475
- });
476
- this.log.ok(`Step 4: Claimed slot ${slotName} for seq ${seq}`);
477
- }
478
- catch (err) {
479
- const msg = err instanceof Error ? err.message : String(err);
480
- this.log.error(`Step 4 failed (claim) for seq ${seq}: ${msg}`);
481
- return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Claim slot failed: ${msg}` };
482
- }
483
- // Also claim in PM backend
402
+ // Step 5: PM claim (kept in Engine — PM backend awareness)
484
403
  try {
485
- await this.taskBackend.claim(seq, slotName);
404
+ await this.taskBackend.claim(seq, `pending-wm`);
486
405
  }
487
406
  catch (err) {
488
407
  const msg = err instanceof Error ? err.message : String(err);
489
408
  this.log.warn(`PM claim for seq ${seq} failed (non-fatal): ${msg}`);
490
409
  }
491
- // Step 5: Build task context (.sps/development_prompt.txt + .sps/integration_prompt.txt)
410
+ // Step 5b: Build task context (.sps/development_prompt.txt + .sps/integration_prompt.txt)
492
411
  try {
493
412
  this.buildTaskContext(card, worktreePath);
494
413
  this.log.ok(`Step 5: Task context built for seq ${seq}`);
@@ -496,117 +415,61 @@ export class ExecutionEngine {
496
415
  catch (err) {
497
416
  const msg = err instanceof Error ? err.message : String(err);
498
417
  this.log.error(`Step 5 failed (context) for seq ${seq}: ${msg}`);
499
- this.releaseSlot(slotName, seq);
500
418
  this.logEvent('launch-context', seq, 'fail', { error: msg });
501
419
  return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Context build failed: ${msg}` };
502
420
  }
503
- // Step 6: Launch worker via Supervisor
421
+ // Step 6: Launch worker via WorkerManager.run()
422
+ const logsDir = this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`;
423
+ const promptFile = resolve(worktreePath, '.sps', LEGACY_TASK_PROMPT_FILE);
424
+ let prompt;
504
425
  try {
505
- const promptFile = resolve(worktreePath, '.sps', LEGACY_TASK_PROMPT_FILE);
506
- // Check global resource limit
507
- const acquire = this.resourceLimiter.tryAcquireDetailed();
508
- if (!acquire.acquired) {
509
- const reason = this.resourceLimiter.formatBlockReason(acquire.stats);
510
- this.log.warn(`Global resource limit reached, skipping seq ${seq}: ${reason}`);
511
- // Rollback: release slot
512
- this.releaseSlot(slotName, seq);
513
- return {
514
- action: 'launch',
515
- entity: `seq:${seq}`,
516
- result: 'skip',
517
- message: `Global resource limit reached: ${reason}`,
518
- };
519
- }
520
- await this.resourceLimiter.enforceStagger();
521
- const prompt = readFileSync(promptFile, 'utf-8').trim();
522
- const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
523
- if (this.ctx.config.WORKER_TRANSPORT !== 'proc') {
524
- const runtime = this.requireAgentRuntime();
525
- const session = await runtime.startRun(slotName, prompt, (this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL), worktreePath);
526
- this.runtimeStore.updateState('pipeline-launch-acp', (freshState) => {
527
- if (freshState.workers[slotName]) {
528
- this.applyAcpSessionToSlot(freshState.workers[slotName], session);
529
- if (freshState.leases[seq]) {
530
- freshState.leases[seq].sessionId = session.sessionId;
531
- freshState.leases[seq].runId = session.currentRun?.runId || null;
532
- freshState.leases[seq].phase = session.pendingInput ? 'waiting_confirmation' : 'coding';
533
- freshState.leases[seq].lastTransitionAt = new Date().toISOString();
534
- }
535
- }
536
- });
537
- this.supervisor.registerAcpHandle({
538
- id: workerId,
539
- pid: null,
540
- outputFile: null,
541
- project: this.ctx.projectName,
542
- seq: card.seq,
543
- slot: slotName,
544
- branch: branchName,
545
- worktree: worktreePath,
546
- tool: session.tool,
547
- exitCode: null,
548
- sessionId: session.sessionId,
549
- runId: session.currentRun?.runId || null,
550
- sessionState: session.sessionState,
551
- remoteStatus: session.currentRun?.status || null,
552
- lastEventAt: session.lastSeenAt,
553
- startedAt: new Date().toISOString(),
554
- exitedAt: null,
555
- });
556
- this.log.ok(`Step 6: ${this.ctx.config.WORKER_TRANSPORT.toUpperCase()} worker launched for seq ${seq} ` +
557
- `(session=${session.sessionId}, run=${session.currentRun?.runId || 'none'})`);
558
- }
559
- else {
560
- const outputFile = resolve(this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`, `${sessionName}-${Date.now()}.jsonl`);
561
- const workerHandle = this.supervisor.spawn({
562
- id: workerId,
563
- project: this.ctx.projectName,
564
- seq: card.seq,
565
- slot: slotName,
566
- worktree: worktreePath,
567
- branch: branchName,
568
- prompt,
569
- outputFile,
570
- tool: this.ctx.config.WORKER_TOOL,
571
- onExit: (exitCode) => {
572
- this.onWorkerExit(workerId, card, slotName, worktreePath, branchName, exitCode);
573
- },
574
- });
575
- // Store process info in state
576
- this.runtimeStore.updateState('pipeline-launch-print', (freshState) => {
577
- if (freshState.workers[slotName]) {
578
- freshState.workers[slotName].mode = 'print';
579
- freshState.workers[slotName].transport = 'proc';
580
- freshState.workers[slotName].agent = this.ctx.config.WORKER_TOOL;
581
- freshState.workers[slotName].pid = workerHandle.pid;
582
- freshState.workers[slotName].outputFile = workerHandle.outputFile;
583
- freshState.workers[slotName].sessionId = workerHandle.sessionId || null;
584
- freshState.workers[slotName].runId = null;
585
- freshState.workers[slotName].sessionState = null;
586
- freshState.workers[slotName].remoteStatus = null;
587
- freshState.workers[slotName].lastEventAt = null;
588
- freshState.workers[slotName].exitCode = null;
589
- if (freshState.leases[seq]) {
590
- freshState.leases[seq].phase = 'coding';
591
- freshState.leases[seq].lastTransitionAt = new Date().toISOString();
592
- }
593
- }
594
- });
595
- this.log.ok(`Step 6: Worker launched for seq ${seq} (pid=${workerHandle.pid})`);
596
- }
597
- if (this.notifier) {
598
- await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
599
- }
426
+ prompt = readFileSync(promptFile, 'utf-8').trim();
600
427
  }
601
428
  catch (err) {
602
429
  const msg = err instanceof Error ? err.message : String(err);
603
- this.log.error(`Step 6 failed (worker launch) for seq ${seq}: ${msg}`);
604
- failedSlots.add(slotName);
605
- this.resourceLimiter.release();
606
- this.releaseSlot(slotName, seq);
430
+ this.log.error(`Failed to read prompt file for seq ${seq}: ${msg}`);
431
+ return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Prompt file read failed: ${msg}` };
432
+ }
433
+ const runRequest = {
434
+ taskId: String(card.seq),
435
+ cardId: String(card.seq),
436
+ project: this.ctx.projectName,
437
+ phase: 'development',
438
+ prompt,
439
+ cwd: worktreePath,
440
+ branch: branchName,
441
+ targetBranch: this.ctx.mergeBranch,
442
+ tool: this.ctx.config.WORKER_TOOL,
443
+ transport: workflowTransport,
444
+ outputFile: resolve(logsDir, `${this.ctx.projectName}-worker-${card.seq}-${Date.now()}.jsonl`),
445
+ timeoutSec: this.ctx.config.WORKER_LAUNCH_TIMEOUT_S,
446
+ maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
447
+ };
448
+ let response;
449
+ try {
450
+ response = await this.workerManager.run(runRequest);
451
+ }
452
+ catch (err) {
453
+ const msg = err instanceof Error ? err.message : String(err);
454
+ this.log.error(`Step 6 failed (WM.run) for seq ${seq}: ${msg}`);
455
+ failedSlots.add(`wm-error-${seq}`);
607
456
  this.logEvent('launch-worker', seq, 'fail', { error: msg });
608
457
  return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Worker launch failed: ${msg}` };
609
458
  }
459
+ if (!response.accepted) {
460
+ this.log.warn(`WM rejected seq ${seq}: ${response.rejectReason}`);
461
+ return {
462
+ action: 'launch',
463
+ entity: `seq:${seq}`,
464
+ result: response.rejectReason === 'resource_exhausted' ? 'skip' : 'fail',
465
+ message: `WM rejected: ${response.rejectReason}`,
466
+ };
467
+ }
468
+ const slotName = response.slot;
469
+ this.log.ok(`Step 6: WM launched worker for seq ${seq} (slot=${slotName}, pid=${response.pid ?? 'n/a'})`);
470
+ if (this.notifier) {
471
+ await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
472
+ }
610
473
  // Step 7: Move card to Inprogress
611
474
  try {
612
475
  await this.taskBackend.move(seq, 'Inprogress');
@@ -624,107 +487,72 @@ export class ExecutionEngine {
624
487
  }
625
488
  });
626
489
  this.log.ok(`Step 7: Moved seq ${seq} Todo → Inprogress`);
627
- this.logEvent('launch', seq, 'ok', { worker: slotName, session: sessionName });
490
+ this.logEvent('launch', seq, 'ok', { worker: slotName });
628
491
  return { action: 'launch', entity: `seq:${seq}`, result: 'ok', message: `Todo → Inprogress (${slotName})` };
629
492
  }
630
493
  catch (err) {
631
494
  const msg = err instanceof Error ? err.message : String(err);
632
495
  this.log.error(`Step 7 failed (move) for seq ${seq}: ${msg}`);
633
- // Rollback: kill worker, release slot
634
- const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
496
+ // Rollback: cancel worker via WM (handles kill + resource release)
635
497
  try {
636
- if (this.ctx.config.WORKER_TRANSPORT !== 'proc' && this.agentRuntime) {
637
- await this.agentRuntime.stopSession(slotName);
638
- }
639
- else {
640
- await this.supervisor.kill(workerId);
641
- }
498
+ await this.workerManager.cancel({ taskId: String(card.seq), project: this.ctx.projectName, reason: 'anomaly' });
642
499
  }
643
500
  catch { /* best effort */ }
644
- this.supervisor.remove(workerId);
645
- this.resourceLimiter.release();
646
501
  this.releaseSlot(slotName, seq);
647
502
  this.logEvent('launch-move', seq, 'fail', { error: msg });
648
503
  return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Move to Inprogress failed: ${msg}` };
649
504
  }
650
505
  }
651
- // ─── Worker Exit Callback ───────────────────────────────────────
652
506
  /**
653
- * Called by Supervisor when a worker process exits.
654
- * Wires CompletionJudge PostActions to handle completion or failure.
507
+ * @deprecated Phase 1 transitional — WM's internal exit callback handles ACP inspection.
508
+ * Kept for edge-case fallback; will be removed when WM fully owns ACP lifecycle.
655
509
  */
656
- async onWorkerExit(workerId, card, slotName, worktree, branch, exitCode) {
657
- const handle = this.supervisor.get(workerId);
658
- await this.handleWorkerFinalization(card, slotName, worktree, branch, exitCode, handle || null, 'proc');
659
- }
660
510
  async checkAcpInprogressCard(card, slotName) {
661
- const runtime = this.requireAgentRuntime();
662
511
  const seq = card.seq;
663
- const inspected = await runtime.inspect(slotName);
664
- const session = inspected.sessions[slotName];
665
- const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
666
512
  const state = this.runtimeStore.readState();
667
513
  const slot = state.workers[slotName];
668
514
  if (!slot)
669
515
  return null;
670
- if (session) {
516
+ // Use WorkerManager.inspect() for normalized worker state
517
+ const snapshots = this.workerManager.inspect({ project: this.ctx.projectName, taskId: seq });
518
+ const snapshot = snapshots[0];
519
+ if (snapshot && (snapshot.state === 'running' || snapshot.state === 'starting')) {
520
+ // Worker still active — update heartbeat
671
521
  this.runtimeStore.updateState('pipeline-acp-heartbeat', (freshState) => {
672
522
  const freshSlot = freshState.workers[slotName];
673
523
  if (freshSlot) {
674
- this.applyAcpSessionToSlot(freshSlot, session);
675
524
  freshSlot.lastHeartbeat = new Date().toISOString();
676
525
  }
677
526
  });
678
- this.supervisor.registerAcpHandle({
679
- id: workerId,
680
- pid: null,
681
- outputFile: null,
682
- project: this.ctx.projectName,
683
- seq,
684
- slot: slotName,
685
- branch: slot.branch || this.buildBranchName(card),
686
- worktree: slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR),
687
- tool: session.tool,
688
- exitCode: null,
689
- sessionId: session.sessionId,
690
- runId: session.currentRun?.runId || null,
691
- sessionState: session.sessionState,
692
- remoteStatus: session.currentRun?.status || null,
693
- lastEventAt: session.lastSeenAt,
694
- startedAt: slot.claimedAt || new Date().toISOString(),
695
- exitedAt: null,
696
- });
697
- if (session.currentRun?.status && session.currentRun.status !== slot.remoteStatus) {
698
- if (session.currentRun.status === 'waiting_input') {
699
- this.log.info(`seq ${seq}: worker waiting for input — ${session.pendingInput?.prompt || 'input required'}`);
700
- }
701
- else if (session.currentRun.status === 'needs_confirmation') {
702
- this.log.warn(`seq ${seq}: worker needs confirmation — ${session.pendingInput?.prompt || 'confirmation required'}`);
703
- }
704
- else if (session.currentRun.status === 'stalled_submit') {
705
- this.log.warn(`seq ${seq}: worker prompt submission stalled — ${session.stalledReason || 'auto-repair pending'}`);
706
- }
707
- }
708
- if (!session.currentRun || this.isAcpRunActive(session.currentRun.status)) {
709
- return null;
710
- }
711
- const handle = this.supervisor.updateAcpHandle(workerId, {
712
- exitCode: this.acpRunExitCode(session.currentRun.status),
713
- exitedAt: new Date().toISOString(),
714
- sessionId: session.sessionId,
715
- runId: session.currentRun.runId,
716
- sessionState: session.sessionState,
717
- remoteStatus: session.currentRun.status,
718
- lastEventAt: session.lastSeenAt,
719
- }) || this.supervisor.get(workerId) || null;
720
- await this.handleWorkerFinalization(card, slotName, slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR), slot.branch || this.buildBranchName(card), this.acpRunExitCode(session.currentRun.status), handle, this.ctx.config.WORKER_TRANSPORT === 'pty' ? 'pty' : 'acp');
527
+ return null;
528
+ }
529
+ if (snapshot && snapshot.state === 'waiting_input') {
530
+ this.log.info(`seq ${seq}: worker waiting for input`);
531
+ return null;
532
+ }
533
+ if (snapshot && snapshot.state === 'needs_confirmation') {
534
+ this.log.warn(`seq ${seq}: worker needs confirmation`);
535
+ return null;
536
+ }
537
+ if (snapshot && snapshot.state === 'completed') {
538
+ this.log.ok(`seq ${seq}: ACP/PTY run completed (via WM)`);
721
539
  return {
722
540
  action: 'complete',
723
541
  entity: `seq:${seq}`,
724
- result: session.currentRun.status === 'completed' ? 'ok' : 'fail',
725
- message: `${this.ctx.config.WORKER_TRANSPORT.toUpperCase()} run ${session.currentRun.status}`,
542
+ result: 'ok',
543
+ message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run completed`,
726
544
  };
727
545
  }
546
+ if (snapshot && snapshot.state === 'failed') {
547
+ this.log.info(`seq ${seq}: ACP/PTY run failed (via WM)`);
548
+ return {
549
+ action: 'complete',
550
+ entity: `seq:${seq}`,
551
+ result: 'fail',
552
+ message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run failed`,
553
+ };
554
+ }
555
+ // No snapshot — session lost or already cleaned up
728
556
  this.runtimeStore.updateState('pipeline-acp-lost', (freshState) => {
729
557
  const lostSlot = freshState.workers[slotName];
730
558
  if (lostSlot) {
@@ -734,14 +562,7 @@ export class ExecutionEngine {
734
562
  lostSlot.lastHeartbeat = new Date().toISOString();
735
563
  }
736
564
  });
737
- const handle = this.supervisor.updateAcpHandle(workerId, {
738
- exitCode: 1,
739
- exitedAt: new Date().toISOString(),
740
- sessionState: 'offline',
741
- remoteStatus: 'lost',
742
- lastEventAt: new Date().toISOString(),
743
- }) || this.supervisor.get(workerId) || null;
744
- await this.handleWorkerFinalization(card, slotName, slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR), slot.branch || this.buildBranchName(card), 1, handle, this.ctx.config.WORKER_TRANSPORT === 'pty' ? 'pty' : 'acp');
565
+ this.log.warn(`seq ${seq}: ACP session lost — no WM snapshot found`);
745
566
  return {
746
567
  action: 'complete',
747
568
  entity: `seq:${seq}`,
@@ -749,85 +570,6 @@ export class ExecutionEngine {
749
570
  message: 'ACP session lost',
750
571
  };
751
572
  }
752
- async handleWorkerFinalization(card, slotName, worktree, branch, exitCode, handle, transport) {
753
- const completion = this.completionJudge.judge({
754
- worktree,
755
- branch,
756
- baseBranch: this.ctx.mergeBranch,
757
- outputFile: handle?.outputFile || null,
758
- exitCode,
759
- logsDir: this.ctx.paths.logsDir,
760
- phase: selectWorkerPhase(card.state, this.runtimeStore.readState().leases[card.seq]?.phase),
761
- });
762
- const ctx = {
763
- project: this.ctx.projectName,
764
- seq: card.seq,
765
- slot: slotName,
766
- transport,
767
- branch,
768
- worktree,
769
- baseBranch: this.ctx.mergeBranch,
770
- stateFile: this.ctx.paths.stateFile,
771
- maxWorkers: this.ctx.maxWorkers,
772
- mrMode: this.ctx.mrMode,
773
- gitlabProjectId: resolveGitlabProjectId(this.ctx.config),
774
- gitlabUrl: this.ctx.config.raw.GITLAB_URL || process.env.GITLAB_URL || '',
775
- gitlabToken: this.ctx.config.raw.GITLAB_TOKEN || process.env.GITLAB_TOKEN || '',
776
- qaStateId: this.ctx.config.raw.PLANE_STATE_QA || this.ctx.config.raw.TRELLO_QA_LIST_ID || 'QA',
777
- doneStateId: this.ctx.config.raw.PLANE_STATE_DONE || this.ctx.config.raw.TRELLO_DONE_LIST_ID || '',
778
- maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
779
- logsDir: this.ctx.paths.logsDir,
780
- tool: handle?.tool || this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL,
781
- pmStateObserved: card.state,
782
- };
783
- const state = this.runtimeStore.readState();
784
- const retryCount = this.getRetryCount(state, card.seq);
785
- const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
786
- try {
787
- if (completion.status === 'completed') {
788
- const results = await this.postActions.executeCompletion(ctx, completion, handle?.sessionId || null);
789
- const allOk = results.every(r => r.ok);
790
- this.log.ok(`seq ${card.seq}: PostActions completed (${allOk ? 'all ok' : 'some failures'})`);
791
- }
792
- else {
793
- const retrySessionId = transport === 'proc' ? (handle?.sessionId || null) : null;
794
- await this.postActions.executeFailure(ctx, completion, exitCode, retrySessionId, retryCount, {
795
- onExit: (code) => this.onWorkerExit(workerId, card, slotName, worktree, branch, code),
796
- });
797
- this.log.info(`seq ${card.seq}: Failure handling done`);
798
- }
799
- }
800
- catch (err) {
801
- this.log.error(`seq ${card.seq}: PostActions error: ${err}`);
802
- }
803
- }
804
- requireAgentRuntime() {
805
- if (!this.agentRuntime) {
806
- throw new Error('ACP transport requested but AgentRuntime is not configured');
807
- }
808
- return this.agentRuntime;
809
- }
810
- applyAcpSessionToSlot(slot, session) {
811
- const transport = this.ctx.config.WORKER_TRANSPORT === 'pty' ? 'pty' : 'acp';
812
- slot.mode = transport;
813
- slot.transport = transport;
814
- slot.agent = session.tool;
815
- slot.tmuxSession = session.sessionName;
816
- slot.sessionId = session.sessionId;
817
- slot.runId = session.currentRun?.runId || null;
818
- slot.sessionState = session.sessionState;
819
- slot.remoteStatus = session.currentRun?.status || null;
820
- slot.lastEventAt = session.lastSeenAt;
821
- slot.pid = null;
822
- slot.outputFile = null;
823
- slot.exitCode = null;
824
- }
825
- isAcpRunActive(status) {
826
- return ['submitted', 'running', 'waiting_input', 'needs_confirmation', 'stalled_submit'].includes(status);
827
- }
828
- acpRunExitCode(status) {
829
- return status === 'completed' ? 0 : 1;
830
- }
831
573
  findRuntimeSlotName(state, seq, lease) {
832
574
  if (lease?.slot && state.workers[lease.slot])
833
575
  return lease.slot;