@coralai/sps-cli 0.23.21 → 0.24.0
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +16 -7
- package/dist/commands/cardDashboard.js +3 -3
- package/dist/commands/cardDashboard.js.map +1 -1
- package/dist/commands/pipelineTick.d.ts.map +1 -1
- package/dist/commands/pipelineTick.js +19 -6
- package/dist/commands/pipelineTick.js.map +1 -1
- package/dist/commands/qaTick.d.ts.map +1 -1
- package/dist/commands/qaTick.js +33 -4
- package/dist/commands/qaTick.js.map +1 -1
- package/dist/commands/status.d.ts.map +1 -1
- package/dist/commands/status.js +2 -5
- package/dist/commands/status.js.map +1 -1
- package/dist/commands/tick.d.ts.map +1 -1
- package/dist/commands/tick.js +56 -35
- package/dist/commands/tick.js.map +1 -1
- package/dist/commands/workerDashboard.d.ts.map +1 -1
- package/dist/commands/workerDashboard.js +9 -9
- package/dist/commands/workerDashboard.js.map +1 -1
- package/dist/commands/workerLaunch.d.ts.map +1 -1
- package/dist/commands/workerLaunch.js +19 -6
- package/dist/commands/workerLaunch.js.map +1 -1
- package/dist/core/acpState.js +1 -1
- package/dist/core/acpState.js.map +1 -1
- package/dist/core/config.d.ts +9 -0
- package/dist/core/config.d.ts.map +1 -1
- package/dist/core/config.js +13 -0
- package/dist/core/config.js.map +1 -1
- package/dist/core/runtimeSnapshot.d.ts +1 -0
- package/dist/core/runtimeSnapshot.d.ts.map +1 -1
- package/dist/core/runtimeSnapshot.js +6 -6
- package/dist/core/runtimeSnapshot.js.map +1 -1
- package/dist/core/runtimeStore.d.ts +23 -1
- package/dist/core/runtimeStore.d.ts.map +1 -1
- package/dist/core/runtimeStore.js +71 -32
- package/dist/core/runtimeStore.js.map +1 -1
- package/dist/core/state.d.ts +33 -0
- package/dist/core/state.d.ts.map +1 -1
- package/dist/core/state.js +6 -0
- package/dist/core/state.js.map +1 -1
- package/dist/core/taskPrompts.d.ts.map +1 -1
- package/dist/core/taskPrompts.js +13 -9
- package/dist/core/taskPrompts.js.map +1 -1
- package/dist/core/workerRuntimeSummary.d.ts +1 -2
- package/dist/core/workerRuntimeSummary.d.ts.map +1 -1
- package/dist/core/workerRuntimeSummary.js +2 -2
- package/dist/core/workerRuntimeSummary.js.map +1 -1
- package/dist/engines/CloseoutEngine.d.ts +3 -6
- package/dist/engines/CloseoutEngine.d.ts.map +1 -1
- package/dist/engines/CloseoutEngine.js +113 -285
- package/dist/engines/CloseoutEngine.js.map +1 -1
- package/dist/engines/EventHandler.d.ts +57 -0
- package/dist/engines/EventHandler.d.ts.map +1 -0
- package/dist/engines/EventHandler.js +210 -0
- package/dist/engines/EventHandler.js.map +1 -0
- package/dist/engines/ExecutionEngine.d.ts +5 -17
- package/dist/engines/ExecutionEngine.d.ts.map +1 -1
- package/dist/engines/ExecutionEngine.js +110 -368
- package/dist/engines/ExecutionEngine.js.map +1 -1
- package/dist/engines/MonitorEngine.d.ts.map +1 -1
- package/dist/engines/MonitorEngine.js +8 -9
- package/dist/engines/MonitorEngine.js.map +1 -1
- package/dist/manager/integration-queue.d.ts +65 -0
- package/dist/manager/integration-queue.d.ts.map +1 -0
- package/dist/manager/integration-queue.js +123 -0
- package/dist/manager/integration-queue.js.map +1 -0
- package/dist/manager/recovery.d.ts.map +1 -1
- package/dist/manager/recovery.js +10 -9
- package/dist/manager/recovery.js.map +1 -1
- package/dist/manager/runtime-coordinator.d.ts +1 -3
- package/dist/manager/runtime-coordinator.d.ts.map +1 -1
- package/dist/manager/runtime-coordinator.js +13 -15
- package/dist/manager/runtime-coordinator.js.map +1 -1
- package/dist/manager/worker-manager-impl.d.ts +81 -0
- package/dist/manager/worker-manager-impl.d.ts.map +1 -0
- package/dist/manager/worker-manager-impl.js +648 -0
- package/dist/manager/worker-manager-impl.js.map +1 -0
- package/dist/manager/worker-manager.d.ts +176 -0
- package/dist/manager/worker-manager.d.ts.map +1 -0
- package/dist/manager/worker-manager.js +12 -0
- package/dist/manager/worker-manager.js.map +1 -0
- package/dist/models/acp.d.ts +4 -0
- package/dist/models/acp.d.ts.map +1 -1
- package/package.json +1 -1
|
@@ -1,32 +1,26 @@
|
|
|
1
1
|
import { writeFileSync, readFileSync, mkdirSync, existsSync } from 'node:fs';
|
|
2
2
|
import { resolve } from 'node:path';
|
|
3
3
|
import { RuntimeStore } from '../core/runtimeStore.js';
|
|
4
|
-
import { resolveGitlabProjectId } from '../core/config.js';
|
|
4
|
+
import { resolveGitlabProjectId, resolveWorkflowTransport } from '../core/config.js';
|
|
5
5
|
import { resolveWorktreePath } from '../core/paths.js';
|
|
6
6
|
import { readQueue } from '../core/queue.js';
|
|
7
|
-
import { buildPhasePrompt, DEVELOPMENT_PROMPT_FILE, INTEGRATION_PROMPT_FILE, LEGACY_TASK_PROMPT_FILE,
|
|
7
|
+
import { buildPhasePrompt, DEVELOPMENT_PROMPT_FILE, INTEGRATION_PROMPT_FILE, LEGACY_TASK_PROMPT_FILE, } from '../core/taskPrompts.js';
|
|
8
8
|
import { Logger } from '../core/logger.js';
|
|
9
9
|
const SKIP_LABELS = ['BLOCKED', 'NEEDS-FIX', 'CONFLICT', 'WAITING-CONFIRMATION', 'STALE-RUNTIME'];
|
|
10
10
|
export class ExecutionEngine {
|
|
11
11
|
ctx;
|
|
12
12
|
taskBackend;
|
|
13
13
|
repoBackend;
|
|
14
|
-
|
|
15
|
-
completionJudge;
|
|
16
|
-
postActions;
|
|
17
|
-
resourceLimiter;
|
|
14
|
+
workerManager;
|
|
18
15
|
notifier;
|
|
19
16
|
agentRuntime;
|
|
20
17
|
log;
|
|
21
18
|
runtimeStore;
|
|
22
|
-
constructor(ctx, taskBackend, repoBackend,
|
|
19
|
+
constructor(ctx, taskBackend, repoBackend, workerManager, notifier, agentRuntime) {
|
|
23
20
|
this.ctx = ctx;
|
|
24
21
|
this.taskBackend = taskBackend;
|
|
25
22
|
this.repoBackend = repoBackend;
|
|
26
|
-
this.
|
|
27
|
-
this.completionJudge = completionJudge;
|
|
28
|
-
this.postActions = postActions;
|
|
29
|
-
this.resourceLimiter = resourceLimiter;
|
|
23
|
+
this.workerManager = workerManager;
|
|
30
24
|
this.notifier = notifier;
|
|
31
25
|
this.agentRuntime = agentRuntime;
|
|
32
26
|
this.log = new Logger('pipeline', ctx.projectName, ctx.paths.logsDir);
|
|
@@ -298,15 +292,10 @@ export class ExecutionEngine {
|
|
|
298
292
|
// Slot already released (PostActions handled it via exit callback)
|
|
299
293
|
return null;
|
|
300
294
|
}
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
return this.checkAcpInprogressCard(card, slotName);
|
|
306
|
-
}
|
|
307
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
|
|
308
|
-
const handle = this.supervisor.get(workerId);
|
|
309
|
-
if (handle && handle.exitCode === null) {
|
|
295
|
+
// Use WorkerManager.inspect() to check worker state
|
|
296
|
+
const snapshots = this.workerManager.inspect({ project: this.ctx.projectName, taskId: seq });
|
|
297
|
+
const snapshot = snapshots[0];
|
|
298
|
+
if (snapshot && (snapshot.state === 'running' || snapshot.state === 'starting')) {
|
|
310
299
|
// Worker still running — update heartbeat
|
|
311
300
|
try {
|
|
312
301
|
this.runtimeStore.updateState('pipeline-heartbeat', (freshState) => {
|
|
@@ -318,25 +307,28 @@ export class ExecutionEngine {
|
|
|
318
307
|
catch { /* non-fatal */ }
|
|
319
308
|
return null;
|
|
320
309
|
}
|
|
321
|
-
if (
|
|
322
|
-
// Worker
|
|
323
|
-
|
|
324
|
-
const freshState = this.runtimeStore.readState();
|
|
325
|
-
if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
|
|
326
|
-
this.log.ok(`seq ${seq}: Completed (handled by exit callback)`);
|
|
327
|
-
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via exit callback' };
|
|
328
|
-
}
|
|
329
|
-
// PostActions still processing, wait for next tick
|
|
310
|
+
if (snapshot && (snapshot.state === 'waiting_input' || snapshot.state === 'needs_confirmation')) {
|
|
311
|
+
// Worker waiting for input — log and wait
|
|
312
|
+
this.log.info(`seq ${seq}: worker in state ${snapshot.state}`);
|
|
330
313
|
return null;
|
|
331
314
|
}
|
|
332
|
-
|
|
333
|
-
|
|
315
|
+
if (snapshot && snapshot.state === 'completed') {
|
|
316
|
+
// WM exit callback handled completion
|
|
317
|
+
this.log.ok(`seq ${seq}: Completed (handled by WM exit callback)`);
|
|
318
|
+
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed via WM exit callback' };
|
|
319
|
+
}
|
|
320
|
+
if (snapshot && snapshot.state === 'failed') {
|
|
321
|
+
// WM exit callback handled failure
|
|
322
|
+
this.log.info(`seq ${seq}: Failed (handled by WM exit callback)`);
|
|
323
|
+
return { action: 'complete', entity: `seq:${seq}`, result: 'fail', message: 'Failed via WM exit callback' };
|
|
324
|
+
}
|
|
325
|
+
// No snapshot found — WM already processed and released the slot
|
|
334
326
|
const freshState = this.runtimeStore.readState();
|
|
335
327
|
if (!freshState.workers[slotName] || freshState.workers[slotName].status === 'idle') {
|
|
336
|
-
this.log.ok(`seq ${seq}: Completed (
|
|
337
|
-
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (
|
|
328
|
+
this.log.ok(`seq ${seq}: Completed (WM already processed)`);
|
|
329
|
+
return { action: 'complete', entity: `seq:${seq}`, result: 'ok', message: 'Completed (WM processed)' };
|
|
338
330
|
}
|
|
339
|
-
// Still active in state but
|
|
331
|
+
// Still active in state but no snapshot — MonitorEngine/Recovery handles
|
|
340
332
|
return null;
|
|
341
333
|
}
|
|
342
334
|
// ─── Prepare Phase (Backlog → Todo) ─────────────────────────────
|
|
@@ -402,93 +394,20 @@ export class ExecutionEngine {
|
|
|
402
394
|
const seq = card.seq;
|
|
403
395
|
const branchName = this.buildBranchName(card);
|
|
404
396
|
const worktreePath = resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR);
|
|
397
|
+
const workflowTransport = resolveWorkflowTransport(this.ctx.config);
|
|
405
398
|
if (opts.dryRun) {
|
|
406
399
|
this.log.info(`[dry-run] Would launch seq ${seq}`);
|
|
407
400
|
return { action: 'launch', entity: `seq:${seq}`, result: 'ok', message: 'dry-run' };
|
|
408
401
|
}
|
|
409
|
-
// Step
|
|
410
|
-
// Exclude slots that failed launch this tick to prevent repeated failures
|
|
411
|
-
const state = this.runtimeStore.readState();
|
|
412
|
-
const idleSlots = Object.entries(state.workers)
|
|
413
|
-
.filter(([name, w]) => w.status === 'idle' && !failedSlots.has(name));
|
|
414
|
-
if (idleSlots.length === 0) {
|
|
415
|
-
this.log.warn(`No idle worker slot available for seq ${seq}`);
|
|
416
|
-
return { action: 'launch', entity: `seq:${seq}`, result: 'skip', message: 'No idle worker slot' };
|
|
417
|
-
}
|
|
418
|
-
const [slotName] = idleSlots[0];
|
|
419
|
-
const sessionName = `${this.ctx.projectName}-${slotName}`;
|
|
420
|
-
// Claim slot in state.json
|
|
421
|
-
state.workers[slotName] = {
|
|
422
|
-
status: 'active',
|
|
423
|
-
seq: parseInt(seq, 10),
|
|
424
|
-
branch: branchName,
|
|
425
|
-
worktree: worktreePath,
|
|
426
|
-
tmuxSession: sessionName,
|
|
427
|
-
claimedAt: new Date().toISOString(),
|
|
428
|
-
lastHeartbeat: new Date().toISOString(),
|
|
429
|
-
mode: this.ctx.config.WORKER_TRANSPORT === 'proc'
|
|
430
|
-
? this.ctx.config.WORKER_MODE
|
|
431
|
-
: this.ctx.config.WORKER_TRANSPORT,
|
|
432
|
-
transport: this.ctx.config.WORKER_TRANSPORT,
|
|
433
|
-
agent: (this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL),
|
|
434
|
-
sessionId: null,
|
|
435
|
-
runId: null,
|
|
436
|
-
sessionState: null,
|
|
437
|
-
remoteStatus: null,
|
|
438
|
-
lastEventAt: null,
|
|
439
|
-
pid: null,
|
|
440
|
-
outputFile: null,
|
|
441
|
-
exitCode: null,
|
|
442
|
-
mergeRetries: 0,
|
|
443
|
-
completedAt: null,
|
|
444
|
-
};
|
|
445
|
-
// Add to active cards
|
|
446
|
-
const conflictDomains = card.labels
|
|
447
|
-
.filter((l) => l.startsWith('conflict:'))
|
|
448
|
-
.map((l) => l.slice('conflict:'.length));
|
|
449
|
-
state.activeCards[seq] = {
|
|
450
|
-
seq: parseInt(seq, 10),
|
|
451
|
-
state: 'Todo',
|
|
452
|
-
worker: slotName,
|
|
453
|
-
mrUrl: null,
|
|
454
|
-
conflictDomains,
|
|
455
|
-
startedAt: new Date().toISOString(),
|
|
456
|
-
};
|
|
457
|
-
state.leases[seq] = {
|
|
458
|
-
seq: parseInt(seq, 10),
|
|
459
|
-
pmStateObserved: card.state,
|
|
460
|
-
phase: 'preparing',
|
|
461
|
-
slot: slotName,
|
|
462
|
-
branch: branchName,
|
|
463
|
-
worktree: worktreePath,
|
|
464
|
-
sessionId: null,
|
|
465
|
-
runId: null,
|
|
466
|
-
claimedAt: state.workers[slotName].claimedAt,
|
|
467
|
-
retryCount: 0,
|
|
468
|
-
lastTransitionAt: new Date().toISOString(),
|
|
469
|
-
};
|
|
470
|
-
try {
|
|
471
|
-
this.runtimeStore.updateState('pipeline-launch', (draft) => {
|
|
472
|
-
draft.workers[slotName] = state.workers[slotName];
|
|
473
|
-
draft.activeCards[seq] = state.activeCards[seq];
|
|
474
|
-
draft.leases[seq] = state.leases[seq];
|
|
475
|
-
});
|
|
476
|
-
this.log.ok(`Step 4: Claimed slot ${slotName} for seq ${seq}`);
|
|
477
|
-
}
|
|
478
|
-
catch (err) {
|
|
479
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
480
|
-
this.log.error(`Step 4 failed (claim) for seq ${seq}: ${msg}`);
|
|
481
|
-
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Claim slot failed: ${msg}` };
|
|
482
|
-
}
|
|
483
|
-
// Also claim in PM backend
|
|
402
|
+
// Step 5: PM claim (kept in Engine — PM backend awareness)
|
|
484
403
|
try {
|
|
485
|
-
await this.taskBackend.claim(seq,
|
|
404
|
+
await this.taskBackend.claim(seq, `pending-wm`);
|
|
486
405
|
}
|
|
487
406
|
catch (err) {
|
|
488
407
|
const msg = err instanceof Error ? err.message : String(err);
|
|
489
408
|
this.log.warn(`PM claim for seq ${seq} failed (non-fatal): ${msg}`);
|
|
490
409
|
}
|
|
491
|
-
// Step
|
|
410
|
+
// Step 5b: Build task context (.sps/development_prompt.txt + .sps/integration_prompt.txt)
|
|
492
411
|
try {
|
|
493
412
|
this.buildTaskContext(card, worktreePath);
|
|
494
413
|
this.log.ok(`Step 5: Task context built for seq ${seq}`);
|
|
@@ -496,117 +415,61 @@ export class ExecutionEngine {
|
|
|
496
415
|
catch (err) {
|
|
497
416
|
const msg = err instanceof Error ? err.message : String(err);
|
|
498
417
|
this.log.error(`Step 5 failed (context) for seq ${seq}: ${msg}`);
|
|
499
|
-
this.releaseSlot(slotName, seq);
|
|
500
418
|
this.logEvent('launch-context', seq, 'fail', { error: msg });
|
|
501
419
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Context build failed: ${msg}` };
|
|
502
420
|
}
|
|
503
|
-
// Step 6: Launch worker via
|
|
421
|
+
// Step 6: Launch worker via WorkerManager.run()
|
|
422
|
+
const logsDir = this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`;
|
|
423
|
+
const promptFile = resolve(worktreePath, '.sps', LEGACY_TASK_PROMPT_FILE);
|
|
424
|
+
let prompt;
|
|
504
425
|
try {
|
|
505
|
-
|
|
506
|
-
// Check global resource limit
|
|
507
|
-
const acquire = this.resourceLimiter.tryAcquireDetailed();
|
|
508
|
-
if (!acquire.acquired) {
|
|
509
|
-
const reason = this.resourceLimiter.formatBlockReason(acquire.stats);
|
|
510
|
-
this.log.warn(`Global resource limit reached, skipping seq ${seq}: ${reason}`);
|
|
511
|
-
// Rollback: release slot
|
|
512
|
-
this.releaseSlot(slotName, seq);
|
|
513
|
-
return {
|
|
514
|
-
action: 'launch',
|
|
515
|
-
entity: `seq:${seq}`,
|
|
516
|
-
result: 'skip',
|
|
517
|
-
message: `Global resource limit reached: ${reason}`,
|
|
518
|
-
};
|
|
519
|
-
}
|
|
520
|
-
await this.resourceLimiter.enforceStagger();
|
|
521
|
-
const prompt = readFileSync(promptFile, 'utf-8').trim();
|
|
522
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
|
|
523
|
-
if (this.ctx.config.WORKER_TRANSPORT !== 'proc') {
|
|
524
|
-
const runtime = this.requireAgentRuntime();
|
|
525
|
-
const session = await runtime.startRun(slotName, prompt, (this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL), worktreePath);
|
|
526
|
-
this.runtimeStore.updateState('pipeline-launch-acp', (freshState) => {
|
|
527
|
-
if (freshState.workers[slotName]) {
|
|
528
|
-
this.applyAcpSessionToSlot(freshState.workers[slotName], session);
|
|
529
|
-
if (freshState.leases[seq]) {
|
|
530
|
-
freshState.leases[seq].sessionId = session.sessionId;
|
|
531
|
-
freshState.leases[seq].runId = session.currentRun?.runId || null;
|
|
532
|
-
freshState.leases[seq].phase = session.pendingInput ? 'waiting_confirmation' : 'coding';
|
|
533
|
-
freshState.leases[seq].lastTransitionAt = new Date().toISOString();
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
});
|
|
537
|
-
this.supervisor.registerAcpHandle({
|
|
538
|
-
id: workerId,
|
|
539
|
-
pid: null,
|
|
540
|
-
outputFile: null,
|
|
541
|
-
project: this.ctx.projectName,
|
|
542
|
-
seq: card.seq,
|
|
543
|
-
slot: slotName,
|
|
544
|
-
branch: branchName,
|
|
545
|
-
worktree: worktreePath,
|
|
546
|
-
tool: session.tool,
|
|
547
|
-
exitCode: null,
|
|
548
|
-
sessionId: session.sessionId,
|
|
549
|
-
runId: session.currentRun?.runId || null,
|
|
550
|
-
sessionState: session.sessionState,
|
|
551
|
-
remoteStatus: session.currentRun?.status || null,
|
|
552
|
-
lastEventAt: session.lastSeenAt,
|
|
553
|
-
startedAt: new Date().toISOString(),
|
|
554
|
-
exitedAt: null,
|
|
555
|
-
});
|
|
556
|
-
this.log.ok(`Step 6: ${this.ctx.config.WORKER_TRANSPORT.toUpperCase()} worker launched for seq ${seq} ` +
|
|
557
|
-
`(session=${session.sessionId}, run=${session.currentRun?.runId || 'none'})`);
|
|
558
|
-
}
|
|
559
|
-
else {
|
|
560
|
-
const outputFile = resolve(this.ctx.config.raw.LOGS_DIR || `/tmp/sps-${this.ctx.projectName}`, `${sessionName}-${Date.now()}.jsonl`);
|
|
561
|
-
const workerHandle = this.supervisor.spawn({
|
|
562
|
-
id: workerId,
|
|
563
|
-
project: this.ctx.projectName,
|
|
564
|
-
seq: card.seq,
|
|
565
|
-
slot: slotName,
|
|
566
|
-
worktree: worktreePath,
|
|
567
|
-
branch: branchName,
|
|
568
|
-
prompt,
|
|
569
|
-
outputFile,
|
|
570
|
-
tool: this.ctx.config.WORKER_TOOL,
|
|
571
|
-
onExit: (exitCode) => {
|
|
572
|
-
this.onWorkerExit(workerId, card, slotName, worktreePath, branchName, exitCode);
|
|
573
|
-
},
|
|
574
|
-
});
|
|
575
|
-
// Store process info in state
|
|
576
|
-
this.runtimeStore.updateState('pipeline-launch-print', (freshState) => {
|
|
577
|
-
if (freshState.workers[slotName]) {
|
|
578
|
-
freshState.workers[slotName].mode = 'print';
|
|
579
|
-
freshState.workers[slotName].transport = 'proc';
|
|
580
|
-
freshState.workers[slotName].agent = this.ctx.config.WORKER_TOOL;
|
|
581
|
-
freshState.workers[slotName].pid = workerHandle.pid;
|
|
582
|
-
freshState.workers[slotName].outputFile = workerHandle.outputFile;
|
|
583
|
-
freshState.workers[slotName].sessionId = workerHandle.sessionId || null;
|
|
584
|
-
freshState.workers[slotName].runId = null;
|
|
585
|
-
freshState.workers[slotName].sessionState = null;
|
|
586
|
-
freshState.workers[slotName].remoteStatus = null;
|
|
587
|
-
freshState.workers[slotName].lastEventAt = null;
|
|
588
|
-
freshState.workers[slotName].exitCode = null;
|
|
589
|
-
if (freshState.leases[seq]) {
|
|
590
|
-
freshState.leases[seq].phase = 'coding';
|
|
591
|
-
freshState.leases[seq].lastTransitionAt = new Date().toISOString();
|
|
592
|
-
}
|
|
593
|
-
}
|
|
594
|
-
});
|
|
595
|
-
this.log.ok(`Step 6: Worker launched for seq ${seq} (pid=${workerHandle.pid})`);
|
|
596
|
-
}
|
|
597
|
-
if (this.notifier) {
|
|
598
|
-
await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
|
|
599
|
-
}
|
|
426
|
+
prompt = readFileSync(promptFile, 'utf-8').trim();
|
|
600
427
|
}
|
|
601
428
|
catch (err) {
|
|
602
429
|
const msg = err instanceof Error ? err.message : String(err);
|
|
603
|
-
this.log.error(`
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
430
|
+
this.log.error(`Failed to read prompt file for seq ${seq}: ${msg}`);
|
|
431
|
+
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Prompt file read failed: ${msg}` };
|
|
432
|
+
}
|
|
433
|
+
const runRequest = {
|
|
434
|
+
taskId: String(card.seq),
|
|
435
|
+
cardId: String(card.seq),
|
|
436
|
+
project: this.ctx.projectName,
|
|
437
|
+
phase: 'development',
|
|
438
|
+
prompt,
|
|
439
|
+
cwd: worktreePath,
|
|
440
|
+
branch: branchName,
|
|
441
|
+
targetBranch: this.ctx.mergeBranch,
|
|
442
|
+
tool: this.ctx.config.WORKER_TOOL,
|
|
443
|
+
transport: workflowTransport,
|
|
444
|
+
outputFile: resolve(logsDir, `${this.ctx.projectName}-worker-${card.seq}-${Date.now()}.jsonl`),
|
|
445
|
+
timeoutSec: this.ctx.config.WORKER_LAUNCH_TIMEOUT_S,
|
|
446
|
+
maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
|
|
447
|
+
};
|
|
448
|
+
let response;
|
|
449
|
+
try {
|
|
450
|
+
response = await this.workerManager.run(runRequest);
|
|
451
|
+
}
|
|
452
|
+
catch (err) {
|
|
453
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
454
|
+
this.log.error(`Step 6 failed (WM.run) for seq ${seq}: ${msg}`);
|
|
455
|
+
failedSlots.add(`wm-error-${seq}`);
|
|
607
456
|
this.logEvent('launch-worker', seq, 'fail', { error: msg });
|
|
608
457
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Worker launch failed: ${msg}` };
|
|
609
458
|
}
|
|
459
|
+
if (!response.accepted) {
|
|
460
|
+
this.log.warn(`WM rejected seq ${seq}: ${response.rejectReason}`);
|
|
461
|
+
return {
|
|
462
|
+
action: 'launch',
|
|
463
|
+
entity: `seq:${seq}`,
|
|
464
|
+
result: response.rejectReason === 'resource_exhausted' ? 'skip' : 'fail',
|
|
465
|
+
message: `WM rejected: ${response.rejectReason}`,
|
|
466
|
+
};
|
|
467
|
+
}
|
|
468
|
+
const slotName = response.slot;
|
|
469
|
+
this.log.ok(`Step 6: WM launched worker for seq ${seq} (slot=${slotName}, pid=${response.pid ?? 'n/a'})`);
|
|
470
|
+
if (this.notifier) {
|
|
471
|
+
await this.notifier.sendSuccess(`[${this.ctx.projectName}] seq:${seq} worker started (${slotName})`).catch(() => { });
|
|
472
|
+
}
|
|
610
473
|
// Step 7: Move card to Inprogress
|
|
611
474
|
try {
|
|
612
475
|
await this.taskBackend.move(seq, 'Inprogress');
|
|
@@ -624,107 +487,72 @@ export class ExecutionEngine {
|
|
|
624
487
|
}
|
|
625
488
|
});
|
|
626
489
|
this.log.ok(`Step 7: Moved seq ${seq} Todo → Inprogress`);
|
|
627
|
-
this.logEvent('launch', seq, 'ok', { worker: slotName
|
|
490
|
+
this.logEvent('launch', seq, 'ok', { worker: slotName });
|
|
628
491
|
return { action: 'launch', entity: `seq:${seq}`, result: 'ok', message: `Todo → Inprogress (${slotName})` };
|
|
629
492
|
}
|
|
630
493
|
catch (err) {
|
|
631
494
|
const msg = err instanceof Error ? err.message : String(err);
|
|
632
495
|
this.log.error(`Step 7 failed (move) for seq ${seq}: ${msg}`);
|
|
633
|
-
// Rollback:
|
|
634
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
|
|
496
|
+
// Rollback: cancel worker via WM (handles kill + resource release)
|
|
635
497
|
try {
|
|
636
|
-
|
|
637
|
-
await this.agentRuntime.stopSession(slotName);
|
|
638
|
-
}
|
|
639
|
-
else {
|
|
640
|
-
await this.supervisor.kill(workerId);
|
|
641
|
-
}
|
|
498
|
+
await this.workerManager.cancel({ taskId: String(card.seq), project: this.ctx.projectName, reason: 'anomaly' });
|
|
642
499
|
}
|
|
643
500
|
catch { /* best effort */ }
|
|
644
|
-
this.supervisor.remove(workerId);
|
|
645
|
-
this.resourceLimiter.release();
|
|
646
501
|
this.releaseSlot(slotName, seq);
|
|
647
502
|
this.logEvent('launch-move', seq, 'fail', { error: msg });
|
|
648
503
|
return { action: 'launch', entity: `seq:${seq}`, result: 'fail', message: `Move to Inprogress failed: ${msg}` };
|
|
649
504
|
}
|
|
650
505
|
}
|
|
651
|
-
// ─── Worker Exit Callback ───────────────────────────────────────
|
|
652
506
|
/**
|
|
653
|
-
*
|
|
654
|
-
*
|
|
507
|
+
* @deprecated Phase 1 transitional — WM's internal exit callback handles ACP inspection.
|
|
508
|
+
* Kept for edge-case fallback; will be removed when WM fully owns ACP lifecycle.
|
|
655
509
|
*/
|
|
656
|
-
async onWorkerExit(workerId, card, slotName, worktree, branch, exitCode) {
|
|
657
|
-
const handle = this.supervisor.get(workerId);
|
|
658
|
-
await this.handleWorkerFinalization(card, slotName, worktree, branch, exitCode, handle || null, 'proc');
|
|
659
|
-
}
|
|
660
510
|
async checkAcpInprogressCard(card, slotName) {
|
|
661
|
-
const runtime = this.requireAgentRuntime();
|
|
662
511
|
const seq = card.seq;
|
|
663
|
-
const inspected = await runtime.inspect(slotName);
|
|
664
|
-
const session = inspected.sessions[slotName];
|
|
665
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${seq}`;
|
|
666
512
|
const state = this.runtimeStore.readState();
|
|
667
513
|
const slot = state.workers[slotName];
|
|
668
514
|
if (!slot)
|
|
669
515
|
return null;
|
|
670
|
-
|
|
516
|
+
// Use WorkerManager.inspect() for normalized worker state
|
|
517
|
+
const snapshots = this.workerManager.inspect({ project: this.ctx.projectName, taskId: seq });
|
|
518
|
+
const snapshot = snapshots[0];
|
|
519
|
+
if (snapshot && (snapshot.state === 'running' || snapshot.state === 'starting')) {
|
|
520
|
+
// Worker still active — update heartbeat
|
|
671
521
|
this.runtimeStore.updateState('pipeline-acp-heartbeat', (freshState) => {
|
|
672
522
|
const freshSlot = freshState.workers[slotName];
|
|
673
523
|
if (freshSlot) {
|
|
674
|
-
this.applyAcpSessionToSlot(freshSlot, session);
|
|
675
524
|
freshSlot.lastHeartbeat = new Date().toISOString();
|
|
676
525
|
}
|
|
677
526
|
});
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
runId: session.currentRun?.runId || null,
|
|
691
|
-
sessionState: session.sessionState,
|
|
692
|
-
remoteStatus: session.currentRun?.status || null,
|
|
693
|
-
lastEventAt: session.lastSeenAt,
|
|
694
|
-
startedAt: slot.claimedAt || new Date().toISOString(),
|
|
695
|
-
exitedAt: null,
|
|
696
|
-
});
|
|
697
|
-
if (session.currentRun?.status && session.currentRun.status !== slot.remoteStatus) {
|
|
698
|
-
if (session.currentRun.status === 'waiting_input') {
|
|
699
|
-
this.log.info(`seq ${seq}: worker waiting for input — ${session.pendingInput?.prompt || 'input required'}`);
|
|
700
|
-
}
|
|
701
|
-
else if (session.currentRun.status === 'needs_confirmation') {
|
|
702
|
-
this.log.warn(`seq ${seq}: worker needs confirmation — ${session.pendingInput?.prompt || 'confirmation required'}`);
|
|
703
|
-
}
|
|
704
|
-
else if (session.currentRun.status === 'stalled_submit') {
|
|
705
|
-
this.log.warn(`seq ${seq}: worker prompt submission stalled — ${session.stalledReason || 'auto-repair pending'}`);
|
|
706
|
-
}
|
|
707
|
-
}
|
|
708
|
-
if (!session.currentRun || this.isAcpRunActive(session.currentRun.status)) {
|
|
709
|
-
return null;
|
|
710
|
-
}
|
|
711
|
-
const handle = this.supervisor.updateAcpHandle(workerId, {
|
|
712
|
-
exitCode: this.acpRunExitCode(session.currentRun.status),
|
|
713
|
-
exitedAt: new Date().toISOString(),
|
|
714
|
-
sessionId: session.sessionId,
|
|
715
|
-
runId: session.currentRun.runId,
|
|
716
|
-
sessionState: session.sessionState,
|
|
717
|
-
remoteStatus: session.currentRun.status,
|
|
718
|
-
lastEventAt: session.lastSeenAt,
|
|
719
|
-
}) || this.supervisor.get(workerId) || null;
|
|
720
|
-
await this.handleWorkerFinalization(card, slotName, slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR), slot.branch || this.buildBranchName(card), this.acpRunExitCode(session.currentRun.status), handle, this.ctx.config.WORKER_TRANSPORT === 'pty' ? 'pty' : 'acp');
|
|
527
|
+
return null;
|
|
528
|
+
}
|
|
529
|
+
if (snapshot && snapshot.state === 'waiting_input') {
|
|
530
|
+
this.log.info(`seq ${seq}: worker waiting for input`);
|
|
531
|
+
return null;
|
|
532
|
+
}
|
|
533
|
+
if (snapshot && snapshot.state === 'needs_confirmation') {
|
|
534
|
+
this.log.warn(`seq ${seq}: worker needs confirmation`);
|
|
535
|
+
return null;
|
|
536
|
+
}
|
|
537
|
+
if (snapshot && snapshot.state === 'completed') {
|
|
538
|
+
this.log.ok(`seq ${seq}: ACP/PTY run completed (via WM)`);
|
|
721
539
|
return {
|
|
722
540
|
action: 'complete',
|
|
723
541
|
entity: `seq:${seq}`,
|
|
724
|
-
result:
|
|
725
|
-
message: `${this.ctx.config.
|
|
542
|
+
result: 'ok',
|
|
543
|
+
message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run completed`,
|
|
726
544
|
};
|
|
727
545
|
}
|
|
546
|
+
if (snapshot && snapshot.state === 'failed') {
|
|
547
|
+
this.log.info(`seq ${seq}: ACP/PTY run failed (via WM)`);
|
|
548
|
+
return {
|
|
549
|
+
action: 'complete',
|
|
550
|
+
entity: `seq:${seq}`,
|
|
551
|
+
result: 'fail',
|
|
552
|
+
message: `${resolveWorkflowTransport(this.ctx.config).toUpperCase()} run failed`,
|
|
553
|
+
};
|
|
554
|
+
}
|
|
555
|
+
// No snapshot — session lost or already cleaned up
|
|
728
556
|
this.runtimeStore.updateState('pipeline-acp-lost', (freshState) => {
|
|
729
557
|
const lostSlot = freshState.workers[slotName];
|
|
730
558
|
if (lostSlot) {
|
|
@@ -734,14 +562,7 @@ export class ExecutionEngine {
|
|
|
734
562
|
lostSlot.lastHeartbeat = new Date().toISOString();
|
|
735
563
|
}
|
|
736
564
|
});
|
|
737
|
-
|
|
738
|
-
exitCode: 1,
|
|
739
|
-
exitedAt: new Date().toISOString(),
|
|
740
|
-
sessionState: 'offline',
|
|
741
|
-
remoteStatus: 'lost',
|
|
742
|
-
lastEventAt: new Date().toISOString(),
|
|
743
|
-
}) || this.supervisor.get(workerId) || null;
|
|
744
|
-
await this.handleWorkerFinalization(card, slotName, slot.worktree || resolveWorktreePath(this.ctx.projectName, seq, this.ctx.config.WORKTREE_DIR), slot.branch || this.buildBranchName(card), 1, handle, this.ctx.config.WORKER_TRANSPORT === 'pty' ? 'pty' : 'acp');
|
|
565
|
+
this.log.warn(`seq ${seq}: ACP session lost — no WM snapshot found`);
|
|
745
566
|
return {
|
|
746
567
|
action: 'complete',
|
|
747
568
|
entity: `seq:${seq}`,
|
|
@@ -749,85 +570,6 @@ export class ExecutionEngine {
|
|
|
749
570
|
message: 'ACP session lost',
|
|
750
571
|
};
|
|
751
572
|
}
|
|
752
|
-
async handleWorkerFinalization(card, slotName, worktree, branch, exitCode, handle, transport) {
|
|
753
|
-
const completion = this.completionJudge.judge({
|
|
754
|
-
worktree,
|
|
755
|
-
branch,
|
|
756
|
-
baseBranch: this.ctx.mergeBranch,
|
|
757
|
-
outputFile: handle?.outputFile || null,
|
|
758
|
-
exitCode,
|
|
759
|
-
logsDir: this.ctx.paths.logsDir,
|
|
760
|
-
phase: selectWorkerPhase(card.state, this.runtimeStore.readState().leases[card.seq]?.phase),
|
|
761
|
-
});
|
|
762
|
-
const ctx = {
|
|
763
|
-
project: this.ctx.projectName,
|
|
764
|
-
seq: card.seq,
|
|
765
|
-
slot: slotName,
|
|
766
|
-
transport,
|
|
767
|
-
branch,
|
|
768
|
-
worktree,
|
|
769
|
-
baseBranch: this.ctx.mergeBranch,
|
|
770
|
-
stateFile: this.ctx.paths.stateFile,
|
|
771
|
-
maxWorkers: this.ctx.maxWorkers,
|
|
772
|
-
mrMode: this.ctx.mrMode,
|
|
773
|
-
gitlabProjectId: resolveGitlabProjectId(this.ctx.config),
|
|
774
|
-
gitlabUrl: this.ctx.config.raw.GITLAB_URL || process.env.GITLAB_URL || '',
|
|
775
|
-
gitlabToken: this.ctx.config.raw.GITLAB_TOKEN || process.env.GITLAB_TOKEN || '',
|
|
776
|
-
qaStateId: this.ctx.config.raw.PLANE_STATE_QA || this.ctx.config.raw.TRELLO_QA_LIST_ID || 'QA',
|
|
777
|
-
doneStateId: this.ctx.config.raw.PLANE_STATE_DONE || this.ctx.config.raw.TRELLO_DONE_LIST_ID || '',
|
|
778
|
-
maxRetries: this.ctx.config.WORKER_RESTART_LIMIT,
|
|
779
|
-
logsDir: this.ctx.paths.logsDir,
|
|
780
|
-
tool: handle?.tool || this.ctx.config.ACP_AGENT || this.ctx.config.WORKER_TOOL,
|
|
781
|
-
pmStateObserved: card.state,
|
|
782
|
-
};
|
|
783
|
-
const state = this.runtimeStore.readState();
|
|
784
|
-
const retryCount = this.getRetryCount(state, card.seq);
|
|
785
|
-
const workerId = `${this.ctx.projectName}:${slotName}:${card.seq}`;
|
|
786
|
-
try {
|
|
787
|
-
if (completion.status === 'completed') {
|
|
788
|
-
const results = await this.postActions.executeCompletion(ctx, completion, handle?.sessionId || null);
|
|
789
|
-
const allOk = results.every(r => r.ok);
|
|
790
|
-
this.log.ok(`seq ${card.seq}: PostActions completed (${allOk ? 'all ok' : 'some failures'})`);
|
|
791
|
-
}
|
|
792
|
-
else {
|
|
793
|
-
const retrySessionId = transport === 'proc' ? (handle?.sessionId || null) : null;
|
|
794
|
-
await this.postActions.executeFailure(ctx, completion, exitCode, retrySessionId, retryCount, {
|
|
795
|
-
onExit: (code) => this.onWorkerExit(workerId, card, slotName, worktree, branch, code),
|
|
796
|
-
});
|
|
797
|
-
this.log.info(`seq ${card.seq}: Failure handling done`);
|
|
798
|
-
}
|
|
799
|
-
}
|
|
800
|
-
catch (err) {
|
|
801
|
-
this.log.error(`seq ${card.seq}: PostActions error: ${err}`);
|
|
802
|
-
}
|
|
803
|
-
}
|
|
804
|
-
requireAgentRuntime() {
|
|
805
|
-
if (!this.agentRuntime) {
|
|
806
|
-
throw new Error('ACP transport requested but AgentRuntime is not configured');
|
|
807
|
-
}
|
|
808
|
-
return this.agentRuntime;
|
|
809
|
-
}
|
|
810
|
-
applyAcpSessionToSlot(slot, session) {
|
|
811
|
-
const transport = this.ctx.config.WORKER_TRANSPORT === 'pty' ? 'pty' : 'acp';
|
|
812
|
-
slot.mode = transport;
|
|
813
|
-
slot.transport = transport;
|
|
814
|
-
slot.agent = session.tool;
|
|
815
|
-
slot.tmuxSession = session.sessionName;
|
|
816
|
-
slot.sessionId = session.sessionId;
|
|
817
|
-
slot.runId = session.currentRun?.runId || null;
|
|
818
|
-
slot.sessionState = session.sessionState;
|
|
819
|
-
slot.remoteStatus = session.currentRun?.status || null;
|
|
820
|
-
slot.lastEventAt = session.lastSeenAt;
|
|
821
|
-
slot.pid = null;
|
|
822
|
-
slot.outputFile = null;
|
|
823
|
-
slot.exitCode = null;
|
|
824
|
-
}
|
|
825
|
-
isAcpRunActive(status) {
|
|
826
|
-
return ['submitted', 'running', 'waiting_input', 'needs_confirmation', 'stalled_submit'].includes(status);
|
|
827
|
-
}
|
|
828
|
-
acpRunExitCode(status) {
|
|
829
|
-
return status === 'completed' ? 0 : 1;
|
|
830
|
-
}
|
|
831
573
|
findRuntimeSlotName(state, seq, lease) {
|
|
832
574
|
if (lease?.slot && state.workers[lease.slot])
|
|
833
575
|
return lease.slot;
|