@agent-relay/sdk 3.1.23 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. package/bin/agent-relay-broker-darwin-arm64 +0 -0
  2. package/bin/agent-relay-broker-darwin-x64 +0 -0
  3. package/bin/agent-relay-broker-linux-arm64 +0 -0
  4. package/bin/agent-relay-broker-linux-x64 +0 -0
  5. package/dist/__tests__/completion-pipeline.test.d.ts +14 -0
  6. package/dist/__tests__/completion-pipeline.test.d.ts.map +1 -0
  7. package/dist/__tests__/completion-pipeline.test.js +1476 -0
  8. package/dist/__tests__/completion-pipeline.test.js.map +1 -0
  9. package/dist/__tests__/e2e-owner-review.test.js +2 -2
  10. package/dist/__tests__/e2e-owner-review.test.js.map +1 -1
  11. package/dist/__tests__/unit.test.js +8 -0
  12. package/dist/__tests__/unit.test.js.map +1 -1
  13. package/dist/client.js +2 -2
  14. package/dist/client.js.map +1 -1
  15. package/dist/examples/example.js +1 -1
  16. package/dist/examples/example.js.map +1 -1
  17. package/dist/examples/ralph-loop.js +6 -6
  18. package/dist/examples/ralph-loop.js.map +1 -1
  19. package/dist/relay-adapter.js +4 -4
  20. package/dist/relay-adapter.js.map +1 -1
  21. package/dist/relay.d.ts +1 -0
  22. package/dist/relay.d.ts.map +1 -1
  23. package/dist/relay.js +2 -0
  24. package/dist/relay.js.map +1 -1
  25. package/dist/workflows/runner.d.ts +53 -2
  26. package/dist/workflows/runner.d.ts.map +1 -1
  27. package/dist/workflows/runner.js +1277 -94
  28. package/dist/workflows/runner.js.map +1 -1
  29. package/dist/workflows/trajectory.d.ts +6 -2
  30. package/dist/workflows/trajectory.d.ts.map +1 -1
  31. package/dist/workflows/trajectory.js +37 -2
  32. package/dist/workflows/trajectory.js.map +1 -1
  33. package/dist/workflows/types.d.ts +88 -0
  34. package/dist/workflows/types.d.ts.map +1 -1
  35. package/dist/workflows/types.js.map +1 -1
  36. package/dist/workflows/validator.js +4 -4
  37. package/dist/workflows/validator.js.map +1 -1
  38. package/package.json +2 -2
@@ -5,11 +5,12 @@
5
5
  */
6
6
  import { spawn as cpSpawn, execFileSync } from 'node:child_process';
7
7
  import { randomBytes } from 'node:crypto';
8
- import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
8
+ import { createWriteStream, existsSync, mkdirSync, readFileSync, readdirSync, renameSync, statSync, writeFileSync, } from 'node:fs';
9
9
  import { readFile, writeFile } from 'node:fs/promises';
10
10
  import path from 'node:path';
11
11
  import { parse as parseYaml } from 'yaml';
12
12
  import { stripAnsi as stripAnsiFn } from '../pty.js';
13
+ import { resolveSpawnPolicy } from '../spawn-from-env.js';
13
14
  import { loadCustomSteps, resolveAllCustomSteps, validateCustomStepsUsage, CustomStepsParseError, CustomStepResolutionError, } from './custom-steps.js';
14
15
  import { InMemoryWorkflowDb } from './memory-db.js';
15
16
  import { WorkflowTrajectory } from './trajectory.js';
@@ -28,6 +29,14 @@ class SpawnExitError extends Error {
28
29
  this.exitSignal = exitSignal ?? undefined;
29
30
  }
30
31
  }
32
+ class WorkflowCompletionError extends Error {
33
+ completionReason;
34
+ constructor(message, completionReason) {
35
+ super(message);
36
+ this.name = 'WorkflowCompletionError';
37
+ this.completionReason = completionReason;
38
+ }
39
+ }
31
40
  // ── CLI resolution ───────────────────────────────────────────────────────────
32
41
  /**
33
42
  * Resolve `cursor` to the concrete cursor agent binary available in PATH.
@@ -101,6 +110,12 @@ export class WorkflowRunner {
101
110
  lastActivity = new Map();
102
111
  /** Runtime-name lookup for agents participating in supervised owner flows. */
103
112
  supervisedRuntimeAgents = new Map();
113
+ /** Runtime-name lookup for active step agents so channel messages can be attributed to a step. */
114
+ runtimeStepAgents = new Map();
115
+ /** Per-step completion evidence collected across output, channel, files, and tool side-effects. */
116
+ stepCompletionEvidence = new Map();
117
+ /** Expected owner/worker identities per step so coordination signals can be validated by sender. */
118
+ stepSignalParticipants = new Map();
104
119
  /** Resolved named paths from the top-level `paths` config, keyed by name → absolute directory. */
105
120
  resolvedPaths = new Map();
106
121
  constructor(options = {}) {
@@ -182,6 +197,441 @@ export class WorkflowRunner {
182
197
  }
183
198
  return resolved;
184
199
  }
200
+ static EVIDENCE_IGNORED_DIRS = new Set([
201
+ '.git',
202
+ '.agent-relay',
203
+ '.trajectories',
204
+ 'node_modules',
205
+ ]);
206
+ getStepCompletionEvidence(stepName) {
207
+ const record = this.stepCompletionEvidence.get(stepName);
208
+ if (!record)
209
+ return undefined;
210
+ const evidence = structuredClone(record.evidence);
211
+ return this.filterStepEvidenceBySignalProvenance(stepName, evidence);
212
+ }
213
+ getOrCreateStepEvidenceRecord(stepName) {
214
+ const existing = this.stepCompletionEvidence.get(stepName);
215
+ if (existing)
216
+ return existing;
217
+ const now = new Date().toISOString();
218
+ const record = {
219
+ evidence: {
220
+ stepName,
221
+ lastUpdatedAt: now,
222
+ roots: [],
223
+ output: {
224
+ stdout: '',
225
+ stderr: '',
226
+ combined: '',
227
+ },
228
+ channelPosts: [],
229
+ files: [],
230
+ process: {},
231
+ toolSideEffects: [],
232
+ coordinationSignals: [],
233
+ },
234
+ baselineSnapshots: new Map(),
235
+ filesCaptured: false,
236
+ };
237
+ this.stepCompletionEvidence.set(stepName, record);
238
+ return record;
239
+ }
240
+ initializeStepSignalParticipants(stepName, ownerSender, workerSender) {
241
+ this.stepSignalParticipants.set(stepName, {
242
+ ownerSenders: new Set(),
243
+ workerSenders: new Set(),
244
+ });
245
+ this.rememberStepSignalSender(stepName, 'owner', ownerSender);
246
+ this.rememberStepSignalSender(stepName, 'worker', workerSender);
247
+ }
248
+ rememberStepSignalSender(stepName, participant, ...senders) {
249
+ const participants = this.stepSignalParticipants.get(stepName) ??
250
+ {
251
+ ownerSenders: new Set(),
252
+ workerSenders: new Set(),
253
+ };
254
+ this.stepSignalParticipants.set(stepName, participants);
255
+ const target = participant === 'owner' ? participants.ownerSenders : participants.workerSenders;
256
+ for (const sender of senders) {
257
+ const trimmed = sender?.trim();
258
+ if (trimmed)
259
+ target.add(trimmed);
260
+ }
261
+ }
262
+ resolveSignalParticipantKind(role) {
263
+ const roleLC = role?.toLowerCase().trim();
264
+ if (!roleLC)
265
+ return undefined;
266
+ if (/\b(owner|lead|supervisor)\b/.test(roleLC))
267
+ return 'owner';
268
+ if (/\b(worker|specialist|engineer|implementer)\b/.test(roleLC))
269
+ return 'worker';
270
+ return undefined;
271
+ }
272
+ isSignalFromExpectedSender(stepName, signal) {
273
+ const expectedParticipant = signal.kind === 'worker_done'
274
+ ? 'worker'
275
+ : signal.kind === 'lead_done'
276
+ ? 'owner'
277
+ : undefined;
278
+ if (!expectedParticipant)
279
+ return true;
280
+ const participants = this.stepSignalParticipants.get(stepName);
281
+ if (!participants)
282
+ return true;
283
+ const allowedSenders = expectedParticipant === 'owner' ? participants.ownerSenders : participants.workerSenders;
284
+ if (allowedSenders.size === 0)
285
+ return true;
286
+ const sender = signal.sender ?? signal.actor;
287
+ if (sender) {
288
+ return allowedSenders.has(sender);
289
+ }
290
+ const observedParticipant = this.resolveSignalParticipantKind(signal.role);
291
+ if (observedParticipant) {
292
+ return observedParticipant === expectedParticipant;
293
+ }
294
+ return signal.source !== 'channel';
295
+ }
296
+ filterStepEvidenceBySignalProvenance(stepName, evidence) {
297
+ evidence.channelPosts = evidence.channelPosts.map((post) => {
298
+ const signals = post.signals.filter((signal) => this.isSignalFromExpectedSender(stepName, signal));
299
+ return {
300
+ ...post,
301
+ completionRelevant: signals.length > 0,
302
+ signals,
303
+ };
304
+ });
305
+ evidence.coordinationSignals = evidence.coordinationSignals.filter((signal) => this.isSignalFromExpectedSender(stepName, signal));
306
+ return evidence;
307
+ }
308
+ beginStepEvidence(stepName, roots, startedAt) {
309
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
310
+ const evidence = record.evidence;
311
+ const now = startedAt ?? new Date().toISOString();
312
+ evidence.startedAt ??= now;
313
+ evidence.status = 'running';
314
+ evidence.lastUpdatedAt = now;
315
+ for (const root of this.uniqueEvidenceRoots(roots)) {
316
+ if (!evidence.roots.includes(root)) {
317
+ evidence.roots.push(root);
318
+ }
319
+ if (!record.baselineSnapshots.has(root)) {
320
+ record.baselineSnapshots.set(root, this.captureFileSnapshot(root));
321
+ }
322
+ }
323
+ }
324
+ captureStepTerminalEvidence(stepName, output, process, meta) {
325
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
326
+ const evidence = record.evidence;
327
+ const observedAt = new Date().toISOString();
328
+ const append = (current, next) => {
329
+ if (!next)
330
+ return current;
331
+ return current ? `${current}\n${next}` : next;
332
+ };
333
+ if (output.stdout) {
334
+ evidence.output.stdout = append(evidence.output.stdout, output.stdout);
335
+ for (const signal of this.extractCompletionSignals(output.stdout, 'stdout', observedAt, meta)) {
336
+ evidence.coordinationSignals.push(signal);
337
+ }
338
+ }
339
+ if (output.stderr) {
340
+ evidence.output.stderr = append(evidence.output.stderr, output.stderr);
341
+ for (const signal of this.extractCompletionSignals(output.stderr, 'stderr', observedAt, meta)) {
342
+ evidence.coordinationSignals.push(signal);
343
+ }
344
+ }
345
+ const combinedOutput = output.combined ??
346
+ [output.stdout, output.stderr].filter((value) => Boolean(value)).join('\n');
347
+ if (combinedOutput) {
348
+ evidence.output.combined = append(evidence.output.combined, combinedOutput);
349
+ }
350
+ if (process) {
351
+ if (process.exitCode !== undefined) {
352
+ evidence.process.exitCode = process.exitCode;
353
+ evidence.coordinationSignals.push({
354
+ kind: 'process_exit',
355
+ source: 'process',
356
+ text: `Process exited with code ${process.exitCode}`,
357
+ observedAt,
358
+ value: String(process.exitCode),
359
+ });
360
+ }
361
+ if (process.exitSignal !== undefined) {
362
+ evidence.process.exitSignal = process.exitSignal;
363
+ }
364
+ }
365
+ evidence.lastUpdatedAt = observedAt;
366
+ }
367
+ finalizeStepEvidence(stepName, status, completedAt, completionReason) {
368
+ const record = this.stepCompletionEvidence.get(stepName);
369
+ if (!record)
370
+ return;
371
+ const evidence = record.evidence;
372
+ const observedAt = completedAt ?? new Date().toISOString();
373
+ evidence.status = status;
374
+ if (status !== 'running') {
375
+ evidence.completedAt = observedAt;
376
+ }
377
+ evidence.lastUpdatedAt = observedAt;
378
+ if (!record.filesCaptured) {
379
+ const existing = new Set(evidence.files.map((file) => `${file.kind}:${file.path}`));
380
+ for (const root of evidence.roots) {
381
+ const before = record.baselineSnapshots.get(root) ?? new Map();
382
+ const after = this.captureFileSnapshot(root);
383
+ for (const change of this.diffFileSnapshots(before, after, root, observedAt)) {
384
+ const key = `${change.kind}:${change.path}`;
385
+ if (existing.has(key))
386
+ continue;
387
+ existing.add(key);
388
+ evidence.files.push(change);
389
+ }
390
+ }
391
+ record.filesCaptured = true;
392
+ }
393
+ if (completionReason) {
394
+ const decision = this.buildStepCompletionDecision(stepName, completionReason);
395
+ if (decision) {
396
+ void this.trajectory?.stepCompletionDecision(stepName, decision);
397
+ }
398
+ }
399
+ }
400
+ recordStepToolSideEffect(stepName, effect) {
401
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
402
+ const observedAt = effect.observedAt ?? new Date().toISOString();
403
+ record.evidence.toolSideEffects.push({
404
+ ...effect,
405
+ observedAt,
406
+ });
407
+ record.evidence.lastUpdatedAt = observedAt;
408
+ }
409
+ recordChannelEvidence(text, options = {}) {
410
+ const stepName = options.stepName ??
411
+ this.inferStepNameFromChannelText(text) ??
412
+ (options.actor ? this.runtimeStepAgents.get(options.actor)?.stepName : undefined);
413
+ if (!stepName)
414
+ return;
415
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
416
+ const postedAt = new Date().toISOString();
417
+ const sender = options.sender ?? options.actor;
418
+ const signals = this.extractCompletionSignals(text, 'channel', postedAt, {
419
+ sender,
420
+ actor: options.actor,
421
+ role: options.role,
422
+ });
423
+ const channelPost = {
424
+ stepName,
425
+ text,
426
+ postedAt,
427
+ origin: options.origin ?? 'runner_post',
428
+ completionRelevant: signals.length > 0,
429
+ sender,
430
+ actor: options.actor,
431
+ role: options.role,
432
+ target: options.target,
433
+ signals,
434
+ };
435
+ record.evidence.channelPosts.push(channelPost);
436
+ record.evidence.coordinationSignals.push(...signals);
437
+ record.evidence.lastUpdatedAt = postedAt;
438
+ }
439
+ extractCompletionSignals(text, source, observedAt, meta) {
440
+ const signals = [];
441
+ const seen = new Set();
442
+ const add = (kind, signalText, value) => {
443
+ const trimmed = signalText.trim().slice(0, 280);
444
+ if (!trimmed)
445
+ return;
446
+ const key = `${kind}:${trimmed}:${value ?? ''}`;
447
+ if (seen.has(key))
448
+ return;
449
+ seen.add(key);
450
+ signals.push({
451
+ kind,
452
+ source,
453
+ text: trimmed,
454
+ observedAt,
455
+ sender: meta?.sender,
456
+ actor: meta?.actor,
457
+ role: meta?.role,
458
+ value,
459
+ });
460
+ };
461
+ for (const match of text.matchAll(/\bWORKER_DONE\b(?::\s*([^\n]+))?/gi)) {
462
+ add('worker_done', match[0], match[1]?.trim());
463
+ }
464
+ for (const match of text.matchAll(/\bLEAD_DONE\b(?::\s*([^\n]+))?/gi)) {
465
+ add('lead_done', match[0], match[1]?.trim());
466
+ }
467
+ for (const match of text.matchAll(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/g)) {
468
+ add('step_complete', match[0], match[1]);
469
+ }
470
+ for (const match of text.matchAll(/\bOWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi)) {
471
+ add('owner_decision', match[0], match[1].toUpperCase());
472
+ }
473
+ for (const match of text.matchAll(/\bREVIEW_DECISION:\s*(APPROVE|REJECT)\b/gi)) {
474
+ add('review_decision', match[0], match[1].toUpperCase());
475
+ }
476
+ if (/\bverification gate observed\b|\bverification passed\b/i.test(text)) {
477
+ add('verification_passed', this.firstMeaningfulLine(text) ?? text);
478
+ }
479
+ if (/\bverification failed\b/i.test(text)) {
480
+ add('verification_failed', this.firstMeaningfulLine(text) ?? text);
481
+ }
482
+ if (/\b(summary|handoff|ready for review|ready for handoff|task complete|work complete|completed work|finished work)\b/i.test(text)) {
483
+ add('task_summary', this.firstMeaningfulLine(text) ?? text);
484
+ }
485
+ return signals;
486
+ }
487
+ inferStepNameFromChannelText(text) {
488
+ const bracketMatch = text.match(/^\*\*\[([^\]]+)\]/);
489
+ if (bracketMatch?.[1])
490
+ return bracketMatch[1];
491
+ const markerMatch = text.match(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/);
492
+ if (markerMatch?.[1])
493
+ return markerMatch[1];
494
+ return undefined;
495
+ }
496
+ uniqueEvidenceRoots(roots) {
497
+ return [...new Set(roots.filter((root) => Boolean(root)).map((root) => path.resolve(root)))];
498
+ }
499
+ captureFileSnapshot(root) {
500
+ const snapshot = new Map();
501
+ if (!existsSync(root))
502
+ return snapshot;
503
+ const visit = (currentPath) => {
504
+ let entries;
505
+ try {
506
+ entries = readdirSync(currentPath, { withFileTypes: true });
507
+ }
508
+ catch {
509
+ return;
510
+ }
511
+ for (const entry of entries) {
512
+ if (entry.isDirectory() && WorkflowRunner.EVIDENCE_IGNORED_DIRS.has(entry.name)) {
513
+ continue;
514
+ }
515
+ const fullPath = path.join(currentPath, entry.name);
516
+ if (entry.isDirectory()) {
517
+ visit(fullPath);
518
+ continue;
519
+ }
520
+ try {
521
+ const stats = statSync(fullPath);
522
+ if (!stats.isFile())
523
+ continue;
524
+ snapshot.set(fullPath, { mtimeMs: stats.mtimeMs, size: stats.size });
525
+ }
526
+ catch {
527
+ // Best-effort evidence collection only.
528
+ }
529
+ }
530
+ };
531
+ try {
532
+ const stats = statSync(root);
533
+ if (stats.isFile()) {
534
+ snapshot.set(root, { mtimeMs: stats.mtimeMs, size: stats.size });
535
+ return snapshot;
536
+ }
537
+ }
538
+ catch {
539
+ return snapshot;
540
+ }
541
+ visit(root);
542
+ return snapshot;
543
+ }
544
+ diffFileSnapshots(before, after, root, observedAt) {
545
+ const allPaths = new Set([...before.keys(), ...after.keys()]);
546
+ const changes = [];
547
+ for (const filePath of allPaths) {
548
+ const prior = before.get(filePath);
549
+ const next = after.get(filePath);
550
+ let kind;
551
+ if (!prior && next) {
552
+ kind = 'created';
553
+ }
554
+ else if (prior && !next) {
555
+ kind = 'deleted';
556
+ }
557
+ else if (prior && next && (prior.mtimeMs !== next.mtimeMs || prior.size !== next.size)) {
558
+ kind = 'modified';
559
+ }
560
+ if (!kind)
561
+ continue;
562
+ changes.push({
563
+ path: this.normalizeEvidencePath(filePath),
564
+ kind,
565
+ observedAt,
566
+ root,
567
+ });
568
+ }
569
+ return changes.sort((a, b) => a.path.localeCompare(b.path));
570
+ }
571
+ normalizeEvidencePath(filePath) {
572
+ const relative = path.relative(this.cwd, filePath);
573
+ if (!relative || relative === '')
574
+ return path.basename(filePath);
575
+ return relative.startsWith('..') ? filePath : relative;
576
+ }
577
+ buildStepCompletionDecision(stepName, completionReason) {
578
+ let reason;
579
+ let mode;
580
+ switch (completionReason) {
581
+ case 'completed_verified':
582
+ mode = 'verification';
583
+ reason = 'Verification passed';
584
+ break;
585
+ case 'completed_by_evidence':
586
+ mode = 'evidence';
587
+ reason = 'Completion inferred from collected evidence';
588
+ break;
589
+ case 'completed_by_owner_decision': {
590
+ const evidence = this.getStepCompletionEvidence(stepName);
591
+ const markerObserved = evidence?.coordinationSignals.some((signal) => signal.kind === 'step_complete');
592
+ mode = markerObserved ? 'marker' : 'owner_decision';
593
+ reason = markerObserved ? 'Legacy STEP_COMPLETE marker observed' : 'Owner approved completion';
594
+ break;
595
+ }
596
+ default:
597
+ return undefined;
598
+ }
599
+ return {
600
+ mode,
601
+ reason,
602
+ evidence: this.buildTrajectoryCompletionEvidence(stepName),
603
+ };
604
+ }
605
+ buildTrajectoryCompletionEvidence(stepName) {
606
+ const evidence = this.getStepCompletionEvidence(stepName);
607
+ if (!evidence)
608
+ return undefined;
609
+ const signals = evidence.coordinationSignals
610
+ .slice(-6)
611
+ .map((signal) => signal.value ?? signal.text);
612
+ const channelPosts = evidence.channelPosts
613
+ .filter((post) => post.completionRelevant)
614
+ .slice(-3)
615
+ .map((post) => post.text.slice(0, 160));
616
+ const files = evidence.files.slice(0, 6).map((file) => `${file.kind}:${file.path}`);
617
+ const summaryParts = [];
618
+ if (signals.length > 0)
619
+ summaryParts.push(`${signals.length} signal(s)`);
620
+ if (channelPosts.length > 0)
621
+ summaryParts.push(`${channelPosts.length} relevant channel post(s)`);
622
+ if (files.length > 0)
623
+ summaryParts.push(`${files.length} file change(s)`);
624
+ if (evidence.process.exitCode !== undefined) {
625
+ summaryParts.push(`exit=${evidence.process.exitCode}`);
626
+ }
627
+ return {
628
+ summary: summaryParts.length > 0 ? summaryParts.join(', ') : undefined,
629
+ signals: signals.length > 0 ? signals : undefined,
630
+ channelPosts: channelPosts.length > 0 ? channelPosts : undefined,
631
+ files: files.length > 0 ? files : undefined,
632
+ exitCode: evidence.process.exitCode,
633
+ };
634
+ }
185
635
  // ── Progress logging ────────────────────────────────────────────────────
186
636
  /** Log a progress message with elapsed time since run start. */
187
637
  log(msg) {
@@ -985,9 +1435,11 @@ export class WorkflowRunner {
985
1435
  if (state.row.status === 'failed') {
986
1436
  state.row.status = 'pending';
987
1437
  state.row.error = undefined;
1438
+ state.row.completionReason = undefined;
988
1439
  await this.db.updateStep(state.row.id, {
989
1440
  status: 'pending',
990
1441
  error: undefined,
1442
+ completionReason: undefined,
991
1443
  updatedAt: new Date().toISOString(),
992
1444
  });
993
1445
  }
@@ -1007,6 +1459,8 @@ export class WorkflowRunner {
1007
1459
  this.currentConfig = config;
1008
1460
  this.currentRunId = runId;
1009
1461
  this.runStartTime = Date.now();
1462
+ this.runtimeStepAgents.clear();
1463
+ this.stepCompletionEvidence.clear();
1010
1464
  this.log(`Starting workflow "${workflow.name}" (${workflow.steps.length} steps)`);
1011
1465
  // Initialize trajectory recording
1012
1466
  this.trajectory = new WorkflowTrajectory(config.trajectories, runId, this.cwd);
@@ -1132,8 +1586,24 @@ export class WorkflowRunner {
1132
1586
  const fromShort = msg.from.replace(/-[a-f0-9]{6,}$/, '');
1133
1587
  const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
1134
1588
  this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
1589
+ if (this.channel && (msg.to === this.channel || msg.to === `#${this.channel}`)) {
1590
+ const runtimeAgent = this.runtimeStepAgents.get(msg.from);
1591
+ this.recordChannelEvidence(msg.text, {
1592
+ sender: runtimeAgent?.logicalName ?? msg.from,
1593
+ actor: msg.from,
1594
+ role: runtimeAgent?.role,
1595
+ target: msg.to,
1596
+ origin: 'relay_message',
1597
+ stepName: runtimeAgent?.stepName,
1598
+ });
1599
+ }
1135
1600
  const supervision = this.supervisedRuntimeAgents.get(msg.from);
1136
1601
  if (supervision?.role === 'owner') {
1602
+ this.recordStepToolSideEffect(supervision.stepName, {
1603
+ type: 'owner_monitoring',
1604
+ detail: `Owner messaged ${msg.to}: ${msg.text.slice(0, 120)}`,
1605
+ raw: { to: msg.to, text: msg.text },
1606
+ });
1137
1607
  void this.trajectory?.ownerMonitoringEvent(supervision.stepName, supervision.logicalName, `Messaged ${msg.to}: ${msg.text.slice(0, 120)}`, { to: msg.to, text: msg.text });
1138
1608
  }
1139
1609
  };
@@ -1288,6 +1758,7 @@ export class WorkflowRunner {
1288
1758
  updatedAt: new Date().toISOString(),
1289
1759
  });
1290
1760
  this.emit({ type: 'step:failed', runId, stepName, error: 'Cancelled' });
1761
+ this.finalizeStepEvidence(stepName, 'failed');
1291
1762
  }
1292
1763
  }
1293
1764
  this.emit({ type: 'run:cancelled', runId });
@@ -1328,6 +1799,7 @@ export class WorkflowRunner {
1328
1799
  this.lastIdleLog.clear();
1329
1800
  this.lastActivity.clear();
1330
1801
  this.supervisedRuntimeAgents.clear();
1802
+ this.runtimeStepAgents.clear();
1331
1803
  this.log('Shutting down broker...');
1332
1804
  await this.relay?.shutdown();
1333
1805
  this.relay = undefined;
@@ -1435,6 +1907,9 @@ export class WorkflowRunner {
1435
1907
  attempts: (state?.row.retryCount ?? 0) + 1,
1436
1908
  output: state?.row.output,
1437
1909
  verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
1910
+ completionMode: state?.row.completionReason
1911
+ ? this.buildStepCompletionDecision(step.name, state.row.completionReason)?.mode
1912
+ : undefined,
1438
1913
  });
1439
1914
  }
1440
1915
  }
@@ -1595,11 +2070,21 @@ export class WorkflowRunner {
1595
2070
  const maxRetries = step.retries ?? errorHandling?.maxRetries ?? 0;
1596
2071
  const retryDelay = errorHandling?.retryDelayMs ?? 1000;
1597
2072
  let lastError;
2073
+ let lastCompletionReason;
2074
+ let lastExitCode;
2075
+ let lastExitSignal;
1598
2076
  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
1599
2077
  this.checkAborted();
2078
+ lastExitCode = undefined;
2079
+ lastExitSignal = undefined;
1600
2080
  if (attempt > 0) {
1601
2081
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
1602
2082
  this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
2083
+ this.recordStepToolSideEffect(step.name, {
2084
+ type: 'retry',
2085
+ detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
2086
+ raw: { attempt, maxRetries },
2087
+ });
1603
2088
  state.row.retryCount = attempt;
1604
2089
  await this.db.updateStep(state.row.id, {
1605
2090
  retryCount: attempt,
@@ -1609,9 +2094,13 @@ export class WorkflowRunner {
1609
2094
  }
1610
2095
  // Mark step as running
1611
2096
  state.row.status = 'running';
2097
+ state.row.error = undefined;
2098
+ state.row.completionReason = undefined;
1612
2099
  state.row.startedAt = new Date().toISOString();
1613
2100
  await this.db.updateStep(state.row.id, {
1614
2101
  status: 'running',
2102
+ error: undefined,
2103
+ completionReason: undefined,
1615
2104
  startedAt: state.row.startedAt,
1616
2105
  updatedAt: new Date().toISOString(),
1617
2106
  });
@@ -1629,32 +2118,40 @@ export class WorkflowRunner {
1629
2118
  });
1630
2119
  // Resolve step workdir (named path reference) for deterministic steps
1631
2120
  const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
2121
+ this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
1632
2122
  try {
1633
2123
  // Delegate to executor if present
1634
2124
  if (this.executor?.executeDeterministicStep) {
1635
2125
  const result = await this.executor.executeDeterministicStep(step, resolvedCommand, stepCwd);
2126
+ lastExitCode = result.exitCode;
1636
2127
  const failOnError = step.failOnError !== false;
1637
2128
  if (failOnError && result.exitCode !== 0) {
1638
2129
  throw new Error(`Command failed with exit code ${result.exitCode}: ${result.output.slice(0, 500)}`);
1639
2130
  }
1640
2131
  const output = step.captureOutput !== false ? result.output : `Command completed (exit code ${result.exitCode})`;
1641
- if (step.verification) {
1642
- this.runVerification(step.verification, output, step.name);
1643
- }
2132
+ this.captureStepTerminalEvidence(step.name, { stdout: result.output, combined: result.output }, { exitCode: result.exitCode });
2133
+ const verificationResult = step.verification
2134
+ ? this.runVerification(step.verification, output, step.name)
2135
+ : undefined;
1644
2136
  // Mark completed
1645
2137
  state.row.status = 'completed';
1646
2138
  state.row.output = output;
2139
+ state.row.completionReason = verificationResult?.completionReason;
1647
2140
  state.row.completedAt = new Date().toISOString();
1648
2141
  await this.db.updateStep(state.row.id, {
1649
2142
  status: 'completed',
1650
2143
  output,
2144
+ completionReason: verificationResult?.completionReason,
1651
2145
  completedAt: state.row.completedAt,
1652
2146
  updatedAt: new Date().toISOString(),
1653
2147
  });
1654
2148
  await this.persistStepOutput(runId, step.name, output);
1655
2149
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2150
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, verificationResult?.completionReason);
1656
2151
  return;
1657
2152
  }
2153
+ let commandStdout = '';
2154
+ let commandStderr = '';
1658
2155
  const output = await new Promise((resolve, reject) => {
1659
2156
  const child = cpSpawn('sh', ['-c', resolvedCommand], {
1660
2157
  stdio: 'pipe',
@@ -1689,7 +2186,7 @@ export class WorkflowRunner {
1689
2186
  child.stderr?.on('data', (chunk) => {
1690
2187
  stderrChunks.push(chunk.toString());
1691
2188
  });
1692
- child.on('close', (code) => {
2189
+ child.on('close', (code, signal) => {
1693
2190
  if (timer)
1694
2191
  clearTimeout(timer);
1695
2192
  if (abortHandler && abortSignal) {
@@ -1705,6 +2202,10 @@ export class WorkflowRunner {
1705
2202
  }
1706
2203
  const stdout = stdoutChunks.join('');
1707
2204
  const stderr = stderrChunks.join('');
2205
+ commandStdout = stdout;
2206
+ commandStderr = stderr;
2207
+ lastExitCode = code ?? undefined;
2208
+ lastExitSignal = signal ?? undefined;
1708
2209
  // Check exit code unless failOnError is explicitly false
1709
2210
  const failOnError = step.failOnError !== false;
1710
2211
  if (failOnError && code !== 0 && code !== null) {
@@ -1722,31 +2223,41 @@ export class WorkflowRunner {
1722
2223
  reject(new Error(`Failed to execute command: ${err.message}`));
1723
2224
  });
1724
2225
  });
1725
- if (step.verification) {
1726
- this.runVerification(step.verification, output, step.name);
1727
- }
2226
+ this.captureStepTerminalEvidence(step.name, {
2227
+ stdout: commandStdout || output,
2228
+ stderr: commandStderr,
2229
+ combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
2230
+ }, { exitCode: lastExitCode, exitSignal: lastExitSignal });
2231
+ const verificationResult = step.verification
2232
+ ? this.runVerification(step.verification, output, step.name)
2233
+ : undefined;
1728
2234
  // Mark completed
1729
2235
  state.row.status = 'completed';
1730
2236
  state.row.output = output;
2237
+ state.row.completionReason = verificationResult?.completionReason;
1731
2238
  state.row.completedAt = new Date().toISOString();
1732
2239
  await this.db.updateStep(state.row.id, {
1733
2240
  status: 'completed',
1734
2241
  output,
2242
+ completionReason: verificationResult?.completionReason,
1735
2243
  completedAt: state.row.completedAt,
1736
2244
  updatedAt: new Date().toISOString(),
1737
2245
  });
1738
2246
  // Persist step output
1739
2247
  await this.persistStepOutput(runId, step.name, output);
1740
2248
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2249
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, verificationResult?.completionReason);
1741
2250
  return;
1742
2251
  }
1743
2252
  catch (err) {
1744
2253
  lastError = err instanceof Error ? err.message : String(err);
2254
+ lastCompletionReason =
2255
+ err instanceof WorkflowCompletionError ? err.completionReason : undefined;
1745
2256
  }
1746
2257
  }
1747
2258
  const errorMsg = lastError ?? 'Unknown error';
1748
2259
  this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
1749
- await this.markStepFailed(state, errorMsg, runId);
2260
+ await this.markStepFailed(state, errorMsg, runId, { exitCode: lastExitCode, exitSignal: lastExitSignal }, lastCompletionReason);
1750
2261
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1751
2262
  }
1752
2263
  /**
@@ -1758,12 +2269,18 @@ export class WorkflowRunner {
1758
2269
  const state = stepStates.get(step.name);
1759
2270
  if (!state)
1760
2271
  throw new Error(`Step state not found: ${step.name}`);
2272
+ let lastExitCode;
2273
+ let lastExitSignal;
1761
2274
  this.checkAborted();
1762
2275
  // Mark step as running
1763
2276
  state.row.status = 'running';
2277
+ state.row.error = undefined;
2278
+ state.row.completionReason = undefined;
1764
2279
  state.row.startedAt = new Date().toISOString();
1765
2280
  await this.db.updateStep(state.row.id, {
1766
2281
  status: 'running',
2282
+ error: undefined,
2283
+ completionReason: undefined,
1767
2284
  startedAt: state.row.startedAt,
1768
2285
  updatedAt: new Date().toISOString(),
1769
2286
  });
@@ -1781,6 +2298,7 @@ export class WorkflowRunner {
1781
2298
  const createBranch = step.createBranch !== false;
1782
2299
  // Resolve workdir for worktree steps (same as deterministic/agent steps)
1783
2300
  const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
2301
+ this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
1784
2302
  if (!branch) {
1785
2303
  const errorMsg = 'Worktree step missing required "branch" field';
1786
2304
  await this.markStepFailed(state, errorMsg, runId);
@@ -1821,6 +2339,10 @@ export class WorkflowRunner {
1821
2339
  await this.markStepFailed(state, errorMsg, runId);
1822
2340
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1823
2341
  }
2342
+ let commandStdout = '';
2343
+ let commandStderr = '';
2344
+ let commandExitCode;
2345
+ let commandExitSignal;
1824
2346
  const output = await new Promise((resolve, reject) => {
1825
2347
  const child = cpSpawn('sh', ['-c', worktreeCmd], {
1826
2348
  stdio: 'pipe',
@@ -1855,7 +2377,7 @@ export class WorkflowRunner {
1855
2377
  child.stderr?.on('data', (chunk) => {
1856
2378
  stderrChunks.push(chunk.toString());
1857
2379
  });
1858
- child.on('close', (code) => {
2380
+ child.on('close', (code, signal) => {
1859
2381
  if (timer)
1860
2382
  clearTimeout(timer);
1861
2383
  if (abortHandler && abortSignal) {
@@ -1869,7 +2391,13 @@ export class WorkflowRunner {
1869
2391
  reject(new Error(`Step "${step.name}" timed out (no step timeout set, check global swarm.timeoutMs)`));
1870
2392
  return;
1871
2393
  }
2394
+ commandStdout = stdoutChunks.join('');
1872
2395
  const stderr = stderrChunks.join('');
2396
+ commandStderr = stderr;
2397
+ commandExitCode = code ?? undefined;
2398
+ commandExitSignal = signal ?? undefined;
2399
+ lastExitCode = commandExitCode;
2400
+ lastExitSignal = commandExitSignal;
1873
2401
  if (code !== 0 && code !== null) {
1874
2402
  reject(new Error(`git worktree add failed with exit code ${code}${stderr ? `: ${stderr.slice(0, 500)}` : ''}`));
1875
2403
  return;
@@ -1886,6 +2414,11 @@ export class WorkflowRunner {
1886
2414
  reject(new Error(`Failed to execute git worktree command: ${err.message}`));
1887
2415
  });
1888
2416
  });
2417
+ this.captureStepTerminalEvidence(step.name, {
2418
+ stdout: commandStdout || output,
2419
+ stderr: commandStderr,
2420
+ combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
2421
+ }, { exitCode: commandExitCode, exitSignal: commandExitSignal });
1889
2422
  // Mark completed
1890
2423
  state.row.status = 'completed';
1891
2424
  state.row.output = output;
@@ -1900,11 +2433,20 @@ export class WorkflowRunner {
1900
2433
  await this.persistStepOutput(runId, step.name, output);
1901
2434
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
1902
2435
  this.postToChannel(`**[${step.name}]** Worktree created at: ${output}\n Branch: ${branch}${!branchExists && createBranch ? ' (created)' : ''}`);
2436
+ this.recordStepToolSideEffect(step.name, {
2437
+ type: 'worktree_created',
2438
+ detail: `Worktree created at ${output}`,
2439
+ raw: { branch, createdBranch: !branchExists && createBranch },
2440
+ });
2441
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt);
1903
2442
  }
1904
2443
  catch (err) {
1905
2444
  const errorMsg = err instanceof Error ? err.message : String(err);
1906
2445
  this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
1907
- await this.markStepFailed(state, errorMsg, runId);
2446
+ await this.markStepFailed(state, errorMsg, runId, {
2447
+ exitCode: lastExitCode,
2448
+ exitSignal: lastExitSignal,
2449
+ });
1908
2450
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1909
2451
  }
1910
2452
  }
@@ -1925,8 +2467,9 @@ export class WorkflowRunner {
1925
2467
  }
1926
2468
  const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
1927
2469
  const usesOwnerFlow = specialistDef.interactive !== false;
1928
- const ownerDef = usesOwnerFlow ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
1929
- const reviewDef = usesOwnerFlow ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
2470
+ const usesAutoHardening = usesOwnerFlow && !this.isExplicitInteractiveWorker(specialistDef);
2471
+ const ownerDef = usesAutoHardening ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
2472
+ const reviewDef = usesAutoHardening ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
1930
2473
  const supervised = {
1931
2474
  specialist: specialistDef,
1932
2475
  owner: ownerDef,
@@ -1946,6 +2489,12 @@ export class WorkflowRunner {
1946
2489
  let lastError;
1947
2490
  let lastExitCode;
1948
2491
  let lastExitSignal;
2492
+ let lastCompletionReason;
2493
+ // OWNER_DECISION: INCOMPLETE_RETRY is enforced here at the attempt-loop level so every
2494
+ // interactive execution path shares the same contract:
2495
+ // - retries remaining => throw back into the loop and retry
2496
+ // - maxRetries = 0 => fail immediately after the first retry request
2497
+ // - retry budget exhausted => fail with retry_requested_by_owner, never "completed"
1949
2498
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
1950
2499
  this.checkAborted();
1951
2500
  // Reset per-attempt exit info so stale values don't leak across retries
@@ -1954,6 +2503,11 @@ export class WorkflowRunner {
1954
2503
  if (attempt > 0) {
1955
2504
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
1956
2505
  this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
2506
+ this.recordStepToolSideEffect(step.name, {
2507
+ type: 'retry',
2508
+ detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
2509
+ raw: { attempt, maxRetries },
2510
+ });
1957
2511
  state.row.retryCount = attempt;
1958
2512
  await this.db.updateStep(state.row.id, {
1959
2513
  retryCount: attempt,
@@ -1965,14 +2519,19 @@ export class WorkflowRunner {
1965
2519
  try {
1966
2520
  // Mark step as running
1967
2521
  state.row.status = 'running';
2522
+ state.row.error = undefined;
2523
+ state.row.completionReason = undefined;
1968
2524
  state.row.startedAt = new Date().toISOString();
1969
2525
  await this.db.updateStep(state.row.id, {
1970
2526
  status: 'running',
2527
+ error: undefined,
2528
+ completionReason: undefined,
1971
2529
  startedAt: state.row.startedAt,
1972
2530
  updatedAt: new Date().toISOString(),
1973
2531
  });
1974
2532
  this.emit({ type: 'step:started', runId, stepName: step.name });
1975
- this.postToChannel(`**[${step.name}]** Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`);
2533
+ this.log(`[${step.name}] Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`);
2534
+ this.initializeStepSignalParticipants(step.name, ownerDef.name, specialistDef.name);
1976
2535
  await this.trajectory?.stepStarted(step, ownerDef.name, {
1977
2536
  role: usesDedicatedOwner ? 'owner' : 'specialist',
1978
2537
  owner: ownerDef.name,
@@ -2021,37 +2580,104 @@ export class WorkflowRunner {
2021
2580
  };
2022
2581
  const effectiveSpecialist = applyStepWorkdir(specialistDef);
2023
2582
  const effectiveOwner = applyStepWorkdir(ownerDef);
2583
+ const effectiveReviewer = reviewDef ? applyStepWorkdir(reviewDef) : undefined;
2584
+ this.beginStepEvidence(step.name, [
2585
+ this.resolveAgentCwd(effectiveSpecialist),
2586
+ this.resolveAgentCwd(effectiveOwner),
2587
+ effectiveReviewer ? this.resolveAgentCwd(effectiveReviewer) : undefined,
2588
+ ], state.row.startedAt);
2024
2589
  let specialistOutput;
2025
2590
  let ownerOutput;
2026
2591
  let ownerElapsed;
2592
+ let completionReason;
2027
2593
  if (usesDedicatedOwner) {
2028
2594
  const result = await this.executeSupervisedAgentStep(step, { specialist: effectiveSpecialist, owner: effectiveOwner, reviewer: reviewDef }, resolvedTask, timeoutMs);
2029
2595
  specialistOutput = result.specialistOutput;
2030
2596
  ownerOutput = result.ownerOutput;
2031
2597
  ownerElapsed = result.ownerElapsed;
2598
+ completionReason = result.completionReason;
2032
2599
  }
2033
2600
  else {
2034
2601
  const ownerTask = this.injectStepOwnerContract(step, resolvedTask, effectiveOwner, effectiveSpecialist);
2602
+ const explicitInteractiveWorker = this.isExplicitInteractiveWorker(effectiveOwner);
2603
+ let explicitWorkerHandle;
2604
+ let explicitWorkerCompleted = false;
2605
+ let explicitWorkerOutput = '';
2035
2606
  this.log(`[${step.name}] Spawning owner "${effectiveOwner.name}" (cli: ${effectiveOwner.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
2036
2607
  const resolvedStep = { ...step, task: ownerTask };
2037
2608
  const ownerStartTime = Date.now();
2038
2609
  const spawnResult = this.executor
2039
2610
  ? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs)
2040
- : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs);
2611
+ : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs, {
2612
+ evidenceStepName: step.name,
2613
+ evidenceRole: usesOwnerFlow ? 'owner' : 'specialist',
2614
+ logicalName: effectiveOwner.name,
2615
+ onSpawned: explicitInteractiveWorker
2616
+ ? ({ agent }) => {
2617
+ explicitWorkerHandle = agent;
2618
+ }
2619
+ : undefined,
2620
+ onChunk: explicitInteractiveWorker
2621
+ ? ({ chunk }) => {
2622
+ explicitWorkerOutput += WorkflowRunner.stripAnsi(chunk);
2623
+ if (!explicitWorkerCompleted &&
2624
+ this.hasExplicitInteractiveWorkerCompletionEvidence(step, explicitWorkerOutput, ownerTask, resolvedTask)) {
2625
+ explicitWorkerCompleted = true;
2626
+ void explicitWorkerHandle?.release().catch(() => undefined);
2627
+ }
2628
+ }
2629
+ : undefined,
2630
+ });
2041
2631
  const output = typeof spawnResult === 'string' ? spawnResult : spawnResult.output;
2042
2632
  lastExitCode = typeof spawnResult === 'string' ? undefined : spawnResult.exitCode;
2043
2633
  lastExitSignal = typeof spawnResult === 'string' ? undefined : spawnResult.exitSignal;
2044
2634
  ownerElapsed = Date.now() - ownerStartTime;
2045
2635
  this.log(`[${step.name}] Owner "${effectiveOwner.name}" exited`);
2046
2636
  if (usesOwnerFlow) {
2047
- this.assertOwnerCompletionMarker(step, output, ownerTask);
2637
+ try {
2638
+ const completionDecision = this.resolveOwnerCompletionDecision(step, output, output, ownerTask, resolvedTask);
2639
+ completionReason = completionDecision.completionReason;
2640
+ }
2641
+ catch (error) {
2642
+ const canUseVerificationFallback = !usesDedicatedOwner &&
2643
+ step.verification &&
2644
+ error instanceof WorkflowCompletionError &&
2645
+ error.completionReason === 'failed_no_evidence';
2646
+ if (!canUseVerificationFallback) {
2647
+ throw error;
2648
+ }
2649
+ }
2048
2650
  }
2049
2651
  specialistOutput = output;
2050
2652
  ownerOutput = output;
2051
2653
  }
2052
- // Run verification if configured
2053
- if (step.verification) {
2054
- this.runVerification(step.verification, specialistOutput, step.name, effectiveOwner.interactive === false ? undefined : resolvedTask);
2654
+ // Even non-interactive steps can emit an explicit OWNER_DECISION contract.
2655
+ // Honor retry/fail/clarification signals before verification-driven success so
2656
+ // real runs stay consistent with interactive owner flows.
2657
+ if (!usesOwnerFlow) {
2658
+ const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
2659
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
2660
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
2661
+ }
2662
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
2663
+ throw new WorkflowCompletionError(`Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'failed_owner_decision');
2664
+ }
2665
+ if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
2666
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
2667
+ }
2668
+ }
2669
+ // Run verification if configured.
2670
+ // Self-owned interactive steps still need verification fallback so
2671
+ // explicit OWNER_DECISION output is not mandatory for the happy path.
2672
+ if (step.verification && (!usesOwnerFlow || !usesDedicatedOwner) && !completionReason) {
2673
+ const verificationResult = this.runVerification(step.verification, specialistOutput, step.name, effectiveOwner.interactive === false ? undefined : resolvedTask);
2674
+ completionReason = verificationResult.completionReason;
2675
+ }
2676
+ // Retry-style owner decisions are control-flow signals, not terminal success states.
2677
+ // Guard here so they cannot accidentally fall through into review or completed-step
2678
+ // persistence if a future branch returns a completionReason instead of throwing.
2679
+ if (completionReason === 'retry_requested_by_owner') {
2680
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested another attempt`, 'retry_requested_by_owner');
2055
2681
  }
2056
2682
  // Every interactive step gets a review pass; pick a dedicated reviewer when available.
2057
2683
  let combinedOutput = specialistOutput;
@@ -2063,21 +2689,29 @@ export class WorkflowRunner {
2063
2689
  // Mark completed
2064
2690
  state.row.status = 'completed';
2065
2691
  state.row.output = combinedOutput;
2692
+ state.row.completionReason = completionReason;
2066
2693
  state.row.completedAt = new Date().toISOString();
2067
2694
  await this.db.updateStep(state.row.id, {
2068
2695
  status: 'completed',
2069
2696
  output: combinedOutput,
2697
+ completionReason,
2070
2698
  completedAt: state.row.completedAt,
2071
2699
  updatedAt: new Date().toISOString(),
2072
2700
  });
2073
2701
  // Persist step output to disk so it survives restarts and is inspectable
2074
2702
  await this.persistStepOutput(runId, step.name, combinedOutput);
2075
2703
  this.emit({ type: 'step:completed', runId, stepName: step.name, output: combinedOutput, exitCode: lastExitCode, exitSignal: lastExitSignal });
2704
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, completionReason);
2076
2705
  await this.trajectory?.stepCompleted(step, combinedOutput, attempt + 1);
2077
2706
  return;
2078
2707
  }
2079
2708
  catch (err) {
2080
2709
  lastError = err instanceof Error ? err.message : String(err);
2710
+ lastCompletionReason =
2711
+ err instanceof WorkflowCompletionError ? err.completionReason : undefined;
2712
+ if (lastCompletionReason === 'retry_requested_by_owner' && attempt >= maxRetries) {
2713
+ lastError = this.buildOwnerRetryBudgetExceededMessage(step.name, maxRetries, lastError);
2714
+ }
2081
2715
  if (err instanceof SpawnExitError) {
2082
2716
  lastExitCode = err.exitCode;
2083
2717
  lastExitSignal = err.exitSignal;
@@ -2104,9 +2738,27 @@ export class WorkflowRunner {
2104
2738
  await this.markStepFailed(state, lastError ?? 'Unknown error', runId, {
2105
2739
  exitCode: lastExitCode,
2106
2740
  exitSignal: lastExitSignal,
2107
- });
2741
+ }, lastCompletionReason);
2108
2742
  throw new Error(`Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`);
2109
2743
  }
2744
+ buildOwnerRetryBudgetExceededMessage(stepName, maxRetries, ownerDecisionError) {
2745
+ const attempts = maxRetries + 1;
2746
+ const prefix = `Step "${stepName}" `;
2747
+ const normalizedDecision = ownerDecisionError?.startsWith(prefix)
2748
+ ? ownerDecisionError.slice(prefix.length).trim()
2749
+ : ownerDecisionError?.trim();
2750
+ const decisionSuffix = normalizedDecision
2751
+ ? ` Latest owner decision: ${normalizedDecision}`
2752
+ : '';
2753
+ if (maxRetries === 0) {
2754
+ return (`Step "${stepName}" owner requested another attempt, but no retries are configured ` +
2755
+ `(maxRetries=0). Configure retries > 0 to allow OWNER_DECISION: INCOMPLETE_RETRY.` +
2756
+ decisionSuffix);
2757
+ }
2758
+ return (`Step "${stepName}" owner requested another attempt after ${attempts} total attempts, ` +
2759
+ `but the retry budget is exhausted (maxRetries=${maxRetries}).` +
2760
+ decisionSuffix);
2761
+ }
2110
2762
  injectStepOwnerContract(step, resolvedTask, ownerDef, specialistDef) {
2111
2763
  if (ownerDef.interactive === false)
2112
2764
  return resolvedTask;
@@ -2119,12 +2771,19 @@ export class WorkflowRunner {
2119
2771
  `- You are the accountable owner for step "${step.name}".\n` +
2120
2772
  (specialistNote ? `- ${specialistNote}\n` : '') +
2121
2773
  `- If you delegate, you must still verify completion yourself.\n` +
2122
- `- Before exiting, provide an explicit completion line: STEP_COMPLETE:${step.name}\n` +
2774
+ `- Preferred final decision format:\n` +
2775
+ ` OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
2776
+ ` REASON: <one sentence>\n` +
2777
+ `- Legacy completion marker still supported: STEP_COMPLETE:${step.name}\n` +
2123
2778
  `- Then self-terminate immediately with /exit.`);
2124
2779
  }
2125
2780
  buildOwnerSupervisorTask(step, originalTask, supervised, workerRuntimeName) {
2126
2781
  const verificationGuide = this.buildSupervisorVerificationGuide(step.verification);
2127
2782
  const channelLine = this.channel ? `#${this.channel}` : '(workflow channel unavailable)';
2783
+ const channelContract = this.channel
2784
+ ? `- Prefer Relaycast/group-chat handoff signals over terminal sentinels: wait for the worker to post \`WORKER_DONE: <brief summary>\` in ${channelLine}\n` +
2785
+ `- When you have validated the handoff, post \`LEAD_DONE: <brief summary>\` to ${channelLine} before you exit\n`
2786
+ : '';
2128
2787
  return (`You are the step owner/supervisor for step "${step.name}".\n\n` +
2129
2788
  `Worker: ${supervised.specialist.name} (runtime: ${workerRuntimeName}) on ${channelLine}\n` +
2130
2789
  `Task: ${originalTask}\n\n` +
@@ -2133,9 +2792,22 @@ export class WorkflowRunner {
2133
2792
  `- Watch ${channelLine} for the worker's progress messages and mirrored PTY output\n` +
2134
2793
  `- Check file changes: run \`git diff --stat\` or inspect expected files directly\n` +
2135
2794
  `- Ask the worker directly on ${channelLine} if you need a status update\n` +
2795
+ channelContract +
2136
2796
  verificationGuide +
2137
- `\nWhen you're satisfied the work is done correctly:\n` +
2138
- `Output exactly: STEP_COMPLETE:${step.name}`);
2797
+ `\nWhen you have enough evidence, return:\n` +
2798
+ `OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
2799
+ `REASON: <one sentence>\n` +
2800
+ `Legacy completion marker still supported: STEP_COMPLETE:${step.name}`);
2801
+ }
2802
+ buildWorkerHandoffTask(step, originalTask, supervised) {
2803
+ if (!this.channel)
2804
+ return originalTask;
2805
+ return (`${originalTask}\n\n---\n` +
2806
+ `WORKER COMPLETION CONTRACT:\n` +
2807
+ `- You are handing work off to owner "${supervised.owner.name}" for step "${step.name}".\n` +
2808
+ `- When your work is ready for review, post to #${this.channel}: \`WORKER_DONE: <brief summary>\`\n` +
2809
+ `- Do not rely on terminal output alone for handoff; use the workflow group chat signal above.\n` +
2810
+ `- After posting your handoff signal, self-terminate with /exit unless the owner asks for follow-up.`);
2139
2811
  }
2140
2812
  buildSupervisorVerificationGuide(verification) {
2141
2813
  if (!verification)
@@ -2155,8 +2827,9 @@ export class WorkflowRunner {
2155
2827
  }
2156
2828
  async executeSupervisedAgentStep(step, supervised, resolvedTask, timeoutMs) {
2157
2829
  if (this.executor) {
2830
+ const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
2158
2831
  const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, supervised.specialist.name);
2159
- const specialistStep = { ...step, task: resolvedTask };
2832
+ const specialistStep = { ...step, task: specialistTask };
2160
2833
  const ownerStep = {
2161
2834
  ...step,
2162
2835
  name: `${step.name}-owner`,
@@ -2164,16 +2837,21 @@ export class WorkflowRunner {
2164
2837
  task: supervisorTask,
2165
2838
  };
2166
2839
  this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" and owner "${supervised.owner.name}"`);
2167
- const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist, resolvedTask, timeoutMs);
2840
+ const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist, specialistTask, timeoutMs);
2168
2841
  // Guard against unhandled rejection if owner fails before specialist settles
2169
2842
  const specialistSettled = specialistPromise.catch(() => undefined);
2170
2843
  try {
2171
2844
  const ownerStartTime = Date.now();
2172
2845
  const ownerOutput = await this.executor.executeAgentStep(ownerStep, supervised.owner, supervisorTask, timeoutMs);
2173
2846
  const ownerElapsed = Date.now() - ownerStartTime;
2174
- this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2175
2847
  const specialistOutput = await specialistPromise;
2176
- return { specialistOutput, ownerOutput, ownerElapsed };
2848
+ const completionDecision = this.resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, supervisorTask, resolvedTask);
2849
+ return {
2850
+ specialistOutput,
2851
+ ownerOutput,
2852
+ ownerElapsed,
2853
+ completionReason: completionDecision.completionReason,
2854
+ };
2177
2855
  }
2178
2856
  catch (error) {
2179
2857
  await specialistSettled;
@@ -2190,10 +2868,14 @@ export class WorkflowRunner {
2190
2868
  resolveWorkerSpawn = resolve;
2191
2869
  rejectWorkerSpawn = reject;
2192
2870
  });
2193
- const specialistStep = { ...step, task: resolvedTask };
2871
+ const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
2872
+ const specialistStep = { ...step, task: specialistTask };
2194
2873
  this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" (cli: ${supervised.specialist.cli})`);
2195
2874
  const workerPromise = this.spawnAndWait(supervised.specialist, specialistStep, timeoutMs, {
2196
2875
  agentNameSuffix: 'worker',
2876
+ evidenceStepName: step.name,
2877
+ evidenceRole: 'worker',
2878
+ logicalName: supervised.specialist.name,
2197
2879
  onSpawned: ({ actualName, agent }) => {
2198
2880
  workerHandle = agent;
2199
2881
  workerRuntimeName = actualName;
@@ -2208,7 +2890,7 @@ export class WorkflowRunner {
2208
2890
  }
2209
2891
  },
2210
2892
  onChunk: ({ agentName, chunk }) => {
2211
- this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk);
2893
+ this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk, supervised.specialist.name);
2212
2894
  },
2213
2895
  }).catch((error) => {
2214
2896
  if (!workerSpawned) {
@@ -2221,14 +2903,24 @@ export class WorkflowRunner {
2221
2903
  workerPromise
2222
2904
  .then((result) => {
2223
2905
  workerReleased = true;
2224
- this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited`);
2906
+ this.log(`[${step.name}] Worker ${workerRuntimeName} exited`);
2907
+ this.recordStepToolSideEffect(step.name, {
2908
+ type: 'worker_exit',
2909
+ detail: `Worker ${workerRuntimeName} exited`,
2910
+ raw: { worker: workerRuntimeName, exitCode: result.exitCode, exitSignal: result.exitSignal },
2911
+ });
2225
2912
  if (step.verification?.type === 'output_contains' && result.output.includes(step.verification.value)) {
2226
- this.postToChannel(`**[${step.name}]** Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`);
2913
+ this.log(`[${step.name}] Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`);
2227
2914
  }
2228
2915
  })
2229
2916
  .catch((error) => {
2230
2917
  const message = error instanceof Error ? error.message : String(error);
2231
2918
  this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited with error: ${message}`);
2919
+ this.recordStepToolSideEffect(step.name, {
2920
+ type: 'worker_error',
2921
+ detail: `Worker ${workerRuntimeName} exited with error: ${message}`,
2922
+ raw: { worker: workerRuntimeName, error: message },
2923
+ });
2232
2924
  });
2233
2925
  await workerReady;
2234
2926
  const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, workerRuntimeName);
@@ -2243,6 +2935,9 @@ export class WorkflowRunner {
2243
2935
  try {
2244
2936
  const ownerResultObj = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, {
2245
2937
  agentNameSuffix: 'owner',
2938
+ evidenceStepName: step.name,
2939
+ evidenceRole: 'owner',
2940
+ logicalName: supervised.owner.name,
2246
2941
  onSpawned: ({ actualName }) => {
2247
2942
  this.supervisedRuntimeAgents.set(actualName, {
2248
2943
  stepName: step.name,
@@ -2257,9 +2952,14 @@ export class WorkflowRunner {
2257
2952
  const ownerElapsed = Date.now() - ownerStartTime;
2258
2953
  const ownerOutput = ownerResultObj.output;
2259
2954
  this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
2260
- this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2261
2955
  const specialistOutput = (await workerPromise).output;
2262
- return { specialistOutput, ownerOutput, ownerElapsed };
2956
+ const completionDecision = this.resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, supervisorTask, resolvedTask);
2957
+ return {
2958
+ specialistOutput,
2959
+ ownerOutput,
2960
+ ownerElapsed,
2961
+ completionReason: completionDecision.completionReason,
2962
+ };
2263
2963
  }
2264
2964
  catch (error) {
2265
2965
  const message = error instanceof Error ? error.message : String(error);
@@ -2273,14 +2973,20 @@ export class WorkflowRunner {
2273
2973
  throw error;
2274
2974
  }
2275
2975
  }
2276
- forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk) {
2277
- const lines = WorkflowRunner.stripAnsi(chunk)
2976
+ forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk, sender) {
2977
+ const lines = WorkflowRunner.scrubForChannel(chunk)
2278
2978
  .split('\n')
2279
2979
  .map((line) => line.trim())
2280
2980
  .filter(Boolean)
2281
2981
  .slice(0, 3);
2282
2982
  for (const line of lines) {
2283
- this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`);
2983
+ this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`, {
2984
+ stepName,
2985
+ sender,
2986
+ actor: agentName,
2987
+ role: roleLabel,
2988
+ origin: 'forwarded_chunk',
2989
+ });
2284
2990
  }
2285
2991
  }
2286
2992
  async recordOwnerMonitoringChunk(step, ownerDef, chunk) {
@@ -2295,6 +3001,11 @@ export class WorkflowRunner {
2295
3001
  if (/STEP_COMPLETE:/i.test(stripped))
2296
3002
  details.push('Declared the step complete');
2297
3003
  for (const detail of details) {
3004
+ this.recordStepToolSideEffect(step.name, {
3005
+ type: 'owner_monitoring',
3006
+ detail,
3007
+ raw: { output: stripped.slice(0, 240), owner: ownerDef.name },
3008
+ });
2298
3009
  await this.trajectory?.ownerMonitoringEvent(step.name, ownerDef.name, detail, {
2299
3010
  output: stripped.slice(0, 240),
2300
3011
  });
@@ -2335,6 +3046,7 @@ export class WorkflowRunner {
2335
3046
  }
2336
3047
  resolveAutoReviewAgent(ownerDef, agentMap) {
2337
3048
  const allDefs = [...agentMap.values()].map((d) => WorkflowRunner.resolveAgentDef(d));
3049
+ const eligible = (def) => def.name !== ownerDef.name && !this.isExplicitInteractiveWorker(def);
2338
3050
  const isReviewer = (def) => {
2339
3051
  const roleLC = def.role?.toLowerCase() ?? '';
2340
3052
  const nameLC = def.name.toLowerCase();
@@ -2359,32 +3071,239 @@ export class WorkflowRunner {
2359
3071
  return isReviewer(def) ? 1 : 0;
2360
3072
  };
2361
3073
  const dedicated = allDefs
2362
- .filter((d) => d.name !== ownerDef.name && isReviewer(d))
3074
+ .filter((d) => eligible(d) && isReviewer(d))
2363
3075
  .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name))[0];
2364
3076
  if (dedicated)
2365
3077
  return dedicated;
2366
- const alternate = allDefs.find((d) => d.name !== ownerDef.name && d.interactive !== false);
3078
+ const alternate = allDefs.find((d) => eligible(d) && d.interactive !== false);
2367
3079
  if (alternate)
2368
3080
  return alternate;
2369
3081
  // Self-review fallback — log a warning since owner reviewing itself is weak.
2370
3082
  return ownerDef;
2371
3083
  }
2372
- assertOwnerCompletionMarker(step, output, injectedTaskText) {
3084
+ isExplicitInteractiveWorker(agentDef) {
3085
+ return agentDef.preset === 'worker' && agentDef.interactive !== false;
3086
+ }
3087
+ resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, injectedTaskText, verificationTaskText) {
3088
+ const hasMarker = this.hasOwnerCompletionMarker(step, ownerOutput, injectedTaskText);
3089
+ const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
3090
+ // INCOMPLETE_RETRY / NEEDS_CLARIFICATION are non-terminal owner outcomes. They never mark
3091
+ // the step complete here; instead they throw back to executeAgentStep(), which decides
3092
+ // whether to retry or fail based on the remaining retry budget for this step.
3093
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
3094
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
3095
+ }
3096
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
3097
+ throw new WorkflowCompletionError(`Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'failed_owner_decision');
3098
+ }
3099
+ if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
3100
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
3101
+ }
3102
+ const verificationResult = step.verification
3103
+ ? this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, {
3104
+ allowFailure: true,
3105
+ completionMarkerFound: hasMarker,
3106
+ })
3107
+ : { passed: false };
3108
+ if (verificationResult.error) {
3109
+ throw new WorkflowCompletionError(`Step "${step.name}" verification failed and no owner decision or evidence established completion: ${verificationResult.error}`, 'failed_verification');
3110
+ }
3111
+ if (explicitOwnerDecision?.decision === 'COMPLETE') {
3112
+ if (!hasMarker) {
3113
+ this.log(`[${step.name}] Structured OWNER_DECISION completed the step without legacy STEP_COMPLETE marker`);
3114
+ }
3115
+ return {
3116
+ completionReason: 'completed_by_owner_decision',
3117
+ ownerDecision: explicitOwnerDecision.decision,
3118
+ reason: explicitOwnerDecision.reason,
3119
+ };
3120
+ }
3121
+ if (verificationResult.passed) {
3122
+ return { completionReason: 'completed_verified' };
3123
+ }
3124
+ const ownerDecision = this.parseOwnerDecision(step, ownerOutput, hasMarker);
3125
+ if (ownerDecision?.decision === 'COMPLETE') {
3126
+ return {
3127
+ completionReason: 'completed_by_owner_decision',
3128
+ ownerDecision: ownerDecision.decision,
3129
+ reason: ownerDecision.reason,
3130
+ };
3131
+ }
3132
+ if (!explicitOwnerDecision) {
3133
+ const evidenceReason = this.judgeOwnerCompletionByEvidence(step.name, ownerOutput);
3134
+ if (evidenceReason) {
3135
+ if (!hasMarker) {
3136
+ this.log(`[${step.name}] Evidence-based completion resolved without legacy STEP_COMPLETE marker`);
3137
+ }
3138
+ return {
3139
+ completionReason: 'completed_by_evidence',
3140
+ reason: evidenceReason,
3141
+ };
3142
+ }
3143
+ }
3144
+ // Process-exit fallback: if the agent exited cleanly (code 0) and verification
3145
+ // passes (or no verification is configured), infer completion rather than failing.
3146
+ // This reduces dependence on agents posting exact coordination signals.
3147
+ const processExitFallback = this.tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput);
3148
+ if (processExitFallback) {
3149
+ this.log(`[${step.name}] Completion inferred from clean process exit (code 0)` +
3150
+ (step.verification ? ' + verification passed' : '') +
3151
+ ' — no coordination signal was required');
3152
+ return processExitFallback;
3153
+ }
3154
+ throw new WorkflowCompletionError(`Step "${step.name}" owner completion decision missing: no OWNER_DECISION, legacy STEP_COMPLETE marker, or evidence-backed completion signal`, 'failed_no_evidence');
3155
+ }
3156
+ hasExplicitInteractiveWorkerCompletionEvidence(step, output, injectedTaskText, verificationTaskText) {
3157
+ try {
3158
+ this.resolveOwnerCompletionDecision(step, output, output, injectedTaskText, verificationTaskText);
3159
+ return true;
3160
+ }
3161
+ catch {
3162
+ return false;
3163
+ }
3164
+ }
3165
+ hasOwnerCompletionMarker(step, output, injectedTaskText) {
2373
3166
  const marker = `STEP_COMPLETE:${step.name}`;
2374
3167
  const taskHasMarker = injectedTaskText.includes(marker);
2375
3168
  const first = output.indexOf(marker);
2376
3169
  if (first === -1) {
2377
- throw new Error(`Step "${step.name}" owner completion marker missing: "${marker}"`);
2378
- }
2379
- // PTY output includes injected task text, so require a second marker occurrence
2380
- // when the marker was present in the injected prompt (either owner contract or supervisor prompt).
2381
- const outputLikelyContainsInjectedPrompt = output.includes('STEP OWNER CONTRACT') || output.includes('Output exactly: STEP_COMPLETE:');
3170
+ return false;
3171
+ }
3172
+ // PTY output often includes echoed prompt text, so when the injected task
3173
+ // itself contains the legacy marker require a second occurrence from the
3174
+ // agent response.
3175
+ const outputLikelyContainsInjectedPrompt = output.includes('STEP OWNER CONTRACT') ||
3176
+ output.includes('Preferred final decision format') ||
3177
+ output.includes('Legacy completion marker still supported') ||
3178
+ output.includes('Output exactly: STEP_COMPLETE:');
2382
3179
  if (taskHasMarker && outputLikelyContainsInjectedPrompt) {
2383
- const hasSecond = output.includes(marker, first + marker.length);
2384
- if (!hasSecond) {
2385
- throw new Error(`Step "${step.name}" owner completion marker missing in agent response: "${marker}"`);
2386
- }
3180
+ return output.includes(marker, first + marker.length);
3181
+ }
3182
+ return true;
3183
+ }
3184
+ parseOwnerDecision(step, ownerOutput, hasMarker) {
3185
+ const decisionPattern = /OWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi;
3186
+ const decisionMatches = [...ownerOutput.matchAll(decisionPattern)];
3187
+ const outputLikelyContainsEchoedPrompt = ownerOutput.includes('STEP OWNER CONTRACT') ||
3188
+ ownerOutput.includes('Preferred final decision format') ||
3189
+ ownerOutput.includes('one of COMPLETE, INCOMPLETE_RETRY') ||
3190
+ ownerOutput.includes('COMPLETE|INCOMPLETE_RETRY');
3191
+ if (decisionMatches.length === 0) {
3192
+ if (!hasMarker)
3193
+ return null;
3194
+ return {
3195
+ decision: 'COMPLETE',
3196
+ reason: `Legacy completion marker observed: STEP_COMPLETE:${step.name}`,
3197
+ };
3198
+ }
3199
+ // Filter out matches that appear on a template/instruction line (e.g.
3200
+ // "COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION") to avoid
3201
+ // picking up the template format as the agent's actual decision.
3202
+ const realMatches = outputLikelyContainsEchoedPrompt
3203
+ ? decisionMatches.filter((m) => {
3204
+ const lineStart = ownerOutput.lastIndexOf('\n', m.index) + 1;
3205
+ const lineEnd = ownerOutput.indexOf('\n', m.index);
3206
+ const line = ownerOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
3207
+ return !line.includes('COMPLETE|INCOMPLETE_RETRY');
3208
+ })
3209
+ : decisionMatches;
3210
+ const decisionMatch = realMatches.length > 0
3211
+ ? realMatches[realMatches.length - 1]
3212
+ : decisionMatches[decisionMatches.length - 1];
3213
+ const decision = decisionMatch?.[1]?.toUpperCase();
3214
+ if (decision !== 'COMPLETE' &&
3215
+ decision !== 'INCOMPLETE_RETRY' &&
3216
+ decision !== 'INCOMPLETE_FAIL' &&
3217
+ decision !== 'NEEDS_CLARIFICATION') {
3218
+ return null;
3219
+ }
3220
+ const reasonPattern = /(?:^|\n)REASON:\s*(.+)/gi;
3221
+ const reasonMatches = [...ownerOutput.matchAll(reasonPattern)];
3222
+ const reasonMatch = outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
3223
+ ? reasonMatches[reasonMatches.length - 1]
3224
+ : reasonMatches[0];
3225
+ const reason = reasonMatch?.[1]?.trim();
3226
+ return {
3227
+ decision,
3228
+ reason: reason && reason !== '<one sentence>' ? reason : undefined,
3229
+ };
3230
+ }
3231
+ stripEchoedPromptLines(output, patterns) {
3232
+ return output
3233
+ .split('\n')
3234
+ .map((line) => line.trim())
3235
+ .filter(Boolean)
3236
+ .filter((line) => patterns.every((pattern) => !pattern.test(line)))
3237
+ .join('\n');
3238
+ }
3239
+ firstMeaningfulLine(output) {
3240
+ return output
3241
+ .split('\n')
3242
+ .map((line) => line.trim())
3243
+ .find(Boolean);
3244
+ }
3245
+ judgeOwnerCompletionByEvidence(stepName, ownerOutput) {
3246
+ // Never infer completion when the raw output contains an explicit retry/fail/clarification signal.
3247
+ if (/OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput)) {
3248
+ return null;
2387
3249
  }
3250
+ const sanitized = this.stripEchoedPromptLines(ownerOutput, [
3251
+ /^STEP OWNER CONTRACT:?$/i,
3252
+ /^Preferred final decision format:?$/i,
3253
+ /^OWNER_DECISION:\s*(?:COMPLETE\|INCOMPLETE_RETRY|<one of COMPLETE, INCOMPLETE_RETRY)/i,
3254
+ /^REASON:\s*<one sentence>$/i,
3255
+ /^Legacy completion marker still supported:/i,
3256
+ /^STEP_COMPLETE:/i,
3257
+ ]);
3258
+ if (!sanitized)
3259
+ return null;
3260
+ const hasExplicitSelfRelease = /Calling\s+(?:[\w.-]+\.)?remove_agent\(\{[^<\n]*"reason":"task completed"/i.test(sanitized);
3261
+ const hasPositiveConclusion = /\b(complete(?:d)?|done|verified|looks correct|safe handoff|artifact verified)\b/i.test(sanitized) ||
3262
+ /\bartifacts?\b.*\b(correct|verified|complete)\b/i.test(sanitized) ||
3263
+ hasExplicitSelfRelease;
3264
+ const evidence = this.getStepCompletionEvidence(stepName);
3265
+ const hasValidatedCoordinationSignal = evidence?.coordinationSignals.some((signal) => signal.kind === 'worker_done' ||
3266
+ signal.kind === 'lead_done' ||
3267
+ signal.kind === 'verification_passed' ||
3268
+ (signal.kind === 'process_exit' && signal.value === '0')) ?? false;
3269
+ const hasValidatedInspectionSignal = evidence?.toolSideEffects.some((effect) => effect.type === 'owner_monitoring' &&
3270
+ (/Checked git diff stats/i.test(effect.detail) ||
3271
+ /Listed files for verification/i.test(effect.detail))) ?? false;
3272
+ const hasEvidenceSignal = hasValidatedCoordinationSignal || hasValidatedInspectionSignal;
3273
+ if (!hasPositiveConclusion || !hasEvidenceSignal) {
3274
+ return null;
3275
+ }
3276
+ return this.firstMeaningfulLine(sanitized) ?? 'Evidence-backed completion';
3277
+ }
3278
+ /**
3279
+ * Process-exit fallback: when agent exits with code 0 but posts no coordination
3280
+ * signal, check if verification passes (or no verification is configured) and
3281
+ * infer completion. This is the key mechanism for reducing agent compliance
3282
+ * dependence — the runner trusts a clean exit + passing verification over
3283
+ * requiring exact signal text.
3284
+ */
3285
+ tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput) {
3286
+ const gracePeriodMs = this.currentConfig?.swarm.completionGracePeriodMs ?? 5000;
3287
+ if (gracePeriodMs === 0)
3288
+ return null;
3289
+ // Never infer completion when the owner explicitly requested retry/fail/clarification.
3290
+ if (ownerOutput && /OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput)) {
3291
+ return null;
3292
+ }
3293
+ const evidence = this.getStepCompletionEvidence(step.name);
3294
+ const hasCleanExit = evidence?.coordinationSignals.some((signal) => signal.kind === 'process_exit' && signal.value === '0') ?? false;
3295
+ if (!hasCleanExit)
3296
+ return null;
3297
+ // If verification is configured, it must pass for the fallback to succeed.
3298
+ if (step.verification) {
3299
+ const verificationResult = this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, { allowFailure: true });
3300
+ if (!verificationResult.passed)
3301
+ return null;
3302
+ }
3303
+ return {
3304
+ completionReason: 'completed_by_process_exit',
3305
+ reason: `Process exited with code 0${step.verification ? ' and verification passed' : ''} — coordination signal not required`,
3306
+ };
2388
3307
  }
2389
3308
  async runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewerDef, timeoutMs) {
2390
3309
  const reviewSnippetMax = 12_000;
@@ -2426,7 +3345,17 @@ export class WorkflowRunner {
2426
3345
  };
2427
3346
  await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer');
2428
3347
  this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`);
3348
+ this.recordStepToolSideEffect(step.name, {
3349
+ type: 'review_started',
3350
+ detail: `Review started with ${reviewerDef.name}`,
3351
+ raw: { reviewer: reviewerDef.name },
3352
+ });
2429
3353
  const emitReviewCompleted = async (decision, reason) => {
3354
+ this.recordStepToolSideEffect(step.name, {
3355
+ type: 'review_completed',
3356
+ detail: `Review ${decision} by ${reviewerDef.name}${reason ? `: ${reason}` : ''}`,
3357
+ raw: { reviewer: reviewerDef.name, decision, reason },
3358
+ });
2430
3359
  await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason);
2431
3360
  this.emit({
2432
3361
  type: 'step:review-completed',
@@ -2470,6 +3399,9 @@ export class WorkflowRunner {
2470
3399
  };
2471
3400
  try {
2472
3401
  await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
3402
+ evidenceStepName: step.name,
3403
+ evidenceRole: 'reviewer',
3404
+ logicalName: reviewerDef.name,
2473
3405
  onSpawned: ({ agent }) => {
2474
3406
  reviewerHandle = agent;
2475
3407
  },
@@ -2507,15 +3439,34 @@ export class WorkflowRunner {
2507
3439
  return reviewOutput;
2508
3440
  }
2509
3441
  parseReviewDecision(reviewOutput) {
3442
+ const strict = this.parseStrictReviewDecision(reviewOutput);
3443
+ if (strict) {
3444
+ return strict;
3445
+ }
3446
+ const tolerant = this.parseTolerantReviewDecision(reviewOutput);
3447
+ if (tolerant) {
3448
+ return tolerant;
3449
+ }
3450
+ return this.judgeReviewDecisionFromEvidence(reviewOutput);
3451
+ }
3452
+ parseStrictReviewDecision(reviewOutput) {
2510
3453
  const decisionPattern = /REVIEW_DECISION:\s*(APPROVE|REJECT)/gi;
2511
3454
  const decisionMatches = [...reviewOutput.matchAll(decisionPattern)];
2512
3455
  if (decisionMatches.length === 0) {
2513
3456
  return null;
2514
3457
  }
2515
3458
  const outputLikelyContainsEchoedPrompt = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
2516
- const decisionMatch = outputLikelyContainsEchoedPrompt && decisionMatches.length > 1
2517
- ? decisionMatches[decisionMatches.length - 1]
2518
- : decisionMatches[0];
3459
+ const realReviewMatches = outputLikelyContainsEchoedPrompt
3460
+ ? decisionMatches.filter((m) => {
3461
+ const lineStart = reviewOutput.lastIndexOf('\n', m.index) + 1;
3462
+ const lineEnd = reviewOutput.indexOf('\n', m.index);
3463
+ const line = reviewOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
3464
+ return !line.includes('APPROVE or REJECT');
3465
+ })
3466
+ : decisionMatches;
3467
+ const decisionMatch = realReviewMatches.length > 0
3468
+ ? realReviewMatches[realReviewMatches.length - 1]
3469
+ : decisionMatches[decisionMatches.length - 1];
2519
3470
  const decision = decisionMatch?.[1]?.toUpperCase();
2520
3471
  if (decision !== 'APPROVE' && decision !== 'REJECT') {
2521
3472
  return null;
@@ -2531,6 +3482,85 @@ export class WorkflowRunner {
2531
3482
  reason: reason && reason !== '<one sentence>' ? reason : undefined,
2532
3483
  };
2533
3484
  }
3485
+ parseTolerantReviewDecision(reviewOutput) {
3486
+ const sanitized = this.stripEchoedPromptLines(reviewOutput, [
3487
+ /^Return exactly:?$/i,
3488
+ /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
3489
+ /^REVIEW_REASON:\s*<one sentence>$/i,
3490
+ ]);
3491
+ if (!sanitized) {
3492
+ return null;
3493
+ }
3494
+ const lines = sanitized
3495
+ .split('\n')
3496
+ .map((line) => line.trim())
3497
+ .filter(Boolean);
3498
+ for (const line of lines) {
3499
+ const candidate = line.replace(/^REVIEW_DECISION:\s*/i, '').trim();
3500
+ const decision = this.normalizeReviewDecisionCandidate(candidate);
3501
+ if (decision) {
3502
+ return {
3503
+ decision,
3504
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
3505
+ };
3506
+ }
3507
+ }
3508
+ const decision = this.normalizeReviewDecisionCandidate(lines.join(' '));
3509
+ if (!decision) {
3510
+ return null;
3511
+ }
3512
+ return {
3513
+ decision,
3514
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
3515
+ };
3516
+ }
3517
+ normalizeReviewDecisionCandidate(candidate) {
3518
+ const value = candidate.trim().toLowerCase();
3519
+ if (!value)
3520
+ return null;
3521
+ if (/^(approve|approved|complete|completed|pass|passed|accept|accepted|lgtm|ship it|looks good|looks fine)\b/i.test(value)) {
3522
+ return 'approved';
3523
+ }
3524
+ if (/^(reject|rejected|retry|retry requested|fail|failed|incomplete|needs clarification|not complete|not ready|insufficient evidence)\b/i.test(value)) {
3525
+ return 'rejected';
3526
+ }
3527
+ return null;
3528
+ }
3529
+ parseReviewReason(reviewOutput) {
3530
+ const reasonPattern = /REVIEW_REASON:\s*(.+)/gi;
3531
+ const reasonMatches = [...reviewOutput.matchAll(reasonPattern)];
3532
+ const outputLikelyContainsEchoedPrompt = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
3533
+ const reasonMatch = outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
3534
+ ? reasonMatches[reasonMatches.length - 1]
3535
+ : reasonMatches[0];
3536
+ const reason = reasonMatch?.[1]?.trim();
3537
+ return reason && reason !== '<one sentence>' ? reason : undefined;
3538
+ }
3539
+ judgeReviewDecisionFromEvidence(reviewOutput) {
3540
+ const sanitized = this.stripEchoedPromptLines(reviewOutput, [
3541
+ /^Return exactly:?$/i,
3542
+ /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
3543
+ /^REVIEW_REASON:\s*<one sentence>$/i,
3544
+ ]);
3545
+ if (!sanitized) {
3546
+ return null;
3547
+ }
3548
+ const hasPositiveEvidence = /\b(approved?|complete(?:d)?|verified|looks good|looks fine|safe handoff|pass(?:ed)?)\b/i.test(sanitized);
3549
+ const hasNegativeEvidence = /\b(reject(?:ed)?|retry|fail(?:ed)?|incomplete|missing checks|insufficient evidence|not safe)\b/i.test(sanitized);
3550
+ if (hasNegativeEvidence) {
3551
+ return {
3552
+ decision: 'rejected',
3553
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
3554
+ };
3555
+ }
3556
+ if (!hasPositiveEvidence) {
3557
+ return null;
3558
+ }
3559
+ return {
3560
+ decision: 'approved',
3561
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
3562
+ };
3563
+ }
2534
3564
  combineStepAndReviewOutput(stepOutput, reviewOutput) {
2535
3565
  const primary = stepOutput.trimEnd();
2536
3566
  const review = reviewOutput.trim();
@@ -2600,8 +3630,8 @@ export class WorkflowRunner {
2600
3630
  switch (preset) {
2601
3631
  case 'worker':
2602
3632
  return ('You are a non-interactive worker agent. Produce clean, structured output to stdout.\n' +
2603
- 'Do NOT use relay_spawn, add_agent, or any MCP tool to spawn sub-agents.\n' +
2604
- 'Do NOT use relay_send or any Relaycast messaging tools — you have no relay connection.\n\n');
3633
+ 'Do NOT use mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn sub-agents.\n' +
3634
+ 'Do NOT use mcp__relaycast__dm_send or any Relaycast messaging tools — you have no relay connection.\n\n');
2605
3635
  case 'reviewer':
2606
3636
  return ('You are a non-interactive reviewer agent. Read the specified files/artifacts and produce a clear verdict.\n' +
2607
3637
  'Do NOT spawn sub-agents or use any Relaycast messaging tools.\n\n');
@@ -2627,7 +3657,7 @@ export class WorkflowRunner {
2627
3657
  step.task +
2628
3658
  '\n\n---\n' +
2629
3659
  'IMPORTANT: You are running as a non-interactive subprocess. ' +
2630
- 'Do NOT call relay_spawn, add_agent, or any MCP tool to spawn or manage other agents.\n\n' +
3660
+ 'Do NOT call mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn or manage other agents.\n\n' +
2631
3661
  'CRITICAL REQUIREMENT — YOU MUST FOLLOW THIS EXACTLY:\n' +
2632
3662
  'You are running in non-interactive mode. There is NO opportunity for follow-up, ' +
2633
3663
  'clarification, or additional input. Your stdout output is your ONLY deliverable.\n\n' +
@@ -2759,11 +3789,19 @@ export class WorkflowRunner {
2759
3789
  reject(new Error(`Failed to spawn ${cmd}: ${err.message}`));
2760
3790
  });
2761
3791
  });
3792
+ this.captureStepTerminalEvidence(step.name, {}, { exitCode, exitSignal });
2762
3793
  return { output, exitCode, exitSignal };
2763
3794
  }
2764
3795
  finally {
2765
- const combinedOutput = stdoutChunks.join('') + stderrChunks.join('');
3796
+ const stdout = stdoutChunks.join('');
3797
+ const stderr = stderrChunks.join('');
3798
+ const combinedOutput = stdout + stderr;
2766
3799
  this.lastFailedStepOutput.set(step.name, combinedOutput);
3800
+ this.captureStepTerminalEvidence(step.name, {
3801
+ stdout,
3802
+ stderr,
3803
+ combined: combinedOutput,
3804
+ });
2767
3805
  stopHeartbeat?.();
2768
3806
  logStream.end();
2769
3807
  this.unregisterWorker(agentName);
@@ -2777,6 +3815,7 @@ export class WorkflowRunner {
2777
3815
  if (!this.relay) {
2778
3816
  throw new Error('AgentRelay not initialized');
2779
3817
  }
3818
+ const evidenceStepName = options.evidenceStepName ?? step.name;
2780
3819
  // Deterministic name: step name + optional role suffix + first 8 chars of run ID.
2781
3820
  const requestedName = `${step.name}${options.agentNameSuffix ? `-${options.agentNameSuffix}` : ''}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
2782
3821
  let agentName = requestedName;
@@ -2823,11 +3862,17 @@ export class WorkflowRunner {
2823
3862
  let ptyChunks = [];
2824
3863
  try {
2825
3864
  const agentCwd = this.resolveAgentCwd(agentDef);
3865
+ const interactiveSpawnPolicy = resolveSpawnPolicy({
3866
+ AGENT_NAME: agentName,
3867
+ AGENT_CLI: agentDef.cli,
3868
+ RELAY_API_KEY: this.relayApiKey ?? 'workflow-runner',
3869
+ AGENT_CHANNELS: (agentChannels ?? []).join(','),
3870
+ });
2826
3871
  agent = await this.relay.spawnPty({
2827
3872
  name: agentName,
2828
3873
  cli: agentDef.cli,
2829
3874
  model: agentDef.constraints?.model,
2830
- args: [],
3875
+ args: interactiveSpawnPolicy.args,
2831
3876
  channels: agentChannels,
2832
3877
  task: taskWithExit,
2833
3878
  idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
@@ -2859,16 +3904,27 @@ export class WorkflowRunner {
2859
3904
  const oldListener = this.ptyListeners.get(oldName);
2860
3905
  if (oldListener) {
2861
3906
  this.ptyListeners.delete(oldName);
2862
- this.ptyListeners.set(agent.name, (chunk) => {
3907
+ const resolvedAgentName = agent.name;
3908
+ this.ptyListeners.set(resolvedAgentName, (chunk) => {
2863
3909
  const stripped = WorkflowRunner.stripAnsi(chunk);
2864
- this.ptyOutputBuffers.get(agent.name)?.push(stripped);
3910
+ this.ptyOutputBuffers.get(resolvedAgentName)?.push(stripped);
2865
3911
  newLogStream.write(chunk);
2866
- options.onChunk?.({ agentName: agent.name, chunk });
3912
+ options.onChunk?.({ agentName: resolvedAgentName, chunk });
2867
3913
  });
2868
3914
  }
2869
3915
  agentName = agent.name;
2870
3916
  }
2871
- await options.onSpawned?.({ requestedName, actualName: agent.name, agent });
3917
+ const liveAgent = agent;
3918
+ await options.onSpawned?.({ requestedName, actualName: liveAgent.name, agent: liveAgent });
3919
+ this.runtimeStepAgents.set(liveAgent.name, {
3920
+ stepName: evidenceStepName,
3921
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
3922
+ logicalName: options.logicalName ?? agentDef.name,
3923
+ });
3924
+ const signalParticipant = this.resolveSignalParticipantKind(options.evidenceRole ?? agentDef.role ?? 'agent');
3925
+ if (signalParticipant) {
3926
+ this.rememberStepSignalSender(evidenceStepName, signalParticipant, liveAgent.name, options.logicalName ?? agentDef.name);
3927
+ }
2872
3928
  // Register in workers.json so `agents:kill` can find this agent
2873
3929
  let workerPid;
2874
3930
  try {
@@ -2881,8 +3937,8 @@ export class WorkflowRunner {
2881
3937
  this.registerWorker(agentName, agentDef.cli, step.task ?? '', workerPid);
2882
3938
  // Register the spawned agent in Relaycast for observability + start heartbeat
2883
3939
  if (this.relayApiKey) {
2884
- const agentClient = await this.registerRelaycastExternalAgent(agent.name, `Workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
2885
- console.warn(`[WorkflowRunner] Failed to register ${agent.name} in Relaycast:`, err?.message ?? err);
3940
+ const agentClient = await this.registerRelaycastExternalAgent(liveAgent.name, `Workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
3941
+ console.warn(`[WorkflowRunner] Failed to register ${liveAgent.name} in Relaycast:`, err?.message ?? err);
2886
3942
  return null;
2887
3943
  });
2888
3944
  // Keep the agent online in the dashboard while it's working
@@ -2895,30 +3951,30 @@ export class WorkflowRunner {
2895
3951
  const channelAgent = await this.ensureRelaycastRunnerAgent().catch(() => null);
2896
3952
  await channelAgent?.channels.invite(this.channel, agent.name).catch(() => { });
2897
3953
  }
2898
- // Post assignment notification (no task content — task arrives via direct broker injection)
2899
- this.postToChannel(`**[${step.name}]** Assigned to \`${agent.name}\``);
3954
+ // Keep operational assignment chatter out of the agent coordination channel.
3955
+ this.log(`[${step.name}] Assigned to ${agent.name}`);
2900
3956
  // Register agent handle for hub-mediated nudging
2901
3957
  this.activeAgentHandles.set(agentName, agent);
2902
3958
  // Wait for agent to exit, with idle nudging if configured
2903
- exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs);
3959
+ exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs, options.preserveOnIdle ?? this.shouldPreserveIdleSupervisor(agentDef, step, options.evidenceRole));
2904
3960
  // Stop heartbeat now that agent has exited
2905
3961
  stopHeartbeat?.();
2906
3962
  if (exitResult === 'timeout') {
2907
- // Safety net: check if the verification file exists before giving up.
2908
- // The agent may have completed work but failed to /exit.
2909
- if (step.verification?.type === 'file_exists') {
2910
- const verifyPath = path.resolve(this.cwd, step.verification.value);
2911
- if (existsSync(verifyPath)) {
2912
- this.postToChannel(`**[${step.name}]** Agent idle after completing work — releasing`);
2913
- await agent.release();
2914
- // Fall through to read output below
2915
- }
2916
- else {
3963
+ // Grace-period fallback: before failing, check if the agent completed
3964
+ // its work but just failed to self-terminate. Run verification if
3965
+ // configured — a passing gate + timeout is better than a hard failure.
3966
+ let timeoutRecovered = false;
3967
+ if (step.verification) {
3968
+ const ptyOutput = (this.ptyOutputBuffers.get(agentName) ?? []).join('');
3969
+ const verificationResult = this.runVerification(step.verification, ptyOutput, step.name, undefined, { allowFailure: true });
3970
+ if (verificationResult.passed) {
3971
+ this.log(`[${step.name}] Agent timed out but verification passed — treating as complete`);
3972
+ this.postToChannel(`**[${step.name}]** Agent idle after completing work — verification passed, releasing`);
2917
3973
  await agent.release();
2918
- throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
3974
+ timeoutRecovered = true;
2919
3975
  }
2920
3976
  }
2921
- else {
3977
+ if (!timeoutRecovered) {
2922
3978
  await agent.release();
2923
3979
  throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
2924
3980
  }
@@ -2931,6 +3987,19 @@ export class WorkflowRunner {
2931
3987
  // Snapshot PTY chunks before cleanup — we need them for output reading below
2932
3988
  ptyChunks = this.ptyOutputBuffers.get(agentName) ?? [];
2933
3989
  this.lastFailedStepOutput.set(step.name, ptyChunks.join(''));
3990
+ if (ptyChunks.length > 0 || agent?.exitCode !== undefined || agent?.exitSignal !== undefined) {
3991
+ this.captureStepTerminalEvidence(evidenceStepName, {
3992
+ stdout: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
3993
+ combined: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
3994
+ }, {
3995
+ exitCode: agent?.exitCode,
3996
+ exitSignal: agent?.exitSignal,
3997
+ }, {
3998
+ sender: options.logicalName ?? agentDef.name,
3999
+ actor: agent?.name ?? agentName,
4000
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
4001
+ });
4002
+ }
2934
4003
  // Always clean up PTY resources — prevents fd leaks if spawnPty or waitForExit throws
2935
4004
  stopHeartbeat?.();
2936
4005
  this.activeAgentHandles.delete(agentName);
@@ -2943,6 +4012,7 @@ export class WorkflowRunner {
2943
4012
  }
2944
4013
  this.unregisterWorker(agentName);
2945
4014
  this.supervisedRuntimeAgents.delete(agentName);
4015
+ this.runtimeStepAgents.delete(agentName);
2946
4016
  }
2947
4017
  let output;
2948
4018
  if (ptyChunks.length > 0) {
@@ -2959,6 +4029,13 @@ export class WorkflowRunner {
2959
4029
  ? 'Agent completed (idle — treated as done)'
2960
4030
  : `Agent exited (${exitResult})`;
2961
4031
  }
4032
+ if (ptyChunks.length === 0) {
4033
+ this.captureStepTerminalEvidence(evidenceStepName, { stdout: output, combined: output }, { exitCode: agent?.exitCode, exitSignal: agent?.exitSignal }, {
4034
+ sender: options.logicalName ?? agentDef.name,
4035
+ actor: agent?.name ?? agentName,
4036
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
4037
+ });
4038
+ }
2962
4039
  return {
2963
4040
  output,
2964
4041
  exitCode: agent?.exitCode,
@@ -2986,13 +4063,35 @@ export class WorkflowRunner {
2986
4063
  'orchestrator',
2987
4064
  'auctioneer',
2988
4065
  ]);
4066
+ isLeadLikeAgent(agentDef, roleOverride) {
4067
+ if (agentDef.preset === 'lead')
4068
+ return true;
4069
+ const role = (roleOverride ?? agentDef.role ?? '').toLowerCase();
4070
+ const nameLC = agentDef.name.toLowerCase();
4071
+ return [...WorkflowRunner.HUB_ROLES].some((hubRole) => new RegExp(`\\b${hubRole}\\b`, 'i').test(nameLC) ||
4072
+ new RegExp(`\\b${hubRole}\\b`, 'i').test(role));
4073
+ }
4074
+ shouldPreserveIdleSupervisor(agentDef, step, evidenceRole) {
4075
+ if (evidenceRole && /\bowner\b/i.test(evidenceRole)) {
4076
+ return true;
4077
+ }
4078
+ if (!this.isLeadLikeAgent(agentDef, evidenceRole)) {
4079
+ return false;
4080
+ }
4081
+ const task = step.task ?? '';
4082
+ return /\b(wait|waiting|monitor|supervis|check inbox|check.*channel|poll|DONE|_DONE|signal|handoff)\b/i.test(task);
4083
+ }
2989
4084
  /**
2990
4085
  * Wait for agent exit with idle detection and nudging.
2991
4086
  * If no idle nudge config is set, falls through to simple waitForExit.
2992
4087
  */
2993
- async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs) {
4088
+ async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs, preserveIdleSupervisor = false) {
2994
4089
  const nudgeConfig = this.currentConfig?.swarm.idleNudge;
2995
4090
  if (!nudgeConfig) {
4091
+ if (preserveIdleSupervisor) {
4092
+ this.log(`[${step.name}] Supervising agent "${agent.name}" may idle while waiting — using exit-only completion`);
4093
+ return agent.waitForExit(timeoutMs);
4094
+ }
2996
4095
  // Idle = done: race exit against idle. Whichever fires first completes the step.
2997
4096
  const result = await Promise.race([
2998
4097
  agent.waitForExit(timeoutMs).then((r) => ({ kind: 'exit', result: r })),
@@ -3011,6 +4110,7 @@ export class WorkflowRunner {
3011
4110
  const escalateAfterMs = nudgeConfig.escalateAfterMs ?? 120_000;
3012
4111
  const maxNudges = nudgeConfig.maxNudges ?? 1;
3013
4112
  let nudgeCount = 0;
4113
+ let preservedSupervisorNoticeSent = false;
3014
4114
  const startTime = Date.now();
3015
4115
  while (true) {
3016
4116
  // Calculate remaining time from overall timeout
@@ -3045,6 +4145,14 @@ export class WorkflowRunner {
3045
4145
  this.emit({ type: 'step:nudged', runId: this.currentRunId ?? '', stepName: step.name, nudgeCount });
3046
4146
  continue;
3047
4147
  }
4148
+ if (preserveIdleSupervisor) {
4149
+ if (!preservedSupervisorNoticeSent) {
4150
+ this.log(`[${step.name}] Supervising agent "${agent.name}" stayed idle after ${nudgeCount} nudge(s) — preserving until exit or timeout`);
4151
+ this.postToChannel(`**[${step.name}]** Supervising agent \`${agent.name}\` is waiting on handoff — keeping it alive until it exits or the step times out`);
4152
+ preservedSupervisorNoticeSent = true;
4153
+ }
4154
+ continue;
4155
+ }
3048
4156
  // Exhausted nudges — force-release
3049
4157
  this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` still idle after ${nudgeCount} nudge(s) — force-releasing`);
3050
4158
  this.emit({ type: 'step:force-released', runId: this.currentRunId ?? '', stepName: step.name });
@@ -3114,7 +4222,31 @@ export class WorkflowRunner {
3114
4222
  return undefined;
3115
4223
  }
3116
4224
  // ── Verification ────────────────────────────────────────────────────────
3117
- runVerification(check, output, stepName, injectedTaskText) {
4225
+ runVerification(check, output, stepName, injectedTaskText, options) {
4226
+ const fail = (message) => {
4227
+ const observedAt = new Date().toISOString();
4228
+ this.recordStepToolSideEffect(stepName, {
4229
+ type: 'verification_observed',
4230
+ detail: message,
4231
+ observedAt,
4232
+ raw: { passed: false, type: check.type, value: check.value },
4233
+ });
4234
+ this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
4235
+ kind: 'verification_failed',
4236
+ source: 'verification',
4237
+ text: message,
4238
+ observedAt,
4239
+ value: check.value,
4240
+ });
4241
+ if (options?.allowFailure) {
4242
+ return {
4243
+ passed: false,
4244
+ completionReason: 'failed_verification',
4245
+ error: message,
4246
+ };
4247
+ }
4248
+ throw new WorkflowCompletionError(message, 'failed_verification');
4249
+ };
3118
4250
  switch (check.type) {
3119
4251
  case 'output_contains': {
3120
4252
  // Guard against false positives: the PTY captures the injected task text
@@ -3128,12 +4260,12 @@ export class WorkflowRunner {
3128
4260
  const first = output.indexOf(token);
3129
4261
  const hasSecond = first !== -1 && output.includes(token, first + token.length);
3130
4262
  if (!hasSecond) {
3131
- throw new Error(`Verification failed for "${stepName}": output does not contain "${token}" ` +
4263
+ return fail(`Verification failed for "${stepName}": output does not contain "${token}" ` +
3132
4264
  `(token found only in task injection — agent must output it explicitly)`);
3133
4265
  }
3134
4266
  }
3135
4267
  else if (!output.includes(token)) {
3136
- throw new Error(`Verification failed for "${stepName}": output does not contain "${token}"`);
4268
+ return fail(`Verification failed for "${stepName}": output does not contain "${token}"`);
3137
4269
  }
3138
4270
  break;
3139
4271
  }
@@ -3142,13 +4274,37 @@ export class WorkflowRunner {
3142
4274
  break;
3143
4275
  case 'file_exists':
3144
4276
  if (!existsSync(path.resolve(this.cwd, check.value))) {
3145
- throw new Error(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
4277
+ return fail(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
3146
4278
  }
3147
4279
  break;
3148
4280
  case 'custom':
3149
4281
  // Custom verifications are evaluated by callers; no-op here
3150
- break;
3151
- }
4282
+ return { passed: false };
4283
+ }
4284
+ if (options?.completionMarkerFound === false) {
4285
+ this.log(`[${stepName}] Verification passed without legacy STEP_COMPLETE marker; allowing completion`);
4286
+ }
4287
+ const successMessage = options?.completionMarkerFound === false
4288
+ ? `Verification passed without legacy STEP_COMPLETE marker`
4289
+ : `Verification passed`;
4290
+ const observedAt = new Date().toISOString();
4291
+ this.recordStepToolSideEffect(stepName, {
4292
+ type: 'verification_observed',
4293
+ detail: successMessage,
4294
+ observedAt,
4295
+ raw: { passed: true, type: check.type, value: check.value },
4296
+ });
4297
+ this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
4298
+ kind: 'verification_passed',
4299
+ source: 'verification',
4300
+ text: successMessage,
4301
+ observedAt,
4302
+ value: check.value,
4303
+ });
4304
+ return {
4305
+ passed: true,
4306
+ completionReason: 'completed_verified',
4307
+ };
3152
4308
  }
3153
4309
  // ── State helpers ─────────────────────────────────────────────────────
3154
4310
  async updateRunStatus(runId, status, error) {
@@ -3164,13 +4320,16 @@ export class WorkflowRunner {
3164
4320
  }
3165
4321
  await this.db.updateRun(runId, patch);
3166
4322
  }
3167
- async markStepFailed(state, error, runId, exitInfo) {
4323
+ async markStepFailed(state, error, runId, exitInfo, completionReason) {
4324
+ this.captureStepTerminalEvidence(state.row.stepName, {}, exitInfo);
3168
4325
  state.row.status = 'failed';
3169
4326
  state.row.error = error;
4327
+ state.row.completionReason = completionReason;
3170
4328
  state.row.completedAt = new Date().toISOString();
3171
4329
  await this.db.updateStep(state.row.id, {
3172
4330
  status: 'failed',
3173
4331
  error,
4332
+ completionReason,
3174
4333
  completedAt: state.row.completedAt,
3175
4334
  updatedAt: new Date().toISOString(),
3176
4335
  });
@@ -3182,6 +4341,7 @@ export class WorkflowRunner {
3182
4341
  exitCode: exitInfo?.exitCode,
3183
4342
  exitSignal: exitInfo?.exitSignal,
3184
4343
  });
4344
+ this.finalizeStepEvidence(state.row.stepName, 'failed', state.row.completedAt, completionReason);
3185
4345
  }
3186
4346
  async markDownstreamSkipped(failedStepName, allSteps, stepStates, runId) {
3187
4347
  const queue = [failedStepName];
@@ -3275,7 +4435,7 @@ export class WorkflowRunner {
3275
4435
  'RELAY SETUP — do this FIRST before any other relay tool:\n' +
3276
4436
  `1. Call: register(name="${agentName}")\n` +
3277
4437
  ' This authenticates you in the Relaycast workspace.\n' +
3278
- ' ALL relay tools (relay_send, relay_inbox, post_message, etc.) require\n' +
4438
+ ' ALL relay tools (mcp__relaycast__dm_send, mcp__relaycast__inbox_check, mcp__relaycast__message_post, etc.) require\n' +
3279
4439
  ' registration first — they will fail with "Not registered" otherwise.\n' +
3280
4440
  `2. Your agent name is "${agentName}" — use this exact name when registering.`);
3281
4441
  }
@@ -3297,10 +4457,10 @@ export class WorkflowRunner {
3297
4457
  'If it involves multiple independent subtasks, touches many files, or could take a long time, ' +
3298
4458
  'you should break it down and delegate to helper agents to avoid timeouts.\n\n' +
3299
4459
  'Option 1 — Spawn relay agents (for real parallel coding work):\n' +
3300
- ' - relay_spawn(name="helper-1", cli="claude", task="Specific subtask description")\n' +
3301
- ' - Coordinate via relay_send(to="helper-1", message="...")\n' +
3302
- ' - Check on them with relay_inbox()\n' +
3303
- ' - Clean up when done: relay_release(name="helper-1")\n\n' +
4460
+ ' - mcp__relaycast__agent_add(name="helper-1", cli="claude", task="Specific subtask description")\n' +
4461
+ ' - Coordinate via mcp__relaycast__dm_send(to="helper-1", text="...")\n' +
4462
+ ' - Check on them with mcp__relaycast__inbox_check()\n' +
4463
+ ' - Clean up when done: mcp__relaycast__agent_remove(name="helper-1")\n\n' +
3304
4464
  subAgentOption +
3305
4465
  'Guidelines:\n' +
3306
4466
  '- You are the lead — delegate but stay in control, track progress, integrate results\n' +
@@ -3311,9 +4471,23 @@ export class WorkflowRunner {
3311
4471
  ' "RELAY SETUP: First call register(name=\'<exact-agent-name>\') before any other relay tool."');
3312
4472
  }
3313
4473
  /** Post a message to the workflow channel. Fire-and-forget — never throws or blocks. */
3314
- postToChannel(text) {
4474
+ postToChannel(text, options = {}) {
3315
4475
  if (!this.relayApiKey || !this.channel)
3316
4476
  return;
4477
+ this.recordChannelEvidence(text, options);
4478
+ const stepName = options.stepName ?? this.inferStepNameFromChannelText(text);
4479
+ if (stepName) {
4480
+ this.recordStepToolSideEffect(stepName, {
4481
+ type: 'post_channel_message',
4482
+ detail: text.slice(0, 240),
4483
+ raw: {
4484
+ actor: options.actor,
4485
+ role: options.role,
4486
+ target: options.target ?? this.channel,
4487
+ origin: options.origin ?? 'runner_post',
4488
+ },
4489
+ });
4490
+ }
3317
4491
  this.ensureRelaycastRunnerAgent()
3318
4492
  .then((agent) => agent.send(this.channel, text))
3319
4493
  .catch(() => {
@@ -3471,6 +4645,9 @@ export class WorkflowRunner {
3471
4645
  output: state.row.output,
3472
4646
  error: state.row.error,
3473
4647
  verificationPassed: state.row.status === 'completed' && stepsWithVerification.has(name),
4648
+ completionMode: state.row.completionReason
4649
+ ? this.buildStepCompletionDecision(name, state.row.completionReason)?.mode
4650
+ : undefined,
3474
4651
  });
3475
4652
  }
3476
4653
  return outcomes;
@@ -3603,24 +4780,30 @@ export class WorkflowRunner {
3603
4780
  /** Persist step output to disk and post full output as a channel message. */
3604
4781
  async persistStepOutput(runId, stepName, output) {
3605
4782
  // 1. Write to disk
4783
+ const outputPath = path.join(this.getStepOutputDir(runId), `${stepName}.md`);
3606
4784
  try {
3607
4785
  const dir = this.getStepOutputDir(runId);
3608
4786
  mkdirSync(dir, { recursive: true });
3609
4787
  const cleaned = WorkflowRunner.stripAnsi(output);
3610
- await writeFile(path.join(dir, `${stepName}.md`), cleaned);
4788
+ await writeFile(outputPath, cleaned);
3611
4789
  }
3612
4790
  catch {
3613
4791
  // Non-critical
3614
4792
  }
4793
+ this.recordStepToolSideEffect(stepName, {
4794
+ type: 'persist_step_output',
4795
+ detail: `Persisted step output to ${this.normalizeEvidencePath(outputPath)}`,
4796
+ raw: { path: outputPath },
4797
+ });
3615
4798
  // 2. Post scrubbed output as a single channel message (most recent tail only)
3616
4799
  const scrubbed = WorkflowRunner.scrubForChannel(output);
3617
4800
  if (scrubbed.length === 0) {
3618
- this.postToChannel(`**[${stepName}]** Step completed — output written to disk`);
4801
+ this.postToChannel(`**[${stepName}]** Step completed — output written to disk`, { stepName });
3619
4802
  return;
3620
4803
  }
3621
4804
  const maxMsg = 2000;
3622
4805
  const preview = scrubbed.length > maxMsg ? scrubbed.slice(-maxMsg) : scrubbed;
3623
- this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``);
4806
+ this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``, { stepName });
3624
4807
  }
3625
4808
  /** Load persisted step output from disk. */
3626
4809
  loadStepOutput(runId, stepName) {