agent-relay 3.1.23 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/README.md +2 -0
  2. package/bin/agent-relay-broker-darwin-arm64 +0 -0
  3. package/bin/agent-relay-broker-darwin-x64 +0 -0
  4. package/bin/agent-relay-broker-linux-arm64 +0 -0
  5. package/bin/agent-relay-broker-linux-x64 +0 -0
  6. package/dist/index.cjs +4053 -16716
  7. package/dist/src/cli/commands/setup.js +1 -1
  8. package/dist/src/cli/commands/setup.js.map +1 -1
  9. package/dist/src/cli/lib/broker-lifecycle.d.ts.map +1 -1
  10. package/dist/src/cli/lib/broker-lifecycle.js +11 -0
  11. package/dist/src/cli/lib/broker-lifecycle.js.map +1 -1
  12. package/dist/src/cli/lib/relaycast-mcp-command.d.ts +5 -0
  13. package/dist/src/cli/lib/relaycast-mcp-command.d.ts.map +1 -0
  14. package/dist/src/cli/lib/relaycast-mcp-command.js +13 -0
  15. package/dist/src/cli/lib/relaycast-mcp-command.js.map +1 -0
  16. package/dist/src/cli/relaycast-mcp.d.ts +39 -0
  17. package/dist/src/cli/relaycast-mcp.d.ts.map +1 -0
  18. package/dist/src/cli/relaycast-mcp.js +432 -0
  19. package/dist/src/cli/relaycast-mcp.js.map +1 -0
  20. package/package.json +9 -8
  21. package/packages/acp-bridge/package.json +2 -2
  22. package/packages/config/package.json +1 -1
  23. package/packages/hooks/package.json +4 -4
  24. package/packages/memory/package.json +2 -2
  25. package/packages/openclaw/README.md +7 -7
  26. package/packages/openclaw/dist/identity/files.js +5 -5
  27. package/packages/openclaw/dist/identity/files.js.map +1 -1
  28. package/packages/openclaw/dist/setup.js +4 -4
  29. package/packages/openclaw/package.json +2 -2
  30. package/packages/openclaw/skill/SKILL.md +24 -24
  31. package/packages/openclaw/src/identity/files.ts +5 -5
  32. package/packages/openclaw/src/setup.ts +4 -4
  33. package/packages/openclaw/templates/SOUL.md.template +5 -5
  34. package/packages/policy/package.json +2 -2
  35. package/packages/sdk/dist/__tests__/completion-pipeline.test.d.ts +14 -0
  36. package/packages/sdk/dist/__tests__/completion-pipeline.test.d.ts.map +1 -0
  37. package/packages/sdk/dist/__tests__/completion-pipeline.test.js +1476 -0
  38. package/packages/sdk/dist/__tests__/completion-pipeline.test.js.map +1 -0
  39. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js +2 -2
  40. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js.map +1 -1
  41. package/packages/sdk/dist/__tests__/unit.test.js +8 -0
  42. package/packages/sdk/dist/__tests__/unit.test.js.map +1 -1
  43. package/packages/sdk/dist/client.js +2 -2
  44. package/packages/sdk/dist/client.js.map +1 -1
  45. package/packages/sdk/dist/examples/example.js +1 -1
  46. package/packages/sdk/dist/examples/example.js.map +1 -1
  47. package/packages/sdk/dist/examples/ralph-loop.js +6 -6
  48. package/packages/sdk/dist/examples/ralph-loop.js.map +1 -1
  49. package/packages/sdk/dist/relay-adapter.js +4 -4
  50. package/packages/sdk/dist/relay-adapter.js.map +1 -1
  51. package/packages/sdk/dist/relay.d.ts +1 -0
  52. package/packages/sdk/dist/relay.d.ts.map +1 -1
  53. package/packages/sdk/dist/relay.js +2 -0
  54. package/packages/sdk/dist/relay.js.map +1 -1
  55. package/packages/sdk/dist/workflows/runner.d.ts +53 -2
  56. package/packages/sdk/dist/workflows/runner.d.ts.map +1 -1
  57. package/packages/sdk/dist/workflows/runner.js +1277 -94
  58. package/packages/sdk/dist/workflows/runner.js.map +1 -1
  59. package/packages/sdk/dist/workflows/trajectory.d.ts +6 -2
  60. package/packages/sdk/dist/workflows/trajectory.d.ts.map +1 -1
  61. package/packages/sdk/dist/workflows/trajectory.js +37 -2
  62. package/packages/sdk/dist/workflows/trajectory.js.map +1 -1
  63. package/packages/sdk/dist/workflows/types.d.ts +88 -0
  64. package/packages/sdk/dist/workflows/types.d.ts.map +1 -1
  65. package/packages/sdk/dist/workflows/types.js.map +1 -1
  66. package/packages/sdk/dist/workflows/validator.js +4 -4
  67. package/packages/sdk/dist/workflows/validator.js.map +1 -1
  68. package/packages/sdk/package.json +2 -2
  69. package/packages/sdk/src/__tests__/completion-pipeline.test.ts +1820 -0
  70. package/packages/sdk/src/__tests__/e2e-owner-review.test.ts +2 -2
  71. package/packages/sdk/src/__tests__/idle-nudge.test.ts +68 -0
  72. package/packages/sdk/src/__tests__/unit.test.ts +10 -0
  73. package/packages/sdk/src/__tests__/workflow-runner.test.ts +113 -4
  74. package/packages/sdk/src/client.ts +2 -2
  75. package/packages/sdk/src/examples/example.ts +1 -1
  76. package/packages/sdk/src/examples/ralph-loop.ts +6 -6
  77. package/packages/sdk/src/relay-adapter.ts +4 -4
  78. package/packages/sdk/src/relay.ts +2 -0
  79. package/packages/sdk/src/workflows/README.md +43 -11
  80. package/packages/sdk/src/workflows/runner.ts +1759 -102
  81. package/packages/sdk/src/workflows/schema.json +6 -0
  82. package/packages/sdk/src/workflows/trajectory.ts +52 -3
  83. package/packages/sdk/src/workflows/types.ts +149 -0
  84. package/packages/sdk/src/workflows/validator.ts +4 -4
  85. package/packages/sdk-py/pyproject.toml +1 -1
  86. package/packages/sdk-py/src/agent_relay/models.py +11 -0
  87. package/packages/sdk-py/src/agent_relay/relay.py +9 -6
  88. package/packages/sdk-py/tests/test_relay_lifecycle_hooks.py +23 -0
  89. package/packages/telemetry/package.json +1 -1
  90. package/packages/trajectory/package.json +2 -2
  91. package/packages/user-directory/package.json +2 -2
  92. package/packages/utils/package.json +2 -2
  93. package/relay-snippets/agent-relay-protocol.md +4 -4
  94. package/relay-snippets/agent-relay-snippet.md +31 -43
@@ -6,14 +6,24 @@
6
6
 
7
7
  import { spawn as cpSpawn, execFileSync } from 'node:child_process';
8
8
  import { randomBytes } from 'node:crypto';
9
- import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
10
- import type { WriteStream } from 'node:fs';
9
+ import {
10
+ createWriteStream,
11
+ existsSync,
12
+ mkdirSync,
13
+ readFileSync,
14
+ readdirSync,
15
+ renameSync,
16
+ statSync,
17
+ writeFileSync,
18
+ } from 'node:fs';
19
+ import type { Dirent, WriteStream } from 'node:fs';
11
20
  import { readFile, writeFile, mkdir } from 'node:fs/promises';
12
21
  import path from 'node:path';
13
22
 
14
23
  import { parse as parseYaml } from 'yaml';
15
24
  import { stripAnsi as stripAnsiFn } from '../pty.js';
16
25
  import type { BrokerEvent } from '../protocol.js';
26
+ import { resolveSpawnPolicy } from '../spawn-from-env.js';
17
27
 
18
28
  import {
19
29
  loadCustomSteps,
@@ -27,6 +37,12 @@ import type {
27
37
  AgentCli,
28
38
  AgentDefinition,
29
39
  AgentPreset,
40
+ CompletionEvidenceChannelOrigin,
41
+ CompletionEvidenceChannelPost,
42
+ CompletionEvidenceFileChange,
43
+ CompletionEvidenceSignal,
44
+ CompletionEvidenceSignalKind,
45
+ CompletionEvidenceToolSideEffect,
30
46
  DryRunReport,
31
47
  DryRunWave,
32
48
  ErrorHandlingConfig,
@@ -34,12 +50,16 @@ import type {
34
50
  PathDefinition,
35
51
  PreflightCheck,
36
52
  RelayYamlConfig,
53
+ StepCompletionDecision,
54
+ StepCompletionEvidence,
37
55
  SwarmPattern,
38
56
  VerificationCheck,
39
57
  WorkflowDefinition,
58
+ WorkflowOwnerDecision,
40
59
  WorkflowRunRow,
41
60
  WorkflowRunStatus,
42
61
  WorkflowStep,
62
+ WorkflowStepCompletionReason,
43
63
  WorkflowStepRow,
44
64
  WorkflowStepStatus,
45
65
  } from './types.js';
@@ -84,6 +104,33 @@ class SpawnExitError extends Error {
84
104
  }
85
105
  }
86
106
 
107
/**
 * Error that optionally carries the completion reason associated with the
 * failure, so code catching it can read `completionReason` alongside the message.
 */
class WorkflowCompletionError extends Error {
  /**
   * @param message - Human-readable description of the failure.
   * @param completionReason - Optional completion reason stored on the instance.
   */
  constructor(
    message: string,
    public completionReason?: WorkflowStepCompletionReason
  ) {
    super(message);
    this.name = 'WorkflowCompletionError';
  }
}
116
+
117
/** Result shape for a verification check. */
interface VerificationResult {
  /** Whether the check passed. */
  passed: boolean;
  /** Completion reason attached to the result, when there is one. */
  completionReason?: WorkflowStepCompletionReason;
  /** Error text attached to the result, when there is one. */
  error?: string;
}
122
+
123
/** Optional switches accepted by verification. */
interface VerificationOptions {
  /** NOTE(review): presumably lets a failed check be reported instead of thrown — confirm at the call site. */
  allowFailure?: boolean;
  /** Whether a completion marker was already observed by the caller. */
  completionMarkerFound?: boolean;
}
127
+
128
/** Result shape for a step completion decision. */
interface CompletionDecisionResult {
  /** Completion reason chosen for the step. */
  completionReason: WorkflowStepCompletionReason;
  /** Owner decision associated with the result, when there is one. */
  ownerDecision?: WorkflowOwnerDecision;
  /** Free-form explanation accompanying the decision. */
  reason?: string;
}
133
+
87
134
  // ── Events ──────────────────────────────────────────────────────────────────
88
135
 
89
136
  export type WorkflowEvent =
@@ -177,6 +224,10 @@ interface SpawnedAgentInfo {
177
224
 
178
225
  interface SpawnAndWaitOptions {
179
226
  agentNameSuffix?: string;
227
+ evidenceStepName?: string;
228
+ evidenceRole?: string;
229
+ logicalName?: string;
230
+ preserveOnIdle?: boolean;
180
231
  onSpawned?: (info: SpawnedAgentInfo) => void | Promise<void>;
181
232
  onChunk?: (info: { agentName: string; chunk: string }) => void;
182
233
  }
@@ -187,6 +238,37 @@ interface SupervisedRuntimeAgent {
187
238
  logicalName: string;
188
239
  }
189
240
 
241
/** Step attribution stored per runtime agent name. */
interface RuntimeStepAgent {
  /** Name of the step the agent is working on. */
  stepName: string;
  /** Role the agent plays in that step. */
  role: string;
  /** Logical (configured) agent name, as opposed to the runtime name used as lookup key. */
  logicalName: string;
}
246
+
247
/** Per-file metadata stored in a snapshot and compared to detect changes. */
interface FileSnapshotEntry {
  /** Last-modified time in milliseconds, as reported by `fs.Stats.mtimeMs`. */
  mtimeMs: number;
  /** File size in bytes, as reported by `fs.Stats.size`. */
  size: number;
}
251
+
252
/** Internal bookkeeping kept for each step while completion evidence is collected. */
interface StepEvidenceRecord {
  /** The evidence accumulated for the step so far. */
  evidence: StepCompletionEvidence;
  /** Baseline file snapshot per evidence root, keyed by root path and then by file path. */
  baselineSnapshots: Map<string, Map<string, FileSnapshotEntry>>;
  /** Set once file changes have been diffed into `evidence.files`, so it happens only once. */
  filesCaptured: boolean;
}
257
+
258
/** Sender identities accepted for owner and worker coordination signals of a step. */
interface StepSignalParticipants {
  /** Names allowed to emit owner-side signals. */
  ownerSenders: Set<string>;
  /** Names allowed to emit worker-side signals. */
  workerSenders: Set<string>;
}
262
+
263
/** Optional attribution details supplied when a channel message is recorded as evidence. */
interface ChannelEvidenceOptions {
  /** Step to attribute the message to; when omitted the step is inferred. */
  stepName?: string;
  /** Name recorded as the message sender. */
  sender?: string;
  /** Runtime agent name behind the message; also used as the sender when `sender` is omitted. */
  actor?: string;
  /** Role of the agent that produced the message. */
  role?: string;
  /** Destination the message was addressed to. */
  target?: string;
  /** Where the message originated; defaults to `'runner_post'` when recorded. */
  origin?: CompletionEvidenceChannelOrigin;
}
271
+
190
272
  // ── CLI resolution ───────────────────────────────────────────────────────────
191
273
 
192
274
  /**
@@ -267,6 +349,12 @@ export class WorkflowRunner {
267
349
  private readonly lastActivity = new Map<string, string>();
268
350
  /** Runtime-name lookup for agents participating in supervised owner flows. */
269
351
  private readonly supervisedRuntimeAgents = new Map<string, SupervisedRuntimeAgent>();
352
+ /** Runtime-name lookup for active step agents so channel messages can be attributed to a step. */
353
+ private readonly runtimeStepAgents = new Map<string, RuntimeStepAgent>();
354
+ /** Per-step completion evidence collected across output, channel, files, and tool side-effects. */
355
+ private readonly stepCompletionEvidence = new Map<string, StepEvidenceRecord>();
356
+ /** Expected owner/worker identities per step so coordination signals can be validated by sender. */
357
+ private readonly stepSignalParticipants = new Map<string, StepSignalParticipants>();
270
358
  /** Resolved named paths from the top-level `paths` config, keyed by name → absolute directory. */
271
359
  private resolvedPaths = new Map<string, string>();
272
360
 
@@ -363,6 +451,531 @@ export class WorkflowRunner {
363
451
  return resolved;
364
452
  }
365
453
 
454
/** Directory names that are never descended into while snapshotting files for evidence. */
private static readonly EVIDENCE_IGNORED_DIRS = new Set<string>(
  ['.git', '.agent-relay', '.trajectories', 'node_modules']
);
460
+
461
/**
 * Returns a deep copy of the evidence collected for `stepName`, with
 * coordination signals from unexpected senders filtered out.
 *
 * @param stepName - Step whose evidence is requested.
 * @returns The filtered copy, or `undefined` when nothing was recorded for the step.
 */
public getStepCompletionEvidence(stepName: string): StepCompletionEvidence | undefined {
  const record = this.stepCompletionEvidence.get(stepName);
  // Clone before filtering: the provenance filter mutates the object it is given.
  return record
    ? this.filterStepEvidenceBySignalProvenance(stepName, structuredClone(record.evidence))
    : undefined;
}
468
+
469
/**
 * Looks up the evidence record for `stepName`, registering an empty one
 * (timestamped with the current time) when none exists yet.
 *
 * @param stepName - Step the record belongs to.
 * @returns The existing or newly created record.
 */
private getOrCreateStepEvidenceRecord(stepName: string): StepEvidenceRecord {
  const cached = this.stepCompletionEvidence.get(stepName);
  if (cached) return cached;

  const evidence: StepCompletionEvidence = {
    stepName,
    lastUpdatedAt: new Date().toISOString(),
    roots: [],
    output: { stdout: '', stderr: '', combined: '' },
    channelPosts: [],
    files: [],
    process: {},
    toolSideEffects: [],
    coordinationSignals: [],
  };
  const created: StepEvidenceRecord = {
    evidence,
    baselineSnapshots: new Map(),
    filesCaptured: false,
  };

  this.stepCompletionEvidence.set(stepName, created);
  return created;
}
496
+
497
/**
 * Resets the expected signal senders for `stepName` and seeds them with the
 * given owner and worker names.
 *
 * @param stepName - Step whose participants are (re)initialized.
 * @param ownerSender - Optional owner identity to register.
 * @param workerSender - Optional worker identity to register.
 */
private initializeStepSignalParticipants(
  stepName: string,
  ownerSender?: string,
  workerSender?: string
): void {
  // Start from empty sets so senders registered earlier for this step are discarded.
  const fresh: StepSignalParticipants = {
    ownerSenders: new Set(),
    workerSenders: new Set(),
  };
  this.stepSignalParticipants.set(stepName, fresh);

  const seeds: Array<['owner' | 'worker', string | undefined]> = [
    ['owner', ownerSender],
    ['worker', workerSender],
  ];
  for (const [participant, sender] of seeds) {
    this.rememberStepSignalSender(stepName, participant, sender);
  }
}
509
+
510
/**
 * Registers one or more sender names as valid owner or worker identities for
 * a step, creating the step's participant entry when it does not exist.
 *
 * @param stepName - Step the senders belong to.
 * @param participant - Which side the senders act for.
 * @param senders - Names to register; `undefined`, empty and whitespace-only values are skipped.
 */
private rememberStepSignalSender(
  stepName: string,
  participant: 'owner' | 'worker',
  ...senders: Array<string | undefined>
): void {
  let entry = this.stepSignalParticipants.get(stepName);
  if (!entry) {
    entry = {
      ownerSenders: new Set<string>(),
      workerSenders: new Set<string>(),
    };
    this.stepSignalParticipants.set(stepName, entry);
  }

  const bucket = participant === 'owner' ? entry.ownerSenders : entry.workerSenders;
  senders
    .map((candidate) => candidate?.trim())
    .filter((name): name is string => Boolean(name))
    .forEach((name) => {
      bucket.add(name);
    });
}
530
+
531
/**
 * Classifies a free-form role string as owner-like or worker-like by keyword.
 *
 * @param role - Role text; matched case-insensitively.
 * @returns `'owner'`, `'worker'`, or `undefined` when the role is empty or matches neither keyword set.
 */
private resolveSignalParticipantKind(role?: string): 'owner' | 'worker' | undefined {
  const normalized = role?.toLowerCase().trim();
  if (!normalized) return undefined;

  // Owner keywords are tested first, so a role matching both sets resolves to 'owner'.
  const keywordRules: Array<[RegExp, 'owner' | 'worker']> = [
    [/\b(owner|lead|supervisor)\b/, 'owner'],
    [/\b(worker|specialist|engineer|implementer)\b/, 'worker'],
  ];
  for (const [pattern, kind] of keywordRules) {
    if (pattern.test(normalized)) return kind;
  }
  return undefined;
}
538
+
539
/**
 * Decides whether a coordination signal may be trusted for `stepName`.
 *
 * Only `worker_done` and `lead_done` are restricted; every other signal kind
 * is accepted. A restricted signal is also accepted when no participants, or
 * no senders for the relevant side, are registered for the step.
 *
 * @param stepName - Step the signal was recorded for.
 * @param signal - Signal to validate.
 * @returns `true` when the signal should be kept.
 */
private isSignalFromExpectedSender(stepName: string, signal: CompletionEvidenceSignal): boolean {
  let expected: 'owner' | 'worker';
  switch (signal.kind) {
    case 'worker_done':
      expected = 'worker';
      break;
    case 'lead_done':
      expected = 'owner';
      break;
    default:
      return true;
  }

  const participants = this.stepSignalParticipants.get(stepName);
  if (!participants) return true;

  const allowed = expected === 'owner' ? participants.ownerSenders : participants.workerSenders;
  if (allowed.size === 0) return true;

  // A named sender (or, failing that, actor) must be one of the registered identities.
  const identity = signal.sender ?? signal.actor;
  if (identity) return allowed.has(identity);

  // Without an identity, fall back to the role; without a usable role, only
  // signals that did not come from the channel are accepted.
  const observed = this.resolveSignalParticipantKind(signal.role);
  return observed ? observed === expected : signal.source !== 'channel';
}
567
+
568
/**
 * Drops signals that fail the sender check from both the channel posts and
 * the coordination signal list of `evidence`.
 *
 * The passed object is modified in place and also returned. Each post's
 * `completionRelevant` flag is recomputed from its surviving signals.
 *
 * @param stepName - Step used to look up the expected senders.
 * @param evidence - Evidence to filter (mutated).
 * @returns The same `evidence` object.
 */
private filterStepEvidenceBySignalProvenance(
  stepName: string,
  evidence: StepCompletionEvidence
): StepCompletionEvidence {
  const isTrusted = (signal: CompletionEvidenceSignal): boolean =>
    this.isSignalFromExpectedSender(stepName, signal);

  evidence.channelPosts = evidence.channelPosts.map((post) => {
    const signals = post.signals.filter(isTrusted);
    return {
      ...post,
      completionRelevant: signals.length > 0,
      signals,
    };
  });
  evidence.coordinationSignals = evidence.coordinationSignals.filter(isTrusted);

  return evidence;
}
587
+
588
/**
 * Marks a step's evidence as running and snapshots each new evidence root so
 * file changes can be diffed later.
 *
 * @param stepName - Step that is starting.
 * @param roots - Candidate root paths; `undefined` entries and duplicates are ignored.
 * @param startedAt - Optional ISO timestamp; defaults to the current time.
 */
private beginStepEvidence(stepName: string, roots: Array<string | undefined>, startedAt?: string): void {
  const { evidence, baselineSnapshots } = this.getOrCreateStepEvidenceRecord(stepName);
  const timestamp = startedAt ?? new Date().toISOString();

  // Keep the first recorded start time if this is called again for the same step.
  if (evidence.startedAt == null) {
    evidence.startedAt = timestamp;
  }
  evidence.status = 'running';
  evidence.lastUpdatedAt = timestamp;

  for (const root of this.uniqueEvidenceRoots(roots)) {
    if (!evidence.roots.includes(root)) {
      evidence.roots.push(root);
    }
    // A baseline is taken once per root and never overwritten.
    if (!baselineSnapshots.has(root)) {
      baselineSnapshots.set(root, this.captureFileSnapshot(root));
    }
  }
}
606
+
607
/**
 * Appends terminal output and process exit details to a step's evidence and
 * records any completion signals found in stdout/stderr.
 *
 * @param stepName - Step the output belongs to.
 * @param output - Output chunks; `combined` falls back to stdout and stderr joined by a newline.
 * @param process - Optional exit code and exit signal of the step's process.
 * @param meta - Optional sender/actor/role attached to extracted signals.
 */
private captureStepTerminalEvidence(
  stepName: string,
  output: { stdout?: string; stderr?: string; combined?: string },
  process?: { exitCode?: number; exitSignal?: string },
  meta?: { sender?: string; actor?: string; role?: string }
): void {
  const { evidence } = this.getOrCreateStepEvidenceRecord(stepName);
  const observedAt = new Date().toISOString();

  // Newline-join two chunks, treating an empty side as absent.
  const joinChunks = (existing: string, addition?: string): string => {
    if (!addition) return existing;
    if (!existing) return addition;
    return `${existing}\n${addition}`;
  };

  const streams = ['stdout', 'stderr'] as const;
  for (const stream of streams) {
    const chunk = output[stream];
    if (!chunk) continue;
    evidence.output[stream] = joinChunks(evidence.output[stream], chunk);
    evidence.coordinationSignals.push(
      ...this.extractCompletionSignals(chunk, stream, observedAt, meta)
    );
  }

  const combinedChunk =
    output.combined ??
    [output.stdout, output.stderr].filter((value): value is string => Boolean(value)).join('\n');
  if (combinedChunk) {
    evidence.output.combined = joinChunks(evidence.output.combined, combinedChunk);
  }

  const { exitCode, exitSignal } = process ?? {};
  if (exitCode !== undefined) {
    evidence.process.exitCode = exitCode;
    evidence.coordinationSignals.push({
      kind: 'process_exit',
      source: 'process',
      text: `Process exited with code ${exitCode}`,
      observedAt,
      value: String(exitCode),
    });
  }
  if (exitSignal !== undefined) {
    evidence.process.exitSignal = exitSignal;
  }

  evidence.lastUpdatedAt = observedAt;
}
660
+
661
/**
 * Stamps the final status on a step's evidence, captures file changes against
 * the baseline snapshots (once per record), and, when a completion reason is
 * given, forwards the resulting completion decision to the trajectory.
 *
 * Does nothing when no evidence record exists for the step.
 *
 * @param stepName - Step being finalized.
 * @param status - Status to store on the evidence.
 * @param completedAt - Optional ISO timestamp; defaults to the current time.
 * @param completionReason - Optional reason used to build a completion decision.
 */
private finalizeStepEvidence(
  stepName: string,
  status: WorkflowStepStatus,
  completedAt?: string,
  completionReason?: WorkflowStepCompletionReason
): void {
  const record = this.stepCompletionEvidence.get(stepName);
  if (record === undefined) return;

  const { evidence } = record;
  const timestamp = completedAt ?? new Date().toISOString();

  evidence.status = status;
  if (status !== 'running') {
    evidence.completedAt = timestamp;
  }
  evidence.lastUpdatedAt = timestamp;

  if (!record.filesCaptured) {
    // Track kind+path pairs so a change already listed is not added twice.
    const knownChanges = new Set(evidence.files.map((file) => `${file.kind}:${file.path}`));
    for (const root of evidence.roots) {
      const baseline = record.baselineSnapshots.get(root) ?? new Map<string, FileSnapshotEntry>();
      const current = this.captureFileSnapshot(root);
      for (const change of this.diffFileSnapshots(baseline, current, root, timestamp)) {
        const changeId = `${change.kind}:${change.path}`;
        if (!knownChanges.has(changeId)) {
          knownChanges.add(changeId);
          evidence.files.push(change);
        }
      }
    }
    record.filesCaptured = true;
  }

  if (!completionReason) return;

  const decision = this.buildStepCompletionDecision(stepName, completionReason);
  if (decision) {
    // Fire-and-forget: the result of the trajectory call is not awaited.
    void this.trajectory?.stepCompletionDecision(stepName, decision);
  }
}
700
+
701
/**
 * Appends a tool side-effect entry to a step's evidence.
 *
 * @param stepName - Step the side effect belongs to.
 * @param effect - Side-effect details; `observedAt` defaults to the current time.
 */
private recordStepToolSideEffect(
  stepName: string,
  effect: Omit<CompletionEvidenceToolSideEffect, 'observedAt'> & { observedAt?: string }
): void {
  const { evidence } = this.getOrCreateStepEvidenceRecord(stepName);
  const observedAt = effect.observedAt ?? new Date().toISOString();

  evidence.toolSideEffects.push({ ...effect, observedAt });
  evidence.lastUpdatedAt = observedAt;
}
713
+
714
/**
 * Records a channel message as evidence for a step, together with any
 * completion signals found in its text.
 *
 * The step is taken from `options.stepName`, otherwise inferred from the
 * message text, otherwise looked up from the actor's runtime name. The
 * message is ignored when no step can be determined.
 *
 * @param text - Message body.
 * @param options - Optional attribution details for the message.
 */
private recordChannelEvidence(text: string, options: ChannelEvidenceOptions = {}): void {
  let stepName = options.stepName ?? this.inferStepNameFromChannelText(text);
  if (stepName == null && options.actor) {
    stepName = this.runtimeStepAgents.get(options.actor)?.stepName;
  }
  if (!stepName) return;

  const { actor, role, target } = options;
  const { evidence } = this.getOrCreateStepEvidenceRecord(stepName);
  const postedAt = new Date().toISOString();
  const sender = options.sender ?? actor;
  const signals = this.extractCompletionSignals(text, 'channel', postedAt, { sender, actor, role });

  const channelPost: CompletionEvidenceChannelPost = {
    stepName,
    text,
    postedAt,
    origin: options.origin ?? 'runner_post',
    completionRelevant: signals.length > 0,
    sender,
    actor,
    role,
    target,
    signals,
  };

  evidence.channelPosts.push(channelPost);
  evidence.coordinationSignals.push(...signals);
  evidence.lastUpdatedAt = postedAt;
}
747
+
748
/**
 * Scans `text` for completion markers and completion-related phrases and
 * returns one signal per distinct hit.
 *
 * Explicit markers (WORKER_DONE, LEAD_DONE, STEP_COMPLETE, OWNER_DECISION,
 * REVIEW_DECISION) are checked first, in that order, followed by the phrase
 * heuristics. Signal text is trimmed and capped at 280 characters; signals
 * with the same kind, text and value are emitted once.
 *
 * @param text - Text to scan.
 * @param source - Where the text came from; copied onto each signal.
 * @param observedAt - Timestamp copied onto each signal.
 * @param meta - Optional sender/actor/role copied onto each signal.
 * @returns The signals found, in detection order.
 */
private extractCompletionSignals(
  text: string,
  source: CompletionEvidenceSignal['source'],
  observedAt: string,
  meta?: { sender?: string; actor?: string; role?: string }
): CompletionEvidenceSignal[] {
  const collected: CompletionEvidenceSignal[] = [];
  const emitted = new Set<string>();

  const emit = (kind: CompletionEvidenceSignalKind, signalText: string, value?: string): void => {
    const snippet = signalText.trim().slice(0, 280);
    if (!snippet) return;

    const dedupeKey = `${kind}:${snippet}:${value ?? ''}`;
    if (emitted.has(dedupeKey)) return;
    emitted.add(dedupeKey);

    collected.push({
      kind,
      source,
      text: snippet,
      observedAt,
      sender: meta?.sender,
      actor: meta?.actor,
      role: meta?.role,
      value,
    });
  };

  const trimmed = (captured?: string): string | undefined => captured?.trim();
  const upperCased = (captured?: string): string | undefined => captured?.toUpperCase();
  const asIs = (captured?: string): string | undefined => captured;

  // Explicit markers: every occurrence is reported, with capture group 1 as the value.
  const markerRules: Array<{
    kind: CompletionEvidenceSignalKind;
    pattern: RegExp;
    toValue: (captured?: string) => string | undefined;
  }> = [
    { kind: 'worker_done', pattern: /\bWORKER_DONE\b(?::\s*([^\n]+))?/gi, toValue: trimmed },
    { kind: 'lead_done', pattern: /\bLEAD_DONE\b(?::\s*([^\n]+))?/gi, toValue: trimmed },
    { kind: 'step_complete', pattern: /\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/g, toValue: asIs },
    {
      kind: 'owner_decision',
      pattern: /\bOWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi,
      toValue: upperCased,
    },
    { kind: 'review_decision', pattern: /\bREVIEW_DECISION:\s*(APPROVE|REJECT)\b/gi, toValue: upperCased },
  ];
  for (const { kind, pattern, toValue } of markerRules) {
    for (const match of text.matchAll(pattern)) {
      emit(kind, match[0], toValue(match[1]));
    }
  }

  // Phrase heuristics: at most one signal each, described by the first meaningful line.
  const phraseRules: Array<{ kind: CompletionEvidenceSignalKind; pattern: RegExp }> = [
    { kind: 'verification_passed', pattern: /\bverification gate observed\b|\bverification passed\b/i },
    { kind: 'verification_failed', pattern: /\bverification failed\b/i },
    {
      kind: 'task_summary',
      pattern:
        /\b(summary|handoff|ready for review|ready for handoff|task complete|work complete|completed work|finished work)\b/i,
    },
  ];
  for (const { kind, pattern } of phraseRules) {
    if (pattern.test(text)) {
      emit(kind, this.firstMeaningfulLine(text) ?? text);
    }
  }

  return collected;
}
811
+
812
/**
 * Tries to read a step name out of a channel message.
 *
 * Checks for a leading `**[name]` prefix first, then for a
 * `STEP_COMPLETE:name` marker anywhere in the text.
 *
 * @param text - Channel message body.
 * @returns The captured step name, or `undefined` when neither form is present.
 */
private inferStepNameFromChannelText(text: string): string | undefined {
  const namePatterns = [/^\*\*\[([^\]]+)\]/, /\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/];
  for (const pattern of namePatterns) {
    const captured = text.match(pattern)?.[1];
    if (captured) return captured;
  }
  return undefined;
}
821
+
822
/**
 * Resolves the given roots to absolute paths and removes duplicates,
 * keeping first-seen order. Empty and `undefined` entries are dropped.
 *
 * @param roots - Candidate root paths.
 * @returns Distinct absolute paths.
 */
private uniqueEvidenceRoots(roots: Array<string | undefined>): string[] {
  const resolved = new Set<string>();
  for (const root of roots) {
    if (root) resolved.add(path.resolve(root));
  }
  return [...resolved];
}
825
+
826
/**
 * Builds a snapshot (modification time and size per file) of everything
 * under `root`.
 *
 * When `root` is itself a regular file, the snapshot holds just that file.
 * Directories named in `EVIDENCE_IGNORED_DIRS` are not descended into.
 * Unreadable directories and files that cannot be stat'ed are skipped rather
 * than reported, because evidence collection is best-effort.
 *
 * @param root - File or directory to snapshot.
 * @returns Map from full file path to its snapshot entry; empty when `root` does not exist.
 */
private captureFileSnapshot(root: string): Map<string, FileSnapshotEntry> {
  const snapshot = new Map<string, FileSnapshotEntry>();
  if (!existsSync(root)) return snapshot;

  let rootIsFile: boolean;
  try {
    const rootStats = statSync(root);
    rootIsFile = rootStats.isFile();
    if (rootIsFile) {
      snapshot.set(root, { mtimeMs: rootStats.mtimeMs, size: rootStats.size });
    }
  } catch {
    return snapshot;
  }
  if (rootIsFile) return snapshot;

  const walk = (directory: string): void => {
    let children: Dirent[];
    try {
      children = readdirSync(directory, { withFileTypes: true });
    } catch {
      return;
    }

    for (const child of children) {
      const isDirectory = child.isDirectory();
      if (isDirectory && WorkflowRunner.EVIDENCE_IGNORED_DIRS.has(child.name)) {
        continue;
      }

      const childPath = path.join(directory, child.name);
      if (isDirectory) {
        walk(childPath);
        continue;
      }

      try {
        const childStats = statSync(childPath);
        if (childStats.isFile()) {
          snapshot.set(childPath, { mtimeMs: childStats.mtimeMs, size: childStats.size });
        }
      } catch {
        // Best-effort evidence collection only.
      }
    }
  };

  walk(root);
  return snapshot;
}
872
+
873
/**
 * Compares two file snapshots and lists the files created, deleted or
 * modified between them. A file counts as modified when its modification
 * time or size differs.
 *
 * @param before - Baseline snapshot.
 * @param after - Later snapshot.
 * @param root - Evidence root stored on every reported change.
 * @param observedAt - Timestamp stored on every reported change.
 * @returns Changes sorted by their normalized path.
 */
private diffFileSnapshots(
  before: Map<string, FileSnapshotEntry>,
  after: Map<string, FileSnapshotEntry>,
  root: string,
  observedAt: string
): CompletionEvidenceFileChange[] {
  const classify = (
    prior: FileSnapshotEntry | undefined,
    next: FileSnapshotEntry | undefined
  ): CompletionEvidenceFileChange['kind'] | undefined => {
    if (!prior) return next ? 'created' : undefined;
    if (!next) return 'deleted';
    const differs = prior.mtimeMs !== next.mtimeMs || prior.size !== next.size;
    return differs ? 'modified' : undefined;
  };

  const changes: CompletionEvidenceFileChange[] = [];
  const candidatePaths = new Set([...before.keys(), ...after.keys()]);
  for (const filePath of candidatePaths) {
    const kind = classify(before.get(filePath), after.get(filePath));
    if (kind) {
      changes.push({
        path: this.normalizeEvidencePath(filePath),
        kind,
        observedAt,
        root,
      });
    }
  }

  return changes.sort((left, right) => left.path.localeCompare(right.path));
}
907
+
908
/**
 * Converts a file path into the form stored in evidence: relative to
 * `this.cwd` when the file lives inside it, otherwise the path as given.
 *
 * @param filePath - Path of the file to normalize.
 * @returns The base name when `filePath` is the working directory itself, the
 *   cwd-relative path when the file is inside the working directory, and
 *   `filePath` unchanged when it lies outside.
 */
private normalizeEvidencePath(filePath: string): string {
  const relative = path.relative(this.cwd, filePath);
  if (relative === '') return path.basename(filePath);

  // Only a leading ".." path segment means the file is outside cwd. A plain
  // prefix test would also catch in-tree names such as "..cache/x" or "...notes".
  // path.relative can also hand back an absolute path (e.g. a different drive
  // on Windows), which is likewise outside cwd.
  const isOutsideCwd =
    relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  return isOutsideCwd ? filePath : relative;
}
913
+
914
/**
 * Translates a completion reason into a completion decision (mode, reason
 * text and summarized evidence).
 *
 * For `completed_by_owner_decision`, the mode is `'marker'` when a
 * `step_complete` signal is present in the step's evidence and
 * `'owner_decision'` otherwise.
 *
 * @param stepName - Step the decision is for.
 * @param completionReason - Reason the step was considered complete.
 * @returns The decision, or `undefined` for reasons other than the three handled here.
 */
private buildStepCompletionDecision(
  stepName: string,
  completionReason: WorkflowStepCompletionReason
): StepCompletionDecision | undefined {
  const assemble = (mode: StepCompletionDecision['mode'], reason: string): StepCompletionDecision => ({
    mode,
    reason,
    evidence: this.buildTrajectoryCompletionEvidence(stepName),
  });

  if (completionReason === 'completed_verified') {
    return assemble('verification', 'Verification passed');
  }

  if (completionReason === 'completed_by_evidence') {
    return assemble('evidence', 'Completion inferred from collected evidence');
  }

  if (completionReason === 'completed_by_owner_decision') {
    const markerObserved = this.getStepCompletionEvidence(stepName)?.coordinationSignals.some(
      (signal) => signal.kind === 'step_complete'
    );
    return markerObserved
      ? assemble('marker', 'Legacy STEP_COMPLETE marker observed')
      : assemble('owner_decision', 'Owner approved completion');
  }

  return undefined;
}
946
+
947
/**
 * Condenses a step's evidence into the compact form attached to a completion
 * decision: the last 6 signals, the last 3 completion-relevant channel posts
 * (each cut to 160 characters), the first 6 file changes, the exit code, and
 * a one-line summary of those counts.
 *
 * @param stepName - Step whose evidence is summarized.
 * @returns The condensed evidence, or `undefined` when the step has no evidence.
 *   Empty lists and an empty summary are reported as `undefined`.
 */
private buildTrajectoryCompletionEvidence(
  stepName: string
): StepCompletionDecision['evidence'] | undefined {
  const evidence = this.getStepCompletionEvidence(stepName);
  if (evidence === undefined) return undefined;

  const { exitCode } = evidence.process;
  const recentSignals = evidence.coordinationSignals
    .slice(-6)
    .map((signal) => signal.value ?? signal.text);
  const relevantPosts = evidence.channelPosts
    .filter((post) => post.completionRelevant)
    .slice(-3)
    .map((post) => post.text.slice(0, 160));
  const fileChanges = evidence.files.slice(0, 6).map((file) => `${file.kind}:${file.path}`);

  const summaryParts = [
    recentSignals.length > 0 ? `${recentSignals.length} signal(s)` : undefined,
    relevantPosts.length > 0 ? `${relevantPosts.length} relevant channel post(s)` : undefined,
    fileChanges.length > 0 ? `${fileChanges.length} file change(s)` : undefined,
    exitCode !== undefined ? `exit=${exitCode}` : undefined,
  ].filter((part): part is string => part !== undefined);

  const orUndefined = (items: string[]): string[] | undefined =>
    items.length > 0 ? items : undefined;

  return {
    summary: summaryParts.length > 0 ? summaryParts.join(', ') : undefined,
    signals: orUndefined(recentSignals),
    channelPosts: orUndefined(relevantPosts),
    files: orUndefined(fileChanges),
    exitCode,
  };
}
978
+
366
979
  // ── Progress logging ────────────────────────────────────────────────────
367
980
 
368
981
  /** Log a progress message with elapsed time since run start. */
@@ -1296,9 +1909,11 @@ export class WorkflowRunner {
1296
1909
  if (state.row.status === 'failed') {
1297
1910
  state.row.status = 'pending';
1298
1911
  state.row.error = undefined;
1912
+ state.row.completionReason = undefined;
1299
1913
  await this.db.updateStep(state.row.id, {
1300
1914
  status: 'pending',
1301
1915
  error: undefined,
1916
+ completionReason: undefined,
1302
1917
  updatedAt: new Date().toISOString(),
1303
1918
  });
1304
1919
  }
@@ -1327,6 +1942,8 @@ export class WorkflowRunner {
1327
1942
  this.currentConfig = config;
1328
1943
  this.currentRunId = runId;
1329
1944
  this.runStartTime = Date.now();
1945
+ this.runtimeStepAgents.clear();
1946
+ this.stepCompletionEvidence.clear();
1330
1947
 
1331
1948
  this.log(`Starting workflow "${workflow.name}" (${workflow.steps.length} steps)`);
1332
1949
 
@@ -1468,8 +2085,25 @@ export class WorkflowRunner {
1468
2085
  const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
1469
2086
  this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
1470
2087
 
2088
+ if (this.channel && (msg.to === this.channel || msg.to === `#${this.channel}`)) {
2089
+ const runtimeAgent = this.runtimeStepAgents.get(msg.from);
2090
+ this.recordChannelEvidence(msg.text, {
2091
+ sender: runtimeAgent?.logicalName ?? msg.from,
2092
+ actor: msg.from,
2093
+ role: runtimeAgent?.role,
2094
+ target: msg.to,
2095
+ origin: 'relay_message',
2096
+ stepName: runtimeAgent?.stepName,
2097
+ });
2098
+ }
2099
+
1471
2100
  const supervision = this.supervisedRuntimeAgents.get(msg.from);
1472
2101
  if (supervision?.role === 'owner') {
2102
+ this.recordStepToolSideEffect(supervision.stepName, {
2103
+ type: 'owner_monitoring',
2104
+ detail: `Owner messaged ${msg.to}: ${msg.text.slice(0, 120)}`,
2105
+ raw: { to: msg.to, text: msg.text },
2106
+ });
1473
2107
  void this.trajectory?.ownerMonitoringEvent(
1474
2108
  supervision.stepName,
1475
2109
  supervision.logicalName,
@@ -1651,6 +2285,7 @@ export class WorkflowRunner {
1651
2285
  updatedAt: new Date().toISOString(),
1652
2286
  });
1653
2287
  this.emit({ type: 'step:failed', runId, stepName, error: 'Cancelled' });
2288
+ this.finalizeStepEvidence(stepName, 'failed');
1654
2289
  }
1655
2290
  }
1656
2291
  this.emit({ type: 'run:cancelled', runId });
@@ -1690,6 +2325,7 @@ export class WorkflowRunner {
1690
2325
  this.lastIdleLog.clear();
1691
2326
  this.lastActivity.clear();
1692
2327
  this.supervisedRuntimeAgents.clear();
2328
+ this.runtimeStepAgents.clear();
1693
2329
 
1694
2330
  this.log('Shutting down broker...');
1695
2331
  await this.relay?.shutdown();
@@ -1824,6 +2460,9 @@ export class WorkflowRunner {
1824
2460
  attempts: (state?.row.retryCount ?? 0) + 1,
1825
2461
  output: state?.row.output,
1826
2462
  verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
2463
+ completionMode: state?.row.completionReason
2464
+ ? this.buildStepCompletionDecision(step.name, state.row.completionReason)?.mode
2465
+ : undefined,
1827
2466
  });
1828
2467
  }
1829
2468
  }
@@ -2029,13 +2668,24 @@ export class WorkflowRunner {
2029
2668
  const maxRetries = step.retries ?? errorHandling?.maxRetries ?? 0;
2030
2669
  const retryDelay = errorHandling?.retryDelayMs ?? 1000;
2031
2670
  let lastError: string | undefined;
2671
+ let lastCompletionReason: WorkflowStepCompletionReason | undefined;
2672
+ let lastExitCode: number | undefined;
2673
+ let lastExitSignal: string | undefined;
2032
2674
 
2033
2675
  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
2034
2676
  this.checkAborted();
2035
2677
 
2678
+ lastExitCode = undefined;
2679
+ lastExitSignal = undefined;
2680
+
2036
2681
  if (attempt > 0) {
2037
2682
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
2038
2683
  this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
2684
+ this.recordStepToolSideEffect(step.name, {
2685
+ type: 'retry',
2686
+ detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
2687
+ raw: { attempt, maxRetries },
2688
+ });
2039
2689
  state.row.retryCount = attempt;
2040
2690
  await this.db.updateStep(state.row.id, {
2041
2691
  retryCount: attempt,
@@ -2046,9 +2696,13 @@ export class WorkflowRunner {
2046
2696
 
2047
2697
  // Mark step as running
2048
2698
  state.row.status = 'running';
2699
+ state.row.error = undefined;
2700
+ state.row.completionReason = undefined;
2049
2701
  state.row.startedAt = new Date().toISOString();
2050
2702
  await this.db.updateStep(state.row.id, {
2051
2703
  status: 'running',
2704
+ error: undefined,
2705
+ completionReason: undefined,
2052
2706
  startedAt: state.row.startedAt,
2053
2707
  updatedAt: new Date().toISOString(),
2054
2708
  });
@@ -2068,11 +2722,13 @@ export class WorkflowRunner {
2068
2722
 
2069
2723
  // Resolve step workdir (named path reference) for deterministic steps
2070
2724
  const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
2725
+ this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
2071
2726
 
2072
2727
  try {
2073
2728
  // Delegate to executor if present
2074
2729
  if (this.executor?.executeDeterministicStep) {
2075
2730
  const result = await this.executor.executeDeterministicStep(step, resolvedCommand, stepCwd);
2731
+ lastExitCode = result.exitCode;
2076
2732
  const failOnError = step.failOnError !== false;
2077
2733
  if (failOnError && result.exitCode !== 0) {
2078
2734
  throw new Error(
@@ -2081,25 +2737,40 @@ export class WorkflowRunner {
2081
2737
  }
2082
2738
  const output =
2083
2739
  step.captureOutput !== false ? result.output : `Command completed (exit code ${result.exitCode})`;
2084
- if (step.verification) {
2085
- this.runVerification(step.verification, output, step.name);
2086
- }
2740
+ this.captureStepTerminalEvidence(
2741
+ step.name,
2742
+ { stdout: result.output, combined: result.output },
2743
+ { exitCode: result.exitCode }
2744
+ );
2745
+ const verificationResult = step.verification
2746
+ ? this.runVerification(step.verification, output, step.name)
2747
+ : undefined;
2087
2748
 
2088
2749
  // Mark completed
2089
2750
  state.row.status = 'completed';
2090
2751
  state.row.output = output;
2752
+ state.row.completionReason = verificationResult?.completionReason;
2091
2753
  state.row.completedAt = new Date().toISOString();
2092
2754
  await this.db.updateStep(state.row.id, {
2093
2755
  status: 'completed',
2094
2756
  output,
2757
+ completionReason: verificationResult?.completionReason,
2095
2758
  completedAt: state.row.completedAt,
2096
2759
  updatedAt: new Date().toISOString(),
2097
2760
  });
2098
2761
  await this.persistStepOutput(runId, step.name, output);
2099
2762
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2763
+ this.finalizeStepEvidence(
2764
+ step.name,
2765
+ 'completed',
2766
+ state.row.completedAt,
2767
+ verificationResult?.completionReason
2768
+ );
2100
2769
  return;
2101
2770
  }
2102
2771
 
2772
+ let commandStdout = '';
2773
+ let commandStderr = '';
2103
2774
  const output = await new Promise<string>((resolve, reject) => {
2104
2775
  const child = cpSpawn('sh', ['-c', resolvedCommand], {
2105
2776
  stdio: 'pipe',
@@ -2140,7 +2811,7 @@ export class WorkflowRunner {
2140
2811
  stderrChunks.push(chunk.toString());
2141
2812
  });
2142
2813
 
2143
- child.on('close', (code) => {
2814
+ child.on('close', (code, signal) => {
2144
2815
  if (timer) clearTimeout(timer);
2145
2816
  if (abortHandler && abortSignal) {
2146
2817
  abortSignal.removeEventListener('abort', abortHandler);
@@ -2160,6 +2831,10 @@ export class WorkflowRunner {
2160
2831
 
2161
2832
  const stdout = stdoutChunks.join('');
2162
2833
  const stderr = stderrChunks.join('');
2834
+ commandStdout = stdout;
2835
+ commandStderr = stderr;
2836
+ lastExitCode = code ?? undefined;
2837
+ lastExitSignal = signal ?? undefined;
2163
2838
 
2164
2839
  // Check exit code unless failOnError is explicitly false
2165
2840
  const failOnError = step.failOnError !== false;
@@ -2183,18 +2858,29 @@ export class WorkflowRunner {
2183
2858
  reject(new Error(`Failed to execute command: ${err.message}`));
2184
2859
  });
2185
2860
  });
2861
+ this.captureStepTerminalEvidence(
2862
+ step.name,
2863
+ {
2864
+ stdout: commandStdout || output,
2865
+ stderr: commandStderr,
2866
+ combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
2867
+ },
2868
+ { exitCode: lastExitCode, exitSignal: lastExitSignal }
2869
+ );
2186
2870
 
2187
- if (step.verification) {
2188
- this.runVerification(step.verification, output, step.name);
2189
- }
2871
+ const verificationResult = step.verification
2872
+ ? this.runVerification(step.verification, output, step.name)
2873
+ : undefined;
2190
2874
 
2191
2875
  // Mark completed
2192
2876
  state.row.status = 'completed';
2193
2877
  state.row.output = output;
2878
+ state.row.completionReason = verificationResult?.completionReason;
2194
2879
  state.row.completedAt = new Date().toISOString();
2195
2880
  await this.db.updateStep(state.row.id, {
2196
2881
  status: 'completed',
2197
2882
  output,
2883
+ completionReason: verificationResult?.completionReason,
2198
2884
  completedAt: state.row.completedAt,
2199
2885
  updatedAt: new Date().toISOString(),
2200
2886
  });
@@ -2203,15 +2889,29 @@ export class WorkflowRunner {
2203
2889
  await this.persistStepOutput(runId, step.name, output);
2204
2890
 
2205
2891
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2892
+ this.finalizeStepEvidence(
2893
+ step.name,
2894
+ 'completed',
2895
+ state.row.completedAt,
2896
+ verificationResult?.completionReason
2897
+ );
2206
2898
  return;
2207
2899
  } catch (err) {
2208
2900
  lastError = err instanceof Error ? err.message : String(err);
2901
+ lastCompletionReason =
2902
+ err instanceof WorkflowCompletionError ? err.completionReason : undefined;
2209
2903
  }
2210
2904
  }
2211
2905
 
2212
2906
  const errorMsg = lastError ?? 'Unknown error';
2213
2907
  this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
2214
- await this.markStepFailed(state, errorMsg, runId);
2908
+ await this.markStepFailed(
2909
+ state,
2910
+ errorMsg,
2911
+ runId,
2912
+ { exitCode: lastExitCode, exitSignal: lastExitSignal },
2913
+ lastCompletionReason
2914
+ );
2215
2915
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
2216
2916
  }
2217
2917
 
@@ -2227,14 +2927,20 @@ export class WorkflowRunner {
2227
2927
  ): Promise<void> {
2228
2928
  const state = stepStates.get(step.name);
2229
2929
  if (!state) throw new Error(`Step state not found: ${step.name}`);
2930
+ let lastExitCode: number | undefined;
2931
+ let lastExitSignal: string | undefined;
2230
2932
 
2231
2933
  this.checkAborted();
2232
2934
 
2233
2935
  // Mark step as running
2234
2936
  state.row.status = 'running';
2937
+ state.row.error = undefined;
2938
+ state.row.completionReason = undefined;
2235
2939
  state.row.startedAt = new Date().toISOString();
2236
2940
  await this.db.updateStep(state.row.id, {
2237
2941
  status: 'running',
2942
+ error: undefined,
2943
+ completionReason: undefined,
2238
2944
  startedAt: state.row.startedAt,
2239
2945
  updatedAt: new Date().toISOString(),
2240
2946
  });
@@ -2254,6 +2960,7 @@ export class WorkflowRunner {
2254
2960
 
2255
2961
  // Resolve workdir for worktree steps (same as deterministic/agent steps)
2256
2962
  const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
2963
+ this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
2257
2964
 
2258
2965
  if (!branch) {
2259
2966
  const errorMsg = 'Worktree step missing required "branch" field';
@@ -2298,6 +3005,10 @@ export class WorkflowRunner {
2298
3005
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
2299
3006
  }
2300
3007
 
3008
+ let commandStdout = '';
3009
+ let commandStderr = '';
3010
+ let commandExitCode: number | undefined;
3011
+ let commandExitSignal: string | undefined;
2301
3012
  const output = await new Promise<string>((resolve, reject) => {
2302
3013
  const child = cpSpawn('sh', ['-c', worktreeCmd], {
2303
3014
  stdio: 'pipe',
@@ -2338,7 +3049,7 @@ export class WorkflowRunner {
2338
3049
  stderrChunks.push(chunk.toString());
2339
3050
  });
2340
3051
 
2341
- child.on('close', (code) => {
3052
+ child.on('close', (code, signal) => {
2342
3053
  if (timer) clearTimeout(timer);
2343
3054
  if (abortHandler && abortSignal) {
2344
3055
  abortSignal.removeEventListener('abort', abortHandler);
@@ -2356,7 +3067,13 @@ export class WorkflowRunner {
2356
3067
  return;
2357
3068
  }
2358
3069
 
3070
+ commandStdout = stdoutChunks.join('');
2359
3071
  const stderr = stderrChunks.join('');
3072
+ commandStderr = stderr;
3073
+ commandExitCode = code ?? undefined;
3074
+ commandExitSignal = signal ?? undefined;
3075
+ lastExitCode = commandExitCode;
3076
+ lastExitSignal = commandExitSignal;
2360
3077
 
2361
3078
  if (code !== 0 && code !== null) {
2362
3079
  reject(
@@ -2379,6 +3096,15 @@ export class WorkflowRunner {
2379
3096
  reject(new Error(`Failed to execute git worktree command: ${err.message}`));
2380
3097
  });
2381
3098
  });
3099
+ this.captureStepTerminalEvidence(
3100
+ step.name,
3101
+ {
3102
+ stdout: commandStdout || output,
3103
+ stderr: commandStderr,
3104
+ combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
3105
+ },
3106
+ { exitCode: commandExitCode, exitSignal: commandExitSignal }
3107
+ );
2382
3108
 
2383
3109
  // Mark completed
2384
3110
  state.row.status = 'completed';
@@ -2398,10 +3124,19 @@ export class WorkflowRunner {
2398
3124
  this.postToChannel(
2399
3125
  `**[${step.name}]** Worktree created at: ${output}\n Branch: ${branch}${!branchExists && createBranch ? ' (created)' : ''}`
2400
3126
  );
3127
+ this.recordStepToolSideEffect(step.name, {
3128
+ type: 'worktree_created',
3129
+ detail: `Worktree created at ${output}`,
3130
+ raw: { branch, createdBranch: !branchExists && createBranch },
3131
+ });
3132
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt);
2401
3133
  } catch (err) {
2402
3134
  const errorMsg = err instanceof Error ? err.message : String(err);
2403
3135
  this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
2404
- await this.markStepFailed(state, errorMsg, runId);
3136
+ await this.markStepFailed(state, errorMsg, runId, {
3137
+ exitCode: lastExitCode,
3138
+ exitSignal: lastExitSignal,
3139
+ });
2405
3140
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
2406
3141
  }
2407
3142
  }
@@ -2429,8 +3164,9 @@ export class WorkflowRunner {
2429
3164
  }
2430
3165
  const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
2431
3166
  const usesOwnerFlow = specialistDef.interactive !== false;
2432
- const ownerDef = usesOwnerFlow ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
2433
- const reviewDef = usesOwnerFlow ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
3167
+ const usesAutoHardening = usesOwnerFlow && !this.isExplicitInteractiveWorker(specialistDef);
3168
+ const ownerDef = usesAutoHardening ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
3169
+ const reviewDef = usesAutoHardening ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
2434
3170
  const supervised: SupervisedStep = {
2435
3171
  specialist: specialistDef,
2436
3172
  owner: ownerDef,
@@ -2454,7 +3190,13 @@ export class WorkflowRunner {
2454
3190
  let lastError: string | undefined;
2455
3191
  let lastExitCode: number | undefined;
2456
3192
  let lastExitSignal: string | undefined;
3193
+ let lastCompletionReason: WorkflowStepCompletionReason | undefined;
2457
3194
 
3195
+ // OWNER_DECISION: INCOMPLETE_RETRY is enforced here at the attempt-loop level so every
3196
+ // interactive execution path shares the same contract:
3197
+ // - retries remaining => throw back into the loop and retry
3198
+ // - maxRetries = 0 => fail immediately after the first retry request
3199
+ // - retry budget exhausted => fail with retry_requested_by_owner, never "completed"
2458
3200
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
2459
3201
  this.checkAborted();
2460
3202
 
@@ -2465,6 +3207,11 @@ export class WorkflowRunner {
2465
3207
  if (attempt > 0) {
2466
3208
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
2467
3209
  this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
3210
+ this.recordStepToolSideEffect(step.name, {
3211
+ type: 'retry',
3212
+ detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
3213
+ raw: { attempt, maxRetries },
3214
+ });
2468
3215
  state.row.retryCount = attempt;
2469
3216
  await this.db.updateStep(state.row.id, {
2470
3217
  retryCount: attempt,
@@ -2477,16 +3224,21 @@ export class WorkflowRunner {
2477
3224
  try {
2478
3225
  // Mark step as running
2479
3226
  state.row.status = 'running';
3227
+ state.row.error = undefined;
3228
+ state.row.completionReason = undefined;
2480
3229
  state.row.startedAt = new Date().toISOString();
2481
3230
  await this.db.updateStep(state.row.id, {
2482
3231
  status: 'running',
3232
+ error: undefined,
3233
+ completionReason: undefined,
2483
3234
  startedAt: state.row.startedAt,
2484
3235
  updatedAt: new Date().toISOString(),
2485
3236
  });
2486
3237
  this.emit({ type: 'step:started', runId, stepName: step.name });
2487
- this.postToChannel(
2488
- `**[${step.name}]** Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`
3238
+ this.log(
3239
+ `[${step.name}] Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`
2489
3240
  );
3241
+ this.initializeStepSignalParticipants(step.name, ownerDef.name, specialistDef.name);
2490
3242
  await this.trajectory?.stepStarted(step, ownerDef.name, {
2491
3243
  role: usesDedicatedOwner ? 'owner' : 'specialist',
2492
3244
  owner: ownerDef.name,
@@ -2539,10 +3291,21 @@ export class WorkflowRunner {
2539
3291
  };
2540
3292
  const effectiveSpecialist = applyStepWorkdir(specialistDef);
2541
3293
  const effectiveOwner = applyStepWorkdir(ownerDef);
3294
+ const effectiveReviewer = reviewDef ? applyStepWorkdir(reviewDef) : undefined;
3295
+ this.beginStepEvidence(
3296
+ step.name,
3297
+ [
3298
+ this.resolveAgentCwd(effectiveSpecialist),
3299
+ this.resolveAgentCwd(effectiveOwner),
3300
+ effectiveReviewer ? this.resolveAgentCwd(effectiveReviewer) : undefined,
3301
+ ],
3302
+ state.row.startedAt
3303
+ );
2542
3304
 
2543
3305
  let specialistOutput: string;
2544
3306
  let ownerOutput: string;
2545
3307
  let ownerElapsed: number;
3308
+ let completionReason: WorkflowStepCompletionReason | undefined;
2546
3309
 
2547
3310
  if (usesDedicatedOwner) {
2548
3311
  const result = await this.executeSupervisedAgentStep(
@@ -2554,35 +3317,122 @@ export class WorkflowRunner {
2554
3317
  specialistOutput = result.specialistOutput;
2555
3318
  ownerOutput = result.ownerOutput;
2556
3319
  ownerElapsed = result.ownerElapsed;
3320
+ completionReason = result.completionReason;
2557
3321
  } else {
2558
3322
  const ownerTask = this.injectStepOwnerContract(step, resolvedTask, effectiveOwner, effectiveSpecialist);
3323
+ const explicitInteractiveWorker = this.isExplicitInteractiveWorker(effectiveOwner);
3324
+ let explicitWorkerHandle: Agent | undefined;
3325
+ let explicitWorkerCompleted = false;
3326
+ let explicitWorkerOutput = '';
2559
3327
 
2560
3328
  this.log(`[${step.name}] Spawning owner "${effectiveOwner.name}" (cli: ${effectiveOwner.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
2561
3329
  const resolvedStep = { ...step, task: ownerTask };
2562
3330
  const ownerStartTime = Date.now();
2563
3331
  const spawnResult = this.executor
2564
3332
  ? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs)
2565
- : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs);
3333
+ : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs, {
3334
+ evidenceStepName: step.name,
3335
+ evidenceRole: usesOwnerFlow ? 'owner' : 'specialist',
3336
+ logicalName: effectiveOwner.name,
3337
+ onSpawned: explicitInteractiveWorker
3338
+ ? ({ agent }) => {
3339
+ explicitWorkerHandle = agent;
3340
+ }
3341
+ : undefined,
3342
+ onChunk: explicitInteractiveWorker
3343
+ ? ({ chunk }) => {
3344
+ explicitWorkerOutput += WorkflowRunner.stripAnsi(chunk);
3345
+ if (
3346
+ !explicitWorkerCompleted &&
3347
+ this.hasExplicitInteractiveWorkerCompletionEvidence(
3348
+ step,
3349
+ explicitWorkerOutput,
3350
+ ownerTask,
3351
+ resolvedTask
3352
+ )
3353
+ ) {
3354
+ explicitWorkerCompleted = true;
3355
+ void explicitWorkerHandle?.release().catch(() => undefined);
3356
+ }
3357
+ }
3358
+ : undefined,
3359
+ });
2566
3360
  const output = typeof spawnResult === 'string' ? spawnResult : spawnResult.output;
2567
3361
  lastExitCode = typeof spawnResult === 'string' ? undefined : spawnResult.exitCode;
2568
3362
  lastExitSignal = typeof spawnResult === 'string' ? undefined : spawnResult.exitSignal;
2569
3363
  ownerElapsed = Date.now() - ownerStartTime;
2570
3364
  this.log(`[${step.name}] Owner "${effectiveOwner.name}" exited`);
2571
3365
  if (usesOwnerFlow) {
2572
- this.assertOwnerCompletionMarker(step, output, ownerTask);
3366
+ try {
3367
+ const completionDecision = this.resolveOwnerCompletionDecision(
3368
+ step,
3369
+ output,
3370
+ output,
3371
+ ownerTask,
3372
+ resolvedTask
3373
+ );
3374
+ completionReason = completionDecision.completionReason;
3375
+ } catch (error) {
3376
+ const canUseVerificationFallback =
3377
+ !usesDedicatedOwner &&
3378
+ step.verification &&
3379
+ error instanceof WorkflowCompletionError &&
3380
+ error.completionReason === 'failed_no_evidence';
3381
+ if (!canUseVerificationFallback) {
3382
+ throw error;
3383
+ }
3384
+ }
2573
3385
  }
2574
3386
  specialistOutput = output;
2575
3387
  ownerOutput = output;
2576
3388
  }
2577
3389
 
2578
- // Run verification if configured
2579
- if (step.verification) {
2580
- this.runVerification(
3390
+ // Even non-interactive steps can emit an explicit OWNER_DECISION contract.
3391
+ // Honor retry/fail/clarification signals before verification-driven success so
3392
+ // real runs stay consistent with interactive owner flows.
3393
+ if (!usesOwnerFlow) {
3394
+ const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
3395
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
3396
+ throw new WorkflowCompletionError(
3397
+ `Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3398
+ 'retry_requested_by_owner'
3399
+ );
3400
+ }
3401
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
3402
+ throw new WorkflowCompletionError(
3403
+ `Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3404
+ 'failed_owner_decision'
3405
+ );
3406
+ }
3407
+ if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
3408
+ throw new WorkflowCompletionError(
3409
+ `Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3410
+ 'retry_requested_by_owner'
3411
+ );
3412
+ }
3413
+ }
3414
+
3415
+ // Run verification if configured.
3416
+ // Self-owned interactive steps still need verification fallback so
3417
+ // explicit OWNER_DECISION output is not mandatory for the happy path.
3418
+ if (step.verification && (!usesOwnerFlow || !usesDedicatedOwner) && !completionReason) {
3419
+ const verificationResult = this.runVerification(
2581
3420
  step.verification,
2582
3421
  specialistOutput,
2583
3422
  step.name,
2584
3423
  effectiveOwner.interactive === false ? undefined : resolvedTask
2585
3424
  );
3425
+ completionReason = verificationResult.completionReason;
3426
+ }
3427
+
3428
+ // Retry-style owner decisions are control-flow signals, not terminal success states.
3429
+ // Guard here so they cannot accidentally fall through into review or completed-step
3430
+ // persistence if a future branch returns a completionReason instead of throwing.
3431
+ if (completionReason === 'retry_requested_by_owner') {
3432
+ throw new WorkflowCompletionError(
3433
+ `Step "${step.name}" owner requested another attempt`,
3434
+ 'retry_requested_by_owner'
3435
+ );
2586
3436
  }
2587
3437
 
2588
3438
  // Every interactive step gets a review pass; pick a dedicated reviewer when available.
@@ -2604,10 +3454,12 @@ export class WorkflowRunner {
2604
3454
  // Mark completed
2605
3455
  state.row.status = 'completed';
2606
3456
  state.row.output = combinedOutput;
3457
+ state.row.completionReason = completionReason;
2607
3458
  state.row.completedAt = new Date().toISOString();
2608
3459
  await this.db.updateStep(state.row.id, {
2609
3460
  status: 'completed',
2610
3461
  output: combinedOutput,
3462
+ completionReason,
2611
3463
  completedAt: state.row.completedAt,
2612
3464
  updatedAt: new Date().toISOString(),
2613
3465
  });
@@ -2616,10 +3468,21 @@ export class WorkflowRunner {
2616
3468
  await this.persistStepOutput(runId, step.name, combinedOutput);
2617
3469
 
2618
3470
  this.emit({ type: 'step:completed', runId, stepName: step.name, output: combinedOutput, exitCode: lastExitCode, exitSignal: lastExitSignal });
3471
+ this.finalizeStepEvidence(
3472
+ step.name,
3473
+ 'completed',
3474
+ state.row.completedAt,
3475
+ completionReason
3476
+ );
2619
3477
  await this.trajectory?.stepCompleted(step, combinedOutput, attempt + 1);
2620
3478
  return;
2621
3479
  } catch (err) {
2622
3480
  lastError = err instanceof Error ? err.message : String(err);
3481
+ lastCompletionReason =
3482
+ err instanceof WorkflowCompletionError ? err.completionReason : undefined;
3483
+ if (lastCompletionReason === 'retry_requested_by_owner' && attempt >= maxRetries) {
3484
+ lastError = this.buildOwnerRetryBudgetExceededMessage(step.name, maxRetries, lastError);
3485
+ }
2623
3486
  if (err instanceof SpawnExitError) {
2624
3487
  lastExitCode = err.exitCode;
2625
3488
  lastExitSignal = err.exitSignal;
@@ -2649,12 +3512,41 @@ export class WorkflowRunner {
2649
3512
  await this.markStepFailed(state, lastError ?? 'Unknown error', runId, {
2650
3513
  exitCode: lastExitCode,
2651
3514
  exitSignal: lastExitSignal,
2652
- });
3515
+ }, lastCompletionReason);
2653
3516
  throw new Error(
2654
3517
  `Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`
2655
3518
  );
2656
3519
  }
2657
3520
 
3521
/**
 * Builds the failure message used when an owner asks for another attempt
 * (OWNER_DECISION: INCOMPLETE_RETRY) but the step has no retry budget left.
 *
 * @param stepName - Name of the workflow step, quoted verbatim in the message.
 * @param maxRetries - Configured retry count; `0` selects the "no retries are
 *   configured" wording, any other value selects the "budget is exhausted" wording.
 * @param ownerDecisionError - Optional text of the latest owner decision error. A
 *   leading `Step "<stepName>" ` is removed so the step name is not repeated, and
 *   the remainder is trimmed before being appended.
 * @returns The explanation, followed by ` Latest owner decision: …` when a
 *   non-empty decision text was supplied.
 */
private buildOwnerRetryBudgetExceededMessage(
  stepName: string,
  maxRetries: number,
  ownerDecisionError?: string
): string {
  const stepPrefix = `Step "${stepName}" `;

  // Normalize the owner's last decision: strip the redundant step prefix, then trim.
  let latestDecision = ownerDecisionError;
  if (latestDecision !== undefined && latestDecision.startsWith(stepPrefix)) {
    latestDecision = latestDecision.slice(stepPrefix.length);
  }
  latestDecision = latestDecision?.trim();

  // Empty or missing decision text contributes nothing to the message.
  const suffix = latestDecision ? ` Latest owner decision: ${latestDecision}` : '';

  const explanation =
    maxRetries === 0
      ? `Step "${stepName}" owner requested another attempt, but no retries are configured ` +
        `(maxRetries=0). Configure retries > 0 to allow OWNER_DECISION: INCOMPLETE_RETRY.`
      : `Step "${stepName}" owner requested another attempt after ${maxRetries + 1} total attempts, ` +
        `but the retry budget is exhausted (maxRetries=${maxRetries}).`;

  return explanation + suffix;
}
3549
+
2658
3550
  private injectStepOwnerContract(
2659
3551
  step: WorkflowStep,
2660
3552
  resolvedTask: string,
@@ -2673,7 +3565,10 @@ export class WorkflowRunner {
2673
3565
  `- You are the accountable owner for step "${step.name}".\n` +
2674
3566
  (specialistNote ? `- ${specialistNote}\n` : '') +
2675
3567
  `- If you delegate, you must still verify completion yourself.\n` +
2676
- `- Before exiting, provide an explicit completion line: STEP_COMPLETE:${step.name}\n` +
3568
+ `- Preferred final decision format:\n` +
3569
+ ` OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
3570
+ ` REASON: <one sentence>\n` +
3571
+ `- Legacy completion marker still supported: STEP_COMPLETE:${step.name}\n` +
2677
3572
  `- Then self-terminate immediately with /exit.`
2678
3573
  );
2679
3574
  }
@@ -2686,6 +3581,10 @@ export class WorkflowRunner {
2686
3581
  ): string {
2687
3582
  const verificationGuide = this.buildSupervisorVerificationGuide(step.verification);
2688
3583
  const channelLine = this.channel ? `#${this.channel}` : '(workflow channel unavailable)';
3584
+ const channelContract = this.channel
3585
+ ? `- Prefer Relaycast/group-chat handoff signals over terminal sentinels: wait for the worker to post \`WORKER_DONE: <brief summary>\` in ${channelLine}\n` +
3586
+ `- When you have validated the handoff, post \`LEAD_DONE: <brief summary>\` to ${channelLine} before you exit\n`
3587
+ : '';
2689
3588
  return (
2690
3589
  `You are the step owner/supervisor for step "${step.name}".\n\n` +
2691
3590
  `Worker: ${supervised.specialist.name} (runtime: ${workerRuntimeName}) on ${channelLine}\n` +
@@ -2695,15 +3594,35 @@ export class WorkflowRunner {
2695
3594
  `- Watch ${channelLine} for the worker's progress messages and mirrored PTY output\n` +
2696
3595
  `- Check file changes: run \`git diff --stat\` or inspect expected files directly\n` +
2697
3596
  `- Ask the worker directly on ${channelLine} if you need a status update\n` +
3597
+ channelContract +
2698
3598
  verificationGuide +
2699
- `\nWhen you're satisfied the work is done correctly:\n` +
2700
- `Output exactly: STEP_COMPLETE:${step.name}`
3599
+ `\nWhen you have enough evidence, return:\n` +
3600
+ `OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
3601
+ `REASON: <one sentence>\n` +
3602
+ `Legacy completion marker still supported: STEP_COMPLETE:${step.name}`
2701
3603
  );
2702
3604
  }
2703
3605
 
2704
- private buildSupervisorVerificationGuide(verification?: VerificationCheck): string {
2705
- if (!verification) return '';
2706
- switch (verification.type) {
3606
/**
 * Appends the worker completion contract to a specialist's task so the worker
 * signals its handoff on the workflow channel.
 *
 * @param step - The step being executed; its name is quoted in the contract.
 * @param originalTask - The task text the worker would otherwise receive.
 * @param supervised - Supervision pairing; the owner's name is quoted in the contract.
 * @returns `originalTask` unchanged when no workflow channel is set; otherwise the
 *   task followed by a `---` separator and the contract lines.
 */
private buildWorkerHandoffTask(
  step: WorkflowStep,
  originalTask: string,
  supervised: SupervisedStep
): string {
  const channel = this.channel;
  if (!channel) {
    // Without a channel there is nowhere to post WORKER_DONE, so add no contract.
    return originalTask;
  }

  const contractLines = [
    `WORKER COMPLETION CONTRACT:`,
    `- You are handing work off to owner "${supervised.owner.name}" for step "${step.name}".`,
    `- When your work is ready for review, post to #${channel}: \`WORKER_DONE: <brief summary>\``,
    `- Do not rely on terminal output alone for handoff; use the workflow group chat signal above.`,
    `- After posting your handoff signal, self-terminate with /exit unless the owner asks for follow-up.`,
  ];

  return `${originalTask}\n\n---\n${contractLines.join('\n')}`;
}
3622
+
3623
+ private buildSupervisorVerificationGuide(verification?: VerificationCheck): string {
3624
+ if (!verification) return '';
3625
+ switch (verification.type) {
2707
3626
  case 'output_contains':
2708
3627
  return `- Verification gate: confirm the worker output contains ${JSON.stringify(verification.value)}\n`;
2709
3628
  case 'file_exists':
@@ -2722,15 +3641,21 @@ export class WorkflowRunner {
2722
3641
  supervised: SupervisedStep,
2723
3642
  resolvedTask: string,
2724
3643
  timeoutMs?: number
2725
- ): Promise<{ specialistOutput: string; ownerOutput: string; ownerElapsed: number }> {
3644
+ ): Promise<{
3645
+ specialistOutput: string;
3646
+ ownerOutput: string;
3647
+ ownerElapsed: number;
3648
+ completionReason: WorkflowStepCompletionReason;
3649
+ }> {
2726
3650
  if (this.executor) {
3651
+ const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
2727
3652
  const supervisorTask = this.buildOwnerSupervisorTask(
2728
3653
  step,
2729
3654
  resolvedTask,
2730
3655
  supervised,
2731
3656
  supervised.specialist.name
2732
3657
  );
2733
- const specialistStep = { ...step, task: resolvedTask };
3658
+ const specialistStep = { ...step, task: specialistTask };
2734
3659
  const ownerStep: WorkflowStep = {
2735
3660
  ...step,
2736
3661
  name: `${step.name}-owner`,
@@ -2744,7 +3669,7 @@ export class WorkflowRunner {
2744
3669
  const specialistPromise = this.executor.executeAgentStep(
2745
3670
  specialistStep,
2746
3671
  supervised.specialist,
2747
- resolvedTask,
3672
+ specialistTask,
2748
3673
  timeoutMs
2749
3674
  );
2750
3675
  // Guard against unhandled rejection if owner fails before specialist settles
@@ -2759,10 +3684,20 @@ export class WorkflowRunner {
2759
3684
  timeoutMs
2760
3685
  );
2761
3686
  const ownerElapsed = Date.now() - ownerStartTime;
2762
-
2763
- this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2764
3687
  const specialistOutput = await specialistPromise;
2765
- return { specialistOutput, ownerOutput, ownerElapsed };
3688
+ const completionDecision = this.resolveOwnerCompletionDecision(
3689
+ step,
3690
+ ownerOutput,
3691
+ specialistOutput,
3692
+ supervisorTask,
3693
+ resolvedTask
3694
+ );
3695
+ return {
3696
+ specialistOutput,
3697
+ ownerOutput,
3698
+ ownerElapsed,
3699
+ completionReason: completionDecision.completionReason,
3700
+ };
2766
3701
  } catch (error) {
2767
3702
  await specialistSettled;
2768
3703
  throw error;
@@ -2780,12 +3715,16 @@ export class WorkflowRunner {
2780
3715
  rejectWorkerSpawn = reject;
2781
3716
  });
2782
3717
 
2783
- const specialistStep = { ...step, task: resolvedTask };
3718
+ const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
3719
+ const specialistStep = { ...step, task: specialistTask };
2784
3720
  this.log(
2785
3721
  `[${step.name}] Spawning specialist "${supervised.specialist.name}" (cli: ${supervised.specialist.cli})`
2786
3722
  );
2787
3723
  const workerPromise = this.spawnAndWait(supervised.specialist, specialistStep, timeoutMs, {
2788
3724
  agentNameSuffix: 'worker',
3725
+ evidenceStepName: step.name,
3726
+ evidenceRole: 'worker',
3727
+ logicalName: supervised.specialist.name,
2789
3728
  onSpawned: ({ actualName, agent }) => {
2790
3729
  workerHandle = agent;
2791
3730
  workerRuntimeName = actualName;
@@ -2800,7 +3739,13 @@ export class WorkflowRunner {
2800
3739
  }
2801
3740
  },
2802
3741
  onChunk: ({ agentName, chunk }) => {
2803
- this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk);
3742
+ this.forwardAgentChunkToChannel(
3743
+ step.name,
3744
+ 'Worker',
3745
+ agentName,
3746
+ chunk,
3747
+ supervised.specialist.name
3748
+ );
2804
3749
  },
2805
3750
  }).catch((error) => {
2806
3751
  if (!workerSpawned) {
@@ -2814,10 +3759,15 @@ export class WorkflowRunner {
2814
3759
  workerPromise
2815
3760
  .then((result) => {
2816
3761
  workerReleased = true;
2817
- this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited`);
3762
+ this.log(`[${step.name}] Worker ${workerRuntimeName} exited`);
3763
+ this.recordStepToolSideEffect(step.name, {
3764
+ type: 'worker_exit',
3765
+ detail: `Worker ${workerRuntimeName} exited`,
3766
+ raw: { worker: workerRuntimeName, exitCode: result.exitCode, exitSignal: result.exitSignal },
3767
+ });
2818
3768
  if (step.verification?.type === 'output_contains' && result.output.includes(step.verification.value)) {
2819
- this.postToChannel(
2820
- `**[${step.name}]** Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`
3769
+ this.log(
3770
+ `[${step.name}] Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`
2821
3771
  );
2822
3772
  }
2823
3773
  })
@@ -2826,6 +3776,11 @@ export class WorkflowRunner {
2826
3776
  this.postToChannel(
2827
3777
  `**[${step.name}]** Worker \`${workerRuntimeName}\` exited with error: ${message}`
2828
3778
  );
3779
+ this.recordStepToolSideEffect(step.name, {
3780
+ type: 'worker_error',
3781
+ detail: `Worker ${workerRuntimeName} exited with error: ${message}`,
3782
+ raw: { worker: workerRuntimeName, error: message },
3783
+ });
2829
3784
  });
2830
3785
 
2831
3786
  await workerReady;
@@ -2844,6 +3799,9 @@ export class WorkflowRunner {
2844
3799
  try {
2845
3800
  const ownerResultObj = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, {
2846
3801
  agentNameSuffix: 'owner',
3802
+ evidenceStepName: step.name,
3803
+ evidenceRole: 'owner',
3804
+ logicalName: supervised.owner.name,
2847
3805
  onSpawned: ({ actualName }) => {
2848
3806
  this.supervisedRuntimeAgents.set(actualName, {
2849
3807
  stepName: step.name,
@@ -2858,10 +3816,20 @@ export class WorkflowRunner {
2858
3816
  const ownerElapsed = Date.now() - ownerStartTime;
2859
3817
  const ownerOutput = ownerResultObj.output;
2860
3818
  this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
2861
- this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2862
-
2863
3819
  const specialistOutput = (await workerPromise).output;
2864
- return { specialistOutput, ownerOutput, ownerElapsed };
3820
+ const completionDecision = this.resolveOwnerCompletionDecision(
3821
+ step,
3822
+ ownerOutput,
3823
+ specialistOutput,
3824
+ supervisorTask,
3825
+ resolvedTask
3826
+ );
3827
+ return {
3828
+ specialistOutput,
3829
+ ownerOutput,
3830
+ ownerElapsed,
3831
+ completionReason: completionDecision.completionReason,
3832
+ };
2865
3833
  } catch (error) {
2866
3834
  const message = error instanceof Error ? error.message : String(error);
2867
3835
  if (!workerReleased && workerHandle) {
@@ -2879,15 +3847,22 @@ export class WorkflowRunner {
2879
3847
  stepName: string,
2880
3848
  roleLabel: string,
2881
3849
  agentName: string,
2882
- chunk: string
3850
+ chunk: string,
3851
+ sender?: string
2883
3852
  ): void {
2884
- const lines = WorkflowRunner.stripAnsi(chunk)
3853
+ const lines = WorkflowRunner.scrubForChannel(chunk)
2885
3854
  .split('\n')
2886
3855
  .map((line) => line.trim())
2887
3856
  .filter(Boolean)
2888
3857
  .slice(0, 3);
2889
3858
  for (const line of lines) {
2890
- this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`);
3859
+ this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`, {
3860
+ stepName,
3861
+ sender,
3862
+ actor: agentName,
3863
+ role: roleLabel,
3864
+ origin: 'forwarded_chunk',
3865
+ });
2891
3866
  }
2892
3867
  }
2893
3868
 
@@ -2904,6 +3879,11 @@ export class WorkflowRunner {
2904
3879
  if (/STEP_COMPLETE:/i.test(stripped)) details.push('Declared the step complete');
2905
3880
 
2906
3881
  for (const detail of details) {
3882
+ this.recordStepToolSideEffect(step.name, {
3883
+ type: 'owner_monitoring',
3884
+ detail,
3885
+ raw: { output: stripped.slice(0, 240), owner: ownerDef.name },
3886
+ });
2907
3887
  await this.trajectory?.ownerMonitoringEvent(step.name, ownerDef.name, detail, {
2908
3888
  output: stripped.slice(0, 240),
2909
3889
  });
@@ -2947,6 +3927,8 @@ export class WorkflowRunner {
2947
3927
  agentMap: Map<string, AgentDefinition>
2948
3928
  ): AgentDefinition {
2949
3929
  const allDefs = [...agentMap.values()].map((d) => WorkflowRunner.resolveAgentDef(d));
3930
+ const eligible = (def: AgentDefinition): boolean =>
3931
+ def.name !== ownerDef.name && !this.isExplicitInteractiveWorker(def);
2950
3932
  const isReviewer = (def: AgentDefinition): boolean => {
2951
3933
  const roleLC = def.role?.toLowerCase() ?? '';
2952
3934
  const nameLC = def.name.toLowerCase();
@@ -2969,34 +3951,337 @@ export class WorkflowRunner {
2969
3951
  return isReviewer(def) ? 1 : 0;
2970
3952
  };
2971
3953
  const dedicated = allDefs
2972
- .filter((d) => d.name !== ownerDef.name && isReviewer(d))
3954
+ .filter((d) => eligible(d) && isReviewer(d))
2973
3955
  .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name))[0];
2974
3956
  if (dedicated) return dedicated;
2975
3957
 
2976
- const alternate = allDefs.find((d) => d.name !== ownerDef.name && d.interactive !== false);
3958
+ const alternate = allDefs.find((d) => eligible(d) && d.interactive !== false);
2977
3959
  if (alternate) return alternate;
2978
3960
 
2979
3961
  // Self-review fallback — log a warning since owner reviewing itself is weak.
2980
3962
  return ownerDef;
2981
3963
  }
2982
3964
 
2983
- private assertOwnerCompletionMarker(step: WorkflowStep, output: string, injectedTaskText: string): void {
3965
+ private isExplicitInteractiveWorker(agentDef: AgentDefinition): boolean {
3966
+ return agentDef.preset === 'worker' && agentDef.interactive !== false;
3967
+ }
3968
+
3969
+ private resolveOwnerCompletionDecision(
3970
+ step: WorkflowStep,
3971
+ ownerOutput: string,
3972
+ specialistOutput: string,
3973
+ injectedTaskText: string,
3974
+ verificationTaskText?: string
3975
+ ): CompletionDecisionResult {
3976
+ const hasMarker = this.hasOwnerCompletionMarker(step, ownerOutput, injectedTaskText);
3977
+ const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
3978
+
3979
+ // INCOMPLETE_RETRY / NEEDS_CLARIFICATION are non-terminal owner outcomes. They never mark
3980
+ // the step complete here; instead they throw back to executeAgentStep(), which decides
3981
+ // whether to retry or fail based on the remaining retry budget for this step.
3982
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
3983
+ throw new WorkflowCompletionError(
3984
+ `Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3985
+ 'retry_requested_by_owner'
3986
+ );
3987
+ }
3988
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
3989
+ throw new WorkflowCompletionError(
3990
+ `Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3991
+ 'failed_owner_decision'
3992
+ );
3993
+ }
3994
+ if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
3995
+ throw new WorkflowCompletionError(
3996
+ `Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3997
+ 'retry_requested_by_owner'
3998
+ );
3999
+ }
4000
+
4001
+ const verificationResult = step.verification
4002
+ ? this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, {
4003
+ allowFailure: true,
4004
+ completionMarkerFound: hasMarker,
4005
+ })
4006
+ : { passed: false };
4007
+
4008
+ if (verificationResult.error) {
4009
+ throw new WorkflowCompletionError(
4010
+ `Step "${step.name}" verification failed and no owner decision or evidence established completion: ${verificationResult.error}`,
4011
+ 'failed_verification'
4012
+ );
4013
+ }
4014
+
4015
+ if (explicitOwnerDecision?.decision === 'COMPLETE') {
4016
+ if (!hasMarker) {
4017
+ this.log(
4018
+ `[${step.name}] Structured OWNER_DECISION completed the step without legacy STEP_COMPLETE marker`
4019
+ );
4020
+ }
4021
+ return {
4022
+ completionReason: 'completed_by_owner_decision',
4023
+ ownerDecision: explicitOwnerDecision.decision,
4024
+ reason: explicitOwnerDecision.reason,
4025
+ };
4026
+ }
4027
+ if (verificationResult.passed) {
4028
+ return { completionReason: 'completed_verified' };
4029
+ }
4030
+
4031
+ const ownerDecision = this.parseOwnerDecision(step, ownerOutput, hasMarker);
4032
+ if (ownerDecision?.decision === 'COMPLETE') {
4033
+ return {
4034
+ completionReason: 'completed_by_owner_decision',
4035
+ ownerDecision: ownerDecision.decision,
4036
+ reason: ownerDecision.reason,
4037
+ };
4038
+ }
4039
+
4040
+ if (!explicitOwnerDecision) {
4041
+ const evidenceReason = this.judgeOwnerCompletionByEvidence(step.name, ownerOutput);
4042
+ if (evidenceReason) {
4043
+ if (!hasMarker) {
4044
+ this.log(
4045
+ `[${step.name}] Evidence-based completion resolved without legacy STEP_COMPLETE marker`
4046
+ );
4047
+ }
4048
+ return {
4049
+ completionReason: 'completed_by_evidence',
4050
+ reason: evidenceReason,
4051
+ };
4052
+ }
4053
+ }
4054
+
4055
+ // Process-exit fallback: if the agent exited cleanly (code 0) and verification
4056
+ // passes (or no verification is configured), infer completion rather than failing.
4057
+ // This reduces dependence on agents posting exact coordination signals.
4058
+ const processExitFallback = this.tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput);
4059
+ if (processExitFallback) {
4060
+ this.log(
4061
+ `[${step.name}] Completion inferred from clean process exit (code 0)` +
4062
+ (step.verification ? ' + verification passed' : '') +
4063
+ ' — no coordination signal was required'
4064
+ );
4065
+ return processExitFallback;
4066
+ }
4067
+
4068
+ throw new WorkflowCompletionError(
4069
+ `Step "${step.name}" owner completion decision missing: no OWNER_DECISION, legacy STEP_COMPLETE marker, or evidence-backed completion signal`,
4070
+ 'failed_no_evidence'
4071
+ );
4072
+ }
4073
+
4074
+ private hasExplicitInteractiveWorkerCompletionEvidence(
4075
+ step: WorkflowStep,
4076
+ output: string,
4077
+ injectedTaskText: string,
4078
+ verificationTaskText: string
4079
+ ): boolean {
4080
+ try {
4081
+ this.resolveOwnerCompletionDecision(step, output, output, injectedTaskText, verificationTaskText);
4082
+ return true;
4083
+ } catch {
4084
+ return false;
4085
+ }
4086
+ }
4087
+
4088
+ private hasOwnerCompletionMarker(
4089
+ step: WorkflowStep,
4090
+ output: string,
4091
+ injectedTaskText: string
4092
+ ): boolean {
2984
4093
  const marker = `STEP_COMPLETE:${step.name}`;
2985
4094
  const taskHasMarker = injectedTaskText.includes(marker);
2986
4095
  const first = output.indexOf(marker);
2987
4096
  if (first === -1) {
2988
- throw new Error(`Step "${step.name}" owner completion marker missing: "${marker}"`);
4097
+ return false;
2989
4098
  }
2990
- // PTY output includes injected task text, so require a second marker occurrence
2991
- // when the marker was present in the injected prompt (either owner contract or supervisor prompt).
4099
+ // PTY output often includes echoed prompt text, so when the injected task
4100
+ // itself contains the legacy marker require a second occurrence from the
4101
+ // agent response.
2992
4102
  const outputLikelyContainsInjectedPrompt =
2993
- output.includes('STEP OWNER CONTRACT') || output.includes('Output exactly: STEP_COMPLETE:');
4103
+ output.includes('STEP OWNER CONTRACT') ||
4104
+ output.includes('Preferred final decision format') ||
4105
+ output.includes('Legacy completion marker still supported') ||
4106
+ output.includes('Output exactly: STEP_COMPLETE:');
2994
4107
  if (taskHasMarker && outputLikelyContainsInjectedPrompt) {
2995
- const hasSecond = output.includes(marker, first + marker.length);
2996
- if (!hasSecond) {
2997
- throw new Error(`Step "${step.name}" owner completion marker missing in agent response: "${marker}"`);
2998
- }
4108
+ return output.includes(marker, first + marker.length);
4109
+ }
4110
+ return true;
4111
+ }
4112
+
4113
+ private parseOwnerDecision(
4114
+ step: WorkflowStep,
4115
+ ownerOutput: string,
4116
+ hasMarker: boolean
4117
+ ): { decision: WorkflowOwnerDecision; reason?: string } | null {
4118
+ const decisionPattern =
4119
+ /OWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi;
4120
+ const decisionMatches = [...ownerOutput.matchAll(decisionPattern)];
4121
+ const outputLikelyContainsEchoedPrompt =
4122
+ ownerOutput.includes('STEP OWNER CONTRACT') ||
4123
+ ownerOutput.includes('Preferred final decision format') ||
4124
+ ownerOutput.includes('one of COMPLETE, INCOMPLETE_RETRY') ||
4125
+ ownerOutput.includes('COMPLETE|INCOMPLETE_RETRY');
4126
+
4127
+ if (decisionMatches.length === 0) {
4128
+ if (!hasMarker) return null;
4129
+ return {
4130
+ decision: 'COMPLETE',
4131
+ reason: `Legacy completion marker observed: STEP_COMPLETE:${step.name}`,
4132
+ };
2999
4133
  }
4134
+
4135
+ // Filter out matches that appear on a template/instruction line (e.g.
4136
+ // "COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION") to avoid
4137
+ // picking up the template format as the agent's actual decision.
4138
+ const realMatches = outputLikelyContainsEchoedPrompt
4139
+ ? decisionMatches.filter((m) => {
4140
+ const lineStart = ownerOutput.lastIndexOf('\n', m.index!) + 1;
4141
+ const lineEnd = ownerOutput.indexOf('\n', m.index!);
4142
+ const line = ownerOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
4143
+ return !line.includes('COMPLETE|INCOMPLETE_RETRY');
4144
+ })
4145
+ : decisionMatches;
4146
+ const decisionMatch =
4147
+ realMatches.length > 0
4148
+ ? realMatches[realMatches.length - 1]
4149
+ : decisionMatches[decisionMatches.length - 1];
4150
+ const decision = decisionMatch?.[1]?.toUpperCase() as WorkflowOwnerDecision | undefined;
4151
+ if (
4152
+ decision !== 'COMPLETE' &&
4153
+ decision !== 'INCOMPLETE_RETRY' &&
4154
+ decision !== 'INCOMPLETE_FAIL' &&
4155
+ decision !== 'NEEDS_CLARIFICATION'
4156
+ ) {
4157
+ return null;
4158
+ }
4159
+
4160
+ const reasonPattern = /(?:^|\n)REASON:\s*(.+)/gi;
4161
+ const reasonMatches = [...ownerOutput.matchAll(reasonPattern)];
4162
+ const reasonMatch =
4163
+ outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
4164
+ ? reasonMatches[reasonMatches.length - 1]
4165
+ : reasonMatches[0];
4166
+ const reason = reasonMatch?.[1]?.trim();
4167
+
4168
+ return {
4169
+ decision,
4170
+ reason: reason && reason !== '<one sentence>' ? reason : undefined,
4171
+ };
4172
+ }
4173
+
4174
+ private stripEchoedPromptLines(output: string, patterns: RegExp[]): string {
4175
+ return output
4176
+ .split('\n')
4177
+ .map((line) => line.trim())
4178
+ .filter(Boolean)
4179
+ .filter((line) => patterns.every((pattern) => !pattern.test(line)))
4180
+ .join('\n');
4181
+ }
4182
+
4183
+ private firstMeaningfulLine(output: string): string | undefined {
4184
+ return output
4185
+ .split('\n')
4186
+ .map((line) => line.trim())
4187
+ .find(Boolean);
4188
+ }
4189
+
4190
+ private judgeOwnerCompletionByEvidence(stepName: string, ownerOutput: string): string | null {
4191
+ // Never infer completion when the raw output contains an explicit retry/fail/clarification signal.
4192
+ if (/OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput)) {
4193
+ return null;
4194
+ }
4195
+ const sanitized = this.stripEchoedPromptLines(ownerOutput, [
4196
+ /^STEP OWNER CONTRACT:?$/i,
4197
+ /^Preferred final decision format:?$/i,
4198
+ /^OWNER_DECISION:\s*(?:COMPLETE\|INCOMPLETE_RETRY|<one of COMPLETE, INCOMPLETE_RETRY)/i,
4199
+ /^REASON:\s*<one sentence>$/i,
4200
+ /^Legacy completion marker still supported:/i,
4201
+ /^STEP_COMPLETE:/i,
4202
+ ]);
4203
+ if (!sanitized) return null;
4204
+
4205
+ const hasExplicitSelfRelease =
4206
+ /Calling\s+(?:[\w.-]+\.)?remove_agent\(\{[^<\n]*"reason":"task completed"/i.test(
4207
+ sanitized
4208
+ );
4209
+ const hasPositiveConclusion =
4210
+ /\b(complete(?:d)?|done|verified|looks correct|safe handoff|artifact verified)\b/i.test(
4211
+ sanitized
4212
+ ) ||
4213
+ /\bartifacts?\b.*\b(correct|verified|complete)\b/i.test(sanitized) ||
4214
+ hasExplicitSelfRelease;
4215
+ const evidence = this.getStepCompletionEvidence(stepName);
4216
+ const hasValidatedCoordinationSignal =
4217
+ evidence?.coordinationSignals.some(
4218
+ (signal) =>
4219
+ signal.kind === 'worker_done' ||
4220
+ signal.kind === 'lead_done' ||
4221
+ signal.kind === 'verification_passed' ||
4222
+ (signal.kind === 'process_exit' && signal.value === '0')
4223
+ ) ?? false;
4224
+ const hasValidatedInspectionSignal =
4225
+ evidence?.toolSideEffects.some(
4226
+ (effect) =>
4227
+ effect.type === 'owner_monitoring' &&
4228
+ (/Checked git diff stats/i.test(effect.detail) ||
4229
+ /Listed files for verification/i.test(effect.detail))
4230
+ ) ?? false;
4231
+ const hasEvidenceSignal = hasValidatedCoordinationSignal || hasValidatedInspectionSignal;
4232
+
4233
+ if (!hasPositiveConclusion || !hasEvidenceSignal) {
4234
+ return null;
4235
+ }
4236
+
4237
+ return this.firstMeaningfulLine(sanitized) ?? 'Evidence-backed completion';
4238
+ }
4239
+
4240
+ /**
4241
+ * Process-exit fallback: when agent exits with code 0 but posts no coordination
4242
+ * signal, check if verification passes (or no verification is configured) and
4243
+ * infer completion. This is the key mechanism for reducing agent compliance
4244
+ * dependence — the runner trusts a clean exit + passing verification over
4245
+ * requiring exact signal text.
4246
+ */
4247
+ private tryProcessExitFallback(
4248
+ step: WorkflowStep,
4249
+ specialistOutput: string,
4250
+ verificationTaskText?: string,
4251
+ ownerOutput?: string
4252
+ ): CompletionDecisionResult | null {
4253
+ const gracePeriodMs = this.currentConfig?.swarm.completionGracePeriodMs ?? 5000;
4254
+ if (gracePeriodMs === 0) return null;
4255
+
4256
+ // Never infer completion when the owner explicitly requested retry/fail/clarification.
4257
+ if (ownerOutput && /OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput)) {
4258
+ return null;
4259
+ }
4260
+
4261
+ const evidence = this.getStepCompletionEvidence(step.name);
4262
+ const hasCleanExit = evidence?.coordinationSignals.some(
4263
+ (signal) =>
4264
+ signal.kind === 'process_exit' && signal.value === '0'
4265
+ ) ?? false;
4266
+
4267
+ if (!hasCleanExit) return null;
4268
+
4269
+ // If verification is configured, it must pass for the fallback to succeed.
4270
+ if (step.verification) {
4271
+ const verificationResult = this.runVerification(
4272
+ step.verification,
4273
+ specialistOutput,
4274
+ step.name,
4275
+ verificationTaskText,
4276
+ { allowFailure: true }
4277
+ );
4278
+ if (!verificationResult.passed) return null;
4279
+ }
4280
+
4281
+ return {
4282
+ completionReason: 'completed_by_process_exit',
4283
+ reason: `Process exited with code 0${step.verification ? ' and verification passed' : ''} — coordination signal not required`,
4284
+ };
3000
4285
  }
3001
4286
 
3002
4287
  private async runStepReviewGate(
@@ -3052,7 +4337,17 @@ export class WorkflowRunner {
3052
4337
 
3053
4338
  await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer');
3054
4339
  this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`);
4340
+ this.recordStepToolSideEffect(step.name, {
4341
+ type: 'review_started',
4342
+ detail: `Review started with ${reviewerDef.name}`,
4343
+ raw: { reviewer: reviewerDef.name },
4344
+ });
3055
4345
  const emitReviewCompleted = async (decision: 'approved' | 'rejected', reason?: string) => {
4346
+ this.recordStepToolSideEffect(step.name, {
4347
+ type: 'review_completed',
4348
+ detail: `Review ${decision} by ${reviewerDef.name}${reason ? `: ${reason}` : ''}`,
4349
+ raw: { reviewer: reviewerDef.name, decision, reason },
4350
+ });
3056
4351
  await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason);
3057
4352
  this.emit({
3058
4353
  type: 'step:review-completed',
@@ -3108,6 +4403,9 @@ export class WorkflowRunner {
3108
4403
 
3109
4404
  try {
3110
4405
  await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
4406
+ evidenceStepName: step.name,
4407
+ evidenceRole: 'reviewer',
4408
+ logicalName: reviewerDef.name,
3111
4409
  onSpawned: ({ agent }) => {
3112
4410
  reviewerHandle = agent;
3113
4411
  },
@@ -3153,6 +4451,22 @@ export class WorkflowRunner {
3153
4451
 
3154
4452
  private parseReviewDecision(
3155
4453
  reviewOutput: string
4454
+ ): { decision: 'approved' | 'rejected'; reason?: string } | null {
4455
+ const strict = this.parseStrictReviewDecision(reviewOutput);
4456
+ if (strict) {
4457
+ return strict;
4458
+ }
4459
+
4460
+ const tolerant = this.parseTolerantReviewDecision(reviewOutput);
4461
+ if (tolerant) {
4462
+ return tolerant;
4463
+ }
4464
+
4465
+ return this.judgeReviewDecisionFromEvidence(reviewOutput);
4466
+ }
4467
+
4468
+ private parseStrictReviewDecision(
4469
+ reviewOutput: string
3156
4470
  ): { decision: 'approved' | 'rejected'; reason?: string } | null {
3157
4471
  const decisionPattern = /REVIEW_DECISION:\s*(APPROVE|REJECT)/gi;
3158
4472
  const decisionMatches = [...reviewOutput.matchAll(decisionPattern)];
@@ -3162,10 +4476,18 @@ export class WorkflowRunner {
3162
4476
 
3163
4477
  const outputLikelyContainsEchoedPrompt =
3164
4478
  reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
4479
+ const realReviewMatches = outputLikelyContainsEchoedPrompt
4480
+ ? decisionMatches.filter((m) => {
4481
+ const lineStart = reviewOutput.lastIndexOf('\n', m.index!) + 1;
4482
+ const lineEnd = reviewOutput.indexOf('\n', m.index!);
4483
+ const line = reviewOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
4484
+ return !line.includes('APPROVE or REJECT');
4485
+ })
4486
+ : decisionMatches;
3165
4487
  const decisionMatch =
3166
- outputLikelyContainsEchoedPrompt && decisionMatches.length > 1
3167
- ? decisionMatches[decisionMatches.length - 1]
3168
- : decisionMatches[0];
4488
+ realReviewMatches.length > 0
4489
+ ? realReviewMatches[realReviewMatches.length - 1]
4490
+ : decisionMatches[decisionMatches.length - 1];
3169
4491
  const decision = decisionMatch?.[1]?.toUpperCase();
3170
4492
  if (decision !== 'APPROVE' && decision !== 'REJECT') {
3171
4493
  return null;
@@ -3185,6 +4507,115 @@ export class WorkflowRunner {
3185
4507
  };
3186
4508
  }
3187
4509
 
4510
+ private parseTolerantReviewDecision(
4511
+ reviewOutput: string
4512
+ ): { decision: 'approved' | 'rejected'; reason?: string } | null {
4513
+ const sanitized = this.stripEchoedPromptLines(reviewOutput, [
4514
+ /^Return exactly:?$/i,
4515
+ /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
4516
+ /^REVIEW_REASON:\s*<one sentence>$/i,
4517
+ ]);
4518
+ if (!sanitized) {
4519
+ return null;
4520
+ }
4521
+
4522
+ const lines = sanitized
4523
+ .split('\n')
4524
+ .map((line) => line.trim())
4525
+ .filter(Boolean);
4526
+ for (const line of lines) {
4527
+ const candidate = line.replace(/^REVIEW_DECISION:\s*/i, '').trim();
4528
+ const decision = this.normalizeReviewDecisionCandidate(candidate);
4529
+ if (decision) {
4530
+ return {
4531
+ decision,
4532
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
4533
+ };
4534
+ }
4535
+ }
4536
+
4537
+ const decision = this.normalizeReviewDecisionCandidate(lines.join(' '));
4538
+ if (!decision) {
4539
+ return null;
4540
+ }
4541
+
4542
+ return {
4543
+ decision,
4544
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
4545
+ };
4546
+ }
4547
+
4548
+ private normalizeReviewDecisionCandidate(candidate: string): 'approved' | 'rejected' | null {
4549
+ const value = candidate.trim().toLowerCase();
4550
+ if (!value) return null;
4551
+
4552
+ if (
4553
+ /^(approve|approved|complete|completed|pass|passed|accept|accepted|lgtm|ship it|looks good|looks fine)\b/i.test(
4554
+ value
4555
+ )
4556
+ ) {
4557
+ return 'approved';
4558
+ }
4559
+ if (
4560
+ /^(reject|rejected|retry|retry requested|fail|failed|incomplete|needs clarification|not complete|not ready|insufficient evidence)\b/i.test(
4561
+ value
4562
+ )
4563
+ ) {
4564
+ return 'rejected';
4565
+ }
4566
+ return null;
4567
+ }
4568
+
4569
+ private parseReviewReason(reviewOutput: string): string | undefined {
4570
+ const reasonPattern = /REVIEW_REASON:\s*(.+)/gi;
4571
+ const reasonMatches = [...reviewOutput.matchAll(reasonPattern)];
4572
+ const outputLikelyContainsEchoedPrompt =
4573
+ reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
4574
+ const reasonMatch =
4575
+ outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
4576
+ ? reasonMatches[reasonMatches.length - 1]
4577
+ : reasonMatches[0];
4578
+ const reason = reasonMatch?.[1]?.trim();
4579
+ return reason && reason !== '<one sentence>' ? reason : undefined;
4580
+ }
4581
+
4582
+ private judgeReviewDecisionFromEvidence(
4583
+ reviewOutput: string
4584
+ ): { decision: 'approved' | 'rejected'; reason?: string } | null {
4585
+ const sanitized = this.stripEchoedPromptLines(reviewOutput, [
4586
+ /^Return exactly:?$/i,
4587
+ /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
4588
+ /^REVIEW_REASON:\s*<one sentence>$/i,
4589
+ ]);
4590
+ if (!sanitized) {
4591
+ return null;
4592
+ }
4593
+
4594
+ const hasPositiveEvidence =
4595
+ /\b(approved?|complete(?:d)?|verified|looks good|looks fine|safe handoff|pass(?:ed)?)\b/i.test(
4596
+ sanitized
4597
+ );
4598
+ const hasNegativeEvidence =
4599
+ /\b(reject(?:ed)?|retry|fail(?:ed)?|incomplete|missing checks|insufficient evidence|not safe)\b/i.test(
4600
+ sanitized
4601
+ );
4602
+
4603
+ if (hasNegativeEvidence) {
4604
+ return {
4605
+ decision: 'rejected',
4606
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
4607
+ };
4608
+ }
4609
+ if (!hasPositiveEvidence) {
4610
+ return null;
4611
+ }
4612
+
4613
+ return {
4614
+ decision: 'approved',
4615
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
4616
+ };
4617
+ }
4618
+
3188
4619
  private combineStepAndReviewOutput(stepOutput: string, reviewOutput: string): string {
3189
4620
  const primary = stepOutput.trimEnd();
3190
4621
  const review = reviewOutput.trim();
@@ -3260,8 +4691,8 @@ export class WorkflowRunner {
3260
4691
  case 'worker':
3261
4692
  return (
3262
4693
  'You are a non-interactive worker agent. Produce clean, structured output to stdout.\n' +
3263
- 'Do NOT use relay_spawn, add_agent, or any MCP tool to spawn sub-agents.\n' +
3264
- 'Do NOT use relay_send or any Relaycast messaging tools — you have no relay connection.\n\n'
4694
+ 'Do NOT use mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn sub-agents.\n' +
4695
+ 'Do NOT use mcp__relaycast__dm_send or any Relaycast messaging tools — you have no relay connection.\n\n'
3265
4696
  );
3266
4697
  case 'reviewer':
3267
4698
  return (
@@ -3299,7 +4730,7 @@ export class WorkflowRunner {
3299
4730
  step.task +
3300
4731
  '\n\n---\n' +
3301
4732
  'IMPORTANT: You are running as a non-interactive subprocess. ' +
3302
- 'Do NOT call relay_spawn, add_agent, or any MCP tool to spawn or manage other agents.\n\n' +
4733
+ 'Do NOT call mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn or manage other agents.\n\n' +
3303
4734
  'CRITICAL REQUIREMENT — YOU MUST FOLLOW THIS EXACTLY:\n' +
3304
4735
  'You are running in non-interactive mode. There is NO opportunity for follow-up, ' +
3305
4736
  'clarification, or additional input. Your stdout output is your ONLY deliverable.\n\n' +
@@ -3462,10 +4893,21 @@ export class WorkflowRunner {
3462
4893
  });
3463
4894
  });
3464
4895
 
4896
+ this.captureStepTerminalEvidence(step.name, {}, { exitCode, exitSignal });
3465
4897
  return { output, exitCode, exitSignal };
3466
4898
  } finally {
3467
- const combinedOutput = stdoutChunks.join('') + stderrChunks.join('');
4899
+ const stdout = stdoutChunks.join('');
4900
+ const stderr = stderrChunks.join('');
4901
+ const combinedOutput = stdout + stderr;
3468
4902
  this.lastFailedStepOutput.set(step.name, combinedOutput);
4903
+ this.captureStepTerminalEvidence(
4904
+ step.name,
4905
+ {
4906
+ stdout,
4907
+ stderr,
4908
+ combined: combinedOutput,
4909
+ }
4910
+ );
3469
4911
  stopHeartbeat?.();
3470
4912
  logStream.end();
3471
4913
  this.unregisterWorker(agentName);
@@ -3487,6 +4929,8 @@ export class WorkflowRunner {
3487
4929
  throw new Error('AgentRelay not initialized');
3488
4930
  }
3489
4931
 
4932
+ const evidenceStepName = options.evidenceStepName ?? step.name;
4933
+
3490
4934
  // Deterministic name: step name + optional role suffix + first 8 chars of run ID.
3491
4935
  const requestedName = `${step.name}${options.agentNameSuffix ? `-${options.agentNameSuffix}` : ''}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
3492
4936
  let agentName = requestedName;
@@ -3538,18 +4982,24 @@ export class WorkflowRunner {
3538
4982
 
3539
4983
  const agentChannels = this.channel ? [this.channel] : agentDef.channels;
3540
4984
 
3541
- let agent: Awaited<ReturnType<typeof this.relay.spawnPty>>;
4985
+ let agent: Awaited<ReturnType<typeof this.relay.spawnPty>> | undefined;
3542
4986
  let exitResult: string = 'unknown';
3543
4987
  let stopHeartbeat: (() => void) | undefined;
3544
4988
  let ptyChunks: string[] = [];
3545
4989
 
3546
4990
  try {
3547
4991
  const agentCwd = this.resolveAgentCwd(agentDef);
4992
+ const interactiveSpawnPolicy = resolveSpawnPolicy({
4993
+ AGENT_NAME: agentName,
4994
+ AGENT_CLI: agentDef.cli,
4995
+ RELAY_API_KEY: this.relayApiKey ?? 'workflow-runner',
4996
+ AGENT_CHANNELS: (agentChannels ?? []).join(','),
4997
+ });
3548
4998
  agent = await this.relay.spawnPty({
3549
4999
  name: agentName,
3550
5000
  cli: agentDef.cli,
3551
5001
  model: agentDef.constraints?.model,
3552
- args: [],
5002
+ args: interactiveSpawnPolicy.args,
3553
5003
  channels: agentChannels,
3554
5004
  task: taskWithExit,
3555
5005
  idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
@@ -3584,18 +5034,36 @@ export class WorkflowRunner {
3584
5034
  const oldListener = this.ptyListeners.get(oldName);
3585
5035
  if (oldListener) {
3586
5036
  this.ptyListeners.delete(oldName);
3587
- this.ptyListeners.set(agent.name, (chunk: string) => {
5037
+ const resolvedAgentName = agent.name;
5038
+ this.ptyListeners.set(resolvedAgentName, (chunk: string) => {
3588
5039
  const stripped = WorkflowRunner.stripAnsi(chunk);
3589
- this.ptyOutputBuffers.get(agent.name)?.push(stripped);
5040
+ this.ptyOutputBuffers.get(resolvedAgentName)?.push(stripped);
3590
5041
  newLogStream.write(chunk);
3591
- options.onChunk?.({ agentName: agent.name, chunk });
5042
+ options.onChunk?.({ agentName: resolvedAgentName, chunk });
3592
5043
  });
3593
5044
  }
3594
5045
 
3595
5046
  agentName = agent.name;
3596
5047
  }
3597
5048
 
3598
- await options.onSpawned?.({ requestedName, actualName: agent.name, agent });
5049
+ const liveAgent = agent;
5050
+ await options.onSpawned?.({ requestedName, actualName: liveAgent.name, agent: liveAgent });
5051
+ this.runtimeStepAgents.set(liveAgent.name, {
5052
+ stepName: evidenceStepName,
5053
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
5054
+ logicalName: options.logicalName ?? agentDef.name,
5055
+ });
5056
+ const signalParticipant = this.resolveSignalParticipantKind(
5057
+ options.evidenceRole ?? agentDef.role ?? 'agent'
5058
+ );
5059
+ if (signalParticipant) {
5060
+ this.rememberStepSignalSender(
5061
+ evidenceStepName,
5062
+ signalParticipant,
5063
+ liveAgent.name,
5064
+ options.logicalName ?? agentDef.name
5065
+ );
5066
+ }
3599
5067
 
3600
5068
  // Register in workers.json so `agents:kill` can find this agent
3601
5069
  let workerPid: number | undefined;
@@ -3610,11 +5078,11 @@ export class WorkflowRunner {
3610
5078
  // Register the spawned agent in Relaycast for observability + start heartbeat
3611
5079
  if (this.relayApiKey) {
3612
5080
  const agentClient = await this.registerRelaycastExternalAgent(
3613
- agent.name,
5081
+ liveAgent.name,
3614
5082
  `Workflow agent for step "${step.name}" (${agentDef.cli})`
3615
5083
  ).catch((err) => {
3616
5084
  console.warn(
3617
- `[WorkflowRunner] Failed to register ${agent.name} in Relaycast:`,
5085
+ `[WorkflowRunner] Failed to register ${liveAgent.name} in Relaycast:`,
3618
5086
  err?.message ?? err
3619
5087
  );
3620
5088
  return null;
@@ -3632,32 +5100,50 @@ export class WorkflowRunner {
3632
5100
  await channelAgent?.channels.invite(this.channel, agent.name).catch(() => {});
3633
5101
  }
3634
5102
 
3635
- // Post assignment notification (no task content — task arrives via direct broker injection)
3636
- this.postToChannel(`**[${step.name}]** Assigned to \`${agent.name}\``);
5103
+ // Keep operational assignment chatter out of the agent coordination channel.
5104
+ this.log(`[${step.name}] Assigned to ${agent.name}`);
3637
5105
 
3638
5106
  // Register agent handle for hub-mediated nudging
3639
5107
  this.activeAgentHandles.set(agentName, agent);
3640
5108
 
3641
5109
  // Wait for agent to exit, with idle nudging if configured
3642
- exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs);
5110
+ exitResult = await this.waitForExitWithIdleNudging(
5111
+ agent,
5112
+ agentDef,
5113
+ step,
5114
+ timeoutMs,
5115
+ options.preserveOnIdle ?? this.shouldPreserveIdleSupervisor(agentDef, step, options.evidenceRole)
5116
+ );
3643
5117
 
3644
5118
  // Stop heartbeat now that agent has exited
3645
5119
  stopHeartbeat?.();
3646
5120
 
3647
5121
  if (exitResult === 'timeout') {
3648
- // Safety net: check if the verification file exists before giving up.
3649
- // The agent may have completed work but failed to /exit.
3650
- if (step.verification?.type === 'file_exists') {
3651
- const verifyPath = path.resolve(this.cwd, step.verification.value);
3652
- if (existsSync(verifyPath)) {
3653
- this.postToChannel(`**[${step.name}]** Agent idle after completing work — releasing`);
3654
- await agent.release();
3655
- // Fall through to read output below
3656
- } else {
5122
+ // Grace-period fallback: before failing, check if the agent completed
5123
+ // its work but just failed to self-terminate. Run verification if
5124
+ configured — a passing gate + timeout is better than a hard failure.
5125
+ let timeoutRecovered = false;
5126
+ if (step.verification) {
5127
+ const ptyOutput = (this.ptyOutputBuffers.get(agentName) ?? []).join('');
5128
+ const verificationResult = this.runVerification(
5129
+ step.verification,
5130
+ ptyOutput,
5131
+ step.name,
5132
+ undefined,
5133
+ { allowFailure: true }
5134
+ );
5135
+ if (verificationResult.passed) {
5136
+ this.log(
5137
+ `[${step.name}] Agent timed out but verification passed — treating as complete`
5138
+ );
5139
+ this.postToChannel(
5140
+ `**[${step.name}]** Agent idle after completing work — verification passed, releasing`
5141
+ );
3657
5142
  await agent.release();
3658
- throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
5143
+ timeoutRecovered = true;
3659
5144
  }
3660
- } else {
5145
+ }
5146
+ if (!timeoutRecovered) {
3661
5147
  await agent.release();
3662
5148
  throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
3663
5149
  }
@@ -3672,6 +5158,24 @@ export class WorkflowRunner {
3672
5158
  // Snapshot PTY chunks before cleanup — we need them for output reading below
3673
5159
  ptyChunks = this.ptyOutputBuffers.get(agentName) ?? [];
3674
5160
  this.lastFailedStepOutput.set(step.name, ptyChunks.join(''));
5161
+ if (ptyChunks.length > 0 || agent?.exitCode !== undefined || agent?.exitSignal !== undefined) {
5162
+ this.captureStepTerminalEvidence(
5163
+ evidenceStepName,
5164
+ {
5165
+ stdout: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
5166
+ combined: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
5167
+ },
5168
+ {
5169
+ exitCode: agent?.exitCode,
5170
+ exitSignal: agent?.exitSignal,
5171
+ },
5172
+ {
5173
+ sender: options.logicalName ?? agentDef.name,
5174
+ actor: agent?.name ?? agentName,
5175
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
5176
+ }
5177
+ );
5178
+ }
3675
5179
 
3676
5180
  // Always clean up PTY resources — prevents fd leaks if spawnPty or waitForExit throws
3677
5181
  stopHeartbeat?.();
@@ -3685,6 +5189,7 @@ export class WorkflowRunner {
3685
5189
  }
3686
5190
  this.unregisterWorker(agentName);
3687
5191
  this.supervisedRuntimeAgents.delete(agentName);
5192
+ this.runtimeStepAgents.delete(agentName);
3688
5193
  }
3689
5194
 
3690
5195
  let output: string;
@@ -3702,6 +5207,19 @@ export class WorkflowRunner {
3702
5207
  : `Agent exited (${exitResult})`;
3703
5208
  }
3704
5209
 
5210
+ if (ptyChunks.length === 0) {
5211
+ this.captureStepTerminalEvidence(
5212
+ evidenceStepName,
5213
+ { stdout: output, combined: output },
5214
+ { exitCode: agent?.exitCode, exitSignal: agent?.exitSignal },
5215
+ {
5216
+ sender: options.logicalName ?? agentDef.name,
5217
+ actor: agent?.name ?? agentName,
5218
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
5219
+ }
5220
+ );
5221
+ }
5222
+
3705
5223
  return {
3706
5224
  output,
3707
5225
  exitCode: agent?.exitCode,
@@ -3733,6 +5251,37 @@ export class WorkflowRunner {
3733
5251
  'auctioneer',
3734
5252
  ]);
3735
5253
 
5254
+ private isLeadLikeAgent(agentDef: AgentDefinition, roleOverride?: string): boolean {
5255
+ if (agentDef.preset === 'lead') return true;
5256
+
5257
+ const role = (roleOverride ?? agentDef.role ?? '').toLowerCase();
5258
+ const nameLC = agentDef.name.toLowerCase();
5259
+ return [...WorkflowRunner.HUB_ROLES].some(
5260
+ (hubRole) =>
5261
+ new RegExp(`\\b${hubRole}\\b`, 'i').test(nameLC) ||
5262
+ new RegExp(`\\b${hubRole}\\b`, 'i').test(role)
5263
+ );
5264
+ }
5265
+
5266
+ private shouldPreserveIdleSupervisor(
5267
+ agentDef: AgentDefinition,
5268
+ step: WorkflowStep,
5269
+ evidenceRole?: string
5270
+ ): boolean {
5271
+ if (evidenceRole && /\bowner\b/i.test(evidenceRole)) {
5272
+ return true;
5273
+ }
5274
+
5275
+ if (!this.isLeadLikeAgent(agentDef, evidenceRole)) {
5276
+ return false;
5277
+ }
5278
+
5279
+ const task = step.task ?? '';
5280
+ return /\b(wait|waiting|monitor|supervis|check inbox|check.*channel|poll|DONE|_DONE|signal|handoff)\b/i.test(
5281
+ task
5282
+ );
5283
+ }
5284
+
3736
5285
  /**
3737
5286
  * Wait for agent exit with idle detection and nudging.
3738
5287
  * If no idle nudge config is set, falls through to simple waitForExit.
@@ -3741,10 +5290,18 @@ export class WorkflowRunner {
3741
5290
  agent: Agent,
3742
5291
  agentDef: AgentDefinition,
3743
5292
  step: WorkflowStep,
3744
- timeoutMs?: number
5293
+ timeoutMs?: number,
5294
+ preserveIdleSupervisor = false
3745
5295
  ): Promise<'exited' | 'timeout' | 'released' | 'force-released'> {
3746
5296
  const nudgeConfig = this.currentConfig?.swarm.idleNudge;
3747
5297
  if (!nudgeConfig) {
5298
+ if (preserveIdleSupervisor) {
5299
+ this.log(
5300
+ `[${step.name}] Supervising agent "${agent.name}" may idle while waiting — using exit-only completion`
5301
+ );
5302
+ return agent.waitForExit(timeoutMs);
5303
+ }
5304
+
3748
5305
  // Idle = done: race exit against idle. Whichever fires first completes the step.
3749
5306
  const result = await Promise.race([
3750
5307
  agent.waitForExit(timeoutMs).then((r) => ({ kind: 'exit' as const, result: r })),
@@ -3765,6 +5322,7 @@ export class WorkflowRunner {
3765
5322
  const maxNudges = nudgeConfig.maxNudges ?? 1;
3766
5323
 
3767
5324
  let nudgeCount = 0;
5325
+ let preservedSupervisorNoticeSent = false;
3768
5326
  const startTime = Date.now();
3769
5327
 
3770
5328
  while (true) {
@@ -3806,6 +5364,19 @@ export class WorkflowRunner {
3806
5364
  continue;
3807
5365
  }
3808
5366
 
5367
+ if (preserveIdleSupervisor) {
5368
+ if (!preservedSupervisorNoticeSent) {
5369
+ this.log(
5370
+ `[${step.name}] Supervising agent "${agent.name}" stayed idle after ${nudgeCount} nudge(s) — preserving until exit or timeout`
5371
+ );
5372
+ this.postToChannel(
5373
+ `**[${step.name}]** Supervising agent \`${agent.name}\` is waiting on handoff — keeping it alive until it exits or the step times out`
5374
+ );
5375
+ preservedSupervisorNoticeSent = true;
5376
+ }
5377
+ continue;
5378
+ }
5379
+
3809
5380
  // Exhausted nudges — force-release
3810
5381
  this.postToChannel(
3811
5382
  `**[${step.name}]** Agent \`${agent.name}\` still idle after ${nudgeCount} nudge(s) — force-releasing`
@@ -3890,8 +5461,34 @@ export class WorkflowRunner {
3890
5461
  check: VerificationCheck,
3891
5462
  output: string,
3892
5463
  stepName: string,
3893
- injectedTaskText?: string
3894
- ): void {
5464
+ injectedTaskText?: string,
5465
+ options?: VerificationOptions
5466
+ ): VerificationResult {
5467
+ const fail = (message: string): VerificationResult => {
5468
+ const observedAt = new Date().toISOString();
5469
+ this.recordStepToolSideEffect(stepName, {
5470
+ type: 'verification_observed',
5471
+ detail: message,
5472
+ observedAt,
5473
+ raw: { passed: false, type: check.type, value: check.value },
5474
+ });
5475
+ this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
5476
+ kind: 'verification_failed',
5477
+ source: 'verification',
5478
+ text: message,
5479
+ observedAt,
5480
+ value: check.value,
5481
+ });
5482
+ if (options?.allowFailure) {
5483
+ return {
5484
+ passed: false,
5485
+ completionReason: 'failed_verification',
5486
+ error: message,
5487
+ };
5488
+ }
5489
+ throw new WorkflowCompletionError(message, 'failed_verification');
5490
+ };
5491
+
3895
5492
  switch (check.type) {
3896
5493
  case 'output_contains': {
3897
5494
  // Guard against false positives: the PTY captures the injected task text
@@ -3905,13 +5502,13 @@ export class WorkflowRunner {
3905
5502
  const first = output.indexOf(token);
3906
5503
  const hasSecond = first !== -1 && output.includes(token, first + token.length);
3907
5504
  if (!hasSecond) {
3908
- throw new Error(
5505
+ return fail(
3909
5506
  `Verification failed for "${stepName}": output does not contain "${token}" ` +
3910
5507
  `(token found only in task injection — agent must output it explicitly)`
3911
5508
  );
3912
5509
  }
3913
5510
  } else if (!output.includes(token)) {
3914
- throw new Error(`Verification failed for "${stepName}": output does not contain "${token}"`);
5511
+ return fail(`Verification failed for "${stepName}": output does not contain "${token}"`);
3915
5512
  }
3916
5513
  break;
3917
5514
  }
@@ -3922,14 +5519,44 @@ export class WorkflowRunner {
3922
5519
 
3923
5520
  case 'file_exists':
3924
5521
  if (!existsSync(path.resolve(this.cwd, check.value))) {
3925
- throw new Error(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
5522
+ return fail(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
3926
5523
  }
3927
5524
  break;
3928
5525
 
3929
5526
  case 'custom':
3930
5527
  // Custom verifications are evaluated by callers; no-op here
3931
- break;
5528
+ return { passed: false };
3932
5529
  }
5530
+
5531
+ if (options?.completionMarkerFound === false) {
5532
+ this.log(
5533
+ `[${stepName}] Verification passed without legacy STEP_COMPLETE marker; allowing completion`
5534
+ );
5535
+ }
5536
+
5537
+ const successMessage =
5538
+ options?.completionMarkerFound === false
5539
+ ? `Verification passed without legacy STEP_COMPLETE marker`
5540
+ : `Verification passed`;
5541
+ const observedAt = new Date().toISOString();
5542
+ this.recordStepToolSideEffect(stepName, {
5543
+ type: 'verification_observed',
5544
+ detail: successMessage,
5545
+ observedAt,
5546
+ raw: { passed: true, type: check.type, value: check.value },
5547
+ });
5548
+ this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
5549
+ kind: 'verification_passed',
5550
+ source: 'verification',
5551
+ text: successMessage,
5552
+ observedAt,
5553
+ value: check.value,
5554
+ });
5555
+
5556
+ return {
5557
+ passed: true,
5558
+ completionReason: 'completed_verified',
5559
+ };
3933
5560
  }
3934
5561
 
3935
5562
  // ── State helpers ─────────────────────────────────────────────────────
@@ -3952,14 +5579,18 @@ export class WorkflowRunner {
3952
5579
  state: StepState,
3953
5580
  error: string,
3954
5581
  runId: string,
3955
- exitInfo?: { exitCode?: number; exitSignal?: string }
5582
+ exitInfo?: { exitCode?: number; exitSignal?: string },
5583
+ completionReason?: WorkflowStepCompletionReason
3956
5584
  ): Promise<void> {
5585
+ this.captureStepTerminalEvidence(state.row.stepName, {}, exitInfo);
3957
5586
  state.row.status = 'failed';
3958
5587
  state.row.error = error;
5588
+ state.row.completionReason = completionReason;
3959
5589
  state.row.completedAt = new Date().toISOString();
3960
5590
  await this.db.updateStep(state.row.id, {
3961
5591
  status: 'failed',
3962
5592
  error,
5593
+ completionReason,
3963
5594
  completedAt: state.row.completedAt,
3964
5595
  updatedAt: new Date().toISOString(),
3965
5596
  });
@@ -3971,6 +5602,7 @@ export class WorkflowRunner {
3971
5602
  exitCode: exitInfo?.exitCode,
3972
5603
  exitSignal: exitInfo?.exitSignal,
3973
5604
  });
5605
+ this.finalizeStepEvidence(state.row.stepName, 'failed', state.row.completedAt, completionReason);
3974
5606
  }
3975
5607
 
3976
5608
  private async markDownstreamSkipped(
@@ -4085,7 +5717,7 @@ export class WorkflowRunner {
4085
5717
  'RELAY SETUP — do this FIRST before any other relay tool:\n' +
4086
5718
  `1. Call: register(name="${agentName}")\n` +
4087
5719
  ' This authenticates you in the Relaycast workspace.\n' +
4088
- ' ALL relay tools (relay_send, relay_inbox, post_message, etc.) require\n' +
5720
+ ' ALL relay tools (mcp__relaycast__dm_send, mcp__relaycast__inbox_check, mcp__relaycast__message_post, etc.) require\n' +
4089
5721
  ' registration first — they will fail with "Not registered" otherwise.\n' +
4090
5722
  `2. Your agent name is "${agentName}" — use this exact name when registering.`
4091
5723
  );
@@ -4113,10 +5745,10 @@ export class WorkflowRunner {
4113
5745
  'If it involves multiple independent subtasks, touches many files, or could take a long time, ' +
4114
5746
  'you should break it down and delegate to helper agents to avoid timeouts.\n\n' +
4115
5747
  'Option 1 — Spawn relay agents (for real parallel coding work):\n' +
4116
- ' - relay_spawn(name="helper-1", cli="claude", task="Specific subtask description")\n' +
4117
- ' - Coordinate via relay_send(to="helper-1", message="...")\n' +
4118
- ' - Check on them with relay_inbox()\n' +
4119
- ' - Clean up when done: relay_release(name="helper-1")\n\n' +
5748
+ ' - mcp__relaycast__agent_add(name="helper-1", cli="claude", task="Specific subtask description")\n' +
5749
+ ' - Coordinate via mcp__relaycast__dm_send(to="helper-1", text="...")\n' +
5750
+ ' - Check on them with mcp__relaycast__inbox_check()\n' +
5751
+ ' - Clean up when done: mcp__relaycast__agent_remove(name="helper-1")\n\n' +
4120
5752
  subAgentOption +
4121
5753
  'Guidelines:\n' +
4122
5754
  '- You are the lead — delegate but stay in control, track progress, integrate results\n' +
@@ -4129,8 +5761,24 @@ export class WorkflowRunner {
4129
5761
  }
4130
5762
 
4131
5763
  /** Post a message to the workflow channel. Fire-and-forget — never throws or blocks. */
4132
- private postToChannel(text: string): void {
5764
+ private postToChannel(text: string, options: ChannelEvidenceOptions = {}): void {
4133
5765
  if (!this.relayApiKey || !this.channel) return;
5766
+ this.recordChannelEvidence(text, options);
5767
+
5768
+ const stepName = options.stepName ?? this.inferStepNameFromChannelText(text);
5769
+ if (stepName) {
5770
+ this.recordStepToolSideEffect(stepName, {
5771
+ type: 'post_channel_message',
5772
+ detail: text.slice(0, 240),
5773
+ raw: {
5774
+ actor: options.actor,
5775
+ role: options.role,
5776
+ target: options.target ?? this.channel,
5777
+ origin: options.origin ?? 'runner_post',
5778
+ },
5779
+ });
5780
+ }
5781
+
4134
5782
  this.ensureRelaycastRunnerAgent()
4135
5783
  .then((agent) => agent.send(this.channel!, text))
4136
5784
  .catch(() => {
@@ -4308,6 +5956,9 @@ export class WorkflowRunner {
4308
5956
  output: state.row.output,
4309
5957
  error: state.row.error,
4310
5958
  verificationPassed: state.row.status === 'completed' && stepsWithVerification.has(name),
5959
+ completionMode: state.row.completionReason
5960
+ ? this.buildStepCompletionDecision(name, state.row.completionReason)?.mode
5961
+ : undefined,
4311
5962
  });
4312
5963
  }
4313
5964
  return outcomes;
@@ -4449,25 +6100,31 @@ export class WorkflowRunner {
4449
6100
  /** Persist step output to disk and post full output as a channel message. */
4450
6101
  private async persistStepOutput(runId: string, stepName: string, output: string): Promise<void> {
4451
6102
  // 1. Write to disk
6103
+ const outputPath = path.join(this.getStepOutputDir(runId), `${stepName}.md`);
4452
6104
  try {
4453
6105
  const dir = this.getStepOutputDir(runId);
4454
6106
  mkdirSync(dir, { recursive: true });
4455
6107
  const cleaned = WorkflowRunner.stripAnsi(output);
4456
- await writeFile(path.join(dir, `${stepName}.md`), cleaned);
6108
+ await writeFile(outputPath, cleaned);
4457
6109
  } catch {
4458
6110
  // Non-critical
4459
6111
  }
6112
+ this.recordStepToolSideEffect(stepName, {
6113
+ type: 'persist_step_output',
6114
+ detail: `Persisted step output to ${this.normalizeEvidencePath(outputPath)}`,
6115
+ raw: { path: outputPath },
6116
+ });
4460
6117
 
4461
6118
  // 2. Post scrubbed output as a single channel message (most recent tail only)
4462
6119
  const scrubbed = WorkflowRunner.scrubForChannel(output);
4463
6120
  if (scrubbed.length === 0) {
4464
- this.postToChannel(`**[${stepName}]** Step completed — output written to disk`);
6121
+ this.postToChannel(`**[${stepName}]** Step completed — output written to disk`, { stepName });
4465
6122
  return;
4466
6123
  }
4467
6124
 
4468
6125
  const maxMsg = 2000;
4469
6126
  const preview = scrubbed.length > maxMsg ? scrubbed.slice(-maxMsg) : scrubbed;
4470
- this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``);
6127
+ this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``, { stepName });
4471
6128
  }
4472
6129
 
4473
6130
  /** Load persisted step output from disk. */