agent-relay 3.2.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/bin/agent-relay-broker-darwin-arm64 +0 -0
  2. package/bin/agent-relay-broker-darwin-x64 +0 -0
  3. package/bin/agent-relay-broker-linux-arm64 +0 -0
  4. package/bin/agent-relay-broker-linux-x64 +0 -0
  5. package/dist/index.cjs +1421 -246
  6. package/dist/src/cli/commands/core.d.ts +1 -0
  7. package/dist/src/cli/commands/core.d.ts.map +1 -1
  8. package/dist/src/cli/commands/core.js +18 -0
  9. package/dist/src/cli/commands/core.js.map +1 -1
  10. package/dist/src/cli/lib/broker-lifecycle.d.ts.map +1 -1
  11. package/dist/src/cli/lib/broker-lifecycle.js +16 -13
  12. package/dist/src/cli/lib/broker-lifecycle.js.map +1 -1
  13. package/dist/src/cli/relaycast-mcp.d.ts +4 -0
  14. package/dist/src/cli/relaycast-mcp.d.ts.map +1 -1
  15. package/dist/src/cli/relaycast-mcp.js +4 -4
  16. package/dist/src/cli/relaycast-mcp.js.map +1 -1
  17. package/package.json +8 -8
  18. package/packages/acp-bridge/package.json +2 -2
  19. package/packages/config/package.json +1 -1
  20. package/packages/hooks/package.json +4 -4
  21. package/packages/memory/package.json +2 -2
  22. package/packages/openclaw/README.md +2 -2
  23. package/packages/openclaw/dist/identity/files.js +2 -2
  24. package/packages/openclaw/dist/identity/files.js.map +1 -1
  25. package/packages/openclaw/dist/setup.js +2 -2
  26. package/packages/openclaw/package.json +2 -2
  27. package/packages/openclaw/skill/SKILL.md +8 -8
  28. package/packages/openclaw/src/identity/files.ts +2 -2
  29. package/packages/openclaw/src/setup.ts +2 -2
  30. package/packages/openclaw/templates/SOUL.md.template +2 -2
  31. package/packages/policy/package.json +2 -2
  32. package/packages/sdk/dist/__tests__/completion-pipeline.test.d.ts +14 -0
  33. package/packages/sdk/dist/__tests__/completion-pipeline.test.d.ts.map +1 -0
  34. package/packages/sdk/dist/__tests__/completion-pipeline.test.js +1476 -0
  35. package/packages/sdk/dist/__tests__/completion-pipeline.test.js.map +1 -0
  36. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js +2 -2
  37. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js.map +1 -1
  38. package/packages/sdk/dist/examples/example.js +1 -1
  39. package/packages/sdk/dist/examples/example.js.map +1 -1
  40. package/packages/sdk/dist/relay-adapter.js +4 -4
  41. package/packages/sdk/dist/relay-adapter.js.map +1 -1
  42. package/packages/sdk/dist/workflows/builder.d.ts +18 -3
  43. package/packages/sdk/dist/workflows/builder.d.ts.map +1 -1
  44. package/packages/sdk/dist/workflows/builder.js +24 -12
  45. package/packages/sdk/dist/workflows/builder.js.map +1 -1
  46. package/packages/sdk/dist/workflows/runner.d.ts +55 -2
  47. package/packages/sdk/dist/workflows/runner.d.ts.map +1 -1
  48. package/packages/sdk/dist/workflows/runner.js +1370 -108
  49. package/packages/sdk/dist/workflows/runner.js.map +1 -1
  50. package/packages/sdk/dist/workflows/trajectory.d.ts +6 -2
  51. package/packages/sdk/dist/workflows/trajectory.d.ts.map +1 -1
  52. package/packages/sdk/dist/workflows/trajectory.js +37 -2
  53. package/packages/sdk/dist/workflows/trajectory.js.map +1 -1
  54. package/packages/sdk/dist/workflows/types.d.ts +88 -0
  55. package/packages/sdk/dist/workflows/types.d.ts.map +1 -1
  56. package/packages/sdk/dist/workflows/types.js.map +1 -1
  57. package/packages/sdk/dist/workflows/validator.js +1 -1
  58. package/packages/sdk/dist/workflows/validator.js.map +1 -1
  59. package/packages/sdk/package.json +2 -2
  60. package/packages/sdk/src/__tests__/completion-pipeline.test.ts +1820 -0
  61. package/packages/sdk/src/__tests__/e2e-owner-review.test.ts +2 -2
  62. package/packages/sdk/src/__tests__/idle-nudge.test.ts +68 -0
  63. package/packages/sdk/src/__tests__/workflow-runner.test.ts +113 -4
  64. package/packages/sdk/src/examples/example.ts +1 -1
  65. package/packages/sdk/src/relay-adapter.ts +4 -4
  66. package/packages/sdk/src/workflows/README.md +43 -11
  67. package/packages/sdk/src/workflows/builder.ts +38 -11
  68. package/packages/sdk/src/workflows/runner.ts +1860 -127
  69. package/packages/sdk/src/workflows/schema.json +6 -0
  70. package/packages/sdk/src/workflows/trajectory.ts +52 -3
  71. package/packages/sdk/src/workflows/types.ts +149 -0
  72. package/packages/sdk/src/workflows/validator.ts +1 -1
  73. package/packages/sdk-py/pyproject.toml +1 -1
  74. package/packages/telemetry/package.json +1 -1
  75. package/packages/trajectory/package.json +2 -2
  76. package/packages/user-directory/package.json +2 -2
  77. package/packages/utils/package.json +2 -2
  78. package/relay-snippets/agent-relay-protocol.md +4 -4
  79. package/relay-snippets/agent-relay-snippet.md +9 -9
@@ -6,14 +6,24 @@
6
6
 
7
7
  import { spawn as cpSpawn, execFileSync } from 'node:child_process';
8
8
  import { randomBytes } from 'node:crypto';
9
- import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
10
- import type { WriteStream } from 'node:fs';
9
+ import {
10
+ createWriteStream,
11
+ existsSync,
12
+ mkdirSync,
13
+ readFileSync,
14
+ readdirSync,
15
+ renameSync,
16
+ statSync,
17
+ writeFileSync,
18
+ } from 'node:fs';
19
+ import type { Dirent, WriteStream } from 'node:fs';
11
20
  import { readFile, writeFile, mkdir } from 'node:fs/promises';
12
21
  import path from 'node:path';
13
22
 
14
23
  import { parse as parseYaml } from 'yaml';
15
24
  import { stripAnsi as stripAnsiFn } from '../pty.js';
16
25
  import type { BrokerEvent } from '../protocol.js';
26
+ import { resolveSpawnPolicy } from '../spawn-from-env.js';
17
27
 
18
28
  import {
19
29
  loadCustomSteps,
@@ -27,6 +37,12 @@ import type {
27
37
  AgentCli,
28
38
  AgentDefinition,
29
39
  AgentPreset,
40
+ CompletionEvidenceChannelOrigin,
41
+ CompletionEvidenceChannelPost,
42
+ CompletionEvidenceFileChange,
43
+ CompletionEvidenceSignal,
44
+ CompletionEvidenceSignalKind,
45
+ CompletionEvidenceToolSideEffect,
30
46
  DryRunReport,
31
47
  DryRunWave,
32
48
  ErrorHandlingConfig,
@@ -34,12 +50,16 @@ import type {
34
50
  PathDefinition,
35
51
  PreflightCheck,
36
52
  RelayYamlConfig,
53
+ StepCompletionDecision,
54
+ StepCompletionEvidence,
37
55
  SwarmPattern,
38
56
  VerificationCheck,
39
57
  WorkflowDefinition,
58
+ WorkflowOwnerDecision,
40
59
  WorkflowRunRow,
41
60
  WorkflowRunStatus,
42
61
  WorkflowStep,
62
+ WorkflowStepCompletionReason,
43
63
  WorkflowStepRow,
44
64
  WorkflowStepStatus,
45
65
  } from './types.js';
@@ -84,6 +104,33 @@ class SpawnExitError extends Error {
84
104
  }
85
105
  }
86
106
 
107
+ class WorkflowCompletionError extends Error {
108
+ completionReason?: WorkflowStepCompletionReason;
109
+
110
+ constructor(message: string, completionReason?: WorkflowStepCompletionReason) {
111
+ super(message);
112
+ this.name = 'WorkflowCompletionError';
113
+ this.completionReason = completionReason;
114
+ }
115
+ }
116
+
117
+ interface VerificationResult {
118
+ passed: boolean;
119
+ completionReason?: WorkflowStepCompletionReason;
120
+ error?: string;
121
+ }
122
+
123
+ interface VerificationOptions {
124
+ allowFailure?: boolean;
125
+ completionMarkerFound?: boolean;
126
+ }
127
+
128
+ interface CompletionDecisionResult {
129
+ completionReason: WorkflowStepCompletionReason;
130
+ ownerDecision?: WorkflowOwnerDecision;
131
+ reason?: string;
132
+ }
133
+
87
134
  // ── Events ──────────────────────────────────────────────────────────────────
88
135
 
89
136
  export type WorkflowEvent =
@@ -177,6 +224,10 @@ interface SpawnedAgentInfo {
177
224
 
178
225
  interface SpawnAndWaitOptions {
179
226
  agentNameSuffix?: string;
227
+ evidenceStepName?: string;
228
+ evidenceRole?: string;
229
+ logicalName?: string;
230
+ preserveOnIdle?: boolean;
180
231
  onSpawned?: (info: SpawnedAgentInfo) => void | Promise<void>;
181
232
  onChunk?: (info: { agentName: string; chunk: string }) => void;
182
233
  }
@@ -187,6 +238,37 @@ interface SupervisedRuntimeAgent {
187
238
  logicalName: string;
188
239
  }
189
240
 
241
+ interface RuntimeStepAgent {
242
+ stepName: string;
243
+ role: string;
244
+ logicalName: string;
245
+ }
246
+
247
+ interface FileSnapshotEntry {
248
+ mtimeMs: number;
249
+ size: number;
250
+ }
251
+
252
+ interface StepEvidenceRecord {
253
+ evidence: StepCompletionEvidence;
254
+ baselineSnapshots: Map<string, Map<string, FileSnapshotEntry>>;
255
+ filesCaptured: boolean;
256
+ }
257
+
258
+ interface StepSignalParticipants {
259
+ ownerSenders: Set<string>;
260
+ workerSenders: Set<string>;
261
+ }
262
+
263
+ interface ChannelEvidenceOptions {
264
+ stepName?: string;
265
+ sender?: string;
266
+ actor?: string;
267
+ role?: string;
268
+ target?: string;
269
+ origin?: CompletionEvidenceChannelOrigin;
270
+ }
271
+
190
272
  // ── CLI resolution ───────────────────────────────────────────────────────────
191
273
 
192
274
  /**
@@ -267,8 +349,16 @@ export class WorkflowRunner {
267
349
  private readonly lastActivity = new Map<string, string>();
268
350
  /** Runtime-name lookup for agents participating in supervised owner flows. */
269
351
  private readonly supervisedRuntimeAgents = new Map<string, SupervisedRuntimeAgent>();
352
+ /** Runtime-name lookup for active step agents so channel messages can be attributed to a step. */
353
+ private readonly runtimeStepAgents = new Map<string, RuntimeStepAgent>();
354
+ /** Per-step completion evidence collected across output, channel, files, and tool side-effects. */
355
+ private readonly stepCompletionEvidence = new Map<string, StepEvidenceRecord>();
356
+ /** Expected owner/worker identities per step so coordination signals can be validated by sender. */
357
+ private readonly stepSignalParticipants = new Map<string, StepSignalParticipants>();
270
358
  /** Resolved named paths from the top-level `paths` config, keyed by name → absolute directory. */
271
359
  private resolvedPaths = new Map<string, string>();
360
+ /** Tracks agent names currently assigned as reviewers (ref-counted to handle concurrent usage). */
361
+ private readonly activeReviewers = new Map<string, number>();
272
362
 
273
363
  constructor(options: WorkflowRunnerOptions = {}) {
274
364
  this.db = options.db ?? new InMemoryWorkflowDb();
@@ -363,6 +453,531 @@ export class WorkflowRunner {
363
453
  return resolved;
364
454
  }
365
455
 
456
+ private static readonly EVIDENCE_IGNORED_DIRS = new Set([
457
+ '.git',
458
+ '.agent-relay',
459
+ '.trajectories',
460
+ 'node_modules',
461
+ ]);
462
+
463
+ public getStepCompletionEvidence(stepName: string): StepCompletionEvidence | undefined {
464
+ const record = this.stepCompletionEvidence.get(stepName);
465
+ if (!record) return undefined;
466
+
467
+ const evidence = structuredClone(record.evidence);
468
+ return this.filterStepEvidenceBySignalProvenance(stepName, evidence);
469
+ }
470
+
471
+ private getOrCreateStepEvidenceRecord(stepName: string): StepEvidenceRecord {
472
+ const existing = this.stepCompletionEvidence.get(stepName);
473
+ if (existing) return existing;
474
+
475
+ const now = new Date().toISOString();
476
+ const record: StepEvidenceRecord = {
477
+ evidence: {
478
+ stepName,
479
+ lastUpdatedAt: now,
480
+ roots: [],
481
+ output: {
482
+ stdout: '',
483
+ stderr: '',
484
+ combined: '',
485
+ },
486
+ channelPosts: [],
487
+ files: [],
488
+ process: {},
489
+ toolSideEffects: [],
490
+ coordinationSignals: [],
491
+ },
492
+ baselineSnapshots: new Map(),
493
+ filesCaptured: false,
494
+ };
495
+ this.stepCompletionEvidence.set(stepName, record);
496
+ return record;
497
+ }
498
+
499
+ private initializeStepSignalParticipants(
500
+ stepName: string,
501
+ ownerSender?: string,
502
+ workerSender?: string
503
+ ): void {
504
+ this.stepSignalParticipants.set(stepName, {
505
+ ownerSenders: new Set(),
506
+ workerSenders: new Set(),
507
+ });
508
+ this.rememberStepSignalSender(stepName, 'owner', ownerSender);
509
+ this.rememberStepSignalSender(stepName, 'worker', workerSender);
510
+ }
511
+
512
+ private rememberStepSignalSender(
513
+ stepName: string,
514
+ participant: 'owner' | 'worker',
515
+ ...senders: Array<string | undefined>
516
+ ): void {
517
+ const participants =
518
+ this.stepSignalParticipants.get(stepName) ??
519
+ {
520
+ ownerSenders: new Set<string>(),
521
+ workerSenders: new Set<string>(),
522
+ };
523
+ this.stepSignalParticipants.set(stepName, participants);
524
+
525
+ const target =
526
+ participant === 'owner' ? participants.ownerSenders : participants.workerSenders;
527
+ for (const sender of senders) {
528
+ const trimmed = sender?.trim();
529
+ if (trimmed) target.add(trimmed);
530
+ }
531
+ }
532
+
533
+ private resolveSignalParticipantKind(role?: string): 'owner' | 'worker' | undefined {
534
+ const roleLC = role?.toLowerCase().trim();
535
+ if (!roleLC) return undefined;
536
+ if (/\b(owner|lead|supervisor)\b/.test(roleLC)) return 'owner';
537
+ if (/\b(worker|specialist|engineer|implementer)\b/.test(roleLC)) return 'worker';
538
+ return undefined;
539
+ }
540
+
541
+ private isSignalFromExpectedSender(stepName: string, signal: CompletionEvidenceSignal): boolean {
542
+ const expectedParticipant =
543
+ signal.kind === 'worker_done'
544
+ ? 'worker'
545
+ : signal.kind === 'lead_done'
546
+ ? 'owner'
547
+ : undefined;
548
+ if (!expectedParticipant) return true;
549
+
550
+ const participants = this.stepSignalParticipants.get(stepName);
551
+ if (!participants) return true;
552
+
553
+ const allowedSenders =
554
+ expectedParticipant === 'owner' ? participants.ownerSenders : participants.workerSenders;
555
+ if (allowedSenders.size === 0) return true;
556
+
557
+ const sender = signal.sender ?? signal.actor;
558
+ if (sender) {
559
+ return allowedSenders.has(sender);
560
+ }
561
+
562
+ const observedParticipant = this.resolveSignalParticipantKind(signal.role);
563
+ if (observedParticipant) {
564
+ return observedParticipant === expectedParticipant;
565
+ }
566
+
567
+ return signal.source !== 'channel';
568
+ }
569
+
570
+ private filterStepEvidenceBySignalProvenance(
571
+ stepName: string,
572
+ evidence: StepCompletionEvidence
573
+ ): StepCompletionEvidence {
574
+ evidence.channelPosts = evidence.channelPosts.map((post) => {
575
+ const signals = post.signals.filter((signal) =>
576
+ this.isSignalFromExpectedSender(stepName, signal)
577
+ );
578
+ return {
579
+ ...post,
580
+ completionRelevant: signals.length > 0,
581
+ signals,
582
+ };
583
+ });
584
+ evidence.coordinationSignals = evidence.coordinationSignals.filter((signal) =>
585
+ this.isSignalFromExpectedSender(stepName, signal)
586
+ );
587
+ return evidence;
588
+ }
589
+
590
+ private beginStepEvidence(stepName: string, roots: Array<string | undefined>, startedAt?: string): void {
591
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
592
+ const evidence = record.evidence;
593
+ const now = startedAt ?? new Date().toISOString();
594
+
595
+ evidence.startedAt ??= now;
596
+ evidence.status = 'running';
597
+ evidence.lastUpdatedAt = now;
598
+
599
+ for (const root of this.uniqueEvidenceRoots(roots)) {
600
+ if (!evidence.roots.includes(root)) {
601
+ evidence.roots.push(root);
602
+ }
603
+ if (!record.baselineSnapshots.has(root)) {
604
+ record.baselineSnapshots.set(root, this.captureFileSnapshot(root));
605
+ }
606
+ }
607
+ }
608
+
609
+ private captureStepTerminalEvidence(
610
+ stepName: string,
611
+ output: { stdout?: string; stderr?: string; combined?: string },
612
+ process?: { exitCode?: number; exitSignal?: string },
613
+ meta?: { sender?: string; actor?: string; role?: string }
614
+ ): void {
615
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
616
+ const evidence = record.evidence;
617
+ const observedAt = new Date().toISOString();
618
+
619
+ const append = (current: string, next?: string): string => {
620
+ if (!next) return current;
621
+ return current ? `${current}\n${next}` : next;
622
+ };
623
+
624
+ if (output.stdout) {
625
+ evidence.output.stdout = append(evidence.output.stdout, output.stdout);
626
+ for (const signal of this.extractCompletionSignals(output.stdout, 'stdout', observedAt, meta)) {
627
+ evidence.coordinationSignals.push(signal);
628
+ }
629
+ }
630
+ if (output.stderr) {
631
+ evidence.output.stderr = append(evidence.output.stderr, output.stderr);
632
+ for (const signal of this.extractCompletionSignals(output.stderr, 'stderr', observedAt, meta)) {
633
+ evidence.coordinationSignals.push(signal);
634
+ }
635
+ }
636
+
637
+ const combinedOutput =
638
+ output.combined ??
639
+ [output.stdout, output.stderr].filter((value): value is string => Boolean(value)).join('\n');
640
+ if (combinedOutput) {
641
+ evidence.output.combined = append(evidence.output.combined, combinedOutput);
642
+ }
643
+
644
+ if (process) {
645
+ if (process.exitCode !== undefined) {
646
+ evidence.process.exitCode = process.exitCode;
647
+ evidence.coordinationSignals.push({
648
+ kind: 'process_exit',
649
+ source: 'process',
650
+ text: `Process exited with code ${process.exitCode}`,
651
+ observedAt,
652
+ value: String(process.exitCode),
653
+ });
654
+ }
655
+ if (process.exitSignal !== undefined) {
656
+ evidence.process.exitSignal = process.exitSignal;
657
+ }
658
+ }
659
+
660
+ evidence.lastUpdatedAt = observedAt;
661
+ }
662
+
663
+ private finalizeStepEvidence(
664
+ stepName: string,
665
+ status: WorkflowStepStatus,
666
+ completedAt?: string,
667
+ completionReason?: WorkflowStepCompletionReason
668
+ ): void {
669
+ const record = this.stepCompletionEvidence.get(stepName);
670
+ if (!record) return;
671
+
672
+ const evidence = record.evidence;
673
+ const observedAt = completedAt ?? new Date().toISOString();
674
+ evidence.status = status;
675
+ if (status !== 'running') {
676
+ evidence.completedAt = observedAt;
677
+ }
678
+ evidence.lastUpdatedAt = observedAt;
679
+
680
+ if (!record.filesCaptured) {
681
+ const existing = new Set(evidence.files.map((file) => `${file.kind}:${file.path}`));
682
+ for (const root of evidence.roots) {
683
+ const before = record.baselineSnapshots.get(root) ?? new Map<string, FileSnapshotEntry>();
684
+ const after = this.captureFileSnapshot(root);
685
+ for (const change of this.diffFileSnapshots(before, after, root, observedAt)) {
686
+ const key = `${change.kind}:${change.path}`;
687
+ if (existing.has(key)) continue;
688
+ existing.add(key);
689
+ evidence.files.push(change);
690
+ }
691
+ }
692
+ record.filesCaptured = true;
693
+ }
694
+
695
+ if (completionReason) {
696
+ const decision = this.buildStepCompletionDecision(stepName, completionReason);
697
+ if (decision) {
698
+ void this.trajectory?.stepCompletionDecision(stepName, decision);
699
+ }
700
+ }
701
+ }
702
+
703
+ private recordStepToolSideEffect(
704
+ stepName: string,
705
+ effect: Omit<CompletionEvidenceToolSideEffect, 'observedAt'> & { observedAt?: string }
706
+ ): void {
707
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
708
+ const observedAt = effect.observedAt ?? new Date().toISOString();
709
+ record.evidence.toolSideEffects.push({
710
+ ...effect,
711
+ observedAt,
712
+ });
713
+ record.evidence.lastUpdatedAt = observedAt;
714
+ }
715
+
716
+ private recordChannelEvidence(text: string, options: ChannelEvidenceOptions = {}): void {
717
+ const stepName =
718
+ options.stepName ??
719
+ this.inferStepNameFromChannelText(text) ??
720
+ (options.actor ? this.runtimeStepAgents.get(options.actor)?.stepName : undefined);
721
+ if (!stepName) return;
722
+
723
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
724
+ const postedAt = new Date().toISOString();
725
+ const sender = options.sender ?? options.actor;
726
+ const signals = this.extractCompletionSignals(text, 'channel', postedAt, {
727
+ sender,
728
+ actor: options.actor,
729
+ role: options.role,
730
+ });
731
+
732
+ const channelPost: CompletionEvidenceChannelPost = {
733
+ stepName,
734
+ text,
735
+ postedAt,
736
+ origin: options.origin ?? 'runner_post',
737
+ completionRelevant: signals.length > 0,
738
+ sender,
739
+ actor: options.actor,
740
+ role: options.role,
741
+ target: options.target,
742
+ signals,
743
+ };
744
+
745
+ record.evidence.channelPosts.push(channelPost);
746
+ record.evidence.coordinationSignals.push(...signals);
747
+ record.evidence.lastUpdatedAt = postedAt;
748
+ }
749
+
750
+ private extractCompletionSignals(
751
+ text: string,
752
+ source: CompletionEvidenceSignal['source'],
753
+ observedAt: string,
754
+ meta?: { sender?: string; actor?: string; role?: string }
755
+ ): CompletionEvidenceSignal[] {
756
+ const signals: CompletionEvidenceSignal[] = [];
757
+ const seen = new Set<string>();
758
+ const add = (
759
+ kind: CompletionEvidenceSignalKind,
760
+ signalText: string,
761
+ value?: string
762
+ ): void => {
763
+ const trimmed = signalText.trim().slice(0, 280);
764
+ if (!trimmed) return;
765
+ const key = `${kind}:${trimmed}:${value ?? ''}`;
766
+ if (seen.has(key)) return;
767
+ seen.add(key);
768
+ signals.push({
769
+ kind,
770
+ source,
771
+ text: trimmed,
772
+ observedAt,
773
+ sender: meta?.sender,
774
+ actor: meta?.actor,
775
+ role: meta?.role,
776
+ value,
777
+ });
778
+ };
779
+
780
+ for (const match of text.matchAll(/\bWORKER_DONE\b(?::\s*([^\n]+))?/gi)) {
781
+ add('worker_done', match[0], match[1]?.trim());
782
+ }
783
+ for (const match of text.matchAll(/\bLEAD_DONE\b(?::\s*([^\n]+))?/gi)) {
784
+ add('lead_done', match[0], match[1]?.trim());
785
+ }
786
+ for (const match of text.matchAll(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/g)) {
787
+ add('step_complete', match[0], match[1]);
788
+ }
789
+ for (const match of text.matchAll(
790
+ /\bOWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi
791
+ )) {
792
+ add('owner_decision', match[0], match[1].toUpperCase());
793
+ }
794
+ for (const match of text.matchAll(/\bREVIEW_DECISION:\s*(APPROVE|REJECT)\b/gi)) {
795
+ add('review_decision', match[0], match[1].toUpperCase());
796
+ }
797
+ if (/\bverification gate observed\b|\bverification passed\b/i.test(text)) {
798
+ add('verification_passed', this.firstMeaningfulLine(text) ?? text);
799
+ }
800
+ if (/\bverification failed\b/i.test(text)) {
801
+ add('verification_failed', this.firstMeaningfulLine(text) ?? text);
802
+ }
803
+ if (
804
+ /\b(summary|handoff|ready for review|ready for handoff|task complete|work complete|completed work|finished work)\b/i.test(
805
+ text
806
+ )
807
+ ) {
808
+ add('task_summary', this.firstMeaningfulLine(text) ?? text);
809
+ }
810
+
811
+ return signals;
812
+ }
813
+
814
+ private inferStepNameFromChannelText(text: string): string | undefined {
815
+ const bracketMatch = text.match(/^\*\*\[([^\]]+)\]/);
816
+ if (bracketMatch?.[1]) return bracketMatch[1];
817
+
818
+ const markerMatch = text.match(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/);
819
+ if (markerMatch?.[1]) return markerMatch[1];
820
+
821
+ return undefined;
822
+ }
823
+
824
+ private uniqueEvidenceRoots(roots: Array<string | undefined>): string[] {
825
+ return [...new Set(roots.filter((root): root is string => Boolean(root)).map((root) => path.resolve(root)))];
826
+ }
827
+
828
+ private captureFileSnapshot(root: string): Map<string, FileSnapshotEntry> {
829
+ const snapshot = new Map<string, FileSnapshotEntry>();
830
+ if (!existsSync(root)) return snapshot;
831
+
832
+ const visit = (currentPath: string): void => {
833
+ let entries: Dirent[];
834
+ try {
835
+ entries = readdirSync(currentPath, { withFileTypes: true });
836
+ } catch {
837
+ return;
838
+ }
839
+
840
+ for (const entry of entries) {
841
+ if (entry.isDirectory() && WorkflowRunner.EVIDENCE_IGNORED_DIRS.has(entry.name)) {
842
+ continue;
843
+ }
844
+
845
+ const fullPath = path.join(currentPath, entry.name);
846
+ if (entry.isDirectory()) {
847
+ visit(fullPath);
848
+ continue;
849
+ }
850
+
851
+ try {
852
+ const stats = statSync(fullPath);
853
+ if (!stats.isFile()) continue;
854
+ snapshot.set(fullPath, { mtimeMs: stats.mtimeMs, size: stats.size });
855
+ } catch {
856
+ // Best-effort evidence collection only.
857
+ }
858
+ }
859
+ };
860
+
861
+ try {
862
+ const stats = statSync(root);
863
+ if (stats.isFile()) {
864
+ snapshot.set(root, { mtimeMs: stats.mtimeMs, size: stats.size });
865
+ return snapshot;
866
+ }
867
+ } catch {
868
+ return snapshot;
869
+ }
870
+
871
+ visit(root);
872
+ return snapshot;
873
+ }
874
+
875
+ private diffFileSnapshots(
876
+ before: Map<string, FileSnapshotEntry>,
877
+ after: Map<string, FileSnapshotEntry>,
878
+ root: string,
879
+ observedAt: string
880
+ ): CompletionEvidenceFileChange[] {
881
+ const allPaths = new Set([...before.keys(), ...after.keys()]);
882
+ const changes: CompletionEvidenceFileChange[] = [];
883
+
884
+ for (const filePath of allPaths) {
885
+ const prior = before.get(filePath);
886
+ const next = after.get(filePath);
887
+
888
+ let kind: CompletionEvidenceFileChange['kind'] | undefined;
889
+ if (!prior && next) {
890
+ kind = 'created';
891
+ } else if (prior && !next) {
892
+ kind = 'deleted';
893
+ } else if (prior && next && (prior.mtimeMs !== next.mtimeMs || prior.size !== next.size)) {
894
+ kind = 'modified';
895
+ }
896
+
897
+ if (!kind) continue;
898
+
899
+ changes.push({
900
+ path: this.normalizeEvidencePath(filePath),
901
+ kind,
902
+ observedAt,
903
+ root,
904
+ });
905
+ }
906
+
907
+ return changes.sort((a, b) => a.path.localeCompare(b.path));
908
+ }
909
+
910
+ private normalizeEvidencePath(filePath: string): string {
911
+ const relative = path.relative(this.cwd, filePath);
912
+ if (!relative || relative === '') return path.basename(filePath);
913
+ return relative.startsWith('..') ? filePath : relative;
914
+ }
915
+
916
+ private buildStepCompletionDecision(
917
+ stepName: string,
918
+ completionReason: WorkflowStepCompletionReason
919
+ ): StepCompletionDecision | undefined {
920
+ let reason: string | undefined;
921
+ let mode: StepCompletionDecision['mode'];
922
+ switch (completionReason) {
923
+ case 'completed_verified':
924
+ mode = 'verification';
925
+ reason = 'Verification passed';
926
+ break;
927
+ case 'completed_by_evidence':
928
+ mode = 'evidence';
929
+ reason = 'Completion inferred from collected evidence';
930
+ break;
931
+ case 'completed_by_owner_decision': {
932
+ const evidence = this.getStepCompletionEvidence(stepName);
933
+ const markerObserved = evidence?.coordinationSignals.some((signal) => signal.kind === 'step_complete');
934
+ mode = markerObserved ? 'marker' : 'owner_decision';
935
+ reason = markerObserved ? 'Legacy STEP_COMPLETE marker observed' : 'Owner approved completion';
936
+ break;
937
+ }
938
+ default:
939
+ return undefined;
940
+ }
941
+
942
+ return {
943
+ mode,
944
+ reason,
945
+ evidence: this.buildTrajectoryCompletionEvidence(stepName),
946
+ };
947
+ }
948
+
949
+ private buildTrajectoryCompletionEvidence(
950
+ stepName: string
951
+ ): StepCompletionDecision['evidence'] | undefined {
952
+ const evidence = this.getStepCompletionEvidence(stepName);
953
+ if (!evidence) return undefined;
954
+
955
+ const signals = evidence.coordinationSignals
956
+ .slice(-6)
957
+ .map((signal) => signal.value ?? signal.text);
958
+ const channelPosts = evidence.channelPosts
959
+ .filter((post) => post.completionRelevant)
960
+ .slice(-3)
961
+ .map((post) => post.text.slice(0, 160));
962
+ const files = evidence.files.slice(0, 6).map((file) => `${file.kind}:${file.path}`);
963
+
964
+ const summaryParts: string[] = [];
965
+ if (signals.length > 0) summaryParts.push(`${signals.length} signal(s)`);
966
+ if (channelPosts.length > 0) summaryParts.push(`${channelPosts.length} relevant channel post(s)`);
967
+ if (files.length > 0) summaryParts.push(`${files.length} file change(s)`);
968
+ if (evidence.process.exitCode !== undefined) {
969
+ summaryParts.push(`exit=${evidence.process.exitCode}`);
970
+ }
971
+
972
+ return {
973
+ summary: summaryParts.length > 0 ? summaryParts.join(', ') : undefined,
974
+ signals: signals.length > 0 ? signals : undefined,
975
+ channelPosts: channelPosts.length > 0 ? channelPosts : undefined,
976
+ files: files.length > 0 ? files : undefined,
977
+ exitCode: evidence.process.exitCode,
978
+ };
979
+ }
980
+
366
981
  // ── Progress logging ────────────────────────────────────────────────────
367
982
 
368
983
  /** Log a progress message with elapsed time since run start. */
@@ -1296,9 +1911,11 @@ export class WorkflowRunner {
1296
1911
  if (state.row.status === 'failed') {
1297
1912
  state.row.status = 'pending';
1298
1913
  state.row.error = undefined;
1914
+ state.row.completionReason = undefined;
1299
1915
  await this.db.updateStep(state.row.id, {
1300
1916
  status: 'pending',
1301
1917
  error: undefined,
1918
+ completionReason: undefined,
1302
1919
  updatedAt: new Date().toISOString(),
1303
1920
  });
1304
1921
  }
@@ -1327,6 +1944,8 @@ export class WorkflowRunner {
1327
1944
  this.currentConfig = config;
1328
1945
  this.currentRunId = runId;
1329
1946
  this.runStartTime = Date.now();
1947
+ this.runtimeStepAgents.clear();
1948
+ this.stepCompletionEvidence.clear();
1330
1949
 
1331
1950
  this.log(`Starting workflow "${workflow.name}" (${workflow.steps.length} steps)`);
1332
1951
 
@@ -1468,8 +2087,25 @@ export class WorkflowRunner {
1468
2087
  const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
1469
2088
  this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
1470
2089
 
2090
+ if (this.channel && (msg.to === this.channel || msg.to === `#${this.channel}`)) {
2091
+ const runtimeAgent = this.runtimeStepAgents.get(msg.from);
2092
+ this.recordChannelEvidence(msg.text, {
2093
+ sender: runtimeAgent?.logicalName ?? msg.from,
2094
+ actor: msg.from,
2095
+ role: runtimeAgent?.role,
2096
+ target: msg.to,
2097
+ origin: 'relay_message',
2098
+ stepName: runtimeAgent?.stepName,
2099
+ });
2100
+ }
2101
+
1471
2102
  const supervision = this.supervisedRuntimeAgents.get(msg.from);
1472
2103
  if (supervision?.role === 'owner') {
2104
+ this.recordStepToolSideEffect(supervision.stepName, {
2105
+ type: 'owner_monitoring',
2106
+ detail: `Owner messaged ${msg.to}: ${msg.text.slice(0, 120)}`,
2107
+ raw: { to: msg.to, text: msg.text },
2108
+ });
1473
2109
  void this.trajectory?.ownerMonitoringEvent(
1474
2110
  supervision.stepName,
1475
2111
  supervision.logicalName,
@@ -1651,6 +2287,7 @@ export class WorkflowRunner {
1651
2287
  updatedAt: new Date().toISOString(),
1652
2288
  });
1653
2289
  this.emit({ type: 'step:failed', runId, stepName, error: 'Cancelled' });
2290
+ this.finalizeStepEvidence(stepName, 'failed');
1654
2291
  }
1655
2292
  }
1656
2293
  this.emit({ type: 'run:cancelled', runId });
@@ -1690,6 +2327,8 @@ export class WorkflowRunner {
1690
2327
  this.lastIdleLog.clear();
1691
2328
  this.lastActivity.clear();
1692
2329
  this.supervisedRuntimeAgents.clear();
2330
+ this.runtimeStepAgents.clear();
2331
+ this.activeReviewers.clear();
1693
2332
 
1694
2333
  this.log('Shutting down broker...');
1695
2334
  await this.relay?.shutdown();
@@ -1824,6 +2463,9 @@ export class WorkflowRunner {
1824
2463
  attempts: (state?.row.retryCount ?? 0) + 1,
1825
2464
  output: state?.row.output,
1826
2465
  verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
2466
+ completionMode: state?.row.completionReason
2467
+ ? this.buildStepCompletionDecision(step.name, state.row.completionReason)?.mode
2468
+ : undefined,
1827
2469
  });
1828
2470
  }
1829
2471
  }
@@ -2029,13 +2671,24 @@ export class WorkflowRunner {
2029
2671
  const maxRetries = step.retries ?? errorHandling?.maxRetries ?? 0;
2030
2672
  const retryDelay = errorHandling?.retryDelayMs ?? 1000;
2031
2673
  let lastError: string | undefined;
2674
+ let lastCompletionReason: WorkflowStepCompletionReason | undefined;
2675
+ let lastExitCode: number | undefined;
2676
+ let lastExitSignal: string | undefined;
2032
2677
 
2033
2678
  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
2034
2679
  this.checkAborted();
2035
2680
 
2681
+ lastExitCode = undefined;
2682
+ lastExitSignal = undefined;
2683
+
2036
2684
  if (attempt > 0) {
2037
2685
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
2038
2686
  this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
2687
+ this.recordStepToolSideEffect(step.name, {
2688
+ type: 'retry',
2689
+ detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
2690
+ raw: { attempt, maxRetries },
2691
+ });
2039
2692
  state.row.retryCount = attempt;
2040
2693
  await this.db.updateStep(state.row.id, {
2041
2694
  retryCount: attempt,
@@ -2046,9 +2699,13 @@ export class WorkflowRunner {
2046
2699
 
2047
2700
  // Mark step as running
2048
2701
  state.row.status = 'running';
2702
+ state.row.error = undefined;
2703
+ state.row.completionReason = undefined;
2049
2704
  state.row.startedAt = new Date().toISOString();
2050
2705
  await this.db.updateStep(state.row.id, {
2051
2706
  status: 'running',
2707
+ error: undefined,
2708
+ completionReason: undefined,
2052
2709
  startedAt: state.row.startedAt,
2053
2710
  updatedAt: new Date().toISOString(),
2054
2711
  });
@@ -2068,11 +2725,13 @@ export class WorkflowRunner {
2068
2725
 
2069
2726
  // Resolve step workdir (named path reference) for deterministic steps
2070
2727
  const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
2728
+ this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
2071
2729
 
2072
2730
  try {
2073
2731
  // Delegate to executor if present
2074
2732
  if (this.executor?.executeDeterministicStep) {
2075
2733
  const result = await this.executor.executeDeterministicStep(step, resolvedCommand, stepCwd);
2734
+ lastExitCode = result.exitCode;
2076
2735
  const failOnError = step.failOnError !== false;
2077
2736
  if (failOnError && result.exitCode !== 0) {
2078
2737
  throw new Error(
@@ -2081,25 +2740,40 @@ export class WorkflowRunner {
2081
2740
  }
2082
2741
  const output =
2083
2742
  step.captureOutput !== false ? result.output : `Command completed (exit code ${result.exitCode})`;
2084
- if (step.verification) {
2085
- this.runVerification(step.verification, output, step.name);
2086
- }
2743
+ this.captureStepTerminalEvidence(
2744
+ step.name,
2745
+ { stdout: result.output, combined: result.output },
2746
+ { exitCode: result.exitCode }
2747
+ );
2748
+ const verificationResult = step.verification
2749
+ ? this.runVerification(step.verification, output, step.name)
2750
+ : undefined;
2087
2751
 
2088
2752
  // Mark completed
2089
2753
  state.row.status = 'completed';
2090
2754
  state.row.output = output;
2755
+ state.row.completionReason = verificationResult?.completionReason;
2091
2756
  state.row.completedAt = new Date().toISOString();
2092
2757
  await this.db.updateStep(state.row.id, {
2093
2758
  status: 'completed',
2094
2759
  output,
2760
+ completionReason: verificationResult?.completionReason,
2095
2761
  completedAt: state.row.completedAt,
2096
2762
  updatedAt: new Date().toISOString(),
2097
2763
  });
2098
2764
  await this.persistStepOutput(runId, step.name, output);
2099
2765
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2766
+ this.finalizeStepEvidence(
2767
+ step.name,
2768
+ 'completed',
2769
+ state.row.completedAt,
2770
+ verificationResult?.completionReason
2771
+ );
2100
2772
  return;
2101
2773
  }
2102
2774
 
2775
+ let commandStdout = '';
2776
+ let commandStderr = '';
2103
2777
  const output = await new Promise<string>((resolve, reject) => {
2104
2778
  const child = cpSpawn('sh', ['-c', resolvedCommand], {
2105
2779
  stdio: 'pipe',
@@ -2140,7 +2814,7 @@ export class WorkflowRunner {
2140
2814
  stderrChunks.push(chunk.toString());
2141
2815
  });
2142
2816
 
2143
- child.on('close', (code) => {
2817
+ child.on('close', (code, signal) => {
2144
2818
  if (timer) clearTimeout(timer);
2145
2819
  if (abortHandler && abortSignal) {
2146
2820
  abortSignal.removeEventListener('abort', abortHandler);
@@ -2160,6 +2834,10 @@ export class WorkflowRunner {
2160
2834
 
2161
2835
  const stdout = stdoutChunks.join('');
2162
2836
  const stderr = stderrChunks.join('');
2837
+ commandStdout = stdout;
2838
+ commandStderr = stderr;
2839
+ lastExitCode = code ?? undefined;
2840
+ lastExitSignal = signal ?? undefined;
2163
2841
 
2164
2842
  // Check exit code unless failOnError is explicitly false
2165
2843
  const failOnError = step.failOnError !== false;
@@ -2183,18 +2861,29 @@ export class WorkflowRunner {
2183
2861
  reject(new Error(`Failed to execute command: ${err.message}`));
2184
2862
  });
2185
2863
  });
2864
+ this.captureStepTerminalEvidence(
2865
+ step.name,
2866
+ {
2867
+ stdout: commandStdout || output,
2868
+ stderr: commandStderr,
2869
+ combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
2870
+ },
2871
+ { exitCode: lastExitCode, exitSignal: lastExitSignal }
2872
+ );
2186
2873
 
2187
- if (step.verification) {
2188
- this.runVerification(step.verification, output, step.name);
2189
- }
2874
+ const verificationResult = step.verification
2875
+ ? this.runVerification(step.verification, output, step.name)
2876
+ : undefined;
2190
2877
 
2191
2878
  // Mark completed
2192
2879
  state.row.status = 'completed';
2193
2880
  state.row.output = output;
2881
+ state.row.completionReason = verificationResult?.completionReason;
2194
2882
  state.row.completedAt = new Date().toISOString();
2195
2883
  await this.db.updateStep(state.row.id, {
2196
2884
  status: 'completed',
2197
2885
  output,
2886
+ completionReason: verificationResult?.completionReason,
2198
2887
  completedAt: state.row.completedAt,
2199
2888
  updatedAt: new Date().toISOString(),
2200
2889
  });
@@ -2203,15 +2892,29 @@ export class WorkflowRunner {
2203
2892
  await this.persistStepOutput(runId, step.name, output);
2204
2893
 
2205
2894
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2895
+ this.finalizeStepEvidence(
2896
+ step.name,
2897
+ 'completed',
2898
+ state.row.completedAt,
2899
+ verificationResult?.completionReason
2900
+ );
2206
2901
  return;
2207
2902
  } catch (err) {
2208
2903
  lastError = err instanceof Error ? err.message : String(err);
2904
+ lastCompletionReason =
2905
+ err instanceof WorkflowCompletionError ? err.completionReason : undefined;
2209
2906
  }
2210
2907
  }
2211
2908
 
2212
2909
  const errorMsg = lastError ?? 'Unknown error';
2213
2910
  this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
2214
- await this.markStepFailed(state, errorMsg, runId);
2911
+ await this.markStepFailed(
2912
+ state,
2913
+ errorMsg,
2914
+ runId,
2915
+ { exitCode: lastExitCode, exitSignal: lastExitSignal },
2916
+ lastCompletionReason
2917
+ );
2215
2918
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
2216
2919
  }
2217
2920
 
@@ -2227,14 +2930,20 @@ export class WorkflowRunner {
2227
2930
  ): Promise<void> {
2228
2931
  const state = stepStates.get(step.name);
2229
2932
  if (!state) throw new Error(`Step state not found: ${step.name}`);
2933
+ let lastExitCode: number | undefined;
2934
+ let lastExitSignal: string | undefined;
2230
2935
 
2231
2936
  this.checkAborted();
2232
2937
 
2233
2938
  // Mark step as running
2234
2939
  state.row.status = 'running';
2940
+ state.row.error = undefined;
2941
+ state.row.completionReason = undefined;
2235
2942
  state.row.startedAt = new Date().toISOString();
2236
2943
  await this.db.updateStep(state.row.id, {
2237
2944
  status: 'running',
2945
+ error: undefined,
2946
+ completionReason: undefined,
2238
2947
  startedAt: state.row.startedAt,
2239
2948
  updatedAt: new Date().toISOString(),
2240
2949
  });
@@ -2254,6 +2963,7 @@ export class WorkflowRunner {
2254
2963
 
2255
2964
  // Resolve workdir for worktree steps (same as deterministic/agent steps)
2256
2965
  const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
2966
+ this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
2257
2967
 
2258
2968
  if (!branch) {
2259
2969
  const errorMsg = 'Worktree step missing required "branch" field';
@@ -2298,6 +3008,10 @@ export class WorkflowRunner {
2298
3008
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
2299
3009
  }
2300
3010
 
3011
+ let commandStdout = '';
3012
+ let commandStderr = '';
3013
+ let commandExitCode: number | undefined;
3014
+ let commandExitSignal: string | undefined;
2301
3015
  const output = await new Promise<string>((resolve, reject) => {
2302
3016
  const child = cpSpawn('sh', ['-c', worktreeCmd], {
2303
3017
  stdio: 'pipe',
@@ -2338,7 +3052,7 @@ export class WorkflowRunner {
2338
3052
  stderrChunks.push(chunk.toString());
2339
3053
  });
2340
3054
 
2341
- child.on('close', (code) => {
3055
+ child.on('close', (code, signal) => {
2342
3056
  if (timer) clearTimeout(timer);
2343
3057
  if (abortHandler && abortSignal) {
2344
3058
  abortSignal.removeEventListener('abort', abortHandler);
@@ -2356,7 +3070,13 @@ export class WorkflowRunner {
2356
3070
  return;
2357
3071
  }
2358
3072
 
3073
+ commandStdout = stdoutChunks.join('');
2359
3074
  const stderr = stderrChunks.join('');
3075
+ commandStderr = stderr;
3076
+ commandExitCode = code ?? undefined;
3077
+ commandExitSignal = signal ?? undefined;
3078
+ lastExitCode = commandExitCode;
3079
+ lastExitSignal = commandExitSignal;
2360
3080
 
2361
3081
  if (code !== 0 && code !== null) {
2362
3082
  reject(
@@ -2379,6 +3099,15 @@ export class WorkflowRunner {
2379
3099
  reject(new Error(`Failed to execute git worktree command: ${err.message}`));
2380
3100
  });
2381
3101
  });
3102
+ this.captureStepTerminalEvidence(
3103
+ step.name,
3104
+ {
3105
+ stdout: commandStdout || output,
3106
+ stderr: commandStderr,
3107
+ combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
3108
+ },
3109
+ { exitCode: commandExitCode, exitSignal: commandExitSignal }
3110
+ );
2382
3111
 
2383
3112
  // Mark completed
2384
3113
  state.row.status = 'completed';
@@ -2398,10 +3127,19 @@ export class WorkflowRunner {
2398
3127
  this.postToChannel(
2399
3128
  `**[${step.name}]** Worktree created at: ${output}\n Branch: ${branch}${!branchExists && createBranch ? ' (created)' : ''}`
2400
3129
  );
3130
+ this.recordStepToolSideEffect(step.name, {
3131
+ type: 'worktree_created',
3132
+ detail: `Worktree created at ${output}`,
3133
+ raw: { branch, createdBranch: !branchExists && createBranch },
3134
+ });
3135
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt);
2401
3136
  } catch (err) {
2402
3137
  const errorMsg = err instanceof Error ? err.message : String(err);
2403
3138
  this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
2404
- await this.markStepFailed(state, errorMsg, runId);
3139
+ await this.markStepFailed(state, errorMsg, runId, {
3140
+ exitCode: lastExitCode,
3141
+ exitSignal: lastExitSignal,
3142
+ });
2405
3143
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
2406
3144
  }
2407
3145
  }
@@ -2429,8 +3167,13 @@ export class WorkflowRunner {
2429
3167
  }
2430
3168
  const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
2431
3169
  const usesOwnerFlow = specialistDef.interactive !== false;
2432
- const ownerDef = usesOwnerFlow ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
2433
- const reviewDef = usesOwnerFlow ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
3170
+ const currentPattern = this.currentConfig?.swarm?.pattern ?? '';
3171
+ const isHubPattern = WorkflowRunner.HUB_PATTERNS.has(currentPattern);
3172
+ const usesAutoHardening = usesOwnerFlow && isHubPattern && !this.isExplicitInteractiveWorker(specialistDef);
3173
+ const ownerDef = usesAutoHardening ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
3174
+ // Reviewer resolution is deferred to just before the review gate runs (see below)
3175
+ // so that activeReviewers is up-to-date for concurrent steps.
3176
+ let reviewDef: ReturnType<typeof this.resolveAutoReviewAgent> | undefined;
2434
3177
  const supervised: SupervisedStep = {
2435
3178
  specialist: specialistDef,
2436
3179
  owner: ownerDef,
@@ -2454,7 +3197,13 @@ export class WorkflowRunner {
2454
3197
  let lastError: string | undefined;
2455
3198
  let lastExitCode: number | undefined;
2456
3199
  let lastExitSignal: string | undefined;
3200
+ let lastCompletionReason: WorkflowStepCompletionReason | undefined;
2457
3201
 
3202
+ // OWNER_DECISION: INCOMPLETE_RETRY is enforced here at the attempt-loop level so every
3203
+ // interactive execution path shares the same contract:
3204
+ // - retries remaining => throw back into the loop and retry
3205
+ // - maxRetries = 0 => fail immediately after the first retry request
3206
+ // - retry budget exhausted => fail with retry_requested_by_owner, never "completed"
2458
3207
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
2459
3208
  this.checkAborted();
2460
3209
 
@@ -2465,6 +3214,11 @@ export class WorkflowRunner {
2465
3214
  if (attempt > 0) {
2466
3215
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
2467
3216
  this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
3217
+ this.recordStepToolSideEffect(step.name, {
3218
+ type: 'retry',
3219
+ detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
3220
+ raw: { attempt, maxRetries },
3221
+ });
2468
3222
  state.row.retryCount = attempt;
2469
3223
  await this.db.updateStep(state.row.id, {
2470
3224
  retryCount: attempt,
@@ -2477,16 +3231,21 @@ export class WorkflowRunner {
2477
3231
  try {
2478
3232
  // Mark step as running
2479
3233
  state.row.status = 'running';
3234
+ state.row.error = undefined;
3235
+ state.row.completionReason = undefined;
2480
3236
  state.row.startedAt = new Date().toISOString();
2481
3237
  await this.db.updateStep(state.row.id, {
2482
3238
  status: 'running',
3239
+ error: undefined,
3240
+ completionReason: undefined,
2483
3241
  startedAt: state.row.startedAt,
2484
3242
  updatedAt: new Date().toISOString(),
2485
3243
  });
2486
3244
  this.emit({ type: 'step:started', runId, stepName: step.name });
2487
- this.postToChannel(
2488
- `**[${step.name}]** Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`
3245
+ this.log(
3246
+ `[${step.name}] Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`
2489
3247
  );
3248
+ this.initializeStepSignalParticipants(step.name, ownerDef.name, specialistDef.name);
2490
3249
  await this.trajectory?.stepStarted(step, ownerDef.name, {
2491
3250
  role: usesDedicatedOwner ? 'owner' : 'specialist',
2492
3251
  owner: ownerDef.name,
@@ -2539,10 +3298,21 @@ export class WorkflowRunner {
2539
3298
  };
2540
3299
  const effectiveSpecialist = applyStepWorkdir(specialistDef);
2541
3300
  const effectiveOwner = applyStepWorkdir(ownerDef);
3301
+ const effectiveReviewer = reviewDef ? applyStepWorkdir(reviewDef) : undefined;
3302
+ this.beginStepEvidence(
3303
+ step.name,
3304
+ [
3305
+ this.resolveAgentCwd(effectiveSpecialist),
3306
+ this.resolveAgentCwd(effectiveOwner),
3307
+ effectiveReviewer ? this.resolveAgentCwd(effectiveReviewer) : undefined,
3308
+ ],
3309
+ state.row.startedAt
3310
+ );
2542
3311
 
2543
3312
  let specialistOutput: string;
2544
3313
  let ownerOutput: string;
2545
3314
  let ownerElapsed: number;
3315
+ let completionReason: WorkflowStepCompletionReason | undefined;
2546
3316
 
2547
3317
  if (usesDedicatedOwner) {
2548
3318
  const result = await this.executeSupervisedAgentStep(
@@ -2554,60 +3324,162 @@ export class WorkflowRunner {
2554
3324
  specialistOutput = result.specialistOutput;
2555
3325
  ownerOutput = result.ownerOutput;
2556
3326
  ownerElapsed = result.ownerElapsed;
3327
+ completionReason = result.completionReason;
2557
3328
  } else {
2558
3329
  const ownerTask = this.injectStepOwnerContract(step, resolvedTask, effectiveOwner, effectiveSpecialist);
3330
+ const explicitInteractiveWorker = this.isExplicitInteractiveWorker(effectiveOwner);
3331
+ let explicitWorkerHandle: Agent | undefined;
3332
+ let explicitWorkerCompleted = false;
3333
+ let explicitWorkerOutput = '';
2559
3334
 
2560
3335
  this.log(`[${step.name}] Spawning owner "${effectiveOwner.name}" (cli: ${effectiveOwner.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
2561
3336
  const resolvedStep = { ...step, task: ownerTask };
2562
3337
  const ownerStartTime = Date.now();
2563
3338
  const spawnResult = this.executor
2564
3339
  ? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs)
2565
- : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs);
3340
+ : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs, {
3341
+ evidenceStepName: step.name,
3342
+ evidenceRole: usesOwnerFlow ? 'owner' : 'specialist',
3343
+ preserveOnIdle: (!isHubPattern || !this.isLeadLikeAgent(effectiveOwner)) ? false : undefined,
3344
+ logicalName: effectiveOwner.name,
3345
+ onSpawned: explicitInteractiveWorker
3346
+ ? ({ agent }) => {
3347
+ explicitWorkerHandle = agent;
3348
+ }
3349
+ : undefined,
3350
+ onChunk: explicitInteractiveWorker
3351
+ ? ({ chunk }) => {
3352
+ explicitWorkerOutput += WorkflowRunner.stripAnsi(chunk);
3353
+ if (
3354
+ !explicitWorkerCompleted &&
3355
+ this.hasExplicitInteractiveWorkerCompletionEvidence(
3356
+ step,
3357
+ explicitWorkerOutput,
3358
+ ownerTask,
3359
+ resolvedTask
3360
+ )
3361
+ ) {
3362
+ explicitWorkerCompleted = true;
3363
+ void explicitWorkerHandle?.release().catch(() => undefined);
3364
+ }
3365
+ }
3366
+ : undefined,
3367
+ });
2566
3368
  const output = typeof spawnResult === 'string' ? spawnResult : spawnResult.output;
2567
3369
  lastExitCode = typeof spawnResult === 'string' ? undefined : spawnResult.exitCode;
2568
3370
  lastExitSignal = typeof spawnResult === 'string' ? undefined : spawnResult.exitSignal;
2569
3371
  ownerElapsed = Date.now() - ownerStartTime;
2570
3372
  this.log(`[${step.name}] Owner "${effectiveOwner.name}" exited`);
2571
3373
  if (usesOwnerFlow) {
2572
- this.assertOwnerCompletionMarker(step, output, ownerTask);
3374
+ try {
3375
+ const completionDecision = this.resolveOwnerCompletionDecision(
3376
+ step,
3377
+ output,
3378
+ output,
3379
+ ownerTask,
3380
+ resolvedTask
3381
+ );
3382
+ completionReason = completionDecision.completionReason;
3383
+ } catch (error) {
3384
+ const canUseVerificationFallback =
3385
+ !usesDedicatedOwner &&
3386
+ step.verification &&
3387
+ error instanceof WorkflowCompletionError &&
3388
+ error.completionReason === 'failed_no_evidence';
3389
+ if (!canUseVerificationFallback) {
3390
+ throw error;
3391
+ }
3392
+ }
2573
3393
  }
2574
3394
  specialistOutput = output;
2575
3395
  ownerOutput = output;
2576
3396
  }
2577
3397
 
2578
- // Run verification if configured
2579
- if (step.verification) {
2580
- this.runVerification(
3398
+ // Even non-interactive steps can emit an explicit OWNER_DECISION contract.
3399
+ // Honor retry/fail/clarification signals before verification-driven success so
3400
+ // real runs stay consistent with interactive owner flows.
3401
+ if (!usesOwnerFlow) {
3402
+ const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
3403
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
3404
+ throw new WorkflowCompletionError(
3405
+ `Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3406
+ 'retry_requested_by_owner'
3407
+ );
3408
+ }
3409
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
3410
+ throw new WorkflowCompletionError(
3411
+ `Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3412
+ 'failed_owner_decision'
3413
+ );
3414
+ }
3415
+ if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
3416
+ throw new WorkflowCompletionError(
3417
+ `Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`,
3418
+ 'retry_requested_by_owner'
3419
+ );
3420
+ }
3421
+ }
3422
+
3423
+ // Run verification if configured.
3424
+ // Self-owned interactive steps still need verification fallback so
3425
+ // explicit OWNER_DECISION output is not mandatory for the happy path.
3426
+ if (step.verification && (!usesOwnerFlow || !usesDedicatedOwner) && !completionReason) {
3427
+ const verificationResult = this.runVerification(
2581
3428
  step.verification,
2582
3429
  specialistOutput,
2583
3430
  step.name,
2584
3431
  effectiveOwner.interactive === false ? undefined : resolvedTask
2585
3432
  );
3433
+ completionReason = verificationResult.completionReason;
3434
+ }
3435
+
3436
+ // Retry-style owner decisions are control-flow signals, not terminal success states.
3437
+ // Guard here so they cannot accidentally fall through into review or completed-step
3438
+ // persistence if a future branch returns a completionReason instead of throwing.
3439
+ if (completionReason === 'retry_requested_by_owner') {
3440
+ throw new WorkflowCompletionError(
3441
+ `Step "${step.name}" owner requested another attempt`,
3442
+ 'retry_requested_by_owner'
3443
+ );
2586
3444
  }
2587
3445
 
2588
3446
  // Every interactive step gets a review pass; pick a dedicated reviewer when available.
3447
+ // Resolve reviewer JIT so activeReviewers reflects concurrent steps that started earlier.
3448
+ if (usesAutoHardening && usesDedicatedOwner && !reviewDef) {
3449
+ reviewDef = this.resolveAutoReviewAgent(ownerDef, agentMap);
3450
+ supervised.reviewer = reviewDef;
3451
+ }
2589
3452
  let combinedOutput = specialistOutput;
2590
3453
  if (usesOwnerFlow && reviewDef) {
2591
- const remainingMs = timeoutMs ? Math.max(0, timeoutMs - ownerElapsed) : undefined;
2592
- const reviewOutput = await this.runStepReviewGate(
2593
- step,
2594
- resolvedTask,
2595
- specialistOutput,
2596
- ownerOutput,
2597
- ownerDef,
2598
- reviewDef,
2599
- remainingMs
2600
- );
2601
- combinedOutput = this.combineStepAndReviewOutput(specialistOutput, reviewOutput);
3454
+ this.activeReviewers.set(reviewDef.name, (this.activeReviewers.get(reviewDef.name) ?? 0) + 1);
3455
+ try {
3456
+ const remainingMs = timeoutMs ? Math.max(0, timeoutMs - ownerElapsed) : undefined;
3457
+ const reviewOutput = await this.runStepReviewGate(
3458
+ step,
3459
+ resolvedTask,
3460
+ specialistOutput,
3461
+ ownerOutput,
3462
+ ownerDef,
3463
+ reviewDef,
3464
+ remainingMs
3465
+ );
3466
+ combinedOutput = this.combineStepAndReviewOutput(specialistOutput, reviewOutput);
3467
+ } finally {
3468
+ const count = (this.activeReviewers.get(reviewDef.name) ?? 1) - 1;
3469
+ if (count <= 0) this.activeReviewers.delete(reviewDef.name);
3470
+ else this.activeReviewers.set(reviewDef.name, count);
3471
+ }
2602
3472
  }
2603
3473
 
2604
3474
  // Mark completed
2605
3475
  state.row.status = 'completed';
2606
3476
  state.row.output = combinedOutput;
3477
+ state.row.completionReason = completionReason;
2607
3478
  state.row.completedAt = new Date().toISOString();
2608
3479
  await this.db.updateStep(state.row.id, {
2609
3480
  status: 'completed',
2610
3481
  output: combinedOutput,
3482
+ completionReason,
2611
3483
  completedAt: state.row.completedAt,
2612
3484
  updatedAt: new Date().toISOString(),
2613
3485
  });
@@ -2616,10 +3488,21 @@ export class WorkflowRunner {
2616
3488
  await this.persistStepOutput(runId, step.name, combinedOutput);
2617
3489
 
2618
3490
  this.emit({ type: 'step:completed', runId, stepName: step.name, output: combinedOutput, exitCode: lastExitCode, exitSignal: lastExitSignal });
3491
+ this.finalizeStepEvidence(
3492
+ step.name,
3493
+ 'completed',
3494
+ state.row.completedAt,
3495
+ completionReason
3496
+ );
2619
3497
  await this.trajectory?.stepCompleted(step, combinedOutput, attempt + 1);
2620
3498
  return;
2621
3499
  } catch (err) {
2622
3500
  lastError = err instanceof Error ? err.message : String(err);
3501
+ lastCompletionReason =
3502
+ err instanceof WorkflowCompletionError ? err.completionReason : undefined;
3503
+ if (lastCompletionReason === 'retry_requested_by_owner' && attempt >= maxRetries) {
3504
+ lastError = this.buildOwnerRetryBudgetExceededMessage(step.name, maxRetries, lastError);
3505
+ }
2623
3506
  if (err instanceof SpawnExitError) {
2624
3507
  lastExitCode = err.exitCode;
2625
3508
  lastExitSignal = err.exitSignal;
@@ -2649,20 +3532,49 @@ export class WorkflowRunner {
2649
3532
  await this.markStepFailed(state, lastError ?? 'Unknown error', runId, {
2650
3533
  exitCode: lastExitCode,
2651
3534
  exitSignal: lastExitSignal,
2652
- });
3535
+ }, lastCompletionReason);
2653
3536
  throw new Error(
2654
3537
  `Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`
2655
3538
  );
2656
3539
  }
2657
3540
 
2658
- private injectStepOwnerContract(
2659
- step: WorkflowStep,
2660
- resolvedTask: string,
2661
- ownerDef: AgentDefinition,
2662
- specialistDef: AgentDefinition
3541
+ private buildOwnerRetryBudgetExceededMessage(
3542
+ stepName: string,
3543
+ maxRetries: number,
3544
+ ownerDecisionError?: string
2663
3545
  ): string {
2664
- if (ownerDef.interactive === false) return resolvedTask;
2665
- const specialistNote =
3546
+ const attempts = maxRetries + 1;
3547
+ const prefix = `Step "${stepName}" `;
3548
+ const normalizedDecision = ownerDecisionError?.startsWith(prefix)
3549
+ ? ownerDecisionError.slice(prefix.length).trim()
3550
+ : ownerDecisionError?.trim();
3551
+ const decisionSuffix = normalizedDecision
3552
+ ? ` Latest owner decision: ${normalizedDecision}`
3553
+ : '';
3554
+
3555
+ if (maxRetries === 0) {
3556
+ return (
3557
+ `Step "${stepName}" owner requested another attempt, but no retries are configured ` +
3558
+ `(maxRetries=0). Configure retries > 0 to allow OWNER_DECISION: INCOMPLETE_RETRY.` +
3559
+ decisionSuffix
3560
+ );
3561
+ }
3562
+
3563
+ return (
3564
+ `Step "${stepName}" owner requested another attempt after ${attempts} total attempts, ` +
3565
+ `but the retry budget is exhausted (maxRetries=${maxRetries}).` +
3566
+ decisionSuffix
3567
+ );
3568
+ }
3569
+
3570
+ private injectStepOwnerContract(
3571
+ step: WorkflowStep,
3572
+ resolvedTask: string,
3573
+ ownerDef: AgentDefinition,
3574
+ specialistDef: AgentDefinition
3575
+ ): string {
3576
+ if (ownerDef.interactive === false) return resolvedTask;
3577
+ const specialistNote =
2666
3578
  ownerDef.name === specialistDef.name
2667
3579
  ? ''
2668
3580
  : `Specialist intended for this step: "${specialistDef.name}" (${specialistDef.role ?? specialistDef.cli}).`;
@@ -2673,7 +3585,10 @@ export class WorkflowRunner {
2673
3585
  `- You are the accountable owner for step "${step.name}".\n` +
2674
3586
  (specialistNote ? `- ${specialistNote}\n` : '') +
2675
3587
  `- If you delegate, you must still verify completion yourself.\n` +
2676
- `- Before exiting, provide an explicit completion line: STEP_COMPLETE:${step.name}\n` +
3588
+ `- Preferred final decision format:\n` +
3589
+ ` OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
3590
+ ` REASON: <one sentence>\n` +
3591
+ `- Legacy completion marker still supported: STEP_COMPLETE:${step.name}\n` +
2677
3592
  `- Then self-terminate immediately with /exit.`
2678
3593
  );
2679
3594
  }
@@ -2686,6 +3601,10 @@ export class WorkflowRunner {
2686
3601
  ): string {
2687
3602
  const verificationGuide = this.buildSupervisorVerificationGuide(step.verification);
2688
3603
  const channelLine = this.channel ? `#${this.channel}` : '(workflow channel unavailable)';
3604
+ const channelContract = this.channel
3605
+ ? `- Prefer Relaycast/group-chat handoff signals over terminal sentinels: wait for the worker to post \`WORKER_DONE: <brief summary>\` in ${channelLine}\n` +
3606
+ `- When you have validated the handoff, post \`LEAD_DONE: <brief summary>\` to ${channelLine} before you exit\n`
3607
+ : '';
2689
3608
  return (
2690
3609
  `You are the step owner/supervisor for step "${step.name}".\n\n` +
2691
3610
  `Worker: ${supervised.specialist.name} (runtime: ${workerRuntimeName}) on ${channelLine}\n` +
@@ -2695,9 +3614,29 @@ export class WorkflowRunner {
2695
3614
  `- Watch ${channelLine} for the worker's progress messages and mirrored PTY output\n` +
2696
3615
  `- Check file changes: run \`git diff --stat\` or inspect expected files directly\n` +
2697
3616
  `- Ask the worker directly on ${channelLine} if you need a status update\n` +
3617
+ channelContract +
2698
3618
  verificationGuide +
2699
- `\nWhen you're satisfied the work is done correctly:\n` +
2700
- `Output exactly: STEP_COMPLETE:${step.name}`
3619
+ `\nWhen you have enough evidence, return:\n` +
3620
+ `OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
3621
+ `REASON: <one sentence>\n` +
3622
+ `Legacy completion marker still supported: STEP_COMPLETE:${step.name}`
3623
+ );
3624
+ }
3625
+
3626
+ private buildWorkerHandoffTask(
3627
+ step: WorkflowStep,
3628
+ originalTask: string,
3629
+ supervised: SupervisedStep
3630
+ ): string {
3631
+ if (!this.channel) return originalTask;
3632
+
3633
+ return (
3634
+ `${originalTask}\n\n---\n` +
3635
+ `WORKER COMPLETION CONTRACT:\n` +
3636
+ `- You are handing work off to owner "${supervised.owner.name}" for step "${step.name}".\n` +
3637
+ `- When your work is ready for review, post to #${this.channel}: \`WORKER_DONE: <brief summary>\`\n` +
3638
+ `- Do not rely on terminal output alone for handoff; use the workflow group chat signal above.\n` +
3639
+ `- After posting your handoff signal, self-terminate with /exit unless the owner asks for follow-up.`
2701
3640
  );
2702
3641
  }
2703
3642
 
@@ -2722,15 +3661,21 @@ export class WorkflowRunner {
2722
3661
  supervised: SupervisedStep,
2723
3662
  resolvedTask: string,
2724
3663
  timeoutMs?: number
2725
- ): Promise<{ specialistOutput: string; ownerOutput: string; ownerElapsed: number }> {
3664
+ ): Promise<{
3665
+ specialistOutput: string;
3666
+ ownerOutput: string;
3667
+ ownerElapsed: number;
3668
+ completionReason: WorkflowStepCompletionReason;
3669
+ }> {
2726
3670
  if (this.executor) {
3671
+ const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
2727
3672
  const supervisorTask = this.buildOwnerSupervisorTask(
2728
3673
  step,
2729
3674
  resolvedTask,
2730
3675
  supervised,
2731
3676
  supervised.specialist.name
2732
3677
  );
2733
- const specialistStep = { ...step, task: resolvedTask };
3678
+ const specialistStep = { ...step, task: specialistTask };
2734
3679
  const ownerStep: WorkflowStep = {
2735
3680
  ...step,
2736
3681
  name: `${step.name}-owner`,
@@ -2744,7 +3689,7 @@ export class WorkflowRunner {
2744
3689
  const specialistPromise = this.executor.executeAgentStep(
2745
3690
  specialistStep,
2746
3691
  supervised.specialist,
2747
- resolvedTask,
3692
+ specialistTask,
2748
3693
  timeoutMs
2749
3694
  );
2750
3695
  // Guard against unhandled rejection if owner fails before specialist settles
@@ -2759,10 +3704,20 @@ export class WorkflowRunner {
2759
3704
  timeoutMs
2760
3705
  );
2761
3706
  const ownerElapsed = Date.now() - ownerStartTime;
2762
-
2763
- this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2764
3707
  const specialistOutput = await specialistPromise;
2765
- return { specialistOutput, ownerOutput, ownerElapsed };
3708
+ const completionDecision = this.resolveOwnerCompletionDecision(
3709
+ step,
3710
+ ownerOutput,
3711
+ specialistOutput,
3712
+ supervisorTask,
3713
+ resolvedTask
3714
+ );
3715
+ return {
3716
+ specialistOutput,
3717
+ ownerOutput,
3718
+ ownerElapsed,
3719
+ completionReason: completionDecision.completionReason,
3720
+ };
2766
3721
  } catch (error) {
2767
3722
  await specialistSettled;
2768
3723
  throw error;
@@ -2780,12 +3735,16 @@ export class WorkflowRunner {
2780
3735
  rejectWorkerSpawn = reject;
2781
3736
  });
2782
3737
 
2783
- const specialistStep = { ...step, task: resolvedTask };
3738
+ const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
3739
+ const specialistStep = { ...step, task: specialistTask };
2784
3740
  this.log(
2785
3741
  `[${step.name}] Spawning specialist "${supervised.specialist.name}" (cli: ${supervised.specialist.cli})`
2786
3742
  );
2787
3743
  const workerPromise = this.spawnAndWait(supervised.specialist, specialistStep, timeoutMs, {
2788
3744
  agentNameSuffix: 'worker',
3745
+ evidenceStepName: step.name,
3746
+ evidenceRole: 'worker',
3747
+ logicalName: supervised.specialist.name,
2789
3748
  onSpawned: ({ actualName, agent }) => {
2790
3749
  workerHandle = agent;
2791
3750
  workerRuntimeName = actualName;
@@ -2800,7 +3759,13 @@ export class WorkflowRunner {
2800
3759
  }
2801
3760
  },
2802
3761
  onChunk: ({ agentName, chunk }) => {
2803
- this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk);
3762
+ this.forwardAgentChunkToChannel(
3763
+ step.name,
3764
+ 'Worker',
3765
+ agentName,
3766
+ chunk,
3767
+ supervised.specialist.name
3768
+ );
2804
3769
  },
2805
3770
  }).catch((error) => {
2806
3771
  if (!workerSpawned) {
@@ -2814,10 +3779,15 @@ export class WorkflowRunner {
2814
3779
  workerPromise
2815
3780
  .then((result) => {
2816
3781
  workerReleased = true;
2817
- this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited`);
3782
+ this.log(`[${step.name}] Worker ${workerRuntimeName} exited`);
3783
+ this.recordStepToolSideEffect(step.name, {
3784
+ type: 'worker_exit',
3785
+ detail: `Worker ${workerRuntimeName} exited`,
3786
+ raw: { worker: workerRuntimeName, exitCode: result.exitCode, exitSignal: result.exitSignal },
3787
+ });
2818
3788
  if (step.verification?.type === 'output_contains' && result.output.includes(step.verification.value)) {
2819
- this.postToChannel(
2820
- `**[${step.name}]** Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`
3789
+ this.log(
3790
+ `[${step.name}] Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`
2821
3791
  );
2822
3792
  }
2823
3793
  })
@@ -2826,6 +3796,11 @@ export class WorkflowRunner {
2826
3796
  this.postToChannel(
2827
3797
  `**[${step.name}]** Worker \`${workerRuntimeName}\` exited with error: ${message}`
2828
3798
  );
3799
+ this.recordStepToolSideEffect(step.name, {
3800
+ type: 'worker_error',
3801
+ detail: `Worker ${workerRuntimeName} exited with error: ${message}`,
3802
+ raw: { worker: workerRuntimeName, error: message },
3803
+ });
2829
3804
  });
2830
3805
 
2831
3806
  await workerReady;
@@ -2844,6 +3819,9 @@ export class WorkflowRunner {
2844
3819
  try {
2845
3820
  const ownerResultObj = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, {
2846
3821
  agentNameSuffix: 'owner',
3822
+ evidenceStepName: step.name,
3823
+ evidenceRole: 'owner',
3824
+ logicalName: supervised.owner.name,
2847
3825
  onSpawned: ({ actualName }) => {
2848
3826
  this.supervisedRuntimeAgents.set(actualName, {
2849
3827
  stepName: step.name,
@@ -2858,10 +3836,20 @@ export class WorkflowRunner {
2858
3836
  const ownerElapsed = Date.now() - ownerStartTime;
2859
3837
  const ownerOutput = ownerResultObj.output;
2860
3838
  this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
2861
- this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2862
-
2863
3839
  const specialistOutput = (await workerPromise).output;
2864
- return { specialistOutput, ownerOutput, ownerElapsed };
3840
+ const completionDecision = this.resolveOwnerCompletionDecision(
3841
+ step,
3842
+ ownerOutput,
3843
+ specialistOutput,
3844
+ supervisorTask,
3845
+ resolvedTask
3846
+ );
3847
+ return {
3848
+ specialistOutput,
3849
+ ownerOutput,
3850
+ ownerElapsed,
3851
+ completionReason: completionDecision.completionReason,
3852
+ };
2865
3853
  } catch (error) {
2866
3854
  const message = error instanceof Error ? error.message : String(error);
2867
3855
  if (!workerReleased && workerHandle) {
@@ -2879,15 +3867,22 @@ export class WorkflowRunner {
2879
3867
  stepName: string,
2880
3868
  roleLabel: string,
2881
3869
  agentName: string,
2882
- chunk: string
3870
+ chunk: string,
3871
+ sender?: string
2883
3872
  ): void {
2884
- const lines = WorkflowRunner.stripAnsi(chunk)
3873
+ const lines = WorkflowRunner.scrubForChannel(chunk)
2885
3874
  .split('\n')
2886
3875
  .map((line) => line.trim())
2887
3876
  .filter(Boolean)
2888
3877
  .slice(0, 3);
2889
3878
  for (const line of lines) {
2890
- this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`);
3879
+ this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`, {
3880
+ stepName,
3881
+ sender,
3882
+ actor: agentName,
3883
+ role: roleLabel,
3884
+ origin: 'forwarded_chunk',
3885
+ });
2891
3886
  }
2892
3887
  }
2893
3888
 
@@ -2904,6 +3899,11 @@ export class WorkflowRunner {
2904
3899
  if (/STEP_COMPLETE:/i.test(stripped)) details.push('Declared the step complete');
2905
3900
 
2906
3901
  for (const detail of details) {
3902
+ this.recordStepToolSideEffect(step.name, {
3903
+ type: 'owner_monitoring',
3904
+ detail,
3905
+ raw: { output: stripped.slice(0, 240), owner: ownerDef.name },
3906
+ });
2907
3907
  await this.trajectory?.ownerMonitoringEvent(step.name, ownerDef.name, detail, {
2908
3908
  output: stripped.slice(0, 240),
2909
3909
  });
@@ -2947,6 +3947,8 @@ export class WorkflowRunner {
2947
3947
  agentMap: Map<string, AgentDefinition>
2948
3948
  ): AgentDefinition {
2949
3949
  const allDefs = [...agentMap.values()].map((d) => WorkflowRunner.resolveAgentDef(d));
3950
+ const eligible = (def: AgentDefinition): boolean =>
3951
+ def.name !== ownerDef.name && !this.isExplicitInteractiveWorker(def);
2950
3952
  const isReviewer = (def: AgentDefinition): boolean => {
2951
3953
  const roleLC = def.role?.toLowerCase() ?? '';
2952
3954
  const nameLC = def.name.toLowerCase();
@@ -2968,35 +3970,343 @@ export class WorkflowRunner {
2968
3970
  if (roleLC.includes('critic')) return 2;
2969
3971
  return isReviewer(def) ? 1 : 0;
2970
3972
  };
2971
- const dedicated = allDefs
2972
- .filter((d) => d.name !== ownerDef.name && isReviewer(d))
2973
- .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name))[0];
3973
+ // Prefer agents not currently assigned as reviewers to avoid double-booking
3974
+ const notBusy = (def: AgentDefinition): boolean => !this.activeReviewers.has(def.name);
3975
+
3976
+ const dedicatedCandidates = allDefs
3977
+ .filter((d) => eligible(d) && isReviewer(d))
3978
+ .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name));
3979
+ const dedicated = dedicatedCandidates.find(notBusy) ?? dedicatedCandidates[0];
2974
3980
  if (dedicated) return dedicated;
2975
3981
 
2976
- const alternate = allDefs.find((d) => d.name !== ownerDef.name && d.interactive !== false);
3982
+ const alternateCandidates = allDefs.filter((d) => eligible(d) && d.interactive !== false);
3983
+ const alternate = alternateCandidates.find(notBusy) ?? alternateCandidates[0];
2977
3984
  if (alternate) return alternate;
2978
3985
 
2979
3986
  // Self-review fallback — log a warning since owner reviewing itself is weak.
2980
3987
  return ownerDef;
2981
3988
  }
2982
3989
 
2983
- private assertOwnerCompletionMarker(step: WorkflowStep, output: string, injectedTaskText: string): void {
3990
/**
 * Reports whether an agent definition uses the `worker` preset while its
 * `interactive` flag is anything other than an explicit `false`.
 */
private isExplicitInteractiveWorker(agentDef: AgentDefinition): boolean {
  if (agentDef.preset !== 'worker') {
    return false;
  }
  const optedOutOfInteractive = agentDef.interactive === false;
  return !optedOutOfInteractive;
}
3993
+
3994
/**
 * Decides whether a supervised step may be treated as complete, combining the
 * owner's output, the optional verification gate and recorded step evidence.
 *
 * Resolution order:
 * 1. An explicit `OWNER_DECISION` of INCOMPLETE_RETRY, INCOMPLETE_FAIL or
 *    NEEDS_CLARIFICATION is raised as a `WorkflowCompletionError`.
 * 2. Configured verification runs with `allowFailure`; a reported error is
 *    raised as `failed_verification`.
 * 3. An explicit COMPLETE decision wins, then a passing verification.
 * 4. A legacy `STEP_COMPLETE` marker is accepted as a COMPLETE decision.
 * 5. When no structured decision was parsed, evidence-based judgement is tried.
 * 6. Finally the process-exit fallback is consulted.
 *
 * @param step - Step being resolved.
 * @param ownerOutput - Captured output of the owner agent.
 * @param specialistOutput - Captured output of the specialist; fed to verification.
 * @param injectedTaskText - Task text given to the owner, used for marker echo detection.
 * @param verificationTaskText - Optional task text forwarded to verification.
 * @returns The completion reason plus any owner decision/reason that established it.
 * @throws WorkflowCompletionError when the owner rejects the step, verification
 *   reports an error, or no path establishes completion.
 */
private resolveOwnerCompletionDecision(
  step: WorkflowStep,
  ownerOutput: string,
  specialistOutput: string,
  injectedTaskText: string,
  verificationTaskText?: string
): CompletionDecisionResult {
  const hasMarker = this.hasOwnerCompletionMarker(step, ownerOutput, injectedTaskText);
  // Parsed with hasMarker=false so that only a structured OWNER_DECISION counts here.
  const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
  const reasonSuffix = explicitOwnerDecision?.reason ? `: ${explicitOwnerDecision.reason}` : '';

  // Non-completing owner outcomes never mark the step complete; they surface
  // as typed errors for the caller to act on.
  switch (explicitOwnerDecision?.decision) {
    case 'INCOMPLETE_RETRY':
      throw new WorkflowCompletionError(
        `Step "${step.name}" owner requested retry${reasonSuffix}`,
        'retry_requested_by_owner'
      );
    case 'INCOMPLETE_FAIL':
      throw new WorkflowCompletionError(
        `Step "${step.name}" owner marked the step incomplete${reasonSuffix}`,
        'failed_owner_decision'
      );
    case 'NEEDS_CLARIFICATION':
      throw new WorkflowCompletionError(
        `Step "${step.name}" owner requested clarification before completion${reasonSuffix}`,
        'retry_requested_by_owner'
      );
    default:
      break;
  }

  const verificationResult = step.verification
    ? this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, {
        allowFailure: true,
        completionMarkerFound: hasMarker,
      })
    : { passed: false };

  if (verificationResult.error) {
    throw new WorkflowCompletionError(
      `Step "${step.name}" verification failed and no owner decision or evidence established completion: ${verificationResult.error}`,
      'failed_verification'
    );
  }

  if (explicitOwnerDecision?.decision === 'COMPLETE') {
    if (!hasMarker) {
      this.log(
        `[${step.name}] Structured OWNER_DECISION completed the step without legacy STEP_COMPLETE marker`
      );
    }
    const { decision, reason } = explicitOwnerDecision;
    return { completionReason: 'completed_by_owner_decision', ownerDecision: decision, reason };
  }

  if (verificationResult.passed) {
    return { completionReason: 'completed_verified' };
  }

  // Second parse lets the legacy STEP_COMPLETE marker stand in for a decision.
  const markerAwareDecision = this.parseOwnerDecision(step, ownerOutput, hasMarker);
  if (markerAwareDecision?.decision === 'COMPLETE') {
    return {
      completionReason: 'completed_by_owner_decision',
      ownerDecision: markerAwareDecision.decision,
      reason: markerAwareDecision.reason,
    };
  }

  if (!explicitOwnerDecision) {
    const evidenceReason = this.judgeOwnerCompletionByEvidence(step.name, ownerOutput);
    if (evidenceReason) {
      if (!hasMarker) {
        this.log(`[${step.name}] Evidence-based completion resolved without legacy STEP_COMPLETE marker`);
      }
      return { completionReason: 'completed_by_evidence', reason: evidenceReason };
    }
  }

  // Last resort: infer completion from a clean process exit (plus passing
  // verification when one is configured).
  const exitFallback = this.tryProcessExitFallback(
    step,
    specialistOutput,
    verificationTaskText,
    ownerOutput
  );
  if (exitFallback) {
    const verificationNote = step.verification ? ' + verification passed' : '';
    this.log(
      `[${step.name}] Completion inferred from clean process exit (code 0)` +
        verificationNote +
        ' — no coordination signal was required'
    );
    return exitFallback;
  }

  throw new WorkflowCompletionError(
    `Step "${step.name}" owner completion decision missing: no OWNER_DECISION, legacy STEP_COMPLETE marker, or evidence-backed completion signal`,
    'failed_no_evidence'
  );
}
4098
+
4099
/**
 * Probes whether `output` alone is enough for `resolveOwnerCompletionDecision`
 * to establish completion. The same text is supplied as both the owner output
 * and the specialist output.
 *
 * @returns `true` when resolution succeeds, `false` when it throws for any reason.
 */
private hasExplicitInteractiveWorkerCompletionEvidence(
  step: WorkflowStep,
  output: string,
  injectedTaskText: string,
  verificationTaskText: string
): boolean {
  let resolved = true;
  try {
    this.resolveOwnerCompletionDecision(step, output, output, injectedTaskText, verificationTaskText);
  } catch {
    // Any failure to resolve is reported as "no evidence".
    resolved = false;
  }
  return resolved;
}
4112
+
4113
/**
 * Checks whether the output carries the legacy `STEP_COMPLETE:<step name>` marker.
 *
 * When the injected task text itself contains the marker and the output looks
 * like it echoes the injected prompt, a single occurrence may just be the echo,
 * so a second occurrence is required.
 *
 * @param step - Step whose name forms the marker.
 * @param output - Captured agent output to scan.
 * @param injectedTaskText - Task text that was handed to the agent.
 * @returns `true` when a marker attributable to the agent is present.
 */
private hasOwnerCompletionMarker(
  step: WorkflowStep,
  output: string,
  injectedTaskText: string
): boolean {
  const marker = `STEP_COMPLETE:${step.name}`;
  const firstHit = output.indexOf(marker);
  if (firstHit < 0) {
    return false;
  }
  if (!injectedTaskText.includes(marker)) {
    return true;
  }

  // Phrases from the injected prompt; seeing one suggests the prompt was echoed.
  const promptEchoHints = [
    'STEP OWNER CONTRACT',
    'Preferred final decision format',
    'Legacy completion marker still supported',
    'Output exactly: STEP_COMPLETE:',
  ];
  const promptEchoed = promptEchoHints.some((hint) => output.includes(hint));
  if (!promptEchoed) {
    return true;
  }

  return output.indexOf(marker, firstHit + marker.length) !== -1;
}
4137
+
4138
/**
 * Extracts the owner's structured `OWNER_DECISION` (and optional `REASON`)
 * from captured output.
 *
 * - With no `OWNER_DECISION` match, a legacy marker (`hasMarker`) is reported
 *   as COMPLETE; otherwise `null` is returned.
 * - When the output appears to echo the prompt, matches on the template line
 *   (containing `COMPLETE|INCOMPLETE_RETRY`) are ignored if any other match exists.
 * - The last remaining match wins.
 *
 * @param step - Step whose name is used in the legacy-marker reason text.
 * @param ownerOutput - Captured owner output.
 * @param hasMarker - Whether a legacy completion marker was already detected.
 * @returns The parsed decision and reason, or `null` when none can be determined.
 */
private parseOwnerDecision(
  step: WorkflowStep,
  ownerOutput: string,
  hasMarker: boolean
): { decision: WorkflowOwnerDecision; reason?: string } | null {
  const allDecisionHits = Array.from(
    ownerOutput.matchAll(
      /OWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi
    )
  );

  if (allDecisionHits.length === 0) {
    if (!hasMarker) {
      return null;
    }
    return {
      decision: 'COMPLETE',
      reason: `Legacy completion marker observed: STEP_COMPLETE:${step.name}`,
    };
  }

  const promptEchoed = [
    'STEP OWNER CONTRACT',
    'Preferred final decision format',
    'one of COMPLETE, INCOMPLETE_RETRY',
    'COMPLETE|INCOMPLETE_RETRY',
  ].some((hint) => ownerOutput.includes(hint));

  // A hit sits on a template line when that line lists the alternatives
  // verbatim instead of choosing one.
  const isTemplateLine = (hit: RegExpMatchArray): boolean => {
    const at = hit.index ?? 0;
    const start = ownerOutput.lastIndexOf('\n', at) + 1;
    const end = ownerOutput.indexOf('\n', at);
    const line = end === -1 ? ownerOutput.slice(start) : ownerOutput.slice(start, end);
    return line.includes('COMPLETE|INCOMPLETE_RETRY');
  };

  const genuineHits = promptEchoed
    ? allDecisionHits.filter((hit) => !isTemplateLine(hit))
    : allDecisionHits;
  const pool = genuineHits.length > 0 ? genuineHits : allDecisionHits;
  const chosen = pool[pool.length - 1];

  const decision = chosen?.[1]?.toUpperCase() as WorkflowOwnerDecision | undefined;
  switch (decision) {
    case 'COMPLETE':
    case 'INCOMPLETE_RETRY':
    case 'INCOMPLETE_FAIL':
    case 'NEEDS_CLARIFICATION':
      break;
    default:
      return null;
  }

  const reasonHits = Array.from(ownerOutput.matchAll(/(?:^|\n)REASON:\s*(.+)/gi));
  const reasonHit =
    promptEchoed && reasonHits.length > 1 ? reasonHits[reasonHits.length - 1] : reasonHits[0];
  const reasonText = reasonHit?.[1]?.trim();
  // The literal template placeholder is not a real reason.
  const reason = reasonText && reasonText !== '<one sentence>' ? reasonText : undefined;

  return { decision, reason };
}
4198
+
4199
/**
 * Removes blank lines and any line matching one of `patterns`, returning the
 * remaining trimmed lines joined with newlines.
 *
 * @param output - Raw text to clean.
 * @param patterns - Expressions tested against each trimmed, non-empty line.
 */
private stripEchoedPromptLines(output: string, patterns: RegExp[]): string {
  const kept: string[] = [];
  for (const rawLine of output.split('\n')) {
    const line = rawLine.trim();
    if (!line) {
      continue;
    }
    const looksEchoed = patterns.some((pattern) => pattern.test(line));
    if (!looksEchoed) {
      kept.push(line);
    }
  }
  return kept.join('\n');
}
4207
+
4208
/**
 * Returns the first line of `output` that is non-empty after trimming, or
 * `undefined` when every line is blank.
 */
private firstMeaningfulLine(output: string): string | undefined {
  for (const rawLine of output.split('\n')) {
    const line = rawLine.trim();
    if (line) {
      return line;
    }
  }
  return undefined;
}
4214
+
4215
/**
 * Infers completion from the owner's free-form output combined with recorded
 * step evidence, for owners that emitted no structured decision.
 *
 * Completion is inferred only when BOTH hold:
 * - the output (minus echoed prompt lines) contains a positive conclusion or
 *   an explicit "task completed" `remove_agent` call, and
 * - the step evidence holds a qualifying coordination signal or an
 *   `owner_monitoring` side effect showing an inspection took place.
 *
 * @param stepName - Step whose recorded evidence is consulted.
 * @param ownerOutput - Captured owner output.
 * @returns A short reason (first meaningful output line) or `null` when
 *   completion cannot be inferred.
 */
private judgeOwnerCompletionByEvidence(stepName: string, ownerOutput: string): string | null {
  // Never infer completion when the raw output carries an explicit negative decision.
  const explicitNegative =
    /OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i;
  if (explicitNegative.test(ownerOutput)) {
    return null;
  }

  const echoedPromptPatterns: RegExp[] = [
    /^STEP OWNER CONTRACT:?$/i,
    /^Preferred final decision format:?$/i,
    /^OWNER_DECISION:\s*(?:COMPLETE\|INCOMPLETE_RETRY|<one of COMPLETE, INCOMPLETE_RETRY)/i,
    /^REASON:\s*<one sentence>$/i,
    /^Legacy completion marker still supported:/i,
    /^STEP_COMPLETE:/i,
  ];
  const sanitized = this.stripEchoedPromptLines(ownerOutput, echoedPromptPatterns);
  if (!sanitized) {
    return null;
  }

  const selfReleased =
    /Calling\s+(?:[\w.-]+\.)?remove_agent\(\{[^<\n]*"reason":"task completed"/i.test(sanitized);
  const statesSuccess =
    /\b(complete(?:d)?|done|verified|looks correct|safe handoff|artifact verified)\b/i.test(
      sanitized
    );
  const praisesArtifacts = /\bartifacts?\b.*\b(correct|verified|complete)\b/i.test(sanitized);
  const positiveConclusion = statesSuccess || praisesArtifacts || selfReleased;

  const evidence = this.getStepCompletionEvidence(stepName);

  const coordinationConfirmed =
    evidence?.coordinationSignals.some((signal) => {
      switch (signal.kind) {
        case 'worker_done':
        case 'lead_done':
        case 'verification_passed':
          return true;
        case 'process_exit':
          return signal.value === '0';
        default:
          return false;
      }
    }) ?? false;

  const inspectionConfirmed =
    evidence?.toolSideEffects.some((effect) => {
      if (effect.type !== 'owner_monitoring') {
        return false;
      }
      return (
        /Checked git diff stats/i.test(effect.detail) ||
        /Listed files for verification/i.test(effect.detail)
      );
    }) ?? false;

  if (positiveConclusion && (coordinationConfirmed || inspectionConfirmed)) {
    return this.firstMeaningfulLine(sanitized) ?? 'Evidence-backed completion';
  }
  return null;
}
4264
+
4265
/**
 * Process-exit fallback: infers completion when the step evidence records a
 * `process_exit` signal with value `'0'` and, if a verification is configured,
 * that verification passes.
 *
 * The fallback is disabled when `swarm.completionGracePeriodMs` is `0`
 * (it defaults to 5000 when unset), and it never applies when the owner output
 * contains an explicit retry/fail/clarification decision.
 *
 * @param step - Step being resolved.
 * @param specialistOutput - Output handed to verification.
 * @param verificationTaskText - Optional task text forwarded to verification.
 * @param ownerOutput - Optional owner output scanned for negative decisions.
 * @returns A `completed_by_process_exit` result, or `null` when the fallback does not apply.
 */
private tryProcessExitFallback(
  step: WorkflowStep,
  specialistOutput: string,
  verificationTaskText?: string,
  ownerOutput?: string
): CompletionDecisionResult | null {
  const gracePeriodMs = this.currentConfig?.swarm.completionGracePeriodMs ?? 5000;
  if (gracePeriodMs === 0) {
    return null;
  }

  // An explicit negative owner decision always overrides inference.
  const ownerObjected =
    Boolean(ownerOutput) &&
    /OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(
      ownerOutput ?? ''
    );
  if (ownerObjected) {
    return null;
  }

  const evidence = this.getStepCompletionEvidence(step.name);
  const exitedCleanly =
    evidence?.coordinationSignals.some(
      (signal) => signal.kind === 'process_exit' && signal.value === '0'
    ) ?? false;
  if (!exitedCleanly) {
    return null;
  }

  // A configured verification must pass for the fallback to succeed.
  if (step.verification) {
    const outcome = this.runVerification(
      step.verification,
      specialistOutput,
      step.name,
      verificationTaskText,
      { allowFailure: true }
    );
    if (!outcome.passed) {
      return null;
    }
  }

  const verificationNote = step.verification ? ' and verification passed' : '';
  return {
    completionReason: 'completed_by_process_exit',
    reason: `Process exited with code 0${verificationNote} — coordination signal not required`,
  };
}
3001
4311
 
3002
4312
  private async runStepReviewGate(
@@ -3052,7 +4362,17 @@ export class WorkflowRunner {
3052
4362
 
3053
4363
  await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer');
3054
4364
  this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`);
4365
+ this.recordStepToolSideEffect(step.name, {
4366
+ type: 'review_started',
4367
+ detail: `Review started with ${reviewerDef.name}`,
4368
+ raw: { reviewer: reviewerDef.name },
4369
+ });
3055
4370
  const emitReviewCompleted = async (decision: 'approved' | 'rejected', reason?: string) => {
4371
+ this.recordStepToolSideEffect(step.name, {
4372
+ type: 'review_completed',
4373
+ detail: `Review ${decision} by ${reviewerDef.name}${reason ? `: ${reason}` : ''}`,
4374
+ raw: { reviewer: reviewerDef.name, decision, reason },
4375
+ });
3056
4376
  await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason);
3057
4377
  this.emit({
3058
4378
  type: 'step:review-completed',
@@ -3108,6 +4428,9 @@ export class WorkflowRunner {
3108
4428
 
3109
4429
  try {
3110
4430
  await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
4431
+ evidenceStepName: step.name,
4432
+ evidenceRole: 'reviewer',
4433
+ logicalName: reviewerDef.name,
3111
4434
  onSpawned: ({ agent }) => {
3112
4435
  reviewerHandle = agent;
3113
4436
  },
@@ -3153,6 +4476,22 @@ export class WorkflowRunner {
3153
4476
 
3154
4477
/**
 * Determines the reviewer's verdict by trying three parsers in order of
 * decreasing strictness: the exact `REVIEW_DECISION:` format, a tolerant
 * keyword-per-line parse, and finally a whole-output evidence scan.
 *
 * @param reviewOutput - Captured reviewer output.
 * @returns The first verdict any parser produces, or `null` when none does.
 */
private parseReviewDecision(
  reviewOutput: string
): { decision: 'approved' | 'rejected'; reason?: string } | null {
  return (
    this.parseStrictReviewDecision(reviewOutput) ??
    this.parseTolerantReviewDecision(reviewOutput) ??
    this.judgeReviewDecisionFromEvidence(reviewOutput)
  );
}
4492
+
4493
+ private parseStrictReviewDecision(
4494
+ reviewOutput: string
3156
4495
  ): { decision: 'approved' | 'rejected'; reason?: string } | null {
3157
4496
  const decisionPattern = /REVIEW_DECISION:\s*(APPROVE|REJECT)/gi;
3158
4497
  const decisionMatches = [...reviewOutput.matchAll(decisionPattern)];
@@ -3162,10 +4501,18 @@ export class WorkflowRunner {
3162
4501
 
3163
4502
  const outputLikelyContainsEchoedPrompt =
3164
4503
  reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
4504
+ const realReviewMatches = outputLikelyContainsEchoedPrompt
4505
+ ? decisionMatches.filter((m) => {
4506
+ const lineStart = reviewOutput.lastIndexOf('\n', m.index!) + 1;
4507
+ const lineEnd = reviewOutput.indexOf('\n', m.index!);
4508
+ const line = reviewOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
4509
+ return !line.includes('APPROVE or REJECT');
4510
+ })
4511
+ : decisionMatches;
3165
4512
  const decisionMatch =
3166
- outputLikelyContainsEchoedPrompt && decisionMatches.length > 1
3167
- ? decisionMatches[decisionMatches.length - 1]
3168
- : decisionMatches[0];
4513
+ realReviewMatches.length > 0
4514
+ ? realReviewMatches[realReviewMatches.length - 1]
4515
+ : decisionMatches[decisionMatches.length - 1];
3169
4516
  const decision = decisionMatch?.[1]?.toUpperCase();
3170
4517
  if (decision !== 'APPROVE' && decision !== 'REJECT') {
3171
4518
  return null;
@@ -3185,6 +4532,115 @@ export class WorkflowRunner {
3185
4532
  };
3186
4533
  }
3187
4534
 
4535
/**
 * Lenient verdict parser for reviewers that did not follow the exact format.
 *
 * Echoed prompt lines are stripped, then each remaining line (with any leading
 * `REVIEW_DECISION:` label removed) is checked for a verdict keyword at its
 * start. If no single line yields a verdict, all lines joined by spaces are
 * checked once as a whole.
 *
 * @param reviewOutput - Captured reviewer output.
 * @returns The verdict with a reason (an explicit `REVIEW_REASON`, else the
 *   first meaningful line), or `null` when nothing matches.
 */
private parseTolerantReviewDecision(
  reviewOutput: string
): { decision: 'approved' | 'rejected'; reason?: string } | null {
  const sanitized = this.stripEchoedPromptLines(reviewOutput, [
    /^Return exactly:?$/i,
    /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
    /^REVIEW_REASON:\s*<one sentence>$/i,
  ]);
  if (!sanitized) {
    return null;
  }

  const lines: string[] = [];
  for (const rawLine of sanitized.split('\n')) {
    const line = rawLine.trim();
    if (line) {
      lines.push(line);
    }
  }

  // Per-line candidates first, then the whole text as a last candidate.
  const candidates = lines.map((line) => line.replace(/^REVIEW_DECISION:\s*/i, '').trim());
  candidates.push(lines.join(' '));

  for (const candidate of candidates) {
    const decision = this.normalizeReviewDecisionCandidate(candidate);
    if (decision) {
      const reason = this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized);
      return { decision, reason };
    }
  }
  return null;
}
4572
+
4573
/**
 * Maps free-form text to a verdict based on the keyword it starts with.
 *
 * @param candidate - Text to classify; it is trimmed and lower-cased first.
 * @returns `'approved'` or `'rejected'` when the text begins with a known
 *   keyword, otherwise `null` (including for empty text).
 */
private normalizeReviewDecisionCandidate(candidate: string): 'approved' | 'rejected' | null {
  const approvalPrefix =
    /^(approve|approved|complete|completed|pass|passed|accept|accepted|lgtm|ship it|looks good|looks fine)\b/i;
  const rejectionPrefix =
    /^(reject|rejected|retry|retry requested|fail|failed|incomplete|needs clarification|not complete|not ready|insufficient evidence)\b/i;

  const value = candidate.trim().toLowerCase();
  if (value.length === 0) {
    return null;
  }
  // Approval keywords are checked before rejection keywords.
  if (approvalPrefix.test(value)) {
    return 'approved';
  }
  return rejectionPrefix.test(value) ? 'rejected' : null;
}
4593
+
4594
/**
 * Extracts the text following `REVIEW_REASON:` from reviewer output.
 *
 * The first occurrence is used, unless the output appears to echo the prompt
 * and contains more than one occurrence, in which case the last one is used.
 * The template placeholder `<one sentence>` is treated as no reason.
 *
 * @param reviewOutput - Captured (or sanitized) reviewer output.
 * @returns The trimmed reason, or `undefined` when absent or a placeholder.
 */
private parseReviewReason(reviewOutput: string): string | undefined {
  const hits = Array.from(reviewOutput.matchAll(/REVIEW_REASON:\s*(.+)/gi));

  const promptEchoed = ['Return exactly', 'REVIEW_DECISION: APPROVE or REJECT'].some((hint) =>
    reviewOutput.includes(hint)
  );
  const useLast = promptEchoed && hits.length > 1;
  const picked = useLast ? hits[hits.length - 1] : hits[0];

  const text = picked?.[1]?.trim();
  if (!text || text === '<one sentence>') {
    return undefined;
  }
  return text;
}
4606
+
4607
/**
 * Last-resort verdict inference: scans the whole reviewer output (minus echoed
 * prompt lines) for positive and negative keywords anywhere in the text.
 *
 * Negative keywords take precedence: if any is present the verdict is
 * `'rejected'` even when positive keywords also appear.
 *
 * @param reviewOutput - Captured reviewer output.
 * @returns The inferred verdict with a reason (an explicit `REVIEW_REASON`,
 *   else the first meaningful line), or `null` when no keyword is found.
 */
private judgeReviewDecisionFromEvidence(
  reviewOutput: string
): { decision: 'approved' | 'rejected'; reason?: string } | null {
  const sanitized = this.stripEchoedPromptLines(reviewOutput, [
    /^Return exactly:?$/i,
    /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
    /^REVIEW_REASON:\s*<one sentence>$/i,
  ]);
  if (!sanitized) {
    return null;
  }

  const soundsNegative =
    /\b(reject(?:ed)?|retry|fail(?:ed)?|incomplete|missing checks|insufficient evidence|not safe)\b/i.test(
      sanitized
    );
  const soundsPositive =
    /\b(approved?|complete(?:d)?|verified|looks good|looks fine|safe handoff|pass(?:ed)?)\b/i.test(
      sanitized
    );

  if (!soundsNegative && !soundsPositive) {
    return null;
  }

  const reason = this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized);
  return { decision: soundsNegative ? 'rejected' : 'approved', reason };
}
4643
+
3188
4644
  private combineStepAndReviewOutput(stepOutput: string, reviewOutput: string): string {
3189
4645
  const primary = stepOutput.trimEnd();
3190
4646
  const review = reviewOutput.trim();
@@ -3261,7 +4717,7 @@ export class WorkflowRunner {
3261
4717
  return (
3262
4718
  'You are a non-interactive worker agent. Produce clean, structured output to stdout.\n' +
3263
4719
  'Do NOT use mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn sub-agents.\n' +
3264
- 'Do NOT use mcp__relaycast__dm_send or any Relaycast messaging tools — you have no relay connection.\n\n'
4720
+ 'Do NOT use mcp__relaycast__message_dm_send or any Relaycast messaging tools — you have no relay connection.\n\n'
3265
4721
  );
3266
4722
  case 'reviewer':
3267
4723
  return (
@@ -3462,10 +4918,21 @@ export class WorkflowRunner {
3462
4918
  });
3463
4919
  });
3464
4920
 
4921
+ this.captureStepTerminalEvidence(step.name, {}, { exitCode, exitSignal });
3465
4922
  return { output, exitCode, exitSignal };
3466
4923
  } finally {
3467
- const combinedOutput = stdoutChunks.join('') + stderrChunks.join('');
4924
+ const stdout = stdoutChunks.join('');
4925
+ const stderr = stderrChunks.join('');
4926
+ const combinedOutput = stdout + stderr;
3468
4927
  this.lastFailedStepOutput.set(step.name, combinedOutput);
4928
+ this.captureStepTerminalEvidence(
4929
+ step.name,
4930
+ {
4931
+ stdout,
4932
+ stderr,
4933
+ combined: combinedOutput,
4934
+ }
4935
+ );
3469
4936
  stopHeartbeat?.();
3470
4937
  logStream.end();
3471
4938
  this.unregisterWorker(agentName);
@@ -3487,6 +4954,8 @@ export class WorkflowRunner {
3487
4954
  throw new Error('AgentRelay not initialized');
3488
4955
  }
3489
4956
 
4957
+ const evidenceStepName = options.evidenceStepName ?? step.name;
4958
+
3490
4959
  // Deterministic name: step name + optional role suffix + first 8 chars of run ID.
3491
4960
  const requestedName = `${step.name}${options.agentNameSuffix ? `-${options.agentNameSuffix}` : ''}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
3492
4961
  let agentName = requestedName;
@@ -3538,18 +5007,24 @@ export class WorkflowRunner {
3538
5007
 
3539
5008
  const agentChannels = this.channel ? [this.channel] : agentDef.channels;
3540
5009
 
3541
- let agent: Awaited<ReturnType<typeof this.relay.spawnPty>>;
5010
+ let agent: Awaited<ReturnType<typeof this.relay.spawnPty>> | undefined;
3542
5011
  let exitResult: string = 'unknown';
3543
5012
  let stopHeartbeat: (() => void) | undefined;
3544
5013
  let ptyChunks: string[] = [];
3545
5014
 
3546
5015
  try {
3547
5016
  const agentCwd = this.resolveAgentCwd(agentDef);
5017
+ const interactiveSpawnPolicy = resolveSpawnPolicy({
5018
+ AGENT_NAME: agentName,
5019
+ AGENT_CLI: agentDef.cli,
5020
+ RELAY_API_KEY: this.relayApiKey ?? 'workflow-runner',
5021
+ AGENT_CHANNELS: (agentChannels ?? []).join(','),
5022
+ });
3548
5023
  agent = await this.relay.spawnPty({
3549
5024
  name: agentName,
3550
5025
  cli: agentDef.cli,
3551
5026
  model: agentDef.constraints?.model,
3552
- args: [],
5027
+ args: interactiveSpawnPolicy.args,
3553
5028
  channels: agentChannels,
3554
5029
  task: taskWithExit,
3555
5030
  idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
@@ -3584,18 +5059,36 @@ export class WorkflowRunner {
3584
5059
  const oldListener = this.ptyListeners.get(oldName);
3585
5060
  if (oldListener) {
3586
5061
  this.ptyListeners.delete(oldName);
3587
- this.ptyListeners.set(agent.name, (chunk: string) => {
5062
+ const resolvedAgentName = agent.name;
5063
+ this.ptyListeners.set(resolvedAgentName, (chunk: string) => {
3588
5064
  const stripped = WorkflowRunner.stripAnsi(chunk);
3589
- this.ptyOutputBuffers.get(agent.name)?.push(stripped);
5065
+ this.ptyOutputBuffers.get(resolvedAgentName)?.push(stripped);
3590
5066
  newLogStream.write(chunk);
3591
- options.onChunk?.({ agentName: agent.name, chunk });
5067
+ options.onChunk?.({ agentName: resolvedAgentName, chunk });
3592
5068
  });
3593
5069
  }
3594
5070
 
3595
5071
  agentName = agent.name;
3596
5072
  }
3597
5073
 
3598
- await options.onSpawned?.({ requestedName, actualName: agent.name, agent });
5074
+ const liveAgent = agent;
5075
+ await options.onSpawned?.({ requestedName, actualName: liveAgent.name, agent: liveAgent });
5076
+ this.runtimeStepAgents.set(liveAgent.name, {
5077
+ stepName: evidenceStepName,
5078
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
5079
+ logicalName: options.logicalName ?? agentDef.name,
5080
+ });
5081
+ const signalParticipant = this.resolveSignalParticipantKind(
5082
+ options.evidenceRole ?? agentDef.role ?? 'agent'
5083
+ );
5084
+ if (signalParticipant) {
5085
+ this.rememberStepSignalSender(
5086
+ evidenceStepName,
5087
+ signalParticipant,
5088
+ liveAgent.name,
5089
+ options.logicalName ?? agentDef.name
5090
+ );
5091
+ }
3599
5092
 
3600
5093
  // Register in workers.json so `agents:kill` can find this agent
3601
5094
  let workerPid: number | undefined;
@@ -3610,11 +5103,11 @@ export class WorkflowRunner {
3610
5103
  // Register the spawned agent in Relaycast for observability + start heartbeat
3611
5104
  if (this.relayApiKey) {
3612
5105
  const agentClient = await this.registerRelaycastExternalAgent(
3613
- agent.name,
5106
+ liveAgent.name,
3614
5107
  `Workflow agent for step "${step.name}" (${agentDef.cli})`
3615
5108
  ).catch((err) => {
3616
5109
  console.warn(
3617
- `[WorkflowRunner] Failed to register ${agent.name} in Relaycast:`,
5110
+ `[WorkflowRunner] Failed to register ${liveAgent.name} in Relaycast:`,
3618
5111
  err?.message ?? err
3619
5112
  );
3620
5113
  return null;
@@ -3632,32 +5125,50 @@ export class WorkflowRunner {
3632
5125
  await channelAgent?.channels.invite(this.channel, agent.name).catch(() => {});
3633
5126
  }
3634
5127
 
3635
- // Post assignment notification (no task content — task arrives via direct broker injection)
3636
- this.postToChannel(`**[${step.name}]** Assigned to \`${agent.name}\``);
5128
+ // Keep operational assignment chatter out of the agent coordination channel.
5129
+ this.log(`[${step.name}] Assigned to ${agent.name}`);
3637
5130
 
3638
5131
  // Register agent handle for hub-mediated nudging
3639
5132
  this.activeAgentHandles.set(agentName, agent);
3640
5133
 
3641
5134
  // Wait for agent to exit, with idle nudging if configured
3642
- exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs);
5135
+ exitResult = await this.waitForExitWithIdleNudging(
5136
+ agent,
5137
+ agentDef,
5138
+ step,
5139
+ timeoutMs,
5140
+ options.preserveOnIdle ?? this.shouldPreserveIdleSupervisor(agentDef, step, options.evidenceRole)
5141
+ );
3643
5142
 
3644
5143
  // Stop heartbeat now that agent has exited
3645
5144
  stopHeartbeat?.();
3646
5145
 
3647
5146
  if (exitResult === 'timeout') {
3648
- // Safety net: check if the verification file exists before giving up.
3649
- // The agent may have completed work but failed to /exit.
3650
- if (step.verification?.type === 'file_exists') {
3651
- const verifyPath = path.resolve(this.cwd, step.verification.value);
3652
- if (existsSync(verifyPath)) {
3653
- this.postToChannel(`**[${step.name}]** Agent idle after completing work — releasing`);
3654
- await agent.release();
3655
- // Fall through to read output below
3656
- } else {
5147
+ // Grace-period fallback: before failing, check if the agent completed
5148
+ // its work but just failed to self-terminate. Run verification if
5149
+ // configured — a passing gate + timeout is better than a hard failure.
5150
+ let timeoutRecovered = false;
5151
+ if (step.verification) {
5152
+ const ptyOutput = (this.ptyOutputBuffers.get(agentName) ?? []).join('');
5153
+ const verificationResult = this.runVerification(
5154
+ step.verification,
5155
+ ptyOutput,
5156
+ step.name,
5157
+ undefined,
5158
+ { allowFailure: true }
5159
+ );
5160
+ if (verificationResult.passed) {
5161
+ this.log(
5162
+ `[${step.name}] Agent timed out but verification passed — treating as complete`
5163
+ );
5164
+ this.postToChannel(
5165
+ `**[${step.name}]** Agent idle after completing work — verification passed, releasing`
5166
+ );
3657
5167
  await agent.release();
3658
- throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
5168
+ timeoutRecovered = true;
3659
5169
  }
3660
- } else {
5170
+ }
5171
+ if (!timeoutRecovered) {
3661
5172
  await agent.release();
3662
5173
  throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
3663
5174
  }
@@ -3672,6 +5183,24 @@ export class WorkflowRunner {
3672
5183
  // Snapshot PTY chunks before cleanup — we need them for output reading below
3673
5184
  ptyChunks = this.ptyOutputBuffers.get(agentName) ?? [];
3674
5185
  this.lastFailedStepOutput.set(step.name, ptyChunks.join(''));
5186
+ if (ptyChunks.length > 0 || agent?.exitCode !== undefined || agent?.exitSignal !== undefined) {
5187
+ this.captureStepTerminalEvidence(
5188
+ evidenceStepName,
5189
+ {
5190
+ stdout: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
5191
+ combined: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
5192
+ },
5193
+ {
5194
+ exitCode: agent?.exitCode,
5195
+ exitSignal: agent?.exitSignal,
5196
+ },
5197
+ {
5198
+ sender: options.logicalName ?? agentDef.name,
5199
+ actor: agent?.name ?? agentName,
5200
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
5201
+ }
5202
+ );
5203
+ }
3675
5204
 
3676
5205
  // Always clean up PTY resources — prevents fd leaks if spawnPty or waitForExit throws
3677
5206
  stopHeartbeat?.();
@@ -3685,6 +5214,7 @@ export class WorkflowRunner {
3685
5214
  }
3686
5215
  this.unregisterWorker(agentName);
3687
5216
  this.supervisedRuntimeAgents.delete(agentName);
5217
+ this.runtimeStepAgents.delete(agentName);
3688
5218
  }
3689
5219
 
3690
5220
  let output: string;
@@ -3702,6 +5232,19 @@ export class WorkflowRunner {
3702
5232
  : `Agent exited (${exitResult})`;
3703
5233
  }
3704
5234
 
5235
+ if (ptyChunks.length === 0) {
5236
+ this.captureStepTerminalEvidence(
5237
+ evidenceStepName,
5238
+ { stdout: output, combined: output },
5239
+ { exitCode: agent?.exitCode, exitSignal: agent?.exitSignal },
5240
+ {
5241
+ sender: options.logicalName ?? agentDef.name,
5242
+ actor: agent?.name ?? agentName,
5243
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
5244
+ }
5245
+ );
5246
+ }
5247
+
3705
5248
  return {
3706
5249
  output,
3707
5250
  exitCode: agent?.exitCode,
@@ -3733,6 +5276,37 @@ export class WorkflowRunner {
3733
5276
  'auctioneer',
3734
5277
  ]);
3735
5278
 
5279
/**
 * Decide whether an agent should be treated as a lead/hub-style agent.
 *
 * Returns true immediately when `agentDef.preset` is `'lead'`. Otherwise
 * returns true when any entry of `WorkflowRunner.HUB_ROLES` appears as a
 * whole word in either the agent's name or its role.
 *
 * @param agentDef - Agent definition; `preset`, `role` and `name` are read.
 * @param roleOverride - When provided (non-nullish), used instead of
 *   `agentDef.role` for the role match.
 * @returns true if the agent is lead-like, false otherwise.
 */
+ private isLeadLikeAgent(agentDef: AgentDefinition, roleOverride?: string): boolean {
5280
+ if (agentDef.preset === 'lead') return true;
5281
+
5282
// Role falls back from the override, to the definition's role, to an empty string.
+ const role = (roleOverride ?? agentDef.role ?? '').toLowerCase();
5283
+ const nameLC = agentDef.name.toLowerCase();
5284
// Whole-word match (\b…\b) of each hub role against the name or the role.
// Both inputs are already lower-cased; the 'i' flag additionally makes the
// casing of the HUB_ROLES entries irrelevant.
// NOTE(review): hubRole is interpolated into the pattern unescaped — this
// assumes HUB_ROLES entries contain no regex metacharacters; confirm.
+ return [...WorkflowRunner.HUB_ROLES].some(
5285
+ (hubRole) =>
5286
+ new RegExp(`\\b${hubRole}\\b`, 'i').test(nameLC) ||
5287
+ new RegExp(`\\b${hubRole}\\b`, 'i').test(role)
5288
+ );
5289
+ }
5290
+
5291
/**
 * Decide whether an idle agent should be kept alive instead of being
 * treated as finished.
 *
 * Resolution order:
 *  1. An `evidenceRole` containing the whole word "owner" always preserves.
 *  2. An agent that is not lead-like (per `isLeadLikeAgent`) is never preserved.
 *  3. A lead-like agent is preserved only when its task text mentions
 *     waiting/monitoring/supervising/polling/signalling/handoff keywords.
 *
 * @param agentDef - Definition of the agent being evaluated.
 * @param step - Workflow step; only `step.task` is read (missing task = '').
 * @param evidenceRole - Optional role override, also forwarded to
 *   `isLeadLikeAgent`.
 * @returns true when the agent should be preserved while idle.
 */
+ private shouldPreserveIdleSupervisor(
5292
+ agentDef: AgentDefinition,
5293
+ step: WorkflowStep,
5294
+ evidenceRole?: string
5295
+ ): boolean {
5296
+ if (evidenceRole && /\bowner\b/i.test(evidenceRole)) {
5297
+ return true;
5298
+ }
5299
+
5300
+ if (!this.isLeadLikeAgent(agentDef, evidenceRole)) {
5301
+ return false;
5302
+ }
5303
+
5304
+ const task = step.task ?? '';
5305
// The alternation is wrapped in \b…\b, so a bare stem cannot match a longer
// word: `supervis` needs `\w*` to match "supervise"/"supervising"/"supervisor",
// and `_DONE` needs a leading `\w*` to match tokens such as "WORKER_DONE"
// ("_" is a word character, so there is no boundary before it).
// Both changes only widen the set of matches; every previous match still matches.
+ return /\b(wait|waiting|monitor|supervis\w*|check inbox|check.*channel|poll|DONE|\w*_DONE|signal|handoff)\b/i.test(
5306
+ task
5307
+ );
5308
+ }
5309
+
3736
5310
  /**
3737
5311
  * Wait for agent exit with idle detection and nudging.
3738
5312
  * If no idle nudge config is set, falls through to simple waitForExit.
@@ -3741,23 +5315,82 @@ export class WorkflowRunner {
3741
5315
  agent: Agent,
3742
5316
  agentDef: AgentDefinition,
3743
5317
  step: WorkflowStep,
3744
- timeoutMs?: number
5318
+ timeoutMs?: number,
5319
+ preserveIdleSupervisor = false
3745
5320
  ): Promise<'exited' | 'timeout' | 'released' | 'force-released'> {
3746
5321
  const nudgeConfig = this.currentConfig?.swarm.idleNudge;
3747
5322
  if (!nudgeConfig) {
3748
- // Idle = done: race exit against idle. Whichever fires first completes the step.
3749
- const result = await Promise.race([
3750
- agent.waitForExit(timeoutMs).then((r) => ({ kind: 'exit' as const, result: r })),
3751
- agent.waitForIdle(timeoutMs).then((r) => ({ kind: 'idle' as const, result: r })),
3752
- ]);
3753
- if (result.kind === 'idle' && result.result === 'idle') {
3754
- this.log(`[${step.name}] Agent "${agent.name}" went idle — treating as complete`);
3755
- this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle treating as complete`);
3756
- await agent.release();
3757
- return 'released';
5323
+ if (preserveIdleSupervisor) {
5324
+ this.log(
5325
+ `[${step.name}] Supervising agent "${agent.name}" may idle while waiting using exit-only completion`
5326
+ );
5327
+ return agent.waitForExit(timeoutMs);
5328
+ }
5329
+
5330
+ // Idle = done: race exit against idle, but only accept idle if verification passes.
5331
+ const idleLoopStart = Date.now();
5332
+ while (true) {
5333
+ const elapsed = Date.now() - idleLoopStart;
5334
+ const remaining = timeoutMs != null ? Math.max(0, timeoutMs - elapsed) : undefined;
5335
+ if (remaining != null && remaining <= 0) {
5336
+ return 'timeout';
5337
+ }
5338
+ const result = await Promise.race([
5339
+ agent.waitForExit(remaining).then((r) => ({ kind: 'exit' as const, result: r })),
5340
+ agent.waitForIdle(remaining).then((r) => ({ kind: 'idle' as const, result: r })),
5341
+ ]);
5342
+ if (result.kind === 'idle' && result.result === 'idle') {
5343
+ // Check verification before treating idle as complete.
5344
+ // Mirror runVerification's double-occurrence guard: if the task text
5345
+ // contains the token (from the prompt instruction), require a second
5346
+ // occurrence from the agent's actual output to avoid false positives.
5347
+ if (step.verification && step.verification.type === 'output_contains') {
5348
+ const token = step.verification.value;
5349
+ const ptyOutput = (this.ptyOutputBuffers.get(agent.name) ?? []).join('');
5350
+ const taskText = step.task ?? '';
5351
+ const taskHasToken = taskText.includes(token);
5352
+ let verificationPassed = true;
5353
+ if (taskHasToken) {
5354
+ const first = ptyOutput.indexOf(token);
5355
+ verificationPassed = first !== -1 && ptyOutput.includes(token, first + token.length);
5356
+ } else {
5357
+ verificationPassed = ptyOutput.includes(token);
5358
+ }
5359
+ if (!verificationPassed) {
5360
+ // The broker fires agent_idle only once per idle transition.
5361
+ // If the agent is still working (will produce output then idle again),
5362
+ // continuing the loop works. But if the agent is permanently idle,
5363
+ // waitForIdle won't resolve again. Wait briefly for new output,
5364
+ // then release and let upstream verification handle the result.
5365
+ this.log(`[${step.name}] Agent "${agent.name}" went idle but verification not yet passed — waiting for more output`);
5366
+ const idleGraceSecs = 15;
5367
+ const graceResult = await Promise.race([
5368
+ agent.waitForExit(idleGraceSecs * 1000).then((r) => ({ kind: 'exit' as const, result: r })),
5369
+ agent.waitForIdle(idleGraceSecs * 1000).then((r) => ({ kind: 'idle' as const, result: r })),
5370
+ ]);
5371
+ if (graceResult.kind === 'idle' && graceResult.result === 'idle') {
5372
+ // Agent went idle again after producing output — re-check verification
5373
+ continue;
5374
+ }
5375
+ if (graceResult.kind === 'exit') {
5376
+ return graceResult.result as 'exited' | 'timeout' | 'released';
5377
+ }
5378
+ // Grace period timed out — agent is permanently idle without verification.
5379
+ // Release and let upstream executeAgentStep handle verification.
5380
+ this.log(`[${step.name}] Agent "${agent.name}" still idle after ${idleGraceSecs}s grace — releasing`);
5381
+ this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — releasing (verification pending)`);
5382
+ await agent.release();
5383
+ return 'released';
5384
+ }
5385
+ }
5386
+ this.log(`[${step.name}] Agent "${agent.name}" went idle — treating as complete`);
5387
+ this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — treating as complete`);
5388
+ await agent.release();
5389
+ return 'released';
5390
+ }
5391
+ // Exit won the race, or idle returned 'exited'/'timeout' — pass through.
5392
+ return result.result as 'exited' | 'timeout' | 'released';
3758
5393
  }
3759
- // Exit won the race, or idle returned 'exited'/'timeout' — pass through.
3760
- return result.result as 'exited' | 'timeout' | 'released';
3761
5394
  }
3762
5395
 
3763
5396
  const nudgeAfterMs = nudgeConfig.nudgeAfterMs ?? 120_000;
@@ -3765,6 +5398,7 @@ export class WorkflowRunner {
3765
5398
  const maxNudges = nudgeConfig.maxNudges ?? 1;
3766
5399
 
3767
5400
  let nudgeCount = 0;
5401
+ let preservedSupervisorNoticeSent = false;
3768
5402
  const startTime = Date.now();
3769
5403
 
3770
5404
  while (true) {
@@ -3806,6 +5440,19 @@ export class WorkflowRunner {
3806
5440
  continue;
3807
5441
  }
3808
5442
 
5443
+ if (preserveIdleSupervisor) {
5444
+ if (!preservedSupervisorNoticeSent) {
5445
+ this.log(
5446
+ `[${step.name}] Supervising agent "${agent.name}" stayed idle after ${nudgeCount} nudge(s) — preserving until exit or timeout`
5447
+ );
5448
+ this.postToChannel(
5449
+ `**[${step.name}]** Supervising agent \`${agent.name}\` is waiting on handoff — keeping it alive until it exits or the step times out`
5450
+ );
5451
+ preservedSupervisorNoticeSent = true;
5452
+ }
5453
+ continue;
5454
+ }
5455
+
3809
5456
  // Exhausted nudges — force-release
3810
5457
  this.postToChannel(
3811
5458
  `**[${step.name}]** Agent \`${agent.name}\` still idle after ${nudgeCount} nudge(s) — force-releasing`
@@ -3890,8 +5537,34 @@ export class WorkflowRunner {
3890
5537
  check: VerificationCheck,
3891
5538
  output: string,
3892
5539
  stepName: string,
3893
- injectedTaskText?: string
3894
- ): void {
5540
+ injectedTaskText?: string,
5541
+ options?: VerificationOptions
5542
+ ): VerificationResult {
5543
+ const fail = (message: string): VerificationResult => {
5544
+ const observedAt = new Date().toISOString();
5545
+ this.recordStepToolSideEffect(stepName, {
5546
+ type: 'verification_observed',
5547
+ detail: message,
5548
+ observedAt,
5549
+ raw: { passed: false, type: check.type, value: check.value },
5550
+ });
5551
+ this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
5552
+ kind: 'verification_failed',
5553
+ source: 'verification',
5554
+ text: message,
5555
+ observedAt,
5556
+ value: check.value,
5557
+ });
5558
+ if (options?.allowFailure) {
5559
+ return {
5560
+ passed: false,
5561
+ completionReason: 'failed_verification',
5562
+ error: message,
5563
+ };
5564
+ }
5565
+ throw new WorkflowCompletionError(message, 'failed_verification');
5566
+ };
5567
+
3895
5568
  switch (check.type) {
3896
5569
  case 'output_contains': {
3897
5570
  // Guard against false positives: the PTY captures the injected task text
@@ -3905,13 +5578,13 @@ export class WorkflowRunner {
3905
5578
  const first = output.indexOf(token);
3906
5579
  const hasSecond = first !== -1 && output.includes(token, first + token.length);
3907
5580
  if (!hasSecond) {
3908
- throw new Error(
5581
+ return fail(
3909
5582
  `Verification failed for "${stepName}": output does not contain "${token}" ` +
3910
5583
  `(token found only in task injection — agent must output it explicitly)`
3911
5584
  );
3912
5585
  }
3913
5586
  } else if (!output.includes(token)) {
3914
- throw new Error(`Verification failed for "${stepName}": output does not contain "${token}"`);
5587
+ return fail(`Verification failed for "${stepName}": output does not contain "${token}"`);
3915
5588
  }
3916
5589
  break;
3917
5590
  }
@@ -3922,14 +5595,44 @@ export class WorkflowRunner {
3922
5595
 
3923
5596
  case 'file_exists':
3924
5597
  if (!existsSync(path.resolve(this.cwd, check.value))) {
3925
- throw new Error(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
5598
+ return fail(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
3926
5599
  }
3927
5600
  break;
3928
5601
 
3929
5602
  case 'custom':
3930
5603
  // Custom verifications are evaluated by callers; no-op here
3931
- break;
5604
+ return { passed: false };
5605
+ }
5606
+
5607
+ if (options?.completionMarkerFound === false) {
5608
+ this.log(
5609
+ `[${stepName}] Verification passed without legacy STEP_COMPLETE marker; allowing completion`
5610
+ );
3932
5611
  }
5612
+
5613
+ const successMessage =
5614
+ options?.completionMarkerFound === false
5615
+ ? `Verification passed without legacy STEP_COMPLETE marker`
5616
+ : `Verification passed`;
5617
+ const observedAt = new Date().toISOString();
5618
+ this.recordStepToolSideEffect(stepName, {
5619
+ type: 'verification_observed',
5620
+ detail: successMessage,
5621
+ observedAt,
5622
+ raw: { passed: true, type: check.type, value: check.value },
5623
+ });
5624
+ this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
5625
+ kind: 'verification_passed',
5626
+ source: 'verification',
5627
+ text: successMessage,
5628
+ observedAt,
5629
+ value: check.value,
5630
+ });
5631
+
5632
+ return {
5633
+ passed: true,
5634
+ completionReason: 'completed_verified',
5635
+ };
3933
5636
  }
3934
5637
 
3935
5638
  // ── State helpers ─────────────────────────────────────────────────────
@@ -3952,14 +5655,18 @@ export class WorkflowRunner {
3952
5655
  state: StepState,
3953
5656
  error: string,
3954
5657
  runId: string,
3955
- exitInfo?: { exitCode?: number; exitSignal?: string }
5658
+ exitInfo?: { exitCode?: number; exitSignal?: string },
5659
+ completionReason?: WorkflowStepCompletionReason
3956
5660
  ): Promise<void> {
5661
+ this.captureStepTerminalEvidence(state.row.stepName, {}, exitInfo);
3957
5662
  state.row.status = 'failed';
3958
5663
  state.row.error = error;
5664
+ state.row.completionReason = completionReason;
3959
5665
  state.row.completedAt = new Date().toISOString();
3960
5666
  await this.db.updateStep(state.row.id, {
3961
5667
  status: 'failed',
3962
5668
  error,
5669
+ completionReason,
3963
5670
  completedAt: state.row.completedAt,
3964
5671
  updatedAt: new Date().toISOString(),
3965
5672
  });
@@ -3971,6 +5678,7 @@ export class WorkflowRunner {
3971
5678
  exitCode: exitInfo?.exitCode,
3972
5679
  exitSignal: exitInfo?.exitSignal,
3973
5680
  });
5681
+ this.finalizeStepEvidence(state.row.stepName, 'failed', state.row.completedAt, completionReason);
3974
5682
  }
3975
5683
 
3976
5684
  private async markDownstreamSkipped(
@@ -4085,7 +5793,7 @@ export class WorkflowRunner {
4085
5793
  'RELAY SETUP — do this FIRST before any other relay tool:\n' +
4086
5794
  `1. Call: register(name="${agentName}")\n` +
4087
5795
  ' This authenticates you in the Relaycast workspace.\n' +
4088
- ' ALL relay tools (mcp__relaycast__dm_send, mcp__relaycast__inbox_check, mcp__relaycast__message_post, etc.) require\n' +
5796
+ ' ALL relay tools (mcp__relaycast__message_dm_send, mcp__relaycast__message_inbox_check, mcp__relaycast__message_post, etc.) require\n' +
4089
5797
  ' registration first — they will fail with "Not registered" otherwise.\n' +
4090
5798
  `2. Your agent name is "${agentName}" — use this exact name when registering.`
4091
5799
  );
@@ -4114,8 +5822,8 @@ export class WorkflowRunner {
4114
5822
  'you should break it down and delegate to helper agents to avoid timeouts.\n\n' +
4115
5823
  'Option 1 — Spawn relay agents (for real parallel coding work):\n' +
4116
5824
  ' - mcp__relaycast__agent_add(name="helper-1", cli="claude", task="Specific subtask description")\n' +
4117
- ' - Coordinate via mcp__relaycast__dm_send(to="helper-1", text="...")\n' +
4118
- ' - Check on them with mcp__relaycast__inbox_check()\n' +
5825
+ ' - Coordinate via mcp__relaycast__message_dm_send(to="helper-1", text="...")\n' +
5826
+ ' - Check on them with mcp__relaycast__message_inbox_check()\n' +
4119
5827
  ' - Clean up when done: mcp__relaycast__agent_remove(name="helper-1")\n\n' +
4120
5828
  subAgentOption +
4121
5829
  'Guidelines:\n' +
@@ -4129,8 +5837,24 @@ export class WorkflowRunner {
4129
5837
  }
4130
5838
 
4131
5839
  /** Post a message to the workflow channel. Fire-and-forget — never throws or blocks. */
4132
- private postToChannel(text: string): void {
5840
+ private postToChannel(text: string, options: ChannelEvidenceOptions = {}): void {
4133
5841
  if (!this.relayApiKey || !this.channel) return;
5842
+ this.recordChannelEvidence(text, options);
5843
+
5844
+ const stepName = options.stepName ?? this.inferStepNameFromChannelText(text);
5845
+ if (stepName) {
5846
+ this.recordStepToolSideEffect(stepName, {
5847
+ type: 'post_channel_message',
5848
+ detail: text.slice(0, 240),
5849
+ raw: {
5850
+ actor: options.actor,
5851
+ role: options.role,
5852
+ target: options.target ?? this.channel,
5853
+ origin: options.origin ?? 'runner_post',
5854
+ },
5855
+ });
5856
+ }
5857
+
4134
5858
  this.ensureRelaycastRunnerAgent()
4135
5859
  .then((agent) => agent.send(this.channel!, text))
4136
5860
  .catch(() => {
@@ -4308,6 +6032,9 @@ export class WorkflowRunner {
4308
6032
  output: state.row.output,
4309
6033
  error: state.row.error,
4310
6034
  verificationPassed: state.row.status === 'completed' && stepsWithVerification.has(name),
6035
+ completionMode: state.row.completionReason
6036
+ ? this.buildStepCompletionDecision(name, state.row.completionReason)?.mode
6037
+ : undefined,
4311
6038
  });
4312
6039
  }
4313
6040
  return outcomes;
@@ -4449,25 +6176,31 @@ export class WorkflowRunner {
4449
6176
  /**
   * Persist step output to disk and post a preview of it to the workflow channel.
   *
   * 1. Writes the ANSI-stripped output to `<step output dir>/<stepName>.md`.
   *    Failures inside the try block are swallowed (treated as non-critical).
   * 2. Posts to the channel: a short "output written to disk" notice when the
   *    scrubbed output is empty, otherwise the last 2000 characters of the
   *    scrubbed output inside a fenced code block (not the full output).
   *
   * @param runId - Run identifier used to resolve the step output directory.
   * @param stepName - Step name; used as the file base name and message tag.
   * @param output - Raw step output.
   */
4450
6177
  private async persistStepOutput(runId: string, stepName: string, output: string): Promise<void> {
4451
6178
  // 1. Write to disk
6179
// NOTE(review): outputPath is computed outside the try block, so an exception
// thrown while resolving it is not covered by the "Non-critical" catch below.
+ const outputPath = path.join(this.getStepOutputDir(runId), `${stepName}.md`);
4452
6180
  try {
4453
6181
  const dir = this.getStepOutputDir(runId);
4454
6182
  mkdirSync(dir, { recursive: true });
4455
6183
  const cleaned = WorkflowRunner.stripAnsi(output);
4456
- await writeFile(path.join(dir, `${stepName}.md`), cleaned);
6184
+ await writeFile(outputPath, cleaned);
4457
6185
  } catch {
4458
6186
  // Non-critical
4459
6187
  }
6188
// NOTE(review): this runs whether or not the write above succeeded (the catch
// swallows failures), so "Persisted step output" evidence can be recorded for
// a file that was never written — confirm whether it belongs inside the try.
+ this.recordStepToolSideEffect(stepName, {
6189
+ type: 'persist_step_output',
6190
+ detail: `Persisted step output to ${this.normalizeEvidencePath(outputPath)}`,
6191
+ raw: { path: outputPath },
6192
+ });
4460
6193
 
4461
6194
  // 2. Post scrubbed output as a single channel message (most recent tail only)
4462
6195
  const scrubbed = WorkflowRunner.scrubForChannel(output);
4463
6196
  if (scrubbed.length === 0) {
4464
- this.postToChannel(`**[${stepName}]** Step completed — output written to disk`);
6197
+ this.postToChannel(`**[${stepName}]** Step completed — output written to disk`, { stepName });
4465
6198
  return;
4466
6199
  }
4467
6200
 
4468
6201
  const maxMsg = 2000;
4469
6202
  // Keep only the trailing maxMsg characters so the newest output is what gets posted.
  const preview = scrubbed.length > maxMsg ? scrubbed.slice(-maxMsg) : scrubbed;
4470
- this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``);
6203
+ this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``, { stepName });
4471
6204
  }
4472
6205
 
4473
6206
  /** Load persisted step output from disk. */