npm - @agent-relay/sdk - Versions diffs - 3.2.0 → 3.2.2 - Mend

@agent-relay/sdk 3.2.0 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/bin/agent-relay-broker-darwin-arm64 +0 -0
package/bin/agent-relay-broker-darwin-x64 +0 -0
package/bin/agent-relay-broker-linux-arm64 +0 -0
package/bin/agent-relay-broker-linux-x64 +0 -0
package/dist/__tests__/completion-pipeline.test.d.ts +14 -0
package/dist/__tests__/completion-pipeline.test.d.ts.map +1 -0
package/dist/__tests__/completion-pipeline.test.js +1476 -0
package/dist/__tests__/completion-pipeline.test.js.map +1 -0
package/dist/__tests__/e2e-owner-review.test.js +2 -2
package/dist/__tests__/e2e-owner-review.test.js.map +1 -1
package/dist/examples/example.js +1 -1
package/dist/examples/example.js.map +1 -1
package/dist/relay-adapter.js +4 -4
package/dist/relay-adapter.js.map +1 -1
package/dist/workflows/builder.d.ts +18 -3
package/dist/workflows/builder.d.ts.map +1 -1
package/dist/workflows/builder.js +24 -12
package/dist/workflows/builder.js.map +1 -1
package/dist/workflows/runner.d.ts +55 -2
package/dist/workflows/runner.d.ts.map +1 -1
package/dist/workflows/runner.js +1370 -108
package/dist/workflows/runner.js.map +1 -1
package/dist/workflows/trajectory.d.ts +6 -2
package/dist/workflows/trajectory.d.ts.map +1 -1
package/dist/workflows/trajectory.js +37 -2
package/dist/workflows/trajectory.js.map +1 -1
package/dist/workflows/types.d.ts +88 -0
package/dist/workflows/types.d.ts.map +1 -1
package/dist/workflows/types.js.map +1 -1
package/dist/workflows/validator.js +1 -1
package/dist/workflows/validator.js.map +1 -1
package/package.json +2 -2

package/dist/workflows/runner.js CHANGED Viewed

@@ -5,11 +5,12 @@
  */
 import { spawn as cpSpawn, execFileSync } from 'node:child_process';
 import { randomBytes } from 'node:crypto';
-import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
+import { createWriteStream, existsSync, mkdirSync, readFileSync, readdirSync, renameSync, statSync, writeFileSync, } from 'node:fs';
 import { readFile, writeFile } from 'node:fs/promises';
 import path from 'node:path';
 import { parse as parseYaml } from 'yaml';
 import { stripAnsi as stripAnsiFn } from '../pty.js';
+import { resolveSpawnPolicy } from '../spawn-from-env.js';
 import { loadCustomSteps, resolveAllCustomSteps, validateCustomStepsUsage, CustomStepsParseError, CustomStepResolutionError, } from './custom-steps.js';
 import { InMemoryWorkflowDb } from './memory-db.js';
 import { WorkflowTrajectory } from './trajectory.js';
@@ -28,6 +29,14 @@ class SpawnExitError extends Error {
         this.exitSignal = exitSignal ?? undefined;
     }
 }
+class WorkflowCompletionError extends Error { // Error subtype that carries a completionReason alongside the message.
+    completionReason;
+    constructor(message, completionReason) {
+        super(message);
+        this.name = 'WorkflowCompletionError'; // Overrides the default 'Error' name so callers can tell this type apart by name.
+        this.completionReason = completionReason; // Stored as given; no validation happens here.
+    }
+}
 // ── CLI resolution ───────────────────────────────────────────────────────────
 /**
  * Resolve `cursor` to the concrete cursor agent binary available in PATH.
@@ -101,8 +110,16 @@ export class WorkflowRunner {
     lastActivity = new Map();
     /** Runtime-name lookup for agents participating in supervised owner flows. */
     supervisedRuntimeAgents = new Map();
+    /** Runtime-name lookup for active step agents so channel messages can be attributed to a step. */
+    runtimeStepAgents = new Map();
+    /** Per-step completion evidence collected across output, channel, files, and tool side-effects. */
+    stepCompletionEvidence = new Map();
+    /** Expected owner/worker identities per step so coordination signals can be validated by sender. */
+    stepSignalParticipants = new Map();
     /** Resolved named paths from the top-level `paths` config, keyed by name → absolute directory. */
     resolvedPaths = new Map();
+    /** Tracks agent names currently assigned as reviewers (ref-counted to handle concurrent usage). */
+    activeReviewers = new Map();
     constructor(options = {}) {
         this.db = options.db ?? new InMemoryWorkflowDb();
         this.workspaceId = options.workspaceId ?? 'local';
@@ -182,6 +199,441 @@ export class WorkflowRunner {
         }
         return resolved;
     }
+    static EVIDENCE_IGNORED_DIRS = new Set([ // Directory names that captureFileSnapshot skips while walking an evidence root.
+        '.git',
+        '.agent-relay',
+        '.trajectories',
+        'node_modules',
+    ]);
+    getStepCompletionEvidence(stepName) { // Returns a provenance-filtered copy of the step's evidence, or undefined when none is recorded.
+        const record = this.stepCompletionEvidence.get(stepName);
+        if (!record)
+            return undefined;
+        const evidence = structuredClone(record.evidence); // Deep copy, so the in-place filtering below never touches the stored record.
+        return this.filterStepEvidenceBySignalProvenance(stepName, evidence);
+    }
+    getOrCreateStepEvidenceRecord(stepName) { // Returns the stored evidence record for stepName, creating and storing an empty one if absent.
+        const existing = this.stepCompletionEvidence.get(stepName);
+        if (existing)
+            return existing;
+        const now = new Date().toISOString();
+        const record = {
+            evidence: { // Everything starts empty; the begin/capture/record* methods fill it in.
+                stepName,
+                lastUpdatedAt: now,
+                roots: [],
+                output: {
+                    stdout: '',
+                    stderr: '',
+                    combined: '',
+                },
+                channelPosts: [],
+                files: [],
+                process: {},
+                toolSideEffects: [],
+                coordinationSignals: [],
+            },
+            baselineSnapshots: new Map(), // root path -> file snapshot taken in beginStepEvidence.
+            filesCaptured: false, // Set to true by finalizeStepEvidence once file changes have been diffed.
+        };
+        this.stepCompletionEvidence.set(stepName, record);
+        return record;
+    }
+    initializeStepSignalParticipants(stepName, ownerSender, workerSender) { // Resets the step's sender sets, then registers the given owner and worker senders.
+        this.stepSignalParticipants.set(stepName, { // Replaces any entry previously stored for this step.
+            ownerSenders: new Set(),
+            workerSenders: new Set(),
+        });
+        this.rememberStepSignalSender(stepName, 'owner', ownerSender);
+        this.rememberStepSignalSender(stepName, 'worker', workerSender);
+    }
+    rememberStepSignalSender(stepName, participant, ...senders) { // Adds trimmed, non-empty sender names to the step's owner or worker set.
+        const participants = this.stepSignalParticipants.get(stepName) ??
+            {
+                ownerSenders: new Set(),
+                workerSenders: new Set(),
+            };
+        this.stepSignalParticipants.set(stepName, participants); // Stores the entry in case it was just created above.
+        const target = participant === 'owner' ? participants.ownerSenders : participants.workerSenders; // Any value other than 'owner' goes to the worker set.
+        for (const sender of senders) {
+            const trimmed = sender?.trim(); // Optional chaining tolerates null/undefined senders.
+            if (trimmed)
+                target.add(trimmed);
+        }
+    }
+    resolveSignalParticipantKind(role) { // Maps a free-form role string to 'owner', 'worker', or undefined via whole-word keyword matches.
+        const roleLC = role?.toLowerCase().trim();
+        if (!roleLC)
+            return undefined;
+        if (/\b(owner|lead|supervisor)\b/.test(roleLC)) // Checked first, so a role containing both kinds of keyword resolves to 'owner'.
+            return 'owner';
+        if (/\b(worker|specialist|engineer|implementer)\b/.test(roleLC))
+            return 'worker';
+        return undefined;
+    }
+    isSignalFromExpectedSender(stepName, signal) { // Decides whether a coordination signal is accepted for this step, based on who emitted it.
+        const expectedParticipant = signal.kind === 'worker_done'
+            ? 'worker'
+            : signal.kind === 'lead_done'
+                ? 'owner'
+                : undefined;
+        if (!expectedParticipant) // Only worker_done and lead_done are sender-checked; every other kind is accepted.
+            return true;
+        const participants = this.stepSignalParticipants.get(stepName);
+        if (!participants) // No participants registered for this step: nothing to validate against.
+            return true;
+        const allowedSenders = expectedParticipant === 'owner' ? participants.ownerSenders : participants.workerSenders;
+        if (allowedSenders.size === 0)
+            return true;
+        const sender = signal.sender ?? signal.actor;
+        if (sender) { // A known sender decides the result alone; the role fallback below is not consulted.
+            return allowedSenders.has(sender);
+        }
+        const observedParticipant = this.resolveSignalParticipantKind(signal.role);
+        if (observedParticipant) {
+            return observedParticipant === expectedParticipant;
+        }
+        return signal.source !== 'channel'; // With no sender and no recognizable role, only non-channel signals are accepted.
+    }
+    filterStepEvidenceBySignalProvenance(stepName, evidence) { // Removes signals rejected by isSignalFromExpectedSender; mutates and returns the given evidence object.
+        evidence.channelPosts = evidence.channelPosts.map((post) => {
+            const signals = post.signals.filter((signal) => this.isSignalFromExpectedSender(stepName, signal));
+            return {
+                ...post,
+                completionRelevant: signals.length > 0, // Recomputed from the signals that survived filtering.
+                signals,
+            };
+        });
+        evidence.coordinationSignals = evidence.coordinationSignals.filter((signal) => this.isSignalFromExpectedSender(stepName, signal));
+        return evidence;
+    }
+    beginStepEvidence(stepName, roots, startedAt) { // Marks the step's evidence as running and snapshots each new root as the baseline for later file diffs.
+        const record = this.getOrCreateStepEvidenceRecord(stepName);
+        const evidence = record.evidence;
+        const now = startedAt ?? new Date().toISOString();
+        evidence.startedAt ??= now; // Keeps the first recorded start time when called again for the same step.
+        evidence.status = 'running';
+        evidence.lastUpdatedAt = now;
+        for (const root of this.uniqueEvidenceRoots(roots)) {
+            if (!evidence.roots.includes(root)) {
+                evidence.roots.push(root);
+            }
+            if (!record.baselineSnapshots.has(root)) { // The baseline is captured once per root and never refreshed here.
+                record.baselineSnapshots.set(root, this.captureFileSnapshot(root));
+            }
+        }
+    }
+    captureStepTerminalEvidence(stepName, output, process, meta) { // Appends terminal output to the step's evidence, extracts signals from it, and records exit info. The `process` parameter shadows the Node global inside this method.
+        const record = this.getOrCreateStepEvidenceRecord(stepName);
+        const evidence = record.evidence;
+        const observedAt = new Date().toISOString();
+        const append = (current, next) => { // Joins with a newline, skipping the separator when either side is empty.
+            if (!next)
+                return current;
+            return current ? `${current}\n${next}` : next;
+        };
+        if (output.stdout) {
+            evidence.output.stdout = append(evidence.output.stdout, output.stdout);
+            for (const signal of this.extractCompletionSignals(output.stdout, 'stdout', observedAt, meta)) {
+                evidence.coordinationSignals.push(signal);
+            }
+        }
+        if (output.stderr) {
+            evidence.output.stderr = append(evidence.output.stderr, output.stderr);
+            for (const signal of this.extractCompletionSignals(output.stderr, 'stderr', observedAt, meta)) {
+                evidence.coordinationSignals.push(signal);
+            }
+        }
+        const combinedOutput = output.combined ?? // Falls back to stdout + stderr only when combined is null/undefined.
+            [output.stdout, output.stderr].filter((value) => Boolean(value)).join('\n');
+        if (combinedOutput) { // Combined output is stored but not scanned for signals here.
+            evidence.output.combined = append(evidence.output.combined, combinedOutput);
+        }
+        if (process) {
+            if (process.exitCode !== undefined) {
+                evidence.process.exitCode = process.exitCode;
+                evidence.coordinationSignals.push({ // Every recorded exit code, including non-zero, becomes a process_exit signal.
+                    kind: 'process_exit',
+                    source: 'process',
+                    text: `Process exited with code ${process.exitCode}`,
+                    observedAt,
+                    value: String(process.exitCode),
+                });
+            }
+            if (process.exitSignal !== undefined) {
+                evidence.process.exitSignal = process.exitSignal;
+            }
+        }
+        evidence.lastUpdatedAt = observedAt;
+    }
+    finalizeStepEvidence(stepName, status, completedAt, completionReason) { // Stamps final status and timestamps, records file changes once, and optionally reports a completion decision to the trajectory.
+        const record = this.stepCompletionEvidence.get(stepName);
+        if (!record) // No evidence was ever started for this step: nothing to finalize.
+            return;
+        const evidence = record.evidence;
+        const observedAt = completedAt ?? new Date().toISOString();
+        evidence.status = status;
+        if (status !== 'running') {
+            evidence.completedAt = observedAt;
+        }
+        evidence.lastUpdatedAt = observedAt;
+        if (!record.filesCaptured) { // The file diff runs only on the first finalize call for this record.
+            const existing = new Set(evidence.files.map((file) => `${file.kind}:${file.path}`)); // kind:path keys used to avoid duplicate entries.
+            for (const root of evidence.roots) {
+                const before = record.baselineSnapshots.get(root) ?? new Map(); // Without a baseline, every current file is reported as created.
+                const after = this.captureFileSnapshot(root);
+                for (const change of this.diffFileSnapshots(before, after, root, observedAt)) {
+                    const key = `${change.kind}:${change.path}`;
+                    if (existing.has(key))
+                        continue;
+                    existing.add(key);
+                    evidence.files.push(change);
+                }
+            }
+            record.filesCaptured = true;
+        }
+        if (completionReason) {
+            const decision = this.buildStepCompletionDecision(stepName, completionReason);
+            if (decision) {
+                void this.trajectory?.stepCompletionDecision(stepName, decision); // Result is deliberately discarded (not awaited).
+            }
+        }
+    }
+    recordStepToolSideEffect(stepName, effect) { // Appends a tool side-effect entry to the step's evidence, creating the record if needed.
+        const record = this.getOrCreateStepEvidenceRecord(stepName);
+        const observedAt = effect.observedAt ?? new Date().toISOString(); // A caller-supplied timestamp wins over the current time.
+        record.evidence.toolSideEffects.push({
+            ...effect,
+            observedAt,
+        });
+        record.evidence.lastUpdatedAt = observedAt;
+    }
+    recordChannelEvidence(text, options = {}) { // Records a channel message against a step, along with any completion signals found in its text.
+        const stepName = options.stepName ?? // Resolution order: explicit option, then marker in the text, then runtime-agent lookup by actor.
+            this.inferStepNameFromChannelText(text) ??
+            (options.actor ? this.runtimeStepAgents.get(options.actor)?.stepName : undefined);
+        if (!stepName) // Messages that cannot be tied to a step are dropped silently.
+            return;
+        const record = this.getOrCreateStepEvidenceRecord(stepName);
+        const postedAt = new Date().toISOString();
+        const sender = options.sender ?? options.actor;
+        const signals = this.extractCompletionSignals(text, 'channel', postedAt, {
+            sender,
+            actor: options.actor,
+            role: options.role,
+        });
+        const channelPost = {
+            stepName,
+            text,
+            postedAt,
+            origin: options.origin ?? 'runner_post', // Default applies when the caller does not state an origin.
+            completionRelevant: signals.length > 0,
+            sender,
+            actor: options.actor,
+            role: options.role,
+            target: options.target,
+            signals,
+        };
+        record.evidence.channelPosts.push(channelPost);
+        record.evidence.coordinationSignals.push(...signals); // The same signal objects are referenced from the post and from the step-level list.
+        record.evidence.lastUpdatedAt = postedAt;
+    }
+    extractCompletionSignals(text, source, observedAt, meta) { // Scans text for completion markers and phrases; returns a de-duplicated array of signal objects.
+        const signals = [];
+        const seen = new Set();
+        const add = (kind, signalText, value) => { // Trims and caps the text at 280 chars, then de-duplicates on kind + text + value.
+            const trimmed = signalText.trim().slice(0, 280);
+            if (!trimmed)
+                return;
+            const key = `${kind}:${trimmed}:${value ?? ''}`;
+            if (seen.has(key))
+                return;
+            seen.add(key);
+            signals.push({
+                kind,
+                source,
+                text: trimmed,
+                observedAt,
+                sender: meta?.sender,
+                actor: meta?.actor,
+                role: meta?.role,
+                value,
+            });
+        };
+        for (const match of text.matchAll(/\bWORKER_DONE\b(?::\s*([^\n]+))?/gi)) { // Optional ": detail" up to end of line becomes the signal value.
+            add('worker_done', match[0], match[1]?.trim());
+        }
+        for (const match of text.matchAll(/\bLEAD_DONE\b(?::\s*([^\n]+))?/gi)) {
+            add('lead_done', match[0], match[1]?.trim());
+        }
+        for (const match of text.matchAll(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/g)) { // Case-sensitive, unlike the other marker patterns here.
+            add('step_complete', match[0], match[1]);
+        }
+        for (const match of text.matchAll(/\bOWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi)) {
+            add('owner_decision', match[0], match[1].toUpperCase()); // Value is normalized to upper case.
+        }
+        for (const match of text.matchAll(/\bREVIEW_DECISION:\s*(APPROVE|REJECT)\b/gi)) {
+            add('review_decision', match[0], match[1].toUpperCase());
+        }
+        if (/\bverification gate observed\b|\bverification passed\b/i.test(text)) { // Phrase checks below add at most one signal per kind.
+            add('verification_passed', this.firstMeaningfulLine(text) ?? text); // firstMeaningfulLine is defined outside this view.
+        }
+        if (/\bverification failed\b/i.test(text)) {
+            add('verification_failed', this.firstMeaningfulLine(text) ?? text);
+        }
+        if (/\b(summary|handoff|ready for review|ready for handoff|task complete|work complete|completed work|finished work)\b/i.test(text)) {
+            add('task_summary', this.firstMeaningfulLine(text) ?? text);
+        }
+        return signals;
+    }
+    inferStepNameFromChannelText(text) { // Derives a step name from a leading "**[name]" prefix or a STEP_COMPLETE:<name> marker; undefined if neither matches.
+        const bracketMatch = text.match(/^\*\*\[([^\]]+)\]/); // Anchored at the very start of the text.
+        if (bracketMatch?.[1])
+            return bracketMatch[1];
+        const markerMatch = text.match(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/); // Only the first marker in the text is used.
+        if (markerMatch?.[1])
+            return markerMatch[1];
+        return undefined;
+    }
+    uniqueEvidenceRoots(roots) { // Drops falsy entries, resolves the rest to absolute paths, and de-duplicates them.
+        return [...new Set(roots.filter((root) => Boolean(root)).map((root) => path.resolve(root)))];
+    }
+    captureFileSnapshot(root) { // Builds a Map of file path -> { mtimeMs, size } for a root that may be a single file or a directory tree.
+        const snapshot = new Map();
+        if (!existsSync(root)) // A missing root yields an empty snapshot rather than an error.
+            return snapshot;
+        const visit = (currentPath) => { // Recursive directory walk that fills `snapshot`.
+            let entries;
+            try {
+                entries = readdirSync(currentPath, { withFileTypes: true });
+            }
+            catch { // Unreadable directories are skipped.
+                return;
+            }
+            for (const entry of entries) {
+                if (entry.isDirectory() && WorkflowRunner.EVIDENCE_IGNORED_DIRS.has(entry.name)) { // Ignored names are matched at any depth, for directories only.
+                    continue;
+                }
+                const fullPath = path.join(currentPath, entry.name);
+                if (entry.isDirectory()) {
+                    visit(fullPath);
+                    continue;
+                }
+                try {
+                    const stats = statSync(fullPath);
+                    if (!stats.isFile()) // Anything that does not stat as a regular file is left out.
+                        continue;
+                    snapshot.set(fullPath, { mtimeMs: stats.mtimeMs, size: stats.size });
+                }
+                catch {
+                    // Best-effort evidence collection only.
+                }
+            }
+        };
+        try {
+            const stats = statSync(root);
+            if (stats.isFile()) { // A file root produces a single-entry snapshot; no walk is needed.
+                snapshot.set(root, { mtimeMs: stats.mtimeMs, size: stats.size });
+                return snapshot;
+            }
+        }
+        catch {
+            return snapshot;
+        }
+        visit(root);
+        return snapshot;
+    }
+    diffFileSnapshots(before, after, root, observedAt) { // Compares two snapshots and returns created/deleted/modified entries sorted by path.
+        const allPaths = new Set([...before.keys(), ...after.keys()]);
+        const changes = [];
+        for (const filePath of allPaths) {
+            const prior = before.get(filePath);
+            const next = after.get(filePath);
+            let kind;
+            if (!prior && next) {
+                kind = 'created';
+            }
+            else if (prior && !next) {
+                kind = 'deleted';
+            }
+            else if (prior && next && (prior.mtimeMs !== next.mtimeMs || prior.size !== next.size)) { // Change detection uses mtime and size only; contents are not compared.
+                kind = 'modified';
+            }
+            if (!kind) // Unchanged files produce no entry.
+                continue;
+            changes.push({
+                path: this.normalizeEvidencePath(filePath),
+                kind,
+                observedAt,
+                root,
+            });
+        }
+        return changes.sort((a, b) => a.path.localeCompare(b.path));
+    }
+    normalizeEvidencePath(filePath) { // Returns the path relative to this.cwd, the basename when the path is cwd itself, or the input path when it lies outside cwd.
+        const relative = path.relative(this.cwd, filePath);
+        if (!relative || relative === '') // NOTE(review): the second test is redundant; an empty string is already falsy.
+            return path.basename(filePath);
+        return relative.startsWith('..') ? filePath : relative; // NOTE(review): also matches in-cwd names that begin with '..' (e.g. '..cache/x') — confirm this is intended.
+    }
+    buildStepCompletionDecision(stepName, completionReason) { // Translates a completionReason into a { mode, reason, evidence } decision; undefined for unrecognized reasons.
+        let reason;
+        let mode;
+        switch (completionReason) {
+            case 'completed_verified':
+                mode = 'verification';
+                reason = 'Verification passed';
+                break;
+            case 'completed_by_evidence':
+                mode = 'evidence';
+                reason = 'Completion inferred from collected evidence';
+                break;
+            case 'completed_by_owner_decision': {
+                const evidence = this.getStepCompletionEvidence(stepName);
+                const markerObserved = evidence?.coordinationSignals.some((signal) => signal.kind === 'step_complete'); // Any surviving STEP_COMPLETE signal switches the mode to 'marker'.
+                mode = markerObserved ? 'marker' : 'owner_decision';
+                reason = markerObserved ? 'Legacy STEP_COMPLETE marker observed' : 'Owner approved completion';
+                break;
+            }
+            default: // Every other reason produces no decision.
+                return undefined;
+        }
+        return {
+            mode,
+            reason,
+            evidence: this.buildTrajectoryCompletionEvidence(stepName),
+        };
+    }
+    buildTrajectoryCompletionEvidence(stepName) { // Condenses the step's filtered evidence into a compact summary object; undefined when no evidence exists.
+        const evidence = this.getStepCompletionEvidence(stepName);
+        if (!evidence)
+            return undefined;
+        const signals = evidence.coordinationSignals
+            .slice(-6) // Keeps only the six most recent signals.
+            .map((signal) => signal.value ?? signal.text);
+        const channelPosts = evidence.channelPosts
+            .filter((post) => post.completionRelevant)
+            .slice(-3) // Keeps the three most recent relevant posts, each cut to 160 chars below.
+            .map((post) => post.text.slice(0, 160));
+        const files = evidence.files.slice(0, 6).map((file) => `${file.kind}:${file.path}`); // Takes the first six file changes, unlike the "most recent" slices above.
+        const summaryParts = []; // NOTE(review): the counts below come from the truncated lists, not the totals — confirm this is intended.
+        if (signals.length > 0)
+            summaryParts.push(`${signals.length} signal(s)`);
+        if (channelPosts.length > 0)
+            summaryParts.push(`${channelPosts.length} relevant channel post(s)`);
+        if (files.length > 0)
+            summaryParts.push(`${files.length} file change(s)`);
+        if (evidence.process.exitCode !== undefined) {
+            summaryParts.push(`exit=${evidence.process.exitCode}`);
+        }
+        return { // Empty collections are reported as undefined rather than as empty arrays or strings.
+            summary: summaryParts.length > 0 ? summaryParts.join(', ') : undefined,
+            signals: signals.length > 0 ? signals : undefined,
+            channelPosts: channelPosts.length > 0 ? channelPosts : undefined,
+            files: files.length > 0 ? files : undefined,
+            exitCode: evidence.process.exitCode,
+        };
+    }
     // ── Progress logging ────────────────────────────────────────────────────
     /** Log a progress message with elapsed time since run start. */
     log(msg) {
@@ -985,9 +1437,11 @@ export class WorkflowRunner {
             if (state.row.status === 'failed') {
                 state.row.status = 'pending';
                 state.row.error = undefined;
+                state.row.completionReason = undefined;
                 await this.db.updateStep(state.row.id, {
                     status: 'pending',
                     error: undefined,
+                    completionReason: undefined,
                     updatedAt: new Date().toISOString(),
                 });
             }
@@ -1007,6 +1461,8 @@ export class WorkflowRunner {
         this.currentConfig = config;
         this.currentRunId = runId;
         this.runStartTime = Date.now();
+        this.runtimeStepAgents.clear();
+        this.stepCompletionEvidence.clear();
         this.log(`Starting workflow "${workflow.name}" (${workflow.steps.length} steps)`);
         // Initialize trajectory recording
         this.trajectory = new WorkflowTrajectory(config.trajectories, runId, this.cwd);
@@ -1132,8 +1588,24 @@ export class WorkflowRunner {
                     const fromShort = msg.from.replace(/-[a-f0-9]{6,}$/, '');
                     const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
                     this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
+                    if (this.channel && (msg.to === this.channel || msg.to === `#${this.channel}`)) {
+                        const runtimeAgent = this.runtimeStepAgents.get(msg.from);
+                        this.recordChannelEvidence(msg.text, {
+                            sender: runtimeAgent?.logicalName ?? msg.from,
+                            actor: msg.from,
+                            role: runtimeAgent?.role,
+                            target: msg.to,
+                            origin: 'relay_message',
+                            stepName: runtimeAgent?.stepName,
+                        });
+                    }
                     const supervision = this.supervisedRuntimeAgents.get(msg.from);
                     if (supervision?.role === 'owner') {
+                        this.recordStepToolSideEffect(supervision.stepName, {
+                            type: 'owner_monitoring',
+                            detail: `Owner messaged ${msg.to}: ${msg.text.slice(0, 120)}`,
+                            raw: { to: msg.to, text: msg.text },
+                        });
                         void this.trajectory?.ownerMonitoringEvent(supervision.stepName, supervision.logicalName, `Messaged ${msg.to}: ${msg.text.slice(0, 120)}`, { to: msg.to, text: msg.text });
                     }
                 };
@@ -1288,6 +1760,7 @@ export class WorkflowRunner {
                             updatedAt: new Date().toISOString(),
                         });
                         this.emit({ type: 'step:failed', runId, stepName, error: 'Cancelled' });
+                        this.finalizeStepEvidence(stepName, 'failed');
                     }
                 }
                 this.emit({ type: 'run:cancelled', runId });
@@ -1328,6 +1801,8 @@ export class WorkflowRunner {
             this.lastIdleLog.clear();
             this.lastActivity.clear();
             this.supervisedRuntimeAgents.clear();
+            this.runtimeStepAgents.clear();
+            this.activeReviewers.clear();
             this.log('Shutting down broker...');
             await this.relay?.shutdown();
             this.relay = undefined;
@@ -1435,6 +1910,9 @@ export class WorkflowRunner {
                         attempts: (state?.row.retryCount ?? 0) + 1,
                         output: state?.row.output,
                         verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
+                        completionMode: state?.row.completionReason
+                            ? this.buildStepCompletionDecision(step.name, state.row.completionReason)?.mode
+                            : undefined,
                     });
                 }
             }
@@ -1595,11 +2073,21 @@ export class WorkflowRunner {
         const maxRetries = step.retries ?? errorHandling?.maxRetries ?? 0;
         const retryDelay = errorHandling?.retryDelayMs ?? 1000;
         let lastError;
+        let lastCompletionReason;
+        let lastExitCode;
+        let lastExitSignal;
         for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
             this.checkAborted();
+            lastExitCode = undefined;
+            lastExitSignal = undefined;
             if (attempt > 0) {
                 this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
                 this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
+                this.recordStepToolSideEffect(step.name, {
+                    type: 'retry',
+                    detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
+                    raw: { attempt, maxRetries },
+                });
                 state.row.retryCount = attempt;
                 await this.db.updateStep(state.row.id, {
                     retryCount: attempt,
@@ -1609,9 +2097,13 @@ export class WorkflowRunner {
             }
             // Mark step as running
             state.row.status = 'running';
+            state.row.error = undefined;
+            state.row.completionReason = undefined;
             state.row.startedAt = new Date().toISOString();
             await this.db.updateStep(state.row.id, {
                 status: 'running',
+                error: undefined,
+                completionReason: undefined,
                 startedAt: state.row.startedAt,
                 updatedAt: new Date().toISOString(),
             });
@@ -1629,32 +2121,40 @@ export class WorkflowRunner {
             });
             // Resolve step workdir (named path reference) for deterministic steps
             const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
+            this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
             try {
                 // Delegate to executor if present
                 if (this.executor?.executeDeterministicStep) {
                     const result = await this.executor.executeDeterministicStep(step, resolvedCommand, stepCwd);
+                    lastExitCode = result.exitCode;
                     const failOnError = step.failOnError !== false;
                     if (failOnError && result.exitCode !== 0) {
                         throw new Error(`Command failed with exit code ${result.exitCode}: ${result.output.slice(0, 500)}`);
                     }
                     const output = step.captureOutput !== false ? result.output : `Command completed (exit code ${result.exitCode})`;
-                    if (step.verification) {
-                        this.runVerification(step.verification, output, step.name);
-                    }
+                    this.captureStepTerminalEvidence(step.name, { stdout: result.output, combined: result.output }, { exitCode: result.exitCode });
+                    const verificationResult = step.verification
+                        ? this.runVerification(step.verification, output, step.name)
+                        : undefined;
                     // Mark completed
                     state.row.status = 'completed';
                     state.row.output = output;
+                    state.row.completionReason = verificationResult?.completionReason;
                     state.row.completedAt = new Date().toISOString();
                     await this.db.updateStep(state.row.id, {
                         status: 'completed',
                         output,
+                        completionReason: verificationResult?.completionReason,
                         completedAt: state.row.completedAt,
                         updatedAt: new Date().toISOString(),
                     });
                     await this.persistStepOutput(runId, step.name, output);
                     this.emit({ type: 'step:completed', runId, stepName: step.name, output });
+                    this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, verificationResult?.completionReason);
                     return;
                 }
+                let commandStdout = '';
+                let commandStderr = '';
                 const output = await new Promise((resolve, reject) => {
                     const child = cpSpawn('sh', ['-c', resolvedCommand], {
                         stdio: 'pipe',
@@ -1689,7 +2189,7 @@ export class WorkflowRunner {
                     child.stderr?.on('data', (chunk) => {
                         stderrChunks.push(chunk.toString());
                     });
-                    child.on('close', (code) => {
+                    child.on('close', (code, signal) => {
                         if (timer)
                             clearTimeout(timer);
                         if (abortHandler && abortSignal) {
@@ -1705,6 +2205,10 @@ export class WorkflowRunner {
                         }
                         const stdout = stdoutChunks.join('');
                         const stderr = stderrChunks.join('');
+                        commandStdout = stdout;
+                        commandStderr = stderr;
+                        lastExitCode = code ?? undefined;
+                        lastExitSignal = signal ?? undefined;
                         // Check exit code unless failOnError is explicitly false
                         const failOnError = step.failOnError !== false;
                         if (failOnError && code !== 0 && code !== null) {
@@ -1722,31 +2226,41 @@ export class WorkflowRunner {
                         reject(new Error(`Failed to execute command: ${err.message}`));
                     });
                 });
-                if (step.verification) {
-                    this.runVerification(step.verification, output, step.name);
-                }
+                this.captureStepTerminalEvidence(step.name, {
+                    stdout: commandStdout || output,
+                    stderr: commandStderr,
+                    combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
+                }, { exitCode: lastExitCode, exitSignal: lastExitSignal });
+                const verificationResult = step.verification
+                    ? this.runVerification(step.verification, output, step.name)
+                    : undefined;
                 // Mark completed
                 state.row.status = 'completed';
                 state.row.output = output;
+                state.row.completionReason = verificationResult?.completionReason;
                 state.row.completedAt = new Date().toISOString();
                 await this.db.updateStep(state.row.id, {
                     status: 'completed',
                     output,
+                    completionReason: verificationResult?.completionReason,
                     completedAt: state.row.completedAt,
                     updatedAt: new Date().toISOString(),
                 });
                 // Persist step output
                 await this.persistStepOutput(runId, step.name, output);
                 this.emit({ type: 'step:completed', runId, stepName: step.name, output });
+                this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, verificationResult?.completionReason);
                 return;
             }
             catch (err) {
                 lastError = err instanceof Error ? err.message : String(err);
+                lastCompletionReason =
+                    err instanceof WorkflowCompletionError ? err.completionReason : undefined;
             }
         }
         const errorMsg = lastError ?? 'Unknown error';
         this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
-        await this.markStepFailed(state, errorMsg, runId);
+        await this.markStepFailed(state, errorMsg, runId, { exitCode: lastExitCode, exitSignal: lastExitSignal }, lastCompletionReason);
         throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
     }
     /**
@@ -1758,12 +2272,18 @@ export class WorkflowRunner {
         const state = stepStates.get(step.name);
         if (!state)
             throw new Error(`Step state not found: ${step.name}`);
+        let lastExitCode;
+        let lastExitSignal;
         this.checkAborted();
         // Mark step as running
         state.row.status = 'running';
+        state.row.error = undefined;
+        state.row.completionReason = undefined;
         state.row.startedAt = new Date().toISOString();
         await this.db.updateStep(state.row.id, {
             status: 'running',
+            error: undefined,
+            completionReason: undefined,
             startedAt: state.row.startedAt,
             updatedAt: new Date().toISOString(),
         });
@@ -1781,6 +2301,7 @@ export class WorkflowRunner {
         const createBranch = step.createBranch !== false;
         // Resolve workdir for worktree steps (same as deterministic/agent steps)
         const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
+        this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
         if (!branch) {
             const errorMsg = 'Worktree step missing required "branch" field';
             await this.markStepFailed(state, errorMsg, runId);
@@ -1821,6 +2342,10 @@ export class WorkflowRunner {
                 await this.markStepFailed(state, errorMsg, runId);
                 throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
             }
+            let commandStdout = '';
+            let commandStderr = '';
+            let commandExitCode;
+            let commandExitSignal;
             const output = await new Promise((resolve, reject) => {
                 const child = cpSpawn('sh', ['-c', worktreeCmd], {
                     stdio: 'pipe',
@@ -1855,7 +2380,7 @@ export class WorkflowRunner {
                 child.stderr?.on('data', (chunk) => {
                     stderrChunks.push(chunk.toString());
                 });
-                child.on('close', (code) => {
+                child.on('close', (code, signal) => {
                     if (timer)
                         clearTimeout(timer);
                     if (abortHandler && abortSignal) {
@@ -1869,7 +2394,13 @@ export class WorkflowRunner {
                         reject(new Error(`Step "${step.name}" timed out (no step timeout set, check global swarm.timeoutMs)`));
                         return;
                     }
+                    commandStdout = stdoutChunks.join('');
                     const stderr = stderrChunks.join('');
+                    commandStderr = stderr;
+                    commandExitCode = code ?? undefined;
+                    commandExitSignal = signal ?? undefined;
+                    lastExitCode = commandExitCode;
+                    lastExitSignal = commandExitSignal;
                     if (code !== 0 && code !== null) {
                         reject(new Error(`git worktree add failed with exit code ${code}${stderr ? `: ${stderr.slice(0, 500)}` : ''}`));
                         return;
@@ -1886,6 +2417,11 @@ export class WorkflowRunner {
                     reject(new Error(`Failed to execute git worktree command: ${err.message}`));
                 });
             });
+            this.captureStepTerminalEvidence(step.name, {
+                stdout: commandStdout || output,
+                stderr: commandStderr,
+                combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
+            }, { exitCode: commandExitCode, exitSignal: commandExitSignal });
             // Mark completed
             state.row.status = 'completed';
             state.row.output = output;
@@ -1900,11 +2436,20 @@ export class WorkflowRunner {
             await this.persistStepOutput(runId, step.name, output);
             this.emit({ type: 'step:completed', runId, stepName: step.name, output });
             this.postToChannel(`**[${step.name}]** Worktree created at: ${output}\n  Branch: ${branch}${!branchExists && createBranch ? ' (created)' : ''}`);
+            this.recordStepToolSideEffect(step.name, {
+                type: 'worktree_created',
+                detail: `Worktree created at ${output}`,
+                raw: { branch, createdBranch: !branchExists && createBranch },
+            });
+            this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt);
         }
         catch (err) {
             const errorMsg = err instanceof Error ? err.message : String(err);
             this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
-            await this.markStepFailed(state, errorMsg, runId);
+            await this.markStepFailed(state, errorMsg, runId, {
+                exitCode: lastExitCode,
+                exitSignal: lastExitSignal,
+            });
             throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
         }
     }
@@ -1925,8 +2470,13 @@ export class WorkflowRunner {
         }
         const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
         const usesOwnerFlow = specialistDef.interactive !== false;
-        const ownerDef = usesOwnerFlow ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
-        const reviewDef = usesOwnerFlow ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
+        const currentPattern = this.currentConfig?.swarm?.pattern ?? '';
+        const isHubPattern = WorkflowRunner.HUB_PATTERNS.has(currentPattern);
+        const usesAutoHardening = usesOwnerFlow && isHubPattern && !this.isExplicitInteractiveWorker(specialistDef);
+        const ownerDef = usesAutoHardening ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
+        // Reviewer resolution is deferred to just before the review gate runs (see below)
+        // so that activeReviewers is up-to-date for concurrent steps.
+        let reviewDef;
         const supervised = {
             specialist: specialistDef,
             owner: ownerDef,
@@ -1946,6 +2496,12 @@ export class WorkflowRunner {
         let lastError;
         let lastExitCode;
         let lastExitSignal;
+        let lastCompletionReason;
+        // OWNER_DECISION: INCOMPLETE_RETRY is enforced here at the attempt-loop level so every
+        // interactive execution path shares the same contract:
+        // - retries remaining => throw back into the loop and retry
+        // - maxRetries = 0 => fail immediately after the first retry request
+        // - retry budget exhausted => fail with retry_requested_by_owner, never "completed"
         for (let attempt = 0; attempt <= maxRetries; attempt++) {
             this.checkAborted();
             // Reset per-attempt exit info so stale values don't leak across retries
@@ -1954,6 +2510,11 @@ export class WorkflowRunner {
             if (attempt > 0) {
                 this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
                 this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
+                this.recordStepToolSideEffect(step.name, {
+                    type: 'retry',
+                    detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
+                    raw: { attempt, maxRetries },
+                });
                 state.row.retryCount = attempt;
                 await this.db.updateStep(state.row.id, {
                     retryCount: attempt,
@@ -1965,14 +2526,19 @@ export class WorkflowRunner {
             try {
                 // Mark step as running
                 state.row.status = 'running';
+                state.row.error = undefined;
+                state.row.completionReason = undefined;
                 state.row.startedAt = new Date().toISOString();
                 await this.db.updateStep(state.row.id, {
                     status: 'running',
+                    error: undefined,
+                    completionReason: undefined,
                     startedAt: state.row.startedAt,
                     updatedAt: new Date().toISOString(),
                 });
                 this.emit({ type: 'step:started', runId, stepName: step.name });
-                this.postToChannel(`**[${step.name}]** Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`);
+                this.log(`[${step.name}] Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`);
+                this.initializeStepSignalParticipants(step.name, ownerDef.name, specialistDef.name);
                 await this.trajectory?.stepStarted(step, ownerDef.name, {
                     role: usesDedicatedOwner ? 'owner' : 'specialist',
                     owner: ownerDef.name,
@@ -2021,63 +2587,154 @@ export class WorkflowRunner {
                 };
                 const effectiveSpecialist = applyStepWorkdir(specialistDef);
                 const effectiveOwner = applyStepWorkdir(ownerDef);
+                const effectiveReviewer = reviewDef ? applyStepWorkdir(reviewDef) : undefined;
+                this.beginStepEvidence(step.name, [
+                    this.resolveAgentCwd(effectiveSpecialist),
+                    this.resolveAgentCwd(effectiveOwner),
+                    effectiveReviewer ? this.resolveAgentCwd(effectiveReviewer) : undefined,
+                ], state.row.startedAt);
                 let specialistOutput;
                 let ownerOutput;
                 let ownerElapsed;
+                let completionReason;
                 if (usesDedicatedOwner) {
                     const result = await this.executeSupervisedAgentStep(step, { specialist: effectiveSpecialist, owner: effectiveOwner, reviewer: reviewDef }, resolvedTask, timeoutMs);
                     specialistOutput = result.specialistOutput;
                     ownerOutput = result.ownerOutput;
                     ownerElapsed = result.ownerElapsed;
+                    completionReason = result.completionReason;
                 }
                 else {
                     const ownerTask = this.injectStepOwnerContract(step, resolvedTask, effectiveOwner, effectiveSpecialist);
+                    const explicitInteractiveWorker = this.isExplicitInteractiveWorker(effectiveOwner);
+                    let explicitWorkerHandle;
+                    let explicitWorkerCompleted = false;
+                    let explicitWorkerOutput = '';
                     this.log(`[${step.name}] Spawning owner "${effectiveOwner.name}" (cli: ${effectiveOwner.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
                     const resolvedStep = { ...step, task: ownerTask };
                     const ownerStartTime = Date.now();
                     const spawnResult = this.executor
                         ? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs)
-                        : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs);
+                        : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs, {
+                            evidenceStepName: step.name,
+                            evidenceRole: usesOwnerFlow ? 'owner' : 'specialist',
+                            preserveOnIdle: (!isHubPattern || !this.isLeadLikeAgent(effectiveOwner)) ? false : undefined,
+                            logicalName: effectiveOwner.name,
+                            onSpawned: explicitInteractiveWorker
+                                ? ({ agent }) => {
+                                    explicitWorkerHandle = agent;
+                                }
+                                : undefined,
+                            onChunk: explicitInteractiveWorker
+                                ? ({ chunk }) => {
+                                    explicitWorkerOutput += WorkflowRunner.stripAnsi(chunk);
+                                    if (!explicitWorkerCompleted &&
+                                        this.hasExplicitInteractiveWorkerCompletionEvidence(step, explicitWorkerOutput, ownerTask, resolvedTask)) {
+                                        explicitWorkerCompleted = true;
+                                        void explicitWorkerHandle?.release().catch(() => undefined);
+                                    }
+                                }
+                                : undefined,
+                        });
                     const output = typeof spawnResult === 'string' ? spawnResult : spawnResult.output;
                     lastExitCode = typeof spawnResult === 'string' ? undefined : spawnResult.exitCode;
                     lastExitSignal = typeof spawnResult === 'string' ? undefined : spawnResult.exitSignal;
                     ownerElapsed = Date.now() - ownerStartTime;
                     this.log(`[${step.name}] Owner "${effectiveOwner.name}" exited`);
                     if (usesOwnerFlow) {
-                        this.assertOwnerCompletionMarker(step, output, ownerTask);
+                        try {
+                            const completionDecision = this.resolveOwnerCompletionDecision(step, output, output, ownerTask, resolvedTask);
+                            completionReason = completionDecision.completionReason;
+                        }
+                        catch (error) {
+                            const canUseVerificationFallback = !usesDedicatedOwner &&
+                                step.verification &&
+                                error instanceof WorkflowCompletionError &&
+                                error.completionReason === 'failed_no_evidence';
+                            if (!canUseVerificationFallback) {
+                                throw error;
+                            }
+                        }
                     }
                     specialistOutput = output;
                     ownerOutput = output;
                 }
-                // Run verification if configured
-                if (step.verification) {
-                    this.runVerification(step.verification, specialistOutput, step.name, effectiveOwner.interactive === false ? undefined : resolvedTask);
+                // Even non-interactive steps can emit an explicit OWNER_DECISION contract.
+                // Honor retry/fail/clarification signals before verification-driven success so
+                // real runs stay consistent with interactive owner flows.
+                if (!usesOwnerFlow) {
+                    const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
+                    if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
+                        throw new WorkflowCompletionError(`Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
+                    }
+                    if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
+                        throw new WorkflowCompletionError(`Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'failed_owner_decision');
+                    }
+                    if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
+                        throw new WorkflowCompletionError(`Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
+                    }
+                }
+                // Run verification if configured.
+                // Self-owned interactive steps still need verification fallback so
+                // explicit OWNER_DECISION output is not mandatory for the happy path.
+                if (step.verification && (!usesOwnerFlow || !usesDedicatedOwner) && !completionReason) {
+                    const verificationResult = this.runVerification(step.verification, specialistOutput, step.name, effectiveOwner.interactive === false ? undefined : resolvedTask);
+                    completionReason = verificationResult.completionReason;
+                }
+                // Retry-style owner decisions are control-flow signals, not terminal success states.
+                // Guard here so they cannot accidentally fall through into review or completed-step
+                // persistence if a future branch returns a completionReason instead of throwing.
+                if (completionReason === 'retry_requested_by_owner') {
+                    throw new WorkflowCompletionError(`Step "${step.name}" owner requested another attempt`, 'retry_requested_by_owner');
                 }
                 // Every interactive step gets a review pass; pick a dedicated reviewer when available.
+                // Resolve reviewer JIT so activeReviewers reflects concurrent steps that started earlier.
+                if (usesAutoHardening && usesDedicatedOwner && !reviewDef) {
+                    reviewDef = this.resolveAutoReviewAgent(ownerDef, agentMap);
+                    supervised.reviewer = reviewDef;
+                }
                 let combinedOutput = specialistOutput;
                 if (usesOwnerFlow && reviewDef) {
-                    const remainingMs = timeoutMs ? Math.max(0, timeoutMs - ownerElapsed) : undefined;
-                    const reviewOutput = await this.runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewDef, remainingMs);
-                    combinedOutput = this.combineStepAndReviewOutput(specialistOutput, reviewOutput);
+                    this.activeReviewers.set(reviewDef.name, (this.activeReviewers.get(reviewDef.name) ?? 0) + 1);
+                    try {
+                        const remainingMs = timeoutMs ? Math.max(0, timeoutMs - ownerElapsed) : undefined;
+                        const reviewOutput = await this.runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewDef, remainingMs);
+                        combinedOutput = this.combineStepAndReviewOutput(specialistOutput, reviewOutput);
+                    }
+                    finally {
+                        const count = (this.activeReviewers.get(reviewDef.name) ?? 1) - 1;
+                        if (count <= 0)
+                            this.activeReviewers.delete(reviewDef.name);
+                        else
+                            this.activeReviewers.set(reviewDef.name, count);
+                    }
                 }
                 // Mark completed
                 state.row.status = 'completed';
                 state.row.output = combinedOutput;
+                state.row.completionReason = completionReason;
                 state.row.completedAt = new Date().toISOString();
                 await this.db.updateStep(state.row.id, {
                     status: 'completed',
                     output: combinedOutput,
+                    completionReason,
                     completedAt: state.row.completedAt,
                     updatedAt: new Date().toISOString(),
                 });
                 // Persist step output to disk so it survives restarts and is inspectable
                 await this.persistStepOutput(runId, step.name, combinedOutput);
                 this.emit({ type: 'step:completed', runId, stepName: step.name, output: combinedOutput, exitCode: lastExitCode, exitSignal: lastExitSignal });
+                this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, completionReason);
                 await this.trajectory?.stepCompleted(step, combinedOutput, attempt + 1);
                 return;
             }
             catch (err) {
                 lastError = err instanceof Error ? err.message : String(err);
+                lastCompletionReason =
+                    err instanceof WorkflowCompletionError ? err.completionReason : undefined;
+                if (lastCompletionReason === 'retry_requested_by_owner' && attempt >= maxRetries) {
+                    lastError = this.buildOwnerRetryBudgetExceededMessage(step.name, maxRetries, lastError);
+                }
                 if (err instanceof SpawnExitError) {
                     lastExitCode = err.exitCode;
                     lastExitSignal = err.exitSignal;
@@ -2104,9 +2761,27 @@ export class WorkflowRunner {
         await this.markStepFailed(state, lastError ?? 'Unknown error', runId, {
             exitCode: lastExitCode,
             exitSignal: lastExitSignal,
-        });
+        }, lastCompletionReason);
         throw new Error(`Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`);
     }
+    buildOwnerRetryBudgetExceededMessage(stepName, maxRetries, ownerDecisionError) { // Builds the failure text for an owner retry request that cannot be honored; returns a string.
+        const attempts = maxRetries + 1; // Total attempts = the first try plus maxRetries retries.
+        const prefix = `Step "${stepName}" `; // Leading text to strip from the incoming error message, if present.
+        const normalizedDecision = ownerDecisionError?.startsWith(prefix) // Drop the prefix so the step name is not repeated in the suffix below.
+            ? ownerDecisionError.slice(prefix.length).trim()
+            : ownerDecisionError?.trim(); // No prefix (or no message at all): just trim; stays undefined when the message is undefined.
+        const decisionSuffix = normalizedDecision // Empty string when there is no (or only blank) decision text to report.
+            ? ` Latest owner decision: ${normalizedDecision}`
+            : '';
+        if (maxRetries === 0) { // No retries configured: tell the reader how to enable them.
+            return (`Step "${stepName}" owner requested another attempt, but no retries are configured ` +
+                `(maxRetries=0). Configure retries > 0 to allow OWNER_DECISION: INCOMPLETE_RETRY.` +
+                decisionSuffix);
+        }
+        return (`Step "${stepName}" owner requested another attempt after ${attempts} total attempts, ` + // Retries were configured; report them as exhausted.
+            `but the retry budget is exhausted (maxRetries=${maxRetries}).` +
+            decisionSuffix);
+    }
     injectStepOwnerContract(step, resolvedTask, ownerDef, specialistDef) {
         if (ownerDef.interactive === false)
             return resolvedTask;
@@ -2119,12 +2794,19 @@ export class WorkflowRunner {
             `- You are the accountable owner for step "${step.name}".\n` +
             (specialistNote ? `- ${specialistNote}\n` : '') +
             `- If you delegate, you must still verify completion yourself.\n` +
-            `- Before exiting, provide an explicit completion line: STEP_COMPLETE:${step.name}\n` +
+            `- Preferred final decision format:\n` +
+            `  OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
+            `  REASON: <one sentence>\n` +
+            `- Legacy completion marker still supported: STEP_COMPLETE:${step.name}\n` +
             `- Then self-terminate immediately with /exit.`);
     }
     buildOwnerSupervisorTask(step, originalTask, supervised, workerRuntimeName) {
         const verificationGuide = this.buildSupervisorVerificationGuide(step.verification);
         const channelLine = this.channel ? `#${this.channel}` : '(workflow channel unavailable)';
+        const channelContract = this.channel
+            ? `- Prefer Relaycast/group-chat handoff signals over terminal sentinels: wait for the worker to post \`WORKER_DONE: <brief summary>\` in ${channelLine}\n` +
+                `- When you have validated the handoff, post \`LEAD_DONE: <brief summary>\` to ${channelLine} before you exit\n`
+            : '';
         return (`You are the step owner/supervisor for step "${step.name}".\n\n` +
             `Worker: ${supervised.specialist.name} (runtime: ${workerRuntimeName}) on ${channelLine}\n` +
             `Task: ${originalTask}\n\n` +
@@ -2133,9 +2815,22 @@ export class WorkflowRunner {
             `- Watch ${channelLine} for the worker's progress messages and mirrored PTY output\n` +
             `- Check file changes: run \`git diff --stat\` or inspect expected files directly\n` +
             `- Ask the worker directly on ${channelLine} if you need a status update\n` +
+            channelContract +
             verificationGuide +
-            `\nWhen you're satisfied the work is done correctly:\n` +
-            `Output exactly: STEP_COMPLETE:${step.name}`);
+            `\nWhen you have enough evidence, return:\n` +
+            `OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
+            `REASON: <one sentence>\n` +
+            `Legacy completion marker still supported: STEP_COMPLETE:${step.name}`);
+    }
+    buildWorkerHandoffTask(step, originalTask, supervised) { // Returns the worker's task text with a completion contract appended.
+        if (!this.channel) // No workflow channel is set, so the task is returned unchanged.
+            return originalTask;
+        return (`${originalTask}\n\n---\n` + // Original task first, then a separator and the contract lines.
+            `WORKER COMPLETION CONTRACT:\n` +
+            `- You are handing work off to owner "${supervised.owner.name}" for step "${step.name}".\n` +
+            `- When your work is ready for review, post to #${this.channel}: \`WORKER_DONE: <brief summary>\`\n` + // The handoff signal the worker is asked to post in the channel.
+            `- Do not rely on terminal output alone for handoff; use the workflow group chat signal above.\n` +
+            `- After posting your handoff signal, self-terminate with /exit unless the owner asks for follow-up.`);
     }
     buildSupervisorVerificationGuide(verification) {
         if (!verification)
@@ -2155,8 +2850,9 @@ export class WorkflowRunner {
     }
     async executeSupervisedAgentStep(step, supervised, resolvedTask, timeoutMs) {
         if (this.executor) {
+            const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
             const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, supervised.specialist.name);
-            const specialistStep = { ...step, task: resolvedTask };
+            const specialistStep = { ...step, task: specialistTask };
             const ownerStep = {
                 ...step,
                 name: `${step.name}-owner`,
@@ -2164,16 +2860,21 @@ export class WorkflowRunner {
                 task: supervisorTask,
             };
             this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" and owner "${supervised.owner.name}"`);
-            const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist, resolvedTask, timeoutMs);
+            const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist, specialistTask, timeoutMs);
             // Guard against unhandled rejection if owner fails before specialist settles
             const specialistSettled = specialistPromise.catch(() => undefined);
             try {
                 const ownerStartTime = Date.now();
                 const ownerOutput = await this.executor.executeAgentStep(ownerStep, supervised.owner, supervisorTask, timeoutMs);
                 const ownerElapsed = Date.now() - ownerStartTime;
-                this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
                 const specialistOutput = await specialistPromise;
-                return { specialistOutput, ownerOutput, ownerElapsed };
+                const completionDecision = this.resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, supervisorTask, resolvedTask);
+                return {
+                    specialistOutput,
+                    ownerOutput,
+                    ownerElapsed,
+                    completionReason: completionDecision.completionReason,
+                };
             }
             catch (error) {
                 await specialistSettled;
@@ -2190,10 +2891,14 @@ export class WorkflowRunner {
             resolveWorkerSpawn = resolve;
             rejectWorkerSpawn = reject;
         });
-        const specialistStep = { ...step, task: resolvedTask };
+        const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
+        const specialistStep = { ...step, task: specialistTask };
         this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" (cli: ${supervised.specialist.cli})`);
         const workerPromise = this.spawnAndWait(supervised.specialist, specialistStep, timeoutMs, {
             agentNameSuffix: 'worker',
+            evidenceStepName: step.name,
+            evidenceRole: 'worker',
+            logicalName: supervised.specialist.name,
             onSpawned: ({ actualName, agent }) => {
                 workerHandle = agent;
                 workerRuntimeName = actualName;
@@ -2208,7 +2913,7 @@ export class WorkflowRunner {
                 }
             },
             onChunk: ({ agentName, chunk }) => {
-                this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk);
+                this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk, supervised.specialist.name);
             },
         }).catch((error) => {
             if (!workerSpawned) {
@@ -2221,14 +2926,24 @@ export class WorkflowRunner {
         workerPromise
             .then((result) => {
             workerReleased = true;
-            this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited`);
+            this.log(`[${step.name}] Worker ${workerRuntimeName} exited`);
+            this.recordStepToolSideEffect(step.name, {
+                type: 'worker_exit',
+                detail: `Worker ${workerRuntimeName} exited`,
+                raw: { worker: workerRuntimeName, exitCode: result.exitCode, exitSignal: result.exitSignal },
+            });
             if (step.verification?.type === 'output_contains' && result.output.includes(step.verification.value)) {
-                this.postToChannel(`**[${step.name}]** Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`);
+                this.log(`[${step.name}] Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`);
             }
         })
             .catch((error) => {
             const message = error instanceof Error ? error.message : String(error);
             this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited with error: ${message}`);
+            this.recordStepToolSideEffect(step.name, {
+                type: 'worker_error',
+                detail: `Worker ${workerRuntimeName} exited with error: ${message}`,
+                raw: { worker: workerRuntimeName, error: message },
+            });
         });
         await workerReady;
         const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, workerRuntimeName);
@@ -2243,6 +2958,9 @@ export class WorkflowRunner {
         try {
             const ownerResultObj = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, {
                 agentNameSuffix: 'owner',
+                evidenceStepName: step.name,
+                evidenceRole: 'owner',
+                logicalName: supervised.owner.name,
                 onSpawned: ({ actualName }) => {
                     this.supervisedRuntimeAgents.set(actualName, {
                         stepName: step.name,
@@ -2257,9 +2975,14 @@ export class WorkflowRunner {
             const ownerElapsed = Date.now() - ownerStartTime;
             const ownerOutput = ownerResultObj.output;
             this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
-            this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
             const specialistOutput = (await workerPromise).output;
-            return { specialistOutput, ownerOutput, ownerElapsed };
+            const completionDecision = this.resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, supervisorTask, resolvedTask);
+            return {
+                specialistOutput,
+                ownerOutput,
+                ownerElapsed,
+                completionReason: completionDecision.completionReason,
+            };
         }
         catch (error) {
             const message = error instanceof Error ? error.message : String(error);
@@ -2273,14 +2996,20 @@ export class WorkflowRunner {
             throw error;
         }
     }
-    forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk) {
-        const lines = WorkflowRunner.stripAnsi(chunk)
+    /**
+     * Relay a short excerpt of an agent's output chunk to the channel.
+     *
+     * The chunk is passed through WorkflowRunner.scrubForChannel, split on
+     * newlines, trimmed, and stripped of empty lines. At most the first three
+     * remaining lines are posted, each cut to 280 characters.
+     *
+     * @param stepName Step name used as the message prefix and in the metadata.
+     * @param roleLabel Role label shown in the message and stored as `role`.
+     * @param agentName Agent name shown in the message and stored as `actor`.
+     * @param chunk Raw output chunk to excerpt.
+     * @param sender Value stored as `sender` in the metadata; `undefined` when
+     *   the caller omits it.
+     */
+    forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk, sender) {
+        const lines = WorkflowRunner.scrubForChannel(chunk)
             .split('\n')
             .map((line) => line.trim())
             .filter(Boolean)
             .slice(0, 3);
         for (const line of lines) {
-            this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`);
+            this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`, {
+                stepName,
+                sender,
+                actor: agentName,
+                role: roleLabel,
+                origin: 'forwarded_chunk',
+            });
         }
     }
     async recordOwnerMonitoringChunk(step, ownerDef, chunk) {
@@ -2295,6 +3024,11 @@ export class WorkflowRunner {
         if (/STEP_COMPLETE:/i.test(stripped))
             details.push('Declared the step complete');
         for (const detail of details) {
+            this.recordStepToolSideEffect(step.name, {
+                type: 'owner_monitoring',
+                detail,
+                raw: { output: stripped.slice(0, 240), owner: ownerDef.name },
+            });
             await this.trajectory?.ownerMonitoringEvent(step.name, ownerDef.name, detail, {
                 output: stripped.slice(0, 240),
             });
@@ -2335,6 +3069,7 @@ export class WorkflowRunner {
     }
     resolveAutoReviewAgent(ownerDef, agentMap) {
         const allDefs = [...agentMap.values()].map((d) => WorkflowRunner.resolveAgentDef(d));
+        const eligible = (def) => def.name !== ownerDef.name && !this.isExplicitInteractiveWorker(def);
         const isReviewer = (def) => {
             const roleLC = def.role?.toLowerCase() ?? '';
             const nameLC = def.name.toLowerCase();
@@ -2358,33 +3093,244 @@ export class WorkflowRunner {
                 return 2;
             return isReviewer(def) ? 1 : 0;
         };
-        const dedicated = allDefs
-            .filter((d) => d.name !== ownerDef.name && isReviewer(d))
-            .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name))[0];
+        // Prefer agents not currently assigned as reviewers to avoid double-booking
+        const notBusy = (def) => !this.activeReviewers.has(def.name);
+        const dedicatedCandidates = allDefs
+            .filter((d) => eligible(d) && isReviewer(d))
+            .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name));
+        const dedicated = dedicatedCandidates.find(notBusy) ?? dedicatedCandidates[0];
         if (dedicated)
             return dedicated;
-        const alternate = allDefs.find((d) => d.name !== ownerDef.name && d.interactive !== false);
+        const alternateCandidates = allDefs.filter((d) => eligible(d) && d.interactive !== false);
+        const alternate = alternateCandidates.find(notBusy) ?? alternateCandidates[0];
         if (alternate)
             return alternate;
         // Self-review fallback — log a warning since owner reviewing itself is weak.
         return ownerDef;
     }
-    assertOwnerCompletionMarker(step, output, injectedTaskText) {
+    /**
+     * Tell whether an agent definition is a `worker` preset that has not been
+     * explicitly marked non-interactive.
+     *
+     * @param agentDef Agent definition; only `preset` and `interactive` are read.
+     * @returns `true` when `preset` is `'worker'` and `interactive` is anything
+     *   other than `false` (including unset).
+     */
+    isExplicitInteractiveWorker(agentDef) {
+        if (agentDef.preset !== 'worker') {
+            return false;
+        }
+        return agentDef.interactive !== false;
+    }
+    /**
+     * Decide whether, and why, a step counts as complete once its owner has exited.
+     *
+     * The checks run in a fixed order and the first one that settles the
+     * outcome wins:
+     *   1. An explicit OWNER_DECISION of INCOMPLETE_RETRY, INCOMPLETE_FAIL or
+     *      NEEDS_CLARIFICATION throws.
+     *   2. Configured verification is run; a result carrying `error` throws.
+     *   3. An explicit OWNER_DECISION of COMPLETE returns.
+     *   4. A passing verification returns.
+     *   5. The legacy STEP_COMPLETE marker is accepted as a COMPLETE decision.
+     *   6. With no explicit decision, evidence-based judgement may return.
+     *   7. The process-exit fallback may return.
+     *   8. Otherwise the method throws.
+     *
+     * @param step Step definition; `name` and `verification` are read here.
+     * @param ownerOutput Output produced by the owner agent.
+     * @param specialistOutput Output handed to verification.
+     * @param injectedTaskText Task text used to detect an echoed completion marker.
+     * @param verificationTaskText Task text forwarded to verification.
+     * @returns An object whose `completionReason` is one of
+     *   'completed_by_owner_decision', 'completed_verified',
+     *   'completed_by_evidence' or 'completed_by_process_exit', optionally with
+     *   `ownerDecision` and/or `reason`.
+     * @throws WorkflowCompletionError constructed with 'retry_requested_by_owner',
+     *   'failed_owner_decision', 'failed_verification' or 'failed_no_evidence'
+     *   as its second argument.
+     */
+    resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, injectedTaskText, verificationTaskText) {
+        const hasMarker = this.hasOwnerCompletionMarker(step, ownerOutput, injectedTaskText);
+        // Passing `false` here means only a structured OWNER_DECISION line counts;
+        // the legacy marker is considered separately further down.
+        const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
+        // INCOMPLETE_RETRY / NEEDS_CLARIFICATION are non-terminal owner outcomes. They never mark
+        // the step complete here; instead they throw back to executeAgentStep(), which decides
+        // whether to retry or fail based on the remaining retry budget for this step.
+        if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
+            throw new WorkflowCompletionError(`Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
+        }
+        if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
+            throw new WorkflowCompletionError(`Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'failed_owner_decision');
+        }
+        if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
+            throw new WorkflowCompletionError(`Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
+        }
+        // Without a configured verification the result is "not passed" and carries no error.
+        const verificationResult = step.verification
+            ? this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, {
+                allowFailure: true,
+                completionMarkerFound: hasMarker,
+            })
+            : { passed: false };
+        // A verification error is checked before the COMPLETE decision below, so it
+        // fails the step even when the owner declared COMPLETE.
+        if (verificationResult.error) {
+            throw new WorkflowCompletionError(`Step "${step.name}" verification failed and no owner decision or evidence established completion: ${verificationResult.error}`, 'failed_verification');
+        }
+        if (explicitOwnerDecision?.decision === 'COMPLETE') {
+            if (!hasMarker) {
+                this.log(`[${step.name}] Structured OWNER_DECISION completed the step without legacy STEP_COMPLETE marker`);
+            }
+            return {
+                completionReason: 'completed_by_owner_decision',
+                ownerDecision: explicitOwnerDecision.decision,
+                reason: explicitOwnerDecision.reason,
+            };
+        }
+        if (verificationResult.passed) {
+            return { completionReason: 'completed_verified' };
+        }
+        // Second parse: with `hasMarker` passed through, the legacy STEP_COMPLETE
+        // marker can stand in for a COMPLETE decision when no OWNER_DECISION line exists.
+        const ownerDecision = this.parseOwnerDecision(step, ownerOutput, hasMarker);
+        if (ownerDecision?.decision === 'COMPLETE') {
+            return {
+                completionReason: 'completed_by_owner_decision',
+                ownerDecision: ownerDecision.decision,
+                reason: ownerDecision.reason,
+            };
+        }
+        if (!explicitOwnerDecision) {
+            const evidenceReason = this.judgeOwnerCompletionByEvidence(step.name, ownerOutput);
+            if (evidenceReason) {
+                if (!hasMarker) {
+                    this.log(`[${step.name}] Evidence-based completion resolved without legacy STEP_COMPLETE marker`);
+                }
+                return {
+                    completionReason: 'completed_by_evidence',
+                    reason: evidenceReason,
+                };
+            }
+        }
+        // Process-exit fallback: if the agent exited cleanly (code 0) and verification
+        // passes (or no verification is configured), infer completion rather than failing.
+        // This reduces dependence on agents posting exact coordination signals.
+        const processExitFallback = this.tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput);
+        if (processExitFallback) {
+            this.log(`[${step.name}] Completion inferred from clean process exit (code 0)` +
+                (step.verification ? ' + verification passed' : '') +
+                ' — no coordination signal was required');
+            return processExitFallback;
+        }
+        throw new WorkflowCompletionError(`Step "${step.name}" owner completion decision missing: no OWNER_DECISION, legacy STEP_COMPLETE marker, or evidence-backed completion signal`, 'failed_no_evidence');
+    }
+    /**
+     * Report whether a single output would resolve to a completed step.
+     *
+     * The same `output` is supplied as both the owner output and the specialist
+     * output to resolveOwnerCompletionDecision. Any error thrown by that call
+     * is deliberately turned into `false` rather than propagated.
+     *
+     * @param step Step definition forwarded unchanged.
+     * @param output Output used as both owner and specialist output.
+     * @param injectedTaskText Task text forwarded unchanged.
+     * @param verificationTaskText Task text forwarded unchanged.
+     * @returns `true` when the completion decision resolves without throwing.
+     */
+    hasExplicitInteractiveWorkerCompletionEvidence(step, output, injectedTaskText, verificationTaskText) {
+        let resolved = true;
+        try {
+            this.resolveOwnerCompletionDecision(step, output, output, injectedTaskText, verificationTaskText);
+        }
+        catch {
+            resolved = false;
+        }
+        return resolved;
+    }
+    /**
+     * Check whether the output carries the legacy `STEP_COMPLETE:<step name>` marker.
+     *
+     * @param step Step definition; `name` is used to build the marker.
+     * @param output Agent output to search.
+     * @param injectedTaskText Task text that was injected into the agent; if it
+     *   contains the marker and the output looks like it echoes the prompt, a
+     *   second occurrence of the marker is required.
+     * @returns `true` when the marker is considered present, `false` otherwise.
+     */
+    hasOwnerCompletionMarker(step, output, injectedTaskText) {
         const marker = `STEP_COMPLETE:${step.name}`;
         const taskHasMarker = injectedTaskText.includes(marker);
         const first = output.indexOf(marker);
         if (first === -1) {
-            throw new Error(`Step "${step.name}" owner completion marker missing: "${marker}"`);
-        }
-        // PTY output includes injected task text, so require a second marker occurrence
-        // when the marker was present in the injected prompt (either owner contract or supervisor prompt).
-        const outputLikelyContainsInjectedPrompt = output.includes('STEP OWNER CONTRACT') || output.includes('Output exactly: STEP_COMPLETE:');
+            return false;
+        }
+        // PTY output often includes echoed prompt text, so when the injected task
+        // itself contains the legacy marker require a second occurrence from the
+        // agent response.
+        const outputLikelyContainsInjectedPrompt = output.includes('STEP OWNER CONTRACT') ||
+            output.includes('Preferred final decision format') ||
+            output.includes('Legacy completion marker still supported') ||
+            output.includes('Output exactly: STEP_COMPLETE:');
         if (taskHasMarker && outputLikelyContainsInjectedPrompt) {
-            const hasSecond = output.includes(marker, first + marker.length);
-            if (!hasSecond) {
-                throw new Error(`Step "${step.name}" owner completion marker missing in agent response: "${marker}"`);
-            }
+            return output.includes(marker, first + marker.length);
         }
+        return true;
+    }
+    /**
+     * Extract the owner's structured `OWNER_DECISION:` (and optional `REASON:`)
+     * from its output.
+     *
+     * @param step Step definition; `name` is used only in the legacy-marker reason.
+     * @param ownerOutput Raw owner output to scan.
+     * @param hasMarker Whether the legacy completion marker was already found;
+     *   when no OWNER_DECISION line exists this alone yields a COMPLETE decision.
+     * @returns `{ decision, reason }` where `reason` may be `undefined`, or
+     *   `null` when no decision can be derived.
+     */
+    parseOwnerDecision(step, ownerOutput, hasMarker) {
+        const allDecisions = Array.from(ownerOutput.matchAll(/OWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi));
+        const echoedPromptHints = [
+            'STEP OWNER CONTRACT',
+            'Preferred final decision format',
+            'one of COMPLETE, INCOMPLETE_RETRY',
+            'COMPLETE|INCOMPLETE_RETRY',
+        ];
+        const promptWasEchoed = echoedPromptHints.some((hint) => ownerOutput.includes(hint));
+        if (allDecisions.length === 0) {
+            return hasMarker
+                ? {
+                    decision: 'COMPLETE',
+                    reason: `Legacy completion marker observed: STEP_COMPLETE:${step.name}`,
+                }
+                : null;
+        }
+        // A match sitting on a template/instruction line (e.g.
+        // "COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION") is the
+        // echoed format description, not the agent's actual decision.
+        const isTemplateLine = (match) => {
+            const start = ownerOutput.lastIndexOf('\n', match.index) + 1;
+            const newlineAt = ownerOutput.indexOf('\n', match.index);
+            const end = newlineAt === -1 ? undefined : newlineAt;
+            return ownerOutput.slice(start, end).includes('COMPLETE|INCOMPLETE_RETRY');
+        };
+        const agentDecisions = promptWasEchoed
+            ? allDecisions.filter((match) => !isTemplateLine(match))
+            : allDecisions;
+        // Prefer the last non-template match; if every match was filtered out,
+        // fall back to the last match overall.
+        const candidates = agentDecisions.length > 0 ? agentDecisions : allDecisions;
+        const chosen = candidates[candidates.length - 1];
+        const decision = chosen?.[1]?.toUpperCase();
+        const knownDecisions = ['COMPLETE', 'INCOMPLETE_RETRY', 'INCOMPLETE_FAIL', 'NEEDS_CLARIFICATION'];
+        if (!knownDecisions.includes(decision)) {
+            return null;
+        }
+        const reasons = Array.from(ownerOutput.matchAll(/(?:^|\n)REASON:\s*(.+)/gi));
+        // With an echoed prompt the first REASON line is likely the template, so
+        // take the last one when more than one is present.
+        const useLastReason = promptWasEchoed && reasons.length > 1;
+        const reasonMatch = useLastReason ? reasons[reasons.length - 1] : reasons[0];
+        const reasonText = reasonMatch?.[1]?.trim();
+        let reason;
+        if (reasonText && reasonText !== '<one sentence>') {
+            reason = reasonText;
+        }
+        return { decision, reason };
+    }
+    /**
+     * Drop blank lines and lines that match any of the given patterns.
+     *
+     * @param output Text to clean; split on '\n' and each line trimmed.
+     * @param patterns Regular expressions tested against each trimmed line.
+     * @returns The surviving trimmed lines joined with '\n' (empty string when
+     *   nothing survives).
+     */
+    stripEchoedPromptLines(output, patterns) {
+        const kept = [];
+        for (const rawLine of output.split('\n')) {
+            const line = rawLine.trim();
+            if (!line) {
+                continue;
+            }
+            const isEchoed = patterns.some((pattern) => pattern.test(line));
+            if (!isEchoed) {
+                kept.push(line);
+            }
+        }
+        return kept.join('\n');
+    }
+    /**
+     * Return the first line that is non-empty after trimming.
+     *
+     * @param output Text to scan, split on '\n'.
+     * @returns The trimmed line, or `undefined` when every line is blank.
+     */
+    firstMeaningfulLine(output) {
+        for (const rawLine of output.split('\n')) {
+            const line = rawLine.trim();
+            if (line) {
+                return line;
+            }
+        }
+        return undefined;
+    }
+    /**
+     * Heuristically decide whether the owner's free-form output, together with
+     * the evidence recorded for the step, is enough to treat the step as complete.
+     *
+     * Both conditions must hold: the sanitized output contains a positive
+     * conclusion, and the recorded evidence contains a completion-type
+     * coordination signal or a matching owner-monitoring side effect.
+     *
+     * @param stepName Step whose recorded completion evidence is consulted.
+     * @param ownerOutput Raw owner output.
+     * @returns The first non-empty line of the sanitized output (or
+     *   'Evidence-backed completion'), or `null` when completion cannot be inferred.
+     */
+    judgeOwnerCompletionByEvidence(stepName, ownerOutput) {
+        // Never infer completion when the raw output contains an explicit retry/fail/clarification signal.
+        const ownerDeclined = /OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput);
+        if (ownerDeclined) {
+            return null;
+        }
+        const echoedPromptPatterns = [
+            /^STEP OWNER CONTRACT:?$/i,
+            /^Preferred final decision format:?$/i,
+            /^OWNER_DECISION:\s*(?:COMPLETE\|INCOMPLETE_RETRY|<one of COMPLETE, INCOMPLETE_RETRY)/i,
+            /^REASON:\s*<one sentence>$/i,
+            /^Legacy completion marker still supported:/i,
+            /^STEP_COMPLETE:/i,
+        ];
+        const sanitized = this.stripEchoedPromptLines(ownerOutput, echoedPromptPatterns);
+        if (!sanitized) {
+            return null;
+        }
+        const positivePatterns = [
+            /\b(complete(?:d)?|done|verified|looks correct|safe handoff|artifact verified)\b/i,
+            /\bartifacts?\b.*\b(correct|verified|complete)\b/i,
+            // The owner releasing itself with reason "task completed" also counts.
+            /Calling\s+(?:[\w.-]+\.)?remove_agent\(\{[^<\n]*"reason":"task completed"/i,
+        ];
+        const hasPositiveConclusion = positivePatterns.some((pattern) => pattern.test(sanitized));
+        const evidence = this.getStepCompletionEvidence(stepName);
+        const completionKinds = ['worker_done', 'lead_done', 'verification_passed'];
+        const isCompletionSignal = (signal) => completionKinds.includes(signal.kind) ||
+            (signal.kind === 'process_exit' && signal.value === '0');
+        const inspectionDetails = [/Checked git diff stats/i, /Listed files for verification/i];
+        const isInspectionEffect = (effect) => effect.type === 'owner_monitoring' &&
+            inspectionDetails.some((pattern) => pattern.test(effect.detail));
+        const sawCoordinationSignal = evidence?.coordinationSignals.some((signal) => isCompletionSignal(signal)) ?? false;
+        const sawInspectionSignal = evidence?.toolSideEffects.some((effect) => isInspectionEffect(effect)) ?? false;
+        if (hasPositiveConclusion && (sawCoordinationSignal || sawInspectionSignal)) {
+            return this.firstMeaningfulLine(sanitized) ?? 'Evidence-backed completion';
+        }
+        return null;
+    }
+    /**
+     * Infer completion from a clean process exit when no coordination signal
+     * settled the step.
+     *
+     * The fallback applies only when all of the following hold: the configured
+     * `swarm.completionGracePeriodMs` is not exactly 0 (it defaults to 5000 when
+     * unset), the owner output carries no explicit retry/fail/clarification
+     * decision, the recorded evidence contains a `process_exit` signal with
+     * value '0', and configured verification (if any) passes.
+     *
+     * @param step Step definition; `name` and `verification` are read.
+     * @param specialistOutput Output handed to verification.
+     * @param verificationTaskText Task text forwarded to verification.
+     * @param ownerOutput Owner output; may be empty or omitted.
+     * @returns `{ completionReason: 'completed_by_process_exit', reason }`, or
+     *   `null` when the fallback does not apply.
+     */
+    tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput) {
+        const configuredGraceMs = this.currentConfig?.swarm.completionGracePeriodMs ?? 5000;
+        // A grace period of exactly 0 switches the fallback off.
+        if (configuredGraceMs === 0) {
+            return null;
+        }
+        // Never infer completion when the owner explicitly requested retry/fail/clarification.
+        const ownerDeclined = ownerOutput &&
+            /OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput);
+        if (ownerDeclined) {
+            return null;
+        }
+        const evidence = this.getStepCompletionEvidence(step.name);
+        const isCleanExit = (signal) => signal.kind === 'process_exit' && signal.value === '0';
+        const exitedCleanly = evidence?.coordinationSignals.some((signal) => isCleanExit(signal)) ?? false;
+        if (!exitedCleanly) {
+            return null;
+        }
+        // If verification is configured, it must pass for the fallback to succeed.
+        if (step.verification) {
+            const outcome = this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, { allowFailure: true });
+            if (!outcome.passed) {
+                return null;
+            }
+        }
+        return {
+            completionReason: 'completed_by_process_exit',
+            reason: `Process exited with code 0${step.verification ? ' and verification passed' : ''} — coordination signal not required`,
+        };
     }
     async runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewerDef, timeoutMs) {
         const reviewSnippetMax = 12_000;
@@ -2426,7 +3372,17 @@ export class WorkflowRunner {
         };
         await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer');
         this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`);
+        this.recordStepToolSideEffect(step.name, {
+            type: 'review_started',
+            detail: `Review started with ${reviewerDef.name}`,
+            raw: { reviewer: reviewerDef.name },
+        });
         const emitReviewCompleted = async (decision, reason) => {
+            this.recordStepToolSideEffect(step.name, {
+                type: 'review_completed',
+                detail: `Review ${decision} by ${reviewerDef.name}${reason ? `: ${reason}` : ''}`,
+                raw: { reviewer: reviewerDef.name, decision, reason },
+            });
             await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason);
             this.emit({
                 type: 'step:review-completed',
@@ -2470,6 +3426,9 @@ export class WorkflowRunner {
         };
         try {
             await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
+                evidenceStepName: step.name,
+                evidenceRole: 'reviewer',
+                logicalName: reviewerDef.name,
                 onSpawned: ({ agent }) => {
                     reviewerHandle = agent;
                 },
@@ -2507,15 +3466,34 @@ export class WorkflowRunner {
         return reviewOutput;
     }
     parseReviewDecision(reviewOutput) {
+        // Try the parsers from strictest to most lenient: the exact
+        // REVIEW_DECISION format first, then a tolerant keyword match, and
+        // finally an evidence-based judgement. The first truthy result wins;
+        // otherwise whatever the last parser returns is passed through.
+        return (this.parseStrictReviewDecision(reviewOutput) ||
+            this.parseTolerantReviewDecision(reviewOutput) ||
+            this.judgeReviewDecisionFromEvidence(reviewOutput));
+    }
+    parseStrictReviewDecision(reviewOutput) {
         const decisionPattern = /REVIEW_DECISION:\s*(APPROVE|REJECT)/gi;
         const decisionMatches = [...reviewOutput.matchAll(decisionPattern)];
         if (decisionMatches.length === 0) {
             return null;
         }
         const outputLikelyContainsEchoedPrompt = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
-        const decisionMatch = outputLikelyContainsEchoedPrompt && decisionMatches.length > 1
-            ? decisionMatches[decisionMatches.length - 1]
-            : decisionMatches[0];
+        const realReviewMatches = outputLikelyContainsEchoedPrompt
+            ? decisionMatches.filter((m) => {
+                const lineStart = reviewOutput.lastIndexOf('\n', m.index) + 1;
+                const lineEnd = reviewOutput.indexOf('\n', m.index);
+                const line = reviewOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
+                return !line.includes('APPROVE or REJECT');
+            })
+            : decisionMatches;
+        const decisionMatch = realReviewMatches.length > 0
+            ? realReviewMatches[realReviewMatches.length - 1]
+            : decisionMatches[decisionMatches.length - 1];
         const decision = decisionMatch?.[1]?.toUpperCase();
         if (decision !== 'APPROVE' && decision !== 'REJECT') {
             return null;
@@ -2531,6 +3509,85 @@ export class WorkflowRunner {
             reason: reason && reason !== '<one sentence>' ? reason : undefined,
         };
     }
+    /**
+     * Leniently read a review verdict from output that does not follow the
+     * exact REVIEW_DECISION format.
+     *
+     * Echoed prompt lines are removed first. Each remaining line (with any
+     * leading `REVIEW_DECISION:` label stripped) is offered to
+     * normalizeReviewDecisionCandidate; if no single line yields a verdict, the
+     * lines joined with spaces are tried once.
+     *
+     * @param reviewOutput Raw reviewer output.
+     * @returns `{ decision, reason }`, or `null` when no verdict is recognised.
+     *   `reason` is the parsed REVIEW_REASON, falling back to the first
+     *   non-empty sanitized line.
+     */
+    parseTolerantReviewDecision(reviewOutput) {
+        const echoedPromptPatterns = [
+            /^Return exactly:?$/i,
+            /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
+            /^REVIEW_REASON:\s*<one sentence>$/i,
+        ];
+        const sanitized = this.stripEchoedPromptLines(reviewOutput, echoedPromptPatterns);
+        if (!sanitized) {
+            return null;
+        }
+        const lines = sanitized
+            .split('\n')
+            .map((line) => line.trim())
+            .filter(Boolean);
+        let verdict = null;
+        for (const line of lines) {
+            const unlabelled = line.replace(/^REVIEW_DECISION:\s*/i, '').trim();
+            verdict = this.normalizeReviewDecisionCandidate(unlabelled);
+            if (verdict) {
+                break;
+            }
+        }
+        if (!verdict) {
+            // No single line matched; a phrase may be split across lines.
+            verdict = this.normalizeReviewDecisionCandidate(lines.join(' '));
+        }
+        if (!verdict) {
+            return null;
+        }
+        return {
+            decision: verdict,
+            reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
+        };
+    }
+    /**
+     * Map free-form verdict text onto 'approved' or 'rejected'.
+     *
+     * The text is trimmed and lower-cased, and must begin with one of the known
+     * approval or rejection phrases. Approval phrases are checked first.
+     *
+     * @param candidate Text to classify.
+     * @returns 'approved', 'rejected', or `null` when the text is empty or
+     *   starts with no known phrase.
+     */
+    normalizeReviewDecisionCandidate(candidate) {
+        const normalized = candidate.trim().toLowerCase();
+        if (normalized.length === 0) {
+            return null;
+        }
+        const verdictRules = [
+            ['approved', /^(approve|approved|complete|completed|pass|passed|accept|accepted|lgtm|ship it|looks good|looks fine)\b/i],
+            ['rejected', /^(reject|rejected|retry|retry requested|fail|failed|incomplete|needs clarification|not complete|not ready|insufficient evidence)\b/i],
+        ];
+        for (const [verdict, pattern] of verdictRules) {
+            if (pattern.test(normalized)) {
+                return verdict;
+            }
+        }
+        return null;
+    }
+    /**
+     * Extract the text following `REVIEW_REASON:` from reviewer output.
+     *
+     * When the output appears to echo the prompt and holds more than one
+     * REVIEW_REASON line, the last one is used; otherwise the first.
+     *
+     * @param reviewOutput Reviewer output to scan.
+     * @returns The trimmed reason, or `undefined` when none is found, it is
+     *   empty, or it is the literal placeholder '<one sentence>'.
+     */
+    parseReviewReason(reviewOutput) {
+        const candidates = Array.from(reviewOutput.matchAll(/REVIEW_REASON:\s*(.+)/gi));
+        const promptWasEchoed = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
+        let chosen = candidates[0];
+        if (promptWasEchoed && candidates.length > 1) {
+            chosen = candidates[candidates.length - 1];
+        }
+        const text = chosen?.[1]?.trim();
+        if (!text || text === '<one sentence>') {
+            return undefined;
+        }
+        return text;
+    }
+    /**
+     * Last-resort verdict: scan the reviewer output for positive or negative
+     * wording anywhere in the text.
+     *
+     * Negative wording takes precedence, so output containing both kinds is
+     * treated as rejected.
+     *
+     * @param reviewOutput Raw reviewer output.
+     * @returns `{ decision, reason }` with `decision` 'rejected' or 'approved',
+     *   or `null` when the sanitized output is empty or has neither kind of wording.
+     */
+    judgeReviewDecisionFromEvidence(reviewOutput) {
+        const echoedPromptPatterns = [
+            /^Return exactly:?$/i,
+            /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
+            /^REVIEW_REASON:\s*<one sentence>$/i,
+        ];
+        const sanitized = this.stripEchoedPromptLines(reviewOutput, echoedPromptPatterns);
+        if (!sanitized) {
+            return null;
+        }
+        let verdict = null;
+        if (/\b(reject(?:ed)?|retry|fail(?:ed)?|incomplete|missing checks|insufficient evidence|not safe)\b/i.test(sanitized)) {
+            verdict = 'rejected';
+        }
+        else if (/\b(approved?|complete(?:d)?|verified|looks good|looks fine|safe handoff|pass(?:ed)?)\b/i.test(sanitized)) {
+            verdict = 'approved';
+        }
+        if (verdict === null) {
+            return null;
+        }
+        return {
+            decision: verdict,
+            reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
+        };
+    }
     combineStepAndReviewOutput(stepOutput, reviewOutput) {
         const primary = stepOutput.trimEnd();
         const review = reviewOutput.trim();
@@ -2601,7 +3658,7 @@ export class WorkflowRunner {
             case 'worker':
                 return ('You are a non-interactive worker agent. Produce clean, structured output to stdout.\n' +
                     'Do NOT use mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn sub-agents.\n' +
-                    'Do NOT use mcp__relaycast__dm_send or any Relaycast messaging tools — you have no relay connection.\n\n');
+                    'Do NOT use mcp__relaycast__message_dm_send or any Relaycast messaging tools — you have no relay connection.\n\n');
             case 'reviewer':
                 return ('You are a non-interactive reviewer agent. Read the specified files/artifacts and produce a clear verdict.\n' +
                     'Do NOT spawn sub-agents or use any Relaycast messaging tools.\n\n');
@@ -2759,11 +3816,19 @@ export class WorkflowRunner {
                     reject(new Error(`Failed to spawn ${cmd}: ${err.message}`));
                 });
             });
+            this.captureStepTerminalEvidence(step.name, {}, { exitCode, exitSignal });
             return { output, exitCode, exitSignal };
         }
         finally {
-            const combinedOutput = stdoutChunks.join('') + stderrChunks.join('');
+            const stdout = stdoutChunks.join('');
+            const stderr = stderrChunks.join('');
+            const combinedOutput = stdout + stderr;
             this.lastFailedStepOutput.set(step.name, combinedOutput);
+            this.captureStepTerminalEvidence(step.name, {
+                stdout,
+                stderr,
+                combined: combinedOutput,
+            });
             stopHeartbeat?.();
             logStream.end();
             this.unregisterWorker(agentName);
@@ -2777,6 +3842,7 @@ export class WorkflowRunner {
         if (!this.relay) {
             throw new Error('AgentRelay not initialized');
         }
+        const evidenceStepName = options.evidenceStepName ?? step.name;
         // Deterministic name: step name + optional role suffix + first 8 chars of run ID.
         const requestedName = `${step.name}${options.agentNameSuffix ? `-${options.agentNameSuffix}` : ''}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
         let agentName = requestedName;
@@ -2823,11 +3889,17 @@ export class WorkflowRunner {
         let ptyChunks = [];
         try {
             const agentCwd = this.resolveAgentCwd(agentDef);
+            const interactiveSpawnPolicy = resolveSpawnPolicy({
+                AGENT_NAME: agentName,
+                AGENT_CLI: agentDef.cli,
+                RELAY_API_KEY: this.relayApiKey ?? 'workflow-runner',
+                AGENT_CHANNELS: (agentChannels ?? []).join(','),
+            });
             agent = await this.relay.spawnPty({
                 name: agentName,
                 cli: agentDef.cli,
                 model: agentDef.constraints?.model,
-                args: [],
+                args: interactiveSpawnPolicy.args,
                 channels: agentChannels,
                 task: taskWithExit,
                 idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
@@ -2859,16 +3931,27 @@ export class WorkflowRunner {
                 const oldListener = this.ptyListeners.get(oldName);
                 if (oldListener) {
                     this.ptyListeners.delete(oldName);
-                    this.ptyListeners.set(agent.name, (chunk) => {
+                    const resolvedAgentName = agent.name;
+                    this.ptyListeners.set(resolvedAgentName, (chunk) => {
                         const stripped = WorkflowRunner.stripAnsi(chunk);
-                        this.ptyOutputBuffers.get(agent.name)?.push(stripped);
+                        this.ptyOutputBuffers.get(resolvedAgentName)?.push(stripped);
                         newLogStream.write(chunk);
-                        options.onChunk?.({ agentName: agent.name, chunk });
+                        options.onChunk?.({ agentName: resolvedAgentName, chunk });
                     });
                 }
                 agentName = agent.name;
             }
-            await options.onSpawned?.({ requestedName, actualName: agent.name, agent });
+            const liveAgent = agent;
+            await options.onSpawned?.({ requestedName, actualName: liveAgent.name, agent: liveAgent });
+            this.runtimeStepAgents.set(liveAgent.name, {
+                stepName: evidenceStepName,
+                role: options.evidenceRole ?? agentDef.role ?? 'agent',
+                logicalName: options.logicalName ?? agentDef.name,
+            });
+            const signalParticipant = this.resolveSignalParticipantKind(options.evidenceRole ?? agentDef.role ?? 'agent');
+            if (signalParticipant) {
+                this.rememberStepSignalSender(evidenceStepName, signalParticipant, liveAgent.name, options.logicalName ?? agentDef.name);
+            }
             // Register in workers.json so `agents:kill` can find this agent
             let workerPid;
             try {
@@ -2881,8 +3964,8 @@ export class WorkflowRunner {
             this.registerWorker(agentName, agentDef.cli, step.task ?? '', workerPid);
             // Register the spawned agent in Relaycast for observability + start heartbeat
             if (this.relayApiKey) {
-                const agentClient = await this.registerRelaycastExternalAgent(agent.name, `Workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
-                    console.warn(`[WorkflowRunner] Failed to register ${agent.name} in Relaycast:`, err?.message ?? err);
+                const agentClient = await this.registerRelaycastExternalAgent(liveAgent.name, `Workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
+                    console.warn(`[WorkflowRunner] Failed to register ${liveAgent.name} in Relaycast:`, err?.message ?? err);
                     return null;
                 });
                 // Keep the agent online in the dashboard while it's working
@@ -2895,30 +3978,30 @@ export class WorkflowRunner {
                 const channelAgent = await this.ensureRelaycastRunnerAgent().catch(() => null);
                 await channelAgent?.channels.invite(this.channel, agent.name).catch(() => { });
             }
-            // Post assignment notification (no task content — task arrives via direct broker injection)
-            this.postToChannel(`**[${step.name}]** Assigned to \`${agent.name}\``);
+            // Keep operational assignment chatter out of the agent coordination channel.
+            this.log(`[${step.name}] Assigned to ${agent.name}`);
             // Register agent handle for hub-mediated nudging
             this.activeAgentHandles.set(agentName, agent);
             // Wait for agent to exit, with idle nudging if configured
-            exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs);
+            exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs, options.preserveOnIdle ?? this.shouldPreserveIdleSupervisor(agentDef, step, options.evidenceRole));
             // Stop heartbeat now that agent has exited
             stopHeartbeat?.();
             if (exitResult === 'timeout') {
-                // Safety net: check if the verification file exists before giving up.
-                // The agent may have completed work but failed to /exit.
-                if (step.verification?.type === 'file_exists') {
-                    const verifyPath = path.resolve(this.cwd, step.verification.value);
-                    if (existsSync(verifyPath)) {
-                        this.postToChannel(`**[${step.name}]** Agent idle after completing work — releasing`);
-                        await agent.release();
-                        // Fall through to read output below
-                    }
-                    else {
+                // Grace-period fallback: before failing, check if the agent completed
+                // its work but just failed to self-terminate. Run verification if
+                // configured — a passing gate + timeout is better than a hard failure.
+                let timeoutRecovered = false;
+                if (step.verification) {
+                    const ptyOutput = (this.ptyOutputBuffers.get(agentName) ?? []).join('');
+                    const verificationResult = this.runVerification(step.verification, ptyOutput, step.name, undefined, { allowFailure: true });
+                    if (verificationResult.passed) {
+                        this.log(`[${step.name}] Agent timed out but verification passed — treating as complete`);
+                        this.postToChannel(`**[${step.name}]** Agent idle after completing work — verification passed, releasing`);
                         await agent.release();
-                        throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
+                        timeoutRecovered = true;
                     }
                 }
-                else {
+                if (!timeoutRecovered) {
                     await agent.release();
                     throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
                 }
@@ -2931,6 +4014,19 @@ export class WorkflowRunner {
             // Snapshot PTY chunks before cleanup — we need them for output reading below
             ptyChunks = this.ptyOutputBuffers.get(agentName) ?? [];
             this.lastFailedStepOutput.set(step.name, ptyChunks.join(''));
+            if (ptyChunks.length > 0 || agent?.exitCode !== undefined || agent?.exitSignal !== undefined) {
+                this.captureStepTerminalEvidence(evidenceStepName, {
+                    stdout: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
+                    combined: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
+                }, {
+                    exitCode: agent?.exitCode,
+                    exitSignal: agent?.exitSignal,
+                }, {
+                    sender: options.logicalName ?? agentDef.name,
+                    actor: agent?.name ?? agentName,
+                    role: options.evidenceRole ?? agentDef.role ?? 'agent',
+                });
+            }
             // Always clean up PTY resources — prevents fd leaks if spawnPty or waitForExit throws
             stopHeartbeat?.();
             this.activeAgentHandles.delete(agentName);
@@ -2943,6 +4039,7 @@ export class WorkflowRunner {
             }
             this.unregisterWorker(agentName);
             this.supervisedRuntimeAgents.delete(agentName);
+            this.runtimeStepAgents.delete(agentName);
         }
         let output;
         if (ptyChunks.length > 0) {
@@ -2959,6 +4056,13 @@ export class WorkflowRunner {
                         ? 'Agent completed (idle — treated as done)'
                         : `Agent exited (${exitResult})`;
         }
+        if (ptyChunks.length === 0) {
+            this.captureStepTerminalEvidence(evidenceStepName, { stdout: output, combined: output }, { exitCode: agent?.exitCode, exitSignal: agent?.exitSignal }, {
+                sender: options.logicalName ?? agentDef.name,
+                actor: agent?.name ?? agentName,
+                role: options.evidenceRole ?? agentDef.role ?? 'agent',
+            });
+        }
         return {
             output,
             exitCode: agent?.exitCode,
@@ -2986,31 +4090,106 @@ export class WorkflowRunner {
         'orchestrator',
         'auctioneer',
     ]);
+    /**
+     * Report whether an agent definition looks like a lead/hub agent.
+     *
+     * An agent qualifies when its preset is exactly 'lead', or when any entry
+     * of WorkflowRunner.HUB_ROLES appears as a whole word (case-insensitive)
+     * in the agent's name or in its effective role.
+     *
+     * @param {object} agentDef - Agent definition; `preset`, `role` and `name` are read.
+     * @param {string} [roleOverride] - Role used instead of `agentDef.role` when not nullish.
+     * @returns {boolean} True when the agent is lead-like.
+     */
+    isLeadLikeAgent(agentDef, roleOverride) {
+        if (agentDef.preset === 'lead') {
+            return true;
+        }
+        const effectiveRole = (roleOverride ?? agentDef.role ?? '').toLowerCase();
+        const loweredName = agentDef.name.toLowerCase();
+        for (const hubRole of WorkflowRunner.HUB_ROLES) {
+            // Non-global regex, so reusing it for both tests carries no lastIndex state.
+            const wholeWord = new RegExp(`\\b${hubRole}\\b`, 'i');
+            if (wholeWord.test(loweredName) || wholeWord.test(effectiveRole)) {
+                return true;
+            }
+        }
+        return false;
+    }
+    /**
+     * Decide whether an agent that goes idle should be kept alive rather than
+     * treated as finished. The result is passed to waitForExitWithIdleNudging
+     * as its `preserveIdleSupervisor` argument.
+     *
+     * @param {object} agentDef - Agent definition, forwarded to isLeadLikeAgent.
+     * @param {object} step - Workflow step; only `step.task` is read.
+     * @param {string} [evidenceRole] - Optional role override for this spawn.
+     * @returns {boolean} True when the idle agent should be preserved.
+     */
+    shouldPreserveIdleSupervisor(agentDef, step, evidenceRole) {
+        // A role containing the whole word "owner" is always preserved.
+        if (evidenceRole && /\bowner\b/i.test(evidenceRole)) {
+            return true;
+        }
+        // Otherwise only lead-like agents are candidates.
+        if (!this.isLeadLikeAgent(agentDef, evidenceRole)) {
+            return false;
+        }
+        const task = step.task ?? '';
+        // `supervis` is a stem, so it needs `\w*` before the closing `\b` to match
+        // "supervise"/"supervisor"/"supervising". `_` is a word character, so
+        // `\w*_DONE` is required to match tokens such as WORKER_DONE.
+        return /\b(wait|waiting|monitor|supervis\w*|check inbox|check.*channel|poll|DONE|\w*_DONE|signal|handoff)\b/i.test(task);
+    }
     /**
      * Wait for agent exit with idle detection and nudging.
      * If no idle nudge config is set, falls through to simple waitForExit.
      */
-    async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs) {
+    async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs, preserveIdleSupervisor = false) {
         const nudgeConfig = this.currentConfig?.swarm.idleNudge;
         if (!nudgeConfig) {
-            // Idle = done: race exit against idle. Whichever fires first completes the step.
-            const result = await Promise.race([
-                agent.waitForExit(timeoutMs).then((r) => ({ kind: 'exit', result: r })),
-                agent.waitForIdle(timeoutMs).then((r) => ({ kind: 'idle', result: r })),
-            ]);
-            if (result.kind === 'idle' && result.result === 'idle') {
-                this.log(`[${step.name}] Agent "${agent.name}" went idle — treating as complete`);
-                this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — treating as complete`);
-                await agent.release();
-                return 'released';
-            }
-            // Exit won the race, or idle returned 'exited'/'timeout' — pass through.
-            return result.result;
+            if (preserveIdleSupervisor) {
+                this.log(`[${step.name}] Supervising agent "${agent.name}" may idle while waiting — using exit-only completion`);
+                return agent.waitForExit(timeoutMs);
+            }
+            // Idle = done: race exit against idle, but only accept idle if verification passes.
+            const idleLoopStart = Date.now();
+            while (true) {
+                const elapsed = Date.now() - idleLoopStart;
+                const remaining = timeoutMs != null ? Math.max(0, timeoutMs - elapsed) : undefined;
+                if (remaining != null && remaining <= 0) {
+                    return 'timeout';
+                }
+                const result = await Promise.race([
+                    agent.waitForExit(remaining).then((r) => ({ kind: 'exit', result: r })),
+                    agent.waitForIdle(remaining).then((r) => ({ kind: 'idle', result: r })),
+                ]);
+                if (result.kind === 'idle' && result.result === 'idle') {
+                    // Check verification before treating idle as complete.
+                    // Mirror runVerification's double-occurrence guard: if the task text
+                    // contains the token (from the prompt instruction), require a second
+                    // occurrence from the agent's actual output to avoid false positives.
+                    if (step.verification && step.verification.type === 'output_contains') {
+                        const token = step.verification.value;
+                        const ptyOutput = (this.ptyOutputBuffers.get(agent.name) ?? []).join('');
+                        const taskText = step.task ?? '';
+                        const taskHasToken = taskText.includes(token);
+                        let verificationPassed = true;
+                        if (taskHasToken) {
+                            const first = ptyOutput.indexOf(token);
+                            verificationPassed = first !== -1 && ptyOutput.includes(token, first + token.length);
+                        }
+                        else {
+                            verificationPassed = ptyOutput.includes(token);
+                        }
+                        if (!verificationPassed) {
+                            // The broker fires agent_idle only once per idle transition.
+                            // If the agent is still working (will produce output then idle again),
+                            // continuing the loop works. But if the agent is permanently idle,
+                            // waitForIdle won't resolve again. Wait briefly for new output,
+                            // then release and let upstream verification handle the result.
+                            this.log(`[${step.name}] Agent "${agent.name}" went idle but verification not yet passed — waiting for more output`);
+                            const idleGraceSecs = 15;
+                            const graceResult = await Promise.race([
+                                agent.waitForExit(idleGraceSecs * 1000).then((r) => ({ kind: 'exit', result: r })),
+                                agent.waitForIdle(idleGraceSecs * 1000).then((r) => ({ kind: 'idle', result: r })),
+                            ]);
+                            if (graceResult.kind === 'idle' && graceResult.result === 'idle') {
+                                // Agent went idle again after producing output — re-check verification
+                                continue;
+                            }
+                            if (graceResult.kind === 'exit') {
+                                return graceResult.result;
+                            }
+                            // Grace period timed out — agent is permanently idle without verification.
+                            // Release and let upstream executeAgentStep handle verification.
+                            this.log(`[${step.name}] Agent "${agent.name}" still idle after ${idleGraceSecs}s grace — releasing`);
+                            this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — releasing (verification pending)`);
+                            await agent.release();
+                            return 'released';
+                        }
+                    }
+                    this.log(`[${step.name}] Agent "${agent.name}" went idle — treating as complete`);
+                    this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — treating as complete`);
+                    await agent.release();
+                    return 'released';
+                }
+                // Exit won the race, or idle returned 'exited'/'timeout' — pass through.
+                return result.result;
+            }
         }
         const nudgeAfterMs = nudgeConfig.nudgeAfterMs ?? 120_000;
         const escalateAfterMs = nudgeConfig.escalateAfterMs ?? 120_000;
         const maxNudges = nudgeConfig.maxNudges ?? 1;
         let nudgeCount = 0;
+        let preservedSupervisorNoticeSent = false;
         const startTime = Date.now();
         while (true) {
             // Calculate remaining time from overall timeout
@@ -3045,6 +4224,14 @@ export class WorkflowRunner {
                 this.emit({ type: 'step:nudged', runId: this.currentRunId ?? '', stepName: step.name, nudgeCount });
                 continue;
             }
+            if (preserveIdleSupervisor) {
+                if (!preservedSupervisorNoticeSent) {
+                    this.log(`[${step.name}] Supervising agent "${agent.name}" stayed idle after ${nudgeCount} nudge(s) — preserving until exit or timeout`);
+                    this.postToChannel(`**[${step.name}]** Supervising agent \`${agent.name}\` is waiting on handoff — keeping it alive until it exits or the step times out`);
+                    preservedSupervisorNoticeSent = true;
+                }
+                continue;
+            }
             // Exhausted nudges — force-release
             this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` still idle after ${nudgeCount} nudge(s) — force-releasing`);
             this.emit({ type: 'step:force-released', runId: this.currentRunId ?? '', stepName: step.name });
@@ -3114,7 +4301,31 @@ export class WorkflowRunner {
         return undefined;
     }
     // ── Verification ────────────────────────────────────────────────────────
-    runVerification(check, output, stepName, injectedTaskText) {
+    runVerification(check, output, stepName, injectedTaskText, options) {
+        const fail = (message) => {
+            const observedAt = new Date().toISOString();
+            this.recordStepToolSideEffect(stepName, {
+                type: 'verification_observed',
+                detail: message,
+                observedAt,
+                raw: { passed: false, type: check.type, value: check.value },
+            });
+            this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
+                kind: 'verification_failed',
+                source: 'verification',
+                text: message,
+                observedAt,
+                value: check.value,
+            });
+            if (options?.allowFailure) {
+                return {
+                    passed: false,
+                    completionReason: 'failed_verification',
+                    error: message,
+                };
+            }
+            throw new WorkflowCompletionError(message, 'failed_verification');
+        };
         switch (check.type) {
             case 'output_contains': {
                 // Guard against false positives: the PTY captures the injected task text
@@ -3128,12 +4339,12 @@ export class WorkflowRunner {
                     const first = output.indexOf(token);
                     const hasSecond = first !== -1 && output.includes(token, first + token.length);
                     if (!hasSecond) {
-                        throw new Error(`Verification failed for "${stepName}": output does not contain "${token}" ` +
+                        return fail(`Verification failed for "${stepName}": output does not contain "${token}" ` +
                             `(token found only in task injection — agent must output it explicitly)`);
                     }
                 }
                 else if (!output.includes(token)) {
-                    throw new Error(`Verification failed for "${stepName}": output does not contain "${token}"`);
+                    return fail(`Verification failed for "${stepName}": output does not contain "${token}"`);
                 }
                 break;
             }
@@ -3142,13 +4353,37 @@ export class WorkflowRunner {
                 break;
             case 'file_exists':
                 if (!existsSync(path.resolve(this.cwd, check.value))) {
-                    throw new Error(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
+                    return fail(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
                 }
                 break;
             case 'custom':
                 // Custom verifications are evaluated by callers; no-op here
-                break;
-        }
+                return { passed: false };
+        }
+        if (options?.completionMarkerFound === false) {
+            this.log(`[${stepName}] Verification passed without legacy STEP_COMPLETE marker; allowing completion`);
+        }
+        const successMessage = options?.completionMarkerFound === false
+            ? `Verification passed without legacy STEP_COMPLETE marker`
+            : `Verification passed`;
+        const observedAt = new Date().toISOString();
+        this.recordStepToolSideEffect(stepName, {
+            type: 'verification_observed',
+            detail: successMessage,
+            observedAt,
+            raw: { passed: true, type: check.type, value: check.value },
+        });
+        this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
+            kind: 'verification_passed',
+            source: 'verification',
+            text: successMessage,
+            observedAt,
+            value: check.value,
+        });
+        return {
+            passed: true,
+            completionReason: 'completed_verified',
+        };
     }
     // ── State helpers ─────────────────────────────────────────────────────
     async updateRunStatus(runId, status, error) {
@@ -3164,13 +4399,16 @@ export class WorkflowRunner {
         }
         await this.db.updateRun(runId, patch);
     }
-    async markStepFailed(state, error, runId, exitInfo) {
+    async markStepFailed(state, error, runId, exitInfo, completionReason) {
+        this.captureStepTerminalEvidence(state.row.stepName, {}, exitInfo);
         state.row.status = 'failed';
         state.row.error = error;
+        state.row.completionReason = completionReason;
         state.row.completedAt = new Date().toISOString();
         await this.db.updateStep(state.row.id, {
             status: 'failed',
             error,
+            completionReason,
             completedAt: state.row.completedAt,
             updatedAt: new Date().toISOString(),
         });
@@ -3182,6 +4420,7 @@ export class WorkflowRunner {
             exitCode: exitInfo?.exitCode,
             exitSignal: exitInfo?.exitSignal,
         });
+        this.finalizeStepEvidence(state.row.stepName, 'failed', state.row.completedAt, completionReason);
     }
     async markDownstreamSkipped(failedStepName, allSteps, stepStates, runId) {
         const queue = [failedStepName];
@@ -3275,7 +4514,7 @@ export class WorkflowRunner {
             'RELAY SETUP — do this FIRST before any other relay tool:\n' +
             `1. Call: register(name="${agentName}")\n` +
             '   This authenticates you in the Relaycast workspace.\n' +
-            '   ALL relay tools (mcp__relaycast__dm_send, mcp__relaycast__inbox_check, mcp__relaycast__message_post, etc.) require\n' +
+            '   ALL relay tools (mcp__relaycast__message_dm_send, mcp__relaycast__message_inbox_check, mcp__relaycast__message_post, etc.) require\n' +
             '   registration first — they will fail with "Not registered" otherwise.\n' +
             `2. Your agent name is "${agentName}" — use this exact name when registering.`);
     }
@@ -3298,8 +4537,8 @@ export class WorkflowRunner {
             'you should break it down and delegate to helper agents to avoid timeouts.\n\n' +
             'Option 1 — Spawn relay agents (for real parallel coding work):\n' +
             '  - mcp__relaycast__agent_add(name="helper-1", cli="claude", task="Specific subtask description")\n' +
-            '  - Coordinate via mcp__relaycast__dm_send(to="helper-1", text="...")\n' +
-            '  - Check on them with mcp__relaycast__inbox_check()\n' +
+            '  - Coordinate via mcp__relaycast__message_dm_send(to="helper-1", text="...")\n' +
+            '  - Check on them with mcp__relaycast__message_inbox_check()\n' +
             '  - Clean up when done: mcp__relaycast__agent_remove(name="helper-1")\n\n' +
             subAgentOption +
             'Guidelines:\n' +
@@ -3311,9 +4550,23 @@ export class WorkflowRunner {
             '  "RELAY SETUP: First call register(name=\'<exact-agent-name>\') before any other relay tool."');
     }
     /** Post a message to the workflow channel. Fire-and-forget — never throws or blocks. */
-    postToChannel(text) {
+    postToChannel(text, options = {}) {
         if (!this.relayApiKey || !this.channel)
             return;
+        this.recordChannelEvidence(text, options);
+        const stepName = options.stepName ?? this.inferStepNameFromChannelText(text);
+        if (stepName) {
+            this.recordStepToolSideEffect(stepName, {
+                type: 'post_channel_message',
+                detail: text.slice(0, 240),
+                raw: {
+                    actor: options.actor,
+                    role: options.role,
+                    target: options.target ?? this.channel,
+                    origin: options.origin ?? 'runner_post',
+                },
+            });
+        }
         this.ensureRelaycastRunnerAgent()
             .then((agent) => agent.send(this.channel, text))
             .catch(() => {
@@ -3471,6 +4724,9 @@ export class WorkflowRunner {
                 output: state.row.output,
                 error: state.row.error,
                 verificationPassed: state.row.status === 'completed' && stepsWithVerification.has(name),
+                completionMode: state.row.completionReason
+                    ? this.buildStepCompletionDecision(name, state.row.completionReason)?.mode
+                    : undefined,
             });
         }
         return outcomes;
@@ -3603,24 +4859,30 @@ export class WorkflowRunner {
     /** Persist step output to disk and post full output as a channel message. */
     async persistStepOutput(runId, stepName, output) {
         // 1. Write to disk
+        const outputPath = path.join(this.getStepOutputDir(runId), `${stepName}.md`);
         try {
             const dir = this.getStepOutputDir(runId);
             mkdirSync(dir, { recursive: true });
             const cleaned = WorkflowRunner.stripAnsi(output);
-            await writeFile(path.join(dir, `${stepName}.md`), cleaned);
+            await writeFile(outputPath, cleaned);
         }
         catch {
             // Non-critical
         }
+        this.recordStepToolSideEffect(stepName, {
+            type: 'persist_step_output',
+            detail: `Persisted step output to ${this.normalizeEvidencePath(outputPath)}`,
+            raw: { path: outputPath },
+        });
         // 2. Post scrubbed output as a single channel message (most recent tail only)
         const scrubbed = WorkflowRunner.scrubForChannel(output);
         if (scrubbed.length === 0) {
-            this.postToChannel(`**[${stepName}]** Step completed — output written to disk`);
+            this.postToChannel(`**[${stepName}]** Step completed — output written to disk`, { stepName });
             return;
         }
         const maxMsg = 2000;
         const preview = scrubbed.length > maxMsg ? scrubbed.slice(-maxMsg) : scrubbed;
-        this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``);
+        this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``, { stepName });
     }
     /** Load persisted step output from disk. */
     loadStepOutput(runId, stepName) {