@agent-relay/sdk 3.2.0 → 3.2.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/bin/agent-relay-broker-darwin-arm64 +0 -0
  2. package/bin/agent-relay-broker-darwin-x64 +0 -0
  3. package/bin/agent-relay-broker-linux-arm64 +0 -0
  4. package/bin/agent-relay-broker-linux-x64 +0 -0
  5. package/dist/__tests__/completion-pipeline.test.d.ts +14 -0
  6. package/dist/__tests__/completion-pipeline.test.d.ts.map +1 -0
  7. package/dist/__tests__/completion-pipeline.test.js +1476 -0
  8. package/dist/__tests__/completion-pipeline.test.js.map +1 -0
  9. package/dist/__tests__/e2e-owner-review.test.js +2 -2
  10. package/dist/__tests__/e2e-owner-review.test.js.map +1 -1
  11. package/dist/examples/example.js +1 -1
  12. package/dist/examples/example.js.map +1 -1
  13. package/dist/relay-adapter.js +4 -4
  14. package/dist/relay-adapter.js.map +1 -1
  15. package/dist/workflows/builder.d.ts +18 -3
  16. package/dist/workflows/builder.d.ts.map +1 -1
  17. package/dist/workflows/builder.js +24 -12
  18. package/dist/workflows/builder.js.map +1 -1
  19. package/dist/workflows/runner.d.ts +55 -2
  20. package/dist/workflows/runner.d.ts.map +1 -1
  21. package/dist/workflows/runner.js +1370 -108
  22. package/dist/workflows/runner.js.map +1 -1
  23. package/dist/workflows/trajectory.d.ts +6 -2
  24. package/dist/workflows/trajectory.d.ts.map +1 -1
  25. package/dist/workflows/trajectory.js +37 -2
  26. package/dist/workflows/trajectory.js.map +1 -1
  27. package/dist/workflows/types.d.ts +88 -0
  28. package/dist/workflows/types.d.ts.map +1 -1
  29. package/dist/workflows/types.js.map +1 -1
  30. package/dist/workflows/validator.js +1 -1
  31. package/dist/workflows/validator.js.map +1 -1
  32. package/package.json +2 -2
@@ -5,11 +5,12 @@
5
5
  */
6
6
  import { spawn as cpSpawn, execFileSync } from 'node:child_process';
7
7
  import { randomBytes } from 'node:crypto';
8
- import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
8
+ import { createWriteStream, existsSync, mkdirSync, readFileSync, readdirSync, renameSync, statSync, writeFileSync, } from 'node:fs';
9
9
  import { readFile, writeFile } from 'node:fs/promises';
10
10
  import path from 'node:path';
11
11
  import { parse as parseYaml } from 'yaml';
12
12
  import { stripAnsi as stripAnsiFn } from '../pty.js';
13
+ import { resolveSpawnPolicy } from '../spawn-from-env.js';
13
14
  import { loadCustomSteps, resolveAllCustomSteps, validateCustomStepsUsage, CustomStepsParseError, CustomStepResolutionError, } from './custom-steps.js';
14
15
  import { InMemoryWorkflowDb } from './memory-db.js';
15
16
  import { WorkflowTrajectory } from './trajectory.js';
@@ -28,6 +29,14 @@ class SpawnExitError extends Error {
28
29
  this.exitSignal = exitSignal ?? undefined;
29
30
  }
30
31
  }
32
+ class WorkflowCompletionError extends Error {
33
+ completionReason;
34
+ constructor(message, completionReason) {
35
+ super(message);
36
+ this.name = 'WorkflowCompletionError';
37
+ this.completionReason = completionReason;
38
+ }
39
+ }
31
40
  // ── CLI resolution ───────────────────────────────────────────────────────────
32
41
  /**
33
42
  * Resolve `cursor` to the concrete cursor agent binary available in PATH.
@@ -101,8 +110,16 @@ export class WorkflowRunner {
101
110
  lastActivity = new Map();
102
111
  /** Runtime-name lookup for agents participating in supervised owner flows. */
103
112
  supervisedRuntimeAgents = new Map();
113
+ /** Runtime-name lookup for active step agents so channel messages can be attributed to a step. */
114
+ runtimeStepAgents = new Map();
115
+ /** Per-step completion evidence collected across output, channel, files, and tool side-effects. */
116
+ stepCompletionEvidence = new Map();
117
+ /** Expected owner/worker identities per step so coordination signals can be validated by sender. */
118
+ stepSignalParticipants = new Map();
104
119
  /** Resolved named paths from the top-level `paths` config, keyed by name → absolute directory. */
105
120
  resolvedPaths = new Map();
121
+ /** Tracks agent names currently assigned as reviewers (ref-counted to handle concurrent usage). */
122
+ activeReviewers = new Map();
106
123
  constructor(options = {}) {
107
124
  this.db = options.db ?? new InMemoryWorkflowDb();
108
125
  this.workspaceId = options.workspaceId ?? 'local';
@@ -182,6 +199,441 @@ export class WorkflowRunner {
182
199
  }
183
200
  return resolved;
184
201
  }
202
+ static EVIDENCE_IGNORED_DIRS = new Set([
203
+ '.git',
204
+ '.agent-relay',
205
+ '.trajectories',
206
+ 'node_modules',
207
+ ]);
208
+ getStepCompletionEvidence(stepName) {
209
+ const record = this.stepCompletionEvidence.get(stepName);
210
+ if (!record)
211
+ return undefined;
212
+ const evidence = structuredClone(record.evidence);
213
+ return this.filterStepEvidenceBySignalProvenance(stepName, evidence);
214
+ }
215
+ getOrCreateStepEvidenceRecord(stepName) {
216
+ const existing = this.stepCompletionEvidence.get(stepName);
217
+ if (existing)
218
+ return existing;
219
+ const now = new Date().toISOString();
220
+ const record = {
221
+ evidence: {
222
+ stepName,
223
+ lastUpdatedAt: now,
224
+ roots: [],
225
+ output: {
226
+ stdout: '',
227
+ stderr: '',
228
+ combined: '',
229
+ },
230
+ channelPosts: [],
231
+ files: [],
232
+ process: {},
233
+ toolSideEffects: [],
234
+ coordinationSignals: [],
235
+ },
236
+ baselineSnapshots: new Map(),
237
+ filesCaptured: false,
238
+ };
239
+ this.stepCompletionEvidence.set(stepName, record);
240
+ return record;
241
+ }
242
+ initializeStepSignalParticipants(stepName, ownerSender, workerSender) {
243
+ this.stepSignalParticipants.set(stepName, {
244
+ ownerSenders: new Set(),
245
+ workerSenders: new Set(),
246
+ });
247
+ this.rememberStepSignalSender(stepName, 'owner', ownerSender);
248
+ this.rememberStepSignalSender(stepName, 'worker', workerSender);
249
+ }
250
+ rememberStepSignalSender(stepName, participant, ...senders) {
251
+ const participants = this.stepSignalParticipants.get(stepName) ??
252
+ {
253
+ ownerSenders: new Set(),
254
+ workerSenders: new Set(),
255
+ };
256
+ this.stepSignalParticipants.set(stepName, participants);
257
+ const target = participant === 'owner' ? participants.ownerSenders : participants.workerSenders;
258
+ for (const sender of senders) {
259
+ const trimmed = sender?.trim();
260
+ if (trimmed)
261
+ target.add(trimmed);
262
+ }
263
+ }
264
+ resolveSignalParticipantKind(role) {
265
+ const roleLC = role?.toLowerCase().trim();
266
+ if (!roleLC)
267
+ return undefined;
268
+ if (/\b(owner|lead|supervisor)\b/.test(roleLC))
269
+ return 'owner';
270
+ if (/\b(worker|specialist|engineer|implementer)\b/.test(roleLC))
271
+ return 'worker';
272
+ return undefined;
273
+ }
274
+ isSignalFromExpectedSender(stepName, signal) {
275
+ const expectedParticipant = signal.kind === 'worker_done'
276
+ ? 'worker'
277
+ : signal.kind === 'lead_done'
278
+ ? 'owner'
279
+ : undefined;
280
+ if (!expectedParticipant)
281
+ return true;
282
+ const participants = this.stepSignalParticipants.get(stepName);
283
+ if (!participants)
284
+ return true;
285
+ const allowedSenders = expectedParticipant === 'owner' ? participants.ownerSenders : participants.workerSenders;
286
+ if (allowedSenders.size === 0)
287
+ return true;
288
+ const sender = signal.sender ?? signal.actor;
289
+ if (sender) {
290
+ return allowedSenders.has(sender);
291
+ }
292
+ const observedParticipant = this.resolveSignalParticipantKind(signal.role);
293
+ if (observedParticipant) {
294
+ return observedParticipant === expectedParticipant;
295
+ }
296
+ return signal.source !== 'channel';
297
+ }
298
+ filterStepEvidenceBySignalProvenance(stepName, evidence) {
299
+ evidence.channelPosts = evidence.channelPosts.map((post) => {
300
+ const signals = post.signals.filter((signal) => this.isSignalFromExpectedSender(stepName, signal));
301
+ return {
302
+ ...post,
303
+ completionRelevant: signals.length > 0,
304
+ signals,
305
+ };
306
+ });
307
+ evidence.coordinationSignals = evidence.coordinationSignals.filter((signal) => this.isSignalFromExpectedSender(stepName, signal));
308
+ return evidence;
309
+ }
310
+ beginStepEvidence(stepName, roots, startedAt) {
311
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
312
+ const evidence = record.evidence;
313
+ const now = startedAt ?? new Date().toISOString();
314
+ evidence.startedAt ??= now;
315
+ evidence.status = 'running';
316
+ evidence.lastUpdatedAt = now;
317
+ for (const root of this.uniqueEvidenceRoots(roots)) {
318
+ if (!evidence.roots.includes(root)) {
319
+ evidence.roots.push(root);
320
+ }
321
+ if (!record.baselineSnapshots.has(root)) {
322
+ record.baselineSnapshots.set(root, this.captureFileSnapshot(root));
323
+ }
324
+ }
325
+ }
326
+ captureStepTerminalEvidence(stepName, output, process, meta) {
327
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
328
+ const evidence = record.evidence;
329
+ const observedAt = new Date().toISOString();
330
+ const append = (current, next) => {
331
+ if (!next)
332
+ return current;
333
+ return current ? `${current}\n${next}` : next;
334
+ };
335
+ if (output.stdout) {
336
+ evidence.output.stdout = append(evidence.output.stdout, output.stdout);
337
+ for (const signal of this.extractCompletionSignals(output.stdout, 'stdout', observedAt, meta)) {
338
+ evidence.coordinationSignals.push(signal);
339
+ }
340
+ }
341
+ if (output.stderr) {
342
+ evidence.output.stderr = append(evidence.output.stderr, output.stderr);
343
+ for (const signal of this.extractCompletionSignals(output.stderr, 'stderr', observedAt, meta)) {
344
+ evidence.coordinationSignals.push(signal);
345
+ }
346
+ }
347
+ const combinedOutput = output.combined ??
348
+ [output.stdout, output.stderr].filter((value) => Boolean(value)).join('\n');
349
+ if (combinedOutput) {
350
+ evidence.output.combined = append(evidence.output.combined, combinedOutput);
351
+ }
352
+ if (process) {
353
+ if (process.exitCode !== undefined) {
354
+ evidence.process.exitCode = process.exitCode;
355
+ evidence.coordinationSignals.push({
356
+ kind: 'process_exit',
357
+ source: 'process',
358
+ text: `Process exited with code ${process.exitCode}`,
359
+ observedAt,
360
+ value: String(process.exitCode),
361
+ });
362
+ }
363
+ if (process.exitSignal !== undefined) {
364
+ evidence.process.exitSignal = process.exitSignal;
365
+ }
366
+ }
367
+ evidence.lastUpdatedAt = observedAt;
368
+ }
369
+ finalizeStepEvidence(stepName, status, completedAt, completionReason) {
370
+ const record = this.stepCompletionEvidence.get(stepName);
371
+ if (!record)
372
+ return;
373
+ const evidence = record.evidence;
374
+ const observedAt = completedAt ?? new Date().toISOString();
375
+ evidence.status = status;
376
+ if (status !== 'running') {
377
+ evidence.completedAt = observedAt;
378
+ }
379
+ evidence.lastUpdatedAt = observedAt;
380
+ if (!record.filesCaptured) {
381
+ const existing = new Set(evidence.files.map((file) => `${file.kind}:${file.path}`));
382
+ for (const root of evidence.roots) {
383
+ const before = record.baselineSnapshots.get(root) ?? new Map();
384
+ const after = this.captureFileSnapshot(root);
385
+ for (const change of this.diffFileSnapshots(before, after, root, observedAt)) {
386
+ const key = `${change.kind}:${change.path}`;
387
+ if (existing.has(key))
388
+ continue;
389
+ existing.add(key);
390
+ evidence.files.push(change);
391
+ }
392
+ }
393
+ record.filesCaptured = true;
394
+ }
395
+ if (completionReason) {
396
+ const decision = this.buildStepCompletionDecision(stepName, completionReason);
397
+ if (decision) {
398
+ void this.trajectory?.stepCompletionDecision(stepName, decision);
399
+ }
400
+ }
401
+ }
402
+ recordStepToolSideEffect(stepName, effect) {
403
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
404
+ const observedAt = effect.observedAt ?? new Date().toISOString();
405
+ record.evidence.toolSideEffects.push({
406
+ ...effect,
407
+ observedAt,
408
+ });
409
+ record.evidence.lastUpdatedAt = observedAt;
410
+ }
411
+ recordChannelEvidence(text, options = {}) {
412
+ const stepName = options.stepName ??
413
+ this.inferStepNameFromChannelText(text) ??
414
+ (options.actor ? this.runtimeStepAgents.get(options.actor)?.stepName : undefined);
415
+ if (!stepName)
416
+ return;
417
+ const record = this.getOrCreateStepEvidenceRecord(stepName);
418
+ const postedAt = new Date().toISOString();
419
+ const sender = options.sender ?? options.actor;
420
+ const signals = this.extractCompletionSignals(text, 'channel', postedAt, {
421
+ sender,
422
+ actor: options.actor,
423
+ role: options.role,
424
+ });
425
+ const channelPost = {
426
+ stepName,
427
+ text,
428
+ postedAt,
429
+ origin: options.origin ?? 'runner_post',
430
+ completionRelevant: signals.length > 0,
431
+ sender,
432
+ actor: options.actor,
433
+ role: options.role,
434
+ target: options.target,
435
+ signals,
436
+ };
437
+ record.evidence.channelPosts.push(channelPost);
438
+ record.evidence.coordinationSignals.push(...signals);
439
+ record.evidence.lastUpdatedAt = postedAt;
440
+ }
441
+ extractCompletionSignals(text, source, observedAt, meta) {
442
+ const signals = [];
443
+ const seen = new Set();
444
+ const add = (kind, signalText, value) => {
445
+ const trimmed = signalText.trim().slice(0, 280);
446
+ if (!trimmed)
447
+ return;
448
+ const key = `${kind}:${trimmed}:${value ?? ''}`;
449
+ if (seen.has(key))
450
+ return;
451
+ seen.add(key);
452
+ signals.push({
453
+ kind,
454
+ source,
455
+ text: trimmed,
456
+ observedAt,
457
+ sender: meta?.sender,
458
+ actor: meta?.actor,
459
+ role: meta?.role,
460
+ value,
461
+ });
462
+ };
463
+ for (const match of text.matchAll(/\bWORKER_DONE\b(?::\s*([^\n]+))?/gi)) {
464
+ add('worker_done', match[0], match[1]?.trim());
465
+ }
466
+ for (const match of text.matchAll(/\bLEAD_DONE\b(?::\s*([^\n]+))?/gi)) {
467
+ add('lead_done', match[0], match[1]?.trim());
468
+ }
469
+ for (const match of text.matchAll(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/g)) {
470
+ add('step_complete', match[0], match[1]);
471
+ }
472
+ for (const match of text.matchAll(/\bOWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi)) {
473
+ add('owner_decision', match[0], match[1].toUpperCase());
474
+ }
475
+ for (const match of text.matchAll(/\bREVIEW_DECISION:\s*(APPROVE|REJECT)\b/gi)) {
476
+ add('review_decision', match[0], match[1].toUpperCase());
477
+ }
478
+ if (/\bverification gate observed\b|\bverification passed\b/i.test(text)) {
479
+ add('verification_passed', this.firstMeaningfulLine(text) ?? text);
480
+ }
481
+ if (/\bverification failed\b/i.test(text)) {
482
+ add('verification_failed', this.firstMeaningfulLine(text) ?? text);
483
+ }
484
+ if (/\b(summary|handoff|ready for review|ready for handoff|task complete|work complete|completed work|finished work)\b/i.test(text)) {
485
+ add('task_summary', this.firstMeaningfulLine(text) ?? text);
486
+ }
487
+ return signals;
488
+ }
489
+ inferStepNameFromChannelText(text) {
490
+ const bracketMatch = text.match(/^\*\*\[([^\]]+)\]/);
491
+ if (bracketMatch?.[1])
492
+ return bracketMatch[1];
493
+ const markerMatch = text.match(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/);
494
+ if (markerMatch?.[1])
495
+ return markerMatch[1];
496
+ return undefined;
497
+ }
498
+ uniqueEvidenceRoots(roots) {
499
+ return [...new Set(roots.filter((root) => Boolean(root)).map((root) => path.resolve(root)))];
500
+ }
501
+ captureFileSnapshot(root) {
502
+ const snapshot = new Map();
503
+ if (!existsSync(root))
504
+ return snapshot;
505
+ const visit = (currentPath) => {
506
+ let entries;
507
+ try {
508
+ entries = readdirSync(currentPath, { withFileTypes: true });
509
+ }
510
+ catch {
511
+ return;
512
+ }
513
+ for (const entry of entries) {
514
+ if (entry.isDirectory() && WorkflowRunner.EVIDENCE_IGNORED_DIRS.has(entry.name)) {
515
+ continue;
516
+ }
517
+ const fullPath = path.join(currentPath, entry.name);
518
+ if (entry.isDirectory()) {
519
+ visit(fullPath);
520
+ continue;
521
+ }
522
+ try {
523
+ const stats = statSync(fullPath);
524
+ if (!stats.isFile())
525
+ continue;
526
+ snapshot.set(fullPath, { mtimeMs: stats.mtimeMs, size: stats.size });
527
+ }
528
+ catch {
529
+ // Best-effort evidence collection only.
530
+ }
531
+ }
532
+ };
533
+ try {
534
+ const stats = statSync(root);
535
+ if (stats.isFile()) {
536
+ snapshot.set(root, { mtimeMs: stats.mtimeMs, size: stats.size });
537
+ return snapshot;
538
+ }
539
+ }
540
+ catch {
541
+ return snapshot;
542
+ }
543
+ visit(root);
544
+ return snapshot;
545
+ }
546
+ diffFileSnapshots(before, after, root, observedAt) {
547
+ const allPaths = new Set([...before.keys(), ...after.keys()]);
548
+ const changes = [];
549
+ for (const filePath of allPaths) {
550
+ const prior = before.get(filePath);
551
+ const next = after.get(filePath);
552
+ let kind;
553
+ if (!prior && next) {
554
+ kind = 'created';
555
+ }
556
+ else if (prior && !next) {
557
+ kind = 'deleted';
558
+ }
559
+ else if (prior && next && (prior.mtimeMs !== next.mtimeMs || prior.size !== next.size)) {
560
+ kind = 'modified';
561
+ }
562
+ if (!kind)
563
+ continue;
564
+ changes.push({
565
+ path: this.normalizeEvidencePath(filePath),
566
+ kind,
567
+ observedAt,
568
+ root,
569
+ });
570
+ }
571
+ return changes.sort((a, b) => a.path.localeCompare(b.path));
572
+ }
573
+ normalizeEvidencePath(filePath) {
574
+ const relative = path.relative(this.cwd, filePath);
575
+ if (!relative || relative === '')
576
+ return path.basename(filePath);
577
+ return relative.startsWith('..') ? filePath : relative;
578
+ }
579
+ buildStepCompletionDecision(stepName, completionReason) {
580
+ let reason;
581
+ let mode;
582
+ switch (completionReason) {
583
+ case 'completed_verified':
584
+ mode = 'verification';
585
+ reason = 'Verification passed';
586
+ break;
587
+ case 'completed_by_evidence':
588
+ mode = 'evidence';
589
+ reason = 'Completion inferred from collected evidence';
590
+ break;
591
+ case 'completed_by_owner_decision': {
592
+ const evidence = this.getStepCompletionEvidence(stepName);
593
+ const markerObserved = evidence?.coordinationSignals.some((signal) => signal.kind === 'step_complete');
594
+ mode = markerObserved ? 'marker' : 'owner_decision';
595
+ reason = markerObserved ? 'Legacy STEP_COMPLETE marker observed' : 'Owner approved completion';
596
+ break;
597
+ }
598
+ default:
599
+ return undefined;
600
+ }
601
+ return {
602
+ mode,
603
+ reason,
604
+ evidence: this.buildTrajectoryCompletionEvidence(stepName),
605
+ };
606
+ }
607
+ buildTrajectoryCompletionEvidence(stepName) {
608
+ const evidence = this.getStepCompletionEvidence(stepName);
609
+ if (!evidence)
610
+ return undefined;
611
+ const signals = evidence.coordinationSignals
612
+ .slice(-6)
613
+ .map((signal) => signal.value ?? signal.text);
614
+ const channelPosts = evidence.channelPosts
615
+ .filter((post) => post.completionRelevant)
616
+ .slice(-3)
617
+ .map((post) => post.text.slice(0, 160));
618
+ const files = evidence.files.slice(0, 6).map((file) => `${file.kind}:${file.path}`);
619
+ const summaryParts = [];
620
+ if (signals.length > 0)
621
+ summaryParts.push(`${signals.length} signal(s)`);
622
+ if (channelPosts.length > 0)
623
+ summaryParts.push(`${channelPosts.length} relevant channel post(s)`);
624
+ if (files.length > 0)
625
+ summaryParts.push(`${files.length} file change(s)`);
626
+ if (evidence.process.exitCode !== undefined) {
627
+ summaryParts.push(`exit=${evidence.process.exitCode}`);
628
+ }
629
+ return {
630
+ summary: summaryParts.length > 0 ? summaryParts.join(', ') : undefined,
631
+ signals: signals.length > 0 ? signals : undefined,
632
+ channelPosts: channelPosts.length > 0 ? channelPosts : undefined,
633
+ files: files.length > 0 ? files : undefined,
634
+ exitCode: evidence.process.exitCode,
635
+ };
636
+ }
185
637
  // ── Progress logging ────────────────────────────────────────────────────
186
638
  /** Log a progress message with elapsed time since run start. */
187
639
  log(msg) {
@@ -985,9 +1437,11 @@ export class WorkflowRunner {
985
1437
  if (state.row.status === 'failed') {
986
1438
  state.row.status = 'pending';
987
1439
  state.row.error = undefined;
1440
+ state.row.completionReason = undefined;
988
1441
  await this.db.updateStep(state.row.id, {
989
1442
  status: 'pending',
990
1443
  error: undefined,
1444
+ completionReason: undefined,
991
1445
  updatedAt: new Date().toISOString(),
992
1446
  });
993
1447
  }
@@ -1007,6 +1461,8 @@ export class WorkflowRunner {
1007
1461
  this.currentConfig = config;
1008
1462
  this.currentRunId = runId;
1009
1463
  this.runStartTime = Date.now();
1464
+ this.runtimeStepAgents.clear();
1465
+ this.stepCompletionEvidence.clear();
1010
1466
  this.log(`Starting workflow "${workflow.name}" (${workflow.steps.length} steps)`);
1011
1467
  // Initialize trajectory recording
1012
1468
  this.trajectory = new WorkflowTrajectory(config.trajectories, runId, this.cwd);
@@ -1132,8 +1588,24 @@ export class WorkflowRunner {
1132
1588
  const fromShort = msg.from.replace(/-[a-f0-9]{6,}$/, '');
1133
1589
  const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
1134
1590
  this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
1591
+ if (this.channel && (msg.to === this.channel || msg.to === `#${this.channel}`)) {
1592
+ const runtimeAgent = this.runtimeStepAgents.get(msg.from);
1593
+ this.recordChannelEvidence(msg.text, {
1594
+ sender: runtimeAgent?.logicalName ?? msg.from,
1595
+ actor: msg.from,
1596
+ role: runtimeAgent?.role,
1597
+ target: msg.to,
1598
+ origin: 'relay_message',
1599
+ stepName: runtimeAgent?.stepName,
1600
+ });
1601
+ }
1135
1602
  const supervision = this.supervisedRuntimeAgents.get(msg.from);
1136
1603
  if (supervision?.role === 'owner') {
1604
+ this.recordStepToolSideEffect(supervision.stepName, {
1605
+ type: 'owner_monitoring',
1606
+ detail: `Owner messaged ${msg.to}: ${msg.text.slice(0, 120)}`,
1607
+ raw: { to: msg.to, text: msg.text },
1608
+ });
1137
1609
  void this.trajectory?.ownerMonitoringEvent(supervision.stepName, supervision.logicalName, `Messaged ${msg.to}: ${msg.text.slice(0, 120)}`, { to: msg.to, text: msg.text });
1138
1610
  }
1139
1611
  };
@@ -1288,6 +1760,7 @@ export class WorkflowRunner {
1288
1760
  updatedAt: new Date().toISOString(),
1289
1761
  });
1290
1762
  this.emit({ type: 'step:failed', runId, stepName, error: 'Cancelled' });
1763
+ this.finalizeStepEvidence(stepName, 'failed');
1291
1764
  }
1292
1765
  }
1293
1766
  this.emit({ type: 'run:cancelled', runId });
@@ -1328,6 +1801,8 @@ export class WorkflowRunner {
1328
1801
  this.lastIdleLog.clear();
1329
1802
  this.lastActivity.clear();
1330
1803
  this.supervisedRuntimeAgents.clear();
1804
+ this.runtimeStepAgents.clear();
1805
+ this.activeReviewers.clear();
1331
1806
  this.log('Shutting down broker...');
1332
1807
  await this.relay?.shutdown();
1333
1808
  this.relay = undefined;
@@ -1435,6 +1910,9 @@ export class WorkflowRunner {
1435
1910
  attempts: (state?.row.retryCount ?? 0) + 1,
1436
1911
  output: state?.row.output,
1437
1912
  verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
1913
+ completionMode: state?.row.completionReason
1914
+ ? this.buildStepCompletionDecision(step.name, state.row.completionReason)?.mode
1915
+ : undefined,
1438
1916
  });
1439
1917
  }
1440
1918
  }
@@ -1595,11 +2073,21 @@ export class WorkflowRunner {
1595
2073
  const maxRetries = step.retries ?? errorHandling?.maxRetries ?? 0;
1596
2074
  const retryDelay = errorHandling?.retryDelayMs ?? 1000;
1597
2075
  let lastError;
2076
+ let lastCompletionReason;
2077
+ let lastExitCode;
2078
+ let lastExitSignal;
1598
2079
  for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
1599
2080
  this.checkAborted();
2081
+ lastExitCode = undefined;
2082
+ lastExitSignal = undefined;
1600
2083
  if (attempt > 0) {
1601
2084
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
1602
2085
  this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
2086
+ this.recordStepToolSideEffect(step.name, {
2087
+ type: 'retry',
2088
+ detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
2089
+ raw: { attempt, maxRetries },
2090
+ });
1603
2091
  state.row.retryCount = attempt;
1604
2092
  await this.db.updateStep(state.row.id, {
1605
2093
  retryCount: attempt,
@@ -1609,9 +2097,13 @@ export class WorkflowRunner {
1609
2097
  }
1610
2098
  // Mark step as running
1611
2099
  state.row.status = 'running';
2100
+ state.row.error = undefined;
2101
+ state.row.completionReason = undefined;
1612
2102
  state.row.startedAt = new Date().toISOString();
1613
2103
  await this.db.updateStep(state.row.id, {
1614
2104
  status: 'running',
2105
+ error: undefined,
2106
+ completionReason: undefined,
1615
2107
  startedAt: state.row.startedAt,
1616
2108
  updatedAt: new Date().toISOString(),
1617
2109
  });
@@ -1629,32 +2121,40 @@ export class WorkflowRunner {
1629
2121
  });
1630
2122
  // Resolve step workdir (named path reference) for deterministic steps
1631
2123
  const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
2124
+ this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
1632
2125
  try {
1633
2126
  // Delegate to executor if present
1634
2127
  if (this.executor?.executeDeterministicStep) {
1635
2128
  const result = await this.executor.executeDeterministicStep(step, resolvedCommand, stepCwd);
2129
+ lastExitCode = result.exitCode;
1636
2130
  const failOnError = step.failOnError !== false;
1637
2131
  if (failOnError && result.exitCode !== 0) {
1638
2132
  throw new Error(`Command failed with exit code ${result.exitCode}: ${result.output.slice(0, 500)}`);
1639
2133
  }
1640
2134
  const output = step.captureOutput !== false ? result.output : `Command completed (exit code ${result.exitCode})`;
1641
- if (step.verification) {
1642
- this.runVerification(step.verification, output, step.name);
1643
- }
2135
+ this.captureStepTerminalEvidence(step.name, { stdout: result.output, combined: result.output }, { exitCode: result.exitCode });
2136
+ const verificationResult = step.verification
2137
+ ? this.runVerification(step.verification, output, step.name)
2138
+ : undefined;
1644
2139
  // Mark completed
1645
2140
  state.row.status = 'completed';
1646
2141
  state.row.output = output;
2142
+ state.row.completionReason = verificationResult?.completionReason;
1647
2143
  state.row.completedAt = new Date().toISOString();
1648
2144
  await this.db.updateStep(state.row.id, {
1649
2145
  status: 'completed',
1650
2146
  output,
2147
+ completionReason: verificationResult?.completionReason,
1651
2148
  completedAt: state.row.completedAt,
1652
2149
  updatedAt: new Date().toISOString(),
1653
2150
  });
1654
2151
  await this.persistStepOutput(runId, step.name, output);
1655
2152
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2153
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, verificationResult?.completionReason);
1656
2154
  return;
1657
2155
  }
2156
+ let commandStdout = '';
2157
+ let commandStderr = '';
1658
2158
  const output = await new Promise((resolve, reject) => {
1659
2159
  const child = cpSpawn('sh', ['-c', resolvedCommand], {
1660
2160
  stdio: 'pipe',
@@ -1689,7 +2189,7 @@ export class WorkflowRunner {
1689
2189
  child.stderr?.on('data', (chunk) => {
1690
2190
  stderrChunks.push(chunk.toString());
1691
2191
  });
1692
- child.on('close', (code) => {
2192
+ child.on('close', (code, signal) => {
1693
2193
  if (timer)
1694
2194
  clearTimeout(timer);
1695
2195
  if (abortHandler && abortSignal) {
@@ -1705,6 +2205,10 @@ export class WorkflowRunner {
1705
2205
  }
1706
2206
  const stdout = stdoutChunks.join('');
1707
2207
  const stderr = stderrChunks.join('');
2208
+ commandStdout = stdout;
2209
+ commandStderr = stderr;
2210
+ lastExitCode = code ?? undefined;
2211
+ lastExitSignal = signal ?? undefined;
1708
2212
  // Check exit code unless failOnError is explicitly false
1709
2213
  const failOnError = step.failOnError !== false;
1710
2214
  if (failOnError && code !== 0 && code !== null) {
@@ -1722,31 +2226,41 @@ export class WorkflowRunner {
1722
2226
  reject(new Error(`Failed to execute command: ${err.message}`));
1723
2227
  });
1724
2228
  });
1725
- if (step.verification) {
1726
- this.runVerification(step.verification, output, step.name);
1727
- }
2229
+ this.captureStepTerminalEvidence(step.name, {
2230
+ stdout: commandStdout || output,
2231
+ stderr: commandStderr,
2232
+ combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
2233
+ }, { exitCode: lastExitCode, exitSignal: lastExitSignal });
2234
+ const verificationResult = step.verification
2235
+ ? this.runVerification(step.verification, output, step.name)
2236
+ : undefined;
1728
2237
  // Mark completed
1729
2238
  state.row.status = 'completed';
1730
2239
  state.row.output = output;
2240
+ state.row.completionReason = verificationResult?.completionReason;
1731
2241
  state.row.completedAt = new Date().toISOString();
1732
2242
  await this.db.updateStep(state.row.id, {
1733
2243
  status: 'completed',
1734
2244
  output,
2245
+ completionReason: verificationResult?.completionReason,
1735
2246
  completedAt: state.row.completedAt,
1736
2247
  updatedAt: new Date().toISOString(),
1737
2248
  });
1738
2249
  // Persist step output
1739
2250
  await this.persistStepOutput(runId, step.name, output);
1740
2251
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
2252
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, verificationResult?.completionReason);
1741
2253
  return;
1742
2254
  }
1743
2255
  catch (err) {
1744
2256
  lastError = err instanceof Error ? err.message : String(err);
2257
+ lastCompletionReason =
2258
+ err instanceof WorkflowCompletionError ? err.completionReason : undefined;
1745
2259
  }
1746
2260
  }
1747
2261
  const errorMsg = lastError ?? 'Unknown error';
1748
2262
  this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
1749
- await this.markStepFailed(state, errorMsg, runId);
2263
+ await this.markStepFailed(state, errorMsg, runId, { exitCode: lastExitCode, exitSignal: lastExitSignal }, lastCompletionReason);
1750
2264
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1751
2265
  }
1752
2266
  /**
@@ -1758,12 +2272,18 @@ export class WorkflowRunner {
1758
2272
  const state = stepStates.get(step.name);
1759
2273
  if (!state)
1760
2274
  throw new Error(`Step state not found: ${step.name}`);
2275
+ let lastExitCode;
2276
+ let lastExitSignal;
1761
2277
  this.checkAborted();
1762
2278
  // Mark step as running
1763
2279
  state.row.status = 'running';
2280
+ state.row.error = undefined;
2281
+ state.row.completionReason = undefined;
1764
2282
  state.row.startedAt = new Date().toISOString();
1765
2283
  await this.db.updateStep(state.row.id, {
1766
2284
  status: 'running',
2285
+ error: undefined,
2286
+ completionReason: undefined,
1767
2287
  startedAt: state.row.startedAt,
1768
2288
  updatedAt: new Date().toISOString(),
1769
2289
  });
@@ -1781,6 +2301,7 @@ export class WorkflowRunner {
1781
2301
  const createBranch = step.createBranch !== false;
1782
2302
  // Resolve workdir for worktree steps (same as deterministic/agent steps)
1783
2303
  const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
2304
+ this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
1784
2305
  if (!branch) {
1785
2306
  const errorMsg = 'Worktree step missing required "branch" field';
1786
2307
  await this.markStepFailed(state, errorMsg, runId);
@@ -1821,6 +2342,10 @@ export class WorkflowRunner {
1821
2342
  await this.markStepFailed(state, errorMsg, runId);
1822
2343
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1823
2344
  }
2345
+ let commandStdout = '';
2346
+ let commandStderr = '';
2347
+ let commandExitCode;
2348
+ let commandExitSignal;
1824
2349
  const output = await new Promise((resolve, reject) => {
1825
2350
  const child = cpSpawn('sh', ['-c', worktreeCmd], {
1826
2351
  stdio: 'pipe',
@@ -1855,7 +2380,7 @@ export class WorkflowRunner {
1855
2380
  child.stderr?.on('data', (chunk) => {
1856
2381
  stderrChunks.push(chunk.toString());
1857
2382
  });
1858
- child.on('close', (code) => {
2383
+ child.on('close', (code, signal) => {
1859
2384
  if (timer)
1860
2385
  clearTimeout(timer);
1861
2386
  if (abortHandler && abortSignal) {
@@ -1869,7 +2394,13 @@ export class WorkflowRunner {
1869
2394
  reject(new Error(`Step "${step.name}" timed out (no step timeout set, check global swarm.timeoutMs)`));
1870
2395
  return;
1871
2396
  }
2397
+ commandStdout = stdoutChunks.join('');
1872
2398
  const stderr = stderrChunks.join('');
2399
+ commandStderr = stderr;
2400
+ commandExitCode = code ?? undefined;
2401
+ commandExitSignal = signal ?? undefined;
2402
+ lastExitCode = commandExitCode;
2403
+ lastExitSignal = commandExitSignal;
1873
2404
  if (code !== 0 && code !== null) {
1874
2405
  reject(new Error(`git worktree add failed with exit code ${code}${stderr ? `: ${stderr.slice(0, 500)}` : ''}`));
1875
2406
  return;
@@ -1886,6 +2417,11 @@ export class WorkflowRunner {
1886
2417
  reject(new Error(`Failed to execute git worktree command: ${err.message}`));
1887
2418
  });
1888
2419
  });
2420
+ this.captureStepTerminalEvidence(step.name, {
2421
+ stdout: commandStdout || output,
2422
+ stderr: commandStderr,
2423
+ combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
2424
+ }, { exitCode: commandExitCode, exitSignal: commandExitSignal });
1889
2425
  // Mark completed
1890
2426
  state.row.status = 'completed';
1891
2427
  state.row.output = output;
@@ -1900,11 +2436,20 @@ export class WorkflowRunner {
1900
2436
  await this.persistStepOutput(runId, step.name, output);
1901
2437
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
1902
2438
  this.postToChannel(`**[${step.name}]** Worktree created at: ${output}\n Branch: ${branch}${!branchExists && createBranch ? ' (created)' : ''}`);
2439
+ this.recordStepToolSideEffect(step.name, {
2440
+ type: 'worktree_created',
2441
+ detail: `Worktree created at ${output}`,
2442
+ raw: { branch, createdBranch: !branchExists && createBranch },
2443
+ });
2444
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt);
1903
2445
  }
1904
2446
  catch (err) {
1905
2447
  const errorMsg = err instanceof Error ? err.message : String(err);
1906
2448
  this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
1907
- await this.markStepFailed(state, errorMsg, runId);
2449
+ await this.markStepFailed(state, errorMsg, runId, {
2450
+ exitCode: lastExitCode,
2451
+ exitSignal: lastExitSignal,
2452
+ });
1908
2453
  throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1909
2454
  }
1910
2455
  }
@@ -1925,8 +2470,13 @@ export class WorkflowRunner {
1925
2470
  }
1926
2471
  const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
1927
2472
  const usesOwnerFlow = specialistDef.interactive !== false;
1928
- const ownerDef = usesOwnerFlow ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
1929
- const reviewDef = usesOwnerFlow ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
2473
+ const currentPattern = this.currentConfig?.swarm?.pattern ?? '';
2474
+ const isHubPattern = WorkflowRunner.HUB_PATTERNS.has(currentPattern);
2475
+ const usesAutoHardening = usesOwnerFlow && isHubPattern && !this.isExplicitInteractiveWorker(specialistDef);
2476
+ const ownerDef = usesAutoHardening ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
2477
+ // Reviewer resolution is deferred to just before the review gate runs (see below)
2478
+ // so that activeReviewers is up-to-date for concurrent steps.
2479
+ let reviewDef;
1930
2480
  const supervised = {
1931
2481
  specialist: specialistDef,
1932
2482
  owner: ownerDef,
@@ -1946,6 +2496,12 @@ export class WorkflowRunner {
1946
2496
  let lastError;
1947
2497
  let lastExitCode;
1948
2498
  let lastExitSignal;
2499
+ let lastCompletionReason;
2500
+ // OWNER_DECISION: INCOMPLETE_RETRY is enforced here at the attempt-loop level so every
2501
+ // interactive execution path shares the same contract:
2502
+ // - retries remaining => throw back into the loop and retry
2503
+ // - maxRetries = 0 => fail immediately after the first retry request
2504
+ // - retry budget exhausted => fail with retry_requested_by_owner, never "completed"
1949
2505
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
1950
2506
  this.checkAborted();
1951
2507
  // Reset per-attempt exit info so stale values don't leak across retries
@@ -1954,6 +2510,11 @@ export class WorkflowRunner {
1954
2510
  if (attempt > 0) {
1955
2511
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
1956
2512
  this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
2513
+ this.recordStepToolSideEffect(step.name, {
2514
+ type: 'retry',
2515
+ detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
2516
+ raw: { attempt, maxRetries },
2517
+ });
1957
2518
  state.row.retryCount = attempt;
1958
2519
  await this.db.updateStep(state.row.id, {
1959
2520
  retryCount: attempt,
@@ -1965,14 +2526,19 @@ export class WorkflowRunner {
1965
2526
  try {
1966
2527
  // Mark step as running
1967
2528
  state.row.status = 'running';
2529
+ state.row.error = undefined;
2530
+ state.row.completionReason = undefined;
1968
2531
  state.row.startedAt = new Date().toISOString();
1969
2532
  await this.db.updateStep(state.row.id, {
1970
2533
  status: 'running',
2534
+ error: undefined,
2535
+ completionReason: undefined,
1971
2536
  startedAt: state.row.startedAt,
1972
2537
  updatedAt: new Date().toISOString(),
1973
2538
  });
1974
2539
  this.emit({ type: 'step:started', runId, stepName: step.name });
1975
- this.postToChannel(`**[${step.name}]** Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`);
2540
+ this.log(`[${step.name}] Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`);
2541
+ this.initializeStepSignalParticipants(step.name, ownerDef.name, specialistDef.name);
1976
2542
  await this.trajectory?.stepStarted(step, ownerDef.name, {
1977
2543
  role: usesDedicatedOwner ? 'owner' : 'specialist',
1978
2544
  owner: ownerDef.name,
@@ -2021,63 +2587,154 @@ export class WorkflowRunner {
2021
2587
  };
2022
2588
  const effectiveSpecialist = applyStepWorkdir(specialistDef);
2023
2589
  const effectiveOwner = applyStepWorkdir(ownerDef);
2590
+ const effectiveReviewer = reviewDef ? applyStepWorkdir(reviewDef) : undefined;
2591
+ this.beginStepEvidence(step.name, [
2592
+ this.resolveAgentCwd(effectiveSpecialist),
2593
+ this.resolveAgentCwd(effectiveOwner),
2594
+ effectiveReviewer ? this.resolveAgentCwd(effectiveReviewer) : undefined,
2595
+ ], state.row.startedAt);
2024
2596
  let specialistOutput;
2025
2597
  let ownerOutput;
2026
2598
  let ownerElapsed;
2599
+ let completionReason;
2027
2600
  if (usesDedicatedOwner) {
2028
2601
  const result = await this.executeSupervisedAgentStep(step, { specialist: effectiveSpecialist, owner: effectiveOwner, reviewer: reviewDef }, resolvedTask, timeoutMs);
2029
2602
  specialistOutput = result.specialistOutput;
2030
2603
  ownerOutput = result.ownerOutput;
2031
2604
  ownerElapsed = result.ownerElapsed;
2605
+ completionReason = result.completionReason;
2032
2606
  }
2033
2607
  else {
2034
2608
  const ownerTask = this.injectStepOwnerContract(step, resolvedTask, effectiveOwner, effectiveSpecialist);
2609
+ const explicitInteractiveWorker = this.isExplicitInteractiveWorker(effectiveOwner);
2610
+ let explicitWorkerHandle;
2611
+ let explicitWorkerCompleted = false;
2612
+ let explicitWorkerOutput = '';
2035
2613
  this.log(`[${step.name}] Spawning owner "${effectiveOwner.name}" (cli: ${effectiveOwner.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
2036
2614
  const resolvedStep = { ...step, task: ownerTask };
2037
2615
  const ownerStartTime = Date.now();
2038
2616
  const spawnResult = this.executor
2039
2617
  ? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs)
2040
- : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs);
2618
+ : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs, {
2619
+ evidenceStepName: step.name,
2620
+ evidenceRole: usesOwnerFlow ? 'owner' : 'specialist',
2621
+ preserveOnIdle: (!isHubPattern || !this.isLeadLikeAgent(effectiveOwner)) ? false : undefined,
2622
+ logicalName: effectiveOwner.name,
2623
+ onSpawned: explicitInteractiveWorker
2624
+ ? ({ agent }) => {
2625
+ explicitWorkerHandle = agent;
2626
+ }
2627
+ : undefined,
2628
+ onChunk: explicitInteractiveWorker
2629
+ ? ({ chunk }) => {
2630
+ explicitWorkerOutput += WorkflowRunner.stripAnsi(chunk);
2631
+ if (!explicitWorkerCompleted &&
2632
+ this.hasExplicitInteractiveWorkerCompletionEvidence(step, explicitWorkerOutput, ownerTask, resolvedTask)) {
2633
+ explicitWorkerCompleted = true;
2634
+ void explicitWorkerHandle?.release().catch(() => undefined);
2635
+ }
2636
+ }
2637
+ : undefined,
2638
+ });
2041
2639
  const output = typeof spawnResult === 'string' ? spawnResult : spawnResult.output;
2042
2640
  lastExitCode = typeof spawnResult === 'string' ? undefined : spawnResult.exitCode;
2043
2641
  lastExitSignal = typeof spawnResult === 'string' ? undefined : spawnResult.exitSignal;
2044
2642
  ownerElapsed = Date.now() - ownerStartTime;
2045
2643
  this.log(`[${step.name}] Owner "${effectiveOwner.name}" exited`);
2046
2644
  if (usesOwnerFlow) {
2047
- this.assertOwnerCompletionMarker(step, output, ownerTask);
2645
+ try {
2646
+ const completionDecision = this.resolveOwnerCompletionDecision(step, output, output, ownerTask, resolvedTask);
2647
+ completionReason = completionDecision.completionReason;
2648
+ }
2649
+ catch (error) {
2650
+ const canUseVerificationFallback = !usesDedicatedOwner &&
2651
+ step.verification &&
2652
+ error instanceof WorkflowCompletionError &&
2653
+ error.completionReason === 'failed_no_evidence';
2654
+ if (!canUseVerificationFallback) {
2655
+ throw error;
2656
+ }
2657
+ }
2048
2658
  }
2049
2659
  specialistOutput = output;
2050
2660
  ownerOutput = output;
2051
2661
  }
2052
- // Run verification if configured
2053
- if (step.verification) {
2054
- this.runVerification(step.verification, specialistOutput, step.name, effectiveOwner.interactive === false ? undefined : resolvedTask);
2662
+ // Even non-interactive steps can emit an explicit OWNER_DECISION contract.
2663
+ // Honor retry/fail/clarification signals before verification-driven success so
2664
+ // real runs stay consistent with interactive owner flows.
2665
+ if (!usesOwnerFlow) {
2666
+ const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
2667
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
2668
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
2669
+ }
2670
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
2671
+ throw new WorkflowCompletionError(`Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'failed_owner_decision');
2672
+ }
2673
+ if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
2674
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
2675
+ }
2676
+ }
2677
+ // Run verification if configured.
2678
+ // Self-owned interactive steps still need verification fallback so
2679
+ // explicit OWNER_DECISION output is not mandatory for the happy path.
2680
+ if (step.verification && (!usesOwnerFlow || !usesDedicatedOwner) && !completionReason) {
2681
+ const verificationResult = this.runVerification(step.verification, specialistOutput, step.name, effectiveOwner.interactive === false ? undefined : resolvedTask);
2682
+ completionReason = verificationResult.completionReason;
2683
+ }
2684
+ // Retry-style owner decisions are control-flow signals, not terminal success states.
2685
+ // Guard here so they cannot accidentally fall through into review or completed-step
2686
+ // persistence if a future branch returns a completionReason instead of throwing.
2687
+ if (completionReason === 'retry_requested_by_owner') {
2688
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested another attempt`, 'retry_requested_by_owner');
2055
2689
  }
2056
2690
  // Every interactive step gets a review pass; pick a dedicated reviewer when available.
2691
+ // Resolve reviewer JIT so activeReviewers reflects concurrent steps that started earlier.
2692
+ if (usesAutoHardening && usesDedicatedOwner && !reviewDef) {
2693
+ reviewDef = this.resolveAutoReviewAgent(ownerDef, agentMap);
2694
+ supervised.reviewer = reviewDef;
2695
+ }
2057
2696
  let combinedOutput = specialistOutput;
2058
2697
  if (usesOwnerFlow && reviewDef) {
2059
- const remainingMs = timeoutMs ? Math.max(0, timeoutMs - ownerElapsed) : undefined;
2060
- const reviewOutput = await this.runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewDef, remainingMs);
2061
- combinedOutput = this.combineStepAndReviewOutput(specialistOutput, reviewOutput);
2698
+ this.activeReviewers.set(reviewDef.name, (this.activeReviewers.get(reviewDef.name) ?? 0) + 1);
2699
+ try {
2700
+ const remainingMs = timeoutMs ? Math.max(0, timeoutMs - ownerElapsed) : undefined;
2701
+ const reviewOutput = await this.runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewDef, remainingMs);
2702
+ combinedOutput = this.combineStepAndReviewOutput(specialistOutput, reviewOutput);
2703
+ }
2704
+ finally {
2705
+ const count = (this.activeReviewers.get(reviewDef.name) ?? 1) - 1;
2706
+ if (count <= 0)
2707
+ this.activeReviewers.delete(reviewDef.name);
2708
+ else
2709
+ this.activeReviewers.set(reviewDef.name, count);
2710
+ }
2062
2711
  }
2063
2712
  // Mark completed
2064
2713
  state.row.status = 'completed';
2065
2714
  state.row.output = combinedOutput;
2715
+ state.row.completionReason = completionReason;
2066
2716
  state.row.completedAt = new Date().toISOString();
2067
2717
  await this.db.updateStep(state.row.id, {
2068
2718
  status: 'completed',
2069
2719
  output: combinedOutput,
2720
+ completionReason,
2070
2721
  completedAt: state.row.completedAt,
2071
2722
  updatedAt: new Date().toISOString(),
2072
2723
  });
2073
2724
  // Persist step output to disk so it survives restarts and is inspectable
2074
2725
  await this.persistStepOutput(runId, step.name, combinedOutput);
2075
2726
  this.emit({ type: 'step:completed', runId, stepName: step.name, output: combinedOutput, exitCode: lastExitCode, exitSignal: lastExitSignal });
2727
+ this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, completionReason);
2076
2728
  await this.trajectory?.stepCompleted(step, combinedOutput, attempt + 1);
2077
2729
  return;
2078
2730
  }
2079
2731
  catch (err) {
2080
2732
  lastError = err instanceof Error ? err.message : String(err);
2733
+ lastCompletionReason =
2734
+ err instanceof WorkflowCompletionError ? err.completionReason : undefined;
2735
+ if (lastCompletionReason === 'retry_requested_by_owner' && attempt >= maxRetries) {
2736
+ lastError = this.buildOwnerRetryBudgetExceededMessage(step.name, maxRetries, lastError);
2737
+ }
2081
2738
  if (err instanceof SpawnExitError) {
2082
2739
  lastExitCode = err.exitCode;
2083
2740
  lastExitSignal = err.exitSignal;
@@ -2104,9 +2761,27 @@ export class WorkflowRunner {
2104
2761
  await this.markStepFailed(state, lastError ?? 'Unknown error', runId, {
2105
2762
  exitCode: lastExitCode,
2106
2763
  exitSignal: lastExitSignal,
2107
- });
2764
+ }, lastCompletionReason);
2108
2765
  throw new Error(`Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`);
2109
2766
  }
2767
+ buildOwnerRetryBudgetExceededMessage(stepName, maxRetries, ownerDecisionError) {
2768
+ const attempts = maxRetries + 1;
2769
+ const prefix = `Step "${stepName}" `;
2770
+ const normalizedDecision = ownerDecisionError?.startsWith(prefix)
2771
+ ? ownerDecisionError.slice(prefix.length).trim()
2772
+ : ownerDecisionError?.trim();
2773
+ const decisionSuffix = normalizedDecision
2774
+ ? ` Latest owner decision: ${normalizedDecision}`
2775
+ : '';
2776
+ if (maxRetries === 0) {
2777
+ return (`Step "${stepName}" owner requested another attempt, but no retries are configured ` +
2778
+ `(maxRetries=0). Configure retries > 0 to allow OWNER_DECISION: INCOMPLETE_RETRY.` +
2779
+ decisionSuffix);
2780
+ }
2781
+ return (`Step "${stepName}" owner requested another attempt after ${attempts} total attempts, ` +
2782
+ `but the retry budget is exhausted (maxRetries=${maxRetries}).` +
2783
+ decisionSuffix);
2784
+ }
2110
2785
  injectStepOwnerContract(step, resolvedTask, ownerDef, specialistDef) {
2111
2786
  if (ownerDef.interactive === false)
2112
2787
  return resolvedTask;
@@ -2119,12 +2794,19 @@ export class WorkflowRunner {
2119
2794
  `- You are the accountable owner for step "${step.name}".\n` +
2120
2795
  (specialistNote ? `- ${specialistNote}\n` : '') +
2121
2796
  `- If you delegate, you must still verify completion yourself.\n` +
2122
- `- Before exiting, provide an explicit completion line: STEP_COMPLETE:${step.name}\n` +
2797
+ `- Preferred final decision format:\n` +
2798
+ ` OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
2799
+ ` REASON: <one sentence>\n` +
2800
+ `- Legacy completion marker still supported: STEP_COMPLETE:${step.name}\n` +
2123
2801
  `- Then self-terminate immediately with /exit.`);
2124
2802
  }
2125
2803
  buildOwnerSupervisorTask(step, originalTask, supervised, workerRuntimeName) {
2126
2804
  const verificationGuide = this.buildSupervisorVerificationGuide(step.verification);
2127
2805
  const channelLine = this.channel ? `#${this.channel}` : '(workflow channel unavailable)';
2806
+ const channelContract = this.channel
2807
+ ? `- Prefer Relaycast/group-chat handoff signals over terminal sentinels: wait for the worker to post \`WORKER_DONE: <brief summary>\` in ${channelLine}\n` +
2808
+ `- When you have validated the handoff, post \`LEAD_DONE: <brief summary>\` to ${channelLine} before you exit\n`
2809
+ : '';
2128
2810
  return (`You are the step owner/supervisor for step "${step.name}".\n\n` +
2129
2811
  `Worker: ${supervised.specialist.name} (runtime: ${workerRuntimeName}) on ${channelLine}\n` +
2130
2812
  `Task: ${originalTask}\n\n` +
@@ -2133,9 +2815,22 @@ export class WorkflowRunner {
2133
2815
  `- Watch ${channelLine} for the worker's progress messages and mirrored PTY output\n` +
2134
2816
  `- Check file changes: run \`git diff --stat\` or inspect expected files directly\n` +
2135
2817
  `- Ask the worker directly on ${channelLine} if you need a status update\n` +
2818
+ channelContract +
2136
2819
  verificationGuide +
2137
- `\nWhen you're satisfied the work is done correctly:\n` +
2138
- `Output exactly: STEP_COMPLETE:${step.name}`);
2820
+ `\nWhen you have enough evidence, return:\n` +
2821
+ `OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
2822
+ `REASON: <one sentence>\n` +
2823
+ `Legacy completion marker still supported: STEP_COMPLETE:${step.name}`);
2824
+ }
2825
+ buildWorkerHandoffTask(step, originalTask, supervised) {
2826
+ if (!this.channel)
2827
+ return originalTask;
2828
+ return (`${originalTask}\n\n---\n` +
2829
+ `WORKER COMPLETION CONTRACT:\n` +
2830
+ `- You are handing work off to owner "${supervised.owner.name}" for step "${step.name}".\n` +
2831
+ `- When your work is ready for review, post to #${this.channel}: \`WORKER_DONE: <brief summary>\`\n` +
2832
+ `- Do not rely on terminal output alone for handoff; use the workflow group chat signal above.\n` +
2833
+ `- After posting your handoff signal, self-terminate with /exit unless the owner asks for follow-up.`);
2139
2834
  }
2140
2835
  buildSupervisorVerificationGuide(verification) {
2141
2836
  if (!verification)
@@ -2155,8 +2850,9 @@ export class WorkflowRunner {
2155
2850
  }
2156
2851
  async executeSupervisedAgentStep(step, supervised, resolvedTask, timeoutMs) {
2157
2852
  if (this.executor) {
2853
+ const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
2158
2854
  const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, supervised.specialist.name);
2159
- const specialistStep = { ...step, task: resolvedTask };
2855
+ const specialistStep = { ...step, task: specialistTask };
2160
2856
  const ownerStep = {
2161
2857
  ...step,
2162
2858
  name: `${step.name}-owner`,
@@ -2164,16 +2860,21 @@ export class WorkflowRunner {
2164
2860
  task: supervisorTask,
2165
2861
  };
2166
2862
  this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" and owner "${supervised.owner.name}"`);
2167
- const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist, resolvedTask, timeoutMs);
2863
+ const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist, specialistTask, timeoutMs);
2168
2864
  // Guard against unhandled rejection if owner fails before specialist settles
2169
2865
  const specialistSettled = specialistPromise.catch(() => undefined);
2170
2866
  try {
2171
2867
  const ownerStartTime = Date.now();
2172
2868
  const ownerOutput = await this.executor.executeAgentStep(ownerStep, supervised.owner, supervisorTask, timeoutMs);
2173
2869
  const ownerElapsed = Date.now() - ownerStartTime;
2174
- this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2175
2870
  const specialistOutput = await specialistPromise;
2176
- return { specialistOutput, ownerOutput, ownerElapsed };
2871
+ const completionDecision = this.resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, supervisorTask, resolvedTask);
2872
+ return {
2873
+ specialistOutput,
2874
+ ownerOutput,
2875
+ ownerElapsed,
2876
+ completionReason: completionDecision.completionReason,
2877
+ };
2177
2878
  }
2178
2879
  catch (error) {
2179
2880
  await specialistSettled;
@@ -2190,10 +2891,14 @@ export class WorkflowRunner {
2190
2891
  resolveWorkerSpawn = resolve;
2191
2892
  rejectWorkerSpawn = reject;
2192
2893
  });
2193
- const specialistStep = { ...step, task: resolvedTask };
2894
+ const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
2895
+ const specialistStep = { ...step, task: specialistTask };
2194
2896
  this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" (cli: ${supervised.specialist.cli})`);
2195
2897
  const workerPromise = this.spawnAndWait(supervised.specialist, specialistStep, timeoutMs, {
2196
2898
  agentNameSuffix: 'worker',
2899
+ evidenceStepName: step.name,
2900
+ evidenceRole: 'worker',
2901
+ logicalName: supervised.specialist.name,
2197
2902
  onSpawned: ({ actualName, agent }) => {
2198
2903
  workerHandle = agent;
2199
2904
  workerRuntimeName = actualName;
@@ -2208,7 +2913,7 @@ export class WorkflowRunner {
2208
2913
  }
2209
2914
  },
2210
2915
  onChunk: ({ agentName, chunk }) => {
2211
- this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk);
2916
+ this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk, supervised.specialist.name);
2212
2917
  },
2213
2918
  }).catch((error) => {
2214
2919
  if (!workerSpawned) {
@@ -2221,14 +2926,24 @@ export class WorkflowRunner {
2221
2926
  workerPromise
2222
2927
  .then((result) => {
2223
2928
  workerReleased = true;
2224
- this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited`);
2929
+ this.log(`[${step.name}] Worker ${workerRuntimeName} exited`);
2930
+ this.recordStepToolSideEffect(step.name, {
2931
+ type: 'worker_exit',
2932
+ detail: `Worker ${workerRuntimeName} exited`,
2933
+ raw: { worker: workerRuntimeName, exitCode: result.exitCode, exitSignal: result.exitSignal },
2934
+ });
2225
2935
  if (step.verification?.type === 'output_contains' && result.output.includes(step.verification.value)) {
2226
- this.postToChannel(`**[${step.name}]** Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`);
2936
+ this.log(`[${step.name}] Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`);
2227
2937
  }
2228
2938
  })
2229
2939
  .catch((error) => {
2230
2940
  const message = error instanceof Error ? error.message : String(error);
2231
2941
  this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited with error: ${message}`);
2942
+ this.recordStepToolSideEffect(step.name, {
2943
+ type: 'worker_error',
2944
+ detail: `Worker ${workerRuntimeName} exited with error: ${message}`,
2945
+ raw: { worker: workerRuntimeName, error: message },
2946
+ });
2232
2947
  });
2233
2948
  await workerReady;
2234
2949
  const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, workerRuntimeName);
@@ -2243,6 +2958,9 @@ export class WorkflowRunner {
2243
2958
  try {
2244
2959
  const ownerResultObj = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, {
2245
2960
  agentNameSuffix: 'owner',
2961
+ evidenceStepName: step.name,
2962
+ evidenceRole: 'owner',
2963
+ logicalName: supervised.owner.name,
2246
2964
  onSpawned: ({ actualName }) => {
2247
2965
  this.supervisedRuntimeAgents.set(actualName, {
2248
2966
  stepName: step.name,
@@ -2257,9 +2975,14 @@ export class WorkflowRunner {
2257
2975
  const ownerElapsed = Date.now() - ownerStartTime;
2258
2976
  const ownerOutput = ownerResultObj.output;
2259
2977
  this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
2260
- this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2261
2978
  const specialistOutput = (await workerPromise).output;
2262
- return { specialistOutput, ownerOutput, ownerElapsed };
2979
+ const completionDecision = this.resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, supervisorTask, resolvedTask);
2980
+ return {
2981
+ specialistOutput,
2982
+ ownerOutput,
2983
+ ownerElapsed,
2984
+ completionReason: completionDecision.completionReason,
2985
+ };
2263
2986
  }
2264
2987
  catch (error) {
2265
2988
  const message = error instanceof Error ? error.message : String(error);
@@ -2273,14 +2996,20 @@ export class WorkflowRunner {
2273
2996
  throw error;
2274
2997
  }
2275
2998
  }
2276
- forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk) {
2277
- const lines = WorkflowRunner.stripAnsi(chunk)
2999
+ forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk, sender) {
3000
+ const lines = WorkflowRunner.scrubForChannel(chunk)
2278
3001
  .split('\n')
2279
3002
  .map((line) => line.trim())
2280
3003
  .filter(Boolean)
2281
3004
  .slice(0, 3);
2282
3005
  for (const line of lines) {
2283
- this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`);
3006
+ this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`, {
3007
+ stepName,
3008
+ sender,
3009
+ actor: agentName,
3010
+ role: roleLabel,
3011
+ origin: 'forwarded_chunk',
3012
+ });
2284
3013
  }
2285
3014
  }
2286
3015
  async recordOwnerMonitoringChunk(step, ownerDef, chunk) {
@@ -2295,6 +3024,11 @@ export class WorkflowRunner {
2295
3024
  if (/STEP_COMPLETE:/i.test(stripped))
2296
3025
  details.push('Declared the step complete');
2297
3026
  for (const detail of details) {
3027
+ this.recordStepToolSideEffect(step.name, {
3028
+ type: 'owner_monitoring',
3029
+ detail,
3030
+ raw: { output: stripped.slice(0, 240), owner: ownerDef.name },
3031
+ });
2298
3032
  await this.trajectory?.ownerMonitoringEvent(step.name, ownerDef.name, detail, {
2299
3033
  output: stripped.slice(0, 240),
2300
3034
  });
@@ -2335,6 +3069,7 @@ export class WorkflowRunner {
2335
3069
  }
2336
3070
  resolveAutoReviewAgent(ownerDef, agentMap) {
2337
3071
  const allDefs = [...agentMap.values()].map((d) => WorkflowRunner.resolveAgentDef(d));
3072
+ const eligible = (def) => def.name !== ownerDef.name && !this.isExplicitInteractiveWorker(def);
2338
3073
  const isReviewer = (def) => {
2339
3074
  const roleLC = def.role?.toLowerCase() ?? '';
2340
3075
  const nameLC = def.name.toLowerCase();
@@ -2358,33 +3093,244 @@ export class WorkflowRunner {
2358
3093
  return 2;
2359
3094
  return isReviewer(def) ? 1 : 0;
2360
3095
  };
2361
- const dedicated = allDefs
2362
- .filter((d) => d.name !== ownerDef.name && isReviewer(d))
2363
- .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name))[0];
3096
+ // Prefer agents not currently assigned as reviewers to avoid double-booking
3097
+ const notBusy = (def) => !this.activeReviewers.has(def.name);
3098
+ const dedicatedCandidates = allDefs
3099
+ .filter((d) => eligible(d) && isReviewer(d))
3100
+ .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name));
3101
+ const dedicated = dedicatedCandidates.find(notBusy) ?? dedicatedCandidates[0];
2364
3102
  if (dedicated)
2365
3103
  return dedicated;
2366
- const alternate = allDefs.find((d) => d.name !== ownerDef.name && d.interactive !== false);
3104
+ const alternateCandidates = allDefs.filter((d) => eligible(d) && d.interactive !== false);
3105
+ const alternate = alternateCandidates.find(notBusy) ?? alternateCandidates[0];
2367
3106
  if (alternate)
2368
3107
  return alternate;
2369
3108
  // Self-review fallback — log a warning since owner reviewing itself is weak.
2370
3109
  return ownerDef;
2371
3110
  }
2372
- assertOwnerCompletionMarker(step, output, injectedTaskText) {
3111
+ isExplicitInteractiveWorker(agentDef) {
3112
+ return agentDef.preset === 'worker' && agentDef.interactive !== false;
3113
+ }
3114
+ resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, injectedTaskText, verificationTaskText) {
3115
+ const hasMarker = this.hasOwnerCompletionMarker(step, ownerOutput, injectedTaskText);
3116
+ const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
3117
+ // INCOMPLETE_RETRY / NEEDS_CLARIFICATION are non-terminal owner outcomes. They never mark
3118
+ // the step complete here; instead they throw back to executeAgentStep(), which decides
3119
+ // whether to retry or fail based on the remaining retry budget for this step.
3120
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
3121
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
3122
+ }
3123
+ if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
3124
+ throw new WorkflowCompletionError(`Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'failed_owner_decision');
3125
+ }
3126
+ if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
3127
+ throw new WorkflowCompletionError(`Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
3128
+ }
3129
+ const verificationResult = step.verification
3130
+ ? this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, {
3131
+ allowFailure: true,
3132
+ completionMarkerFound: hasMarker,
3133
+ })
3134
+ : { passed: false };
3135
+ if (verificationResult.error) {
3136
+ throw new WorkflowCompletionError(`Step "${step.name}" verification failed and no owner decision or evidence established completion: ${verificationResult.error}`, 'failed_verification');
3137
+ }
3138
+ if (explicitOwnerDecision?.decision === 'COMPLETE') {
3139
+ if (!hasMarker) {
3140
+ this.log(`[${step.name}] Structured OWNER_DECISION completed the step without legacy STEP_COMPLETE marker`);
3141
+ }
3142
+ return {
3143
+ completionReason: 'completed_by_owner_decision',
3144
+ ownerDecision: explicitOwnerDecision.decision,
3145
+ reason: explicitOwnerDecision.reason,
3146
+ };
3147
+ }
3148
+ if (verificationResult.passed) {
3149
+ return { completionReason: 'completed_verified' };
3150
+ }
3151
+ const ownerDecision = this.parseOwnerDecision(step, ownerOutput, hasMarker);
3152
+ if (ownerDecision?.decision === 'COMPLETE') {
3153
+ return {
3154
+ completionReason: 'completed_by_owner_decision',
3155
+ ownerDecision: ownerDecision.decision,
3156
+ reason: ownerDecision.reason,
3157
+ };
3158
+ }
3159
+ if (!explicitOwnerDecision) {
3160
+ const evidenceReason = this.judgeOwnerCompletionByEvidence(step.name, ownerOutput);
3161
+ if (evidenceReason) {
3162
+ if (!hasMarker) {
3163
+ this.log(`[${step.name}] Evidence-based completion resolved without legacy STEP_COMPLETE marker`);
3164
+ }
3165
+ return {
3166
+ completionReason: 'completed_by_evidence',
3167
+ reason: evidenceReason,
3168
+ };
3169
+ }
3170
+ }
3171
+ // Process-exit fallback: if the agent exited cleanly (code 0) and verification
3172
+ // passes (or no verification is configured), infer completion rather than failing.
3173
+ // This reduces dependence on agents posting exact coordination signals.
3174
+ const processExitFallback = this.tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput);
3175
+ if (processExitFallback) {
3176
+ this.log(`[${step.name}] Completion inferred from clean process exit (code 0)` +
3177
+ (step.verification ? ' + verification passed' : '') +
3178
+ ' — no coordination signal was required');
3179
+ return processExitFallback;
3180
+ }
3181
+ throw new WorkflowCompletionError(`Step "${step.name}" owner completion decision missing: no OWNER_DECISION, legacy STEP_COMPLETE marker, or evidence-backed completion signal`, 'failed_no_evidence');
3182
+ }
3183
+ hasExplicitInteractiveWorkerCompletionEvidence(step, output, injectedTaskText, verificationTaskText) {
3184
+ try {
3185
+ this.resolveOwnerCompletionDecision(step, output, output, injectedTaskText, verificationTaskText);
3186
+ return true;
3187
+ }
3188
+ catch {
3189
+ return false;
3190
+ }
3191
+ }
3192
+ hasOwnerCompletionMarker(step, output, injectedTaskText) {
2373
3193
  const marker = `STEP_COMPLETE:${step.name}`;
2374
3194
  const taskHasMarker = injectedTaskText.includes(marker);
2375
3195
  const first = output.indexOf(marker);
2376
3196
  if (first === -1) {
2377
- throw new Error(`Step "${step.name}" owner completion marker missing: "${marker}"`);
2378
- }
2379
- // PTY output includes injected task text, so require a second marker occurrence
2380
- // when the marker was present in the injected prompt (either owner contract or supervisor prompt).
2381
- const outputLikelyContainsInjectedPrompt = output.includes('STEP OWNER CONTRACT') || output.includes('Output exactly: STEP_COMPLETE:');
3197
+ return false;
3198
+ }
3199
+ // PTY output often includes echoed prompt text, so when the injected task
3200
+ // itself contains the legacy marker require a second occurrence from the
3201
+ // agent response.
3202
+ const outputLikelyContainsInjectedPrompt = output.includes('STEP OWNER CONTRACT') ||
3203
+ output.includes('Preferred final decision format') ||
3204
+ output.includes('Legacy completion marker still supported') ||
3205
+ output.includes('Output exactly: STEP_COMPLETE:');
2382
3206
  if (taskHasMarker && outputLikelyContainsInjectedPrompt) {
2383
- const hasSecond = output.includes(marker, first + marker.length);
2384
- if (!hasSecond) {
2385
- throw new Error(`Step "${step.name}" owner completion marker missing in agent response: "${marker}"`);
2386
- }
3207
+ return output.includes(marker, first + marker.length);
2387
3208
  }
3209
+ return true;
3210
+ }
3211
+ parseOwnerDecision(step, ownerOutput, hasMarker) {
3212
+ const decisionPattern = /OWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi;
3213
+ const decisionMatches = [...ownerOutput.matchAll(decisionPattern)];
3214
+ const outputLikelyContainsEchoedPrompt = ownerOutput.includes('STEP OWNER CONTRACT') ||
3215
+ ownerOutput.includes('Preferred final decision format') ||
3216
+ ownerOutput.includes('one of COMPLETE, INCOMPLETE_RETRY') ||
3217
+ ownerOutput.includes('COMPLETE|INCOMPLETE_RETRY');
3218
+ if (decisionMatches.length === 0) {
3219
+ if (!hasMarker)
3220
+ return null;
3221
+ return {
3222
+ decision: 'COMPLETE',
3223
+ reason: `Legacy completion marker observed: STEP_COMPLETE:${step.name}`,
3224
+ };
3225
+ }
3226
+ // Filter out matches that appear on a template/instruction line (e.g.
3227
+ // "COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION") to avoid
3228
+ // picking up the template format as the agent's actual decision.
3229
+ const realMatches = outputLikelyContainsEchoedPrompt
3230
+ ? decisionMatches.filter((m) => {
3231
+ const lineStart = ownerOutput.lastIndexOf('\n', m.index) + 1;
3232
+ const lineEnd = ownerOutput.indexOf('\n', m.index);
3233
+ const line = ownerOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
3234
+ return !line.includes('COMPLETE|INCOMPLETE_RETRY');
3235
+ })
3236
+ : decisionMatches;
3237
+ const decisionMatch = realMatches.length > 0
3238
+ ? realMatches[realMatches.length - 1]
3239
+ : decisionMatches[decisionMatches.length - 1];
3240
+ const decision = decisionMatch?.[1]?.toUpperCase();
3241
+ if (decision !== 'COMPLETE' &&
3242
+ decision !== 'INCOMPLETE_RETRY' &&
3243
+ decision !== 'INCOMPLETE_FAIL' &&
3244
+ decision !== 'NEEDS_CLARIFICATION') {
3245
+ return null;
3246
+ }
3247
+ const reasonPattern = /(?:^|\n)REASON:\s*(.+)/gi;
3248
+ const reasonMatches = [...ownerOutput.matchAll(reasonPattern)];
3249
+ const reasonMatch = outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
3250
+ ? reasonMatches[reasonMatches.length - 1]
3251
+ : reasonMatches[0];
3252
+ const reason = reasonMatch?.[1]?.trim();
3253
+ return {
3254
+ decision,
3255
+ reason: reason && reason !== '<one sentence>' ? reason : undefined,
3256
+ };
3257
+ }
3258
+ stripEchoedPromptLines(output, patterns) {
3259
+ return output
3260
+ .split('\n')
3261
+ .map((line) => line.trim())
3262
+ .filter(Boolean)
3263
+ .filter((line) => patterns.every((pattern) => !pattern.test(line)))
3264
+ .join('\n');
3265
+ }
3266
+ firstMeaningfulLine(output) {
3267
+ return output
3268
+ .split('\n')
3269
+ .map((line) => line.trim())
3270
+ .find(Boolean);
3271
+ }
3272
+ judgeOwnerCompletionByEvidence(stepName, ownerOutput) {
3273
+ // Never infer completion when the raw output contains an explicit retry/fail/clarification signal.
3274
+ if (/OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput)) {
3275
+ return null;
3276
+ }
3277
+ const sanitized = this.stripEchoedPromptLines(ownerOutput, [
3278
+ /^STEP OWNER CONTRACT:?$/i,
3279
+ /^Preferred final decision format:?$/i,
3280
+ /^OWNER_DECISION:\s*(?:COMPLETE\|INCOMPLETE_RETRY|<one of COMPLETE, INCOMPLETE_RETRY)/i,
3281
+ /^REASON:\s*<one sentence>$/i,
3282
+ /^Legacy completion marker still supported:/i,
3283
+ /^STEP_COMPLETE:/i,
3284
+ ]);
3285
+ if (!sanitized)
3286
+ return null;
3287
+ const hasExplicitSelfRelease = /Calling\s+(?:[\w.-]+\.)?remove_agent\(\{[^<\n]*"reason":"task completed"/i.test(sanitized);
3288
+ const hasPositiveConclusion = /\b(complete(?:d)?|done|verified|looks correct|safe handoff|artifact verified)\b/i.test(sanitized) ||
3289
+ /\bartifacts?\b.*\b(correct|verified|complete)\b/i.test(sanitized) ||
3290
+ hasExplicitSelfRelease;
3291
+ const evidence = this.getStepCompletionEvidence(stepName);
3292
+ const hasValidatedCoordinationSignal = evidence?.coordinationSignals.some((signal) => signal.kind === 'worker_done' ||
3293
+ signal.kind === 'lead_done' ||
3294
+ signal.kind === 'verification_passed' ||
3295
+ (signal.kind === 'process_exit' && signal.value === '0')) ?? false;
3296
+ const hasValidatedInspectionSignal = evidence?.toolSideEffects.some((effect) => effect.type === 'owner_monitoring' &&
3297
+ (/Checked git diff stats/i.test(effect.detail) ||
3298
+ /Listed files for verification/i.test(effect.detail))) ?? false;
3299
+ const hasEvidenceSignal = hasValidatedCoordinationSignal || hasValidatedInspectionSignal;
3300
+ if (!hasPositiveConclusion || !hasEvidenceSignal) {
3301
+ return null;
3302
+ }
3303
+ return this.firstMeaningfulLine(sanitized) ?? 'Evidence-backed completion';
3304
+ }
3305
+ /**
3306
+ * Process-exit fallback: when agent exits with code 0 but posts no coordination
3307
+ * signal, check if verification passes (or no verification is configured) and
3308
+ * infer completion. This is the key mechanism for reducing agent compliance
3309
+ * dependence — the runner trusts a clean exit + passing verification over
3310
+ * requiring exact signal text.
3311
+ */
3312
+ tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput) {
3313
+ const gracePeriodMs = this.currentConfig?.swarm.completionGracePeriodMs ?? 5000;
3314
+ if (gracePeriodMs === 0)
3315
+ return null;
3316
+ // Never infer completion when the owner explicitly requested retry/fail/clarification.
3317
+ if (ownerOutput && /OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput)) {
3318
+ return null;
3319
+ }
3320
+ const evidence = this.getStepCompletionEvidence(step.name);
3321
+ const hasCleanExit = evidence?.coordinationSignals.some((signal) => signal.kind === 'process_exit' && signal.value === '0') ?? false;
3322
+ if (!hasCleanExit)
3323
+ return null;
3324
+ // If verification is configured, it must pass for the fallback to succeed.
3325
+ if (step.verification) {
3326
+ const verificationResult = this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, { allowFailure: true });
3327
+ if (!verificationResult.passed)
3328
+ return null;
3329
+ }
3330
+ return {
3331
+ completionReason: 'completed_by_process_exit',
3332
+ reason: `Process exited with code 0${step.verification ? ' and verification passed' : ''} — coordination signal not required`,
3333
+ };
2388
3334
  }
2389
3335
  async runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewerDef, timeoutMs) {
2390
3336
  const reviewSnippetMax = 12_000;
@@ -2426,7 +3372,17 @@ export class WorkflowRunner {
2426
3372
  };
2427
3373
  await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer');
2428
3374
  this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`);
3375
+ this.recordStepToolSideEffect(step.name, {
3376
+ type: 'review_started',
3377
+ detail: `Review started with ${reviewerDef.name}`,
3378
+ raw: { reviewer: reviewerDef.name },
3379
+ });
2429
3380
  const emitReviewCompleted = async (decision, reason) => {
3381
+ this.recordStepToolSideEffect(step.name, {
3382
+ type: 'review_completed',
3383
+ detail: `Review ${decision} by ${reviewerDef.name}${reason ? `: ${reason}` : ''}`,
3384
+ raw: { reviewer: reviewerDef.name, decision, reason },
3385
+ });
2430
3386
  await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason);
2431
3387
  this.emit({
2432
3388
  type: 'step:review-completed',
@@ -2470,6 +3426,9 @@ export class WorkflowRunner {
2470
3426
  };
2471
3427
  try {
2472
3428
  await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
3429
+ evidenceStepName: step.name,
3430
+ evidenceRole: 'reviewer',
3431
+ logicalName: reviewerDef.name,
2473
3432
  onSpawned: ({ agent }) => {
2474
3433
  reviewerHandle = agent;
2475
3434
  },
@@ -2507,15 +3466,34 @@ export class WorkflowRunner {
2507
3466
  return reviewOutput;
2508
3467
  }
2509
3468
  parseReviewDecision(reviewOutput) {
3469
+ const strict = this.parseStrictReviewDecision(reviewOutput);
3470
+ if (strict) {
3471
+ return strict;
3472
+ }
3473
+ const tolerant = this.parseTolerantReviewDecision(reviewOutput);
3474
+ if (tolerant) {
3475
+ return tolerant;
3476
+ }
3477
+ return this.judgeReviewDecisionFromEvidence(reviewOutput);
3478
+ }
3479
+ parseStrictReviewDecision(reviewOutput) {
2510
3480
  const decisionPattern = /REVIEW_DECISION:\s*(APPROVE|REJECT)/gi;
2511
3481
  const decisionMatches = [...reviewOutput.matchAll(decisionPattern)];
2512
3482
  if (decisionMatches.length === 0) {
2513
3483
  return null;
2514
3484
  }
2515
3485
  const outputLikelyContainsEchoedPrompt = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
2516
- const decisionMatch = outputLikelyContainsEchoedPrompt && decisionMatches.length > 1
2517
- ? decisionMatches[decisionMatches.length - 1]
2518
- : decisionMatches[0];
3486
+ const realReviewMatches = outputLikelyContainsEchoedPrompt
3487
+ ? decisionMatches.filter((m) => {
3488
+ const lineStart = reviewOutput.lastIndexOf('\n', m.index) + 1;
3489
+ const lineEnd = reviewOutput.indexOf('\n', m.index);
3490
+ const line = reviewOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
3491
+ return !line.includes('APPROVE or REJECT');
3492
+ })
3493
+ : decisionMatches;
3494
+ const decisionMatch = realReviewMatches.length > 0
3495
+ ? realReviewMatches[realReviewMatches.length - 1]
3496
+ : decisionMatches[decisionMatches.length - 1];
2519
3497
  const decision = decisionMatch?.[1]?.toUpperCase();
2520
3498
  if (decision !== 'APPROVE' && decision !== 'REJECT') {
2521
3499
  return null;
@@ -2531,6 +3509,85 @@ export class WorkflowRunner {
2531
3509
  reason: reason && reason !== '<one sentence>' ? reason : undefined,
2532
3510
  };
2533
3511
  }
3512
+ parseTolerantReviewDecision(reviewOutput) {
3513
+ const sanitized = this.stripEchoedPromptLines(reviewOutput, [
3514
+ /^Return exactly:?$/i,
3515
+ /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
3516
+ /^REVIEW_REASON:\s*<one sentence>$/i,
3517
+ ]);
3518
+ if (!sanitized) {
3519
+ return null;
3520
+ }
3521
+ const lines = sanitized
3522
+ .split('\n')
3523
+ .map((line) => line.trim())
3524
+ .filter(Boolean);
3525
+ for (const line of lines) {
3526
+ const candidate = line.replace(/^REVIEW_DECISION:\s*/i, '').trim();
3527
+ const decision = this.normalizeReviewDecisionCandidate(candidate);
3528
+ if (decision) {
3529
+ return {
3530
+ decision,
3531
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
3532
+ };
3533
+ }
3534
+ }
3535
+ const decision = this.normalizeReviewDecisionCandidate(lines.join(' '));
3536
+ if (!decision) {
3537
+ return null;
3538
+ }
3539
+ return {
3540
+ decision,
3541
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
3542
+ };
3543
+ }
3544
+ normalizeReviewDecisionCandidate(candidate) {
3545
+ const value = candidate.trim().toLowerCase();
3546
+ if (!value)
3547
+ return null;
3548
+ if (/^(approve|approved|complete|completed|pass|passed|accept|accepted|lgtm|ship it|looks good|looks fine)\b/i.test(value)) {
3549
+ return 'approved';
3550
+ }
3551
+ if (/^(reject|rejected|retry|retry requested|fail|failed|incomplete|needs clarification|not complete|not ready|insufficient evidence)\b/i.test(value)) {
3552
+ return 'rejected';
3553
+ }
3554
+ return null;
3555
+ }
3556
+ parseReviewReason(reviewOutput) {
3557
+ const reasonPattern = /REVIEW_REASON:\s*(.+)/gi;
3558
+ const reasonMatches = [...reviewOutput.matchAll(reasonPattern)];
3559
+ const outputLikelyContainsEchoedPrompt = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
3560
+ const reasonMatch = outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
3561
+ ? reasonMatches[reasonMatches.length - 1]
3562
+ : reasonMatches[0];
3563
+ const reason = reasonMatch?.[1]?.trim();
3564
+ return reason && reason !== '<one sentence>' ? reason : undefined;
3565
+ }
3566
+ judgeReviewDecisionFromEvidence(reviewOutput) {
3567
+ const sanitized = this.stripEchoedPromptLines(reviewOutput, [
3568
+ /^Return exactly:?$/i,
3569
+ /^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
3570
+ /^REVIEW_REASON:\s*<one sentence>$/i,
3571
+ ]);
3572
+ if (!sanitized) {
3573
+ return null;
3574
+ }
3575
+ const hasPositiveEvidence = /\b(approved?|complete(?:d)?|verified|looks good|looks fine|safe handoff|pass(?:ed)?)\b/i.test(sanitized);
3576
+ const hasNegativeEvidence = /\b(reject(?:ed)?|retry|fail(?:ed)?|incomplete|missing checks|insufficient evidence|not safe)\b/i.test(sanitized);
3577
+ if (hasNegativeEvidence) {
3578
+ return {
3579
+ decision: 'rejected',
3580
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
3581
+ };
3582
+ }
3583
+ if (!hasPositiveEvidence) {
3584
+ return null;
3585
+ }
3586
+ return {
3587
+ decision: 'approved',
3588
+ reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
3589
+ };
3590
+ }
2534
3591
  combineStepAndReviewOutput(stepOutput, reviewOutput) {
2535
3592
  const primary = stepOutput.trimEnd();
2536
3593
  const review = reviewOutput.trim();
@@ -2601,7 +3658,7 @@ export class WorkflowRunner {
2601
3658
  case 'worker':
2602
3659
  return ('You are a non-interactive worker agent. Produce clean, structured output to stdout.\n' +
2603
3660
  'Do NOT use mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn sub-agents.\n' +
2604
- 'Do NOT use mcp__relaycast__dm_send or any Relaycast messaging tools — you have no relay connection.\n\n');
3661
+ 'Do NOT use mcp__relaycast__message_dm_send or any Relaycast messaging tools — you have no relay connection.\n\n');
2605
3662
  case 'reviewer':
2606
3663
  return ('You are a non-interactive reviewer agent. Read the specified files/artifacts and produce a clear verdict.\n' +
2607
3664
  'Do NOT spawn sub-agents or use any Relaycast messaging tools.\n\n');
@@ -2759,11 +3816,19 @@ export class WorkflowRunner {
2759
3816
  reject(new Error(`Failed to spawn ${cmd}: ${err.message}`));
2760
3817
  });
2761
3818
  });
3819
+ this.captureStepTerminalEvidence(step.name, {}, { exitCode, exitSignal });
2762
3820
  return { output, exitCode, exitSignal };
2763
3821
  }
2764
3822
  finally {
2765
- const combinedOutput = stdoutChunks.join('') + stderrChunks.join('');
3823
+ const stdout = stdoutChunks.join('');
3824
+ const stderr = stderrChunks.join('');
3825
+ const combinedOutput = stdout + stderr;
2766
3826
  this.lastFailedStepOutput.set(step.name, combinedOutput);
3827
+ this.captureStepTerminalEvidence(step.name, {
3828
+ stdout,
3829
+ stderr,
3830
+ combined: combinedOutput,
3831
+ });
2767
3832
  stopHeartbeat?.();
2768
3833
  logStream.end();
2769
3834
  this.unregisterWorker(agentName);
@@ -2777,6 +3842,7 @@ export class WorkflowRunner {
2777
3842
  if (!this.relay) {
2778
3843
  throw new Error('AgentRelay not initialized');
2779
3844
  }
3845
+ const evidenceStepName = options.evidenceStepName ?? step.name;
2780
3846
  // Deterministic name: step name + optional role suffix + first 8 chars of run ID.
2781
3847
  const requestedName = `${step.name}${options.agentNameSuffix ? `-${options.agentNameSuffix}` : ''}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
2782
3848
  let agentName = requestedName;
@@ -2823,11 +3889,17 @@ export class WorkflowRunner {
2823
3889
  let ptyChunks = [];
2824
3890
  try {
2825
3891
  const agentCwd = this.resolveAgentCwd(agentDef);
3892
+ const interactiveSpawnPolicy = resolveSpawnPolicy({
3893
+ AGENT_NAME: agentName,
3894
+ AGENT_CLI: agentDef.cli,
3895
+ RELAY_API_KEY: this.relayApiKey ?? 'workflow-runner',
3896
+ AGENT_CHANNELS: (agentChannels ?? []).join(','),
3897
+ });
2826
3898
  agent = await this.relay.spawnPty({
2827
3899
  name: agentName,
2828
3900
  cli: agentDef.cli,
2829
3901
  model: agentDef.constraints?.model,
2830
- args: [],
3902
+ args: interactiveSpawnPolicy.args,
2831
3903
  channels: agentChannels,
2832
3904
  task: taskWithExit,
2833
3905
  idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
@@ -2859,16 +3931,27 @@ export class WorkflowRunner {
2859
3931
  const oldListener = this.ptyListeners.get(oldName);
2860
3932
  if (oldListener) {
2861
3933
  this.ptyListeners.delete(oldName);
2862
- this.ptyListeners.set(agent.name, (chunk) => {
3934
+ const resolvedAgentName = agent.name;
3935
+ this.ptyListeners.set(resolvedAgentName, (chunk) => {
2863
3936
  const stripped = WorkflowRunner.stripAnsi(chunk);
2864
- this.ptyOutputBuffers.get(agent.name)?.push(stripped);
3937
+ this.ptyOutputBuffers.get(resolvedAgentName)?.push(stripped);
2865
3938
  newLogStream.write(chunk);
2866
- options.onChunk?.({ agentName: agent.name, chunk });
3939
+ options.onChunk?.({ agentName: resolvedAgentName, chunk });
2867
3940
  });
2868
3941
  }
2869
3942
  agentName = agent.name;
2870
3943
  }
2871
- await options.onSpawned?.({ requestedName, actualName: agent.name, agent });
3944
+ const liveAgent = agent;
3945
+ await options.onSpawned?.({ requestedName, actualName: liveAgent.name, agent: liveAgent });
3946
+ this.runtimeStepAgents.set(liveAgent.name, {
3947
+ stepName: evidenceStepName,
3948
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
3949
+ logicalName: options.logicalName ?? agentDef.name,
3950
+ });
3951
+ const signalParticipant = this.resolveSignalParticipantKind(options.evidenceRole ?? agentDef.role ?? 'agent');
3952
+ if (signalParticipant) {
3953
+ this.rememberStepSignalSender(evidenceStepName, signalParticipant, liveAgent.name, options.logicalName ?? agentDef.name);
3954
+ }
2872
3955
  // Register in workers.json so `agents:kill` can find this agent
2873
3956
  let workerPid;
2874
3957
  try {
@@ -2881,8 +3964,8 @@ export class WorkflowRunner {
2881
3964
  this.registerWorker(agentName, agentDef.cli, step.task ?? '', workerPid);
2882
3965
  // Register the spawned agent in Relaycast for observability + start heartbeat
2883
3966
  if (this.relayApiKey) {
2884
- const agentClient = await this.registerRelaycastExternalAgent(agent.name, `Workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
2885
- console.warn(`[WorkflowRunner] Failed to register ${agent.name} in Relaycast:`, err?.message ?? err);
3967
+ const agentClient = await this.registerRelaycastExternalAgent(liveAgent.name, `Workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
3968
+ console.warn(`[WorkflowRunner] Failed to register ${liveAgent.name} in Relaycast:`, err?.message ?? err);
2886
3969
  return null;
2887
3970
  });
2888
3971
  // Keep the agent online in the dashboard while it's working
@@ -2895,30 +3978,30 @@ export class WorkflowRunner {
2895
3978
  const channelAgent = await this.ensureRelaycastRunnerAgent().catch(() => null);
2896
3979
  await channelAgent?.channels.invite(this.channel, agent.name).catch(() => { });
2897
3980
  }
2898
- // Post assignment notification (no task content — task arrives via direct broker injection)
2899
- this.postToChannel(`**[${step.name}]** Assigned to \`${agent.name}\``);
3981
+ // Keep operational assignment chatter out of the agent coordination channel.
3982
+ this.log(`[${step.name}] Assigned to ${agent.name}`);
2900
3983
  // Register agent handle for hub-mediated nudging
2901
3984
  this.activeAgentHandles.set(agentName, agent);
2902
3985
  // Wait for agent to exit, with idle nudging if configured
2903
- exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs);
3986
+ exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs, options.preserveOnIdle ?? this.shouldPreserveIdleSupervisor(agentDef, step, options.evidenceRole));
2904
3987
  // Stop heartbeat now that agent has exited
2905
3988
  stopHeartbeat?.();
2906
3989
  if (exitResult === 'timeout') {
2907
- // Safety net: check if the verification file exists before giving up.
2908
- // The agent may have completed work but failed to /exit.
2909
- if (step.verification?.type === 'file_exists') {
2910
- const verifyPath = path.resolve(this.cwd, step.verification.value);
2911
- if (existsSync(verifyPath)) {
2912
- this.postToChannel(`**[${step.name}]** Agent idle after completing work — releasing`);
2913
- await agent.release();
2914
- // Fall through to read output below
2915
- }
2916
- else {
3990
+ // Grace-period fallback: before failing, check if the agent completed
3991
+ // its work but just failed to self-terminate. Run verification if
3992
+ // configured — a passing gate + timeout is better than a hard failure.
3993
+ let timeoutRecovered = false;
3994
+ if (step.verification) {
3995
+ const ptyOutput = (this.ptyOutputBuffers.get(agentName) ?? []).join('');
3996
+ const verificationResult = this.runVerification(step.verification, ptyOutput, step.name, undefined, { allowFailure: true });
3997
+ if (verificationResult.passed) {
3998
+ this.log(`[${step.name}] Agent timed out but verification passed — treating as complete`);
3999
+ this.postToChannel(`**[${step.name}]** Agent idle after completing work — verification passed, releasing`);
2917
4000
  await agent.release();
2918
- throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
4001
+ timeoutRecovered = true;
2919
4002
  }
2920
4003
  }
2921
- else {
4004
+ if (!timeoutRecovered) {
2922
4005
  await agent.release();
2923
4006
  throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
2924
4007
  }
@@ -2931,6 +4014,19 @@ export class WorkflowRunner {
2931
4014
  // Snapshot PTY chunks before cleanup — we need them for output reading below
2932
4015
  ptyChunks = this.ptyOutputBuffers.get(agentName) ?? [];
2933
4016
  this.lastFailedStepOutput.set(step.name, ptyChunks.join(''));
4017
+ if (ptyChunks.length > 0 || agent?.exitCode !== undefined || agent?.exitSignal !== undefined) {
4018
+ this.captureStepTerminalEvidence(evidenceStepName, {
4019
+ stdout: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
4020
+ combined: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
4021
+ }, {
4022
+ exitCode: agent?.exitCode,
4023
+ exitSignal: agent?.exitSignal,
4024
+ }, {
4025
+ sender: options.logicalName ?? agentDef.name,
4026
+ actor: agent?.name ?? agentName,
4027
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
4028
+ });
4029
+ }
2934
4030
  // Always clean up PTY resources — prevents fd leaks if spawnPty or waitForExit throws
2935
4031
  stopHeartbeat?.();
2936
4032
  this.activeAgentHandles.delete(agentName);
@@ -2943,6 +4039,7 @@ export class WorkflowRunner {
2943
4039
  }
2944
4040
  this.unregisterWorker(agentName);
2945
4041
  this.supervisedRuntimeAgents.delete(agentName);
4042
+ this.runtimeStepAgents.delete(agentName);
2946
4043
  }
2947
4044
  let output;
2948
4045
  if (ptyChunks.length > 0) {
@@ -2959,6 +4056,13 @@ export class WorkflowRunner {
2959
4056
  ? 'Agent completed (idle — treated as done)'
2960
4057
  : `Agent exited (${exitResult})`;
2961
4058
  }
4059
+ if (ptyChunks.length === 0) {
4060
+ this.captureStepTerminalEvidence(evidenceStepName, { stdout: output, combined: output }, { exitCode: agent?.exitCode, exitSignal: agent?.exitSignal }, {
4061
+ sender: options.logicalName ?? agentDef.name,
4062
+ actor: agent?.name ?? agentName,
4063
+ role: options.evidenceRole ?? agentDef.role ?? 'agent',
4064
+ });
4065
+ }
2962
4066
  return {
2963
4067
  output,
2964
4068
  exitCode: agent?.exitCode,
@@ -2986,31 +4090,106 @@ export class WorkflowRunner {
2986
4090
  'orchestrator',
2987
4091
  'auctioneer',
2988
4092
  ]);
4093
+ isLeadLikeAgent(agentDef, roleOverride) {
4094
+ if (agentDef.preset === 'lead')
4095
+ return true;
4096
+ const role = (roleOverride ?? agentDef.role ?? '').toLowerCase();
4097
+ const nameLC = agentDef.name.toLowerCase();
4098
+ return [...WorkflowRunner.HUB_ROLES].some((hubRole) => new RegExp(`\\b${hubRole}\\b`, 'i').test(nameLC) ||
4099
+ new RegExp(`\\b${hubRole}\\b`, 'i').test(role));
4100
+ }
4101
+ shouldPreserveIdleSupervisor(agentDef, step, evidenceRole) {
4102
+ if (evidenceRole && /\bowner\b/i.test(evidenceRole)) {
4103
+ return true;
4104
+ }
4105
+ if (!this.isLeadLikeAgent(agentDef, evidenceRole)) {
4106
+ return false;
4107
+ }
4108
+ const task = step.task ?? '';
4109
+ return /\b(wait|waiting|monitor|supervis|check inbox|check.*channel|poll|DONE|_DONE|signal|handoff)\b/i.test(task);
4110
+ }
2989
4111
  /**
2990
4112
  * Wait for agent exit with idle detection and nudging.
2991
4113
  * If no idle nudge config is set, falls through to simple waitForExit.
2992
4114
  */
2993
- async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs) {
4115
+ async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs, preserveIdleSupervisor = false) {
2994
4116
  const nudgeConfig = this.currentConfig?.swarm.idleNudge;
2995
4117
  if (!nudgeConfig) {
2996
- // Idle = done: race exit against idle. Whichever fires first completes the step.
2997
- const result = await Promise.race([
2998
- agent.waitForExit(timeoutMs).then((r) => ({ kind: 'exit', result: r })),
2999
- agent.waitForIdle(timeoutMs).then((r) => ({ kind: 'idle', result: r })),
3000
- ]);
3001
- if (result.kind === 'idle' && result.result === 'idle') {
3002
- this.log(`[${step.name}] Agent "${agent.name}" went idle — treating as complete`);
3003
- this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — treating as complete`);
3004
- await agent.release();
3005
- return 'released';
3006
- }
3007
- // Exit won the race, or idle returned 'exited'/'timeout' — pass through.
3008
- return result.result;
4118
+ if (preserveIdleSupervisor) {
4119
+ this.log(`[${step.name}] Supervising agent "${agent.name}" may idle while waiting — using exit-only completion`);
4120
+ return agent.waitForExit(timeoutMs);
4121
+ }
4122
+ // Idle = done: race exit against idle, but only accept idle if verification passes.
4123
+ const idleLoopStart = Date.now();
4124
+ while (true) {
4125
+ const elapsed = Date.now() - idleLoopStart;
4126
+ const remaining = timeoutMs != null ? Math.max(0, timeoutMs - elapsed) : undefined;
4127
+ if (remaining != null && remaining <= 0) {
4128
+ return 'timeout';
4129
+ }
4130
+ const result = await Promise.race([
4131
+ agent.waitForExit(remaining).then((r) => ({ kind: 'exit', result: r })),
4132
+ agent.waitForIdle(remaining).then((r) => ({ kind: 'idle', result: r })),
4133
+ ]);
4134
+ if (result.kind === 'idle' && result.result === 'idle') {
4135
+ // Check verification before treating idle as complete.
4136
+ // Mirror runVerification's double-occurrence guard: if the task text
4137
+ // contains the token (from the prompt instruction), require a second
4138
+ // occurrence from the agent's actual output to avoid false positives.
4139
+ if (step.verification && step.verification.type === 'output_contains') {
4140
+ const token = step.verification.value;
4141
+ const ptyOutput = (this.ptyOutputBuffers.get(agent.name) ?? []).join('');
4142
+ const taskText = step.task ?? '';
4143
+ const taskHasToken = taskText.includes(token);
4144
+ let verificationPassed = true;
4145
+ if (taskHasToken) {
4146
+ const first = ptyOutput.indexOf(token);
4147
+ verificationPassed = first !== -1 && ptyOutput.includes(token, first + token.length);
4148
+ }
4149
+ else {
4150
+ verificationPassed = ptyOutput.includes(token);
4151
+ }
4152
+ if (!verificationPassed) {
4153
+ // The broker fires agent_idle only once per idle transition.
4154
+ // If the agent is still working (will produce output then idle again),
4155
+ // continuing the loop works. But if the agent is permanently idle,
4156
+ // waitForIdle won't resolve again. Wait briefly for new output,
4157
+ // then release and let upstream verification handle the result.
4158
+ this.log(`[${step.name}] Agent "${agent.name}" went idle but verification not yet passed — waiting for more output`);
4159
+ const idleGraceSecs = 15;
4160
+ const graceResult = await Promise.race([
4161
+ agent.waitForExit(idleGraceSecs * 1000).then((r) => ({ kind: 'exit', result: r })),
4162
+ agent.waitForIdle(idleGraceSecs * 1000).then((r) => ({ kind: 'idle', result: r })),
4163
+ ]);
4164
+ if (graceResult.kind === 'idle' && graceResult.result === 'idle') {
4165
+ // Agent went idle again after producing output — re-check verification
4166
+ continue;
4167
+ }
4168
+ if (graceResult.kind === 'exit') {
4169
+ return graceResult.result;
4170
+ }
4171
+ // Grace period timed out — agent is permanently idle without verification.
4172
+ // Release and let upstream executeAgentStep handle verification.
4173
+ this.log(`[${step.name}] Agent "${agent.name}" still idle after ${idleGraceSecs}s grace — releasing`);
4174
+ this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — releasing (verification pending)`);
4175
+ await agent.release();
4176
+ return 'released';
4177
+ }
4178
+ }
4179
+ this.log(`[${step.name}] Agent "${agent.name}" went idle — treating as complete`);
4180
+ this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — treating as complete`);
4181
+ await agent.release();
4182
+ return 'released';
4183
+ }
4184
+ // Exit won the race, or idle returned 'exited'/'timeout' — pass through.
4185
+ return result.result;
4186
+ }
3009
4187
  }
3010
4188
  const nudgeAfterMs = nudgeConfig.nudgeAfterMs ?? 120_000;
3011
4189
  const escalateAfterMs = nudgeConfig.escalateAfterMs ?? 120_000;
3012
4190
  const maxNudges = nudgeConfig.maxNudges ?? 1;
3013
4191
  let nudgeCount = 0;
4192
+ let preservedSupervisorNoticeSent = false;
3014
4193
  const startTime = Date.now();
3015
4194
  while (true) {
3016
4195
  // Calculate remaining time from overall timeout
@@ -3045,6 +4224,14 @@ export class WorkflowRunner {
3045
4224
  this.emit({ type: 'step:nudged', runId: this.currentRunId ?? '', stepName: step.name, nudgeCount });
3046
4225
  continue;
3047
4226
  }
4227
+ if (preserveIdleSupervisor) {
4228
+ if (!preservedSupervisorNoticeSent) {
4229
+ this.log(`[${step.name}] Supervising agent "${agent.name}" stayed idle after ${nudgeCount} nudge(s) — preserving until exit or timeout`);
4230
+ this.postToChannel(`**[${step.name}]** Supervising agent \`${agent.name}\` is waiting on handoff — keeping it alive until it exits or the step times out`);
4231
+ preservedSupervisorNoticeSent = true;
4232
+ }
4233
+ continue;
4234
+ }
3048
4235
  // Exhausted nudges — force-release
3049
4236
  this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` still idle after ${nudgeCount} nudge(s) — force-releasing`);
3050
4237
  this.emit({ type: 'step:force-released', runId: this.currentRunId ?? '', stepName: step.name });
@@ -3114,7 +4301,31 @@ export class WorkflowRunner {
3114
4301
  return undefined;
3115
4302
  }
3116
4303
  // ── Verification ────────────────────────────────────────────────────────
3117
- runVerification(check, output, stepName, injectedTaskText) {
4304
+ runVerification(check, output, stepName, injectedTaskText, options) {
4305
+ const fail = (message) => {
4306
+ const observedAt = new Date().toISOString();
4307
+ this.recordStepToolSideEffect(stepName, {
4308
+ type: 'verification_observed',
4309
+ detail: message,
4310
+ observedAt,
4311
+ raw: { passed: false, type: check.type, value: check.value },
4312
+ });
4313
+ this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
4314
+ kind: 'verification_failed',
4315
+ source: 'verification',
4316
+ text: message,
4317
+ observedAt,
4318
+ value: check.value,
4319
+ });
4320
+ if (options?.allowFailure) {
4321
+ return {
4322
+ passed: false,
4323
+ completionReason: 'failed_verification',
4324
+ error: message,
4325
+ };
4326
+ }
4327
+ throw new WorkflowCompletionError(message, 'failed_verification');
4328
+ };
3118
4329
  switch (check.type) {
3119
4330
  case 'output_contains': {
3120
4331
  // Guard against false positives: the PTY captures the injected task text
@@ -3128,12 +4339,12 @@ export class WorkflowRunner {
3128
4339
  const first = output.indexOf(token);
3129
4340
  const hasSecond = first !== -1 && output.includes(token, first + token.length);
3130
4341
  if (!hasSecond) {
3131
- throw new Error(`Verification failed for "${stepName}": output does not contain "${token}" ` +
4342
+ return fail(`Verification failed for "${stepName}": output does not contain "${token}" ` +
3132
4343
  `(token found only in task injection — agent must output it explicitly)`);
3133
4344
  }
3134
4345
  }
3135
4346
  else if (!output.includes(token)) {
3136
- throw new Error(`Verification failed for "${stepName}": output does not contain "${token}"`);
4347
+ return fail(`Verification failed for "${stepName}": output does not contain "${token}"`);
3137
4348
  }
3138
4349
  break;
3139
4350
  }
@@ -3142,13 +4353,37 @@ export class WorkflowRunner {
3142
4353
  break;
3143
4354
  case 'file_exists':
3144
4355
  if (!existsSync(path.resolve(this.cwd, check.value))) {
3145
- throw new Error(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
4356
+ return fail(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
3146
4357
  }
3147
4358
  break;
3148
4359
  case 'custom':
3149
4360
  // Custom verifications are evaluated by callers; no-op here
3150
- break;
3151
- }
4361
+ return { passed: false };
4362
+ }
4363
+ if (options?.completionMarkerFound === false) {
4364
+ this.log(`[${stepName}] Verification passed without legacy STEP_COMPLETE marker; allowing completion`);
4365
+ }
4366
+ const successMessage = options?.completionMarkerFound === false
4367
+ ? `Verification passed without legacy STEP_COMPLETE marker`
4368
+ : `Verification passed`;
4369
+ const observedAt = new Date().toISOString();
4370
+ this.recordStepToolSideEffect(stepName, {
4371
+ type: 'verification_observed',
4372
+ detail: successMessage,
4373
+ observedAt,
4374
+ raw: { passed: true, type: check.type, value: check.value },
4375
+ });
4376
+ this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
4377
+ kind: 'verification_passed',
4378
+ source: 'verification',
4379
+ text: successMessage,
4380
+ observedAt,
4381
+ value: check.value,
4382
+ });
4383
+ return {
4384
+ passed: true,
4385
+ completionReason: 'completed_verified',
4386
+ };
3152
4387
  }
3153
4388
  // ── State helpers ─────────────────────────────────────────────────────
3154
4389
  async updateRunStatus(runId, status, error) {
@@ -3164,13 +4399,16 @@ export class WorkflowRunner {
3164
4399
  }
3165
4400
  await this.db.updateRun(runId, patch);
3166
4401
  }
3167
- async markStepFailed(state, error, runId, exitInfo) {
4402
+ async markStepFailed(state, error, runId, exitInfo, completionReason) {
4403
+ this.captureStepTerminalEvidence(state.row.stepName, {}, exitInfo);
3168
4404
  state.row.status = 'failed';
3169
4405
  state.row.error = error;
4406
+ state.row.completionReason = completionReason;
3170
4407
  state.row.completedAt = new Date().toISOString();
3171
4408
  await this.db.updateStep(state.row.id, {
3172
4409
  status: 'failed',
3173
4410
  error,
4411
+ completionReason,
3174
4412
  completedAt: state.row.completedAt,
3175
4413
  updatedAt: new Date().toISOString(),
3176
4414
  });
@@ -3182,6 +4420,7 @@ export class WorkflowRunner {
3182
4420
  exitCode: exitInfo?.exitCode,
3183
4421
  exitSignal: exitInfo?.exitSignal,
3184
4422
  });
4423
+ this.finalizeStepEvidence(state.row.stepName, 'failed', state.row.completedAt, completionReason);
3185
4424
  }
3186
4425
  async markDownstreamSkipped(failedStepName, allSteps, stepStates, runId) {
3187
4426
  const queue = [failedStepName];
@@ -3275,7 +4514,7 @@ export class WorkflowRunner {
3275
4514
  'RELAY SETUP — do this FIRST before any other relay tool:\n' +
3276
4515
  `1. Call: register(name="${agentName}")\n` +
3277
4516
  ' This authenticates you in the Relaycast workspace.\n' +
3278
- ' ALL relay tools (mcp__relaycast__dm_send, mcp__relaycast__inbox_check, mcp__relaycast__message_post, etc.) require\n' +
4517
+ ' ALL relay tools (mcp__relaycast__message_dm_send, mcp__relaycast__message_inbox_check, mcp__relaycast__message_post, etc.) require\n' +
3279
4518
  ' registration first — they will fail with "Not registered" otherwise.\n' +
3280
4519
  `2. Your agent name is "${agentName}" — use this exact name when registering.`);
3281
4520
  }
@@ -3298,8 +4537,8 @@ export class WorkflowRunner {
3298
4537
  'you should break it down and delegate to helper agents to avoid timeouts.\n\n' +
3299
4538
  'Option 1 — Spawn relay agents (for real parallel coding work):\n' +
3300
4539
  ' - mcp__relaycast__agent_add(name="helper-1", cli="claude", task="Specific subtask description")\n' +
3301
- ' - Coordinate via mcp__relaycast__dm_send(to="helper-1", text="...")\n' +
3302
- ' - Check on them with mcp__relaycast__inbox_check()\n' +
4540
+ ' - Coordinate via mcp__relaycast__message_dm_send(to="helper-1", text="...")\n' +
4541
+ ' - Check on them with mcp__relaycast__message_inbox_check()\n' +
3303
4542
  ' - Clean up when done: mcp__relaycast__agent_remove(name="helper-1")\n\n' +
3304
4543
  subAgentOption +
3305
4544
  'Guidelines:\n' +
@@ -3311,9 +4550,23 @@ export class WorkflowRunner {
3311
4550
  ' "RELAY SETUP: First call register(name=\'<exact-agent-name>\') before any other relay tool."');
3312
4551
  }
3313
4552
  /** Post a message to the workflow channel. Fire-and-forget — never throws or blocks. */
3314
- postToChannel(text) {
4553
+ postToChannel(text, options = {}) {
3315
4554
  if (!this.relayApiKey || !this.channel)
3316
4555
  return;
4556
+ this.recordChannelEvidence(text, options);
4557
+ const stepName = options.stepName ?? this.inferStepNameFromChannelText(text);
4558
+ if (stepName) {
4559
+ this.recordStepToolSideEffect(stepName, {
4560
+ type: 'post_channel_message',
4561
+ detail: text.slice(0, 240),
4562
+ raw: {
4563
+ actor: options.actor,
4564
+ role: options.role,
4565
+ target: options.target ?? this.channel,
4566
+ origin: options.origin ?? 'runner_post',
4567
+ },
4568
+ });
4569
+ }
3317
4570
  this.ensureRelaycastRunnerAgent()
3318
4571
  .then((agent) => agent.send(this.channel, text))
3319
4572
  .catch(() => {
@@ -3471,6 +4724,9 @@ export class WorkflowRunner {
3471
4724
  output: state.row.output,
3472
4725
  error: state.row.error,
3473
4726
  verificationPassed: state.row.status === 'completed' && stepsWithVerification.has(name),
4727
+ completionMode: state.row.completionReason
4728
+ ? this.buildStepCompletionDecision(name, state.row.completionReason)?.mode
4729
+ : undefined,
3474
4730
  });
3475
4731
  }
3476
4732
  return outcomes;
@@ -3603,24 +4859,30 @@ export class WorkflowRunner {
3603
4859
  /** Persist step output to disk and post full output as a channel message. */
3604
4860
  async persistStepOutput(runId, stepName, output) {
3605
4861
  // 1. Write to disk
4862
+ const outputPath = path.join(this.getStepOutputDir(runId), `${stepName}.md`);
3606
4863
  try {
3607
4864
  const dir = this.getStepOutputDir(runId);
3608
4865
  mkdirSync(dir, { recursive: true });
3609
4866
  const cleaned = WorkflowRunner.stripAnsi(output);
3610
- await writeFile(path.join(dir, `${stepName}.md`), cleaned);
4867
+ await writeFile(outputPath, cleaned);
3611
4868
  }
3612
4869
  catch {
3613
4870
  // Non-critical
3614
4871
  }
4872
+ this.recordStepToolSideEffect(stepName, {
4873
+ type: 'persist_step_output',
4874
+ detail: `Persisted step output to ${this.normalizeEvidencePath(outputPath)}`,
4875
+ raw: { path: outputPath },
4876
+ });
3615
4877
  // 2. Post scrubbed output as a single channel message (most recent tail only)
3616
4878
  const scrubbed = WorkflowRunner.scrubForChannel(output);
3617
4879
  if (scrubbed.length === 0) {
3618
- this.postToChannel(`**[${stepName}]** Step completed — output written to disk`);
4880
+ this.postToChannel(`**[${stepName}]** Step completed — output written to disk`, { stepName });
3619
4881
  return;
3620
4882
  }
3621
4883
  const maxMsg = 2000;
3622
4884
  const preview = scrubbed.length > maxMsg ? scrubbed.slice(-maxMsg) : scrubbed;
3623
- this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``);
4885
+ this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``, { stepName });
3624
4886
  }
3625
4887
  /** Load persisted step output from disk. */
3626
4888
  loadStepOutput(runId, stepName) {