agent-relay 2.3.12 → 2.3.14

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/package.json +20 -20
  2. package/packages/acp-bridge/package.json +2 -2
  3. package/packages/bridge/package.json +7 -7
  4. package/packages/broker-sdk/dist/protocol.d.ts +4 -0
  5. package/packages/broker-sdk/dist/protocol.d.ts.map +1 -1
  6. package/packages/broker-sdk/dist/relay.d.ts +6 -0
  7. package/packages/broker-sdk/dist/relay.d.ts.map +1 -1
  8. package/packages/broker-sdk/dist/relay.js +8 -0
  9. package/packages/broker-sdk/dist/relay.js.map +1 -1
  10. package/packages/broker-sdk/dist/relaycast.d.ts +17 -1
  11. package/packages/broker-sdk/dist/relaycast.d.ts.map +1 -1
  12. package/packages/broker-sdk/dist/relaycast.js +79 -5
  13. package/packages/broker-sdk/dist/relaycast.js.map +1 -1
  14. package/packages/broker-sdk/dist/workflows/index.d.ts +1 -0
  15. package/packages/broker-sdk/dist/workflows/index.d.ts.map +1 -1
  16. package/packages/broker-sdk/dist/workflows/index.js +1 -0
  17. package/packages/broker-sdk/dist/workflows/index.js.map +1 -1
  18. package/packages/broker-sdk/dist/workflows/run.d.ts +3 -1
  19. package/packages/broker-sdk/dist/workflows/run.d.ts.map +1 -1
  20. package/packages/broker-sdk/dist/workflows/run.js +4 -0
  21. package/packages/broker-sdk/dist/workflows/run.js.map +1 -1
  22. package/packages/broker-sdk/dist/workflows/runner.d.ts +13 -0
  23. package/packages/broker-sdk/dist/workflows/runner.d.ts.map +1 -1
  24. package/packages/broker-sdk/dist/workflows/runner.js +279 -29
  25. package/packages/broker-sdk/dist/workflows/runner.js.map +1 -1
  26. package/packages/broker-sdk/dist/workflows/trajectory.d.ts +80 -0
  27. package/packages/broker-sdk/dist/workflows/trajectory.d.ts.map +1 -0
  28. package/packages/broker-sdk/dist/workflows/trajectory.js +362 -0
  29. package/packages/broker-sdk/dist/workflows/trajectory.js.map +1 -0
  30. package/packages/broker-sdk/dist/workflows/types.d.ts +12 -0
  31. package/packages/broker-sdk/dist/workflows/types.d.ts.map +1 -1
  32. package/packages/broker-sdk/package.json +2 -2
  33. package/packages/broker-sdk/src/__tests__/workflow-trajectory.test.ts +408 -0
  34. package/packages/broker-sdk/src/protocol.ts +5 -0
  35. package/packages/broker-sdk/src/relay.ts +11 -0
  36. package/packages/broker-sdk/src/relaycast.ts +83 -5
  37. package/packages/broker-sdk/src/workflows/README.md +30 -0
  38. package/packages/broker-sdk/src/workflows/index.ts +1 -0
  39. package/packages/broker-sdk/src/workflows/run.ts +9 -1
  40. package/packages/broker-sdk/src/workflows/runner.ts +349 -28
  41. package/packages/broker-sdk/src/workflows/trajectory.ts +507 -0
  42. package/packages/broker-sdk/src/workflows/types.ts +15 -0
  43. package/packages/broker-sdk/tsconfig.json +1 -0
  44. package/packages/broker-sdk/vitest.config.ts +9 -0
  45. package/packages/config/package.json +2 -2
  46. package/packages/continuity/package.json +2 -2
  47. package/packages/daemon/package.json +12 -12
  48. package/packages/hooks/package.json +4 -4
  49. package/packages/mcp/package.json +5 -5
  50. package/packages/memory/package.json +2 -2
  51. package/packages/policy/package.json +2 -2
  52. package/packages/protocol/package.json +1 -1
  53. package/packages/resiliency/package.json +1 -1
  54. package/packages/sdk/package.json +3 -3
  55. package/packages/spawner/package.json +1 -1
  56. package/packages/state/package.json +1 -1
  57. package/packages/storage/package.json +2 -2
  58. package/packages/telemetry/package.json +1 -1
  59. package/packages/trajectory/package.json +2 -2
  60. package/packages/user-directory/package.json +2 -2
  61. package/packages/utils/package.json +3 -3
  62. package/packages/wrapper/package.json +5 -5
@@ -1,5 +1,5 @@
1
1
  import type { AgentRelayOptions } from '../relay.js';
2
- import type { WorkflowRunRow } from './types.js';
2
+ import type { TrajectoryConfig, WorkflowRunRow } from './types.js';
3
3
  import { WorkflowRunner, type WorkflowEventListener, type VariableContext } from './runner.js';
4
4
 
5
5
  /**
@@ -16,6 +16,8 @@ export interface RunWorkflowOptions {
16
16
  relay?: AgentRelayOptions;
17
17
  /** Progress callback for workflow events. */
18
18
  onEvent?: WorkflowEventListener;
19
+ /** Override trajectory config. Set to false to disable trajectory recording. */
20
+ trajectories?: TrajectoryConfig | false;
19
21
  }
20
22
 
21
23
  /**
@@ -43,5 +45,11 @@ export async function runWorkflow(
43
45
  }
44
46
 
45
47
  const config = await runner.parseYamlFile(yamlPath);
48
+
49
+ // Allow programmatic trajectory override
50
+ if (options.trajectories !== undefined) {
51
+ config.trajectories = options.trajectories;
52
+ }
53
+
46
54
  return runner.execute(config, options.workflow, options.vars);
47
55
  }
@@ -27,12 +27,14 @@ import type {
27
27
  WorkflowStepRow,
28
28
  WorkflowStepStatus,
29
29
  } from './types.js';
30
+ import { WorkflowTrajectory, type StepOutcome } from './trajectory.js';
30
31
 
31
32
  // ── AgentRelay SDK imports ──────────────────────────────────────────────────
32
33
 
33
34
  // Import from sub-paths to avoid pulling in the full @relaycast/sdk dependency.
34
35
  import { AgentRelay } from '../relay.js';
35
36
  import type { Agent, AgentRelayOptions } from '../relay.js';
37
+ import { RelaycastApi } from '../relaycast.js';
36
38
 
37
39
  // ── DB adapter interface ────────────────────────────────────────────────────
38
40
 
@@ -95,6 +97,9 @@ export class WorkflowRunner {
95
97
  private readonly summaryDir: string;
96
98
 
97
99
  private relay?: AgentRelay;
100
+ private relaycastApi?: RelaycastApi;
101
+ private channel?: string;
102
+ private trajectory?: WorkflowTrajectory;
98
103
  private abortController?: AbortController;
99
104
  private paused = false;
100
105
  private pauseResolver?: () => void;
@@ -120,18 +125,24 @@ export class WorkflowRunner {
120
125
  private async ensureRelaycastApiKey(channel: string): Promise<void> {
121
126
  if (process.env.RELAY_API_KEY) return;
122
127
 
123
- // Check cached credentials
124
- const cachePath = path.join(homedir(), '.agent-relay', 'relaycast.json');
125
- if (existsSync(cachePath)) {
126
- try {
127
- const raw = await readFile(cachePath, 'utf-8');
128
- const creds = JSON.parse(raw);
129
- if (creds.api_key) {
130
- process.env.RELAY_API_KEY = creds.api_key;
131
- return;
128
+ // Check cached credentials — prefer per-project cache (written by the local
129
+ // relay daemon) over the legacy global cache so concurrent workflows from
130
+ // different repos never stomp each other's credentials.
131
+ const projectCachePath = path.join(this.cwd, '.agent-relay', 'relaycast.json');
132
+ const globalCachePath = path.join(homedir(), '.agent-relay', 'relaycast.json');
133
+
134
+ for (const cachePath of [projectCachePath, globalCachePath]) {
135
+ if (existsSync(cachePath)) {
136
+ try {
137
+ const raw = await readFile(cachePath, 'utf-8');
138
+ const creds = JSON.parse(raw);
139
+ if (creds.api_key) {
140
+ process.env.RELAY_API_KEY = creds.api_key;
141
+ return;
142
+ }
143
+ } catch {
144
+ // Cache corrupt — try next path
132
145
  }
133
- } catch {
134
- // Cache corrupt — fall through to auto-create
135
146
  }
136
147
  }
137
148
 
@@ -159,11 +170,12 @@ export class WorkflowRunner {
159
170
  throw new Error('Relaycast workspace response missing api_key');
160
171
  }
161
172
 
162
- // Cache credentials for future runs
163
- const cacheDir = path.dirname(cachePath);
173
+ // Cache credentials in the per-project directory so concurrent workflows
174
+ // from different repos each get their own workspace credentials.
175
+ const cacheDir = path.dirname(projectCachePath);
164
176
  await mkdir(cacheDir, { recursive: true, mode: 0o700 });
165
177
  await writeFile(
166
- cachePath,
178
+ projectCachePath,
167
179
  JSON.stringify({
168
180
  workspace_id: workspaceId,
169
181
  api_key: apiKey,
@@ -468,11 +480,19 @@ export class WorkflowRunner {
468
480
  this.abortController = new AbortController();
469
481
  this.paused = false;
470
482
 
483
+ // Initialize trajectory recording
484
+ this.trajectory = new WorkflowTrajectory(resolved.trajectories, runId, this.cwd);
485
+
471
486
  try {
472
487
  await this.updateRunStatus(runId, 'running');
473
488
  this.emit({ type: 'run:started', runId });
474
489
 
490
+ // Analyze DAG for trajectory context
491
+ const dagInfo = this.analyzeDAG(workflow.steps);
492
+ await this.trajectory.start(workflow.name, workflow.steps.length, dagInfo);
493
+
475
494
  const channel = resolved.swarm.channel ?? 'general';
495
+ this.channel = channel;
476
496
  await this.ensureRelaycastApiKey(channel);
477
497
 
478
498
  this.relay = new AgentRelay({
@@ -480,6 +500,17 @@ export class WorkflowRunner {
480
500
  channels: [channel],
481
501
  });
482
502
 
503
+ // Create the dedicated workflow channel and join it
504
+ this.relaycastApi = new RelaycastApi({
505
+ agentName: 'WorkflowRunner',
506
+ cachePath: path.join(this.cwd, '.agent-relay', 'relaycast.json'),
507
+ });
508
+ await this.relaycastApi.createChannel(channel, workflow.description);
509
+ await this.relaycastApi.joinChannel(channel);
510
+ this.postToChannel(
511
+ `Workflow **${workflow.name}** started — ${workflow.steps.length} steps, pattern: ${resolved.swarm.pattern}`,
512
+ );
513
+
483
514
  const agentMap = new Map<string, AgentDefinition>();
484
515
  for (const agent of resolved.agents) {
485
516
  agentMap.set(agent.name, agent);
@@ -501,11 +532,29 @@ export class WorkflowRunner {
501
532
  if (allCompleted) {
502
533
  await this.updateRunStatus(runId, 'completed');
503
534
  this.emit({ type: 'run:completed', runId });
535
+
536
+ // Complete trajectory with summary
537
+ const outcomes = this.collectOutcomes(stepStates, workflow.steps);
538
+ const summary = this.trajectory.buildRunSummary(outcomes);
539
+ const confidence = this.trajectory.computeConfidence(outcomes);
540
+ await this.trajectory.complete(summary, confidence, {
541
+ learnings: this.trajectory.extractLearnings(outcomes),
542
+ challenges: this.trajectory.extractChallenges(outcomes),
543
+ });
544
+
545
+ // Post rich completion report to channel
546
+ this.postCompletionReport(workflow.name, outcomes, summary, confidence);
504
547
  } else {
505
548
  const failedStep = [...stepStates.values()].find((s) => s.row.status === 'failed');
506
549
  const errorMsg = failedStep?.row.error ?? 'One or more steps failed';
507
550
  await this.updateRunStatus(runId, 'failed', errorMsg);
508
551
  this.emit({ type: 'run:failed', runId, error: errorMsg });
552
+
553
+ const outcomes = this.collectOutcomes(stepStates, workflow.steps);
554
+ this.postFailureReport(workflow.name, outcomes, errorMsg);
555
+
556
+ // Abandon trajectory on failure
557
+ await this.trajectory.abandon(errorMsg);
509
558
  }
510
559
  } catch (err) {
511
560
  const errorMsg = err instanceof Error ? err.message : String(err);
@@ -514,12 +563,19 @@ export class WorkflowRunner {
514
563
 
515
564
  if (status === 'cancelled') {
516
565
  this.emit({ type: 'run:cancelled', runId });
566
+ this.postToChannel(`Workflow **${workflow.name}** cancelled`);
567
+ await this.trajectory.abandon('Cancelled by user');
517
568
  } else {
518
569
  this.emit({ type: 'run:failed', runId, error: errorMsg });
570
+ this.postToChannel(`Workflow failed: ${errorMsg}`);
571
+ await this.trajectory.abandon(errorMsg);
519
572
  }
520
573
  } finally {
521
574
  await this.relay?.shutdown();
522
575
  this.relay = undefined;
576
+ this.relaycastApi = undefined;
577
+ this.channel = undefined;
578
+ this.trajectory = undefined;
523
579
  this.abortController = undefined;
524
580
  }
525
581
 
@@ -567,10 +623,21 @@ export class WorkflowRunner {
567
623
  this.abortController = new AbortController();
568
624
  this.paused = false;
569
625
 
626
+ // Initialize trajectory for resumed run
627
+ this.trajectory = new WorkflowTrajectory(config.trajectories, runId, this.cwd);
628
+
570
629
  try {
571
630
  await this.updateRunStatus(runId, 'running');
572
631
 
632
+ const pendingCount = [...stepStates.values()].filter((s) => s.row.status === 'pending').length;
633
+ await this.trajectory.start(
634
+ workflow.name,
635
+ workflow.steps.length,
636
+ `Resumed run: ${pendingCount} pending steps of ${workflow.steps.length} total`,
637
+ );
638
+
573
639
  const resumeChannel = config.swarm.channel ?? 'general';
640
+ this.channel = resumeChannel;
574
641
  await this.ensureRelaycastApiKey(resumeChannel);
575
642
 
576
643
  this.relay = new AgentRelay({
@@ -578,6 +645,17 @@ export class WorkflowRunner {
578
645
  channels: [resumeChannel],
579
646
  });
580
647
 
648
+ // Ensure channel exists and join it for resumed runs
649
+ this.relaycastApi = new RelaycastApi({
650
+ agentName: 'WorkflowRunner',
651
+ cachePath: path.join(this.cwd, '.agent-relay', 'relaycast.json'),
652
+ });
653
+ await this.relaycastApi.createChannel(resumeChannel);
654
+ await this.relaycastApi.joinChannel(resumeChannel);
655
+ this.postToChannel(
656
+ `Workflow **${workflow.name}** resumed — ${pendingCount} pending steps`,
657
+ );
658
+
581
659
  const agentMap = new Map<string, AgentDefinition>();
582
660
  for (const agent of config.agents) {
583
661
  agentMap.set(agent.name, agent);
@@ -592,19 +670,38 @@ export class WorkflowRunner {
592
670
  if (allCompleted) {
593
671
  await this.updateRunStatus(runId, 'completed');
594
672
  this.emit({ type: 'run:completed', runId });
673
+
674
+ const outcomes = this.collectOutcomes(stepStates, workflow.steps);
675
+ const summary = this.trajectory.buildRunSummary(outcomes);
676
+ const confidence = this.trajectory.computeConfidence(outcomes);
677
+ await this.trajectory.complete(summary, confidence, {
678
+ learnings: this.trajectory.extractLearnings(outcomes),
679
+ challenges: this.trajectory.extractChallenges(outcomes),
680
+ });
681
+
682
+ this.postCompletionReport(workflow.name, outcomes, summary, confidence);
595
683
  } else {
596
684
  const failedStep = [...stepStates.values()].find((s) => s.row.status === 'failed');
597
685
  const errorMsg = failedStep?.row.error ?? 'One or more steps failed';
598
686
  await this.updateRunStatus(runId, 'failed', errorMsg);
599
687
  this.emit({ type: 'run:failed', runId, error: errorMsg });
688
+
689
+ const outcomes = this.collectOutcomes(stepStates, workflow.steps);
690
+ this.postFailureReport(workflow.name, outcomes, errorMsg);
691
+ await this.trajectory.abandon(errorMsg);
600
692
  }
601
693
  } catch (err) {
602
694
  const errorMsg = err instanceof Error ? err.message : String(err);
603
695
  await this.updateRunStatus(runId, 'failed', errorMsg);
604
696
  this.emit({ type: 'run:failed', runId, error: errorMsg });
697
+ this.postToChannel(`Workflow failed: ${errorMsg}`);
698
+ await this.trajectory.abandon(errorMsg);
605
699
  } finally {
606
700
  await this.relay?.shutdown();
607
701
  this.relay = undefined;
702
+ this.relaycastApi = undefined;
703
+ this.channel = undefined;
704
+ this.trajectory = undefined;
608
705
  this.abortController = undefined;
609
706
  }
610
707
 
@@ -660,23 +757,40 @@ export class WorkflowRunner {
660
757
  break;
661
758
  }
662
759
 
760
+ // Begin a track chapter if multiple parallel steps are starting
761
+ if (readySteps.length > 1 && this.trajectory) {
762
+ const trackNames = readySteps.map((s) => s.name).join(', ');
763
+ await this.trajectory.beginTrack(trackNames);
764
+ }
765
+
663
766
  const results = await Promise.allSettled(
664
767
  readySteps.map((step) =>
665
768
  this.executeStep(step, stepStates, agentMap, errorHandling, runId),
666
769
  ),
667
770
  );
668
771
 
772
+ // Collect outcomes from this batch for convergence reflection
773
+ const batchOutcomes: StepOutcome[] = [];
774
+
669
775
  for (let i = 0; i < results.length; i++) {
670
776
  const result = results[i];
671
777
  const step = readySteps[i];
778
+ const state = stepStates.get(step.name);
672
779
 
673
780
  if (result.status === 'rejected') {
674
781
  const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
675
- const state = stepStates.get(step.name);
676
782
  if (state && state.row.status !== 'failed') {
677
783
  await this.markStepFailed(state, error, runId);
678
784
  }
679
785
 
786
+ batchOutcomes.push({
787
+ name: step.name,
788
+ agent: step.agent,
789
+ status: 'failed',
790
+ attempts: (state?.row.retryCount ?? 0) + 1,
791
+ error,
792
+ });
793
+
680
794
  if (strategy === 'fail-fast') {
681
795
  // Mark all pending downstream steps as skipped
682
796
  await this.markDownstreamSkipped(step.name, workflow.steps, stepStates, runId);
@@ -686,8 +800,33 @@ export class WorkflowRunner {
686
800
  if (strategy === 'continue') {
687
801
  await this.markDownstreamSkipped(step.name, workflow.steps, stepStates, runId);
688
802
  }
803
+ } else {
804
+ batchOutcomes.push({
805
+ name: step.name,
806
+ agent: step.agent,
807
+ status: state?.row.status === 'completed' ? 'completed' : 'failed',
808
+ attempts: (state?.row.retryCount ?? 0) + 1,
809
+ output: state?.row.output,
810
+ verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
811
+ });
689
812
  }
690
813
  }
814
+
815
+ // Reflect at convergence when a parallel batch completes
816
+ if (readySteps.length > 1 && this.trajectory?.shouldReflectOnConverge()) {
817
+ const label = readySteps.map((s) => s.name).join(' + ');
818
+ // Find steps that this batch unblocks
819
+ const completedNames = new Set(batchOutcomes.filter((o) => o.status === 'completed').map((o) => o.name));
820
+ const unblocked = workflow.steps
821
+ .filter((s) => s.dependsOn?.some((dep) => completedNames.has(dep)))
822
+ .filter((s) => {
823
+ const st = stepStates.get(s.name);
824
+ return st && st.row.status === 'pending';
825
+ })
826
+ .map((s) => s.name);
827
+
828
+ await this.trajectory.synthesizeAndReflect(label, batchOutcomes, unblocked.length > 0 ? unblocked : undefined);
829
+ }
691
830
  }
692
831
  }
693
832
 
@@ -733,11 +872,13 @@ export class WorkflowRunner {
733
872
 
734
873
  if (attempt > 0) {
735
874
  this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
875
+ this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
736
876
  state.row.retryCount = attempt;
737
877
  await this.db.updateStep(state.row.id, {
738
878
  retryCount: attempt,
739
879
  updatedAt: new Date().toISOString(),
740
880
  });
881
+ await this.trajectory?.stepRetrying(step, attempt, maxRetries);
741
882
  await this.delay(retryDelay);
742
883
  }
743
884
 
@@ -751,6 +892,8 @@ export class WorkflowRunner {
751
892
  updatedAt: new Date().toISOString(),
752
893
  });
753
894
  this.emit({ type: 'step:started', runId, stepName: step.name });
895
+ this.postToChannel(`**[${step.name}]** Started (agent: ${agentDef.name})`);
896
+ await this.trajectory?.stepStarted(step, agentDef.name);
754
897
 
755
898
  // Resolve step-output variables (e.g. {{steps.plan.output}}) at execution time
756
899
  const stepOutputContext = this.buildStepOutputContext(stepStates);
@@ -776,13 +919,24 @@ export class WorkflowRunner {
776
919
  updatedAt: new Date().toISOString(),
777
920
  });
778
921
  this.emit({ type: 'step:completed', runId, stepName: step.name, output });
922
+ this.postToChannel(
923
+ `**[${step.name}]** Completed\n${output.slice(0, 500)}${output.length > 500 ? '\n...(truncated)' : ''}`,
924
+ );
925
+ await this.trajectory?.stepCompleted(step, output, attempt + 1);
779
926
  return;
780
927
  } catch (err) {
781
928
  lastError = err instanceof Error ? err.message : String(err);
782
929
  }
783
930
  }
784
931
 
785
- // All retries exhausted — mark failed and throw so callers can apply error strategy
932
+ // All retries exhausted — record decision and mark failed
933
+ await this.trajectory?.stepFailed(step, lastError ?? 'Unknown error', maxRetries + 1, maxRetries);
934
+ await this.trajectory?.decide(
935
+ `How to handle ${step.name} failure`,
936
+ 'exhausted',
937
+ `All ${maxRetries + 1} attempts failed: ${lastError ?? 'Unknown error'}`,
938
+ );
939
+ this.postToChannel(`**[${step.name}]** Failed: ${lastError ?? 'Unknown error'}`);
786
940
  await this.markStepFailed(state, lastError ?? 'Unknown error', runId);
787
941
  throw new Error(`Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`);
788
942
  }
@@ -796,33 +950,85 @@ export class WorkflowRunner {
796
950
  throw new Error('AgentRelay not initialized');
797
951
  }
798
952
 
953
+ // Append self-termination instructions to the task
954
+ const agentName = `${step.name}-${this.generateShortId()}`;
955
+ const taskWithExit = step.task + '\n\n---\n' +
956
+ 'IMPORTANT: When you have fully completed this task, you MUST self-terminate by calling ' +
957
+ `the MCP tool: remove_agent(name="${agentName}", reason="Task completed"). ` +
958
+ 'Do not wait for further input — release yourself immediately after finishing.';
959
+
960
+ const agentChannels = this.channel ? [this.channel] : agentDef.channels;
961
+
799
962
  const agent = await this.relay.spawnPty({
800
- name: `${step.name}-${this.generateShortId()}`,
963
+ name: agentName,
801
964
  cli: agentDef.cli,
802
965
  args: agentDef.constraints?.model ? ['--model', agentDef.constraints.model] : [],
803
- channels: agentDef.channels,
966
+ channels: agentChannels,
967
+ task: taskWithExit,
804
968
  idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
805
969
  });
806
970
 
807
- // Send the task as a message to the agent
808
- const system = this.relay.human({ name: 'WorkflowRunner' });
809
- await system.sendMessage({ to: agent.name, text: step.task });
971
+ // Register the spawned agent in Relaycast for observability + start heartbeat
972
+ let stopHeartbeat: (() => void) | undefined;
973
+ if (this.relaycastApi) {
974
+ const agentClient = await this.relaycastApi.registerExternalAgent(
975
+ agent.name,
976
+ `Workflow agent for step "${step.name}" (${agentDef.cli})`,
977
+ ).catch(() => null);
978
+
979
+ // Keep the agent online in the dashboard while it's working
980
+ if (agentClient) {
981
+ stopHeartbeat = this.relaycastApi.startHeartbeat(agentClient);
982
+ }
983
+ }
810
984
 
811
- // Wait for agent to exit
985
+ // Invite the spawned agent to the workflow channel
986
+ if (this.channel && this.relaycastApi) {
987
+ await this.relaycastApi.inviteToChannel(this.channel, agent.name).catch(() => {});
988
+ }
989
+
990
+ // Post task assignment to channel for observability
991
+ const taskPreview = step.task.slice(0, 500) + (step.task.length > 500 ? '...' : '');
992
+ this.postToChannel(`**[${step.name}]** Assigned to \`${agent.name}\`:\n${taskPreview}`);
993
+
994
+ // Task was already delivered as initial_task via spawnPty above.
995
+
996
+ // Wait for agent to exit (self-termination via /exit)
812
997
  const exitResult = await agent.waitForExit(timeoutMs);
813
998
 
999
+ // Stop heartbeat now that agent has exited
1000
+ stopHeartbeat?.();
1001
+
814
1002
  if (exitResult === 'timeout') {
815
- await agent.release();
816
- throw new Error(`Step "${step.name}" timed out after ${timeoutMs}ms`);
1003
+ // Safety net: check if the verification file exists before giving up.
1004
+ // The agent may have completed work but failed to /exit.
1005
+ if (step.verification?.type === 'file_exists') {
1006
+ const verifyPath = path.resolve(this.cwd, step.verification.value);
1007
+ if (existsSync(verifyPath)) {
1008
+ this.postToChannel(
1009
+ `**[${step.name}]** Agent idle after completing work — releasing`,
1010
+ );
1011
+ await agent.release();
1012
+ // Fall through to read output below
1013
+ } else {
1014
+ await agent.release();
1015
+ throw new Error(`Step "${step.name}" timed out after ${timeoutMs}ms`);
1016
+ }
1017
+ } else {
1018
+ await agent.release();
1019
+ throw new Error(`Step "${step.name}" timed out after ${timeoutMs}ms`);
1020
+ }
817
1021
  }
818
1022
 
819
1023
  // Read output from summary file if it exists
820
1024
  const summaryPath = path.join(this.summaryDir, `${step.name}.md`);
821
- if (existsSync(summaryPath)) {
822
- return await readFile(summaryPath, 'utf-8');
823
- }
1025
+ const output = existsSync(summaryPath)
1026
+ ? await readFile(summaryPath, 'utf-8')
1027
+ : exitResult === 'timeout'
1028
+ ? 'Agent completed (released after idle timeout)'
1029
+ : `Agent exited (${exitResult})`;
824
1030
 
825
- return `Agent exited (${exitResult})`;
1031
+ return output;
826
1032
  }
827
1033
 
828
1034
  // ── Verification ────────────────────────────────────────────────────────
@@ -912,6 +1118,13 @@ export class WorkflowRunner {
912
1118
  updatedAt: new Date().toISOString(),
913
1119
  });
914
1120
  this.emit({ type: 'step:skipped', runId, stepName: step.name });
1121
+ this.postToChannel(`**[${step.name}]** Skipped — upstream dependency "${current}" failed`);
1122
+ await this.trajectory?.stepSkipped(step, `Upstream dependency "${current}" failed`);
1123
+ await this.trajectory?.decide(
1124
+ `Whether to skip ${step.name}`,
1125
+ 'skip',
1126
+ `Upstream dependency "${current}" failed`,
1127
+ );
915
1128
  queue.push(step.name);
916
1129
  }
917
1130
  }
@@ -938,6 +1151,114 @@ export class WorkflowRunner {
938
1151
  return new Promise((resolve) => setTimeout(resolve, ms));
939
1152
  }
940
1153
 
1154
+ // ── Channel messaging ──────────────────────────────────────────────────
1155
+
1156
+ /** Post a message to the workflow channel. Fire-and-forget — never throws or blocks. */
1157
+ private postToChannel(text: string): void {
1158
+ if (!this.relaycastApi || !this.channel) return;
1159
+ this.relaycastApi.sendToChannel(this.channel, text).catch(() => {
1160
+ // Non-critical — don't break workflow execution
1161
+ });
1162
+ }
1163
+
1164
+ /** Post a rich completion report to the channel. */
1165
+ private postCompletionReport(
1166
+ workflowName: string,
1167
+ outcomes: StepOutcome[],
1168
+ summary: string,
1169
+ confidence: number,
1170
+ ): void {
1171
+ const completed = outcomes.filter((o) => o.status === 'completed');
1172
+ const skipped = outcomes.filter((o) => o.status === 'skipped');
1173
+ const retried = outcomes.filter((o) => o.attempts > 1);
1174
+
1175
+ const lines: string[] = [
1176
+ `## Workflow **${workflowName}** — Complete`,
1177
+ '',
1178
+ summary,
1179
+ `Confidence: ${Math.round(confidence * 100)}%`,
1180
+ '',
1181
+ '### Steps',
1182
+ ...completed.map((o) =>
1183
+ `- **${o.name}** (${o.agent}) — passed${o.verificationPassed ? ' (verified)' : ''}${o.attempts > 1 ? ` after ${o.attempts} attempts` : ''}`,
1184
+ ),
1185
+ ...skipped.map((o) => `- **${o.name}** — skipped`),
1186
+ ];
1187
+
1188
+ if (retried.length > 0) {
1189
+ lines.push('', '### Retries');
1190
+ for (const o of retried) {
1191
+ lines.push(`- ${o.name}: ${o.attempts} attempts`);
1192
+ }
1193
+ }
1194
+
1195
+ this.postToChannel(lines.join('\n'));
1196
+ }
1197
+
1198
+ /** Post a failure report to the channel. */
1199
+ private postFailureReport(
1200
+ workflowName: string,
1201
+ outcomes: StepOutcome[],
1202
+ errorMsg: string,
1203
+ ): void {
1204
+ const completed = outcomes.filter((o) => o.status === 'completed');
1205
+ const failed = outcomes.filter((o) => o.status === 'failed');
1206
+ const skipped = outcomes.filter((o) => o.status === 'skipped');
1207
+
1208
+ const lines: string[] = [
1209
+ `## Workflow **${workflowName}** — Failed`,
1210
+ '',
1211
+ `${completed.length}/${outcomes.length} steps passed. Error: ${errorMsg}`,
1212
+ '',
1213
+ '### Steps',
1214
+ ...completed.map((o) => `- **${o.name}** (${o.agent}) — passed`),
1215
+ ...failed.map((o) => `- **${o.name}** (${o.agent}) — FAILED: ${o.error ?? 'unknown'}`),
1216
+ ...skipped.map((o) => `- **${o.name}** — skipped`),
1217
+ ];
1218
+
1219
+ this.postToChannel(lines.join('\n'));
1220
+ }
1221
+
1222
+ // ── Trajectory helpers ────────────────────────────────────────────────
1223
+
1224
+ /** Analyze DAG structure for trajectory context. */
1225
+ private analyzeDAG(steps: WorkflowStep[]): string {
1226
+ const roots = steps.filter((s) => !s.dependsOn?.length);
1227
+ const withDeps = steps.filter((s) => s.dependsOn?.length);
1228
+
1229
+ const parts = [`Parsed ${steps.length} steps`];
1230
+ if (roots.length > 1) {
1231
+ parts.push(`${roots.length} parallel tracks`);
1232
+ }
1233
+ if (withDeps.length > 0) {
1234
+ parts.push(`${withDeps.length} dependent steps`);
1235
+ }
1236
+ parts.push('DAG validated, no cycles');
1237
+ return parts.join(', ');
1238
+ }
1239
+
1240
+ /** Collect step outcomes for trajectory synthesis. */
1241
+ private collectOutcomes(stepStates: Map<string, StepState>, steps?: WorkflowStep[]): StepOutcome[] {
1242
+ const stepsWithVerification = new Set(
1243
+ steps?.filter((s) => s.verification).map((s) => s.name) ?? [],
1244
+ );
1245
+ const outcomes: StepOutcome[] = [];
1246
+ for (const [name, state] of stepStates) {
1247
+ outcomes.push({
1248
+ name,
1249
+ agent: state.row.agentName,
1250
+ status: state.row.status === 'completed' ? 'completed'
1251
+ : state.row.status === 'skipped' ? 'skipped'
1252
+ : 'failed',
1253
+ attempts: state.row.retryCount + 1,
1254
+ output: state.row.output,
1255
+ error: state.row.error,
1256
+ verificationPassed: state.row.status === 'completed' && stepsWithVerification.has(name),
1257
+ });
1258
+ }
1259
+ return outcomes;
1260
+ }
1261
+
941
1262
  // ── ID generation ─────────────────────────────────────────────────────
942
1263
 
943
1264
  private generateId(): string {