@agent-relay/sdk 2.3.14 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/README.md +68 -838
  2. package/bin/agent-relay-broker +0 -0
  3. package/dist/__tests__/contract-fixtures.test.d.ts +2 -0
  4. package/dist/__tests__/contract-fixtures.test.d.ts.map +1 -0
  5. package/dist/__tests__/contract-fixtures.test.js +85 -0
  6. package/dist/__tests__/contract-fixtures.test.js.map +1 -0
  7. package/dist/__tests__/facade.test.d.ts +2 -0
  8. package/dist/__tests__/facade.test.d.ts.map +1 -0
  9. package/dist/__tests__/facade.test.js +305 -0
  10. package/dist/__tests__/facade.test.js.map +1 -0
  11. package/dist/__tests__/integration.test.d.ts +2 -0
  12. package/dist/__tests__/integration.test.d.ts.map +1 -0
  13. package/dist/__tests__/integration.test.js +169 -0
  14. package/dist/__tests__/integration.test.js.map +1 -0
  15. package/dist/__tests__/pty.test.d.ts +2 -0
  16. package/dist/__tests__/pty.test.d.ts.map +1 -0
  17. package/dist/__tests__/pty.test.js +20 -0
  18. package/dist/__tests__/pty.test.js.map +1 -0
  19. package/dist/__tests__/quickstart.test.d.ts +2 -0
  20. package/dist/__tests__/quickstart.test.d.ts.map +1 -0
  21. package/dist/__tests__/quickstart.test.js +176 -0
  22. package/dist/__tests__/quickstart.test.js.map +1 -0
  23. package/dist/__tests__/spawn-from-env.test.d.ts +2 -0
  24. package/dist/__tests__/spawn-from-env.test.d.ts.map +1 -0
  25. package/dist/__tests__/spawn-from-env.test.js +206 -0
  26. package/dist/__tests__/spawn-from-env.test.js.map +1 -0
  27. package/dist/__tests__/unit.test.d.ts +2 -0
  28. package/dist/__tests__/unit.test.d.ts.map +1 -0
  29. package/dist/__tests__/unit.test.js +347 -0
  30. package/dist/__tests__/unit.test.js.map +1 -0
  31. package/dist/browser.d.ts +16 -0
  32. package/dist/browser.d.ts.map +1 -0
  33. package/dist/browser.js +19 -0
  34. package/dist/browser.js.map +1 -0
  35. package/dist/client.d.ts +140 -526
  36. package/dist/client.d.ts.map +1 -1
  37. package/dist/client.js +416 -1509
  38. package/dist/client.js.map +1 -1
  39. package/dist/consensus-helpers.d.ts +103 -0
  40. package/dist/consensus-helpers.d.ts.map +1 -0
  41. package/dist/consensus-helpers.js +147 -0
  42. package/dist/consensus-helpers.js.map +1 -0
  43. package/dist/consensus.d.ts +72 -0
  44. package/dist/consensus.d.ts.map +1 -0
  45. package/dist/consensus.js +378 -0
  46. package/dist/consensus.js.map +1 -0
  47. package/dist/examples/demo.d.ts +2 -0
  48. package/dist/examples/demo.d.ts.map +1 -0
  49. package/dist/examples/demo.js +63 -0
  50. package/dist/examples/demo.js.map +1 -0
  51. package/dist/examples/example.d.ts +2 -0
  52. package/dist/examples/example.d.ts.map +1 -0
  53. package/dist/examples/example.js +80 -0
  54. package/dist/examples/example.js.map +1 -0
  55. package/dist/examples/quickstart.d.ts +2 -0
  56. package/dist/examples/quickstart.d.ts.map +1 -0
  57. package/dist/examples/quickstart.js +56 -0
  58. package/dist/examples/quickstart.js.map +1 -0
  59. package/dist/examples/ralph-loop.d.ts +2 -0
  60. package/dist/examples/ralph-loop.d.ts.map +1 -0
  61. package/dist/examples/ralph-loop.js +281 -0
  62. package/dist/examples/ralph-loop.js.map +1 -0
  63. package/dist/examples/workflow-superiority.d.ts +32 -0
  64. package/dist/examples/workflow-superiority.d.ts.map +1 -0
  65. package/dist/examples/workflow-superiority.js +1421 -0
  66. package/dist/examples/workflow-superiority.js.map +1 -0
  67. package/dist/index.d.ts +13 -20
  68. package/dist/index.d.ts.map +1 -1
  69. package/dist/index.js +12 -26
  70. package/dist/index.js.map +1 -1
  71. package/dist/logs.d.ts +70 -25
  72. package/dist/logs.d.ts.map +1 -1
  73. package/dist/logs.js +238 -42
  74. package/dist/logs.js.map +1 -1
  75. package/dist/models.d.ts +9 -0
  76. package/dist/models.d.ts.map +1 -0
  77. package/dist/models.js +17 -0
  78. package/dist/models.js.map +1 -0
  79. package/dist/protocol.d.ts +366 -0
  80. package/dist/protocol.d.ts.map +1 -0
  81. package/dist/protocol.js +2 -0
  82. package/dist/protocol.js.map +1 -0
  83. package/dist/pty.d.ts +8 -0
  84. package/dist/pty.d.ts.map +1 -0
  85. package/dist/pty.js +26 -0
  86. package/dist/pty.js.map +1 -0
  87. package/dist/relay-adapter.d.ts +139 -0
  88. package/dist/relay-adapter.d.ts.map +1 -0
  89. package/dist/relay-adapter.js +210 -0
  90. package/dist/relay-adapter.js.map +1 -0
  91. package/dist/relay.d.ts +304 -0
  92. package/dist/relay.d.ts.map +1 -0
  93. package/dist/relay.js +910 -0
  94. package/dist/relay.js.map +1 -0
  95. package/dist/shadow.d.ts +101 -0
  96. package/dist/shadow.d.ts.map +1 -0
  97. package/dist/shadow.js +174 -0
  98. package/dist/shadow.js.map +1 -0
  99. package/dist/spawn-from-env.d.ts +77 -0
  100. package/dist/spawn-from-env.d.ts.map +1 -0
  101. package/dist/spawn-from-env.js +172 -0
  102. package/dist/spawn-from-env.js.map +1 -0
  103. package/dist/workflows/barrier.d.ts +72 -0
  104. package/dist/workflows/barrier.d.ts.map +1 -0
  105. package/dist/workflows/barrier.js +162 -0
  106. package/dist/workflows/barrier.js.map +1 -0
  107. package/dist/workflows/builder.d.ts +114 -0
  108. package/dist/workflows/builder.d.ts.map +1 -0
  109. package/dist/workflows/builder.js +201 -0
  110. package/dist/workflows/builder.js.map +1 -0
  111. package/dist/workflows/cli.d.ts +11 -0
  112. package/dist/workflows/cli.d.ts.map +1 -0
  113. package/dist/workflows/cli.js +144 -0
  114. package/dist/workflows/cli.js.map +1 -0
  115. package/dist/workflows/coordinator.d.ts +73 -0
  116. package/dist/workflows/coordinator.d.ts.map +1 -0
  117. package/dist/workflows/coordinator.js +647 -0
  118. package/dist/workflows/coordinator.js.map +1 -0
  119. package/dist/workflows/custom-steps.d.ts +73 -0
  120. package/dist/workflows/custom-steps.d.ts.map +1 -0
  121. package/dist/workflows/custom-steps.js +321 -0
  122. package/dist/workflows/custom-steps.js.map +1 -0
  123. package/dist/workflows/dry-run-format.d.ts +6 -0
  124. package/dist/workflows/dry-run-format.d.ts.map +1 -0
  125. package/dist/workflows/dry-run-format.js +68 -0
  126. package/dist/workflows/dry-run-format.js.map +1 -0
  127. package/dist/workflows/file-db.d.ts +33 -0
  128. package/dist/workflows/file-db.d.ts.map +1 -0
  129. package/dist/workflows/file-db.js +108 -0
  130. package/dist/workflows/file-db.js.map +1 -0
  131. package/dist/workflows/index.d.ts +15 -0
  132. package/dist/workflows/index.d.ts.map +1 -0
  133. package/dist/workflows/index.js +15 -0
  134. package/dist/workflows/index.js.map +1 -0
  135. package/dist/workflows/memory-db.d.ts +17 -0
  136. package/dist/workflows/memory-db.d.ts.map +1 -0
  137. package/dist/workflows/memory-db.js +33 -0
  138. package/dist/workflows/memory-db.js.map +1 -0
  139. package/dist/workflows/run.d.ts +38 -0
  140. package/dist/workflows/run.d.ts.map +1 -0
  141. package/dist/workflows/run.js +25 -0
  142. package/dist/workflows/run.js.map +1 -0
  143. package/dist/workflows/runner.d.ts +320 -0
  144. package/dist/workflows/runner.d.ts.map +1 -0
  145. package/dist/workflows/runner.js +2821 -0
  146. package/dist/workflows/runner.js.map +1 -0
  147. package/dist/workflows/state.d.ts +77 -0
  148. package/dist/workflows/state.d.ts.map +1 -0
  149. package/dist/workflows/state.js +140 -0
  150. package/dist/workflows/state.js.map +1 -0
  151. package/dist/workflows/templates.d.ts +47 -0
  152. package/dist/workflows/templates.d.ts.map +1 -0
  153. package/dist/workflows/templates.js +405 -0
  154. package/dist/workflows/templates.js.map +1 -0
  155. package/dist/workflows/trajectory.d.ts +87 -0
  156. package/dist/workflows/trajectory.d.ts.map +1 -0
  157. package/dist/workflows/trajectory.js +441 -0
  158. package/dist/workflows/trajectory.js.map +1 -0
  159. package/dist/workflows/types.d.ts +306 -0
  160. package/dist/workflows/types.d.ts.map +1 -0
  161. package/dist/workflows/types.js +23 -0
  162. package/dist/workflows/types.js.map +1 -0
  163. package/dist/workflows/validator.d.ts +11 -0
  164. package/dist/workflows/validator.d.ts.map +1 -0
  165. package/dist/workflows/validator.js +128 -0
  166. package/dist/workflows/validator.js.map +1 -0
  167. package/package.json +59 -53
  168. package/dist/discovery.d.ts +0 -10
  169. package/dist/discovery.d.ts.map +0 -1
  170. package/dist/discovery.js +0 -22
  171. package/dist/discovery.js.map +0 -1
  172. package/dist/errors.d.ts +0 -9
  173. package/dist/errors.d.ts.map +0 -1
  174. package/dist/errors.js +0 -9
  175. package/dist/errors.js.map +0 -1
  176. package/dist/protocol/index.d.ts +0 -8
  177. package/dist/protocol/index.d.ts.map +0 -1
  178. package/dist/protocol/index.js +0 -8
  179. package/dist/protocol/index.js.map +0 -1
@@ -0,0 +1,2821 @@
1
+ /**
2
+ * WorkflowRunner — parses relay.yaml, validates config, resolves templates,
3
+ * executes steps (sequential/parallel/DAG), runs verification checks,
4
+ * persists state to DB, and supports pause/resume/abort with retries.
5
+ */
6
+ import { spawn as cpSpawn } from 'node:child_process';
7
+ import { randomBytes } from 'node:crypto';
8
+ import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
9
+ import { readFile, writeFile } from 'node:fs/promises';
10
+ import path from 'node:path';
11
+ import { parse as parseYaml } from 'yaml';
12
+ import { stripAnsi as stripAnsiFn } from '../pty.js';
13
+ import { loadCustomSteps, resolveAllCustomSteps, validateCustomStepsUsage, CustomStepsParseError, CustomStepResolutionError, } from './custom-steps.js';
14
+ import { InMemoryWorkflowDb } from './memory-db.js';
15
+ import { WorkflowTrajectory } from './trajectory.js';
16
+ // ── AgentRelay SDK imports ──────────────────────────────────────────────────
17
+ // Import from sub-paths to avoid pulling in the full @relaycast/sdk dependency.
18
+ import { AgentRelay } from '../relay.js';
19
+ import { RelayCast, RelayError } from '@relaycast/sdk';
20
+ // ── WorkflowRunner ──────────────────────────────────────────────────────────
21
+ export class WorkflowRunner {
22
/** Workflow persistence backend; the constructor falls back to an InMemoryWorkflowDb. */
db;
/** Workspace identifier; the constructor defaults it to 'local'. */
workspaceId;
/** Relay options supplied by the caller ({} when omitted); its `env` map is consulted for RELAY_API_KEY and RELAYCAST_BASE_URL. */
relayOptions;
/** Base directory used to resolve relative paths; defaults to process.cwd(). */
cwd;
/** Summary directory; defaults to <cwd>/.relay/summaries. */
summaryDir;
/** @internal exposed for CLI signal-handler shutdown only */
relay;
/** RelayCast client, created on first use by getRelaycastClient(). */
relaycast;
/** Agent-scoped Relaycast client for the runner itself, cached by ensureRelaycastRunnerAgent(). */
relaycastAgent;
/** Relaycast workspace API key, resolved by ensureRelaycastApiKey(). */
relayApiKey;
/** Set to true when the API key came from a workspace that ensureRelaycastApiKey() created itself. */
relayApiKeyAutoCreated = false;
// NOTE(review): channel, trajectory and abortController are not assigned in the
// part of the class visible here — presumably set per run; confirm against run code.
channel;
trajectory;
abortController;
// NOTE(review): pause state; presumably backs the pause/resume support mentioned
// in the file header — confirm against the pause/resume methods.
paused = false;
pauseResolver;
/** Event listeners registered through on(); emit() invokes each of them in order. */
listeners = [];
/** Current config for the active run, so spawnAndWait can access swarm config. */
currentConfig;
/** Current run ID for event emission from spawnAndWait context. */
currentRunId;
/** Live Agent handles keyed by name, for hub-mediated nudging. */
activeAgentHandles = new Map();
// PTY-based output capture: accumulate terminal output per-agent
ptyOutputBuffers = new Map();
ptyListeners = new Map();
ptyLogStreams = new Map();
/** Path to workers.json so `agents:kill` can find workflow-spawned agents */
workersPath;
/** In-memory tracking of active workers to avoid race conditions on workers.json */
activeWorkers = new Map();
/** Mutex for serializing workers.json file access */
workersFileLock = Promise.resolve();
/** Timestamp when the current workflow run started, for elapsed-time logging. */
runStartTime;
/** Unsubscribe handle for broker stderr listener wired during a run. */
unsubBrokerStderr;
/** Tracks last idle log time per agent to debounce idle warnings (30s multiples). */
lastIdleLog = new Map();
/** Tracks last logged activity type per agent to avoid duplicate status lines. */
lastActivity = new Map();
63
+ constructor(options = {}) {
64
+ this.db = options.db ?? new InMemoryWorkflowDb();
65
+ this.workspaceId = options.workspaceId ?? 'local';
66
+ this.relayOptions = options.relay ?? {};
67
+ this.cwd = options.cwd ?? process.cwd();
68
+ this.summaryDir = options.summaryDir ?? path.join(this.cwd, '.relay', 'summaries');
69
+ this.workersPath = path.join(this.cwd, '.agent-relay', 'team', 'workers.json');
70
+ }
71
+ // ── Progress logging ────────────────────────────────────────────────────
72
+ /** Log a progress message with elapsed time since run start. */
73
+ log(msg) {
74
+ const elapsed = this.runStartTime ? Math.round((Date.now() - this.runStartTime) / 1000) : 0;
75
+ const mins = Math.floor(elapsed / 60);
76
+ const secs = elapsed % 60;
77
+ const ts = mins > 0
78
+ ? `${String(mins).padStart(2, '0')}:${String(secs).padStart(2, '0')}`
79
+ : `00:${String(secs).padStart(2, '0')}`;
80
+ console.log(`[workflow ${ts}] ${msg}`);
81
+ }
82
+ // ── Relaycast auto-provisioning ────────────────────────────────────────
83
+ /**
84
+ * Ensure a Relaycast workspace API key is available for the broker.
85
+ * Resolution order:
86
+ * 1. RELAY_API_KEY environment variable (explicit override)
87
+ * 2. Auto-create a fresh workspace via the Relaycast API
88
+ *
89
+ * Each workflow run gets its own isolated workspace — no caching, no sharing.
90
+ */
91
+ async ensureRelaycastApiKey(channel) {
92
+ if (this.relayApiKey)
93
+ return;
94
+ // Explicit override from relayOptions or environment takes priority.
95
+ const envKey = this.relayOptions.env?.RELAY_API_KEY ?? process.env.RELAY_API_KEY;
96
+ if (envKey) {
97
+ this.relayApiKey = envKey;
98
+ return;
99
+ }
100
+ // Always create a fresh workspace — each run gets full isolation.
101
+ const workspaceName = `relay-${channel}-${randomBytes(4).toString('hex')}`;
102
+ const baseUrl = this.relayOptions.env?.RELAYCAST_BASE_URL ??
103
+ process.env.RELAYCAST_BASE_URL ??
104
+ 'https://api.relaycast.dev';
105
+ const res = await fetch(`${baseUrl}/v1/workspaces`, {
106
+ method: 'POST',
107
+ headers: { 'content-type': 'application/json' },
108
+ body: JSON.stringify({ name: workspaceName }),
109
+ });
110
+ if (!res.ok) {
111
+ throw new Error(`Failed to auto-create Relaycast workspace: ${res.status} ${await res.text()}`);
112
+ }
113
+ const body = (await res.json());
114
+ const data = (body.data ?? body);
115
+ const apiKey = data.api_key;
116
+ if (!apiKey) {
117
+ throw new Error('Relaycast workspace response missing api_key');
118
+ }
119
+ this.relayApiKey = apiKey;
120
+ this.relayApiKeyAutoCreated = true;
121
+ }
122
+ getRelayEnv() {
123
+ if (!this.relayApiKey) {
124
+ return this.relayOptions.env;
125
+ }
126
+ return {
127
+ ...(this.relayOptions.env ?? process.env),
128
+ RELAY_API_KEY: this.relayApiKey,
129
+ };
130
+ }
131
+ getRelaycastBaseUrl() {
132
+ return (this.relayOptions.env?.RELAYCAST_BASE_URL ??
133
+ process.env.RELAYCAST_BASE_URL ??
134
+ 'https://api.relaycast.dev');
135
+ }
136
+ getRelaycastClient() {
137
+ if (!this.relayApiKey) {
138
+ throw new Error('No Relaycast API key available');
139
+ }
140
+ if (!this.relaycast) {
141
+ this.relaycast = new RelayCast({
142
+ apiKey: this.relayApiKey,
143
+ baseUrl: this.getRelaycastBaseUrl(),
144
+ });
145
+ }
146
+ return this.relaycast;
147
+ }
148
+ async ensureRelaycastRunnerAgent() {
149
+ if (this.relaycastAgent)
150
+ return this.relaycastAgent;
151
+ const rc = this.getRelaycastClient();
152
+ let registration;
153
+ try {
154
+ registration = await rc.agents.register({ name: 'WorkflowRunner', type: 'agent' });
155
+ }
156
+ catch (err) {
157
+ if (err instanceof RelayError && err.code === 'name_conflict') {
158
+ registration = await rc.agents.register({
159
+ name: `WorkflowRunner-${randomBytes(4).toString('hex')}`,
160
+ type: 'agent',
161
+ });
162
+ }
163
+ else {
164
+ throw err;
165
+ }
166
+ }
167
+ this.relaycastAgent = rc.as(registration.token);
168
+ return this.relaycastAgent;
169
+ }
170
+ async createAndJoinRelaycastChannel(channel, topic) {
171
+ const agent = await this.ensureRelaycastRunnerAgent();
172
+ try {
173
+ await agent.channels.create({ name: channel, ...(topic ? { topic } : {}) });
174
+ }
175
+ catch (err) {
176
+ if (!(err instanceof RelayError && err.code === 'name_conflict')) {
177
+ throw err;
178
+ }
179
+ }
180
+ await agent.channels.join(channel);
181
+ }
182
+ async registerRelaycastExternalAgent(name, persona) {
183
+ const rc = this.getRelaycastClient();
184
+ try {
185
+ const registration = await rc.agents.register({
186
+ name,
187
+ type: 'agent',
188
+ ...(persona ? { persona } : {}),
189
+ });
190
+ return rc.as(registration.token);
191
+ }
192
+ catch (err) {
193
+ if (err instanceof RelayError && err.code === 'name_conflict') {
194
+ return null;
195
+ }
196
+ throw err;
197
+ }
198
+ }
199
+ startRelaycastHeartbeat(agent, intervalMs = 30_000) {
200
+ const beat = () => {
201
+ agent.heartbeat().catch(() => { });
202
+ };
203
+ const timer = setInterval(beat, intervalMs);
204
+ timer.unref();
205
+ beat();
206
+ return () => clearInterval(timer);
207
+ }
208
+ // ── Event subscription ──────────────────────────────────────────────────
209
+ on(listener) {
210
+ this.listeners.push(listener);
211
+ return () => {
212
+ this.listeners = this.listeners.filter((l) => l !== listener);
213
+ };
214
+ }
215
+ emit(event) {
216
+ for (const listener of this.listeners) {
217
+ listener(event);
218
+ }
219
+ }
220
+ // ── Parsing & validation ────────────────────────────────────────────────
221
+ /** Parse a relay.yaml file from disk. */
222
+ async parseYamlFile(filePath) {
223
+ const absPath = path.resolve(this.cwd, filePath);
224
+ const raw = await readFile(absPath, 'utf-8');
225
+ return this.parseYamlString(raw, absPath);
226
+ }
227
+ /** Parse a relay.yaml string. */
228
+ parseYamlString(raw, source = '<string>') {
229
+ const parsed = parseYaml(raw);
230
+ this.validateConfig(parsed, source);
231
+ const config = parsed;
232
+ config.agents ??= [];
233
+ return config;
234
+ }
235
+ /** Validate a config object against the RelayYamlConfig shape. */
236
+ validateConfig(config, source = '<config>') {
237
+ if (typeof config !== 'object' || config === null) {
238
+ throw new Error(`${source}: config must be a non-null object`);
239
+ }
240
+ const c = config;
241
+ if (typeof c.version !== 'string') {
242
+ throw new Error(`${source}: missing required field "version"`);
243
+ }
244
+ if (typeof c.name !== 'string') {
245
+ throw new Error(`${source}: missing required field "name"`);
246
+ }
247
+ if (typeof c.swarm !== 'object' || c.swarm === null) {
248
+ throw new Error(`${source}: missing required field "swarm"`);
249
+ }
250
+ const swarm = c.swarm;
251
+ if (typeof swarm.pattern !== 'string') {
252
+ throw new Error(`${source}: missing required field "swarm.pattern"`);
253
+ }
254
+ if (c.agents !== undefined && !Array.isArray(c.agents)) {
255
+ throw new Error(`${source}: "agents" must be an array when provided`);
256
+ }
257
+ for (const agent of c.agents ?? []) {
258
+ if (typeof agent !== 'object' || agent === null) {
259
+ throw new Error(`${source}: each agent must be an object`);
260
+ }
261
+ const a = agent;
262
+ if (typeof a.name !== 'string') {
263
+ throw new Error(`${source}: each agent must have a string "name"`);
264
+ }
265
+ if (typeof a.cli !== 'string') {
266
+ throw new Error(`${source}: each agent must have a string "cli"`);
267
+ }
268
+ }
269
+ if (c.workflows !== undefined) {
270
+ if (!Array.isArray(c.workflows)) {
271
+ throw new Error(`${source}: "workflows" must be an array`);
272
+ }
273
+ for (const wf of c.workflows) {
274
+ this.validateWorkflow(wf, (c.agents ?? []), source);
275
+ }
276
+ }
277
+ }
278
+ // ── Dry-run simulation ──────────────────────────────────────────────
279
+ /**
280
+ * Validate a workflow config and simulate execution waves without spawning agents.
281
+ * Returns a DryRunReport with DAG analysis, agent summary, and wave breakdown.
282
+ */
283
+ dryRun(config, workflowName, vars) {
284
+ const errors = [];
285
+ const warnings = [];
286
+ // 1. Validate config
287
+ let resolved;
288
+ try {
289
+ this.validateConfig(config);
290
+ resolved = vars ? this.resolveVariables(config, vars) : config;
291
+ }
292
+ catch (err) {
293
+ errors.push(err instanceof Error ? err.message : String(err));
294
+ return {
295
+ valid: false,
296
+ errors,
297
+ warnings,
298
+ name: config?.name ?? '<unknown>',
299
+ pattern: config?.swarm?.pattern ?? '<unknown>',
300
+ agents: [],
301
+ waves: [],
302
+ totalSteps: 0,
303
+ estimatedWaves: 0,
304
+ };
305
+ }
306
+ // 2. Find target workflow
307
+ const workflows = resolved.workflows ?? [];
308
+ const workflow = workflowName ? workflows.find((w) => w.name === workflowName) : workflows[0];
309
+ if (!workflow) {
310
+ errors.push(workflowName ? `Workflow "${workflowName}" not found in config` : 'No workflows defined in config');
311
+ return {
312
+ valid: false,
313
+ errors,
314
+ warnings,
315
+ name: resolved.name,
316
+ description: resolved.description,
317
+ pattern: resolved.swarm.pattern,
318
+ agents: [],
319
+ waves: [],
320
+ totalSteps: 0,
321
+ estimatedWaves: 0,
322
+ };
323
+ }
324
+ // 3. Load and validate custom steps
325
+ let customSteps = new Map();
326
+ try {
327
+ customSteps = loadCustomSteps(this.cwd);
328
+ }
329
+ catch (err) {
330
+ if (err instanceof CustomStepsParseError) {
331
+ errors.push(`Custom steps file error: ${err.issue}\n${err.suggestion}`);
332
+ }
333
+ else {
334
+ errors.push(`Failed to load custom steps: ${err instanceof Error ? err.message : String(err)}`);
335
+ }
336
+ }
337
+ // Validate custom step usage in workflow steps
338
+ const customStepValidation = validateCustomStepsUsage(workflow.steps, customSteps);
339
+ errors.push(...customStepValidation.errors);
340
+ warnings.push(...customStepValidation.warnings);
341
+ // Resolve custom steps for further validation
342
+ let resolvedSteps = workflow.steps;
343
+ if (customStepValidation.valid) {
344
+ try {
345
+ resolvedSteps = resolveAllCustomSteps(workflow.steps, customSteps);
346
+ }
347
+ catch (err) {
348
+ if (err instanceof CustomStepResolutionError) {
349
+ errors.push(`${err.issue}\n${err.suggestion}`);
350
+ }
351
+ else {
352
+ errors.push(`Failed to resolve custom steps: ${err instanceof Error ? err.message : String(err)}`);
353
+ }
354
+ }
355
+ }
356
+ // 4. Build agent map and validate step→agent references
357
+ const agentMap = new Map();
358
+ for (const agent of resolved.agents) {
359
+ agentMap.set(agent.name, agent);
360
+ }
361
+ const stepAgentCounts = new Map();
362
+ for (const step of resolvedSteps) {
363
+ // Only validate agent references for agent-type steps
364
+ if (step.agent) {
365
+ if (!agentMap.has(step.agent)) {
366
+ warnings.push(`Step "${step.name}" references unknown agent "${step.agent}"`);
367
+ }
368
+ stepAgentCounts.set(step.agent, (stepAgentCounts.get(step.agent) ?? 0) + 1);
369
+ }
370
+ }
371
+ // Validate cwd paths
372
+ for (const agent of resolved.agents) {
373
+ if (agent.cwd) {
374
+ const resolvedCwd = path.resolve(this.cwd, agent.cwd);
375
+ if (!existsSync(resolvedCwd)) {
376
+ warnings.push(`Agent "${agent.name}" cwd "${agent.cwd}" resolves to "${resolvedCwd}" which does not exist`);
377
+ }
378
+ }
379
+ if (agent.additionalPaths) {
380
+ for (const ap of agent.additionalPaths) {
381
+ const resolvedPath = path.resolve(this.cwd, ap);
382
+ if (!existsSync(resolvedPath)) {
383
+ warnings.push(`Agent "${agent.name}" additionalPath "${ap}" resolves to "${resolvedPath}" which does not exist`);
384
+ }
385
+ }
386
+ }
387
+ }
388
+ // Cycle detection via topological sort
389
+ const stepNames = new Set(resolvedSteps.map((s) => s.name));
390
+ const inDegree = new Map();
391
+ const adjacency = new Map();
392
+ for (const step of resolvedSteps) {
393
+ inDegree.set(step.name, 0);
394
+ adjacency.set(step.name, []);
395
+ }
396
+ for (const step of resolvedSteps) {
397
+ for (const dep of step.dependsOn ?? []) {
398
+ if (stepNames.has(dep)) {
399
+ adjacency.get(dep).push(step.name);
400
+ inDegree.set(step.name, (inDegree.get(step.name) ?? 0) + 1);
401
+ }
402
+ }
403
+ }
404
+ const topoQueue = [];
405
+ for (const [name, deg] of inDegree) {
406
+ if (deg === 0)
407
+ topoQueue.push(name);
408
+ }
409
+ let visited = 0;
410
+ while (topoQueue.length > 0) {
411
+ const node = topoQueue.shift();
412
+ visited++;
413
+ for (const neighbor of adjacency.get(node) ?? []) {
414
+ const newDeg = (inDegree.get(neighbor) ?? 1) - 1;
415
+ inDegree.set(neighbor, newDeg);
416
+ if (newDeg === 0)
417
+ topoQueue.push(neighbor);
418
+ }
419
+ }
420
+ if (visited < resolvedSteps.length) {
421
+ errors.push('Dependency cycle detected in workflow steps. Check dependsOn references for circular dependencies.');
422
+ }
423
+ // Missing dependency references
424
+ for (const step of resolvedSteps) {
425
+ for (const dep of step.dependsOn ?? []) {
426
+ if (!stepNames.has(dep)) {
427
+ errors.push(`Step "${step.name}" depends on unknown step "${dep}"`);
428
+ }
429
+ }
430
+ }
431
+ // Unreachable steps (steps that are never depended on and aren't root steps)
432
+ const dependedOn = new Set();
433
+ for (const step of resolvedSteps) {
434
+ for (const dep of step.dependsOn ?? []) {
435
+ dependedOn.add(dep);
436
+ }
437
+ }
438
+ // Timeout warnings
439
+ for (const step of resolvedSteps) {
440
+ if (!step.timeoutMs) {
441
+ const agentDef = step.agent ? agentMap.get(step.agent) : undefined;
442
+ if (!agentDef?.constraints?.timeoutMs && !resolved.swarm.timeoutMs) {
443
+ warnings.push(`Step "${step.name}" has no timeout configured (no step, agent, or swarm-level timeout)`);
444
+ }
445
+ }
446
+ }
447
+ // Large dependency fan-in warning (decomposition guidance)
448
+ for (const step of resolvedSteps) {
449
+ if ((step.dependsOn?.length ?? 0) >= 5) {
450
+ warnings.push(`Step "${step.name}" depends on ${step.dependsOn.length} upstream steps. ` +
451
+ `Consider decomposing into smaller verification steps to reduce context size.`);
452
+ }
453
+ }
454
+ // 4. Build agent summary
455
+ const agents = resolved.agents.map((a) => ({
456
+ name: a.name,
457
+ cli: a.cli,
458
+ role: a.role,
459
+ cwd: a.cwd,
460
+ stepCount: stepAgentCounts.get(a.name) ?? 0,
461
+ }));
462
+ // 5. Simulate execution waves
463
+ const waves = [];
464
+ const completed = new Set();
465
+ const allSteps = [...resolvedSteps];
466
+ let waveNum = 0;
467
+ while (completed.size < allSteps.length) {
468
+ const ready = allSteps.filter((step) => {
469
+ if (completed.has(step.name))
470
+ return false;
471
+ const deps = step.dependsOn ?? [];
472
+ return deps.every((dep) => completed.has(dep));
473
+ });
474
+ if (ready.length === 0) {
475
+ // Remaining steps are blocked — likely a cycle or unresolvable deps
476
+ const blocked = allSteps.filter((s) => !completed.has(s.name)).map((s) => s.name);
477
+ errors.push(`Blocked steps with unresolvable dependencies: ${blocked.join(', ')}`);
478
+ break;
479
+ }
480
+ waveNum++;
481
+ waves.push({
482
+ wave: waveNum,
483
+ steps: ready.map((s) => ({
484
+ name: s.name,
485
+ agent: s.agent,
486
+ dependsOn: s.dependsOn ?? [],
487
+ })),
488
+ });
489
+ for (const step of ready) {
490
+ completed.add(step.name);
491
+ }
492
+ }
493
+ // 6. Resource estimation
494
+ const peakConcurrency = Math.max(...waves.map((w) => w.steps.length), 0);
495
+ const totalAgentSteps = resolvedSteps.filter((s) => s.type !== 'deterministic' && s.type !== 'worktree').length;
496
+ // 7. Check maxConcurrency against wave widths
497
+ const maxConcurrency = resolved.swarm.maxConcurrency;
498
+ if (maxConcurrency !== undefined) {
499
+ for (const wave of waves) {
500
+ if (wave.steps.length > maxConcurrency) {
501
+ warnings.push(`Wave ${wave.wave} has ${wave.steps.length} parallel steps but maxConcurrency is ${maxConcurrency}`);
502
+ }
503
+ }
504
+ }
505
+ return {
506
+ valid: errors.length === 0,
507
+ errors,
508
+ warnings,
509
+ name: workflow.name,
510
+ description: workflow.description ?? resolved.description,
511
+ pattern: resolved.swarm.pattern,
512
+ agents,
513
+ waves,
514
+ totalSteps: workflow.steps.length,
515
+ maxConcurrency,
516
+ estimatedWaves: waves.length,
517
+ estimatedPeakConcurrency: peakConcurrency,
518
+ estimatedTotalAgentSteps: totalAgentSteps,
519
+ };
520
+ }
521
+ validateWorkflow(wf, agents, source) {
522
+ if (typeof wf !== 'object' || wf === null) {
523
+ throw new Error(`${source}: each workflow must be an object`);
524
+ }
525
+ const w = wf;
526
+ if (typeof w.name !== 'string') {
527
+ throw new Error(`${source}: each workflow must have a string "name"`);
528
+ }
529
+ if (!Array.isArray(w.steps) || w.steps.length === 0) {
530
+ throw new Error(`${source}: workflow "${w.name}" must have a non-empty "steps" array`);
531
+ }
532
+ for (const step of w.steps) {
533
+ if (typeof step !== 'object' || step === null) {
534
+ throw new Error(`${source}: each step must be an object`);
535
+ }
536
+ const s = step;
537
+ if (typeof s.name !== 'string') {
538
+ throw new Error(`${source}: each step must have a string "name" field`);
539
+ }
540
+ // Deterministic steps require type and command
541
+ if (s.type === 'deterministic') {
542
+ if (typeof s.command !== 'string') {
543
+ throw new Error(`${source}: deterministic step "${s.name}" must have a "command" field`);
544
+ }
545
+ }
546
+ else {
547
+ // Agent steps (type undefined or 'agent') require agent and task
548
+ if (typeof s.agent !== 'string' || typeof s.task !== 'string') {
549
+ throw new Error(`${source}: agent step "${s.name}" must have "agent" and "task" string fields`);
550
+ }
551
+ }
552
+ }
553
+ // Validate DAG: check for unknown dependencies and cycles
554
+ const stepNames = new Set(w.steps.map((s) => s.name));
555
+ for (const step of w.steps) {
556
+ if (step.dependsOn) {
557
+ for (const dep of step.dependsOn) {
558
+ if (!stepNames.has(dep)) {
559
+ throw new Error(`${source}: step "${step.name}" depends on unknown step "${dep}"`);
560
+ }
561
+ }
562
+ }
563
+ }
564
+ this.detectCycles(w.steps, source, w.name);
565
+ this.detectLeadWorkerDeadlock(w.steps, agents, source, w.name);
566
+ // Warn if non-interactive agent task is excessively large before interpolation
567
+ for (const step of w.steps) {
568
+ if (step.type === 'deterministic' || step.type === 'worktree')
569
+ continue;
570
+ const agentDef = agents.find((a) => a.name === step.agent);
571
+ const isNonInteractive = agentDef?.interactive === false || ['worker', 'reviewer', 'analyst'].includes(agentDef?.preset ?? '');
572
+ if (isNonInteractive && (step.task ?? '').length > 10_000) {
573
+ console.warn(`[WorkflowRunner] Warning: non-interactive step "${step.name}" has a very large task (${step.task.length} chars). ` +
574
+ `Consider pre-reading files in a deterministic step and injecting only the relevant excerpt.`);
575
+ }
576
+ }
577
+ }
578
+ detectCycles(steps, source, workflowName) {
579
+ const adj = new Map();
580
+ for (const step of steps) {
581
+ adj.set(step.name, step.dependsOn ?? []);
582
+ }
583
+ const visited = new Set();
584
+ const inStack = new Set();
585
+ const dfs = (node) => {
586
+ if (inStack.has(node)) {
587
+ throw new Error(`${source}: workflow "${workflowName}" contains a dependency cycle involving "${node}"`);
588
+ }
589
+ if (visited.has(node))
590
+ return;
591
+ inStack.add(node);
592
+ for (const dep of adj.get(node) ?? []) {
593
+ dfs(dep);
594
+ }
595
+ inStack.delete(node);
596
+ visited.add(node);
597
+ };
598
+ for (const step of steps) {
599
+ dfs(step.name);
600
+ }
601
+ }
602
+ detectLeadWorkerDeadlock(steps, agents, source, workflowName) {
603
+ // Build a map of step name → steps that depend on it
604
+ const downstreamOf = new Map();
605
+ for (const step of steps) {
606
+ for (const dep of step.dependsOn ?? []) {
607
+ if (!downstreamOf.has(dep))
608
+ downstreamOf.set(dep, []);
609
+ downstreamOf.get(dep).push(step.name);
610
+ }
611
+ }
612
+ for (const step of steps) {
613
+ // Only check interactive agent steps (leads)
614
+ if (step.type === 'deterministic' || step.type === 'worktree')
615
+ continue;
616
+ const agentDef = agents.find((a) => a.name === step.agent);
617
+ // Skip non-interactive agents — they can't wait for channel signals
618
+ if (agentDef?.interactive === false ||
619
+ agentDef?.preset === 'worker' ||
620
+ agentDef?.preset === 'reviewer' ||
621
+ agentDef?.preset === 'analyst')
622
+ continue;
623
+ const downstream = downstreamOf.get(step.name) ?? [];
624
+ if (downstream.length === 0)
625
+ continue;
626
+ // Check if the task mentions downstream step names in a "waiting" context
627
+ const task = step.task ?? '';
628
+ const waitingKeywords = /\b(wait|waiting|monitor|check inbox|check.*channel|DONE|_DONE|signal)\b/i;
629
+ if (!waitingKeywords.test(task))
630
+ continue;
631
+ // Check if any downstream step name appears in the task
632
+ const mentioned = downstream.filter((name) => task.includes(name));
633
+ if (mentioned.length > 0) {
634
+ throw new Error(`${source}: workflow "${workflowName}" likely has a lead\u2194worker deadlock. ` +
635
+ `Step "${step.name}" (interactive lead) mentions downstream step(s) [${mentioned.join(', ')}] in its task ` +
636
+ `and appears to wait for their signals, but those steps can't start until "${step.name}" completes. ` +
637
+ `Fix: make workers depend on a shared upstream step (e.g. "context"), not on the lead step. ` +
638
+ `See tests/workflows/README.md rule #6.`);
639
+ }
640
+ }
641
+ }
642
+ // ── Template variable resolution ────────────────────────────────────────
643
+ /** Resolve {{variable}} placeholders in all task strings. */
644
+ resolveVariables(config, vars) {
645
+ const resolved = structuredClone(config);
646
+ for (const agent of resolved.agents) {
647
+ if (agent.task) {
648
+ agent.task = this.interpolate(agent.task, vars);
649
+ }
650
+ }
651
+ if (resolved.workflows) {
652
+ for (const wf of resolved.workflows) {
653
+ for (const step of wf.steps) {
654
+ // Resolve variables in task (agent steps) and command (deterministic steps)
655
+ if (step.task) {
656
+ step.task = this.interpolate(step.task, vars);
657
+ }
658
+ if (step.command) {
659
+ step.command = this.interpolate(step.command, vars);
660
+ }
661
+ }
662
+ }
663
+ }
664
+ return resolved;
665
+ }
666
+ interpolate(template, vars) {
667
+ return template.replace(/\{\{([\w][\w.\-]*)\}\}/g, (_match, key) => {
668
+ // Skip step-output placeholders — they are resolved at execution time by interpolateStepTask()
669
+ if (key.startsWith('steps.')) {
670
+ return _match;
671
+ }
672
+ // Resolve dot-path variables like steps.plan.output
673
+ const value = this.resolveDotPath(key, vars);
674
+ if (value === undefined) {
675
+ throw new Error(`Unresolved variable: {{${key}}}`);
676
+ }
677
+ return String(value);
678
+ });
679
+ }
680
+ resolveDotPath(key, vars) {
681
+ // Simple key — direct lookup
682
+ if (!key.includes('.')) {
683
+ return vars[key];
684
+ }
685
+ // Dot-path — walk into nested context
686
+ const parts = key.split('.');
687
+ let current = vars;
688
+ for (const part of parts) {
689
+ if (current === null || current === undefined || typeof current !== 'object') {
690
+ return undefined;
691
+ }
692
+ current = current[part];
693
+ }
694
+ if (current === undefined || current === null) {
695
+ return undefined;
696
+ }
697
+ if (typeof current === 'string' || typeof current === 'number' || typeof current === 'boolean') {
698
+ return current;
699
+ }
700
+ return String(current);
701
+ }
702
+ /** Build a nested context from completed step outputs for {{steps.X.output}} resolution. */
703
+ buildStepOutputContext(stepStates, runId) {
704
+ const steps = {};
705
+ for (const [name, state] of stepStates) {
706
+ if (state.row.status === 'completed' && state.row.output !== undefined) {
707
+ steps[name] = { output: state.row.output };
708
+ }
709
+ else if (state.row.status === 'completed' && runId) {
710
+ // Recover from persisted output on disk (e.g., after restart)
711
+ const persisted = this.loadStepOutput(runId, name);
712
+ if (persisted) {
713
+ state.row.output = persisted;
714
+ steps[name] = { output: persisted };
715
+ }
716
+ }
717
+ }
718
+ return { steps };
719
+ }
720
+ /** Interpolate step-output variables, silently skipping unresolved ones (they may be user vars). */
721
+ interpolateStepTask(template, context) {
722
+ return template.replace(/\{\{(steps\.[\w\-]+\.output)\}\}/g, (_match, key) => {
723
+ const value = this.resolveDotPath(key, context);
724
+ if (value === undefined) {
725
+ // Leave unresolved — may not be an error if the template doesn't depend on prior steps
726
+ return _match;
727
+ }
728
+ return String(value);
729
+ });
730
+ }
731
+ // ── Execution ───────────────────────────────────────────────────────────
732
+ /** Execute a named workflow from a validated config. */
733
+ async execute(config, workflowName, vars) {
734
+ const resolved = vars ? this.resolveVariables(config, vars) : config;
735
+ const workflows = resolved.workflows ?? [];
736
+ const workflow = workflowName ? workflows.find((w) => w.name === workflowName) : workflows[0];
737
+ if (!workflow) {
738
+ throw new Error(workflowName ? `Workflow "${workflowName}" not found in config` : 'No workflows defined in config');
739
+ }
740
+ // Load and resolve custom step definitions
741
+ const customSteps = loadCustomSteps(this.cwd);
742
+ const resolvedSteps = resolveAllCustomSteps(workflow.steps, customSteps);
743
+ const resolvedWorkflow = { ...workflow, steps: resolvedSteps };
744
+ const runId = this.generateId();
745
+ const now = new Date().toISOString();
746
+ const run = {
747
+ id: runId,
748
+ workspaceId: this.workspaceId,
749
+ workflowName: resolvedWorkflow.name,
750
+ pattern: resolved.swarm.pattern,
751
+ status: 'pending',
752
+ config: resolved,
753
+ startedAt: now,
754
+ createdAt: now,
755
+ updatedAt: now,
756
+ };
757
+ await this.db.insertRun(run);
758
+ // Build step rows
759
+ const stepStates = new Map();
760
+ for (const step of resolvedWorkflow.steps) {
761
+ // Handle agent, deterministic, and worktree steps
762
+ const isNonAgent = step.type === 'deterministic' || step.type === 'worktree';
763
+ const stepRow = {
764
+ id: this.generateId(),
765
+ runId,
766
+ stepName: step.name,
767
+ agentName: isNonAgent ? null : (step.agent ?? null),
768
+ stepType: isNonAgent ? step.type : 'agent',
769
+ status: 'pending',
770
+ task: step.type === 'deterministic'
771
+ ? (step.command ?? '')
772
+ : step.type === 'worktree'
773
+ ? (step.branch ?? '')
774
+ : (step.task ?? ''),
775
+ dependsOn: step.dependsOn ?? [],
776
+ retryCount: 0,
777
+ createdAt: now,
778
+ updatedAt: now,
779
+ };
780
+ await this.db.insertStep(stepRow);
781
+ stepStates.set(step.name, { row: stepRow });
782
+ }
783
+ return this.runWorkflowCore({
784
+ run,
785
+ workflow: resolvedWorkflow,
786
+ config: resolved,
787
+ stepStates,
788
+ isResume: false,
789
+ });
790
+ }
791
+ /** Resume a previously paused or partially completed run. */
792
+ async resume(runId, vars) {
793
+ const run = await this.db.getRun(runId);
794
+ if (!run) {
795
+ throw new Error(`Run "${runId}" not found`);
796
+ }
797
+ if (run.status !== 'running' && run.status !== 'failed') {
798
+ throw new Error(`Run "${runId}" is in status "${run.status}" and cannot be resumed`);
799
+ }
800
+ const config = vars ? this.resolveVariables(run.config, vars) : run.config;
801
+ const workflows = config.workflows ?? [];
802
+ const workflow = workflows.find((w) => w.name === run.workflowName);
803
+ if (!workflow) {
804
+ throw new Error(`Workflow "${run.workflowName}" not found in stored config`);
805
+ }
806
+ const existingSteps = await this.db.getStepsByRunId(runId);
807
+ const stepStates = new Map();
808
+ for (const stepRow of existingSteps) {
809
+ stepStates.set(stepRow.stepName, { row: stepRow });
810
+ }
811
+ // Reset failed steps to pending for retry
812
+ for (const [, state] of stepStates) {
813
+ if (state.row.status === 'failed') {
814
+ state.row.status = 'pending';
815
+ state.row.error = undefined;
816
+ await this.db.updateStep(state.row.id, {
817
+ status: 'pending',
818
+ error: undefined,
819
+ updatedAt: new Date().toISOString(),
820
+ });
821
+ }
822
+ }
823
+ return this.runWorkflowCore({
824
+ run,
825
+ workflow,
826
+ config,
827
+ stepStates,
828
+ isResume: true,
829
+ });
830
+ }
831
+ async runWorkflowCore(input) {
832
+ const { run, workflow, config, stepStates, isResume } = input;
833
+ const runId = run.id;
834
+ // Start execution
835
+ this.abortController = new AbortController();
836
+ this.paused = false;
837
+ this.currentConfig = config;
838
+ this.currentRunId = runId;
839
+ this.runStartTime = Date.now();
840
+ this.log(`Starting workflow "${workflow.name}" (${workflow.steps.length} steps)`);
841
+ // Initialize trajectory recording
842
+ this.trajectory = new WorkflowTrajectory(config.trajectories, runId, this.cwd);
843
+ try {
844
+ await this.updateRunStatus(runId, 'running');
845
+ if (!isResume) {
846
+ this.emit({ type: 'run:started', runId });
847
+ }
848
+ const pendingCount = [...stepStates.values()].filter((s) => s.row.status === 'pending').length;
849
+ if (isResume) {
850
+ await this.trajectory.start(workflow.name, workflow.steps.length, `Resumed run: ${pendingCount} pending steps of ${workflow.steps.length} total`, config.description, config.swarm.pattern);
851
+ }
852
+ else {
853
+ // Analyze DAG for trajectory context on first run
854
+ const dagInfo = this.analyzeDAG(workflow.steps);
855
+ await this.trajectory.start(workflow.name, workflow.steps.length, dagInfo, config.description, config.swarm.pattern);
856
+ }
857
+ const channel = config.swarm.channel ??
858
+ `wf-${this.sanitizeChannelName(config.name || run.workflowName)}-${this.generateShortId()}`;
859
+ this.channel = channel;
860
+ if (!config.swarm.channel) {
861
+ config.swarm.channel = channel;
862
+ await this.db.updateRun(runId, { config });
863
+ }
864
+ this.log('Resolving Relaycast API key...');
865
+ await this.ensureRelaycastApiKey(channel);
866
+ this.log('API key resolved');
867
+ if (this.relayApiKeyAutoCreated && this.relayApiKey) {
868
+ this.log(`Workspace created — follow this run in Relaycast:`);
869
+ this.log(` RELAY_API_KEY=${this.relayApiKey}`);
870
+ this.log(` Observer: https://observer.relaycast.dev (paste key above)`);
871
+ this.log(` Channel: ${channel}`);
872
+ }
873
+ this.log('Starting broker...');
874
+ // Include a short run ID suffix in the broker name so each workflow execution
875
+ // registers a unique identity in Relaycast. Without this, re-running in the same
876
+ // workspace hits a 409 conflict because the previous run's agent is still registered.
877
+ const brokerBaseName = path.basename(this.cwd) || 'workflow';
878
+ const brokerName = `${brokerBaseName}-${runId.slice(0, 8)}`;
879
+ this.relay = new AgentRelay({
880
+ ...this.relayOptions,
881
+ brokerName,
882
+ channels: [channel],
883
+ env: this.getRelayEnv(),
884
+ // Workflows spawn agents across multiple waves; each spawn requires a PTY +
885
+ // Relaycast registration. 60s is too tight when the broker is saturated with
886
+ // long-running PTY processes from earlier steps. 120s gives room to breathe.
887
+ requestTimeoutMs: this.relayOptions.requestTimeoutMs ?? 120_000,
888
+ });
889
+ // Wire PTY output dispatcher — routes chunks to per-agent listeners + activity logging
890
+ this.relay.onWorkerOutput = ({ name, chunk }) => {
891
+ const listener = this.ptyListeners.get(name);
892
+ if (listener)
893
+ listener(chunk);
894
+ // Parse PTY output for high-signal activity
895
+ const stripped = WorkflowRunner.stripAnsi(chunk);
896
+ const shortName = name.replace(/-[a-f0-9]{6,}$/, '');
897
+ let activity;
898
+ if (/Read\(/.test(stripped)) {
899
+ // Extract filename — path may be truncated at chunk boundary so require
900
+ // at least a dir separator or 8+ chars to trust the basename.
901
+ const m = stripped.match(/Read\(\s*~?([^\s)"']{8,})/);
902
+ if (m) {
903
+ const base = path.basename(m[1]);
904
+ activity = base.length >= 3 ? `Reading ${base}` : 'Reading file...';
905
+ }
906
+ else {
907
+ activity = 'Reading file...';
908
+ }
909
+ }
910
+ else if (/Edit\(/.test(stripped)) {
911
+ const m = stripped.match(/Edit\(\s*~?([^\s)"']{8,})/);
912
+ if (m) {
913
+ const base = path.basename(m[1]);
914
+ activity = base.length >= 3 ? `Editing ${base}` : 'Editing file...';
915
+ }
916
+ else {
917
+ activity = 'Editing file...';
918
+ }
919
+ }
920
+ else if (/Bash\(/.test(stripped)) {
921
+ // Extract a short preview of the command
922
+ const m = stripped.match(/Bash\(\s*(.{1,40})/);
923
+ activity = m ? `Running: ${m[1].trim()}...` : 'Running command...';
924
+ }
925
+ else if (/Explore\(/.test(stripped)) {
926
+ const m = stripped.match(/Explore\(\s*(.{1,50})/);
927
+ activity = m ? `Exploring: ${m[1].replace(/\).*/, '').trim()}` : 'Exploring codebase...';
928
+ }
929
+ else if (/Task\(/.test(stripped)) {
930
+ activity = 'Running sub-agent...';
931
+ }
932
+ else if (/Sublimating|Thinking|Coalescing|Cultivating/.test(stripped)) {
933
+ const m = stripped.match(/(\d+)s/);
934
+ activity = m ? `Thinking... (${m[1]}s)` : 'Thinking...';
935
+ }
936
+ if (activity && this.lastActivity.get(name) !== activity) {
937
+ this.lastActivity.set(name, activity);
938
+ this.log(`[${shortName}] ${activity}`);
939
+ }
940
+ };
941
+ // Wire relay event hooks for rich console logging
942
+ this.relay.onMessageReceived = (msg) => {
943
+ const body = msg.text.length > 120 ? msg.text.slice(0, 117) + '...' : msg.text;
944
+ const fromShort = msg.from.replace(/-[a-f0-9]{6,}$/, '');
945
+ const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
946
+ this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
947
+ };
948
+ this.relay.onAgentSpawned = (agent) => {
949
+ // Skip agents already managed by step execution
950
+ if (!this.activeAgentHandles.has(agent.name)) {
951
+ this.log(`[spawned] ${agent.name} (${agent.runtime})`);
952
+ }
953
+ };
954
+ this.relay.onAgentExited = (agent) => {
955
+ this.lastActivity.delete(agent.name);
956
+ this.lastIdleLog.delete(agent.name);
957
+ if (!this.activeAgentHandles.has(agent.name)) {
958
+ this.log(`[exited] ${agent.name} (code: ${agent.exitCode ?? '?'})`);
959
+ }
960
+ };
961
+ this.relay.onAgentIdle = ({ name, idleSecs }) => {
962
+ // Only log at 30s multiples to avoid watchdog spam
963
+ const bucket = Math.floor(idleSecs / 30) * 30;
964
+ if (bucket >= 30 && this.lastIdleLog.get(name) !== bucket) {
965
+ this.lastIdleLog.set(name, bucket);
966
+ const shortName = name.replace(/-[a-f0-9]{6,}$/, '');
967
+ this.log(`[idle] ${shortName} silent for ${bucket}s`);
968
+ }
969
+ };
970
+ this.relaycast = undefined;
971
+ this.relaycastAgent = undefined;
972
+ // Wire broker stderr to console for observability
973
+ this.unsubBrokerStderr = this.relay.onBrokerStderr((line) => {
974
+ console.log(`[broker] ${line}`);
975
+ });
976
+ this.log(`Creating channel: ${channel}...`);
977
+ if (isResume) {
978
+ await this.createAndJoinRelaycastChannel(channel);
979
+ }
980
+ else {
981
+ await this.createAndJoinRelaycastChannel(channel, workflow.description);
982
+ }
983
+ this.log('Channel ready');
984
+ if (isResume) {
985
+ this.postToChannel(`Workflow **${workflow.name}** resumed — ${pendingCount} pending steps`);
986
+ }
987
+ else {
988
+ this.postToChannel(`Workflow **${workflow.name}** started — ${workflow.steps.length} steps, pattern: ${config.swarm.pattern}`);
989
+ }
990
+ const agentMap = new Map();
991
+ for (const agent of config.agents) {
992
+ agentMap.set(agent.name, agent);
993
+ }
994
+ // Run preflight checks before any steps (skip on resume)
995
+ if (!isResume && workflow.preflight?.length) {
996
+ await this.runPreflightChecks(workflow.preflight, runId);
997
+ }
998
+ // Pre-register all interactive agent steps with Relaycast before execution.
999
+ // This warms the broker's token cache so spawn_agent calls are instant cache
1000
+ // hits rather than blocking on individual HTTP registrations per spawn.
1001
+ // Agent names use the run ID prefix (deterministic) so we can predict them.
1002
+ if (this.relay && !isResume) {
1003
+ const agentPreflight = workflow.steps
1004
+ .filter((s) => s.type !== 'deterministic' && s.type !== 'worktree' && s.agent)
1005
+ .map((s) => {
1006
+ const agentDef = agentMap.get(s.agent);
1007
+ return agentDef && agentDef.interactive !== false
1008
+ ? { name: `${s.name}-${runId.slice(0, 8)}`, cli: agentDef.cli }
1009
+ : null;
1010
+ })
1011
+ .filter((e) => e !== null);
1012
+ if (agentPreflight.length > 0) {
1013
+ this.log(`Pre-registering ${agentPreflight.length} agents with Relaycast...`);
1014
+ await this.relay.preflightAgents(agentPreflight).catch((err) => {
1015
+ this.log(`[preflight-agents] warning: ${err.message} — continuing without pre-registration`);
1016
+ });
1017
+ this.log('Agent pre-registration complete');
1018
+ }
1019
+ }
1020
+ this.log(`Executing ${workflow.steps.length} steps (pattern: ${config.swarm.pattern})`);
1021
+ await this.executeSteps(workflow, stepStates, agentMap, config.errorHandling, runId);
1022
+ const allCompleted = [...stepStates.values()].every((s) => s.row.status === 'completed' || s.row.status === 'skipped');
1023
+ if (allCompleted) {
1024
+ this.log('Workflow completed successfully');
1025
+ await this.updateRunStatus(runId, 'completed');
1026
+ this.emit({ type: 'run:completed', runId });
1027
+ const outcomes = this.collectOutcomes(stepStates, workflow.steps);
1028
+ const summary = this.trajectory.buildRunSummary(outcomes);
1029
+ const confidence = this.trajectory.computeConfidence(outcomes);
1030
+ await this.trajectory.complete(summary, confidence, {
1031
+ learnings: this.trajectory.extractLearnings(outcomes),
1032
+ challenges: this.trajectory.extractChallenges(outcomes),
1033
+ });
1034
+ this.postCompletionReport(workflow.name, outcomes, summary, confidence);
1035
+ this.logRunSummary(workflow.name, outcomes, runId);
1036
+ }
1037
+ else {
1038
+ const failedStep = [...stepStates.values()].find((s) => s.row.status === 'failed');
1039
+ const errorMsg = failedStep?.row.error ?? 'One or more steps failed';
1040
+ await this.updateRunStatus(runId, 'failed', errorMsg);
1041
+ this.emit({ type: 'run:failed', runId, error: errorMsg });
1042
+ const outcomes = this.collectOutcomes(stepStates, workflow.steps);
1043
+ this.postFailureReport(workflow.name, outcomes, errorMsg);
1044
+ this.logRunSummary(workflow.name, outcomes, runId);
1045
+ await this.trajectory.abandon(errorMsg);
1046
+ }
1047
+ }
1048
+ catch (err) {
1049
+ const errorMsg = err instanceof Error ? err.message : String(err);
1050
+ const status = !isResume && this.abortController?.signal.aborted ? 'cancelled' : 'failed';
1051
+ await this.updateRunStatus(runId, status, errorMsg);
1052
+ if (status === 'cancelled') {
1053
+ this.emit({ type: 'run:cancelled', runId });
1054
+ this.postToChannel(`Workflow **${workflow.name}** cancelled`);
1055
+ await this.trajectory.abandon('Cancelled by user');
1056
+ }
1057
+ else {
1058
+ this.emit({ type: 'run:failed', runId, error: errorMsg });
1059
+ this.postToChannel(`Workflow failed: ${errorMsg}`);
1060
+ await this.trajectory.abandon(errorMsg);
1061
+ }
1062
+ }
1063
+ finally {
1064
+ for (const stream of this.ptyLogStreams.values())
1065
+ stream.end();
1066
+ this.ptyLogStreams.clear();
1067
+ this.ptyOutputBuffers.clear();
1068
+ this.ptyListeners.clear();
1069
+ this.unsubBrokerStderr?.();
1070
+ this.unsubBrokerStderr = undefined;
1071
+ // Null out relay event hooks to prevent leaks
1072
+ if (this.relay) {
1073
+ this.relay.onMessageReceived = null;
1074
+ this.relay.onAgentSpawned = null;
1075
+ this.relay.onAgentExited = null;
1076
+ this.relay.onAgentIdle = null;
1077
+ this.relay.onWorkerOutput = null;
1078
+ }
1079
+ this.lastIdleLog.clear();
1080
+ this.lastActivity.clear();
1081
+ this.log('Shutting down broker...');
1082
+ await this.relay?.shutdown();
1083
+ this.relay = undefined;
1084
+ this.runStartTime = undefined;
1085
+ this.relaycast = undefined;
1086
+ this.relaycastAgent = undefined;
1087
+ this.channel = undefined;
1088
+ this.trajectory = undefined;
1089
+ this.abortController = undefined;
1090
+ this.currentConfig = undefined;
1091
+ this.currentRunId = undefined;
1092
+ this.activeAgentHandles.clear();
1093
+ }
1094
+ const finalRun = await this.db.getRun(runId);
1095
+ return finalRun ?? run;
1096
+ }
1097
+ /** Pause execution. Currently-running steps will finish but no new steps start. */
1098
+ pause() {
1099
+ this.paused = true;
1100
+ }
1101
+ /** Resume after a pause(). */
1102
+ unpause() {
1103
+ this.paused = false;
1104
+ this.pauseResolver?.();
1105
+ this.pauseResolver = undefined;
1106
+ }
1107
+ /** Abort the current run. Running agents are released. */
1108
+ abort() {
1109
+ // Unblock waitIfPaused() so the run loop can exit
1110
+ this.pauseResolver?.();
1111
+ this.pauseResolver = undefined;
1112
+ this.abortController?.abort();
1113
+ }
1114
+ // ── Step execution engine ─────────────────────────────────────────────
1115
+ async executeSteps(workflow, stepStates, agentMap, errorHandling, runId) {
1116
+ const rawStrategy = errorHandling?.strategy ?? workflow.onError ?? 'fail-fast';
1117
+ // Map shorthand onError values to canonical strategy names.
1118
+ // 'retry' maps to 'fail-fast' so downstream steps are properly skipped after retries exhaust.
1119
+ const strategy = rawStrategy === 'fail'
1120
+ ? 'fail-fast'
1121
+ : rawStrategy === 'skip'
1122
+ ? 'continue'
1123
+ : rawStrategy === 'retry'
1124
+ ? 'fail-fast'
1125
+ : rawStrategy;
1126
+ // DAG-based execution: repeatedly find ready steps and run them in parallel
1127
+ while (true) {
1128
+ this.checkAborted();
1129
+ await this.waitIfPaused();
1130
+ const readySteps = this.findReadySteps(workflow.steps, stepStates);
1131
+ if (readySteps.length === 0) {
1132
+ // No steps ready — either all done or blocked
1133
+ break;
1134
+ }
1135
+ // Begin a track chapter if multiple parallel steps are starting
1136
+ if (readySteps.length > 1 && this.trajectory) {
1137
+ const trackNames = readySteps.map((s) => s.name).join(', ');
1138
+ await this.trajectory.beginTrack(trackNames);
1139
+ }
1140
+ // Stagger spawns when many steps are ready simultaneously.
1141
+ // All agents still run concurrently once spawned — this only delays when
1142
+ // each step's executeStep() begins, preventing Relaycast from receiving
1143
+ // N simultaneous registration requests which causes spawn timeouts.
1144
+ const STAGGER_THRESHOLD = 3;
1145
+ const STAGGER_DELAY_MS = 2_000;
1146
+ const results = await Promise.allSettled(readySteps.map((step, i) => {
1147
+ const delay = readySteps.length > STAGGER_THRESHOLD ? i * STAGGER_DELAY_MS : 0;
1148
+ if (delay === 0) {
1149
+ return this.executeStep(step, stepStates, agentMap, errorHandling, runId);
1150
+ }
1151
+ return new Promise((resolve) => setTimeout(resolve, delay)).then(() => this.executeStep(step, stepStates, agentMap, errorHandling, runId));
1152
+ }));
1153
+ // Collect outcomes from this batch for convergence reflection
1154
+ const batchOutcomes = [];
1155
+ for (let i = 0; i < results.length; i++) {
1156
+ const result = results[i];
1157
+ const step = readySteps[i];
1158
+ const state = stepStates.get(step.name);
1159
+ if (result.status === 'rejected') {
1160
+ const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
1161
+ if (state && state.row.status !== 'failed') {
1162
+ await this.markStepFailed(state, error, runId);
1163
+ }
1164
+ batchOutcomes.push({
1165
+ name: step.name,
1166
+ agent: step.agent ?? 'deterministic',
1167
+ status: 'failed',
1168
+ attempts: (state?.row.retryCount ?? 0) + 1,
1169
+ error,
1170
+ });
1171
+ if (strategy === 'fail-fast') {
1172
+ // Mark all pending downstream steps as skipped
1173
+ await this.markDownstreamSkipped(step.name, workflow.steps, stepStates, runId);
1174
+ throw new Error(`Step "${step.name}" failed: ${error}`);
1175
+ }
1176
+ if (strategy === 'continue') {
1177
+ await this.markDownstreamSkipped(step.name, workflow.steps, stepStates, runId);
1178
+ }
1179
+ }
1180
+ else {
1181
+ batchOutcomes.push({
1182
+ name: step.name,
1183
+ agent: step.agent ?? 'deterministic',
1184
+ status: state?.row.status === 'completed' ? 'completed' : 'failed',
1185
+ attempts: (state?.row.retryCount ?? 0) + 1,
1186
+ output: state?.row.output,
1187
+ verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
1188
+ });
1189
+ }
1190
+ }
1191
+ // Reflect at convergence when a parallel batch completes
1192
+ if (readySteps.length > 1 && this.trajectory?.shouldReflectOnConverge()) {
1193
+ const label = readySteps.map((s) => s.name).join(' + ');
1194
+ // Find steps that this batch unblocks
1195
+ const completedNames = new Set(batchOutcomes.filter((o) => o.status === 'completed').map((o) => o.name));
1196
+ const unblocked = workflow.steps
1197
+ .filter((s) => s.dependsOn?.some((dep) => completedNames.has(dep)))
1198
+ .filter((s) => {
1199
+ const st = stepStates.get(s.name);
1200
+ return st && st.row.status === 'pending';
1201
+ })
1202
+ .map((s) => s.name);
1203
+ await this.trajectory.synthesizeAndReflect(label, batchOutcomes, unblocked.length > 0 ? unblocked : undefined);
1204
+ }
1205
+ }
1206
+ }
1207
+ findReadySteps(steps, stepStates) {
1208
+ return steps.filter((step) => {
1209
+ const state = stepStates.get(step.name);
1210
+ if (!state || state.row.status !== 'pending')
1211
+ return false;
1212
+ const deps = step.dependsOn ?? [];
1213
+ return deps.every((dep) => {
1214
+ const depState = stepStates.get(dep);
1215
+ return depState && (depState.row.status === 'completed' || depState.row.status === 'skipped');
1216
+ });
1217
+ });
1218
+ }
1219
+ /**
1220
+ * Execute preflight checks before any workflow steps.
1221
+ * All checks must pass or the workflow fails immediately.
1222
+ */
1223
+ async runPreflightChecks(checks, runId) {
1224
+ this.postToChannel(`Running ${checks.length} preflight check(s)...`);
1225
+ for (const check of checks) {
1226
+ this.checkAborted();
1227
+ const description = check.description ?? check.command.slice(0, 50);
1228
+ this.postToChannel(`**[preflight]** ${description}`);
1229
+ try {
1230
+ const output = await new Promise((resolve, reject) => {
1231
+ const child = cpSpawn('sh', ['-c', check.command], {
1232
+ stdio: 'pipe',
1233
+ cwd: this.cwd,
1234
+ env: { ...process.env },
1235
+ });
1236
+ const stdoutChunks = [];
1237
+ const stderrChunks = [];
1238
+ // Wire abort signal
1239
+ const abortSignal = this.abortController?.signal;
1240
+ let abortHandler;
1241
+ if (abortSignal && !abortSignal.aborted) {
1242
+ abortHandler = () => {
1243
+ child.kill('SIGTERM');
1244
+ };
1245
+ abortSignal.addEventListener('abort', abortHandler, { once: true });
1246
+ }
1247
+ // 30s timeout for preflight checks
1248
+ const timer = setTimeout(() => {
1249
+ child.kill('SIGTERM');
1250
+ reject(new Error(`Preflight check timed out: ${description}`));
1251
+ }, 30_000);
1252
+ child.stdout?.on('data', (chunk) => {
1253
+ stdoutChunks.push(chunk.toString());
1254
+ });
1255
+ child.stderr?.on('data', (chunk) => {
1256
+ stderrChunks.push(chunk.toString());
1257
+ });
1258
+ child.on('close', (code) => {
1259
+ clearTimeout(timer);
1260
+ if (abortHandler && abortSignal) {
1261
+ abortSignal.removeEventListener('abort', abortHandler);
1262
+ }
1263
+ if (abortSignal?.aborted) {
1264
+ reject(new Error('Preflight check aborted'));
1265
+ return;
1266
+ }
1267
+ // Non-zero exit code is a failure
1268
+ if (code !== 0 && code !== null) {
1269
+ const stderr = stderrChunks.join('');
1270
+ reject(new Error(`Preflight check failed (exit ${code})${stderr ? `: ${stderr.slice(0, 200)}` : ''}`));
1271
+ return;
1272
+ }
1273
+ resolve(stdoutChunks.join(''));
1274
+ });
1275
+ child.on('error', (err) => {
1276
+ clearTimeout(timer);
1277
+ if (abortHandler && abortSignal) {
1278
+ abortSignal.removeEventListener('abort', abortHandler);
1279
+ }
1280
+ reject(new Error(`Preflight check error: ${err.message}`));
1281
+ });
1282
+ });
1283
+ // Check failIf condition
1284
+ if (check.failIf) {
1285
+ const trimmedOutput = output.trim();
1286
+ if (check.failIf === 'non-empty' && trimmedOutput.length > 0) {
1287
+ throw new Error(`Preflight failed: output is non-empty\n${trimmedOutput.slice(0, 200)}`);
1288
+ }
1289
+ if (check.failIf === 'empty' && trimmedOutput.length === 0) {
1290
+ throw new Error('Preflight failed: output is empty');
1291
+ }
1292
+ // Treat as regex pattern
1293
+ if (check.failIf !== 'non-empty' && check.failIf !== 'empty') {
1294
+ const regex = new RegExp(check.failIf);
1295
+ if (regex.test(output)) {
1296
+ throw new Error(`Preflight failed: output matches pattern "${check.failIf}"`);
1297
+ }
1298
+ }
1299
+ }
1300
+ // Check successIf condition
1301
+ if (check.successIf) {
1302
+ const regex = new RegExp(check.successIf);
1303
+ if (!regex.test(output)) {
1304
+ throw new Error(`Preflight failed: output does not match required pattern "${check.successIf}"`);
1305
+ }
1306
+ }
1307
+ this.postToChannel(`**[preflight]** ${description} — passed`);
1308
+ }
1309
+ catch (err) {
1310
+ const errorMsg = err instanceof Error ? err.message : String(err);
1311
+ this.postToChannel(`**[preflight]** ${description} — FAILED: ${errorMsg}`);
1312
+ throw new Error(`Preflight check failed: ${errorMsg}`);
1313
+ }
1314
+ }
1315
+ this.postToChannel('All preflight checks passed');
1316
+ }
1317
+ /** Check if a step is deterministic (shell command) vs agent (LLM-powered). */
1318
+ isDeterministicStep(step) {
1319
+ return step.type === 'deterministic';
1320
+ }
1321
+ /** Check if a step is a worktree (git worktree setup) step. */
1322
+ isWorktreeStep(step) {
1323
+ return step.type === 'worktree';
1324
+ }
1325
+ async executeStep(step, stepStates, agentMap, errorHandling, runId) {
1326
+ // Branch: deterministic steps execute shell commands
1327
+ if (this.isDeterministicStep(step)) {
1328
+ return this.executeDeterministicStep(step, stepStates, runId);
1329
+ }
1330
+ // Branch: worktree steps set up git worktrees
1331
+ if (this.isWorktreeStep(step)) {
1332
+ return this.executeWorktreeStep(step, stepStates, runId);
1333
+ }
1334
+ // Agent step execution
1335
+ return this.executeAgentStep(step, stepStates, agentMap, errorHandling, runId);
1336
+ }
1337
+ /**
1338
+ * Execute a deterministic step (shell command).
1339
+ * Fast, reliable, $0 LLM cost.
1340
+ */
1341
+ async executeDeterministicStep(step, stepStates, runId) {
1342
+ const state = stepStates.get(step.name);
1343
+ if (!state)
1344
+ throw new Error(`Step state not found: ${step.name}`);
1345
+ this.checkAborted();
1346
+ // Mark step as running
1347
+ state.row.status = 'running';
1348
+ state.row.startedAt = new Date().toISOString();
1349
+ await this.db.updateStep(state.row.id, {
1350
+ status: 'running',
1351
+ startedAt: state.row.startedAt,
1352
+ updatedAt: new Date().toISOString(),
1353
+ });
1354
+ this.emit({ type: 'step:started', runId, stepName: step.name });
1355
+ this.postToChannel(`**[${step.name}]** Started (deterministic)`);
1356
+ // Resolve variables in the command (e.g., {{steps.plan.output}}, {{branch-name}})
1357
+ const stepOutputContext = this.buildStepOutputContext(stepStates, runId);
1358
+ let resolvedCommand = this.interpolateStepTask(step.command ?? '', stepOutputContext);
1359
+ // Also resolve simple {{variable}} placeholders (already resolved in top-level config but safe to re-run)
1360
+ resolvedCommand = resolvedCommand.replace(/\{\{([\w][\w.\-]*)\}\}/g, (_match, key) => {
1361
+ if (key.startsWith('steps.'))
1362
+ return _match; // Already handled above
1363
+ const value = this.resolveDotPath(key, stepOutputContext);
1364
+ return value !== undefined ? String(value) : _match;
1365
+ });
1366
+ try {
1367
+ const output = await new Promise((resolve, reject) => {
1368
+ const child = cpSpawn('sh', ['-c', resolvedCommand], {
1369
+ stdio: 'pipe',
1370
+ cwd: this.cwd,
1371
+ env: { ...process.env },
1372
+ });
1373
+ const stdoutChunks = [];
1374
+ const stderrChunks = [];
1375
+ // Wire abort signal
1376
+ const abortSignal = this.abortController?.signal;
1377
+ let abortHandler;
1378
+ if (abortSignal && !abortSignal.aborted) {
1379
+ abortHandler = () => {
1380
+ child.kill('SIGTERM');
1381
+ setTimeout(() => child.kill('SIGKILL'), 5000);
1382
+ };
1383
+ abortSignal.addEventListener('abort', abortHandler, { once: true });
1384
+ }
1385
+ // Handle timeout
1386
+ let timedOut = false;
1387
+ let timer;
1388
+ if (step.timeoutMs) {
1389
+ timer = setTimeout(() => {
1390
+ timedOut = true;
1391
+ child.kill('SIGTERM');
1392
+ setTimeout(() => child.kill('SIGKILL'), 5000);
1393
+ }, step.timeoutMs);
1394
+ }
1395
+ child.stdout?.on('data', (chunk) => {
1396
+ stdoutChunks.push(chunk.toString());
1397
+ });
1398
+ child.stderr?.on('data', (chunk) => {
1399
+ stderrChunks.push(chunk.toString());
1400
+ });
1401
+ child.on('close', (code) => {
1402
+ if (timer)
1403
+ clearTimeout(timer);
1404
+ if (abortHandler && abortSignal) {
1405
+ abortSignal.removeEventListener('abort', abortHandler);
1406
+ }
1407
+ if (abortSignal?.aborted) {
1408
+ reject(new Error(`Step "${step.name}" aborted`));
1409
+ return;
1410
+ }
1411
+ if (timedOut) {
1412
+ reject(new Error(`Step "${step.name}" timed out (no step timeout set, check global swarm.timeoutMs)`));
1413
+ return;
1414
+ }
1415
+ const stdout = stdoutChunks.join('');
1416
+ const stderr = stderrChunks.join('');
1417
+ // Check exit code unless failOnError is explicitly false
1418
+ const failOnError = step.failOnError !== false;
1419
+ if (failOnError && code !== 0 && code !== null) {
1420
+ reject(new Error(`Command failed with exit code ${code}${stderr ? `: ${stderr.slice(0, 500)}` : ''}`));
1421
+ return;
1422
+ }
1423
+ resolve(step.captureOutput !== false ? stdout : `Command completed (exit code ${code ?? 0})`);
1424
+ });
1425
+ child.on('error', (err) => {
1426
+ if (timer)
1427
+ clearTimeout(timer);
1428
+ if (abortHandler && abortSignal) {
1429
+ abortSignal.removeEventListener('abort', abortHandler);
1430
+ }
1431
+ reject(new Error(`Failed to execute command: ${err.message}`));
1432
+ });
1433
+ });
1434
+ // Mark completed
1435
+ state.row.status = 'completed';
1436
+ state.row.output = output;
1437
+ state.row.completedAt = new Date().toISOString();
1438
+ await this.db.updateStep(state.row.id, {
1439
+ status: 'completed',
1440
+ output,
1441
+ completedAt: state.row.completedAt,
1442
+ updatedAt: new Date().toISOString(),
1443
+ });
1444
+ // Persist step output
1445
+ await this.persistStepOutput(runId, step.name, output);
1446
+ this.emit({ type: 'step:completed', runId, stepName: step.name, output });
1447
+ this.postToChannel(`**[${step.name}]** Completed (deterministic)\n${output.slice(0, 500)}${output.length > 500 ? '\n...(truncated)' : ''}`);
1448
+ }
1449
+ catch (err) {
1450
+ const errorMsg = err instanceof Error ? err.message : String(err);
1451
+ this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
1452
+ await this.markStepFailed(state, errorMsg, runId);
1453
+ throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1454
+ }
1455
+ }
1456
+ /**
1457
+ * Execute a worktree step (git worktree setup).
1458
+ * Fast, reliable, $0 LLM cost.
1459
+ * Outputs the worktree path for downstream steps to use.
1460
+ */
1461
+ async executeWorktreeStep(step, stepStates, runId) {
1462
+ const state = stepStates.get(step.name);
1463
+ if (!state)
1464
+ throw new Error(`Step state not found: ${step.name}`);
1465
+ this.checkAborted();
1466
+ // Mark step as running
1467
+ state.row.status = 'running';
1468
+ state.row.startedAt = new Date().toISOString();
1469
+ await this.db.updateStep(state.row.id, {
1470
+ status: 'running',
1471
+ startedAt: state.row.startedAt,
1472
+ updatedAt: new Date().toISOString(),
1473
+ });
1474
+ this.emit({ type: 'step:started', runId, stepName: step.name });
1475
+ this.postToChannel(`**[${step.name}]** Started (worktree setup)`);
1476
+ // Resolve variables in branch name and path
1477
+ const stepOutputContext = this.buildStepOutputContext(stepStates, runId);
1478
+ const branch = this.interpolateStepTask(step.branch ?? '', stepOutputContext);
1479
+ const baseBranch = step.baseBranch
1480
+ ? this.interpolateStepTask(step.baseBranch, stepOutputContext)
1481
+ : 'HEAD';
1482
+ const worktreePath = step.path
1483
+ ? this.interpolateStepTask(step.path, stepOutputContext)
1484
+ : path.join('.worktrees', step.name);
1485
+ const createBranch = step.createBranch !== false;
1486
+ if (!branch) {
1487
+ const errorMsg = 'Worktree step missing required "branch" field';
1488
+ await this.markStepFailed(state, errorMsg, runId);
1489
+ throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1490
+ }
1491
+ try {
1492
+ // Build the git worktree command
1493
+ // If createBranch is true and branch doesn't exist, use -b flag
1494
+ const absoluteWorktreePath = path.resolve(this.cwd, worktreePath);
1495
+ // First, check if the branch already exists
1496
+ const checkBranchCmd = `git rev-parse --verify --quiet ${branch} 2>/dev/null`;
1497
+ let branchExists = false;
1498
+ await new Promise((resolve) => {
1499
+ const checkChild = cpSpawn('sh', ['-c', checkBranchCmd], {
1500
+ stdio: 'pipe',
1501
+ cwd: this.cwd,
1502
+ env: { ...process.env },
1503
+ });
1504
+ checkChild.on('close', (code) => {
1505
+ branchExists = code === 0;
1506
+ resolve();
1507
+ });
1508
+ checkChild.on('error', () => resolve());
1509
+ });
1510
+ // Build appropriate worktree add command
1511
+ let worktreeCmd;
1512
+ if (branchExists) {
1513
+ // Branch exists, just checkout into worktree
1514
+ worktreeCmd = `git worktree add "${absoluteWorktreePath}" ${branch}`;
1515
+ }
1516
+ else if (createBranch) {
1517
+ // Create new branch from baseBranch
1518
+ worktreeCmd = `git worktree add -b ${branch} "${absoluteWorktreePath}" ${baseBranch}`;
1519
+ }
1520
+ else {
1521
+ // Branch doesn't exist and we're not creating it
1522
+ const errorMsg = `Branch "${branch}" does not exist and createBranch is false`;
1523
+ await this.markStepFailed(state, errorMsg, runId);
1524
+ throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1525
+ }
1526
+ const output = await new Promise((resolve, reject) => {
1527
+ const child = cpSpawn('sh', ['-c', worktreeCmd], {
1528
+ stdio: 'pipe',
1529
+ cwd: this.cwd,
1530
+ env: { ...process.env },
1531
+ });
1532
+ const stdoutChunks = [];
1533
+ const stderrChunks = [];
1534
+ // Wire abort signal
1535
+ const abortSignal = this.abortController?.signal;
1536
+ let abortHandler;
1537
+ if (abortSignal && !abortSignal.aborted) {
1538
+ abortHandler = () => {
1539
+ child.kill('SIGTERM');
1540
+ setTimeout(() => child.kill('SIGKILL'), 5000);
1541
+ };
1542
+ abortSignal.addEventListener('abort', abortHandler, { once: true });
1543
+ }
1544
+ // Handle timeout
1545
+ let timedOut = false;
1546
+ let timer;
1547
+ if (step.timeoutMs) {
1548
+ timer = setTimeout(() => {
1549
+ timedOut = true;
1550
+ child.kill('SIGTERM');
1551
+ setTimeout(() => child.kill('SIGKILL'), 5000);
1552
+ }, step.timeoutMs);
1553
+ }
1554
+ child.stdout?.on('data', (chunk) => {
1555
+ stdoutChunks.push(chunk.toString());
1556
+ });
1557
+ child.stderr?.on('data', (chunk) => {
1558
+ stderrChunks.push(chunk.toString());
1559
+ });
1560
+ child.on('close', (code) => {
1561
+ if (timer)
1562
+ clearTimeout(timer);
1563
+ if (abortHandler && abortSignal) {
1564
+ abortSignal.removeEventListener('abort', abortHandler);
1565
+ }
1566
+ if (abortSignal?.aborted) {
1567
+ reject(new Error(`Step "${step.name}" aborted`));
1568
+ return;
1569
+ }
1570
+ if (timedOut) {
1571
+ reject(new Error(`Step "${step.name}" timed out (no step timeout set, check global swarm.timeoutMs)`));
1572
+ return;
1573
+ }
1574
+ const stderr = stderrChunks.join('');
1575
+ if (code !== 0 && code !== null) {
1576
+ reject(new Error(`git worktree add failed with exit code ${code}${stderr ? `: ${stderr.slice(0, 500)}` : ''}`));
1577
+ return;
1578
+ }
1579
+ // Output the worktree path for downstream steps
1580
+ resolve(absoluteWorktreePath);
1581
+ });
1582
+ child.on('error', (err) => {
1583
+ if (timer)
1584
+ clearTimeout(timer);
1585
+ if (abortHandler && abortSignal) {
1586
+ abortSignal.removeEventListener('abort', abortHandler);
1587
+ }
1588
+ reject(new Error(`Failed to execute git worktree command: ${err.message}`));
1589
+ });
1590
+ });
1591
+ // Mark completed
1592
+ state.row.status = 'completed';
1593
+ state.row.output = output;
1594
+ state.row.completedAt = new Date().toISOString();
1595
+ await this.db.updateStep(state.row.id, {
1596
+ status: 'completed',
1597
+ output,
1598
+ completedAt: state.row.completedAt,
1599
+ updatedAt: new Date().toISOString(),
1600
+ });
1601
+ // Persist step output
1602
+ await this.persistStepOutput(runId, step.name, output);
1603
+ this.emit({ type: 'step:completed', runId, stepName: step.name, output });
1604
+ this.postToChannel(`**[${step.name}]** Worktree created at: ${output}\n Branch: ${branch}${!branchExists && createBranch ? ' (created)' : ''}`);
1605
+ }
1606
+ catch (err) {
1607
+ const errorMsg = err instanceof Error ? err.message : String(err);
1608
+ this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
1609
+ await this.markStepFailed(state, errorMsg, runId);
1610
+ throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
1611
+ }
1612
+ }
1613
+ /**
1614
+ * Execute an agent step (LLM-powered).
1615
+ */
1616
+ async executeAgentStep(step, stepStates, agentMap, errorHandling, runId) {
1617
+ const state = stepStates.get(step.name);
1618
+ if (!state)
1619
+ throw new Error(`Step state not found: ${step.name}`);
1620
+ const agentName = step.agent;
1621
+ if (!agentName) {
1622
+ throw new Error(`Step "${step.name}" is missing required "agent" field`);
1623
+ }
1624
+ const rawAgentDef = agentMap.get(agentName);
1625
+ if (!rawAgentDef) {
1626
+ throw new Error(`Agent "${agentName}" not found in config`);
1627
+ }
1628
+ const agentDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
1629
+ const maxRetries = step.retries ?? agentDef.constraints?.retries ?? errorHandling?.maxRetries ?? 0;
1630
+ const retryDelay = errorHandling?.retryDelayMs ?? 1000;
1631
+ const timeoutMs = step.timeoutMs ?? agentDef.constraints?.timeoutMs ?? this.currentConfig?.swarm?.timeoutMs;
1632
+ let lastError;
1633
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
1634
+ this.checkAborted();
1635
+ if (attempt > 0) {
1636
+ this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
1637
+ this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
1638
+ state.row.retryCount = attempt;
1639
+ await this.db.updateStep(state.row.id, {
1640
+ retryCount: attempt,
1641
+ updatedAt: new Date().toISOString(),
1642
+ });
1643
+ await this.trajectory?.stepRetrying(step, attempt, maxRetries);
1644
+ await this.delay(retryDelay);
1645
+ }
1646
+ try {
1647
+ // Mark step as running
1648
+ state.row.status = 'running';
1649
+ state.row.startedAt = new Date().toISOString();
1650
+ await this.db.updateStep(state.row.id, {
1651
+ status: 'running',
1652
+ startedAt: state.row.startedAt,
1653
+ updatedAt: new Date().toISOString(),
1654
+ });
1655
+ this.emit({ type: 'step:started', runId, stepName: step.name });
1656
+ this.postToChannel(`**[${step.name}]** Started (agent: ${agentDef.name})`);
1657
+ await this.trajectory?.stepStarted(step, agentDef.name);
1658
+ // Resolve step-output variables (e.g. {{steps.plan.output}}) at execution time
1659
+ const stepOutputContext = this.buildStepOutputContext(stepStates, runId);
1660
+ let resolvedTask = this.interpolateStepTask(step.task ?? '', stepOutputContext);
1661
+ // If this is an interactive agent, append awareness of non-interactive workers
1662
+ // so the lead knows not to message them and to use step output chaining instead
1663
+ if (agentDef.interactive !== false) {
1664
+ const nonInteractiveInfo = this.buildNonInteractiveAwareness(agentMap, stepStates);
1665
+ if (nonInteractiveInfo) {
1666
+ resolvedTask += nonInteractiveInfo;
1667
+ }
1668
+ }
1669
+ // Spawn agent via AgentRelay
1670
+ this.log(`[${step.name}] Spawning agent "${agentDef.name}" (cli: ${agentDef.cli})`);
1671
+ const resolvedStep = { ...step, task: resolvedTask };
1672
+ const output = await this.spawnAndWait(agentDef, resolvedStep, timeoutMs);
1673
+ this.log(`[${step.name}] Agent "${agentDef.name}" exited`);
1674
+ // Run verification if configured
1675
+ if (step.verification) {
1676
+ this.runVerification(step.verification, output, step.name, resolvedTask);
1677
+ }
1678
+ // Mark completed
1679
+ state.row.status = 'completed';
1680
+ state.row.output = output;
1681
+ state.row.completedAt = new Date().toISOString();
1682
+ await this.db.updateStep(state.row.id, {
1683
+ status: 'completed',
1684
+ output,
1685
+ completedAt: state.row.completedAt,
1686
+ updatedAt: new Date().toISOString(),
1687
+ });
1688
+ // Persist step output to disk so it survives restarts and is inspectable
1689
+ await this.persistStepOutput(runId, step.name, output);
1690
+ this.emit({ type: 'step:completed', runId, stepName: step.name, output });
1691
+ this.postToChannel(`**[${step.name}]** Completed\n${output.slice(0, 500)}${output.length > 500 ? '\n...(truncated)' : ''}`);
1692
+ await this.trajectory?.stepCompleted(step, output, attempt + 1);
1693
+ return;
1694
+ }
1695
+ catch (err) {
1696
+ lastError = err instanceof Error ? err.message : String(err);
1697
+ }
1698
+ }
1699
+ // All retries exhausted — record root-cause diagnosis and mark failed
1700
+ const nonInteractive = agentDef.interactive === false || ['worker', 'reviewer', 'analyst'].includes(agentDef.preset ?? '');
1701
+ const verificationValue = typeof step.verification === 'object' && 'value' in step.verification
1702
+ ? String(step.verification.value)
1703
+ : undefined;
1704
+ await this.trajectory?.stepFailed(step, lastError ?? 'Unknown error', maxRetries + 1, maxRetries, {
1705
+ agent: agentName,
1706
+ nonInteractive,
1707
+ verificationValue,
1708
+ });
1709
+ this.postToChannel(`**[${step.name}]** Failed: ${lastError ?? 'Unknown error'}`);
1710
+ await this.markStepFailed(state, lastError ?? 'Unknown error', runId);
1711
+ throw new Error(`Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`);
1712
+ }
1713
+ /**
1714
+ * Build the CLI command and arguments for a non-interactive agent execution.
1715
+ * Each CLI has a specific flag for one-shot prompt mode.
1716
+ */
1717
+ static buildNonInteractiveCommand(cli, task, extraArgs = []) {
1718
+ switch (cli) {
1719
+ case 'claude':
1720
+ // --dangerously-skip-permissions prevents any tool-use permission prompt
1721
+ // from blocking the process when stdio is piped (no TTY available).
1722
+ return { cmd: 'claude', args: ['-p', '--dangerously-skip-permissions', task, ...extraArgs] };
1723
+ case 'codex':
1724
+ return { cmd: 'codex', args: ['exec', task, ...extraArgs] };
1725
+ case 'gemini':
1726
+ return { cmd: 'gemini', args: ['-p', task, ...extraArgs] };
1727
+ case 'opencode':
1728
+ return { cmd: 'opencode', args: ['--prompt', task, ...extraArgs] };
1729
+ case 'droid':
1730
+ return { cmd: 'droid', args: ['exec', task, ...extraArgs] };
1731
+ case 'aider':
1732
+ return { cmd: 'aider', args: ['--message', task, '--yes-always', '--no-git', ...extraArgs] };
1733
+ case 'goose':
1734
+ return { cmd: 'goose', args: ['run', '--text', task, '--no-session', ...extraArgs] };
1735
+ }
1736
+ }
1737
+ /**
1738
+ * Apply preset defaults to an agent definition.
1739
+ * Explicit fields on the definition always win over preset-inferred defaults.
1740
+ */
1741
+ static resolveAgentDef(def) {
1742
+ if (!def.preset)
1743
+ return def;
1744
+ const nonInteractivePresets = ['worker', 'reviewer', 'analyst'];
1745
+ const defaults = nonInteractivePresets.includes(def.preset)
1746
+ ? { interactive: false }
1747
+ : {};
1748
+ // Explicit fields on the def always win
1749
+ return { ...defaults, ...def };
1750
+ }
1751
+ /**
1752
+ * Returns a preset-specific prefix that is prepended to the non-interactive
1753
+ * enforcement block in execNonInteractive.
1754
+ */
1755
+ /**
1756
+ * Returns a prefix injected into the task prompt for non-interactive agents.
1757
+ * Lead agents are always interactive (PTY), so they never reach execNonInteractive
1758
+ * and there is no 'lead' case here.
1759
+ */
1760
+ buildPresetInjection(preset) {
1761
+ switch (preset) {
1762
+ case 'worker':
1763
+ return ('You are a non-interactive worker agent. Produce clean, structured output to stdout.\n' +
1764
+ 'Do NOT use relay_spawn, add_agent, or any MCP tool to spawn sub-agents.\n' +
1765
+ 'Do NOT use relay_send or any Relaycast messaging tools — you have no relay connection.\n\n');
1766
+ case 'reviewer':
1767
+ return ('You are a non-interactive reviewer agent. Read the specified files/artifacts and produce a clear verdict.\n' +
1768
+ 'Do NOT spawn sub-agents or use any Relaycast messaging tools.\n\n');
1769
+ case 'analyst':
1770
+ return ('You are a non-interactive analyst agent. Read the specified code/files and write your findings.\n' +
1771
+ 'Do NOT spawn sub-agents or use any Relaycast messaging tools.\n\n');
1772
+ default:
1773
+ return '';
1774
+ }
1775
+ }
1776
+ /**
1777
+ * Execute an agent as a non-interactive subprocess.
1778
+ * No PTY, no relay messaging, no /exit injection. The process receives its task
1779
+ * as a CLI argument and stdout is captured as the step output.
1780
+ */
1781
+ async execNonInteractive(agentDef, step, timeoutMs) {
1782
+ const agentName = `${step.name}-${this.generateShortId()}`;
1783
+ const modelArgs = agentDef.constraints?.model ? ['--model', agentDef.constraints.model] : [];
1784
+ // Append strict deliverable enforcement — non-interactive agents MUST produce
1785
+ // clear, structured output since there's no opportunity for follow-up or clarification.
1786
+ const presetPrefix = this.buildPresetInjection(agentDef.preset);
1787
+ const taskWithDeliverable = presetPrefix +
1788
+ step.task +
1789
+ '\n\n---\n' +
1790
+ 'IMPORTANT: You are running as a non-interactive subprocess. ' +
1791
+ 'Do NOT call relay_spawn, add_agent, or any MCP tool to spawn or manage other agents.\n\n' +
1792
+ 'CRITICAL REQUIREMENT — YOU MUST FOLLOW THIS EXACTLY:\n' +
1793
+ 'You are running in non-interactive mode. There is NO opportunity for follow-up, ' +
1794
+ 'clarification, or additional input. Your stdout output is your ONLY deliverable.\n\n' +
1795
+ 'You MUST:\n' +
1796
+ '1. Complete the ENTIRE task in a single pass — no partial work, no "I\'ll continue later"\n' +
1797
+ '2. Print your COMPLETE deliverable to stdout — this is the ONLY output that will be captured\n' +
1798
+ '3. Be thorough and self-contained — another agent will consume your output with zero context about your process\n' +
1799
+ '4. End with a clear summary of what was accomplished and any artifacts produced\n\n' +
1800
+ 'DO NOT:\n' +
1801
+ '- Ask questions or request clarification (there is no one to answer)\n' +
1802
+ '- Output partial results expecting a follow-up (there will be none)\n' +
1803
+ '- Skip steps or leave work incomplete\n' +
1804
+ '- Output only status messages without the actual deliverable content';
1805
+ const { cmd, args } = WorkflowRunner.buildNonInteractiveCommand(agentDef.cli, taskWithDeliverable, modelArgs);
1806
+ // Open a log file for dashboard observability
1807
+ const logsDir = this.getWorkerLogsDir();
1808
+ const logPath = path.join(logsDir, `${agentName}.log`);
1809
+ const logStream = createWriteStream(logPath, { flags: 'a' });
1810
+ // Register in workers.json with interactive: false metadata
1811
+ this.registerWorker(agentName, agentDef.cli, step.task ?? '', undefined, false);
1812
+ // Register agent in Relaycast for observability
1813
+ let stopHeartbeat;
1814
+ if (this.relayApiKey) {
1815
+ const agentClient = await this.registerRelaycastExternalAgent(agentName, `Non-interactive workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
1816
+ console.warn(`[WorkflowRunner] Failed to register ${agentName} in Relaycast:`, err?.message ?? err);
1817
+ return null;
1818
+ });
1819
+ if (agentClient) {
1820
+ stopHeartbeat = this.startRelaycastHeartbeat(agentClient);
1821
+ }
1822
+ }
1823
+ // Post assignment notification (no task content — task arrives via direct broker injection)
1824
+ this.postToChannel(`**[${step.name}]** Assigned to \`${agentName}\` (non-interactive)`);
1825
+ const stdoutChunks = [];
1826
+ const stderrChunks = [];
1827
+ try {
1828
+ const output = await new Promise((resolve, reject) => {
1829
+ const child = cpSpawn(cmd, args, {
1830
+ stdio: ['ignore', 'pipe', 'pipe'],
1831
+ cwd: agentDef.cwd ? path.resolve(this.cwd, agentDef.cwd) : this.cwd,
1832
+ env: this.getRelayEnv() ?? { ...process.env },
1833
+ });
1834
+ // Update workers.json with PID now that we have it
1835
+ this.registerWorker(agentName, agentDef.cli, step.task ?? '', child.pid, false);
1836
+ // Wire abort signal so runner.abort() kills the child process
1837
+ const abortSignal = this.abortController?.signal;
1838
+ let abortHandler;
1839
+ if (abortSignal && !abortSignal.aborted) {
1840
+ abortHandler = () => {
1841
+ child.kill('SIGTERM');
1842
+ setTimeout(() => child.kill('SIGKILL'), 5000);
1843
+ };
1844
+ abortSignal.addEventListener('abort', abortHandler, { once: true });
1845
+ }
1846
+ // Heartbeat so a slow non-interactive agent doesn't look frozen.
1847
+ // Each tick shows the last substantive line received — gives insight
1848
+ // without flooding the log with raw model output.
1849
+ const startedAt = Date.now();
1850
+ let lastHeartbeatLine = '';
1851
+ const heartbeat = setInterval(() => {
1852
+ const elapsed = Math.round((Date.now() - startedAt) / 1000);
1853
+ const suffix = lastHeartbeatLine ? ` — ${lastHeartbeatLine.slice(0, 80)}` : '';
1854
+ this.log(`[${step.name}] still running (${elapsed}s)${suffix}`);
1855
+ lastHeartbeatLine = '';
1856
+ }, 30_000);
1857
+ child.stdout?.on('data', (chunk) => {
1858
+ const text = chunk.toString();
1859
+ stdoutChunks.push(text);
1860
+ logStream.write(text);
1861
+ // Track last substantive line for the next heartbeat
1862
+ const line = text
1863
+ .split('\n')
1864
+ .map((l) => l.trim())
1865
+ .filter(Boolean)
1866
+ .at(-1) ?? '';
1867
+ if (line)
1868
+ lastHeartbeatLine = line;
1869
+ });
1870
+ child.stderr?.on('data', (chunk) => {
1871
+ const text = chunk.toString();
1872
+ stderrChunks.push(text);
1873
+ logStream.write(`[stderr] ${text}`);
1874
+ });
1875
+ // Handle timeout
1876
+ let timedOut = false;
1877
+ let timer;
1878
+ if (timeoutMs) {
1879
+ timer = setTimeout(() => {
1880
+ timedOut = true;
1881
+ child.kill('SIGTERM');
1882
+ // Give process time to clean up, then force kill
1883
+ setTimeout(() => child.kill('SIGKILL'), 5000);
1884
+ }, timeoutMs);
1885
+ }
1886
+ child.on('close', (code) => {
1887
+ clearInterval(heartbeat);
1888
+ if (timer)
1889
+ clearTimeout(timer);
1890
+ if (abortHandler && abortSignal) {
1891
+ abortSignal.removeEventListener('abort', abortHandler);
1892
+ }
1893
+ const stdout = stdoutChunks.join('');
1894
+ if (abortSignal?.aborted) {
1895
+ reject(new Error(`Step "${step.name}" aborted`));
1896
+ return;
1897
+ }
1898
+ if (timedOut) {
1899
+ reject(new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`));
1900
+ return;
1901
+ }
1902
+ if (code !== 0 && code !== null) {
1903
+ const stderr = stderrChunks.join('');
1904
+ reject(new Error(`Step "${step.name}" exited with code ${code}${stderr ? `: ${stderr.slice(0, 500)}` : ''}`));
1905
+ return;
1906
+ }
1907
+ resolve(stdout);
1908
+ });
1909
+ child.on('error', (err) => {
1910
+ clearInterval(heartbeat);
1911
+ if (timer)
1912
+ clearTimeout(timer);
1913
+ if (abortHandler && abortSignal) {
1914
+ abortSignal.removeEventListener('abort', abortHandler);
1915
+ }
1916
+ reject(new Error(`Failed to spawn ${cmd}: ${err.message}`));
1917
+ });
1918
+ });
1919
+ return output;
1920
+ }
1921
+ finally {
1922
+ stopHeartbeat?.();
1923
+ logStream.end();
1924
+ this.unregisterWorker(agentName);
1925
+ }
1926
+ }
1927
+ async spawnAndWait(agentDef, step, timeoutMs) {
1928
+ // Branch: non-interactive agents run as simple subprocesses
1929
+ if (agentDef.interactive === false) {
1930
+ return this.execNonInteractive(agentDef, step, timeoutMs);
1931
+ }
1932
+ if (!this.relay) {
1933
+ throw new Error('AgentRelay not initialized');
1934
+ }
1935
+ // Deterministic name: step name + first 8 chars of run ID.
1936
+ // This matches the names pre-registered in preflightAgents(), so the broker
1937
+ // hits its token cache instantly instead of making a fresh Relaycast HTTP call.
1938
+ // On retry the broker may suffix a UUID (409 conflict) — that's fine, the agent
1939
+ // still works, just without the cache benefit.
1940
+ let agentName = `${step.name}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
1941
+ // Only inject delegation guidance for lead/coordinator agents, not spokes/workers.
1942
+ // In non-hub patterns (pipeline, dag, etc.) every agent is autonomous so they all get it.
1943
+ const role = agentDef.role?.toLowerCase() ?? '';
1944
+ const nameLC = agentDef.name.toLowerCase();
1945
+ const isHub = WorkflowRunner.HUB_ROLES.has(nameLC) || [...WorkflowRunner.HUB_ROLES].some((r) => role.includes(r));
1946
+ const pattern = this.currentConfig?.swarm.pattern;
1947
+ const isHubPattern = pattern && WorkflowRunner.HUB_PATTERNS.has(pattern);
1948
+ const delegationGuidance = isHub || !isHubPattern ? this.buildDelegationGuidance(agentDef.cli, timeoutMs) : '';
1949
+ // Non-claude CLIs (codex, gemini, etc.) don't auto-register with Relaycast
1950
+ // via the MCP system prompt the way claude does. Inject an explicit preamble
1951
+ // so they call register() before any other relay tool.
1952
+ const relayRegistrationNote = this.buildRelayRegistrationNote(agentDef.cli, agentName);
1953
+ const taskWithExit = step.task +
1954
+ (relayRegistrationNote ? '\n\n' + relayRegistrationNote : '') +
1955
+ (delegationGuidance ? '\n\n' + delegationGuidance + '\n' : '') +
1956
+ '\n\n---\n' +
1957
+ 'IMPORTANT: When you have fully completed this task, you MUST self-terminate by either: ' +
1958
+ '(a) calling remove_agent(name: "<your-agent-name>", reason: "task completed") — preferred, or ' +
1959
+ '(b) outputting the exact text "/exit" on its own line as a fallback. ' +
1960
+ 'Do not wait for further input — terminate immediately after finishing. ' +
1961
+ 'Do NOT spawn sub-agents unless the task explicitly requires it.';
1962
+ // Register PTY output listener before spawning so we capture everything
1963
+ this.ptyOutputBuffers.set(agentName, []);
1964
+ // Open a log file so `agents:logs <name>` works for workflow-spawned agents
1965
+ const logsDir = this.getWorkerLogsDir();
1966
+ const logStream = createWriteStream(path.join(logsDir, `${agentName}.log`), { flags: 'a' });
1967
+ this.ptyLogStreams.set(agentName, logStream);
1968
+ this.ptyListeners.set(agentName, (chunk) => {
1969
+ const stripped = WorkflowRunner.stripAnsi(chunk);
1970
+ this.ptyOutputBuffers.get(agentName)?.push(stripped);
1971
+ // Write raw output (with ANSI codes) to log file so dashboard's
1972
+ // XTermLogViewer can render colors/formatting natively via xterm.js
1973
+ logStream.write(chunk);
1974
+ });
1975
+ const agentChannels = this.channel ? [this.channel] : agentDef.channels;
1976
+ let agent;
1977
+ let exitResult = 'unknown';
1978
+ let stopHeartbeat;
1979
+ let ptyChunks = [];
1980
+ try {
1981
+ agent = await this.relay.spawnPty({
1982
+ name: agentName,
1983
+ cli: agentDef.cli,
1984
+ model: agentDef.constraints?.model,
1985
+ args: [],
1986
+ channels: agentChannels,
1987
+ task: taskWithExit,
1988
+ idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
1989
+ cwd: agentDef.cwd ? path.resolve(this.cwd, agentDef.cwd) : undefined,
1990
+ });
1991
+ // Re-key PTY maps if broker assigned a different name than requested
1992
+ if (agent.name !== agentName) {
1993
+ const oldName = agentName;
1994
+ this.ptyOutputBuffers.set(agent.name, this.ptyOutputBuffers.get(oldName) ?? []);
1995
+ this.ptyOutputBuffers.delete(oldName);
1996
+ // Close old log stream and rename the file to match the new agent name
1997
+ const oldLogPath = path.join(logsDir, `${oldName}.log`);
1998
+ const newLogPath = path.join(logsDir, `${agent.name}.log`);
1999
+ const oldLogStream = this.ptyLogStreams.get(oldName);
2000
+ if (oldLogStream) {
2001
+ oldLogStream.end();
2002
+ this.ptyLogStreams.delete(oldName);
2003
+ try {
2004
+ renameSync(oldLogPath, newLogPath);
2005
+ }
2006
+ catch {
2007
+ // File may not exist yet if no output was written
2008
+ }
2009
+ }
2010
+ // Open new log stream with the correct name
2011
+ const newLogStream = createWriteStream(newLogPath, { flags: 'a' });
2012
+ this.ptyLogStreams.set(agent.name, newLogStream);
2013
+ // Update listener to use the new log stream
2014
+ const oldListener = this.ptyListeners.get(oldName);
2015
+ if (oldListener) {
2016
+ this.ptyListeners.delete(oldName);
2017
+ this.ptyListeners.set(agent.name, (chunk) => {
2018
+ const stripped = WorkflowRunner.stripAnsi(chunk);
2019
+ this.ptyOutputBuffers.get(agent.name)?.push(stripped);
2020
+ newLogStream.write(chunk);
2021
+ });
2022
+ }
2023
+ agentName = agent.name;
2024
+ }
2025
+ // Register in workers.json so `agents:kill` can find this agent
2026
+ let workerPid;
2027
+ try {
2028
+ const rawAgents = await this.relay.listAgentsRaw();
2029
+ workerPid = rawAgents.find((a) => a.name === agentName)?.pid ?? undefined;
2030
+ }
2031
+ catch {
2032
+ // Best-effort PID lookup
2033
+ }
2034
+ this.registerWorker(agentName, agentDef.cli, step.task ?? '', workerPid);
2035
+ // Register the spawned agent in Relaycast for observability + start heartbeat
2036
+ if (this.relayApiKey) {
2037
+ const agentClient = await this.registerRelaycastExternalAgent(agent.name, `Workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
2038
+ console.warn(`[WorkflowRunner] Failed to register ${agent.name} in Relaycast:`, err?.message ?? err);
2039
+ return null;
2040
+ });
2041
+ // Keep the agent online in the dashboard while it's working
2042
+ if (agentClient) {
2043
+ stopHeartbeat = this.startRelaycastHeartbeat(agentClient);
2044
+ }
2045
+ }
2046
+ // Invite the spawned agent to the workflow channel
2047
+ if (this.channel && this.relayApiKey) {
2048
+ const channelAgent = await this.ensureRelaycastRunnerAgent().catch(() => null);
2049
+ await channelAgent?.channels.invite(this.channel, agent.name).catch(() => { });
2050
+ }
2051
+ // Post assignment notification (no task content — task arrives via direct broker injection)
2052
+ this.postToChannel(`**[${step.name}]** Assigned to \`${agent.name}\``);
2053
+ // Register agent handle for hub-mediated nudging
2054
+ this.activeAgentHandles.set(agentName, agent);
2055
+ // Wait for agent to exit, with idle nudging if configured
2056
+ exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs);
2057
+ // Stop heartbeat now that agent has exited
2058
+ stopHeartbeat?.();
2059
+ if (exitResult === 'timeout') {
2060
+ // Safety net: check if the verification file exists before giving up.
2061
+ // The agent may have completed work but failed to /exit.
2062
+ if (step.verification?.type === 'file_exists') {
2063
+ const verifyPath = path.resolve(this.cwd, step.verification.value);
2064
+ if (existsSync(verifyPath)) {
2065
+ this.postToChannel(`**[${step.name}]** Agent idle after completing work — releasing`);
2066
+ await agent.release();
2067
+ // Fall through to read output below
2068
+ }
2069
+ else {
2070
+ await agent.release();
2071
+ throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
2072
+ }
2073
+ }
2074
+ else {
2075
+ await agent.release();
2076
+ throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
2077
+ }
2078
+ }
2079
+ }
2080
+ finally {
2081
+ // Snapshot PTY chunks before cleanup — we need them for output reading below
2082
+ ptyChunks = this.ptyOutputBuffers.get(agentName) ?? [];
2083
+ // Always clean up PTY resources — prevents fd leaks if spawnPty or waitForExit throws
2084
+ stopHeartbeat?.();
2085
+ this.activeAgentHandles.delete(agentName);
2086
+ this.ptyOutputBuffers.delete(agentName);
2087
+ this.ptyListeners.delete(agentName);
2088
+ const stream = this.ptyLogStreams.get(agentName);
2089
+ if (stream) {
2090
+ stream.end();
2091
+ this.ptyLogStreams.delete(agentName);
2092
+ }
2093
+ this.unregisterWorker(agentName);
2094
+ }
2095
+ let output;
2096
+ if (ptyChunks.length > 0) {
2097
+ output = ptyChunks.join('');
2098
+ }
2099
+ else {
2100
+ // Legacy fallback: summary file
2101
+ const summaryPath = path.join(this.summaryDir, `${step.name}.md`);
2102
+ output = existsSync(summaryPath)
2103
+ ? await readFile(summaryPath, 'utf-8')
2104
+ : exitResult === 'timeout'
2105
+ ? 'Agent completed (released after idle timeout)'
2106
+ : exitResult === 'released'
2107
+ ? 'Agent completed (force-released after idle nudging)'
2108
+ : `Agent exited (${exitResult})`;
2109
+ }
2110
+ return output;
2111
+ }
2112
+ // ── Idle nudging ────────────────────────────────────────────────────────
2113
/**
 * Patterns where a hub agent coordinates spoke agents.
 * resolveHubForNudge() only looks for a hub when the configured
 * `swarm.pattern` is a member of this set.
 */
static HUB_PATTERNS = new Set([
    'fan-out',
    'hub-spoke',
    'hierarchical',
    'map-reduce',
    'scatter-gather',
    'supervisor',
    'saga',
    'auction',
]);
2124
/**
 * Roles that indicate a coordinator/lead agent (eligible for delegation guidance).
 * resolveHubForNudge() matches these against an agent's lower-cased name
 * (exact match) or lower-cased role (substring match).
 */
static HUB_ROLES = new Set([
    'lead',
    'hub',
    'coordinator',
    'supervisor',
    'orchestrator',
    'auctioneer',
]);
2133
+ /**
2134
+ * Wait for agent exit with idle detection and nudging.
2135
+ * If no idle nudge config is set, falls through to simple waitForExit.
2136
+ */
2137
+ async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs) {
2138
+ const nudgeConfig = this.currentConfig?.swarm.idleNudge;
2139
+ if (!nudgeConfig) {
2140
+ // No nudge config — backward compatible simple wait
2141
+ return agent.waitForExit(timeoutMs);
2142
+ }
2143
+ const nudgeAfterMs = nudgeConfig.nudgeAfterMs ?? 120_000;
2144
+ const escalateAfterMs = nudgeConfig.escalateAfterMs ?? 120_000;
2145
+ const maxNudges = nudgeConfig.maxNudges ?? 1;
2146
+ let nudgeCount = 0;
2147
+ const startTime = Date.now();
2148
+ while (true) {
2149
+ // Calculate remaining time from overall timeout
2150
+ const elapsed = Date.now() - startTime;
2151
+ const remaining = timeoutMs ? timeoutMs - elapsed : undefined;
2152
+ if (remaining !== undefined && remaining <= 0) {
2153
+ return 'timeout';
2154
+ }
2155
+ // nudgeAfterMs = how long to wait before nudging (first interval).
2156
+ // escalateAfterMs = how long to wait between subsequent nudges.
2157
+ //
2158
+ // We wait for exit, not for idle. The broker's idle_threshold_secs is
2159
+ // only 30s by default, so racing waitForExit vs waitForIdle would nudge
2160
+ // after 30s of PTY silence regardless of nudgeAfterMs. Instead, we give
2161
+ // the agent the full nudgeAfterMs window to finish before nudging.
2162
+ const windowMs = nudgeCount === 0 ? nudgeAfterMs : escalateAfterMs;
2163
+ const waitMs = remaining !== undefined ? Math.min(windowMs, remaining) : windowMs;
2164
+ const exitResult = await agent.waitForExit(waitMs);
2165
+ if (exitResult !== 'timeout') {
2166
+ // Agent actually exited or was released — done
2167
+ return exitResult;
2168
+ }
2169
+ // Agent is still running after the window expired.
2170
+ if (remaining !== undefined && Date.now() - startTime >= remaining) {
2171
+ return 'timeout';
2172
+ }
2173
+ // Nudge if we haven't exhausted the limit
2174
+ if (nudgeCount < maxNudges) {
2175
+ await this.nudgeIdleAgent(agent, agentDef, step);
2176
+ nudgeCount++;
2177
+ this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` idle — nudge #${nudgeCount} sent`);
2178
+ this.emit({ type: 'step:nudged', runId: this.currentRunId ?? '', stepName: step.name, nudgeCount });
2179
+ continue;
2180
+ }
2181
+ // Exhausted nudges — force-release
2182
+ this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` still idle after ${nudgeCount} nudge(s) — force-releasing`);
2183
+ this.emit({ type: 'step:force-released', runId: this.currentRunId ?? '', stepName: step.name });
2184
+ await agent.release();
2185
+ return 'released';
2186
+ }
2187
+ }
2188
+ /**
2189
+ * Send a nudge to an idle agent. Uses hub-mediated nudge for hub patterns,
2190
+ * or direct system injection otherwise.
2191
+ */
2192
+ async nudgeIdleAgent(agent, agentDef, step) {
2193
+ const hubAgent = this.resolveHubForNudge(agentDef);
2194
+ if (hubAgent) {
2195
+ // Hub-mediated: tell the hub to check on the idle agent
2196
+ try {
2197
+ await hubAgent.sendMessage({
2198
+ to: agent.name,
2199
+ text: `Agent ${agent.name} appears idle on step "${step.name}". Check on them and remind them to /exit when done.`,
2200
+ });
2201
+ return; // Hub nudge succeeded
2202
+ }
2203
+ catch {
2204
+ // Fall through to direct nudge
2205
+ }
2206
+ }
2207
+ // Direct system injection via human handle
2208
+ if (this.relay) {
2209
+ const human = this.relay.human({ name: 'workflow-runner' });
2210
+ await human
2211
+ .sendMessage({
2212
+ to: agent.name,
2213
+ text: "You appear idle. If you've completed your task, output /exit. If still working, continue.",
2214
+ })
2215
+ .catch(() => {
2216
+ // Non-critical — don't break workflow
2217
+ });
2218
+ }
2219
+ }
2220
+ /**
2221
+ * Find the hub agent for hub-mediated nudging.
2222
+ * Returns the hub's live Agent handle if this is a hub pattern and the idle agent is not the hub.
2223
+ */
2224
+ resolveHubForNudge(idleAgentDef) {
2225
+ const pattern = this.currentConfig?.swarm.pattern;
2226
+ if (!pattern || !WorkflowRunner.HUB_PATTERNS.has(pattern)) {
2227
+ return undefined;
2228
+ }
2229
+ // Find an interactive agent with a hub-like role
2230
+ const agents = this.currentConfig?.agents ?? [];
2231
+ for (const agentDef of agents) {
2232
+ // Skip non-interactive and the idle agent itself
2233
+ if (agentDef.interactive === false)
2234
+ continue;
2235
+ if (agentDef.name === idleAgentDef.name)
2236
+ continue;
2237
+ const role = agentDef.role?.toLowerCase() ?? '';
2238
+ const nameLC = agentDef.name.toLowerCase();
2239
+ if (WorkflowRunner.HUB_ROLES.has(nameLC) ||
2240
+ [...WorkflowRunner.HUB_ROLES].some((r) => role.includes(r))) {
2241
+ // Found a hub candidate — check if we have a live handle
2242
+ const handle = this.activeAgentHandles.get(agentDef.name);
2243
+ if (handle)
2244
+ return handle;
2245
+ }
2246
+ }
2247
+ return undefined;
2248
+ }
2249
+ // ── Verification ────────────────────────────────────────────────────────
2250
+ runVerification(check, output, stepName, injectedTaskText) {
2251
+ switch (check.type) {
2252
+ case 'output_contains': {
2253
+ // Guard against false positives: the PTY captures the injected task text
2254
+ // verbatim, so if the verification token appears in the task itself the
2255
+ // check would pass immediately without the agent doing any real work.
2256
+ // When the task contains the token, require a SECOND occurrence — one
2257
+ // from the task injection and one from the agent's actual response.
2258
+ const token = check.value;
2259
+ const taskHasToken = injectedTaskText ? injectedTaskText.includes(token) : false;
2260
+ if (taskHasToken) {
2261
+ const first = output.indexOf(token);
2262
+ const hasSecond = first !== -1 && output.includes(token, first + token.length);
2263
+ if (!hasSecond) {
2264
+ throw new Error(`Verification failed for "${stepName}": output does not contain "${token}" ` +
2265
+ `(token found only in task injection — agent must output it explicitly)`);
2266
+ }
2267
+ }
2268
+ else if (!output.includes(token)) {
2269
+ throw new Error(`Verification failed for "${stepName}": output does not contain "${token}"`);
2270
+ }
2271
+ break;
2272
+ }
2273
+ case 'exit_code':
2274
+ // exit_code verification is implicitly satisfied if the agent exited successfully
2275
+ break;
2276
+ case 'file_exists':
2277
+ if (!existsSync(path.resolve(this.cwd, check.value))) {
2278
+ throw new Error(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
2279
+ }
2280
+ break;
2281
+ case 'custom':
2282
+ // Custom verifications are evaluated by callers; no-op here
2283
+ break;
2284
+ }
2285
+ }
2286
+ // ── State helpers ─────────────────────────────────────────────────────
2287
+ async updateRunStatus(runId, status, error) {
2288
+ const patch = {
2289
+ status,
2290
+ updatedAt: new Date().toISOString(),
2291
+ };
2292
+ if (status === 'completed' || status === 'failed' || status === 'cancelled') {
2293
+ patch.completedAt = new Date().toISOString();
2294
+ }
2295
+ if (error) {
2296
+ patch.error = error;
2297
+ }
2298
+ await this.db.updateRun(runId, patch);
2299
+ }
2300
+ async markStepFailed(state, error, runId) {
2301
+ state.row.status = 'failed';
2302
+ state.row.error = error;
2303
+ state.row.completedAt = new Date().toISOString();
2304
+ await this.db.updateStep(state.row.id, {
2305
+ status: 'failed',
2306
+ error,
2307
+ completedAt: state.row.completedAt,
2308
+ updatedAt: new Date().toISOString(),
2309
+ });
2310
+ this.emit({ type: 'step:failed', runId, stepName: state.row.stepName, error });
2311
+ }
2312
+ async markDownstreamSkipped(failedStepName, allSteps, stepStates, runId) {
2313
+ const queue = [failedStepName];
2314
+ const visited = new Set();
2315
+ while (queue.length > 0) {
2316
+ const current = queue.shift();
2317
+ if (visited.has(current))
2318
+ continue;
2319
+ visited.add(current);
2320
+ for (const step of allSteps) {
2321
+ if (step.dependsOn?.includes(current)) {
2322
+ const state = stepStates.get(step.name);
2323
+ if (state && state.row.status === 'pending') {
2324
+ state.row.status = 'skipped';
2325
+ await this.db.updateStep(state.row.id, {
2326
+ status: 'skipped',
2327
+ updatedAt: new Date().toISOString(),
2328
+ });
2329
+ this.emit({ type: 'step:skipped', runId, stepName: step.name });
2330
+ this.postToChannel(`**[${step.name}]** Skipped — upstream dependency "${current}" failed`);
2331
+ await this.trajectory?.stepSkipped(step, `Upstream dependency "${current}" failed`);
2332
+ await this.trajectory?.decide(`Whether to skip ${step.name}`, 'skip', `Upstream dependency "${current}" failed`);
2333
+ queue.push(step.name);
2334
+ }
2335
+ }
2336
+ }
2337
+ }
2338
+ }
2339
+ // ── Control flow helpers ──────────────────────────────────────────────
2340
+ checkAborted() {
2341
+ if (this.abortController?.signal.aborted) {
2342
+ throw new Error('Workflow aborted');
2343
+ }
2344
+ }
2345
+ async waitIfPaused() {
2346
+ if (!this.paused)
2347
+ return;
2348
+ await new Promise((resolve) => {
2349
+ this.pauseResolver = resolve;
2350
+ });
2351
+ }
2352
+ delay(ms) {
2353
+ return new Promise((resolve) => setTimeout(resolve, ms));
2354
+ }
2355
+ // ── Channel messaging ──────────────────────────────────────────────────
2356
+ /**
2357
+ * Build a metadata note about non-interactive workers for inclusion in interactive agent tasks.
2358
+ * Returns undefined if there are no non-interactive agents.
2359
+ */
2360
+ buildNonInteractiveAwareness(agentMap, stepStates) {
2361
+ const nonInteractive = [...agentMap.values()].filter((a) => a.interactive === false);
2362
+ if (nonInteractive.length === 0)
2363
+ return undefined;
2364
+ // Map agent names to their step names so the lead knows exact {{steps.X.output}} references
2365
+ const agentToSteps = new Map();
2366
+ for (const [stepName, state] of stepStates) {
2367
+ const agentName = state.row.agentName;
2368
+ if (!agentName)
2369
+ continue; // Skip deterministic steps
2370
+ if (!agentToSteps.has(agentName))
2371
+ agentToSteps.set(agentName, []);
2372
+ agentToSteps.get(agentName).push(stepName);
2373
+ }
2374
+ const lines = nonInteractive.map((a) => {
2375
+ const steps = agentToSteps.get(a.name) ?? [];
2376
+ const stepRefs = steps.map((s) => `{{steps.${s}.output}}`).join(', ');
2377
+ return `- ${a.name} (${a.cli}) — will return output when complete${stepRefs ? `. Access via: ${stepRefs}` : ''}`;
2378
+ });
2379
+ return ('\n\n---\n' +
2380
+ 'Note: The following agents are non-interactive workers and cannot receive messages:\n' +
2381
+ lines.join('\n') +
2382
+ '\n' +
2383
+ 'Do NOT attempt to message these agents. Use the {{steps.<name>.output}} references above to access their results.');
2384
+ }
2385
+ /**
2386
+ * Build guidance that encourages agents to autonomously delegate subtasks
2387
+ * to helper agents when work is too complex for a single pass.
2388
+ */
2389
+ /**
2390
+ * Returns a relay registration preamble for CLIs that don't auto-call
2391
+ * `register` via the MCP system prompt (everyone except claude).
2392
+ *
2393
+ * Claude reads the Relaycast system prompt and registers on its own.
2394
+ * Codex, gemini, etc. have the MCP server configured with the workspace
2395
+ * key, but they won't call `register` unless explicitly told to.
2396
+ */
2397
+ buildRelayRegistrationNote(cli, agentName) {
2398
+ if (cli === 'claude')
2399
+ return '';
2400
+ return ('---\n' +
2401
+ 'RELAY SETUP — do this FIRST before any other relay tool:\n' +
2402
+ `1. Call: register(name="${agentName}")\n` +
2403
+ ' This authenticates you in the Relaycast workspace.\n' +
2404
+ ' ALL relay tools (relay_send, relay_inbox, post_message, etc.) require\n' +
2405
+ ' registration first — they will fail with "Not registered" otherwise.\n' +
2406
+ `2. Your agent name is "${agentName}" — use this exact name when registering.`);
2407
+ }
2408
+ buildDelegationGuidance(cli, timeoutMs) {
2409
+ const timeoutNote = timeoutMs
2410
+ ? `You have approximately ${Math.round(timeoutMs / 60000)} minutes before this step times out. ` +
2411
+ 'Plan accordingly — delegate early if the work is substantial.\n\n'
2412
+ : '';
2413
+ // Option 2 (sub-agents via Task tool) is only available in Claude
2414
+ const subAgentOption = cli === 'claude'
2415
+ ? 'Option 2 — Use built-in sub-agents (Task tool) for research or scoped work:\n' +
2416
+ ' - Good for exploring code, reading files, or making targeted changes\n' +
2417
+ ' - Can run multiple sub-agents in parallel\n\n'
2418
+ : '';
2419
+ return ('---\n' +
2420
+ 'AUTONOMOUS DELEGATION — READ THIS BEFORE STARTING:\n' +
2421
+ timeoutNote +
2422
+ 'Before diving in, assess whether this task is too large or complex for a single agent. ' +
2423
+ 'If it involves multiple independent subtasks, touches many files, or could take a long time, ' +
2424
+ 'you should break it down and delegate to helper agents to avoid timeouts.\n\n' +
2425
+ 'Option 1 — Spawn relay agents (for real parallel coding work):\n' +
2426
+ ' - relay_spawn(name="helper-1", cli="claude", task="Specific subtask description")\n' +
2427
+ ' - Coordinate via relay_send(to="helper-1", message="...")\n' +
2428
+ ' - Check on them with relay_inbox()\n' +
2429
+ ' - Clean up when done: relay_release(name="helper-1")\n\n' +
2430
+ subAgentOption +
2431
+ 'Guidelines:\n' +
2432
+ '- You are the lead — delegate but stay in control, track progress, integrate results\n' +
2433
+ '- Give each helper a clear, self-contained task with enough context to work independently\n' +
2434
+ "- For simple or quick work, just do it yourself — don't over-delegate\n" +
2435
+ '- Always release spawned relay agents when their work is complete\n' +
2436
+ '- When spawning non-claude agents (codex, gemini, etc.), prepend to their task:\n' +
2437
+ ' "RELAY SETUP: First call register(name=\'<exact-agent-name>\') before any other relay tool."');
2438
+ }
2439
+ /** Post a message to the workflow channel. Fire-and-forget — never throws or blocks. */
2440
+ postToChannel(text) {
2441
+ if (!this.relayApiKey || !this.channel)
2442
+ return;
2443
+ this.ensureRelaycastRunnerAgent()
2444
+ .then((agent) => agent.send(this.channel, text))
2445
+ .catch(() => {
2446
+ // Non-critical — don't break workflow execution
2447
+ });
2448
+ }
2449
+ /** Post a rich completion report to the channel. */
2450
+ postCompletionReport(workflowName, outcomes, summary, confidence) {
2451
+ const completed = outcomes.filter((o) => o.status === 'completed');
2452
+ const skipped = outcomes.filter((o) => o.status === 'skipped');
2453
+ const retried = outcomes.filter((o) => o.attempts > 1);
2454
+ const lines = [
2455
+ `## Workflow **${workflowName}** — Complete`,
2456
+ '',
2457
+ summary,
2458
+ `Confidence: ${Math.round(confidence * 100)}%`,
2459
+ '',
2460
+ '### Steps',
2461
+ ...completed.map((o) => `- **${o.name}** (${o.agent}) — passed${o.verificationPassed ? ' (verified)' : ''}${o.attempts > 1 ? ` after ${o.attempts} attempts` : ''}`),
2462
+ ...skipped.map((o) => `- **${o.name}** — skipped`),
2463
+ ];
2464
+ if (retried.length > 0) {
2465
+ lines.push('', '### Retries');
2466
+ for (const o of retried) {
2467
+ lines.push(`- ${o.name}: ${o.attempts} attempts`);
2468
+ }
2469
+ }
2470
+ this.postToChannel(lines.join('\n'));
2471
+ }
2472
+ /** Post a failure report to the channel. */
2473
+ postFailureReport(workflowName, outcomes, errorMsg) {
2474
+ const completed = outcomes.filter((o) => o.status === 'completed');
2475
+ const failed = outcomes.filter((o) => o.status === 'failed');
2476
+ const skipped = outcomes.filter((o) => o.status === 'skipped');
2477
+ const lines = [
2478
+ `## Workflow **${workflowName}** — Failed`,
2479
+ '',
2480
+ `${completed.length}/${outcomes.length} steps passed. Error: ${errorMsg}`,
2481
+ '',
2482
+ '### Steps',
2483
+ ...completed.map((o) => `- **${o.name}** (${o.agent}) — passed`),
2484
+ ...failed.map((o) => `- **${o.name}** (${o.agent}) — FAILED: ${o.error ?? 'unknown'}`),
2485
+ ...skipped.map((o) => `- **${o.name}** — skipped`),
2486
+ ];
2487
+ this.postToChannel(lines.join('\n'));
2488
+ }
2489
+ /**
2490
+ * Log a human-readable run summary to the console after completion or failure.
2491
+ * Extracts the last meaningful lines from each step's raw PTY output.
2492
+ */
2493
+ logRunSummary(workflowName, outcomes, runId) {
2494
+ const completed = outcomes.filter((o) => o.status === 'completed');
2495
+ const failed = outcomes.filter((o) => o.status === 'failed');
2496
+ const skipped = outcomes.filter((o) => o.status === 'skipped');
2497
+ console.log('');
2498
+ console.log('━'.repeat(70));
2499
+ console.log(` Workflow "${workflowName}" — ${failed.length === 0 ? 'COMPLETED' : 'FAILED'}`);
2500
+ console.log(` ${completed.length} passed, ${failed.length} failed, ${skipped.length} skipped`);
2501
+ console.log('━'.repeat(70));
2502
+ for (const outcome of outcomes) {
2503
+ const icon = outcome.status === 'completed' ? '✓' : outcome.status === 'failed' ? '✗' : '⊘';
2504
+ const retryNote = outcome.attempts > 1 ? ` (${outcome.attempts} attempts)` : '';
2505
+ console.log(` ${icon} ${outcome.name} [${outcome.agent}]${retryNote}`);
2506
+ if (outcome.error) {
2507
+ console.log(` Error: ${outcome.error}`);
2508
+ }
2509
+ // Extract last meaningful lines from raw PTY output
2510
+ if (outcome.output) {
2511
+ const excerpt = this.extractOutputExcerpt(outcome.output);
2512
+ if (excerpt) {
2513
+ for (const line of excerpt.split('\n')) {
2514
+ console.log(` ${line}`);
2515
+ }
2516
+ }
2517
+ }
2518
+ }
2519
+ // Point to detailed output files
2520
+ const outputDir = this.getStepOutputDir(runId);
2521
+ const logsDir = path.join(this.cwd, '.agent-relay', 'team', 'worker-logs');
2522
+ console.log('');
2523
+ console.log(` Step output: ${outputDir}`);
2524
+ console.log(` Agent logs: ${logsDir}`);
2525
+ console.log('━'.repeat(70));
2526
+ console.log('');
2527
+ }
2528
+ /**
2529
+ * Extract a useful excerpt from raw PTY output.
2530
+ * Looks for the agent's final text output (ignoring ANSI, system prompts, tool calls).
2531
+ */
2532
+ extractOutputExcerpt(rawOutput) {
2533
+ const stripped = WorkflowRunner.stripAnsi(rawOutput);
2534
+ // Split into lines, filter out noise
2535
+ const lines = stripped.split('\n').filter((line) => {
2536
+ const trimmed = line.trim();
2537
+ if (!trimmed)
2538
+ return false;
2539
+ // Skip system/UI chrome
2540
+ if (trimmed.startsWith('╭') || trimmed.startsWith('╰') || trimmed.startsWith('│'))
2541
+ return false;
2542
+ if (trimmed.startsWith('─'))
2543
+ return false;
2544
+ if (trimmed.startsWith('❯') || trimmed.startsWith('⏵'))
2545
+ return false;
2546
+ if (trimmed.startsWith('<system-reminder>') || trimmed.startsWith('</system-reminder>'))
2547
+ return false;
2548
+ if (/^\[?workflow\s/.test(trimmed))
2549
+ return false;
2550
+ // Skip tool invocations
2551
+ if (/^(Read|Edit|Bash|Glob|Grep|Task|Explore|Write)\(/.test(trimmed))
2552
+ return false;
2553
+ // Skip thinking indicators
2554
+ if (/^[·✳✻✽⏺]?\s*Sublimating/.test(trimmed))
2555
+ return false;
2556
+ // Skip very short lines (likely UI fragments)
2557
+ if (trimmed.length < 10)
2558
+ return false;
2559
+ return true;
2560
+ });
2561
+ if (lines.length === 0)
2562
+ return '';
2563
+ // Take the last few meaningful lines (agent's final words)
2564
+ const tail = lines.slice(-5);
2565
+ const excerpt = tail.map((l) => l.trim().slice(0, 120)).join('\n');
2566
+ return excerpt.length > 0 ? `...\n${excerpt}` : '';
2567
+ }
2568
+ // ── Trajectory helpers ────────────────────────────────────────────────
2569
+ /** Analyze DAG structure for trajectory context. */
2570
+ analyzeDAG(steps) {
2571
+ const roots = steps.filter((s) => !s.dependsOn?.length);
2572
+ const withDeps = steps.filter((s) => s.dependsOn?.length);
2573
+ const parts = [`Parsed ${steps.length} steps`];
2574
+ if (roots.length > 1) {
2575
+ parts.push(`${roots.length} parallel tracks`);
2576
+ }
2577
+ if (withDeps.length > 0) {
2578
+ parts.push(`${withDeps.length} dependent steps`);
2579
+ }
2580
+ parts.push('DAG validated, no cycles');
2581
+ return parts.join(', ');
2582
+ }
2583
+ /** Collect step outcomes for trajectory synthesis. */
2584
+ collectOutcomes(stepStates, steps) {
2585
+ const stepsWithVerification = new Set(steps?.filter((s) => s.verification).map((s) => s.name) ?? []);
2586
+ const outcomes = [];
2587
+ for (const [name, state] of stepStates) {
2588
+ outcomes.push({
2589
+ name,
2590
+ agent: state.row.agentName ?? 'deterministic',
2591
+ status: state.row.status === 'completed'
2592
+ ? 'completed'
2593
+ : state.row.status === 'skipped'
2594
+ ? 'skipped'
2595
+ : 'failed',
2596
+ attempts: state.row.retryCount + 1,
2597
+ output: state.row.output,
2598
+ error: state.row.error,
2599
+ verificationPassed: state.row.status === 'completed' && stepsWithVerification.has(name),
2600
+ });
2601
+ }
2602
+ return outcomes;
2603
+ }
2604
+ // ── ID generation ─────────────────────────────────────────────────────
2605
+ generateId() {
2606
+ return randomBytes(12).toString('hex');
2607
+ }
2608
+ generateShortId() {
2609
+ return randomBytes(4).toString('hex');
2610
+ }
2611
+ /** Strip ANSI escape codes from terminal output — delegates to pty.ts canonical regex. */
2612
+ static stripAnsi(text) {
2613
+ return stripAnsiFn(text);
2614
+ }
2615
+ /**
2616
+ * Strip TUI chrome from PTY-captured output before posting to a channel.
2617
+ * Removes: ANSI codes, unicode spinner/thinking characters, cursor-movement
2618
+ * artifacts, and collapses runs of blank lines to a single blank line.
2619
+ * The raw (ANSI-stripped) output is still written to disk for step chaining.
2620
+ */
2621
+ static scrubForChannel(text) {
2622
+ // Strip system-reminder blocks (closed or unclosed)
2623
+ const withoutSystemReminders = text
2624
+ .replace(/<system-reminder>[\s\S]*?<\/system-reminder>/giu, '')
2625
+ .replace(/<system-reminder>[\s\S]*/giu, '');
2626
+ // Normalize CRLF and bare \r before stripping ANSI — PTY output often
2627
+ // contains \r\r\n which leaves stray \r after stripping that confuse line splitting.
2628
+ const normalized = withoutSystemReminders.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
2629
+ const ansiStripped = stripAnsiFn(normalized);
2630
+ // Unicode spinner / ornament characters used by Claude TUI animations.
2631
+ // Includes block-element chars (▗▖▘▝) used in the Claude Code header bar.
2632
+ const SPINNER = '\\u2756\\u2738\\u2739\\u273a\\u273b\\u273c\\u273d\\u2731\\u2732\\u2733\\u2734\\u2735\\u2736\\u2737\\u2743\\u2745\\u2746\\u25d6\\u25d7\\u25d8\\u25d9\\u2022\\u25cf\\u25cb\\u25a0\\u25a1\\u25b6\\u25c0\\u23f5\\u23f6\\u23f7\\u23f8\\u23f9\\u25e2\\u25e3\\u25e4\\u25e5\\u2597\\u2596\\u2598\\u259d\\u2bc8\\u2bc7\\u2bc5\\u2bc6\\u00b7' +
2633
+ '\\u2590\\u258c\\u2588\\u2584\\u2580\\u259a\\u259e'; // additional block elements
2634
+ const spinnerRe = new RegExp(`[${SPINNER}]`, 'gu');
2635
+ const spinnerClassRe = new RegExp(`^[\\s${SPINNER}]*$`, 'u');
2636
+ // Line-level filters
2637
+ const boxDrawingOnlyRe = /^[\s\u2500-\u257f\u2580-\u259f\u25a0-\u25ff\-_=~]{3,}$/u;
2638
+ // Broker internal log lines: "2026-02-26T12:45:12.123Z INFO agent_relay_broker::..."
2639
+ const brokerLogRe = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z\s+(?:INFO|WARN|ERROR|DEBUG)\s/u;
2640
+ const claudeHeaderRe = /^(?:[\s\u2580-\u259f✢*·▗▖▘▝]+\s*)?(?:Claude\s+Code(?:\s+v?[\d.]+)?|(?:Sonnet|Haiku|Opus)\s*[\d.]+|claude-(?:sonnet|haiku|opus)-[\w.-]+|Running\s+on\s+claude)/iu;
2641
+ // TUI directory breadcrumb lines (e.g. " ~/Projects/agent-workforce/relay-...")
2642
+ const dirBreadcrumbRe = /^\s*~[\\/]/u;
2643
+ const uiHintRe = /\b(?:Press\s+up\s+to\s+edit|tab\s+to\s+queue|bypass\s+permissions|esc\s+to\s+interrupt)\b/iu;
2644
+ // Any spinner-prefixed word ending in … — catches all Claude thinking animations
2645
+ // regardless of the specific word used (Thinking, Cascading, Flibbertigibbeting, etc.)
2646
+ const thinkingLineRe = new RegExp(`^[\\s${SPINNER}]*\\s*\\w[\\w\\s]*\\u2026\\s*$`, 'u');
2647
+ const cursorOnlyRe = /^[\s❯⎿›»◀▶←→↑↓⟨⟩⟪⟫·]+$/u;
2648
+ const slashCommandRe = /^\/\w+\s*$/u;
2649
+ const mcpJsonKvRe = /^\s*"(?:type|method|params|result|id|jsonrpc|tool|name|arguments|content|role|metadata)"\s*:/u;
2650
+ const meaningfulContentRe = /[a-zA-Z0-9]/u;
2651
+ const countJsonDepth = (line) => {
2652
+ let depth = 0;
2653
+ for (const ch of line) {
2654
+ if (ch === '{' || ch === '[')
2655
+ depth += 1;
2656
+ if (ch === '}' || ch === ']')
2657
+ depth -= 1;
2658
+ }
2659
+ return depth;
2660
+ };
2661
+ const lines = ansiStripped.split('\n');
2662
+ const meaningful = [];
2663
+ let jsonDepth = 0;
2664
+ for (const line of lines) {
2665
+ const trimmed = line.trim();
2666
+ if (jsonDepth > 0) {
2667
+ jsonDepth += countJsonDepth(line);
2668
+ if (jsonDepth <= 0)
2669
+ jsonDepth = 0;
2670
+ continue;
2671
+ }
2672
+ if (trimmed.length === 0)
2673
+ continue;
2674
+ if (trimmed.startsWith('{') || /^\[\s*\{/.test(trimmed)) {
2675
+ jsonDepth = Math.max(countJsonDepth(line), 0);
2676
+ continue;
2677
+ }
2678
+ if (mcpJsonKvRe.test(line))
2679
+ continue;
2680
+ if (spinnerClassRe.test(trimmed))
2681
+ continue;
2682
+ if (boxDrawingOnlyRe.test(trimmed))
2683
+ continue;
2684
+ if (brokerLogRe.test(trimmed))
2685
+ continue;
2686
+ if (claudeHeaderRe.test(trimmed))
2687
+ continue;
2688
+ if (dirBreadcrumbRe.test(trimmed))
2689
+ continue;
2690
+ if (uiHintRe.test(trimmed))
2691
+ continue;
2692
+ if (thinkingLineRe.test(trimmed))
2693
+ continue;
2694
+ if (cursorOnlyRe.test(trimmed))
2695
+ continue;
2696
+ if (slashCommandRe.test(trimmed))
2697
+ continue;
2698
+ if (!meaningfulContentRe.test(trimmed))
2699
+ continue;
2700
+ // Drop TUI animation frame fragments: lines where stripping spinners and
2701
+ // whitespace leaves ≤ 3 alphanumeric characters (e.g. "F", "l b", "i g").
2702
+ const alphanum = trimmed.replace(spinnerRe, '').replace(/\s+/g, '');
2703
+ if (alphanum.replace(/[^a-zA-Z0-9]/g, '').length <= 3)
2704
+ continue;
2705
+ meaningful.push(line);
2706
+ }
2707
+ return meaningful
2708
+ .join('\n')
2709
+ .replace(/\n{3,}/g, '\n\n')
2710
+ .trim();
2711
+ }
2712
+ /** Sanitize a workflow name into a valid channel name. */
2713
+ sanitizeChannelName(name) {
2714
+ return name
2715
+ .toLowerCase()
2716
+ .replace(/[^a-z0-9-]/g, '-')
2717
+ .replace(/-+/g, '-')
2718
+ .slice(0, 32);
2719
+ }
2720
+ /** Directory for persisted step outputs: .agent-relay/step-outputs/{runId}/ */
2721
+ getStepOutputDir(runId) {
2722
+ return path.join(this.cwd, '.agent-relay', 'step-outputs', runId);
2723
+ }
2724
+ /** Persist step output to disk and post full output as a channel message. */
2725
+ async persistStepOutput(runId, stepName, output) {
2726
+ // 1. Write to disk
2727
+ try {
2728
+ const dir = this.getStepOutputDir(runId);
2729
+ mkdirSync(dir, { recursive: true });
2730
+ const cleaned = WorkflowRunner.stripAnsi(output);
2731
+ await writeFile(path.join(dir, `${stepName}.md`), cleaned);
2732
+ }
2733
+ catch {
2734
+ // Non-critical
2735
+ }
2736
+ // 2. Post scrubbed output as a single channel message (most recent tail only)
2737
+ const scrubbed = WorkflowRunner.scrubForChannel(output);
2738
+ if (scrubbed.length === 0) {
2739
+ this.postToChannel(`**[${stepName}]** Step completed — output written to disk`);
2740
+ return;
2741
+ }
2742
+ const maxMsg = 2000;
2743
+ const preview = scrubbed.length > maxMsg ? scrubbed.slice(-maxMsg) : scrubbed;
2744
+ this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``);
2745
+ }
2746
+ /** Load persisted step output from disk. */
2747
+ loadStepOutput(runId, stepName) {
2748
+ try {
2749
+ const filePath = path.join(this.getStepOutputDir(runId), `${stepName}.md`);
2750
+ if (!existsSync(filePath))
2751
+ return undefined;
2752
+ return readFileSync(filePath, 'utf-8');
2753
+ }
2754
+ catch {
2755
+ return undefined;
2756
+ }
2757
+ }
2758
+ /** Get or create the worker logs directory (.agent-relay/team/worker-logs) */
2759
+ getWorkerLogsDir() {
2760
+ const logsDir = path.join(this.cwd, '.agent-relay', 'team', 'worker-logs');
2761
+ mkdirSync(logsDir, { recursive: true });
2762
+ return logsDir;
2763
+ }
2764
+ /** Register a spawned agent in workers.json so `agents:kill` can find it. */
2765
+ registerWorker(agentName, cli, task, pid, interactive = true) {
2766
+ // Track in memory first (no race condition)
2767
+ const workerEntry = {
2768
+ cli,
2769
+ task: task.slice(0, 500),
2770
+ spawnedAt: Date.now(),
2771
+ pid,
2772
+ interactive,
2773
+ logFile: path.join(this.getWorkerLogsDir(), `${agentName}.log`),
2774
+ };
2775
+ this.activeWorkers.set(agentName, workerEntry);
2776
+ // Serialize file writes with mutex to prevent race conditions
2777
+ this.workersFileLock = this.workersFileLock.then(() => {
2778
+ try {
2779
+ mkdirSync(path.dirname(this.workersPath), { recursive: true });
2780
+ // Filter out any existing entry with the same name before adding
2781
+ const existing = this.readWorkers().filter((w) => w.name !== agentName);
2782
+ existing.push({ name: agentName, ...workerEntry });
2783
+ this.writeWorkers(existing);
2784
+ }
2785
+ catch {
2786
+ // Non-critical — don't fail the workflow if workers.json can't be written
2787
+ }
2788
+ });
2789
+ }
2790
+ /** Remove a spawned agent from workers.json after it exits. */
2791
+ unregisterWorker(agentName) {
2792
+ // Remove from in-memory tracking first
2793
+ this.activeWorkers.delete(agentName);
2794
+ // Serialize file writes with mutex to prevent race conditions
2795
+ this.workersFileLock = this.workersFileLock.then(() => {
2796
+ try {
2797
+ const existing = this.readWorkers();
2798
+ const filtered = existing.filter((w) => w.name !== agentName);
2799
+ this.writeWorkers(filtered);
2800
+ }
2801
+ catch {
2802
+ // Non-critical
2803
+ }
2804
+ });
2805
+ }
2806
+ readWorkers() {
2807
+ try {
2808
+ if (!existsSync(this.workersPath))
2809
+ return [];
2810
+ const raw = JSON.parse(readFileSync(this.workersPath, 'utf-8'));
2811
+ return Array.isArray(raw?.workers) ? raw.workers : [];
2812
+ }
2813
+ catch {
2814
+ return [];
2815
+ }
2816
+ }
2817
+ writeWorkers(workers) {
2818
+ writeFileSync(this.workersPath, JSON.stringify({ workers }, null, 2));
2819
+ }
2820
+ }
2821
+ //# sourceMappingURL=runner.js.map