agent-relay 3.1.16 → 3.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/bin/agent-relay-broker-darwin-arm64 +0 -0
  2. package/bin/agent-relay-broker-darwin-x64 +0 -0
  3. package/bin/agent-relay-broker-linux-arm64 +0 -0
  4. package/bin/agent-relay-broker-linux-x64 +0 -0
  5. package/dist/index.cjs +573 -32
  6. package/package.json +8 -8
  7. package/packages/acp-bridge/package.json +2 -2
  8. package/packages/config/package.json +1 -1
  9. package/packages/hooks/package.json +4 -4
  10. package/packages/memory/package.json +2 -2
  11. package/packages/openclaw/package.json +2 -2
  12. package/packages/policy/package.json +2 -2
  13. package/packages/sdk/dist/__tests__/e2e-owner-review.test.d.ts +16 -0
  14. package/packages/sdk/dist/__tests__/e2e-owner-review.test.d.ts.map +1 -0
  15. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js +640 -0
  16. package/packages/sdk/dist/__tests__/e2e-owner-review.test.js.map +1 -0
  17. package/packages/sdk/dist/client.d.ts +2 -0
  18. package/packages/sdk/dist/client.d.ts.map +1 -1
  19. package/packages/sdk/dist/client.js +2 -0
  20. package/packages/sdk/dist/client.js.map +1 -1
  21. package/packages/sdk/dist/protocol.d.ts +4 -0
  22. package/packages/sdk/dist/protocol.d.ts.map +1 -1
  23. package/packages/sdk/dist/workflows/cli.js +10 -0
  24. package/packages/sdk/dist/workflows/cli.js.map +1 -1
  25. package/packages/sdk/dist/workflows/runner.d.ts +31 -0
  26. package/packages/sdk/dist/workflows/runner.d.ts.map +1 -1
  27. package/packages/sdk/dist/workflows/runner.js +542 -31
  28. package/packages/sdk/dist/workflows/runner.js.map +1 -1
  29. package/packages/sdk/dist/workflows/trajectory.d.ts +22 -1
  30. package/packages/sdk/dist/workflows/trajectory.d.ts.map +1 -1
  31. package/packages/sdk/dist/workflows/trajectory.js +55 -8
  32. package/packages/sdk/dist/workflows/trajectory.js.map +1 -1
  33. package/packages/sdk/dist/workflows/validator.d.ts.map +1 -1
  34. package/packages/sdk/dist/workflows/validator.js +29 -0
  35. package/packages/sdk/dist/workflows/validator.js.map +1 -1
  36. package/packages/sdk/package.json +2 -2
  37. package/packages/sdk/src/__tests__/e2e-owner-review.test.ts +778 -0
  38. package/packages/sdk/src/__tests__/workflow-runner.test.ts +484 -9
  39. package/packages/sdk/src/client.ts +4 -0
  40. package/packages/sdk/src/protocol.ts +4 -0
  41. package/packages/sdk/src/workflows/README.md +11 -0
  42. package/packages/sdk/src/workflows/cli.ts +10 -0
  43. package/packages/sdk/src/workflows/runner.ts +714 -33
  44. package/packages/sdk/src/workflows/trajectory.ts +89 -8
  45. package/packages/sdk/src/workflows/validator.ts +29 -0
  46. package/packages/sdk-py/pyproject.toml +1 -1
  47. package/packages/telemetry/package.json +1 -1
  48. package/packages/trajectory/package.json +2 -2
  49. package/packages/user-directory/package.json +2 -2
  50. package/packages/utils/package.json +2 -2
  51. package/relay-snippets/agent-relay-snippet.md +12 -0
@@ -86,6 +86,8 @@ export class WorkflowRunner {
86
86
  lastIdleLog = new Map();
87
87
  /** Tracks last logged activity type per agent to avoid duplicate status lines. */
88
88
  lastActivity = new Map();
89
+ /** Runtime-name lookup for agents participating in supervised owner flows. */
90
+ supervisedRuntimeAgents = new Map();
89
91
  /** Resolved named paths from the top-level `paths` config, keyed by name → absolute directory. */
90
92
  resolvedPaths = new Map();
91
93
  constructor(options = {}) {
@@ -1094,6 +1096,10 @@ export class WorkflowRunner {
1094
1096
  const fromShort = msg.from.replace(/-[a-f0-9]{6,}$/, '');
1095
1097
  const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
1096
1098
  this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
1099
+ const supervision = this.supervisedRuntimeAgents.get(msg.from);
1100
+ if (supervision?.role === 'owner') {
1101
+ void this.trajectory?.ownerMonitoringEvent(supervision.stepName, supervision.logicalName, `Messaged ${msg.to}: ${msg.text.slice(0, 120)}`, { to: msg.to, text: msg.text });
1102
+ }
1097
1103
  };
1098
1104
  this.relay.onAgentSpawned = (agent) => {
1099
1105
  // Skip agents already managed by step execution
@@ -1207,6 +1213,7 @@ export class WorkflowRunner {
1207
1213
  }
1208
1214
  this.lastIdleLog.clear();
1209
1215
  this.lastActivity.clear();
1216
+ this.supervisedRuntimeAgents.clear();
1210
1217
  this.log('Shutting down broker...');
1211
1218
  await this.relay?.shutdown();
1212
1219
  this.relay = undefined;
@@ -1779,10 +1786,26 @@ export class WorkflowRunner {
1779
1786
  if (!rawAgentDef) {
1780
1787
  throw new Error(`Agent "${agentName}" not found in config`);
1781
1788
  }
1782
- const agentDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
1783
- const maxRetries = step.retries ?? agentDef.constraints?.retries ?? errorHandling?.maxRetries ?? 0;
1789
+ const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
1790
+ const usesOwnerFlow = specialistDef.interactive !== false;
1791
+ const ownerDef = usesOwnerFlow ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
1792
+ const reviewDef = usesOwnerFlow ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
1793
+ const supervised = {
1794
+ specialist: specialistDef,
1795
+ owner: ownerDef,
1796
+ reviewer: reviewDef,
1797
+ };
1798
+ const usesDedicatedOwner = usesOwnerFlow && ownerDef.name !== specialistDef.name;
1799
+ const maxRetries = step.retries ??
1800
+ ownerDef.constraints?.retries ??
1801
+ specialistDef.constraints?.retries ??
1802
+ errorHandling?.maxRetries ??
1803
+ 0;
1784
1804
  const retryDelay = errorHandling?.retryDelayMs ?? 1000;
1785
- const timeoutMs = step.timeoutMs ?? agentDef.constraints?.timeoutMs ?? this.currentConfig?.swarm?.timeoutMs;
1805
+ const timeoutMs = step.timeoutMs ??
1806
+ ownerDef.constraints?.timeoutMs ??
1807
+ specialistDef.constraints?.timeoutMs ??
1808
+ this.currentConfig?.swarm?.timeoutMs;
1786
1809
  let lastError;
1787
1810
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
1788
1811
  this.checkAborted();
@@ -1807,60 +1830,110 @@ export class WorkflowRunner {
1807
1830
  updatedAt: new Date().toISOString(),
1808
1831
  });
1809
1832
  this.emit({ type: 'step:started', runId, stepName: step.name });
1810
- this.postToChannel(`**[${step.name}]** Started (agent: ${agentDef.name})`);
1811
- await this.trajectory?.stepStarted(step, agentDef.name);
1833
+ this.postToChannel(`**[${step.name}]** Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`);
1834
+ await this.trajectory?.stepStarted(step, ownerDef.name, {
1835
+ role: usesDedicatedOwner ? 'owner' : 'specialist',
1836
+ owner: ownerDef.name,
1837
+ specialist: specialistDef.name,
1838
+ reviewer: reviewDef?.name,
1839
+ });
1840
+ if (usesDedicatedOwner) {
1841
+ await this.trajectory?.stepSupervisionAssigned(step, supervised);
1842
+ }
1843
+ this.emit({
1844
+ type: 'step:owner-assigned',
1845
+ runId,
1846
+ stepName: step.name,
1847
+ ownerName: ownerDef.name,
1848
+ specialistName: specialistDef.name,
1849
+ });
1812
1850
  // Resolve step-output variables (e.g. {{steps.plan.output}}) at execution time
1813
1851
  const stepOutputContext = this.buildStepOutputContext(stepStates, runId);
1814
1852
  let resolvedTask = this.interpolateStepTask(step.task ?? '', stepOutputContext);
1815
1853
  // If this is an interactive agent, append awareness of non-interactive workers
1816
1854
  // so the lead knows not to message them and to use step output chaining instead
1817
- if (agentDef.interactive !== false) {
1855
+ if (specialistDef.interactive !== false || ownerDef.interactive !== false) {
1818
1856
  const nonInteractiveInfo = this.buildNonInteractiveAwareness(agentMap, stepStates);
1819
1857
  if (nonInteractiveInfo) {
1820
1858
  resolvedTask += nonInteractiveInfo;
1821
1859
  }
1822
1860
  }
1823
- // Apply step-level workdir override to agent definition if present
1824
- let effectiveAgentDef = agentDef;
1825
- if (step.workdir) {
1826
- const stepWorkdir = this.resolveStepWorkdir(step);
1827
- if (stepWorkdir) {
1828
- effectiveAgentDef = { ...agentDef, cwd: stepWorkdir, workdir: undefined };
1861
+ // Apply step-level workdir override to agent definitions if present
1862
+ const applyStepWorkdir = (def) => {
1863
+ if (step.workdir) {
1864
+ const stepWorkdir = this.resolveStepWorkdir(step);
1865
+ if (stepWorkdir) {
1866
+ return { ...def, cwd: stepWorkdir, workdir: undefined };
1867
+ }
1829
1868
  }
1869
+ return def;
1870
+ };
1871
+ const effectiveSpecialist = applyStepWorkdir(specialistDef);
1872
+ const effectiveOwner = applyStepWorkdir(ownerDef);
1873
+ let specialistOutput;
1874
+ let ownerOutput;
1875
+ let ownerElapsed;
1876
+ if (usesDedicatedOwner) {
1877
+ const result = await this.executeSupervisedAgentStep(step, { specialist: effectiveSpecialist, owner: effectiveOwner, reviewer: reviewDef }, resolvedTask, timeoutMs);
1878
+ specialistOutput = result.specialistOutput;
1879
+ ownerOutput = result.ownerOutput;
1880
+ ownerElapsed = result.ownerElapsed;
1881
+ }
1882
+ else {
1883
+ const ownerTask = this.injectStepOwnerContract(step, resolvedTask, effectiveOwner, effectiveSpecialist);
1884
+ this.log(`[${step.name}] Spawning owner "${effectiveOwner.name}" (cli: ${effectiveOwner.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
1885
+ const resolvedStep = { ...step, task: ownerTask };
1886
+ const ownerStartTime = Date.now();
1887
+ const output = this.executor
1888
+ ? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs)
1889
+ : await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs);
1890
+ ownerElapsed = Date.now() - ownerStartTime;
1891
+ this.log(`[${step.name}] Owner "${effectiveOwner.name}" exited`);
1892
+ if (usesOwnerFlow) {
1893
+ this.assertOwnerCompletionMarker(step, output, ownerTask);
1894
+ }
1895
+ specialistOutput = output;
1896
+ ownerOutput = output;
1830
1897
  }
1831
- // Spawn agent via AgentRelay
1832
- this.log(`[${step.name}] Spawning agent "${effectiveAgentDef.name}" (cli: ${effectiveAgentDef.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
1833
- const resolvedStep = { ...step, task: resolvedTask };
1834
- const output = this.executor
1835
- ? await this.executor.executeAgentStep(resolvedStep, effectiveAgentDef, resolvedTask, timeoutMs)
1836
- : await this.spawnAndWait(effectiveAgentDef, resolvedStep, timeoutMs);
1837
- this.log(`[${step.name}] Agent "${agentDef.name}" exited`);
1838
1898
  // Run verification if configured
1839
1899
  if (step.verification) {
1840
- this.runVerification(step.verification, output, step.name, resolvedTask);
1900
+ this.runVerification(step.verification, specialistOutput, step.name, resolvedTask);
1901
+ }
1902
+ // Every interactive step gets a review pass; pick a dedicated reviewer when available.
1903
+ let combinedOutput = specialistOutput;
1904
+ if (usesOwnerFlow && reviewDef) {
1905
+ const remainingMs = timeoutMs ? Math.max(0, timeoutMs - ownerElapsed) : undefined;
1906
+ const reviewOutput = await this.runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewDef, remainingMs);
1907
+ combinedOutput = this.combineStepAndReviewOutput(specialistOutput, reviewOutput);
1841
1908
  }
1842
1909
  // Mark completed
1843
1910
  state.row.status = 'completed';
1844
- state.row.output = output;
1911
+ state.row.output = combinedOutput;
1845
1912
  state.row.completedAt = new Date().toISOString();
1846
1913
  await this.db.updateStep(state.row.id, {
1847
1914
  status: 'completed',
1848
- output,
1915
+ output: combinedOutput,
1849
1916
  completedAt: state.row.completedAt,
1850
1917
  updatedAt: new Date().toISOString(),
1851
1918
  });
1852
1919
  // Persist step output to disk so it survives restarts and is inspectable
1853
- await this.persistStepOutput(runId, step.name, output);
1854
- this.emit({ type: 'step:completed', runId, stepName: step.name, output });
1855
- await this.trajectory?.stepCompleted(step, output, attempt + 1);
1920
+ await this.persistStepOutput(runId, step.name, combinedOutput);
1921
+ this.emit({ type: 'step:completed', runId, stepName: step.name, output: combinedOutput });
1922
+ await this.trajectory?.stepCompleted(step, combinedOutput, attempt + 1);
1856
1923
  return;
1857
1924
  }
1858
1925
  catch (err) {
1859
1926
  lastError = err instanceof Error ? err.message : String(err);
1927
+ const ownerTimedOut = usesDedicatedOwner
1928
+ ? /\bowner timed out\b/i.test(lastError)
1929
+ : /\btimed out\b/i.test(lastError) && !lastError.includes(`${step.name}-review`);
1930
+ if (ownerTimedOut) {
1931
+ this.emit({ type: 'step:owner-timeout', runId, stepName: step.name, ownerName: ownerDef.name });
1932
+ }
1860
1933
  }
1861
1934
  }
1862
1935
  // All retries exhausted — record root-cause diagnosis and mark failed
1863
- const nonInteractive = agentDef.interactive === false || ['worker', 'reviewer', 'analyst'].includes(agentDef.preset ?? '');
1936
+ const nonInteractive = ownerDef.interactive === false || ['worker', 'reviewer', 'analyst'].includes(ownerDef.preset ?? '');
1864
1937
  const verificationValue = typeof step.verification === 'object' && 'value' in step.verification
1865
1938
  ? String(step.verification.value)
1866
1939
  : undefined;
@@ -1873,6 +1946,438 @@ export class WorkflowRunner {
1873
1946
  await this.markStepFailed(state, lastError ?? 'Unknown error', runId);
1874
1947
  throw new Error(`Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`);
1875
1948
  }
1949
+ injectStepOwnerContract(step, resolvedTask, ownerDef, specialistDef) {
1950
+ if (ownerDef.interactive === false)
1951
+ return resolvedTask;
1952
+ const specialistNote = ownerDef.name === specialistDef.name
1953
+ ? ''
1954
+ : `Specialist intended for this step: "${specialistDef.name}" (${specialistDef.role ?? specialistDef.cli}).`;
1955
+ return (resolvedTask +
1956
+ '\n\n---\n' +
1957
+ `STEP OWNER CONTRACT:\n` +
1958
+ `- You are the accountable owner for step "${step.name}".\n` +
1959
+ (specialistNote ? `- ${specialistNote}\n` : '') +
1960
+ `- If you delegate, you must still verify completion yourself.\n` +
1961
+ `- Before exiting, provide an explicit completion line: STEP_COMPLETE:${step.name}\n` +
1962
+ `- Then self-terminate immediately with /exit.`);
1963
+ }
1964
+ buildOwnerSupervisorTask(step, originalTask, supervised, workerRuntimeName) {
1965
+ const verificationGuide = this.buildSupervisorVerificationGuide(step.verification);
1966
+ const channelLine = this.channel ? `#${this.channel}` : '(workflow channel unavailable)';
1967
+ return (`You are the step owner/supervisor for step "${step.name}".\n\n` +
1968
+ `Worker: ${supervised.specialist.name} (runtime: ${workerRuntimeName}) on ${channelLine}\n` +
1969
+ `Task: ${originalTask}\n\n` +
1970
+ `Your job: Monitor the worker and determine when the task is complete.\n\n` +
1971
+ `How to verify completion:\n` +
1972
+ `- Watch ${channelLine} for the worker's progress messages and mirrored PTY output\n` +
1973
+ `- Check file changes: run \`git diff --stat\` or inspect expected files directly\n` +
1974
+ `- Ask the worker directly on ${channelLine} if you need a status update\n` +
1975
+ verificationGuide +
1976
+ `\nWhen you're satisfied the work is done correctly:\n` +
1977
+ `Output exactly: STEP_COMPLETE:${step.name}`);
1978
+ }
1979
+ buildSupervisorVerificationGuide(verification) {
1980
+ if (!verification)
1981
+ return '';
1982
+ switch (verification.type) {
1983
+ case 'output_contains':
1984
+ return `- Verification gate: confirm the worker output contains ${JSON.stringify(verification.value)}\n`;
1985
+ case 'file_exists':
1986
+ return `- Verification gate: confirm the file exists at ${JSON.stringify(verification.value)}\n`;
1987
+ case 'exit_code':
1988
+ return `- Verification gate: confirm the worker exits with code ${JSON.stringify(verification.value)}\n`;
1989
+ case 'custom':
1990
+ return `- Verification gate: apply the custom verification rule ${JSON.stringify(verification.value)}\n`;
1991
+ default:
1992
+ return '';
1993
+ }
1994
+ }
1995
+ async executeSupervisedAgentStep(step, supervised, resolvedTask, timeoutMs) {
1996
+ if (this.executor) {
1997
+ const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, supervised.specialist.name);
1998
+ const specialistStep = { ...step, task: resolvedTask };
1999
+ const ownerStep = {
2000
+ ...step,
2001
+ name: `${step.name}-owner`,
2002
+ agent: supervised.owner.name,
2003
+ task: supervisorTask,
2004
+ };
2005
+ this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" and owner "${supervised.owner.name}"`);
2006
+ const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist, resolvedTask, timeoutMs);
2007
+ // Guard against unhandled rejection if owner fails before specialist settles
2008
+ const specialistSettled = specialistPromise.catch(() => undefined);
2009
+ try {
2010
+ const ownerStartTime = Date.now();
2011
+ const ownerOutput = await this.executor.executeAgentStep(ownerStep, supervised.owner, supervisorTask, timeoutMs);
2012
+ const ownerElapsed = Date.now() - ownerStartTime;
2013
+ this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2014
+ const specialistOutput = await specialistPromise;
2015
+ return { specialistOutput, ownerOutput, ownerElapsed };
2016
+ }
2017
+ catch (error) {
2018
+ await specialistSettled;
2019
+ throw error;
2020
+ }
2021
+ }
2022
+ let workerHandle;
2023
+ let workerRuntimeName = supervised.specialist.name;
2024
+ let workerSpawned = false;
2025
+ let workerReleased = false;
2026
+ let resolveWorkerSpawn;
2027
+ let rejectWorkerSpawn;
2028
+ const workerReady = new Promise((resolve, reject) => {
2029
+ resolveWorkerSpawn = resolve;
2030
+ rejectWorkerSpawn = reject;
2031
+ });
2032
+ const specialistStep = { ...step, task: resolvedTask };
2033
+ this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" (cli: ${supervised.specialist.cli})`);
2034
+ const workerPromise = this.spawnAndWait(supervised.specialist, specialistStep, timeoutMs, {
2035
+ agentNameSuffix: 'worker',
2036
+ onSpawned: ({ actualName, agent }) => {
2037
+ workerHandle = agent;
2038
+ workerRuntimeName = actualName;
2039
+ this.supervisedRuntimeAgents.set(actualName, {
2040
+ stepName: step.name,
2041
+ role: 'specialist',
2042
+ logicalName: supervised.specialist.name,
2043
+ });
2044
+ if (!workerSpawned) {
2045
+ workerSpawned = true;
2046
+ resolveWorkerSpawn();
2047
+ }
2048
+ },
2049
+ onChunk: ({ agentName, chunk }) => {
2050
+ this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk);
2051
+ },
2052
+ }).catch((error) => {
2053
+ if (!workerSpawned) {
2054
+ workerSpawned = true;
2055
+ rejectWorkerSpawn(error);
2056
+ }
2057
+ throw error;
2058
+ });
2059
+ const workerSettled = workerPromise.catch(() => undefined);
2060
+ workerPromise
2061
+ .then((output) => {
2062
+ workerReleased = true;
2063
+ this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited`);
2064
+ if (step.verification?.type === 'output_contains' && output.includes(step.verification.value)) {
2065
+ this.postToChannel(`**[${step.name}]** Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`);
2066
+ }
2067
+ })
2068
+ .catch((error) => {
2069
+ const message = error instanceof Error ? error.message : String(error);
2070
+ this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited with error: ${message}`);
2071
+ });
2072
+ await workerReady;
2073
+ const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, workerRuntimeName);
2074
+ const ownerStep = {
2075
+ ...step,
2076
+ name: `${step.name}-owner`,
2077
+ agent: supervised.owner.name,
2078
+ task: supervisorTask,
2079
+ };
2080
+ this.log(`[${step.name}] Spawning owner "${supervised.owner.name}" (cli: ${supervised.owner.cli})`);
2081
+ const ownerStartTime = Date.now();
2082
+ try {
2083
+ const ownerOutput = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, {
2084
+ agentNameSuffix: 'owner',
2085
+ onSpawned: ({ actualName }) => {
2086
+ this.supervisedRuntimeAgents.set(actualName, {
2087
+ stepName: step.name,
2088
+ role: 'owner',
2089
+ logicalName: supervised.owner.name,
2090
+ });
2091
+ },
2092
+ onChunk: ({ chunk }) => {
2093
+ void this.recordOwnerMonitoringChunk(step, supervised.owner, chunk);
2094
+ },
2095
+ });
2096
+ const ownerElapsed = Date.now() - ownerStartTime;
2097
+ this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
2098
+ this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
2099
+ const specialistOutput = await workerPromise;
2100
+ return { specialistOutput, ownerOutput, ownerElapsed };
2101
+ }
2102
+ catch (error) {
2103
+ const message = error instanceof Error ? error.message : String(error);
2104
+ if (!workerReleased && workerHandle) {
2105
+ await workerHandle.release().catch(() => undefined);
2106
+ }
2107
+ await workerSettled;
2108
+ if (/\btimed out\b/i.test(message)) {
2109
+ throw new Error(`Step "${step.name}" owner timed out after ${timeoutMs ?? 'unknown'}ms`);
2110
+ }
2111
+ throw error;
2112
+ }
2113
+ }
2114
+ forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk) {
2115
+ const lines = WorkflowRunner.stripAnsi(chunk)
2116
+ .split('\n')
2117
+ .map((line) => line.trim())
2118
+ .filter(Boolean)
2119
+ .slice(0, 3);
2120
+ for (const line of lines) {
2121
+ this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`);
2122
+ }
2123
+ }
2124
+ async recordOwnerMonitoringChunk(step, ownerDef, chunk) {
2125
+ const stripped = WorkflowRunner.stripAnsi(chunk);
2126
+ const details = [];
2127
+ if (/git diff --stat/i.test(stripped))
2128
+ details.push('Checked git diff stats');
2129
+ if (/\bls -la\b/i.test(stripped))
2130
+ details.push('Listed files for verification');
2131
+ if (/status update\?/i.test(stripped))
2132
+ details.push('Asked the worker for a status update');
2133
+ if (/STEP_COMPLETE:/i.test(stripped))
2134
+ details.push('Declared the step complete');
2135
+ for (const detail of details) {
2136
+ await this.trajectory?.ownerMonitoringEvent(step.name, ownerDef.name, detail, {
2137
+ output: stripped.slice(0, 240),
2138
+ });
2139
+ }
2140
+ }
2141
+ resolveAutoStepOwner(specialistDef, agentMap) {
2142
+ if (specialistDef.interactive === false)
2143
+ return specialistDef;
2144
+ const allDefs = [...agentMap.values()].map((d) => WorkflowRunner.resolveAgentDef(d));
2145
+ const candidates = allDefs.filter((d) => d.interactive !== false);
2146
+ const matchesHubRole = (text) => [...WorkflowRunner.HUB_ROLES].some((r) => new RegExp(`\\b${r}\\b`, 'i').test(text));
2147
+ const ownerish = (def) => {
2148
+ const nameLC = def.name.toLowerCase();
2149
+ const roleLC = def.role?.toLowerCase() ?? '';
2150
+ return matchesHubRole(nameLC) || matchesHubRole(roleLC);
2151
+ };
2152
+ const ownerPriority = (def) => {
2153
+ const roleLC = def.role?.toLowerCase() ?? '';
2154
+ const nameLC = def.name.toLowerCase();
2155
+ if (/\blead\b/.test(roleLC) || /\blead\b/.test(nameLC))
2156
+ return 6;
2157
+ if (/\bcoordinator\b/.test(roleLC) || /\bcoordinator\b/.test(nameLC))
2158
+ return 5;
2159
+ if (/\bsupervisor\b/.test(roleLC) || /\bsupervisor\b/.test(nameLC))
2160
+ return 4;
2161
+ if (/\borchestrator\b/.test(roleLC) || /\borchestrator\b/.test(nameLC))
2162
+ return 3;
2163
+ if (/\bhub\b/.test(roleLC) || /\bhub\b/.test(nameLC))
2164
+ return 2;
2165
+ return ownerish(def) ? 1 : 0;
2166
+ };
2167
+ const dedicatedOwner = candidates
2168
+ .filter((d) => d.name !== specialistDef.name && ownerish(d))
2169
+ .sort((a, b) => ownerPriority(b) - ownerPriority(a) || a.name.localeCompare(b.name))[0];
2170
+ if (dedicatedOwner)
2171
+ return dedicatedOwner;
2172
+ return specialistDef;
2173
+ }
2174
+ resolveAutoReviewAgent(ownerDef, agentMap) {
2175
+ const allDefs = [...agentMap.values()].map((d) => WorkflowRunner.resolveAgentDef(d));
2176
+ const isReviewer = (def) => {
2177
+ const roleLC = def.role?.toLowerCase() ?? '';
2178
+ const nameLC = def.name.toLowerCase();
2179
+ return (def.preset === 'reviewer' ||
2180
+ roleLC.includes('review') ||
2181
+ roleLC.includes('critic') ||
2182
+ roleLC.includes('verifier') ||
2183
+ roleLC.includes('qa') ||
2184
+ nameLC.includes('review'));
2185
+ };
2186
+ const reviewerPriority = (def) => {
2187
+ if (def.preset === 'reviewer')
2188
+ return 5;
2189
+ const roleLC = def.role?.toLowerCase() ?? '';
2190
+ const nameLC = def.name.toLowerCase();
2191
+ if (roleLC.includes('review') || nameLC.includes('review'))
2192
+ return 4;
2193
+ if (roleLC.includes('verifier') || roleLC.includes('qa'))
2194
+ return 3;
2195
+ if (roleLC.includes('critic'))
2196
+ return 2;
2197
+ return isReviewer(def) ? 1 : 0;
2198
+ };
2199
+ const dedicated = allDefs
2200
+ .filter((d) => d.name !== ownerDef.name && isReviewer(d))
2201
+ .sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name))[0];
2202
+ if (dedicated)
2203
+ return dedicated;
2204
+ const alternate = allDefs.find((d) => d.name !== ownerDef.name && d.interactive !== false);
2205
+ if (alternate)
2206
+ return alternate;
2207
+ // Self-review fallback — log a warning since owner reviewing itself is weak.
2208
+ return ownerDef;
2209
+ }
2210
+ assertOwnerCompletionMarker(step, output, injectedTaskText) {
2211
+ const marker = `STEP_COMPLETE:${step.name}`;
2212
+ const taskHasMarker = injectedTaskText.includes(marker);
2213
+ const first = output.indexOf(marker);
2214
+ if (first === -1) {
2215
+ throw new Error(`Step "${step.name}" owner completion marker missing: "${marker}"`);
2216
+ }
2217
+ // PTY output includes injected task text, so require a second marker occurrence
2218
+ // when the marker was present in the injected prompt (either owner contract or supervisor prompt).
2219
+ const outputLikelyContainsInjectedPrompt = output.includes('STEP OWNER CONTRACT') || output.includes('Output exactly: STEP_COMPLETE:');
2220
+ if (taskHasMarker && outputLikelyContainsInjectedPrompt) {
2221
+ const hasSecond = output.includes(marker, first + marker.length);
2222
+ if (!hasSecond) {
2223
+ throw new Error(`Step "${step.name}" owner completion marker missing in agent response: "${marker}"`);
2224
+ }
2225
+ }
2226
+ }
2227
+ async runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewerDef, timeoutMs) {
2228
+ const reviewSnippetMax = 12_000;
2229
+ let specialistSnippet = specialistOutput;
2230
+ if (specialistOutput.length > reviewSnippetMax) {
2231
+ const head = Math.floor(reviewSnippetMax / 2);
2232
+ const tail = reviewSnippetMax - head;
2233
+ const omitted = specialistOutput.length - head - tail;
2234
+ specialistSnippet =
2235
+ `${specialistOutput.slice(0, head)}\n` +
2236
+ `...[truncated ${omitted} chars for review]...\n` +
2237
+ `${specialistOutput.slice(specialistOutput.length - tail)}`;
2238
+ }
2239
+ let ownerSnippet = ownerOutput;
2240
+ if (ownerOutput.length > reviewSnippetMax) {
2241
+ const head = Math.floor(reviewSnippetMax / 2);
2242
+ const tail = reviewSnippetMax - head;
2243
+ const omitted = ownerOutput.length - head - tail;
2244
+ ownerSnippet =
2245
+ `${ownerOutput.slice(0, head)}\n` +
2246
+ `...[truncated ${omitted} chars for review]...\n` +
2247
+ `${ownerOutput.slice(ownerOutput.length - tail)}`;
2248
+ }
2249
+ const reviewTask = `Review workflow step "${step.name}" for completion and safe handoff.\n` +
2250
+ `Step owner: ${ownerDef.name}\n` +
2251
+ `Original objective:\n${resolvedTask}\n\n` +
2252
+ `Specialist output:\n${specialistSnippet}\n\n` +
2253
+ `Owner verification notes:\n${ownerSnippet}\n\n` +
2254
+ `Return exactly:\n` +
2255
+ `REVIEW_DECISION: APPROVE or REJECT\n` +
2256
+ `REVIEW_REASON: <one sentence>\n` +
2257
+ `Then output /exit.`;
2258
+ const safetyTimeoutMs = timeoutMs ?? 600_000;
2259
+ const reviewStep = {
2260
+ name: `${step.name}-review`,
2261
+ type: 'agent',
2262
+ agent: reviewerDef.name,
2263
+ task: reviewTask,
2264
+ };
2265
+ await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer');
2266
+ this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`);
2267
+ const emitReviewCompleted = async (decision, reason) => {
2268
+ await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason);
2269
+ this.emit({
2270
+ type: 'step:review-completed',
2271
+ runId: this.currentRunId ?? '',
2272
+ stepName: step.name,
2273
+ reviewerName: reviewerDef.name,
2274
+ decision,
2275
+ });
2276
+ };
2277
+ if (this.executor) {
2278
+ const reviewOutput = await this.executor.executeAgentStep(reviewStep, reviewerDef, reviewTask, safetyTimeoutMs);
2279
+ const parsed = this.parseReviewDecision(reviewOutput);
2280
+ if (!parsed) {
2281
+ throw new Error(`Step "${step.name}" review response malformed from "${reviewerDef.name}" (missing REVIEW_DECISION)`);
2282
+ }
2283
+ await emitReviewCompleted(parsed.decision, parsed.reason);
2284
+ if (parsed.decision === 'rejected') {
2285
+ throw new Error(`Step "${step.name}" review rejected by "${reviewerDef.name}"`);
2286
+ }
2287
+ this.postToChannel(`**[${step.name}]** Review approved by \`${reviewerDef.name}\``);
2288
+ return reviewOutput;
2289
+ }
2290
+ let reviewerHandle;
2291
+ let reviewerReleased = false;
2292
+ let reviewOutput = '';
2293
+ let completedReview;
2294
+ let reviewCompletionPromise;
2295
+ const reviewCompletionStarted = { value: false };
2296
+ const startReviewCompletion = (parsed) => {
2297
+ if (reviewCompletionStarted.value)
2298
+ return;
2299
+ reviewCompletionStarted.value = true;
2300
+ completedReview = parsed;
2301
+ reviewCompletionPromise = (async () => {
2302
+ await emitReviewCompleted(parsed.decision, parsed.reason);
2303
+ if (reviewerHandle && !reviewerReleased) {
2304
+ reviewerReleased = true;
2305
+ await reviewerHandle.release().catch(() => undefined);
2306
+ }
2307
+ })();
2308
+ };
2309
+ try {
2310
+ reviewOutput = await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
2311
+ onSpawned: ({ agent }) => {
2312
+ reviewerHandle = agent;
2313
+ },
2314
+ onChunk: ({ chunk }) => {
2315
+ const nextOutput = reviewOutput + WorkflowRunner.stripAnsi(chunk);
2316
+ reviewOutput = nextOutput;
2317
+ const parsed = this.parseReviewDecision(nextOutput);
2318
+ if (parsed) {
2319
+ startReviewCompletion(parsed);
2320
+ }
2321
+ },
2322
+ });
2323
+ await reviewCompletionPromise;
2324
+ }
2325
+ catch (error) {
2326
+ const message = error instanceof Error ? error.message : String(error);
2327
+ if (/\btimed out\b/i.test(message)) {
2328
+ this.log(`[${step.name}] Review safety backstop timeout fired after ${safetyTimeoutMs}ms`);
2329
+ throw new Error(`Step "${step.name}" review safety backstop timed out after ${safetyTimeoutMs}ms`);
2330
+ }
2331
+ throw error;
2332
+ }
2333
+ if (!completedReview) {
2334
+ const parsed = this.parseReviewDecision(reviewOutput);
2335
+ if (!parsed) {
2336
+ throw new Error(`Step "${step.name}" review response malformed from "${reviewerDef.name}" (missing REVIEW_DECISION)`);
2337
+ }
2338
+ completedReview = parsed;
2339
+ await emitReviewCompleted(parsed.decision, parsed.reason);
2340
+ }
2341
+ if (completedReview.decision === 'rejected') {
2342
+ throw new Error(`Step "${step.name}" review rejected by "${reviewerDef.name}"`);
2343
+ }
2344
+ this.postToChannel(`**[${step.name}]** Review approved by \`${reviewerDef.name}\``);
2345
+ return reviewOutput;
2346
+ }
2347
+ parseReviewDecision(reviewOutput) {
2348
+ const decisionPattern = /REVIEW_DECISION:\s*(APPROVE|REJECT)/gi;
2349
+ const decisionMatches = [...reviewOutput.matchAll(decisionPattern)];
2350
+ if (decisionMatches.length === 0) {
2351
+ return null;
2352
+ }
2353
+ const outputLikelyContainsEchoedPrompt = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
2354
+ const decisionMatch = outputLikelyContainsEchoedPrompt && decisionMatches.length > 1
2355
+ ? decisionMatches[decisionMatches.length - 1]
2356
+ : decisionMatches[0];
2357
+ const decision = decisionMatch?.[1]?.toUpperCase();
2358
+ if (decision !== 'APPROVE' && decision !== 'REJECT') {
2359
+ return null;
2360
+ }
2361
+ const reasonPattern = /REVIEW_REASON:\s*(.+)/gi;
2362
+ const reasonMatches = [...reviewOutput.matchAll(reasonPattern)];
2363
+ const reasonMatch = outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
2364
+ ? reasonMatches[reasonMatches.length - 1]
2365
+ : reasonMatches[0];
2366
+ const reason = reasonMatch?.[1]?.trim();
2367
+ return {
2368
+ decision: decision === 'APPROVE' ? 'approved' : 'rejected',
2369
+ reason: reason && reason !== '<one sentence>' ? reason : undefined,
2370
+ };
2371
+ }
2372
+ combineStepAndReviewOutput(stepOutput, reviewOutput) {
2373
+ const primary = stepOutput.trimEnd();
2374
+ const review = reviewOutput.trim();
2375
+ if (!review)
2376
+ return primary;
2377
+ if (!primary)
2378
+ return `REVIEW_OUTPUT\n${review}\n`;
2379
+ return `${primary}\n\n---\nREVIEW_OUTPUT\n${review}\n`;
2380
+ }
1876
2381
  /**
1877
2382
  * Build the CLI command and arguments for a non-interactive agent execution.
1878
2383
  * Each CLI has a specific flag for one-shot prompt mode.
@@ -2096,7 +2601,7 @@ export class WorkflowRunner {
2096
2601
  this.unregisterWorker(agentName);
2097
2602
  }
2098
2603
  }
2099
- async spawnAndWait(agentDef, step, timeoutMs) {
2604
+ async spawnAndWait(agentDef, step, timeoutMs, options = {}) {
2100
2605
  // Branch: non-interactive agents run as simple subprocesses
2101
2606
  if (agentDef.interactive === false) {
2102
2607
  return this.execNonInteractive(agentDef, step, timeoutMs);
@@ -2104,13 +2609,15 @@ export class WorkflowRunner {
2104
2609
  if (!this.relay) {
2105
2610
  throw new Error('AgentRelay not initialized');
2106
2611
  }
2107
- // Deterministic name: step name + first 8 chars of run ID.
2108
- let agentName = `${step.name}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
2612
+ // Deterministic name: step name + optional role suffix + first 8 chars of run ID.
2613
+ const requestedName = `${step.name}${options.agentNameSuffix ? `-${options.agentNameSuffix}` : ''}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
2614
+ let agentName = requestedName;
2109
2615
  // Only inject delegation guidance for lead/coordinator agents, not spokes/workers.
2110
2616
  // In non-hub patterns (pipeline, dag, etc.) every agent is autonomous so they all get it.
2111
2617
  const role = agentDef.role?.toLowerCase() ?? '';
2112
2618
  const nameLC = agentDef.name.toLowerCase();
2113
- const isHub = WorkflowRunner.HUB_ROLES.has(nameLC) || [...WorkflowRunner.HUB_ROLES].some((r) => role.includes(r));
2619
+ const isHub = WorkflowRunner.HUB_ROLES.has(nameLC) ||
2620
+ [...WorkflowRunner.HUB_ROLES].some((r) => new RegExp(`\\b${r}\\b`).test(role));
2114
2621
  const pattern = this.currentConfig?.swarm.pattern;
2115
2622
  const isHubPattern = pattern && WorkflowRunner.HUB_PATTERNS.has(pattern);
2116
2623
  const delegationGuidance = isHub || !isHubPattern ? this.buildDelegationGuidance(agentDef.cli, timeoutMs) : '';
@@ -2139,6 +2646,7 @@ export class WorkflowRunner {
2139
2646
  // Write raw output (with ANSI codes) to log file so dashboard's
2140
2647
  // XTermLogViewer can render colors/formatting natively via xterm.js
2141
2648
  logStream.write(chunk);
2649
+ options.onChunk?.({ agentName, chunk });
2142
2650
  });
2143
2651
  const agentChannels = this.channel ? [this.channel] : agentDef.channels;
2144
2652
  let agent;
@@ -2187,10 +2695,12 @@ export class WorkflowRunner {
2187
2695
  const stripped = WorkflowRunner.stripAnsi(chunk);
2188
2696
  this.ptyOutputBuffers.get(agent.name)?.push(stripped);
2189
2697
  newLogStream.write(chunk);
2698
+ options.onChunk?.({ agentName: agent.name, chunk });
2190
2699
  });
2191
2700
  }
2192
2701
  agentName = agent.name;
2193
2702
  }
2703
+ await options.onSpawned?.({ requestedName, actualName: agent.name, agent });
2194
2704
  // Register in workers.json so `agents:kill` can find this agent
2195
2705
  let workerPid;
2196
2706
  try {
@@ -2260,6 +2770,7 @@ export class WorkflowRunner {
2260
2770
  this.ptyLogStreams.delete(agentName);
2261
2771
  }
2262
2772
  this.unregisterWorker(agentName);
2773
+ this.supervisedRuntimeAgents.delete(agentName);
2263
2774
  }
2264
2775
  let output;
2265
2776
  if (ptyChunks.length > 0) {
@@ -2417,7 +2928,7 @@ export class WorkflowRunner {
2417
2928
  const role = agentDef.role?.toLowerCase() ?? '';
2418
2929
  const nameLC = agentDef.name.toLowerCase();
2419
2930
  if (WorkflowRunner.HUB_ROLES.has(nameLC) ||
2420
- [...WorkflowRunner.HUB_ROLES].some((r) => role.includes(r))) {
2931
+ [...WorkflowRunner.HUB_ROLES].some((r) => new RegExp(`\\b${r}\\b`).test(role))) {
2421
2932
  // Found a hub candidate — check if we have a live handle
2422
2933
  const handle = this.activeAgentHandles.get(agentDef.name);
2423
2934
  if (handle)