agent-relay 2.3.2 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334) hide show
  1. package/README.md +1 -1
  2. package/dist/index.cjs +1 -1
  3. package/dist/src/cli/index.js +124 -7
  4. package/dist/src/cli/index.js.map +1 -1
  5. package/package.json +20 -26
  6. package/packages/acp-bridge/package.json +2 -2
  7. package/packages/bridge/package.json +7 -7
  8. package/packages/config/dist/cloud-config.d.ts +1 -1
  9. package/packages/config/dist/cloud-config.d.ts.map +1 -1
  10. package/packages/config/dist/cloud-config.js.map +1 -1
  11. package/packages/config/dist/schemas.d.ts +5 -5
  12. package/packages/config/dist/schemas.js +1 -1
  13. package/packages/config/dist/schemas.js.map +1 -1
  14. package/packages/config/package.json +2 -2
  15. package/packages/config/src/cloud-config.ts +2 -2
  16. package/packages/config/src/schemas.test.ts +48 -0
  17. package/packages/config/src/schemas.ts +1 -1
  18. package/packages/continuity/package.json +2 -2
  19. package/packages/daemon/package.json +12 -12
  20. package/packages/hooks/package.json +4 -4
  21. package/packages/mcp/package.json +5 -5
  22. package/packages/memory/package.json +2 -2
  23. package/packages/policy/package.json +2 -2
  24. package/packages/protocol/package.json +1 -1
  25. package/packages/resiliency/package.json +1 -1
  26. package/packages/sdk/dist/index.d.ts +1 -29
  27. package/packages/sdk/dist/index.d.ts.map +1 -1
  28. package/packages/sdk/dist/index.js +1 -38
  29. package/packages/sdk/dist/index.js.map +1 -1
  30. package/packages/sdk/package.json +4 -25
  31. package/packages/sdk/src/index.ts +1 -69
  32. package/packages/sdk-py/README.md +56 -0
  33. package/packages/sdk-py/pyproject.toml +23 -0
  34. package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
  35. package/packages/sdk-py/src/agent_relay/builder.py +367 -0
  36. package/packages/sdk-py/src/agent_relay/types.py +92 -0
  37. package/packages/sdk-py/tests/__init__.py +0 -0
  38. package/packages/sdk-py/tests/test_builder.py +101 -0
  39. package/packages/sdk-ts/dist/__tests__/facade.test.d.ts +2 -0
  40. package/packages/sdk-ts/dist/__tests__/facade.test.d.ts.map +1 -0
  41. package/packages/sdk-ts/dist/__tests__/facade.test.js +257 -0
  42. package/packages/sdk-ts/dist/__tests__/facade.test.js.map +1 -0
  43. package/packages/sdk-ts/dist/__tests__/unit.test.d.ts +2 -0
  44. package/packages/sdk-ts/dist/__tests__/unit.test.d.ts.map +1 -0
  45. package/packages/sdk-ts/dist/__tests__/unit.test.js +124 -0
  46. package/packages/sdk-ts/dist/__tests__/unit.test.js.map +1 -0
  47. package/packages/sdk-ts/dist/client.d.ts +2 -0
  48. package/packages/sdk-ts/dist/client.d.ts.map +1 -1
  49. package/packages/sdk-ts/dist/client.js +2 -0
  50. package/packages/sdk-ts/dist/client.js.map +1 -1
  51. package/packages/sdk-ts/dist/index.d.ts +1 -0
  52. package/packages/sdk-ts/dist/index.d.ts.map +1 -1
  53. package/packages/sdk-ts/dist/index.js +1 -0
  54. package/packages/sdk-ts/dist/index.js.map +1 -1
  55. package/packages/sdk-ts/dist/protocol.d.ts +1 -0
  56. package/packages/sdk-ts/dist/protocol.d.ts.map +1 -1
  57. package/packages/sdk-ts/dist/relay.d.ts +44 -0
  58. package/packages/sdk-ts/dist/relay.d.ts.map +1 -1
  59. package/packages/sdk-ts/dist/relay.js +89 -11
  60. package/packages/sdk-ts/dist/relay.js.map +1 -1
  61. package/packages/sdk-ts/dist/relaycast.js +2 -2
  62. package/packages/sdk-ts/dist/relaycast.js.map +1 -1
  63. package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
  64. package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
  65. package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
  66. package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
  67. package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
  68. package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
  69. package/packages/sdk-ts/dist/workflows/builder.js +179 -0
  70. package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
  71. package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
  72. package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
  73. package/packages/sdk-ts/dist/workflows/cli.js +82 -0
  74. package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
  75. package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
  76. package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
  77. package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
  78. package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
  79. package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
  80. package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
  81. package/packages/sdk-ts/dist/workflows/index.js +10 -0
  82. package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
  83. package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
  84. package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
  85. package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
  86. package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
  87. package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
  88. package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
  89. package/packages/sdk-ts/dist/workflows/run.js +24 -0
  90. package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
  91. package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
  92. package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
  93. package/packages/sdk-ts/dist/workflows/runner.js +650 -0
  94. package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
  95. package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
  96. package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
  97. package/packages/sdk-ts/dist/workflows/state.js +140 -0
  98. package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
  99. package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
  100. package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
  101. package/packages/sdk-ts/dist/workflows/templates.js +395 -0
  102. package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
  103. package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
  104. package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
  105. package/packages/sdk-ts/dist/workflows/types.js +8 -0
  106. package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
  107. package/packages/sdk-ts/package.json +9 -3
  108. package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
  109. package/packages/sdk-ts/src/__tests__/facade.test.ts +296 -0
  110. package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
  111. package/packages/sdk-ts/src/__tests__/unit.test.ts +152 -0
  112. package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
  113. package/packages/sdk-ts/src/client.ts +4 -0
  114. package/packages/sdk-ts/src/index.ts +1 -0
  115. package/packages/sdk-ts/src/protocol.ts +1 -1
  116. package/packages/sdk-ts/src/relay.ts +112 -11
  117. package/packages/sdk-ts/src/relaycast.ts +2 -2
  118. package/packages/sdk-ts/src/workflows/README.md +450 -0
  119. package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
  120. package/packages/sdk-ts/src/workflows/builder.ts +241 -0
  121. package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
  122. package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
  123. package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
  124. package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
  125. package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
  126. package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
  127. package/packages/sdk-ts/src/workflows/cli.ts +93 -0
  128. package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
  129. package/packages/sdk-ts/src/workflows/index.ts +9 -0
  130. package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
  131. package/packages/sdk-ts/src/workflows/run.ts +47 -0
  132. package/packages/sdk-ts/src/workflows/runner.ts +873 -0
  133. package/packages/sdk-ts/src/workflows/schema.json +321 -0
  134. package/packages/sdk-ts/src/workflows/state.ts +279 -0
  135. package/packages/sdk-ts/src/workflows/templates.ts +544 -0
  136. package/packages/sdk-ts/src/workflows/types.ts +178 -0
  137. package/packages/sdk-ts/tsconfig.json +6 -1
  138. package/packages/spawner/package.json +1 -1
  139. package/packages/state/package.json +1 -1
  140. package/packages/storage/package.json +2 -2
  141. package/packages/telemetry/package.json +1 -1
  142. package/packages/trajectory/package.json +2 -2
  143. package/packages/user-directory/package.json +2 -2
  144. package/packages/utils/package.json +3 -3
  145. package/packages/wrapper/package.json +5 -6
  146. package/scripts/postinstall.js +106 -2
  147. package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
  148. package/packages/api-types/.trajectories/index.json +0 -12
  149. package/packages/api-types/dist/index.d.ts +0 -21
  150. package/packages/api-types/dist/index.d.ts.map +0 -1
  151. package/packages/api-types/dist/index.js +0 -22
  152. package/packages/api-types/dist/index.js.map +0 -1
  153. package/packages/api-types/dist/schemas/agent.d.ts +0 -259
  154. package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
  155. package/packages/api-types/dist/schemas/agent.js +0 -102
  156. package/packages/api-types/dist/schemas/agent.js.map +0 -1
  157. package/packages/api-types/dist/schemas/api.d.ts +0 -290
  158. package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
  159. package/packages/api-types/dist/schemas/api.js +0 -162
  160. package/packages/api-types/dist/schemas/api.js.map +0 -1
  161. package/packages/api-types/dist/schemas/decision.d.ts +0 -230
  162. package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
  163. package/packages/api-types/dist/schemas/decision.js +0 -104
  164. package/packages/api-types/dist/schemas/decision.js.map +0 -1
  165. package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
  166. package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
  167. package/packages/api-types/dist/schemas/fleet.js +0 -71
  168. package/packages/api-types/dist/schemas/fleet.js.map +0 -1
  169. package/packages/api-types/dist/schemas/history.d.ts +0 -180
  170. package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
  171. package/packages/api-types/dist/schemas/history.js +0 -72
  172. package/packages/api-types/dist/schemas/history.js.map +0 -1
  173. package/packages/api-types/dist/schemas/index.d.ts +0 -14
  174. package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
  175. package/packages/api-types/dist/schemas/index.js +0 -22
  176. package/packages/api-types/dist/schemas/index.js.map +0 -1
  177. package/packages/api-types/dist/schemas/message.d.ts +0 -456
  178. package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
  179. package/packages/api-types/dist/schemas/message.js +0 -88
  180. package/packages/api-types/dist/schemas/message.js.map +0 -1
  181. package/packages/api-types/dist/schemas/session.d.ts +0 -60
  182. package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
  183. package/packages/api-types/dist/schemas/session.js +0 -36
  184. package/packages/api-types/dist/schemas/session.js.map +0 -1
  185. package/packages/api-types/dist/schemas/task.d.ts +0 -111
  186. package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
  187. package/packages/api-types/dist/schemas/task.js +0 -64
  188. package/packages/api-types/dist/schemas/task.js.map +0 -1
  189. package/packages/api-types/package.json +0 -61
  190. package/packages/api-types/scripts/generate-openapi.ts +0 -106
  191. package/packages/api-types/src/index.ts +0 -22
  192. package/packages/api-types/src/schemas/agent.test.ts +0 -164
  193. package/packages/api-types/src/schemas/agent.ts +0 -110
  194. package/packages/api-types/src/schemas/api.test.ts +0 -372
  195. package/packages/api-types/src/schemas/api.ts +0 -194
  196. package/packages/api-types/src/schemas/decision.test.ts +0 -324
  197. package/packages/api-types/src/schemas/decision.ts +0 -136
  198. package/packages/api-types/src/schemas/fleet.test.ts +0 -212
  199. package/packages/api-types/src/schemas/fleet.ts +0 -83
  200. package/packages/api-types/src/schemas/history.test.ts +0 -242
  201. package/packages/api-types/src/schemas/history.ts +0 -84
  202. package/packages/api-types/src/schemas/index.ts +0 -148
  203. package/packages/api-types/src/schemas/message.test.ts +0 -192
  204. package/packages/api-types/src/schemas/message.ts +0 -98
  205. package/packages/api-types/src/schemas/session.test.ts +0 -104
  206. package/packages/api-types/src/schemas/session.ts +0 -40
  207. package/packages/api-types/src/schemas/task.test.ts +0 -192
  208. package/packages/api-types/src/schemas/task.ts +0 -78
  209. package/packages/api-types/tsconfig.json +0 -19
  210. package/packages/api-types/vitest.config.ts +0 -9
  211. package/packages/benchmark/README.md +0 -200
  212. package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
  213. package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
  214. package/packages/benchmark/datasets/quick-test.yaml +0 -20
  215. package/packages/benchmark/dist/benchmark.d.ts +0 -47
  216. package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
  217. package/packages/benchmark/dist/benchmark.js +0 -224
  218. package/packages/benchmark/dist/benchmark.js.map +0 -1
  219. package/packages/benchmark/dist/cli.d.ts +0 -8
  220. package/packages/benchmark/dist/cli.d.ts.map +0 -1
  221. package/packages/benchmark/dist/cli.js +0 -185
  222. package/packages/benchmark/dist/cli.js.map +0 -1
  223. package/packages/benchmark/dist/harbor.d.ts +0 -53
  224. package/packages/benchmark/dist/harbor.d.ts.map +0 -1
  225. package/packages/benchmark/dist/harbor.js +0 -127
  226. package/packages/benchmark/dist/harbor.js.map +0 -1
  227. package/packages/benchmark/dist/index.d.ts +0 -48
  228. package/packages/benchmark/dist/index.d.ts.map +0 -1
  229. package/packages/benchmark/dist/index.js +0 -50
  230. package/packages/benchmark/dist/index.js.map +0 -1
  231. package/packages/benchmark/dist/runners/base.d.ts +0 -63
  232. package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
  233. package/packages/benchmark/dist/runners/base.js +0 -156
  234. package/packages/benchmark/dist/runners/base.js.map +0 -1
  235. package/packages/benchmark/dist/runners/index.d.ts +0 -10
  236. package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
  237. package/packages/benchmark/dist/runners/index.js +0 -10
  238. package/packages/benchmark/dist/runners/index.js.map +0 -1
  239. package/packages/benchmark/dist/runners/single.d.ts +0 -19
  240. package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
  241. package/packages/benchmark/dist/runners/single.js +0 -111
  242. package/packages/benchmark/dist/runners/single.js.map +0 -1
  243. package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
  244. package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
  245. package/packages/benchmark/dist/runners/subagent.js +0 -212
  246. package/packages/benchmark/dist/runners/subagent.js.map +0 -1
  247. package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
  248. package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
  249. package/packages/benchmark/dist/runners/swarm.js +0 -273
  250. package/packages/benchmark/dist/runners/swarm.js.map +0 -1
  251. package/packages/benchmark/dist/types.d.ts +0 -178
  252. package/packages/benchmark/dist/types.d.ts.map +0 -1
  253. package/packages/benchmark/dist/types.js +0 -16
  254. package/packages/benchmark/dist/types.js.map +0 -1
  255. package/packages/benchmark/package.json +0 -80
  256. package/packages/benchmark/src/benchmark.ts +0 -298
  257. package/packages/benchmark/src/cli.ts +0 -240
  258. package/packages/benchmark/src/harbor.ts +0 -170
  259. package/packages/benchmark/src/index.ts +0 -73
  260. package/packages/benchmark/src/runners/base.ts +0 -205
  261. package/packages/benchmark/src/runners/index.ts +0 -10
  262. package/packages/benchmark/src/runners/single.ts +0 -121
  263. package/packages/benchmark/src/runners/subagent.ts +0 -240
  264. package/packages/benchmark/src/runners/swarm.ts +0 -326
  265. package/packages/benchmark/src/types.ts +0 -205
  266. package/packages/benchmark/tsconfig.json +0 -20
  267. package/packages/cli-tester/README.md +0 -277
  268. package/packages/cli-tester/dist/index.d.ts +0 -21
  269. package/packages/cli-tester/dist/index.d.ts.map +0 -1
  270. package/packages/cli-tester/dist/index.js +0 -21
  271. package/packages/cli-tester/dist/index.js.map +0 -1
  272. package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
  273. package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
  274. package/packages/cli-tester/dist/utils/credential-check.js +0 -230
  275. package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
  276. package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
  277. package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
  278. package/packages/cli-tester/dist/utils/socket-client.js +0 -153
  279. package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
  280. package/packages/cli-tester/docker/Dockerfile +0 -61
  281. package/packages/cli-tester/docker/docker-compose.yml +0 -71
  282. package/packages/cli-tester/docker/entrypoint.sh +0 -58
  283. package/packages/cli-tester/package.json +0 -32
  284. package/packages/cli-tester/scripts/clear-auth.sh +0 -101
  285. package/packages/cli-tester/scripts/inject-message.sh +0 -42
  286. package/packages/cli-tester/scripts/start.sh +0 -71
  287. package/packages/cli-tester/scripts/test-cli.sh +0 -56
  288. package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
  289. package/packages/cli-tester/scripts/test-registration.sh +0 -182
  290. package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
  291. package/packages/cli-tester/scripts/test-spawn.sh +0 -140
  292. package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
  293. package/packages/cli-tester/scripts/verify-auth.sh +0 -112
  294. package/packages/cli-tester/src/index.ts +0 -40
  295. package/packages/cli-tester/src/utils/credential-check.ts +0 -284
  296. package/packages/cli-tester/src/utils/socket-client.ts +0 -211
  297. package/packages/cli-tester/tests/credential-check.test.ts +0 -56
  298. package/packages/cli-tester/tsconfig.json +0 -11
  299. package/packages/sdk/dist/browser-client.d.ts +0 -212
  300. package/packages/sdk/dist/browser-client.d.ts.map +0 -1
  301. package/packages/sdk/dist/browser-client.js +0 -750
  302. package/packages/sdk/dist/browser-client.js.map +0 -1
  303. package/packages/sdk/dist/browser-framing.d.ts +0 -46
  304. package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
  305. package/packages/sdk/dist/browser-framing.js +0 -122
  306. package/packages/sdk/dist/browser-framing.js.map +0 -1
  307. package/packages/sdk/dist/standalone.d.ts +0 -89
  308. package/packages/sdk/dist/standalone.d.ts.map +0 -1
  309. package/packages/sdk/dist/standalone.js +0 -131
  310. package/packages/sdk/dist/standalone.js.map +0 -1
  311. package/packages/sdk/dist/transports/index.d.ts +0 -92
  312. package/packages/sdk/dist/transports/index.d.ts.map +0 -1
  313. package/packages/sdk/dist/transports/index.js +0 -129
  314. package/packages/sdk/dist/transports/index.js.map +0 -1
  315. package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
  316. package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
  317. package/packages/sdk/dist/transports/socket-transport.js +0 -94
  318. package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
  319. package/packages/sdk/dist/transports/types.d.ts +0 -69
  320. package/packages/sdk/dist/transports/types.d.ts.map +0 -1
  321. package/packages/sdk/dist/transports/types.js +0 -10
  322. package/packages/sdk/dist/transports/types.js.map +0 -1
  323. package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
  324. package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
  325. package/packages/sdk/dist/transports/websocket-transport.js +0 -180
  326. package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
  327. package/packages/sdk/src/browser-client.ts +0 -985
  328. package/packages/sdk/src/browser-framing.test.ts +0 -115
  329. package/packages/sdk/src/browser-framing.ts +0 -150
  330. package/packages/sdk/src/standalone.ts +0 -183
  331. package/packages/sdk/src/transports/index.ts +0 -197
  332. package/packages/sdk/src/transports/socket-transport.ts +0 -115
  333. package/packages/sdk/src/transports/types.ts +0 -77
  334. package/packages/sdk/src/transports/websocket-transport.ts +0 -245
@@ -1,240 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Benchmark CLI
4
- *
5
- * Command-line interface for running agent swarm benchmarks.
6
- */
7
-
8
- import { Command } from 'commander';
9
- import { readFileSync } from 'node:fs';
10
- import { parse as parseYaml } from 'yaml';
11
- import { ComparisonBenchmark } from './benchmark.js';
12
- import type {
13
- Task,
14
- TaskDataset,
15
- ConfigurationType,
16
- BenchmarkConfig,
17
- } from './types.js';
18
-
19
- const program = new Command();
20
-
21
- program
22
- .name('relay-benchmark')
23
- .description('Benchmark agent swarms, sub-agents, and single agents')
24
- .version('1.0.0');
25
-
26
- program
27
- .command('run')
28
- .description('Run a benchmark comparison')
29
- .option('-d, --dataset <path>', 'Path to task dataset (YAML or JSON)')
30
- .option('-t, --task <id>', 'Run only a specific task by ID')
31
- .option(
32
- '-c, --config <types>',
33
- 'Configurations to run (single,subagent,swarm,all)',
34
- 'all'
35
- )
36
- .option('--cli <name>', 'CLI to use for agents', 'claude')
37
- .option('--cwd <path>', 'Working directory for tasks')
38
- .option('-q, --quiet', 'Suppress output', false)
39
- .option('--cooldown <ms>', 'Cooldown between runs in ms', '5000')
40
- .option('--max-swarm <n>', 'Maximum swarm size', '10')
41
- .option('-o, --output <path>', 'Output results to JSON file')
42
- .action(async (options) => {
43
- try {
44
- await runBenchmark(options);
45
- } catch (err) {
46
- console.error('Error:', (err as Error).message);
47
- process.exit(1);
48
- }
49
- });
50
-
51
- program
52
- .command('list')
53
- .description('List tasks in a dataset')
54
- .argument('<dataset>', 'Path to task dataset')
55
- .action((datasetPath) => {
56
- const dataset = loadDataset(datasetPath);
57
- console.log(`\nDataset: ${dataset.name || 'Unnamed'}`);
58
- if (dataset.description) {
59
- console.log(`Description: ${dataset.description}`);
60
- }
61
- console.log(`\nTasks (${dataset.tasks.length}):\n`);
62
-
63
- for (const task of dataset.tasks) {
64
- console.log(` ${task.id}`);
65
- console.log(` Complexity: ${task.complexity}`);
66
- console.log(` Files: ${task.files.length}`);
67
- console.log(` ${task.description.substring(0, 60)}...`);
68
- console.log('');
69
- }
70
- });
71
-
72
- async function runBenchmark(options: {
73
- dataset?: string;
74
- task?: string;
75
- config: string;
76
- cli: string;
77
- cwd?: string;
78
- quiet: boolean;
79
- cooldown: string;
80
- maxSwarm: string;
81
- output?: string;
82
- }): Promise<void> {
83
- // Parse configurations
84
- const configurations = parseConfigurations(options.config);
85
-
86
- // Build benchmark config
87
- const benchmarkConfig: Partial<BenchmarkConfig> = {
88
- configurations,
89
- cli: options.cli,
90
- cwd: options.cwd,
91
- quiet: options.quiet,
92
- cooldownMs: parseInt(options.cooldown, 10),
93
- maxSwarmSize: parseInt(options.maxSwarm, 10),
94
- };
95
-
96
- const benchmark = new ComparisonBenchmark(benchmarkConfig);
97
-
98
- // Load tasks
99
- let tasks: Task[];
100
- if (options.dataset) {
101
- const dataset = loadDataset(options.dataset);
102
- tasks = dataset.tasks;
103
-
104
- if (options.task) {
105
- tasks = tasks.filter((t) => t.id === options.task);
106
- if (tasks.length === 0) {
107
- throw new Error(`Task not found: ${options.task}`);
108
- }
109
- }
110
- } else if (options.task) {
111
- // Create a simple task from command line
112
- tasks = [
113
- {
114
- id: options.task,
115
- description: options.task,
116
- files: [],
117
- expectedOutcome: 'Task completed',
118
- complexity: 'medium',
119
- },
120
- ];
121
- } else {
122
- throw new Error('Either --dataset or --task is required');
123
- }
124
-
125
- // Run benchmarks
126
- const results = [];
127
- for (const task of tasks) {
128
- if (!options.quiet) {
129
- console.log(`\n${'='.repeat(60)}`);
130
- console.log(`Running task: ${task.id}`);
131
- console.log('='.repeat(60));
132
- }
133
-
134
- const comparison = await benchmark.runComparison(task);
135
-
136
- if (!options.quiet) {
137
- benchmark.printComparison(comparison);
138
- }
139
-
140
- results.push({
141
- taskId: task.id,
142
- winner: comparison.winner,
143
- results: Object.fromEntries(comparison.results),
144
- scores: Object.fromEntries(comparison.scores),
145
- });
146
- }
147
-
148
- // Output results
149
- if (options.output) {
150
- const { writeFileSync } = await import('node:fs');
151
- writeFileSync(options.output, JSON.stringify(results, null, 2));
152
- console.log(`\nResults written to: ${options.output}`);
153
- }
154
-
155
- // Print summary
156
- if (!options.quiet && results.length > 1) {
157
- printSummary(results);
158
- }
159
- }
160
-
161
- function parseConfigurations(config: string): ConfigurationType[] {
162
- if (config === 'all') {
163
- return ['single', 'subagent', 'swarm'];
164
- }
165
-
166
- const configs = config.split(',').map((c) => c.trim()) as ConfigurationType[];
167
- const valid: ConfigurationType[] = ['single', 'subagent', 'swarm'];
168
-
169
- for (const c of configs) {
170
- if (!valid.includes(c)) {
171
- throw new Error(`Invalid configuration: ${c}. Valid: ${valid.join(', ')}`);
172
- }
173
- }
174
-
175
- return configs;
176
- }
177
-
178
- function loadDataset(path: string): TaskDataset {
179
- const content = readFileSync(path, 'utf-8');
180
-
181
- let data: TaskDataset;
182
- if (path.endsWith('.yaml') || path.endsWith('.yml')) {
183
- data = parseYaml(content) as TaskDataset;
184
- } else {
185
- data = JSON.parse(content) as TaskDataset;
186
- }
187
-
188
- // Validate and normalize tasks
189
- if (!data.tasks || !Array.isArray(data.tasks)) {
190
- throw new Error('Dataset must have a "tasks" array');
191
- }
192
-
193
- data.tasks = data.tasks.map((t, i) => ({
194
- id: t.id || `task-${i}`,
195
- description: t.description || '',
196
- files: t.files || [],
197
- expectedOutcome: t.expectedOutcome || t.success_criteria || 'Completed',
198
- complexity: t.complexity || 'medium',
199
- timeoutMs: t.timeoutMs || 300000,
200
- tags: t.tags || [],
201
- }));
202
-
203
- return data;
204
- }
205
-
206
- function printSummary(
207
- results: Array<{
208
- taskId: string;
209
- winner: ConfigurationType;
210
- results: Record<string, unknown>;
211
- scores: Record<string, unknown>;
212
- }>
213
- ): void {
214
- console.log('\n' + '='.repeat(60));
215
- console.log('BENCHMARK SUMMARY');
216
- console.log('='.repeat(60));
217
-
218
- const wins: Record<ConfigurationType, number> = {
219
- single: 0,
220
- subagent: 0,
221
- swarm: 0,
222
- };
223
-
224
- for (const result of results) {
225
- wins[result.winner]++;
226
- }
227
-
228
- console.log('\nWins by configuration:');
229
- for (const [config, count] of Object.entries(wins)) {
230
- const bar = '█'.repeat(count) + '░'.repeat(results.length - count);
231
- console.log(` ${config.padEnd(10)} ${bar} ${count}/${results.length}`);
232
- }
233
-
234
- const overallWinner = (Object.entries(wins) as [ConfigurationType, number][])
235
- .sort((a, b) => b[1] - a[1])[0][0];
236
-
237
- console.log(`\nOverall winner: ${overallWinner.toUpperCase()}`);
238
- }
239
-
240
- program.parse();
@@ -1,170 +0,0 @@
1
- /**
2
- * Harbor Integration
3
- *
4
- * Entry points for Harbor benchmark framework integration.
5
- * https://github.com/laude-institute/harbor
6
- */
7
-
8
- import type {
9
- Task,
10
- TaskComplexity,
11
- ConfigurationType,
12
- HarborTaskInput,
13
- HarborEvaluationOutput,
14
- BenchmarkConfig,
15
- } from './types.js';
16
- import { ComparisonBenchmark } from './benchmark.js';
17
-
18
- const BENCHMARK_VERSION = '1.0.0';
19
-
20
- /**
21
- * Convert Harbor task input to internal Task format
22
- */
23
- function convertHarborTask(input: HarborTaskInput): Task {
24
- return {
25
- id: input.id,
26
- description: input.description,
27
- files: input.files || [],
28
- expectedOutcome: input.success_criteria || 'Task completed successfully',
29
- complexity: (input.complexity as TaskComplexity) || 'medium',
30
- timeoutMs: 300000, // 5 minute default
31
- tags: [],
32
- };
33
- }
34
-
35
- /**
36
- * Main Harbor evaluation entry point
37
- *
38
- * This function is called by Harbor to evaluate a task across all configurations.
39
- *
40
- * @example Harbor dataset format:
41
- * ```yaml
42
- * tasks:
43
- * - id: refactor-auth
44
- * description: "Refactor authentication to use JWT"
45
- * files:
46
- * - src/auth/session.ts
47
- * - src/auth/middleware.ts
48
- * success_criteria: "All tests pass, JWT tokens used"
49
- * complexity: medium
50
- * ```
51
- *
52
- * @example Running with Harbor:
53
- * ```bash
54
- * harbor run \
55
- * --dataset tasks.yaml \
56
- * --agent @agent-relay/benchmark/harbor \
57
- * --parallel 10
58
- * ```
59
- */
60
- export async function evaluate(
61
- input: HarborTaskInput
62
- ): Promise<HarborEvaluationOutput> {
63
- const startedAt = Date.now();
64
- const task = convertHarborTask(input);
65
-
66
- const benchmark = new ComparisonBenchmark({
67
- configurations: ['single', 'subagent', 'swarm'],
68
- cli: 'claude',
69
- quiet: true, // Suppress output in Harbor runs
70
- cooldownMs: 2000,
71
- });
72
-
73
- const comparison = await benchmark.runComparison(task);
74
-
75
- const completedAt = Date.now();
76
-
77
- return {
78
- task_id: task.id,
79
- configurations: Object.fromEntries(comparison.results) as Record<
80
- ConfigurationType,
81
- any
82
- >,
83
- winner: comparison.winner,
84
- scores: Object.fromEntries(comparison.scores) as Record<
85
- ConfigurationType,
86
- any
87
- >,
88
- metadata: {
89
- benchmark_version: BENCHMARK_VERSION,
90
- started_at: startedAt,
91
- completed_at: completedAt,
92
- total_duration_ms: completedAt - startedAt,
93
- },
94
- };
95
- }
96
-
97
- /**
98
- * Run a single configuration (for targeted Harbor evaluations)
99
- *
100
- * @example Running single config with Harbor:
101
- * ```bash
102
- * harbor run \
103
- * --dataset tasks.yaml \
104
- * --agent "@agent-relay/benchmark/harbor:evaluateSingle" \
105
- * --env-var CONFIG=swarm
106
- * ```
107
- */
108
- export async function evaluateSingle(
109
- input: HarborTaskInput & { config?: ConfigurationType }
110
- ): Promise<Record<string, unknown>> {
111
- const config = input.config || 'single';
112
- const task = convertHarborTask(input);
113
-
114
- const benchmark = new ComparisonBenchmark({
115
- configurations: [config],
116
- cli: 'claude',
117
- quiet: true,
118
- cooldownMs: 0,
119
- });
120
-
121
- const result = await benchmark.runSingle(task, config);
122
-
123
- return {
124
- task_id: task.id,
125
- configuration: config,
126
- result,
127
- success: result.success,
128
- };
129
- }
130
-
131
- /**
132
- * Evaluate with custom configuration
133
- */
134
- export async function evaluateCustom(
135
- input: HarborTaskInput,
136
- config: Partial<BenchmarkConfig>
137
- ): Promise<HarborEvaluationOutput> {
138
- const startedAt = Date.now();
139
- const task = convertHarborTask(input);
140
-
141
- const benchmark = new ComparisonBenchmark({
142
- ...config,
143
- quiet: true,
144
- });
145
-
146
- const comparison = await benchmark.runComparison(task);
147
- const completedAt = Date.now();
148
-
149
- return {
150
- task_id: task.id,
151
- configurations: Object.fromEntries(comparison.results) as Record<
152
- ConfigurationType,
153
- any
154
- >,
155
- winner: comparison.winner,
156
- scores: Object.fromEntries(comparison.scores) as Record<
157
- ConfigurationType,
158
- any
159
- >,
160
- metadata: {
161
- benchmark_version: BENCHMARK_VERSION,
162
- started_at: startedAt,
163
- completed_at: completedAt,
164
- total_duration_ms: completedAt - startedAt,
165
- },
166
- };
167
- }
168
-
169
- // Default export for Harbor
170
- export default evaluate;
@@ -1,73 +0,0 @@
1
- /**
2
- * @agent-relay/benchmark
3
- *
4
- * Performance benchmarking for agent swarms, sub-agents, and single agents.
5
- *
6
- * ## Quick Start
7
- *
8
- * ```typescript
9
- * import { ComparisonBenchmark, type Task } from '@agent-relay/benchmark';
10
- *
11
- * const task: Task = {
12
- * id: 'refactor-auth',
13
- * description: 'Refactor authentication to use JWT',
14
- * files: ['src/auth/session.ts', 'src/auth/middleware.ts'],
15
- * expectedOutcome: 'All tests pass, JWT tokens used',
16
- * complexity: 'medium',
17
- * };
18
- *
19
- * const benchmark = new ComparisonBenchmark();
20
- * const comparison = await benchmark.runComparison(task);
21
- *
22
- * console.log(`Winner: ${comparison.winner}`);
23
- * benchmark.printComparison(comparison);
24
- * ```
25
- *
26
- * ## With Harbor
27
- *
28
- * ```bash
29
- * harbor run \
30
- * --dataset tasks.yaml \
31
- * --agent @agent-relay/benchmark/harbor \
32
- * --parallel 10
33
- * ```
34
- *
35
- * ## CLI Usage
36
- *
37
- * ```bash
38
- * relay-benchmark run --dataset tasks.yaml --config all
39
- * relay-benchmark run --dataset tasks.yaml --config swarm
40
- * relay-benchmark list tasks.yaml
41
- * ```
42
- */
43
-
44
- // Types
45
- export type {
46
- ConfigurationType,
47
- TaskComplexity,
48
- Task,
49
- RunResult,
50
- ComparisonResult,
51
- ScoreBreakdown,
52
- BenchmarkConfig,
53
- RunMetrics,
54
- TaskDataset,
55
- HarborTaskInput,
56
- HarborEvaluationOutput,
57
- } from './types.js';
58
-
59
- export { DEFAULT_BENCHMARK_CONFIG } from './types.js';
60
-
61
- // Main benchmark class
62
- export { ComparisonBenchmark, runComparison } from './benchmark.js';
63
-
64
- // Runners
65
- export {
66
- ConfigurationRunner,
67
- SingleAgentRunner,
68
- SubAgentRunner,
69
- SwarmRunner,
70
- } from './runners/index.js';
71
-
72
- // Harbor integration
73
- export { evaluate, evaluateSingle, evaluateCustom } from './harbor.js';
@@ -1,205 +0,0 @@
1
- /**
2
- * Base Configuration Runner
3
- *
4
- * Abstract base class for benchmark configuration runners.
5
- */
6
-
7
- import {
8
- createRelay,
9
- RelayClient,
10
- type MetricsResponsePayload,
11
- type Relay,
12
- } from '@agent-relay/sdk';
13
- import type {
14
- ConfigurationType,
15
- Task,
16
- RunResult,
17
- RunMetrics,
18
- BenchmarkConfig,
19
- } from '../types.js';
20
- import { DEFAULT_BENCHMARK_CONFIG } from '../types.js';
21
-
22
- type AgentMetrics = MetricsResponsePayload['agents'][number] & {
23
- tokens?: number;
24
- memoryMb?: number;
25
- };
26
-
27
- /**
28
- * Abstract base class for configuration runners
29
- */
30
- export abstract class ConfigurationRunner {
31
- protected relay!: Relay;
32
- protected orchestrator!: RelayClient;
33
- protected config: BenchmarkConfig;
34
- protected metrics: RunMetrics = {
35
- messages: 0,
36
- latencies: [],
37
- startTime: 0,
38
- spawnedAgents: [],
39
- errors: [],
40
- };
41
-
42
- constructor(config: Partial<BenchmarkConfig> = {}) {
43
- this.config = { ...DEFAULT_BENCHMARK_CONFIG, ...config };
44
- }
45
-
46
- /**
47
- * Get the configuration type this runner handles
48
- */
49
- abstract get configurationType(): ConfigurationType;
50
-
51
- /**
52
- * Set up the relay and orchestrator client
53
- */
54
- async setup(): Promise<void> {
55
- this.relay = await createRelay({
56
- socketPath: this.config.socketPath,
57
- quiet: this.config.quiet,
58
- spawnManager: true,
59
- });
60
- this.orchestrator = await this.relay.client('Orchestrator', {
61
- quiet: this.config.quiet,
62
- });
63
- this.resetMetrics();
64
- }
65
-
66
- /**
67
- * Run a task and return the result
68
- */
69
- abstract run(task: Task): Promise<RunResult>;
70
-
71
- /**
72
- * Clean up resources
73
- */
74
- async teardown(): Promise<void> {
75
- // Release any remaining agents
76
- for (const agent of this.metrics.spawnedAgents) {
77
- try {
78
- await this.orchestrator.release(agent);
79
- } catch {
80
- // Ignore release errors during cleanup
81
- }
82
- }
83
-
84
- await this.relay.stop();
85
- }
86
-
87
- /**
88
- * Reset metrics for a new run
89
- */
90
- protected resetMetrics(): void {
91
- this.metrics = {
92
- messages: 0,
93
- latencies: [],
94
- startTime: Date.now(),
95
- spawnedAgents: [],
96
- errors: [],
97
- };
98
- }
99
-
100
- /**
101
- * Calculate percentile from an array of values
102
- */
103
- protected percentile(arr: number[], p: number): number {
104
- if (arr.length === 0) return 0;
105
- const sorted = [...arr].sort((a, b) => a - b);
106
- const idx = Math.ceil((p / 100) * sorted.length) - 1;
107
- return sorted[Math.max(0, idx)];
108
- }
109
-
110
- /**
111
- * Extract total tokens from metrics response
112
- */
113
- protected extractTokens(metrics: MetricsResponsePayload): number {
114
- const agents = metrics.agents as AgentMetrics[] | undefined;
115
- return (
116
- agents?.reduce((sum, agent) => sum + (agent.tokens || 0), 0) || 0
117
- );
118
- }
119
-
120
- /**
121
- * Extract peak memory from metrics response
122
- */
123
- protected extractMemory(metrics: MetricsResponsePayload): number {
124
- const agents = metrics.agents as AgentMetrics[] | undefined;
125
- const memoryValues = agents?.map((agent) => {
126
- if (agent.memoryMb != null) return agent.memoryMb;
127
- if (agent.rssBytes != null) return agent.rssBytes / 1024 / 1024;
128
- return 0;
129
- });
130
- return Math.max(...(memoryValues || [0]));
131
- }
132
-
133
- /**
134
- * Build a failed result when setup fails
135
- */
136
- protected buildFailedResult(
137
- task: Task,
138
- startTime: number,
139
- errors: string[]
140
- ): RunResult {
141
- const now = Date.now();
142
- return {
143
- taskId: task.id,
144
- configuration: this.configurationType,
145
- totalTimeMs: now - startTime,
146
- timeToFirstActionMs: 0,
147
- messageCount: 0,
148
- avgLatencyMs: 0,
149
- latencyP50Ms: 0,
150
- latencyP99Ms: 0,
151
- coordinationRounds: 0,
152
- agentCount: 0,
153
- totalTokensUsed: 0,
154
- peakMemoryMb: 0,
155
- success: false,
156
- completionRate: 0,
157
- errors,
158
- startedAt: startTime,
159
- completedAt: now,
160
- };
161
- }
162
-
163
- /**
164
- * Wait for an agent to complete their task
165
- */
166
- protected waitForCompletion(
167
- agentName: string,
168
- task: Task,
169
- donePrefix = 'DONE:'
170
- ): Promise<boolean> {
171
- const timeoutMs = task.timeoutMs || 300000;
172
-
173
- return new Promise((resolve) => {
174
- const timeout = setTimeout(() => {
175
- this.metrics.errors.push(`Timeout waiting for ${agentName}`);
176
- resolve(false);
177
- }, timeoutMs);
178
-
179
- const originalHandler = this.orchestrator.onMessage;
180
- this.orchestrator.onMessage = (from, payload, id, meta, originalTo) => {
181
- // Call original handler if exists
182
- if (originalHandler) {
183
- originalHandler(from, payload, id, meta, originalTo);
184
- }
185
-
186
- if (from === agentName && payload.body.startsWith(donePrefix)) {
187
- clearTimeout(timeout);
188
- resolve(true);
189
- }
190
- };
191
- });
192
- }
193
-
194
- /**
195
- * Log a message if not in quiet mode
196
- */
197
- protected log(message: string): void {
198
- if (!this.config.quiet) {
199
- console.log(`[${this.configurationType}] ${message}`);
200
- }
201
- }
202
- }
203
-
204
- // Re-export the DEFAULT_BENCHMARK_CONFIG
205
- export { DEFAULT_BENCHMARK_CONFIG } from '../types.js';
@@ -1,10 +0,0 @@
1
- /**
2
- * Benchmark Runners
3
- *
4
- * Export all configuration runners.
5
- */
6
-
7
- export { ConfigurationRunner, DEFAULT_BENCHMARK_CONFIG } from './base.js';
8
- export { SingleAgentRunner } from './single.js';
9
- export { SubAgentRunner } from './subagent.js';
10
- export { SwarmRunner } from './swarm.js';