agent-relay 2.3.2 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334) hide show
  1. package/README.md +1 -1
  2. package/dist/index.cjs +1 -1
  3. package/dist/src/cli/index.js +124 -7
  4. package/dist/src/cli/index.js.map +1 -1
  5. package/package.json +20 -26
  6. package/packages/acp-bridge/package.json +2 -2
  7. package/packages/bridge/package.json +7 -7
  8. package/packages/config/dist/cloud-config.d.ts +1 -1
  9. package/packages/config/dist/cloud-config.d.ts.map +1 -1
  10. package/packages/config/dist/cloud-config.js.map +1 -1
  11. package/packages/config/dist/schemas.d.ts +5 -5
  12. package/packages/config/dist/schemas.js +1 -1
  13. package/packages/config/dist/schemas.js.map +1 -1
  14. package/packages/config/package.json +2 -2
  15. package/packages/config/src/cloud-config.ts +2 -2
  16. package/packages/config/src/schemas.test.ts +48 -0
  17. package/packages/config/src/schemas.ts +1 -1
  18. package/packages/continuity/package.json +2 -2
  19. package/packages/daemon/package.json +12 -12
  20. package/packages/hooks/package.json +4 -4
  21. package/packages/mcp/package.json +5 -5
  22. package/packages/memory/package.json +2 -2
  23. package/packages/policy/package.json +2 -2
  24. package/packages/protocol/package.json +1 -1
  25. package/packages/resiliency/package.json +1 -1
  26. package/packages/sdk/dist/index.d.ts +1 -29
  27. package/packages/sdk/dist/index.d.ts.map +1 -1
  28. package/packages/sdk/dist/index.js +1 -38
  29. package/packages/sdk/dist/index.js.map +1 -1
  30. package/packages/sdk/package.json +4 -25
  31. package/packages/sdk/src/index.ts +1 -69
  32. package/packages/sdk-py/README.md +56 -0
  33. package/packages/sdk-py/pyproject.toml +23 -0
  34. package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
  35. package/packages/sdk-py/src/agent_relay/builder.py +367 -0
  36. package/packages/sdk-py/src/agent_relay/types.py +92 -0
  37. package/packages/sdk-py/tests/__init__.py +0 -0
  38. package/packages/sdk-py/tests/test_builder.py +101 -0
  39. package/packages/sdk-ts/dist/__tests__/facade.test.d.ts +2 -0
  40. package/packages/sdk-ts/dist/__tests__/facade.test.d.ts.map +1 -0
  41. package/packages/sdk-ts/dist/__tests__/facade.test.js +257 -0
  42. package/packages/sdk-ts/dist/__tests__/facade.test.js.map +1 -0
  43. package/packages/sdk-ts/dist/__tests__/unit.test.d.ts +2 -0
  44. package/packages/sdk-ts/dist/__tests__/unit.test.d.ts.map +1 -0
  45. package/packages/sdk-ts/dist/__tests__/unit.test.js +124 -0
  46. package/packages/sdk-ts/dist/__tests__/unit.test.js.map +1 -0
  47. package/packages/sdk-ts/dist/client.d.ts +2 -0
  48. package/packages/sdk-ts/dist/client.d.ts.map +1 -1
  49. package/packages/sdk-ts/dist/client.js +2 -0
  50. package/packages/sdk-ts/dist/client.js.map +1 -1
  51. package/packages/sdk-ts/dist/index.d.ts +1 -0
  52. package/packages/sdk-ts/dist/index.d.ts.map +1 -1
  53. package/packages/sdk-ts/dist/index.js +1 -0
  54. package/packages/sdk-ts/dist/index.js.map +1 -1
  55. package/packages/sdk-ts/dist/protocol.d.ts +1 -0
  56. package/packages/sdk-ts/dist/protocol.d.ts.map +1 -1
  57. package/packages/sdk-ts/dist/relay.d.ts +44 -0
  58. package/packages/sdk-ts/dist/relay.d.ts.map +1 -1
  59. package/packages/sdk-ts/dist/relay.js +89 -11
  60. package/packages/sdk-ts/dist/relay.js.map +1 -1
  61. package/packages/sdk-ts/dist/relaycast.js +2 -2
  62. package/packages/sdk-ts/dist/relaycast.js.map +1 -1
  63. package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
  64. package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
  65. package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
  66. package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
  67. package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
  68. package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
  69. package/packages/sdk-ts/dist/workflows/builder.js +179 -0
  70. package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
  71. package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
  72. package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
  73. package/packages/sdk-ts/dist/workflows/cli.js +82 -0
  74. package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
  75. package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
  76. package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
  77. package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
  78. package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
  79. package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
  80. package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
  81. package/packages/sdk-ts/dist/workflows/index.js +10 -0
  82. package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
  83. package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
  84. package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
  85. package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
  86. package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
  87. package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
  88. package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
  89. package/packages/sdk-ts/dist/workflows/run.js +24 -0
  90. package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
  91. package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
  92. package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
  93. package/packages/sdk-ts/dist/workflows/runner.js +650 -0
  94. package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
  95. package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
  96. package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
  97. package/packages/sdk-ts/dist/workflows/state.js +140 -0
  98. package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
  99. package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
  100. package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
  101. package/packages/sdk-ts/dist/workflows/templates.js +395 -0
  102. package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
  103. package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
  104. package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
  105. package/packages/sdk-ts/dist/workflows/types.js +8 -0
  106. package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
  107. package/packages/sdk-ts/package.json +9 -3
  108. package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
  109. package/packages/sdk-ts/src/__tests__/facade.test.ts +296 -0
  110. package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
  111. package/packages/sdk-ts/src/__tests__/unit.test.ts +152 -0
  112. package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
  113. package/packages/sdk-ts/src/client.ts +4 -0
  114. package/packages/sdk-ts/src/index.ts +1 -0
  115. package/packages/sdk-ts/src/protocol.ts +1 -1
  116. package/packages/sdk-ts/src/relay.ts +112 -11
  117. package/packages/sdk-ts/src/relaycast.ts +2 -2
  118. package/packages/sdk-ts/src/workflows/README.md +450 -0
  119. package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
  120. package/packages/sdk-ts/src/workflows/builder.ts +241 -0
  121. package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
  122. package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
  123. package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
  124. package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
  125. package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
  126. package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
  127. package/packages/sdk-ts/src/workflows/cli.ts +93 -0
  128. package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
  129. package/packages/sdk-ts/src/workflows/index.ts +9 -0
  130. package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
  131. package/packages/sdk-ts/src/workflows/run.ts +47 -0
  132. package/packages/sdk-ts/src/workflows/runner.ts +873 -0
  133. package/packages/sdk-ts/src/workflows/schema.json +321 -0
  134. package/packages/sdk-ts/src/workflows/state.ts +279 -0
  135. package/packages/sdk-ts/src/workflows/templates.ts +544 -0
  136. package/packages/sdk-ts/src/workflows/types.ts +178 -0
  137. package/packages/sdk-ts/tsconfig.json +6 -1
  138. package/packages/spawner/package.json +1 -1
  139. package/packages/state/package.json +1 -1
  140. package/packages/storage/package.json +2 -2
  141. package/packages/telemetry/package.json +1 -1
  142. package/packages/trajectory/package.json +2 -2
  143. package/packages/user-directory/package.json +2 -2
  144. package/packages/utils/package.json +3 -3
  145. package/packages/wrapper/package.json +5 -6
  146. package/scripts/postinstall.js +106 -2
  147. package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
  148. package/packages/api-types/.trajectories/index.json +0 -12
  149. package/packages/api-types/dist/index.d.ts +0 -21
  150. package/packages/api-types/dist/index.d.ts.map +0 -1
  151. package/packages/api-types/dist/index.js +0 -22
  152. package/packages/api-types/dist/index.js.map +0 -1
  153. package/packages/api-types/dist/schemas/agent.d.ts +0 -259
  154. package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
  155. package/packages/api-types/dist/schemas/agent.js +0 -102
  156. package/packages/api-types/dist/schemas/agent.js.map +0 -1
  157. package/packages/api-types/dist/schemas/api.d.ts +0 -290
  158. package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
  159. package/packages/api-types/dist/schemas/api.js +0 -162
  160. package/packages/api-types/dist/schemas/api.js.map +0 -1
  161. package/packages/api-types/dist/schemas/decision.d.ts +0 -230
  162. package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
  163. package/packages/api-types/dist/schemas/decision.js +0 -104
  164. package/packages/api-types/dist/schemas/decision.js.map +0 -1
  165. package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
  166. package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
  167. package/packages/api-types/dist/schemas/fleet.js +0 -71
  168. package/packages/api-types/dist/schemas/fleet.js.map +0 -1
  169. package/packages/api-types/dist/schemas/history.d.ts +0 -180
  170. package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
  171. package/packages/api-types/dist/schemas/history.js +0 -72
  172. package/packages/api-types/dist/schemas/history.js.map +0 -1
  173. package/packages/api-types/dist/schemas/index.d.ts +0 -14
  174. package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
  175. package/packages/api-types/dist/schemas/index.js +0 -22
  176. package/packages/api-types/dist/schemas/index.js.map +0 -1
  177. package/packages/api-types/dist/schemas/message.d.ts +0 -456
  178. package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
  179. package/packages/api-types/dist/schemas/message.js +0 -88
  180. package/packages/api-types/dist/schemas/message.js.map +0 -1
  181. package/packages/api-types/dist/schemas/session.d.ts +0 -60
  182. package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
  183. package/packages/api-types/dist/schemas/session.js +0 -36
  184. package/packages/api-types/dist/schemas/session.js.map +0 -1
  185. package/packages/api-types/dist/schemas/task.d.ts +0 -111
  186. package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
  187. package/packages/api-types/dist/schemas/task.js +0 -64
  188. package/packages/api-types/dist/schemas/task.js.map +0 -1
  189. package/packages/api-types/package.json +0 -61
  190. package/packages/api-types/scripts/generate-openapi.ts +0 -106
  191. package/packages/api-types/src/index.ts +0 -22
  192. package/packages/api-types/src/schemas/agent.test.ts +0 -164
  193. package/packages/api-types/src/schemas/agent.ts +0 -110
  194. package/packages/api-types/src/schemas/api.test.ts +0 -372
  195. package/packages/api-types/src/schemas/api.ts +0 -194
  196. package/packages/api-types/src/schemas/decision.test.ts +0 -324
  197. package/packages/api-types/src/schemas/decision.ts +0 -136
  198. package/packages/api-types/src/schemas/fleet.test.ts +0 -212
  199. package/packages/api-types/src/schemas/fleet.ts +0 -83
  200. package/packages/api-types/src/schemas/history.test.ts +0 -242
  201. package/packages/api-types/src/schemas/history.ts +0 -84
  202. package/packages/api-types/src/schemas/index.ts +0 -148
  203. package/packages/api-types/src/schemas/message.test.ts +0 -192
  204. package/packages/api-types/src/schemas/message.ts +0 -98
  205. package/packages/api-types/src/schemas/session.test.ts +0 -104
  206. package/packages/api-types/src/schemas/session.ts +0 -40
  207. package/packages/api-types/src/schemas/task.test.ts +0 -192
  208. package/packages/api-types/src/schemas/task.ts +0 -78
  209. package/packages/api-types/tsconfig.json +0 -19
  210. package/packages/api-types/vitest.config.ts +0 -9
  211. package/packages/benchmark/README.md +0 -200
  212. package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
  213. package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
  214. package/packages/benchmark/datasets/quick-test.yaml +0 -20
  215. package/packages/benchmark/dist/benchmark.d.ts +0 -47
  216. package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
  217. package/packages/benchmark/dist/benchmark.js +0 -224
  218. package/packages/benchmark/dist/benchmark.js.map +0 -1
  219. package/packages/benchmark/dist/cli.d.ts +0 -8
  220. package/packages/benchmark/dist/cli.d.ts.map +0 -1
  221. package/packages/benchmark/dist/cli.js +0 -185
  222. package/packages/benchmark/dist/cli.js.map +0 -1
  223. package/packages/benchmark/dist/harbor.d.ts +0 -53
  224. package/packages/benchmark/dist/harbor.d.ts.map +0 -1
  225. package/packages/benchmark/dist/harbor.js +0 -127
  226. package/packages/benchmark/dist/harbor.js.map +0 -1
  227. package/packages/benchmark/dist/index.d.ts +0 -48
  228. package/packages/benchmark/dist/index.d.ts.map +0 -1
  229. package/packages/benchmark/dist/index.js +0 -50
  230. package/packages/benchmark/dist/index.js.map +0 -1
  231. package/packages/benchmark/dist/runners/base.d.ts +0 -63
  232. package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
  233. package/packages/benchmark/dist/runners/base.js +0 -156
  234. package/packages/benchmark/dist/runners/base.js.map +0 -1
  235. package/packages/benchmark/dist/runners/index.d.ts +0 -10
  236. package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
  237. package/packages/benchmark/dist/runners/index.js +0 -10
  238. package/packages/benchmark/dist/runners/index.js.map +0 -1
  239. package/packages/benchmark/dist/runners/single.d.ts +0 -19
  240. package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
  241. package/packages/benchmark/dist/runners/single.js +0 -111
  242. package/packages/benchmark/dist/runners/single.js.map +0 -1
  243. package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
  244. package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
  245. package/packages/benchmark/dist/runners/subagent.js +0 -212
  246. package/packages/benchmark/dist/runners/subagent.js.map +0 -1
  247. package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
  248. package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
  249. package/packages/benchmark/dist/runners/swarm.js +0 -273
  250. package/packages/benchmark/dist/runners/swarm.js.map +0 -1
  251. package/packages/benchmark/dist/types.d.ts +0 -178
  252. package/packages/benchmark/dist/types.d.ts.map +0 -1
  253. package/packages/benchmark/dist/types.js +0 -16
  254. package/packages/benchmark/dist/types.js.map +0 -1
  255. package/packages/benchmark/package.json +0 -80
  256. package/packages/benchmark/src/benchmark.ts +0 -298
  257. package/packages/benchmark/src/cli.ts +0 -240
  258. package/packages/benchmark/src/harbor.ts +0 -170
  259. package/packages/benchmark/src/index.ts +0 -73
  260. package/packages/benchmark/src/runners/base.ts +0 -205
  261. package/packages/benchmark/src/runners/index.ts +0 -10
  262. package/packages/benchmark/src/runners/single.ts +0 -121
  263. package/packages/benchmark/src/runners/subagent.ts +0 -240
  264. package/packages/benchmark/src/runners/swarm.ts +0 -326
  265. package/packages/benchmark/src/types.ts +0 -205
  266. package/packages/benchmark/tsconfig.json +0 -20
  267. package/packages/cli-tester/README.md +0 -277
  268. package/packages/cli-tester/dist/index.d.ts +0 -21
  269. package/packages/cli-tester/dist/index.d.ts.map +0 -1
  270. package/packages/cli-tester/dist/index.js +0 -21
  271. package/packages/cli-tester/dist/index.js.map +0 -1
  272. package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
  273. package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
  274. package/packages/cli-tester/dist/utils/credential-check.js +0 -230
  275. package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
  276. package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
  277. package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
  278. package/packages/cli-tester/dist/utils/socket-client.js +0 -153
  279. package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
  280. package/packages/cli-tester/docker/Dockerfile +0 -61
  281. package/packages/cli-tester/docker/docker-compose.yml +0 -71
  282. package/packages/cli-tester/docker/entrypoint.sh +0 -58
  283. package/packages/cli-tester/package.json +0 -32
  284. package/packages/cli-tester/scripts/clear-auth.sh +0 -101
  285. package/packages/cli-tester/scripts/inject-message.sh +0 -42
  286. package/packages/cli-tester/scripts/start.sh +0 -71
  287. package/packages/cli-tester/scripts/test-cli.sh +0 -56
  288. package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
  289. package/packages/cli-tester/scripts/test-registration.sh +0 -182
  290. package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
  291. package/packages/cli-tester/scripts/test-spawn.sh +0 -140
  292. package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
  293. package/packages/cli-tester/scripts/verify-auth.sh +0 -112
  294. package/packages/cli-tester/src/index.ts +0 -40
  295. package/packages/cli-tester/src/utils/credential-check.ts +0 -284
  296. package/packages/cli-tester/src/utils/socket-client.ts +0 -211
  297. package/packages/cli-tester/tests/credential-check.test.ts +0 -56
  298. package/packages/cli-tester/tsconfig.json +0 -11
  299. package/packages/sdk/dist/browser-client.d.ts +0 -212
  300. package/packages/sdk/dist/browser-client.d.ts.map +0 -1
  301. package/packages/sdk/dist/browser-client.js +0 -750
  302. package/packages/sdk/dist/browser-client.js.map +0 -1
  303. package/packages/sdk/dist/browser-framing.d.ts +0 -46
  304. package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
  305. package/packages/sdk/dist/browser-framing.js +0 -122
  306. package/packages/sdk/dist/browser-framing.js.map +0 -1
  307. package/packages/sdk/dist/standalone.d.ts +0 -89
  308. package/packages/sdk/dist/standalone.d.ts.map +0 -1
  309. package/packages/sdk/dist/standalone.js +0 -131
  310. package/packages/sdk/dist/standalone.js.map +0 -1
  311. package/packages/sdk/dist/transports/index.d.ts +0 -92
  312. package/packages/sdk/dist/transports/index.d.ts.map +0 -1
  313. package/packages/sdk/dist/transports/index.js +0 -129
  314. package/packages/sdk/dist/transports/index.js.map +0 -1
  315. package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
  316. package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
  317. package/packages/sdk/dist/transports/socket-transport.js +0 -94
  318. package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
  319. package/packages/sdk/dist/transports/types.d.ts +0 -69
  320. package/packages/sdk/dist/transports/types.d.ts.map +0 -1
  321. package/packages/sdk/dist/transports/types.js +0 -10
  322. package/packages/sdk/dist/transports/types.js.map +0 -1
  323. package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
  324. package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
  325. package/packages/sdk/dist/transports/websocket-transport.js +0 -180
  326. package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
  327. package/packages/sdk/src/browser-client.ts +0 -985
  328. package/packages/sdk/src/browser-framing.test.ts +0 -115
  329. package/packages/sdk/src/browser-framing.ts +0 -150
  330. package/packages/sdk/src/standalone.ts +0 -183
  331. package/packages/sdk/src/transports/index.ts +0 -197
  332. package/packages/sdk/src/transports/socket-transport.ts +0 -115
  333. package/packages/sdk/src/transports/types.ts +0 -77
  334. package/packages/sdk/src/transports/websocket-transport.ts +0 -245
@@ -1,224 +0,0 @@
1
- /**
2
- * Comparison Benchmark
3
- *
4
- * Main orchestrator for running comparison benchmarks across configurations.
5
- */
6
- import { DEFAULT_BENCHMARK_CONFIG } from './types.js';
7
- import { SingleAgentRunner, SubAgentRunner, SwarmRunner, } from './runners/index.js';
8
- /**
9
- * Main benchmark orchestrator
10
- */
11
- export class ComparisonBenchmark {
12
- config;
13
- runners;
14
- constructor(config = {}) {
15
- this.config = { ...DEFAULT_BENCHMARK_CONFIG, ...config };
16
- // Initialize runners for configured configurations
17
- this.runners = new Map();
18
- for (const configType of this.config.configurations) {
19
- this.runners.set(configType, this.createRunner(configType));
20
- }
21
- }
22
- /**
23
- * Create a runner for a configuration type
24
- */
25
- createRunner(type) {
26
- switch (type) {
27
- case 'single':
28
- return new SingleAgentRunner(this.config);
29
- case 'subagent':
30
- return new SubAgentRunner(this.config);
31
- case 'swarm':
32
- return new SwarmRunner(this.config);
33
- default:
34
- throw new Error(`Unknown configuration type: ${type}`);
35
- }
36
- }
37
- /**
38
- * Run a comparison across all configured configurations
39
- */
40
- async runComparison(task) {
41
- const results = new Map();
42
- const scores = new Map();
43
- for (const [configType, runner] of this.runners) {
44
- if (!this.config.quiet) {
45
- console.log(`\n=== Running ${configType} configuration ===`);
46
- console.log(`Task: ${task.id}`);
47
- }
48
- try {
49
- await runner.setup();
50
- const result = await runner.run(task);
51
- await runner.teardown();
52
- results.set(configType, result);
53
- scores.set(configType, this.calculateScore(result));
54
- if (!this.config.quiet) {
55
- this.printRunResult(result);
56
- }
57
- }
58
- catch (err) {
59
- console.error(`Error running ${configType}:`, err.message);
60
- // Create failed result
61
- const failedResult = {
62
- taskId: task.id,
63
- configuration: configType,
64
- totalTimeMs: 0,
65
- timeToFirstActionMs: 0,
66
- messageCount: 0,
67
- avgLatencyMs: 0,
68
- latencyP50Ms: 0,
69
- latencyP99Ms: 0,
70
- coordinationRounds: 0,
71
- agentCount: 0,
72
- totalTokensUsed: 0,
73
- peakMemoryMb: 0,
74
- success: false,
75
- completionRate: 0,
76
- errors: [err.message],
77
- startedAt: Date.now(),
78
- completedAt: Date.now(),
79
- };
80
- results.set(configType, failedResult);
81
- scores.set(configType, { total: 0, successScore: 0, timeScore: 0, efficiencyScore: 0 });
82
- }
83
- // Cool-down between runs
84
- if (this.config.cooldownMs > 0) {
85
- await new Promise((r) => setTimeout(r, this.config.cooldownMs));
86
- }
87
- }
88
- const winner = this.determineWinner(results, scores);
89
- return {
90
- taskId: task.id,
91
- results,
92
- winner,
93
- scores,
94
- };
95
- }
96
- /**
97
- * Run a single configuration
98
- */
99
- async runSingle(task, configType) {
100
- const runner = this.runners.get(configType);
101
- if (!runner) {
102
- throw new Error(`Configuration ${configType} not enabled`);
103
- }
104
- await runner.setup();
105
- const result = await runner.run(task);
106
- await runner.teardown();
107
- return result;
108
- }
109
- /**
110
- * Calculate score breakdown for a result
111
- */
112
- calculateScore(result) {
113
- const maxTimeMs = 300000; // 5 minutes baseline
114
- // Success component (0-50 points)
115
- const successScore = result.success ? 50 : result.completionRate * 25;
116
- // Time component (0-30 points) - faster is better
117
- const timeScore = result.success
118
- ? 30 * Math.max(0, 1 - result.totalTimeMs / maxTimeMs)
119
- : 0;
120
- // Efficiency component (0-20 points) - fewer agents is better for same result
121
- const efficiencyScore = result.success
122
- ? 20 / Math.max(1, result.agentCount)
123
- : 0;
124
- return {
125
- total: successScore + timeScore + efficiencyScore,
126
- successScore,
127
- timeScore,
128
- efficiencyScore,
129
- };
130
- }
131
- /**
132
- * Determine the winning configuration
133
- */
134
- determineWinner(results, scores) {
135
- let best = 'single';
136
- let bestScore = -1;
137
- for (const [configType, score] of scores) {
138
- if (score.total > bestScore) {
139
- bestScore = score.total;
140
- best = configType;
141
- }
142
- }
143
- return best;
144
- }
145
- /**
146
- * Print a single run result
147
- */
148
- printRunResult(result) {
149
- console.log(`\nResult for ${result.configuration}:`);
150
- console.log(` Success: ${result.success ? '✓' : '✗'}`);
151
- console.log(` Time: ${(result.totalTimeMs / 1000).toFixed(1)}s`);
152
- console.log(` Agents: ${result.agentCount}`);
153
- console.log(` Messages: ${result.messageCount}`);
154
- if (result.errors.length > 0) {
155
- console.log(` Errors: ${result.errors.join(', ')}`);
156
- }
157
- }
158
- /**
159
- * Print comparison table
160
- */
161
- printComparison(comparison) {
162
- console.log('\n' + '='.repeat(60));
163
- console.log('COMPARISON RESULTS');
164
- console.log('='.repeat(60));
165
- console.log(`Task: ${comparison.taskId}`);
166
- console.log(`Winner: ${comparison.winner.toUpperCase()}`);
167
- console.log('');
168
- // Build table data
169
- const configs = Array.from(comparison.results.keys());
170
- const headers = ['Metric', ...configs.map((c) => c.charAt(0).toUpperCase() + c.slice(1))];
171
- const rows = [
172
- [
173
- 'Success',
174
- ...configs.map((c) => comparison.results.get(c)?.success ? '✓' : '✗'),
175
- ],
176
- [
177
- 'Time (s)',
178
- ...configs.map((c) => ((comparison.results.get(c)?.totalTimeMs || 0) / 1000).toFixed(1)),
179
- ],
180
- [
181
- 'Agents',
182
- ...configs.map((c) => String(comparison.results.get(c)?.agentCount || 0)),
183
- ],
184
- [
185
- 'Messages',
186
- ...configs.map((c) => String(comparison.results.get(c)?.messageCount || 0)),
187
- ],
188
- [
189
- 'Avg Latency (ms)',
190
- ...configs.map((c) => (comparison.results.get(c)?.avgLatencyMs || 0).toFixed(0)),
191
- ],
192
- [
193
- 'Completion %',
194
- ...configs.map((c) => ((comparison.results.get(c)?.completionRate || 0) * 100).toFixed(0) + '%'),
195
- ],
196
- [
197
- 'Score',
198
- ...configs.map((c) => (comparison.scores.get(c)?.total || 0).toFixed(1)),
199
- ],
200
- ];
201
- // Print table
202
- const colWidths = headers.map((h, i) => Math.max(h.length, ...rows.map((r) => String(r[i]).length)));
203
- const separator = colWidths.map((w) => '-'.repeat(w + 2)).join('+');
204
- console.log(separator);
205
- console.log('|' +
206
- headers.map((h, i) => ` ${h.padEnd(colWidths[i])} `).join('|') +
207
- '|');
208
- console.log(separator);
209
- for (const row of rows) {
210
- console.log('|' +
211
- row.map((cell, i) => ` ${String(cell).padEnd(colWidths[i])} `).join('|') +
212
- '|');
213
- }
214
- console.log(separator);
215
- }
216
- }
217
- /**
218
- * Quick helper to run a comparison benchmark
219
- */
220
- export async function runComparison(task, config) {
221
- const benchmark = new ComparisonBenchmark(config);
222
- return benchmark.runComparison(task);
223
- }
224
- //# sourceMappingURL=benchmark.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"benchmark.js","sourceRoot":"","sources":["../src/benchmark.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAUH,OAAO,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AACtD,OAAO,EAEL,iBAAiB,EACjB,cAAc,EACd,WAAW,GACZ,MAAM,oBAAoB,CAAC;AAE5B;;GAEG;AACH,MAAM,OAAO,mBAAmB;IACtB,MAAM,CAAkB;IACxB,OAAO,CAA8C;IAE7D,YAAY,SAAmC,EAAE;QAC/C,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,wBAAwB,EAAE,GAAG,MAAM,EAAE,CAAC;QAEzD,mDAAmD;QACnD,IAAI,CAAC,OAAO,GAAG,IAAI,GAAG,EAAE,CAAC;QACzB,KAAK,MAAM,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;YACpD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAuB;QAC1C,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,QAAQ;gBACX,OAAO,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC5C,KAAK,UAAU;gBACb,OAAO,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACzC,KAAK,OAAO;gBACV,OAAO,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACtC;gBACE,MAAM,IAAI,KAAK,CAAC,+BAA+B,IAAI,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa,CAAC,IAAU;QAC5B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAgC,CAAC;QACxD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAqC,CAAC;QAE5D,KAAK,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAChD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;gBACvB,OAAO,CAAC,GAAG,CAAC,iBAAiB,UAAU,oBAAoB,CAAC,CAAC;gBAC7D,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;YAClC,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;gBACrB,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;gBACtC,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAExB,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;gBAChC,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC;gBAEpD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;oBACvB,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,KAAK,CAAC,iBAAiB,UAAU,GAAG,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;gBAEtE,uBAAuB;gBACvB,MAAM,YAAY,GAAc;oBAC9B,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,aAAa,EAAE,UAAU;oBACzB,WAAW,EAAE,CAAC;oBACd,mBAAmB,EAAE,CAAC;oBACtB,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,kBAAkB,EAAE,CAAC;oBACrB,UAAU,EAAE,CAAC;oBACb,eAAe,EAAE,CAAC;oBAClB,YAAY,EAAE,CAAC;oBACf,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,CAAC;oBACjB,MAAM,EAAE,CAAE,GAAa,CAAC,OAAO,CAAC;oBAChC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;oBACrB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;iBACxB,CAAC;gBACF,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;gBACtC,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,CAAC,CAAC;YAC1F,CAAC;YAED,yBAAyB;YACzB,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;gBAC/B,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;YAClE,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAErD,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,OAAO;YACP,MAAM;YACN,MAAM;SACP,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CACb,IAAU,EACV,UAA6B;QAE7B,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,iBAAiB,UAAU,cAAc,CAAC,CAAC;QAC7D,CAAC;QAED,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACrB,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;QAExB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,MAAiB;QACtC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,qBAAqB;QAE/C,kCAAkC;QAClC,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,GAAG,EAAE,CAAC;QAEtE,kDAAkD;QAClD,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO;YAC9B,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,WAAW,GAAG,SAAS,CAAC;YACtD,CAAC,CAAC,CAAC,CAAC;QAEN,8EAA8E;QAC9E,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO;YACpC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC;YACrC,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,KAAK,EAAE,YAAY,GAAG,SAAS,GAAG,eAAe;YACjD,YAAY;YACZ,SAAS;YACT,eAAe;SAChB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,OAA0C,EAC1C,MAA8C;QAE9C,IAAI,IAAI,GAAsB,QAAQ,CAAC;QACvC,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC;QAEnB,KAAK,MAAM,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;YACzC,IAAI,KAAK,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;gBAC5B,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC;gBACxB,IAAI,GAAG,UAAU,CAAC;YACpB,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,MAAiB;QACtC,OAAO,CAAC,GAAG,CAAC,gBAAgB,MAAM,CAAC,aAAa,GAAG,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QACxD,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAClE,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,UAA4B;QAC1C,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,SAAS,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1C,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAEhB,mBAAmB;QACnB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QACtD,MAAM,OAAO,GAAG,CAAC,QAAQ,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAE1F,MAAM,IAAI,GAAG;YACX;gBACE,SAAS;gBACT,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAC/C;aACF;YACD;gBACE,UAAU;gBACV,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAClE;aACF;YACD;gBACE,QAAQ;gBACR,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,UAAU,IAAI,CAAC,CAAC,CACnD;aACF;YACD;gBACE,UAAU;gBACV,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CACrD;aACF;YACD;gBACE,kBAAkB;gBAClB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAC1D;aACF;YACD;gBACE,cAAc;gBACd,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,cAAc,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,CAC1E;aACF;YACD;gBACE,OAAO;gBACP,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAClD;aACF;SACF,CAAC;QAEF,cAAc;QACd,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACrC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAC5D,CAAC;QAEF,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEpE,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvB,OAAO,CAAC,GAAG,CACT,GAAG;YACD,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YAC9D,GAAG,CACN,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAEvB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CACT,GAAG;gBACD,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBACxE,GAAG,CACN,CAAC;QACJ,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IACzB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAU,EACV,MAAiC;IAEjC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC,MAAM,CAAC,CAAC;IAClD,OAAO,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;AACvC,CAAC"}
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Benchmark CLI
4
- *
5
- * Command-line interface for running agent swarm benchmarks.
6
- */
7
- export {};
8
- //# sourceMappingURL=cli.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;;;GAIG"}
@@ -1,185 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Benchmark CLI
4
- *
5
- * Command-line interface for running agent swarm benchmarks.
6
- */
7
- import { Command } from 'commander';
8
- import { readFileSync } from 'node:fs';
9
- import { parse as parseYaml } from 'yaml';
10
- import { ComparisonBenchmark } from './benchmark.js';
11
- const program = new Command();
12
- program
13
- .name('relay-benchmark')
14
- .description('Benchmark agent swarms, sub-agents, and single agents')
15
- .version('1.0.0');
16
- program
17
- .command('run')
18
- .description('Run a benchmark comparison')
19
- .option('-d, --dataset <path>', 'Path to task dataset (YAML or JSON)')
20
- .option('-t, --task <id>', 'Run only a specific task by ID')
21
- .option('-c, --config <types>', 'Configurations to run (single,subagent,swarm,all)', 'all')
22
- .option('--cli <name>', 'CLI to use for agents', 'claude')
23
- .option('--cwd <path>', 'Working directory for tasks')
24
- .option('-q, --quiet', 'Suppress output', false)
25
- .option('--cooldown <ms>', 'Cooldown between runs in ms', '5000')
26
- .option('--max-swarm <n>', 'Maximum swarm size', '10')
27
- .option('-o, --output <path>', 'Output results to JSON file')
28
- .action(async (options) => {
29
- try {
30
- await runBenchmark(options);
31
- }
32
- catch (err) {
33
- console.error('Error:', err.message);
34
- process.exit(1);
35
- }
36
- });
37
- program
38
- .command('list')
39
- .description('List tasks in a dataset')
40
- .argument('<dataset>', 'Path to task dataset')
41
- .action((datasetPath) => {
42
- const dataset = loadDataset(datasetPath);
43
- console.log(`\nDataset: ${dataset.name || 'Unnamed'}`);
44
- if (dataset.description) {
45
- console.log(`Description: ${dataset.description}`);
46
- }
47
- console.log(`\nTasks (${dataset.tasks.length}):\n`);
48
- for (const task of dataset.tasks) {
49
- console.log(` ${task.id}`);
50
- console.log(` Complexity: ${task.complexity}`);
51
- console.log(` Files: ${task.files.length}`);
52
- console.log(` ${task.description.substring(0, 60)}...`);
53
- console.log('');
54
- }
55
- });
56
- async function runBenchmark(options) {
57
- // Parse configurations
58
- const configurations = parseConfigurations(options.config);
59
- // Build benchmark config
60
- const benchmarkConfig = {
61
- configurations,
62
- cli: options.cli,
63
- cwd: options.cwd,
64
- quiet: options.quiet,
65
- cooldownMs: parseInt(options.cooldown, 10),
66
- maxSwarmSize: parseInt(options.maxSwarm, 10),
67
- };
68
- const benchmark = new ComparisonBenchmark(benchmarkConfig);
69
- // Load tasks
70
- let tasks;
71
- if (options.dataset) {
72
- const dataset = loadDataset(options.dataset);
73
- tasks = dataset.tasks;
74
- if (options.task) {
75
- tasks = tasks.filter((t) => t.id === options.task);
76
- if (tasks.length === 0) {
77
- throw new Error(`Task not found: ${options.task}`);
78
- }
79
- }
80
- }
81
- else if (options.task) {
82
- // Create a simple task from command line
83
- tasks = [
84
- {
85
- id: options.task,
86
- description: options.task,
87
- files: [],
88
- expectedOutcome: 'Task completed',
89
- complexity: 'medium',
90
- },
91
- ];
92
- }
93
- else {
94
- throw new Error('Either --dataset or --task is required');
95
- }
96
- // Run benchmarks
97
- const results = [];
98
- for (const task of tasks) {
99
- if (!options.quiet) {
100
- console.log(`\n${'='.repeat(60)}`);
101
- console.log(`Running task: ${task.id}`);
102
- console.log('='.repeat(60));
103
- }
104
- const comparison = await benchmark.runComparison(task);
105
- if (!options.quiet) {
106
- benchmark.printComparison(comparison);
107
- }
108
- results.push({
109
- taskId: task.id,
110
- winner: comparison.winner,
111
- results: Object.fromEntries(comparison.results),
112
- scores: Object.fromEntries(comparison.scores),
113
- });
114
- }
115
- // Output results
116
- if (options.output) {
117
- const { writeFileSync } = await import('node:fs');
118
- writeFileSync(options.output, JSON.stringify(results, null, 2));
119
- console.log(`\nResults written to: ${options.output}`);
120
- }
121
- // Print summary
122
- if (!options.quiet && results.length > 1) {
123
- printSummary(results);
124
- }
125
- }
126
- function parseConfigurations(config) {
127
- if (config === 'all') {
128
- return ['single', 'subagent', 'swarm'];
129
- }
130
- const configs = config.split(',').map((c) => c.trim());
131
- const valid = ['single', 'subagent', 'swarm'];
132
- for (const c of configs) {
133
- if (!valid.includes(c)) {
134
- throw new Error(`Invalid configuration: ${c}. Valid: ${valid.join(', ')}`);
135
- }
136
- }
137
- return configs;
138
- }
139
- function loadDataset(path) {
140
- const content = readFileSync(path, 'utf-8');
141
- let data;
142
- if (path.endsWith('.yaml') || path.endsWith('.yml')) {
143
- data = parseYaml(content);
144
- }
145
- else {
146
- data = JSON.parse(content);
147
- }
148
- // Validate and normalize tasks
149
- if (!data.tasks || !Array.isArray(data.tasks)) {
150
- throw new Error('Dataset must have a "tasks" array');
151
- }
152
- data.tasks = data.tasks.map((t, i) => ({
153
- id: t.id || `task-${i}`,
154
- description: t.description || '',
155
- files: t.files || [],
156
- expectedOutcome: t.expectedOutcome || t.success_criteria || 'Completed',
157
- complexity: t.complexity || 'medium',
158
- timeoutMs: t.timeoutMs || 300000,
159
- tags: t.tags || [],
160
- }));
161
- return data;
162
- }
163
- function printSummary(results) {
164
- console.log('\n' + '='.repeat(60));
165
- console.log('BENCHMARK SUMMARY');
166
- console.log('='.repeat(60));
167
- const wins = {
168
- single: 0,
169
- subagent: 0,
170
- swarm: 0,
171
- };
172
- for (const result of results) {
173
- wins[result.winner]++;
174
- }
175
- console.log('\nWins by configuration:');
176
- for (const [config, count] of Object.entries(wins)) {
177
- const bar = '█'.repeat(count) + '░'.repeat(results.length - count);
178
- console.log(` ${config.padEnd(10)} ${bar} ${count}/${results.length}`);
179
- }
180
- const overallWinner = Object.entries(wins)
181
- .sort((a, b) => b[1] - a[1])[0][0];
182
- console.log(`\nOverall winner: ${overallWinner.toUpperCase()}`);
183
- }
184
- program.parse();
185
- //# sourceMappingURL=cli.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;;;GAIG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAQrD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,iBAAiB,CAAC;KACvB,WAAW,CAAC,uDAAuD,CAAC;KACpE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,4BAA4B,CAAC;KACzC,MAAM,CAAC,sBAAsB,EAAE,qCAAqC,CAAC;KACrE,MAAM,CAAC,iBAAiB,EAAE,gCAAgC,CAAC;KAC3D,MAAM,CACL,sBAAsB,EACtB,mDAAmD,EACnD,KAAK,CACN;KACA,MAAM,CAAC,cAAc,EAAE,uBAAuB,EAAE,QAAQ,CAAC;KACzD,MAAM,CAAC,cAAc,EAAE,6BAA6B,CAAC;KACrD,MAAM,CAAC,aAAa,EAAE,iBAAiB,EAAE,KAAK,CAAC;KAC/C,MAAM,CAAC,iBAAiB,EAAE,6BAA6B,EAAE,MAAM,CAAC;KAChE,MAAM,CAAC,iBAAiB,EAAE,oBAAoB,EAAE,IAAI,CAAC;KACrD,MAAM,CAAC,qBAAqB,EAAE,6BAA6B,CAAC;KAC5D,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,IAAI,CAAC;QACH,MAAM,YAAY,CAAC,OAAO,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,yBAAyB,CAAC;KACtC,QAAQ,CAAC,WAAW,EAAE,sBAAsB,CAAC;KAC7C,MAAM,CAAC,CAAC,WAAW,EAAE,EAAE;IACtB,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;IACzC,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,IAAI,IAAI,SAAS,EAAE,CAAC,CAAC;IACvD,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,KAAK,CAAC,MAAM,MAAM,CAAC,CAAC;IAEpD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACjC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAC/C,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;QAC3D,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,KAAK,UAAU,YAAY,CAAC,OAU3B;IACC,uBAAuB;IACvB,MAAM,cAAc,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAE3D,yBAAyB;IACzB,MAAM,eAAe,GAA6B;QAChD,cAAc;QACd,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,UAAU,EAAE,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1C,YAAY,EAAE,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;KAC7C,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC,eAAe,CAAC,CAAC;IAE3D,aAAa;IACb,IAAI,KAAa,CAAC;IAClB,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC7C,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAEtB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;YACnD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,mBAAmB,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;QACxB,yCAAyC;QACzC,KAAK,GAAG;YACN;gBACE,EAAE,EAAE,OAAO,CAAC,IAAI;gBAChB,WAAW,EAAE,OAAO,CAAC,IAAI;gBACzB,KAAK,EAAE,EAAE;gBACT,eAAe,EAAE,gBAAgB;gBACjC,UAAU,EAAE,QAAQ;aACrB;SACF,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;IAC5D,CAAC;IAED,iBAAiB;IACjB,MAAM,OAAO,GAAG,EAAE,CAAC;IACnB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACnB,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9B,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAEvD,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACnB,SAAS,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;QACxC,CAAC;QAED,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,OAAO,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAAC;YAC/C,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC;SAC9C,CAAC,CAAC;IACL,CAAC;IAED,iBAAiB;IACjB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QAClD,aAAa,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,yBAAyB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,gBAAgB;IAChB,IAAI,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzC,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,MAAc;IACzC,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;QACrB,OAAO,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAwB,CAAC;IAC9E,MAAM,KAAK,GAAwB,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;IAEnE,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,YAAY,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAE5C,IAAI,IAAiB,CAAC;IACtB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACpD,IAAI,GAAG,SAAS,CAAC,OAAO,CAAgB,CAAC;IAC3C,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAgB,CAAC;IAC5C,CAAC;IAED,+BAA+B;IAC/B,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9C,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QACrC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,QAAQ,CAAC,EAAE;QACvB,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,EAAE;QAChC,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;QACpB,eAAe,EAAE,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,gBAAgB,IAAI,WAAW;QACvE,UAAU,EAAE,CAAC,CAAC,UAAU,IAAI,QAAQ;QACpC,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,MAAM;QAChC,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE;KACnB,CAAC,CAAC,CAAC;IAEJ,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,YAAY,CACnB,OAKE;IAEF,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IACnC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAE5B,MAAM,IAAI,GAAsC;QAC9C,MAAM,EAAE,CAAC;QACT,QAAQ,EAAE,CAAC;QACX,KAAK,EAAE,CAAC;KACT,CAAC;IAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;IACxB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;IACxC,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACnD,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC;QACnE,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,GAAG,IAAI,KAAK,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,aAAa,GAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAmC;SAC1E,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,OAAO,CAAC,GAAG,CAAC,qBAAqB,aAAa,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;AAClE,CAAC;AAED,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -1,53 +0,0 @@
1
- /**
2
- * Harbor Integration
3
- *
4
- * Entry points for Harbor benchmark framework integration.
5
- * https://github.com/laude-institute/harbor
6
- */
7
- import type { ConfigurationType, HarborTaskInput, HarborEvaluationOutput, BenchmarkConfig } from './types.js';
8
- /**
9
- * Main Harbor evaluation entry point
10
- *
11
- * This function is called by Harbor to evaluate a task across all configurations.
12
- *
13
- * @example Harbor dataset format:
14
- * ```yaml
15
- * tasks:
16
- * - id: refactor-auth
17
- * description: "Refactor authentication to use JWT"
18
- * files:
19
- * - src/auth/session.ts
20
- * - src/auth/middleware.ts
21
- * success_criteria: "All tests pass, JWT tokens used"
22
- * complexity: medium
23
- * ```
24
- *
25
- * @example Running with Harbor:
26
- * ```bash
27
- * harbor run \
28
- * --dataset tasks.yaml \
29
- * --agent @agent-relay/benchmark/harbor \
30
- * --parallel 10
31
- * ```
32
- */
33
- export declare function evaluate(input: HarborTaskInput): Promise<HarborEvaluationOutput>;
34
- /**
35
- * Run a single configuration (for targeted Harbor evaluations)
36
- *
37
- * @example Running single config with Harbor:
38
- * ```bash
39
- * harbor run \
40
- * --dataset tasks.yaml \
41
- * --agent "@agent-relay/benchmark/harbor:evaluateSingle" \
42
- * --env-var CONFIG=swarm
43
- * ```
44
- */
45
- export declare function evaluateSingle(input: HarborTaskInput & {
46
- config?: ConfigurationType;
47
- }): Promise<Record<string, unknown>>;
48
- /**
49
- * Evaluate with custom configuration
50
- */
51
- export declare function evaluateCustom(input: HarborTaskInput, config: Partial<BenchmarkConfig>): Promise<HarborEvaluationOutput>;
52
- export default evaluate;
53
- //# sourceMappingURL=harbor.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"harbor.d.ts","sourceRoot":"","sources":["../src/harbor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAGV,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,eAAe,EAChB,MAAM,YAAY,CAAC;AAoBpB;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,sBAAsB,CAAC,CAiCjC;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,eAAe,GAAG;IAAE,MAAM,CAAC,EAAE,iBAAiB,CAAA;CAAE,GACtD,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAmBlC;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,eAAe,EACtB,MAAM,EAAE,OAAO,CAAC,eAAe,CAAC,GAC/B,OAAO,CAAC,sBAAsB,CAAC,CA8BjC;AAGD,eAAe,QAAQ,CAAC"}
@@ -1,127 +0,0 @@
1
- /**
2
- * Harbor Integration
3
- *
4
- * Entry points for Harbor benchmark framework integration.
5
- * https://github.com/laude-institute/harbor
6
- */
7
- import { ComparisonBenchmark } from './benchmark.js';
8
- const BENCHMARK_VERSION = '1.0.0';
9
- /**
10
- * Convert Harbor task input to internal Task format
11
- */
12
- function convertHarborTask(input) {
13
- return {
14
- id: input.id,
15
- description: input.description,
16
- files: input.files || [],
17
- expectedOutcome: input.success_criteria || 'Task completed successfully',
18
- complexity: input.complexity || 'medium',
19
- timeoutMs: 300000, // 5 minute default
20
- tags: [],
21
- };
22
- }
23
- /**
24
- * Main Harbor evaluation entry point
25
- *
26
- * This function is called by Harbor to evaluate a task across all configurations.
27
- *
28
- * @example Harbor dataset format:
29
- * ```yaml
30
- * tasks:
31
- * - id: refactor-auth
32
- * description: "Refactor authentication to use JWT"
33
- * files:
34
- * - src/auth/session.ts
35
- * - src/auth/middleware.ts
36
- * success_criteria: "All tests pass, JWT tokens used"
37
- * complexity: medium
38
- * ```
39
- *
40
- * @example Running with Harbor:
41
- * ```bash
42
- * harbor run \
43
- * --dataset tasks.yaml \
44
- * --agent @agent-relay/benchmark/harbor \
45
- * --parallel 10
46
- * ```
47
- */
48
- export async function evaluate(input) {
49
- const startedAt = Date.now();
50
- const task = convertHarborTask(input);
51
- const benchmark = new ComparisonBenchmark({
52
- configurations: ['single', 'subagent', 'swarm'],
53
- cli: 'claude',
54
- quiet: true, // Suppress output in Harbor runs
55
- cooldownMs: 2000,
56
- });
57
- const comparison = await benchmark.runComparison(task);
58
- const completedAt = Date.now();
59
- return {
60
- task_id: task.id,
61
- configurations: Object.fromEntries(comparison.results),
62
- winner: comparison.winner,
63
- scores: Object.fromEntries(comparison.scores),
64
- metadata: {
65
- benchmark_version: BENCHMARK_VERSION,
66
- started_at: startedAt,
67
- completed_at: completedAt,
68
- total_duration_ms: completedAt - startedAt,
69
- },
70
- };
71
- }
72
- /**
73
- * Run a single configuration (for targeted Harbor evaluations)
74
- *
75
- * @example Running single config with Harbor:
76
- * ```bash
77
- * harbor run \
78
- * --dataset tasks.yaml \
79
- * --agent "@agent-relay/benchmark/harbor:evaluateSingle" \
80
- * --env-var CONFIG=swarm
81
- * ```
82
- */
83
- export async function evaluateSingle(input) {
84
- const config = input.config || 'single';
85
- const task = convertHarborTask(input);
86
- const benchmark = new ComparisonBenchmark({
87
- configurations: [config],
88
- cli: 'claude',
89
- quiet: true,
90
- cooldownMs: 0,
91
- });
92
- const result = await benchmark.runSingle(task, config);
93
- return {
94
- task_id: task.id,
95
- configuration: config,
96
- result,
97
- success: result.success,
98
- };
99
- }
100
- /**
101
- * Evaluate with custom configuration
102
- */
103
- export async function evaluateCustom(input, config) {
104
- const startedAt = Date.now();
105
- const task = convertHarborTask(input);
106
- const benchmark = new ComparisonBenchmark({
107
- ...config,
108
- quiet: true,
109
- });
110
- const comparison = await benchmark.runComparison(task);
111
- const completedAt = Date.now();
112
- return {
113
- task_id: task.id,
114
- configurations: Object.fromEntries(comparison.results),
115
- winner: comparison.winner,
116
- scores: Object.fromEntries(comparison.scores),
117
- metadata: {
118
- benchmark_version: BENCHMARK_VERSION,
119
- started_at: startedAt,
120
- completed_at: completedAt,
121
- total_duration_ms: completedAt - startedAt,
122
- },
123
- };
124
- }
125
- // Default export for Harbor
126
- export default evaluate;
127
- //# sourceMappingURL=harbor.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"harbor.js","sourceRoot":"","sources":["../src/harbor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAUH,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAErD,MAAM,iBAAiB,GAAG,OAAO,CAAC;AAElC;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAsB;IAC/C,OAAO;QACL,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,KAAK,EAAE,KAAK,CAAC,KAAK,IAAI,EAAE;QACxB,eAAe,EAAE,KAAK,CAAC,gBAAgB,IAAI,6BAA6B;QACxE,UAAU,EAAG,KAAK,CAAC,UAA6B,IAAI,QAAQ;QAC5D,SAAS,EAAE,MAAM,EAAE,mBAAmB;QACtC,IAAI,EAAE,EAAE;KACT,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,KAAsB;IAEtB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,cAAc,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC;QAC/C,GAAG,EAAE,QAAQ;QACb,KAAK,EAAE,IAAI,EAAE,iCAAiC;QAC9C,UAAU,EAAE,IAAI;KACjB,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAEvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE/B,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,cAAc,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAGpD;QACD,MAAM,EAAE,UAAU,CAAC,MAAM;QACzB,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAG3C;QACD,QAAQ,EAAE;YACR,iBAAiB,EAAE,iBAAiB;YACpC,UAAU,EAAE,SAAS;YACrB,YAAY,EAAE,WAAW;YACzB,iBAAiB,EAAE,WAAW,GAAG,SAAS;SAC3C;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAuD;IAEvD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,QAAQ,CAAC;IACxC,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,cAAc,EAAE,CAAC,MAAM,CAAC;QACxB,GAAG,EAAE,QAAQ;QACb,KAAK,EAAE,IAAI;QACX,UAAU,EAAE,CAAC;KACd,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAEvD,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,aAAa,EAAE,MAAM;QACrB,MAAM;QACN,OAAO,EAAE,MAAM,CAAC,OAAO;KACxB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAsB,EACtB,MAAgC;IAEhC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,GAAG,MAAM;QACT,KAAK,EAAE,IAAI;KACZ,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IACvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE/B,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,cAAc,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAGpD;QACD,MAAM,EAAE,UAAU,CAAC,MAAM;QACzB,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAG3C;QACD,QAAQ,EAAE;YACR,iBAAiB,EAAE,iBAAiB;YACpC,UAAU,EAAE,SAAS;YACrB,YAAY,EAAE,WAAW;YACzB,iBAAiB,EAAE,WAAW,GAAG,SAAS;SAC3C;KACF,CAAC;AACJ,CAAC;AAED,4BAA4B;AAC5B,eAAe,QAAQ,CAAC"}