agent-relay 2.3.4 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. package/README.md +1 -1
  2. package/dist/src/cli/index.js +124 -7
  3. package/dist/src/cli/index.js.map +1 -1
  4. package/package.json +20 -26
  5. package/packages/acp-bridge/package.json +2 -2
  6. package/packages/bridge/package.json +7 -7
  7. package/packages/config/package.json +2 -2
  8. package/packages/continuity/package.json +2 -2
  9. package/packages/daemon/package.json +12 -12
  10. package/packages/hooks/package.json +4 -4
  11. package/packages/mcp/package.json +5 -5
  12. package/packages/memory/package.json +2 -2
  13. package/packages/policy/package.json +2 -2
  14. package/packages/protocol/package.json +1 -1
  15. package/packages/resiliency/package.json +1 -1
  16. package/packages/sdk/dist/index.d.ts +1 -29
  17. package/packages/sdk/dist/index.d.ts.map +1 -1
  18. package/packages/sdk/dist/index.js +1 -38
  19. package/packages/sdk/dist/index.js.map +1 -1
  20. package/packages/sdk/package.json +4 -25
  21. package/packages/sdk/src/index.ts +1 -69
  22. package/packages/sdk-py/README.md +56 -0
  23. package/packages/sdk-py/pyproject.toml +23 -0
  24. package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
  25. package/packages/sdk-py/src/agent_relay/builder.py +367 -0
  26. package/packages/sdk-py/src/agent_relay/types.py +92 -0
  27. package/packages/sdk-py/tests/__init__.py +0 -0
  28. package/packages/sdk-py/tests/test_builder.py +101 -0
  29. package/packages/sdk-ts/dist/index.d.ts +1 -0
  30. package/packages/sdk-ts/dist/index.d.ts.map +1 -1
  31. package/packages/sdk-ts/dist/index.js +1 -0
  32. package/packages/sdk-ts/dist/index.js.map +1 -1
  33. package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
  34. package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
  35. package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
  36. package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
  37. package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
  38. package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
  39. package/packages/sdk-ts/dist/workflows/builder.js +179 -0
  40. package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
  41. package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
  42. package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
  43. package/packages/sdk-ts/dist/workflows/cli.js +82 -0
  44. package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
  45. package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
  46. package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
  47. package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
  48. package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
  49. package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
  50. package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
  51. package/packages/sdk-ts/dist/workflows/index.js +10 -0
  52. package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
  53. package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
  54. package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
  55. package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
  56. package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
  57. package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
  58. package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
  59. package/packages/sdk-ts/dist/workflows/run.js +24 -0
  60. package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
  61. package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
  62. package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
  63. package/packages/sdk-ts/dist/workflows/runner.js +650 -0
  64. package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
  65. package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
  66. package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
  67. package/packages/sdk-ts/dist/workflows/state.js +140 -0
  68. package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
  69. package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
  70. package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
  71. package/packages/sdk-ts/dist/workflows/templates.js +395 -0
  72. package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
  73. package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
  74. package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
  75. package/packages/sdk-ts/dist/workflows/types.js +8 -0
  76. package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
  77. package/packages/sdk-ts/package.json +8 -2
  78. package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
  79. package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
  80. package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
  81. package/packages/sdk-ts/src/index.ts +1 -0
  82. package/packages/sdk-ts/src/workflows/README.md +450 -0
  83. package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
  84. package/packages/sdk-ts/src/workflows/builder.ts +241 -0
  85. package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
  86. package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
  87. package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
  88. package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
  89. package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
  90. package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
  91. package/packages/sdk-ts/src/workflows/cli.ts +93 -0
  92. package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
  93. package/packages/sdk-ts/src/workflows/index.ts +9 -0
  94. package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
  95. package/packages/sdk-ts/src/workflows/run.ts +47 -0
  96. package/packages/sdk-ts/src/workflows/runner.ts +873 -0
  97. package/packages/sdk-ts/src/workflows/schema.json +321 -0
  98. package/packages/sdk-ts/src/workflows/state.ts +279 -0
  99. package/packages/sdk-ts/src/workflows/templates.ts +544 -0
  100. package/packages/sdk-ts/src/workflows/types.ts +178 -0
  101. package/packages/sdk-ts/tsconfig.json +6 -1
  102. package/packages/spawner/package.json +1 -1
  103. package/packages/state/package.json +1 -1
  104. package/packages/storage/package.json +2 -2
  105. package/packages/telemetry/package.json +1 -1
  106. package/packages/trajectory/package.json +2 -2
  107. package/packages/user-directory/package.json +2 -2
  108. package/packages/utils/package.json +3 -3
  109. package/packages/wrapper/package.json +5 -6
  110. package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
  111. package/packages/api-types/.trajectories/index.json +0 -12
  112. package/packages/api-types/dist/index.d.ts +0 -21
  113. package/packages/api-types/dist/index.d.ts.map +0 -1
  114. package/packages/api-types/dist/index.js +0 -22
  115. package/packages/api-types/dist/index.js.map +0 -1
  116. package/packages/api-types/dist/schemas/agent.d.ts +0 -259
  117. package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
  118. package/packages/api-types/dist/schemas/agent.js +0 -102
  119. package/packages/api-types/dist/schemas/agent.js.map +0 -1
  120. package/packages/api-types/dist/schemas/api.d.ts +0 -290
  121. package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
  122. package/packages/api-types/dist/schemas/api.js +0 -162
  123. package/packages/api-types/dist/schemas/api.js.map +0 -1
  124. package/packages/api-types/dist/schemas/decision.d.ts +0 -230
  125. package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
  126. package/packages/api-types/dist/schemas/decision.js +0 -104
  127. package/packages/api-types/dist/schemas/decision.js.map +0 -1
  128. package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
  129. package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
  130. package/packages/api-types/dist/schemas/fleet.js +0 -71
  131. package/packages/api-types/dist/schemas/fleet.js.map +0 -1
  132. package/packages/api-types/dist/schemas/history.d.ts +0 -180
  133. package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
  134. package/packages/api-types/dist/schemas/history.js +0 -72
  135. package/packages/api-types/dist/schemas/history.js.map +0 -1
  136. package/packages/api-types/dist/schemas/index.d.ts +0 -14
  137. package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
  138. package/packages/api-types/dist/schemas/index.js +0 -22
  139. package/packages/api-types/dist/schemas/index.js.map +0 -1
  140. package/packages/api-types/dist/schemas/message.d.ts +0 -456
  141. package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
  142. package/packages/api-types/dist/schemas/message.js +0 -88
  143. package/packages/api-types/dist/schemas/message.js.map +0 -1
  144. package/packages/api-types/dist/schemas/session.d.ts +0 -60
  145. package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
  146. package/packages/api-types/dist/schemas/session.js +0 -36
  147. package/packages/api-types/dist/schemas/session.js.map +0 -1
  148. package/packages/api-types/dist/schemas/task.d.ts +0 -111
  149. package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
  150. package/packages/api-types/dist/schemas/task.js +0 -64
  151. package/packages/api-types/dist/schemas/task.js.map +0 -1
  152. package/packages/api-types/package.json +0 -61
  153. package/packages/api-types/scripts/generate-openapi.ts +0 -106
  154. package/packages/api-types/src/index.ts +0 -22
  155. package/packages/api-types/src/schemas/agent.test.ts +0 -164
  156. package/packages/api-types/src/schemas/agent.ts +0 -110
  157. package/packages/api-types/src/schemas/api.test.ts +0 -372
  158. package/packages/api-types/src/schemas/api.ts +0 -194
  159. package/packages/api-types/src/schemas/decision.test.ts +0 -324
  160. package/packages/api-types/src/schemas/decision.ts +0 -136
  161. package/packages/api-types/src/schemas/fleet.test.ts +0 -212
  162. package/packages/api-types/src/schemas/fleet.ts +0 -83
  163. package/packages/api-types/src/schemas/history.test.ts +0 -242
  164. package/packages/api-types/src/schemas/history.ts +0 -84
  165. package/packages/api-types/src/schemas/index.ts +0 -148
  166. package/packages/api-types/src/schemas/message.test.ts +0 -192
  167. package/packages/api-types/src/schemas/message.ts +0 -98
  168. package/packages/api-types/src/schemas/session.test.ts +0 -104
  169. package/packages/api-types/src/schemas/session.ts +0 -40
  170. package/packages/api-types/src/schemas/task.test.ts +0 -192
  171. package/packages/api-types/src/schemas/task.ts +0 -78
  172. package/packages/api-types/tsconfig.json +0 -19
  173. package/packages/api-types/vitest.config.ts +0 -9
  174. package/packages/benchmark/README.md +0 -200
  175. package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
  176. package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
  177. package/packages/benchmark/datasets/quick-test.yaml +0 -20
  178. package/packages/benchmark/dist/benchmark.d.ts +0 -47
  179. package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
  180. package/packages/benchmark/dist/benchmark.js +0 -224
  181. package/packages/benchmark/dist/benchmark.js.map +0 -1
  182. package/packages/benchmark/dist/cli.d.ts +0 -8
  183. package/packages/benchmark/dist/cli.d.ts.map +0 -1
  184. package/packages/benchmark/dist/cli.js +0 -185
  185. package/packages/benchmark/dist/cli.js.map +0 -1
  186. package/packages/benchmark/dist/harbor.d.ts +0 -53
  187. package/packages/benchmark/dist/harbor.d.ts.map +0 -1
  188. package/packages/benchmark/dist/harbor.js +0 -127
  189. package/packages/benchmark/dist/harbor.js.map +0 -1
  190. package/packages/benchmark/dist/index.d.ts +0 -48
  191. package/packages/benchmark/dist/index.d.ts.map +0 -1
  192. package/packages/benchmark/dist/index.js +0 -50
  193. package/packages/benchmark/dist/index.js.map +0 -1
  194. package/packages/benchmark/dist/runners/base.d.ts +0 -63
  195. package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
  196. package/packages/benchmark/dist/runners/base.js +0 -156
  197. package/packages/benchmark/dist/runners/base.js.map +0 -1
  198. package/packages/benchmark/dist/runners/index.d.ts +0 -10
  199. package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
  200. package/packages/benchmark/dist/runners/index.js +0 -10
  201. package/packages/benchmark/dist/runners/index.js.map +0 -1
  202. package/packages/benchmark/dist/runners/single.d.ts +0 -19
  203. package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
  204. package/packages/benchmark/dist/runners/single.js +0 -111
  205. package/packages/benchmark/dist/runners/single.js.map +0 -1
  206. package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
  207. package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
  208. package/packages/benchmark/dist/runners/subagent.js +0 -212
  209. package/packages/benchmark/dist/runners/subagent.js.map +0 -1
  210. package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
  211. package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
  212. package/packages/benchmark/dist/runners/swarm.js +0 -273
  213. package/packages/benchmark/dist/runners/swarm.js.map +0 -1
  214. package/packages/benchmark/dist/types.d.ts +0 -178
  215. package/packages/benchmark/dist/types.d.ts.map +0 -1
  216. package/packages/benchmark/dist/types.js +0 -16
  217. package/packages/benchmark/dist/types.js.map +0 -1
  218. package/packages/benchmark/package.json +0 -80
  219. package/packages/benchmark/src/benchmark.ts +0 -298
  220. package/packages/benchmark/src/cli.ts +0 -240
  221. package/packages/benchmark/src/harbor.ts +0 -170
  222. package/packages/benchmark/src/index.ts +0 -73
  223. package/packages/benchmark/src/runners/base.ts +0 -205
  224. package/packages/benchmark/src/runners/index.ts +0 -10
  225. package/packages/benchmark/src/runners/single.ts +0 -121
  226. package/packages/benchmark/src/runners/subagent.ts +0 -240
  227. package/packages/benchmark/src/runners/swarm.ts +0 -326
  228. package/packages/benchmark/src/types.ts +0 -205
  229. package/packages/benchmark/tsconfig.json +0 -20
  230. package/packages/cli-tester/README.md +0 -277
  231. package/packages/cli-tester/dist/index.d.ts +0 -21
  232. package/packages/cli-tester/dist/index.d.ts.map +0 -1
  233. package/packages/cli-tester/dist/index.js +0 -21
  234. package/packages/cli-tester/dist/index.js.map +0 -1
  235. package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
  236. package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
  237. package/packages/cli-tester/dist/utils/credential-check.js +0 -230
  238. package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
  239. package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
  240. package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
  241. package/packages/cli-tester/dist/utils/socket-client.js +0 -153
  242. package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
  243. package/packages/cli-tester/docker/Dockerfile +0 -61
  244. package/packages/cli-tester/docker/docker-compose.yml +0 -71
  245. package/packages/cli-tester/docker/entrypoint.sh +0 -58
  246. package/packages/cli-tester/package.json +0 -32
  247. package/packages/cli-tester/scripts/clear-auth.sh +0 -101
  248. package/packages/cli-tester/scripts/inject-message.sh +0 -42
  249. package/packages/cli-tester/scripts/start.sh +0 -71
  250. package/packages/cli-tester/scripts/test-cli.sh +0 -56
  251. package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
  252. package/packages/cli-tester/scripts/test-registration.sh +0 -182
  253. package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
  254. package/packages/cli-tester/scripts/test-spawn.sh +0 -140
  255. package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
  256. package/packages/cli-tester/scripts/verify-auth.sh +0 -112
  257. package/packages/cli-tester/src/index.ts +0 -40
  258. package/packages/cli-tester/src/utils/credential-check.ts +0 -284
  259. package/packages/cli-tester/src/utils/socket-client.ts +0 -211
  260. package/packages/cli-tester/tests/credential-check.test.ts +0 -56
  261. package/packages/cli-tester/tsconfig.json +0 -11
  262. package/packages/sdk/dist/browser-client.d.ts +0 -212
  263. package/packages/sdk/dist/browser-client.d.ts.map +0 -1
  264. package/packages/sdk/dist/browser-client.js +0 -750
  265. package/packages/sdk/dist/browser-client.js.map +0 -1
  266. package/packages/sdk/dist/browser-framing.d.ts +0 -46
  267. package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
  268. package/packages/sdk/dist/browser-framing.js +0 -122
  269. package/packages/sdk/dist/browser-framing.js.map +0 -1
  270. package/packages/sdk/dist/standalone.d.ts +0 -89
  271. package/packages/sdk/dist/standalone.d.ts.map +0 -1
  272. package/packages/sdk/dist/standalone.js +0 -131
  273. package/packages/sdk/dist/standalone.js.map +0 -1
  274. package/packages/sdk/dist/transports/index.d.ts +0 -92
  275. package/packages/sdk/dist/transports/index.d.ts.map +0 -1
  276. package/packages/sdk/dist/transports/index.js +0 -129
  277. package/packages/sdk/dist/transports/index.js.map +0 -1
  278. package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
  279. package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
  280. package/packages/sdk/dist/transports/socket-transport.js +0 -94
  281. package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
  282. package/packages/sdk/dist/transports/types.d.ts +0 -69
  283. package/packages/sdk/dist/transports/types.d.ts.map +0 -1
  284. package/packages/sdk/dist/transports/types.js +0 -10
  285. package/packages/sdk/dist/transports/types.js.map +0 -1
  286. package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
  287. package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
  288. package/packages/sdk/dist/transports/websocket-transport.js +0 -180
  289. package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
  290. package/packages/sdk/src/browser-client.ts +0 -985
  291. package/packages/sdk/src/browser-framing.test.ts +0 -115
  292. package/packages/sdk/src/browser-framing.ts +0 -150
  293. package/packages/sdk/src/standalone.ts +0 -183
  294. package/packages/sdk/src/transports/index.ts +0 -197
  295. package/packages/sdk/src/transports/socket-transport.ts +0 -115
  296. package/packages/sdk/src/transports/types.ts +0 -77
  297. package/packages/sdk/src/transports/websocket-transport.ts +0 -245
@@ -1,224 +0,0 @@
1
- /**
2
- * Comparison Benchmark
3
- *
4
- * Main orchestrator for running comparison benchmarks across configurations.
5
- */
6
- import { DEFAULT_BENCHMARK_CONFIG } from './types.js';
7
- import { SingleAgentRunner, SubAgentRunner, SwarmRunner, } from './runners/index.js';
8
- /**
9
- * Main benchmark orchestrator
10
- */
11
- export class ComparisonBenchmark {
12
- config;
13
- runners;
14
- constructor(config = {}) {
15
- this.config = { ...DEFAULT_BENCHMARK_CONFIG, ...config };
16
- // Initialize runners for configured configurations
17
- this.runners = new Map();
18
- for (const configType of this.config.configurations) {
19
- this.runners.set(configType, this.createRunner(configType));
20
- }
21
- }
22
- /**
23
- * Create a runner for a configuration type
24
- */
25
- createRunner(type) {
26
- switch (type) {
27
- case 'single':
28
- return new SingleAgentRunner(this.config);
29
- case 'subagent':
30
- return new SubAgentRunner(this.config);
31
- case 'swarm':
32
- return new SwarmRunner(this.config);
33
- default:
34
- throw new Error(`Unknown configuration type: ${type}`);
35
- }
36
- }
37
- /**
38
- * Run a comparison across all configured configurations
39
- */
40
- async runComparison(task) {
41
- const results = new Map();
42
- const scores = new Map();
43
- for (const [configType, runner] of this.runners) {
44
- if (!this.config.quiet) {
45
- console.log(`\n=== Running ${configType} configuration ===`);
46
- console.log(`Task: ${task.id}`);
47
- }
48
- try {
49
- await runner.setup();
50
- const result = await runner.run(task);
51
- await runner.teardown();
52
- results.set(configType, result);
53
- scores.set(configType, this.calculateScore(result));
54
- if (!this.config.quiet) {
55
- this.printRunResult(result);
56
- }
57
- }
58
- catch (err) {
59
- console.error(`Error running ${configType}:`, err.message);
60
- // Create failed result
61
- const failedResult = {
62
- taskId: task.id,
63
- configuration: configType,
64
- totalTimeMs: 0,
65
- timeToFirstActionMs: 0,
66
- messageCount: 0,
67
- avgLatencyMs: 0,
68
- latencyP50Ms: 0,
69
- latencyP99Ms: 0,
70
- coordinationRounds: 0,
71
- agentCount: 0,
72
- totalTokensUsed: 0,
73
- peakMemoryMb: 0,
74
- success: false,
75
- completionRate: 0,
76
- errors: [err.message],
77
- startedAt: Date.now(),
78
- completedAt: Date.now(),
79
- };
80
- results.set(configType, failedResult);
81
- scores.set(configType, { total: 0, successScore: 0, timeScore: 0, efficiencyScore: 0 });
82
- }
83
- // Cool-down between runs
84
- if (this.config.cooldownMs > 0) {
85
- await new Promise((r) => setTimeout(r, this.config.cooldownMs));
86
- }
87
- }
88
- const winner = this.determineWinner(results, scores);
89
- return {
90
- taskId: task.id,
91
- results,
92
- winner,
93
- scores,
94
- };
95
- }
96
- /**
97
- * Run a single configuration
98
- */
99
- async runSingle(task, configType) {
100
- const runner = this.runners.get(configType);
101
- if (!runner) {
102
- throw new Error(`Configuration ${configType} not enabled`);
103
- }
104
- await runner.setup();
105
- const result = await runner.run(task);
106
- await runner.teardown();
107
- return result;
108
- }
109
- /**
110
- * Calculate score breakdown for a result
111
- */
112
- calculateScore(result) {
113
- const maxTimeMs = 300000; // 5 minutes baseline
114
- // Success component (0-50 points)
115
- const successScore = result.success ? 50 : result.completionRate * 25;
116
- // Time component (0-30 points) - faster is better
117
- const timeScore = result.success
118
- ? 30 * Math.max(0, 1 - result.totalTimeMs / maxTimeMs)
119
- : 0;
120
- // Efficiency component (0-20 points) - fewer agents is better for same result
121
- const efficiencyScore = result.success
122
- ? 20 / Math.max(1, result.agentCount)
123
- : 0;
124
- return {
125
- total: successScore + timeScore + efficiencyScore,
126
- successScore,
127
- timeScore,
128
- efficiencyScore,
129
- };
130
- }
131
- /**
132
- * Determine the winning configuration
133
- */
134
- determineWinner(results, scores) {
135
- let best = 'single';
136
- let bestScore = -1;
137
- for (const [configType, score] of scores) {
138
- if (score.total > bestScore) {
139
- bestScore = score.total;
140
- best = configType;
141
- }
142
- }
143
- return best;
144
- }
145
- /**
146
- * Print a single run result
147
- */
148
- printRunResult(result) {
149
- console.log(`\nResult for ${result.configuration}:`);
150
- console.log(` Success: ${result.success ? '✓' : '✗'}`);
151
- console.log(` Time: ${(result.totalTimeMs / 1000).toFixed(1)}s`);
152
- console.log(` Agents: ${result.agentCount}`);
153
- console.log(` Messages: ${result.messageCount}`);
154
- if (result.errors.length > 0) {
155
- console.log(` Errors: ${result.errors.join(', ')}`);
156
- }
157
- }
158
- /**
159
- * Print comparison table
160
- */
161
- printComparison(comparison) {
162
- console.log('\n' + '='.repeat(60));
163
- console.log('COMPARISON RESULTS');
164
- console.log('='.repeat(60));
165
- console.log(`Task: ${comparison.taskId}`);
166
- console.log(`Winner: ${comparison.winner.toUpperCase()}`);
167
- console.log('');
168
- // Build table data
169
- const configs = Array.from(comparison.results.keys());
170
- const headers = ['Metric', ...configs.map((c) => c.charAt(0).toUpperCase() + c.slice(1))];
171
- const rows = [
172
- [
173
- 'Success',
174
- ...configs.map((c) => comparison.results.get(c)?.success ? '✓' : '✗'),
175
- ],
176
- [
177
- 'Time (s)',
178
- ...configs.map((c) => ((comparison.results.get(c)?.totalTimeMs || 0) / 1000).toFixed(1)),
179
- ],
180
- [
181
- 'Agents',
182
- ...configs.map((c) => String(comparison.results.get(c)?.agentCount || 0)),
183
- ],
184
- [
185
- 'Messages',
186
- ...configs.map((c) => String(comparison.results.get(c)?.messageCount || 0)),
187
- ],
188
- [
189
- 'Avg Latency (ms)',
190
- ...configs.map((c) => (comparison.results.get(c)?.avgLatencyMs || 0).toFixed(0)),
191
- ],
192
- [
193
- 'Completion %',
194
- ...configs.map((c) => ((comparison.results.get(c)?.completionRate || 0) * 100).toFixed(0) + '%'),
195
- ],
196
- [
197
- 'Score',
198
- ...configs.map((c) => (comparison.scores.get(c)?.total || 0).toFixed(1)),
199
- ],
200
- ];
201
- // Print table
202
- const colWidths = headers.map((h, i) => Math.max(h.length, ...rows.map((r) => String(r[i]).length)));
203
- const separator = colWidths.map((w) => '-'.repeat(w + 2)).join('+');
204
- console.log(separator);
205
- console.log('|' +
206
- headers.map((h, i) => ` ${h.padEnd(colWidths[i])} `).join('|') +
207
- '|');
208
- console.log(separator);
209
- for (const row of rows) {
210
- console.log('|' +
211
- row.map((cell, i) => ` ${String(cell).padEnd(colWidths[i])} `).join('|') +
212
- '|');
213
- }
214
- console.log(separator);
215
- }
216
- }
217
- /**
218
- * Quick helper to run a comparison benchmark
219
- */
220
- export async function runComparison(task, config) {
221
- const benchmark = new ComparisonBenchmark(config);
222
- return benchmark.runComparison(task);
223
- }
224
- //# sourceMappingURL=benchmark.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"benchmark.js","sourceRoot":"","sources":["../src/benchmark.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAUH,OAAO,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AACtD,OAAO,EAEL,iBAAiB,EACjB,cAAc,EACd,WAAW,GACZ,MAAM,oBAAoB,CAAC;AAE5B;;GAEG;AACH,MAAM,OAAO,mBAAmB;IACtB,MAAM,CAAkB;IACxB,OAAO,CAA8C;IAE7D,YAAY,SAAmC,EAAE;QAC/C,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,wBAAwB,EAAE,GAAG,MAAM,EAAE,CAAC;QAEzD,mDAAmD;QACnD,IAAI,CAAC,OAAO,GAAG,IAAI,GAAG,EAAE,CAAC;QACzB,KAAK,MAAM,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;YACpD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAuB;QAC1C,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,QAAQ;gBACX,OAAO,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC5C,KAAK,UAAU;gBACb,OAAO,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACzC,KAAK,OAAO;gBACV,OAAO,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACtC;gBACE,MAAM,IAAI,KAAK,CAAC,+BAA+B,IAAI,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa,CAAC,IAAU;QAC5B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAgC,CAAC;QACxD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAqC,CAAC;QAE5D,KAAK,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAChD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;gBACvB,OAAO,CAAC,GAAG,CAAC,iBAAiB,UAAU,oBAAoB,CAAC,CAAC;gBAC7D,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;YAClC,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;gBACrB,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;gBACtC,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAExB,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;gBAChC,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC;gBAEpD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;oBACvB,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,KAAK,CAAC,iBAAiB,UAAU,GAAG,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;gBAEtE,uBAAuB;gBACvB,MAAM,YAAY,GAAc;oBAC9B,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,aAAa,EAAE,UAAU;oBACzB,WAAW,EAAE,CAAC;oBACd,mBAAmB,EAAE,CAAC;oBACtB,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,kBAAkB,EAAE,CAAC;oBACrB,UAAU,EAAE,CAAC;oBACb,eAAe,EAAE,CAAC;oBAClB,YAAY,EAAE,CAAC;oBACf,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,CAAC;oBACjB,MAAM,EAAE,CAAE,GAAa,CAAC,OAAO,CAAC;oBAChC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;oBACrB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;iBACxB,CAAC;gBACF,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;gBACtC,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,CAAC,CAAC;YAC1F,CAAC;YAED,yBAAyB;YACzB,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;gBAC/B,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;YAClE,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAErD,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,OAAO;YACP,MAAM;YACN,MAAM;SACP,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CACb,IAAU,EACV,UAA6B;QAE7B,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,iBAAiB,UAAU,cAAc,CAAC,CAAC;QAC7D,CAAC;QAED,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACrB,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;QAExB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,MAAiB;QACtC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,qBAAqB;QAE/C,kCAAkC;QAClC,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,GAAG,EAAE,CAAC;QAEtE,kDAAkD;QAClD,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO;YAC9B,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,WAAW,GAAG,SAAS,CAAC;YACtD,CAAC,CAAC,CAAC,CAAC;QAEN,8EAA8E;QAC9E,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO;YACpC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC;YACrC,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,KAAK,EAAE,YAAY,GAAG,SAAS,GAAG,eAAe;YACjD,YAAY;YACZ,SAAS;YACT,eAAe;SAChB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,OAA0C,EAC1C,MAA8C;QAE9C,IAAI,IAAI,GAAsB,QAAQ,CAAC;QACvC,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC;QAEnB,KAAK,MAAM,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;YACzC,IAAI,KAAK,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;gBAC5B,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC;gBACxB,IAAI,GAAG,UAAU,CAAC;YACpB,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,MAAiB;QACtC,OAAO,CAAC,GAAG,CAAC,gBAAgB,MAAM,CAAC,aAAa,GAAG,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QACxD,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAClE,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,UAA4B;QAC1C,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,SAAS,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1C,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAEhB,mBAAmB;QACnB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QACtD,MAAM,OAAO,GAAG,CAAC,QAAQ,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAE1F,MAAM,IAAI,GAAG;YACX;gBACE,SAAS;gBACT,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAC/C;aACF;YACD;gBACE,UAAU;gBACV,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAClE;aACF;YACD;gBACE,QAAQ;gBACR,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,UAAU,IAAI,CAAC,CAAC,CACnD;aACF;YACD;gBACE,UAAU;gBACV,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CACrD;aACF;YACD;gBACE,kBAAkB;gBAClB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAC1D;aACF;YACD;gBACE,cAAc;gBACd,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,cAAc,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,CAC1E;aACF;YACD;gBACE,OAAO;gBACP,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAClD;aACF;SACF,CAAC;QAEF,cAAc;QACd,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACrC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAC5D,CAAC;QAEF,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEpE,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvB,OAAO,CAAC,GAAG,CACT,GAAG;YACD,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YAC9D,GAAG,CACN,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAEvB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CACT,GAAG;gBACD,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBACxE,GAAG,CACN,CAAC;QACJ,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IACzB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAU,EACV,MAAiC;IAEjC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC,MAAM,CAAC,CAAC;IAClD,OAAO,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;AACvC,CAAC"}
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Benchmark CLI
4
- *
5
- * Command-line interface for running agent swarm benchmarks.
6
- */
7
- export {};
8
- //# sourceMappingURL=cli.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;;;GAIG"}
@@ -1,185 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * Benchmark CLI
4
- *
5
- * Command-line interface for running agent swarm benchmarks.
6
- */
7
- import { Command } from 'commander';
8
- import { readFileSync } from 'node:fs';
9
- import { parse as parseYaml } from 'yaml';
10
- import { ComparisonBenchmark } from './benchmark.js';
11
- const program = new Command();
12
- program
13
- .name('relay-benchmark')
14
- .description('Benchmark agent swarms, sub-agents, and single agents')
15
- .version('1.0.0');
16
- program
17
- .command('run')
18
- .description('Run a benchmark comparison')
19
- .option('-d, --dataset <path>', 'Path to task dataset (YAML or JSON)')
20
- .option('-t, --task <id>', 'Run only a specific task by ID')
21
- .option('-c, --config <types>', 'Configurations to run (single,subagent,swarm,all)', 'all')
22
- .option('--cli <name>', 'CLI to use for agents', 'claude')
23
- .option('--cwd <path>', 'Working directory for tasks')
24
- .option('-q, --quiet', 'Suppress output', false)
25
- .option('--cooldown <ms>', 'Cooldown between runs in ms', '5000')
26
- .option('--max-swarm <n>', 'Maximum swarm size', '10')
27
- .option('-o, --output <path>', 'Output results to JSON file')
28
- .action(async (options) => {
29
- try {
30
- await runBenchmark(options);
31
- }
32
- catch (err) {
33
- console.error('Error:', err.message);
34
- process.exit(1);
35
- }
36
- });
37
- program
38
- .command('list')
39
- .description('List tasks in a dataset')
40
- .argument('<dataset>', 'Path to task dataset')
41
- .action((datasetPath) => {
42
- const dataset = loadDataset(datasetPath);
43
- console.log(`\nDataset: ${dataset.name || 'Unnamed'}`);
44
- if (dataset.description) {
45
- console.log(`Description: ${dataset.description}`);
46
- }
47
- console.log(`\nTasks (${dataset.tasks.length}):\n`);
48
- for (const task of dataset.tasks) {
49
- console.log(` ${task.id}`);
50
- console.log(` Complexity: ${task.complexity}`);
51
- console.log(` Files: ${task.files.length}`);
52
- console.log(` ${task.description.substring(0, 60)}...`);
53
- console.log('');
54
- }
55
- });
56
- async function runBenchmark(options) {
57
- // Parse configurations
58
- const configurations = parseConfigurations(options.config);
59
- // Build benchmark config
60
- const benchmarkConfig = {
61
- configurations,
62
- cli: options.cli,
63
- cwd: options.cwd,
64
- quiet: options.quiet,
65
- cooldownMs: parseInt(options.cooldown, 10),
66
- maxSwarmSize: parseInt(options.maxSwarm, 10),
67
- };
68
- const benchmark = new ComparisonBenchmark(benchmarkConfig);
69
- // Load tasks
70
- let tasks;
71
- if (options.dataset) {
72
- const dataset = loadDataset(options.dataset);
73
- tasks = dataset.tasks;
74
- if (options.task) {
75
- tasks = tasks.filter((t) => t.id === options.task);
76
- if (tasks.length === 0) {
77
- throw new Error(`Task not found: ${options.task}`);
78
- }
79
- }
80
- }
81
- else if (options.task) {
82
- // Create a simple task from command line
83
- tasks = [
84
- {
85
- id: options.task,
86
- description: options.task,
87
- files: [],
88
- expectedOutcome: 'Task completed',
89
- complexity: 'medium',
90
- },
91
- ];
92
- }
93
- else {
94
- throw new Error('Either --dataset or --task is required');
95
- }
96
- // Run benchmarks
97
- const results = [];
98
- for (const task of tasks) {
99
- if (!options.quiet) {
100
- console.log(`\n${'='.repeat(60)}`);
101
- console.log(`Running task: ${task.id}`);
102
- console.log('='.repeat(60));
103
- }
104
- const comparison = await benchmark.runComparison(task);
105
- if (!options.quiet) {
106
- benchmark.printComparison(comparison);
107
- }
108
- results.push({
109
- taskId: task.id,
110
- winner: comparison.winner,
111
- results: Object.fromEntries(comparison.results),
112
- scores: Object.fromEntries(comparison.scores),
113
- });
114
- }
115
- // Output results
116
- if (options.output) {
117
- const { writeFileSync } = await import('node:fs');
118
- writeFileSync(options.output, JSON.stringify(results, null, 2));
119
- console.log(`\nResults written to: ${options.output}`);
120
- }
121
- // Print summary
122
- if (!options.quiet && results.length > 1) {
123
- printSummary(results);
124
- }
125
- }
126
- function parseConfigurations(config) {
127
- if (config === 'all') {
128
- return ['single', 'subagent', 'swarm'];
129
- }
130
- const configs = config.split(',').map((c) => c.trim());
131
- const valid = ['single', 'subagent', 'swarm'];
132
- for (const c of configs) {
133
- if (!valid.includes(c)) {
134
- throw new Error(`Invalid configuration: ${c}. Valid: ${valid.join(', ')}`);
135
- }
136
- }
137
- return configs;
138
- }
139
- function loadDataset(path) {
140
- const content = readFileSync(path, 'utf-8');
141
- let data;
142
- if (path.endsWith('.yaml') || path.endsWith('.yml')) {
143
- data = parseYaml(content);
144
- }
145
- else {
146
- data = JSON.parse(content);
147
- }
148
- // Validate and normalize tasks
149
- if (!data.tasks || !Array.isArray(data.tasks)) {
150
- throw new Error('Dataset must have a "tasks" array');
151
- }
152
- data.tasks = data.tasks.map((t, i) => ({
153
- id: t.id || `task-${i}`,
154
- description: t.description || '',
155
- files: t.files || [],
156
- expectedOutcome: t.expectedOutcome || t.success_criteria || 'Completed',
157
- complexity: t.complexity || 'medium',
158
- timeoutMs: t.timeoutMs || 300000,
159
- tags: t.tags || [],
160
- }));
161
- return data;
162
- }
163
- function printSummary(results) {
164
- console.log('\n' + '='.repeat(60));
165
- console.log('BENCHMARK SUMMARY');
166
- console.log('='.repeat(60));
167
- const wins = {
168
- single: 0,
169
- subagent: 0,
170
- swarm: 0,
171
- };
172
- for (const result of results) {
173
- wins[result.winner]++;
174
- }
175
- console.log('\nWins by configuration:');
176
- for (const [config, count] of Object.entries(wins)) {
177
- const bar = '█'.repeat(count) + '░'.repeat(results.length - count);
178
- console.log(` ${config.padEnd(10)} ${bar} ${count}/${results.length}`);
179
- }
180
- const overallWinner = Object.entries(wins)
181
- .sort((a, b) => b[1] - a[1])[0][0];
182
- console.log(`\nOverall winner: ${overallWinner.toUpperCase()}`);
183
- }
184
- program.parse();
185
- //# sourceMappingURL=cli.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;;;GAIG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAQrD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,iBAAiB,CAAC;KACvB,WAAW,CAAC,uDAAuD,CAAC;KACpE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,4BAA4B,CAAC;KACzC,MAAM,CAAC,sBAAsB,EAAE,qCAAqC,CAAC;KACrE,MAAM,CAAC,iBAAiB,EAAE,gCAAgC,CAAC;KAC3D,MAAM,CACL,sBAAsB,EACtB,mDAAmD,EACnD,KAAK,CACN;KACA,MAAM,CAAC,cAAc,EAAE,uBAAuB,EAAE,QAAQ,CAAC;KACzD,MAAM,CAAC,cAAc,EAAE,6BAA6B,CAAC;KACrD,MAAM,CAAC,aAAa,EAAE,iBAAiB,EAAE,KAAK,CAAC;KAC/C,MAAM,CAAC,iBAAiB,EAAE,6BAA6B,EAAE,MAAM,CAAC;KAChE,MAAM,CAAC,iBAAiB,EAAE,oBAAoB,EAAE,IAAI,CAAC;KACrD,MAAM,CAAC,qBAAqB,EAAE,6BAA6B,CAAC;KAC5D,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,IAAI,CAAC;QACH,MAAM,YAAY,CAAC,OAAO,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,yBAAyB,CAAC;KACtC,QAAQ,CAAC,WAAW,EAAE,sBAAsB,CAAC;KAC7C,MAAM,CAAC,CAAC,WAAW,EAAE,EAAE;IACtB,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;IACzC,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,IAAI,IAAI,SAAS,EAAE,CAAC,CAAC;IACvD,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,KAAK,CAAC,MAAM,MAAM,CAAC,CAAC;IAEpD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACjC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAC/C,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;QAC3D,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,KAAK,UAAU,YAAY,CAAC,OAU3B;IACC,uBAAuB;IACvB,MAAM,cAAc,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAE3D,yBAAyB;IACzB,MAAM,eAAe,GAA6B;QAChD,cAAc;QACd,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,UAAU,EAAE,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1C,YAAY,EAAE,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;KAC7C,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC,eAAe,CAAC,CAAC;IAE3D,aAAa;IACb,IAAI,KAAa,CAAC;IAClB,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC7C,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAEtB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;YACnD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,mBAAmB,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;QACxB,yCAAyC;QACzC,KAAK,GAAG;YACN;gBACE,EAAE,EAAE,OAAO,CAAC,IAAI;gBAChB,WAAW,EAAE,OAAO,CAAC,IAAI;gBACzB,KAAK,EAAE,EAAE;gBACT,eAAe,EAAE,gBAAgB;gBACjC,UAAU,EAAE,QAAQ;aACrB;SACF,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;IAC5D,CAAC;IAED,iBAAiB;IACjB,MAAM,OAAO,GAAG,EAAE,CAAC;IACnB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACnB,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9B,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAEvD,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACnB,SAAS,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;QACxC,CAAC;QAED,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,OAAO,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAAC;YAC/C,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC;SAC9C,CAAC,CAAC;IACL,CAAC;IAED,iBAAiB;IACjB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QAClD,aAAa,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,yBAAyB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,gBAAgB;IAChB,IAAI,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzC,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,MAAc;IACzC,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;QACrB,OAAO,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAwB,CAAC;IAC9E,MAAM,KAAK,GAAwB,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;IAEnE,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,YAAY,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAE5C,IAAI,IAAiB,CAAC;IACtB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACpD,IAAI,GAAG,SAAS,CAAC,OAAO,CAAgB,CAAC;IAC3C,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAgB,CAAC;IAC5C,CAAC;IAED,+BAA+B;IAC/B,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9C,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QACrC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,QAAQ,CAAC,EAAE;QACvB,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,EAAE;QAChC,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;QACpB,eAAe,EAAE,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,gBAAgB,IAAI,WAAW;QACvE,UAAU,EAAE,CAAC,CAAC,UAAU,IAAI,QAAQ;QACpC,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,MAAM;QAChC,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE;KACnB,CAAC,CAAC,CAAC;IAEJ,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,YAAY,CACnB,OAKE;IAEF,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IACnC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAE5B,MAAM,IAAI,GAAsC;QAC9C,MAAM,EAAE,CAAC;QACT,QAAQ,EAAE,CAAC;QACX,KAAK,EAAE,CAAC;KACT,CAAC;IAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;IACxB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;IACxC,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACnD,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC;QACnE,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,GAAG,IAAI,KAAK,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,aAAa,GAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAmC;SAC1E,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,OAAO,CAAC,GAAG,CAAC,qBAAqB,aAAa,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;AAClE,CAAC;AAED,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -1,53 +0,0 @@
1
- /**
2
- * Harbor Integration
3
- *
4
- * Entry points for Harbor benchmark framework integration.
5
- * https://github.com/laude-institute/harbor
6
- */
7
- import type { ConfigurationType, HarborTaskInput, HarborEvaluationOutput, BenchmarkConfig } from './types.js';
8
- /**
9
- * Main Harbor evaluation entry point
10
- *
11
- * This function is called by Harbor to evaluate a task across all configurations.
12
- *
13
- * @example Harbor dataset format:
14
- * ```yaml
15
- * tasks:
16
- * - id: refactor-auth
17
- * description: "Refactor authentication to use JWT"
18
- * files:
19
- * - src/auth/session.ts
20
- * - src/auth/middleware.ts
21
- * success_criteria: "All tests pass, JWT tokens used"
22
- * complexity: medium
23
- * ```
24
- *
25
- * @example Running with Harbor:
26
- * ```bash
27
- * harbor run \
28
- * --dataset tasks.yaml \
29
- * --agent @agent-relay/benchmark/harbor \
30
- * --parallel 10
31
- * ```
32
- */
33
- export declare function evaluate(input: HarborTaskInput): Promise<HarborEvaluationOutput>;
34
- /**
35
- * Run a single configuration (for targeted Harbor evaluations)
36
- *
37
- * @example Running single config with Harbor:
38
- * ```bash
39
- * harbor run \
40
- * --dataset tasks.yaml \
41
- * --agent "@agent-relay/benchmark/harbor:evaluateSingle" \
42
- * --env-var CONFIG=swarm
43
- * ```
44
- */
45
- export declare function evaluateSingle(input: HarborTaskInput & {
46
- config?: ConfigurationType;
47
- }): Promise<Record<string, unknown>>;
48
- /**
49
- * Evaluate with custom configuration
50
- */
51
- export declare function evaluateCustom(input: HarborTaskInput, config: Partial<BenchmarkConfig>): Promise<HarborEvaluationOutput>;
52
- export default evaluate;
53
- //# sourceMappingURL=harbor.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"harbor.d.ts","sourceRoot":"","sources":["../src/harbor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAGV,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,eAAe,EAChB,MAAM,YAAY,CAAC;AAoBpB;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,sBAAsB,CAAC,CAiCjC;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,eAAe,GAAG;IAAE,MAAM,CAAC,EAAE,iBAAiB,CAAA;CAAE,GACtD,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAmBlC;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,eAAe,EACtB,MAAM,EAAE,OAAO,CAAC,eAAe,CAAC,GAC/B,OAAO,CAAC,sBAAsB,CAAC,CA8BjC;AAGD,eAAe,QAAQ,CAAC"}
@@ -1,127 +0,0 @@
1
- /**
2
- * Harbor Integration
3
- *
4
- * Entry points for Harbor benchmark framework integration.
5
- * https://github.com/laude-institute/harbor
6
- */
7
- import { ComparisonBenchmark } from './benchmark.js';
8
- const BENCHMARK_VERSION = '1.0.0';
9
- /**
10
- * Convert Harbor task input to internal Task format
11
- */
12
- function convertHarborTask(input) {
13
- return {
14
- id: input.id,
15
- description: input.description,
16
- files: input.files || [],
17
- expectedOutcome: input.success_criteria || 'Task completed successfully',
18
- complexity: input.complexity || 'medium',
19
- timeoutMs: 300000, // 5 minute default
20
- tags: [],
21
- };
22
- }
23
- /**
24
- * Main Harbor evaluation entry point
25
- *
26
- * This function is called by Harbor to evaluate a task across all configurations.
27
- *
28
- * @example Harbor dataset format:
29
- * ```yaml
30
- * tasks:
31
- * - id: refactor-auth
32
- * description: "Refactor authentication to use JWT"
33
- * files:
34
- * - src/auth/session.ts
35
- * - src/auth/middleware.ts
36
- * success_criteria: "All tests pass, JWT tokens used"
37
- * complexity: medium
38
- * ```
39
- *
40
- * @example Running with Harbor:
41
- * ```bash
42
- * harbor run \
43
- * --dataset tasks.yaml \
44
- * --agent @agent-relay/benchmark/harbor \
45
- * --parallel 10
46
- * ```
47
- */
48
- export async function evaluate(input) {
49
- const startedAt = Date.now();
50
- const task = convertHarborTask(input);
51
- const benchmark = new ComparisonBenchmark({
52
- configurations: ['single', 'subagent', 'swarm'],
53
- cli: 'claude',
54
- quiet: true, // Suppress output in Harbor runs
55
- cooldownMs: 2000,
56
- });
57
- const comparison = await benchmark.runComparison(task);
58
- const completedAt = Date.now();
59
- return {
60
- task_id: task.id,
61
- configurations: Object.fromEntries(comparison.results),
62
- winner: comparison.winner,
63
- scores: Object.fromEntries(comparison.scores),
64
- metadata: {
65
- benchmark_version: BENCHMARK_VERSION,
66
- started_at: startedAt,
67
- completed_at: completedAt,
68
- total_duration_ms: completedAt - startedAt,
69
- },
70
- };
71
- }
72
- /**
73
- * Run a single configuration (for targeted Harbor evaluations)
74
- *
75
- * @example Running single config with Harbor:
76
- * ```bash
77
- * harbor run \
78
- * --dataset tasks.yaml \
79
- * --agent "@agent-relay/benchmark/harbor:evaluateSingle" \
80
- * --env-var CONFIG=swarm
81
- * ```
82
- */
83
- export async function evaluateSingle(input) {
84
- const config = input.config || 'single';
85
- const task = convertHarborTask(input);
86
- const benchmark = new ComparisonBenchmark({
87
- configurations: [config],
88
- cli: 'claude',
89
- quiet: true,
90
- cooldownMs: 0,
91
- });
92
- const result = await benchmark.runSingle(task, config);
93
- return {
94
- task_id: task.id,
95
- configuration: config,
96
- result,
97
- success: result.success,
98
- };
99
- }
100
- /**
101
- * Evaluate with custom configuration
102
- */
103
- export async function evaluateCustom(input, config) {
104
- const startedAt = Date.now();
105
- const task = convertHarborTask(input);
106
- const benchmark = new ComparisonBenchmark({
107
- ...config,
108
- quiet: true,
109
- });
110
- const comparison = await benchmark.runComparison(task);
111
- const completedAt = Date.now();
112
- return {
113
- task_id: task.id,
114
- configurations: Object.fromEntries(comparison.results),
115
- winner: comparison.winner,
116
- scores: Object.fromEntries(comparison.scores),
117
- metadata: {
118
- benchmark_version: BENCHMARK_VERSION,
119
- started_at: startedAt,
120
- completed_at: completedAt,
121
- total_duration_ms: completedAt - startedAt,
122
- },
123
- };
124
- }
125
- // Default export for Harbor
126
- export default evaluate;
127
- //# sourceMappingURL=harbor.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"harbor.js","sourceRoot":"","sources":["../src/harbor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAUH,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAErD,MAAM,iBAAiB,GAAG,OAAO,CAAC;AAElC;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAsB;IAC/C,OAAO;QACL,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,KAAK,EAAE,KAAK,CAAC,KAAK,IAAI,EAAE;QACxB,eAAe,EAAE,KAAK,CAAC,gBAAgB,IAAI,6BAA6B;QACxE,UAAU,EAAG,KAAK,CAAC,UAA6B,IAAI,QAAQ;QAC5D,SAAS,EAAE,MAAM,EAAE,mBAAmB;QACtC,IAAI,EAAE,EAAE;KACT,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,KAAsB;IAEtB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,cAAc,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC;QAC/C,GAAG,EAAE,QAAQ;QACb,KAAK,EAAE,IAAI,EAAE,iCAAiC;QAC9C,UAAU,EAAE,IAAI;KACjB,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAEvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE/B,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,cAAc,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAGpD;QACD,MAAM,EAAE,UAAU,CAAC,MAAM;QACzB,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAG3C;QACD,QAAQ,EAAE;YACR,iBAAiB,EAAE,iBAAiB;YACpC,UAAU,EAAE,SAAS;YACrB,YAAY,EAAE,WAAW;YACzB,iBAAiB,EAAE,WAAW,GAAG,SAAS;SAC3C;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAuD;IAEvD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,QAAQ,CAAC;IACxC,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,cAAc,EAAE,CAAC,MAAM,CAAC;QACxB,GAAG,EAAE,QAAQ;QACb,KAAK,EAAE,IAAI;QACX,UAAU,EAAE,CAAC;KACd,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAEvD,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,aAAa,EAAE,MAAM;QACrB,MAAM;QACN,OAAO,EAAE,MAAM,CAAC,OAAO;KACxB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAsB,EACtB,MAAgC;IAEhC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,GAAG,MAAM;QACT,KAAK,EAAE,IAAI;KACZ,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IACvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE/B,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,cAAc,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAGpD;QACD,MAAM,EAAE,UAAU,CAAC,MAAM;QACzB,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAG3C;QACD,QAAQ,EAAE;YACR,iBAAiB,EAAE,iBAAiB;YACpC,UAAU,EAAE,SAAS;YACrB,YAAY,EAAE,WAAW;YACzB,iBAAiB,EAAE,WAAW,GAAG,SAAS;SAC3C;KACF,CAAC;AACJ,CAAC;AAED,4BAA4B;AAC5B,eAAe,QAAQ,CAAC"}