agent-relay 2.3.4 → 2.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297)
  1. package/README.md +1 -1
  2. package/dist/src/cli/index.js +124 -7
  3. package/dist/src/cli/index.js.map +1 -1
  4. package/package.json +23 -26
  5. package/packages/acp-bridge/package.json +2 -2
  6. package/packages/bridge/package.json +7 -7
  7. package/packages/config/package.json +2 -2
  8. package/packages/continuity/package.json +2 -2
  9. package/packages/daemon/package.json +12 -12
  10. package/packages/hooks/package.json +4 -4
  11. package/packages/mcp/package.json +5 -5
  12. package/packages/memory/package.json +2 -2
  13. package/packages/policy/package.json +2 -2
  14. package/packages/protocol/package.json +1 -1
  15. package/packages/resiliency/package.json +1 -1
  16. package/packages/sdk/dist/index.d.ts +1 -29
  17. package/packages/sdk/dist/index.d.ts.map +1 -1
  18. package/packages/sdk/dist/index.js +1 -38
  19. package/packages/sdk/dist/index.js.map +1 -1
  20. package/packages/sdk/package.json +4 -25
  21. package/packages/sdk/src/index.ts +1 -69
  22. package/packages/sdk-py/README.md +56 -0
  23. package/packages/sdk-py/pyproject.toml +23 -0
  24. package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
  25. package/packages/sdk-py/src/agent_relay/builder.py +367 -0
  26. package/packages/sdk-py/src/agent_relay/types.py +92 -0
  27. package/packages/sdk-py/tests/__init__.py +0 -0
  28. package/packages/sdk-py/tests/test_builder.py +101 -0
  29. package/packages/sdk-ts/dist/index.d.ts +1 -0
  30. package/packages/sdk-ts/dist/index.d.ts.map +1 -1
  31. package/packages/sdk-ts/dist/index.js +1 -0
  32. package/packages/sdk-ts/dist/index.js.map +1 -1
  33. package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
  34. package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
  35. package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
  36. package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
  37. package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
  38. package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
  39. package/packages/sdk-ts/dist/workflows/builder.js +179 -0
  40. package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
  41. package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
  42. package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
  43. package/packages/sdk-ts/dist/workflows/cli.js +82 -0
  44. package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
  45. package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
  46. package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
  47. package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
  48. package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
  49. package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
  50. package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
  51. package/packages/sdk-ts/dist/workflows/index.js +10 -0
  52. package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
  53. package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
  54. package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
  55. package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
  56. package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
  57. package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
  58. package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
  59. package/packages/sdk-ts/dist/workflows/run.js +24 -0
  60. package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
  61. package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
  62. package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
  63. package/packages/sdk-ts/dist/workflows/runner.js +650 -0
  64. package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
  65. package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
  66. package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
  67. package/packages/sdk-ts/dist/workflows/state.js +140 -0
  68. package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
  69. package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
  70. package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
  71. package/packages/sdk-ts/dist/workflows/templates.js +395 -0
  72. package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
  73. package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
  74. package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
  75. package/packages/sdk-ts/dist/workflows/types.js +8 -0
  76. package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
  77. package/packages/sdk-ts/package.json +8 -2
  78. package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
  79. package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
  80. package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
  81. package/packages/sdk-ts/src/index.ts +1 -0
  82. package/packages/sdk-ts/src/workflows/README.md +450 -0
  83. package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
  84. package/packages/sdk-ts/src/workflows/builder.ts +241 -0
  85. package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
  86. package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
  87. package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
  88. package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
  89. package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
  90. package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
  91. package/packages/sdk-ts/src/workflows/cli.ts +93 -0
  92. package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
  93. package/packages/sdk-ts/src/workflows/index.ts +9 -0
  94. package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
  95. package/packages/sdk-ts/src/workflows/run.ts +47 -0
  96. package/packages/sdk-ts/src/workflows/runner.ts +873 -0
  97. package/packages/sdk-ts/src/workflows/schema.json +321 -0
  98. package/packages/sdk-ts/src/workflows/state.ts +279 -0
  99. package/packages/sdk-ts/src/workflows/templates.ts +544 -0
  100. package/packages/sdk-ts/src/workflows/types.ts +178 -0
  101. package/packages/sdk-ts/tsconfig.json +6 -1
  102. package/packages/spawner/package.json +1 -1
  103. package/packages/state/package.json +1 -1
  104. package/packages/storage/package.json +2 -2
  105. package/packages/telemetry/package.json +1 -1
  106. package/packages/trajectory/package.json +2 -2
  107. package/packages/user-directory/package.json +2 -2
  108. package/packages/utils/package.json +3 -3
  109. package/packages/wrapper/package.json +5 -6
  110. package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
  111. package/packages/api-types/.trajectories/index.json +0 -12
  112. package/packages/api-types/dist/index.d.ts +0 -21
  113. package/packages/api-types/dist/index.d.ts.map +0 -1
  114. package/packages/api-types/dist/index.js +0 -22
  115. package/packages/api-types/dist/index.js.map +0 -1
  116. package/packages/api-types/dist/schemas/agent.d.ts +0 -259
  117. package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
  118. package/packages/api-types/dist/schemas/agent.js +0 -102
  119. package/packages/api-types/dist/schemas/agent.js.map +0 -1
  120. package/packages/api-types/dist/schemas/api.d.ts +0 -290
  121. package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
  122. package/packages/api-types/dist/schemas/api.js +0 -162
  123. package/packages/api-types/dist/schemas/api.js.map +0 -1
  124. package/packages/api-types/dist/schemas/decision.d.ts +0 -230
  125. package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
  126. package/packages/api-types/dist/schemas/decision.js +0 -104
  127. package/packages/api-types/dist/schemas/decision.js.map +0 -1
  128. package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
  129. package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
  130. package/packages/api-types/dist/schemas/fleet.js +0 -71
  131. package/packages/api-types/dist/schemas/fleet.js.map +0 -1
  132. package/packages/api-types/dist/schemas/history.d.ts +0 -180
  133. package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
  134. package/packages/api-types/dist/schemas/history.js +0 -72
  135. package/packages/api-types/dist/schemas/history.js.map +0 -1
  136. package/packages/api-types/dist/schemas/index.d.ts +0 -14
  137. package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
  138. package/packages/api-types/dist/schemas/index.js +0 -22
  139. package/packages/api-types/dist/schemas/index.js.map +0 -1
  140. package/packages/api-types/dist/schemas/message.d.ts +0 -456
  141. package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
  142. package/packages/api-types/dist/schemas/message.js +0 -88
  143. package/packages/api-types/dist/schemas/message.js.map +0 -1
  144. package/packages/api-types/dist/schemas/session.d.ts +0 -60
  145. package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
  146. package/packages/api-types/dist/schemas/session.js +0 -36
  147. package/packages/api-types/dist/schemas/session.js.map +0 -1
  148. package/packages/api-types/dist/schemas/task.d.ts +0 -111
  149. package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
  150. package/packages/api-types/dist/schemas/task.js +0 -64
  151. package/packages/api-types/dist/schemas/task.js.map +0 -1
  152. package/packages/api-types/package.json +0 -61
  153. package/packages/api-types/scripts/generate-openapi.ts +0 -106
  154. package/packages/api-types/src/index.ts +0 -22
  155. package/packages/api-types/src/schemas/agent.test.ts +0 -164
  156. package/packages/api-types/src/schemas/agent.ts +0 -110
  157. package/packages/api-types/src/schemas/api.test.ts +0 -372
  158. package/packages/api-types/src/schemas/api.ts +0 -194
  159. package/packages/api-types/src/schemas/decision.test.ts +0 -324
  160. package/packages/api-types/src/schemas/decision.ts +0 -136
  161. package/packages/api-types/src/schemas/fleet.test.ts +0 -212
  162. package/packages/api-types/src/schemas/fleet.ts +0 -83
  163. package/packages/api-types/src/schemas/history.test.ts +0 -242
  164. package/packages/api-types/src/schemas/history.ts +0 -84
  165. package/packages/api-types/src/schemas/index.ts +0 -148
  166. package/packages/api-types/src/schemas/message.test.ts +0 -192
  167. package/packages/api-types/src/schemas/message.ts +0 -98
  168. package/packages/api-types/src/schemas/session.test.ts +0 -104
  169. package/packages/api-types/src/schemas/session.ts +0 -40
  170. package/packages/api-types/src/schemas/task.test.ts +0 -192
  171. package/packages/api-types/src/schemas/task.ts +0 -78
  172. package/packages/api-types/tsconfig.json +0 -19
  173. package/packages/api-types/vitest.config.ts +0 -9
  174. package/packages/benchmark/README.md +0 -200
  175. package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
  176. package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
  177. package/packages/benchmark/datasets/quick-test.yaml +0 -20
  178. package/packages/benchmark/dist/benchmark.d.ts +0 -47
  179. package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
  180. package/packages/benchmark/dist/benchmark.js +0 -224
  181. package/packages/benchmark/dist/benchmark.js.map +0 -1
  182. package/packages/benchmark/dist/cli.d.ts +0 -8
  183. package/packages/benchmark/dist/cli.d.ts.map +0 -1
  184. package/packages/benchmark/dist/cli.js +0 -185
  185. package/packages/benchmark/dist/cli.js.map +0 -1
  186. package/packages/benchmark/dist/harbor.d.ts +0 -53
  187. package/packages/benchmark/dist/harbor.d.ts.map +0 -1
  188. package/packages/benchmark/dist/harbor.js +0 -127
  189. package/packages/benchmark/dist/harbor.js.map +0 -1
  190. package/packages/benchmark/dist/index.d.ts +0 -48
  191. package/packages/benchmark/dist/index.d.ts.map +0 -1
  192. package/packages/benchmark/dist/index.js +0 -50
  193. package/packages/benchmark/dist/index.js.map +0 -1
  194. package/packages/benchmark/dist/runners/base.d.ts +0 -63
  195. package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
  196. package/packages/benchmark/dist/runners/base.js +0 -156
  197. package/packages/benchmark/dist/runners/base.js.map +0 -1
  198. package/packages/benchmark/dist/runners/index.d.ts +0 -10
  199. package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
  200. package/packages/benchmark/dist/runners/index.js +0 -10
  201. package/packages/benchmark/dist/runners/index.js.map +0 -1
  202. package/packages/benchmark/dist/runners/single.d.ts +0 -19
  203. package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
  204. package/packages/benchmark/dist/runners/single.js +0 -111
  205. package/packages/benchmark/dist/runners/single.js.map +0 -1
  206. package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
  207. package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
  208. package/packages/benchmark/dist/runners/subagent.js +0 -212
  209. package/packages/benchmark/dist/runners/subagent.js.map +0 -1
  210. package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
  211. package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
  212. package/packages/benchmark/dist/runners/swarm.js +0 -273
  213. package/packages/benchmark/dist/runners/swarm.js.map +0 -1
  214. package/packages/benchmark/dist/types.d.ts +0 -178
  215. package/packages/benchmark/dist/types.d.ts.map +0 -1
  216. package/packages/benchmark/dist/types.js +0 -16
  217. package/packages/benchmark/dist/types.js.map +0 -1
  218. package/packages/benchmark/package.json +0 -80
  219. package/packages/benchmark/src/benchmark.ts +0 -298
  220. package/packages/benchmark/src/cli.ts +0 -240
  221. package/packages/benchmark/src/harbor.ts +0 -170
  222. package/packages/benchmark/src/index.ts +0 -73
  223. package/packages/benchmark/src/runners/base.ts +0 -205
  224. package/packages/benchmark/src/runners/index.ts +0 -10
  225. package/packages/benchmark/src/runners/single.ts +0 -121
  226. package/packages/benchmark/src/runners/subagent.ts +0 -240
  227. package/packages/benchmark/src/runners/swarm.ts +0 -326
  228. package/packages/benchmark/src/types.ts +0 -205
  229. package/packages/benchmark/tsconfig.json +0 -20
  230. package/packages/cli-tester/README.md +0 -277
  231. package/packages/cli-tester/dist/index.d.ts +0 -21
  232. package/packages/cli-tester/dist/index.d.ts.map +0 -1
  233. package/packages/cli-tester/dist/index.js +0 -21
  234. package/packages/cli-tester/dist/index.js.map +0 -1
  235. package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
  236. package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
  237. package/packages/cli-tester/dist/utils/credential-check.js +0 -230
  238. package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
  239. package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
  240. package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
  241. package/packages/cli-tester/dist/utils/socket-client.js +0 -153
  242. package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
  243. package/packages/cli-tester/docker/Dockerfile +0 -61
  244. package/packages/cli-tester/docker/docker-compose.yml +0 -71
  245. package/packages/cli-tester/docker/entrypoint.sh +0 -58
  246. package/packages/cli-tester/package.json +0 -32
  247. package/packages/cli-tester/scripts/clear-auth.sh +0 -101
  248. package/packages/cli-tester/scripts/inject-message.sh +0 -42
  249. package/packages/cli-tester/scripts/start.sh +0 -71
  250. package/packages/cli-tester/scripts/test-cli.sh +0 -56
  251. package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
  252. package/packages/cli-tester/scripts/test-registration.sh +0 -182
  253. package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
  254. package/packages/cli-tester/scripts/test-spawn.sh +0 -140
  255. package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
  256. package/packages/cli-tester/scripts/verify-auth.sh +0 -112
  257. package/packages/cli-tester/src/index.ts +0 -40
  258. package/packages/cli-tester/src/utils/credential-check.ts +0 -284
  259. package/packages/cli-tester/src/utils/socket-client.ts +0 -211
  260. package/packages/cli-tester/tests/credential-check.test.ts +0 -56
  261. package/packages/cli-tester/tsconfig.json +0 -11
  262. package/packages/sdk/dist/browser-client.d.ts +0 -212
  263. package/packages/sdk/dist/browser-client.d.ts.map +0 -1
  264. package/packages/sdk/dist/browser-client.js +0 -750
  265. package/packages/sdk/dist/browser-client.js.map +0 -1
  266. package/packages/sdk/dist/browser-framing.d.ts +0 -46
  267. package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
  268. package/packages/sdk/dist/browser-framing.js +0 -122
  269. package/packages/sdk/dist/browser-framing.js.map +0 -1
  270. package/packages/sdk/dist/standalone.d.ts +0 -89
  271. package/packages/sdk/dist/standalone.d.ts.map +0 -1
  272. package/packages/sdk/dist/standalone.js +0 -131
  273. package/packages/sdk/dist/standalone.js.map +0 -1
  274. package/packages/sdk/dist/transports/index.d.ts +0 -92
  275. package/packages/sdk/dist/transports/index.d.ts.map +0 -1
  276. package/packages/sdk/dist/transports/index.js +0 -129
  277. package/packages/sdk/dist/transports/index.js.map +0 -1
  278. package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
  279. package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
  280. package/packages/sdk/dist/transports/socket-transport.js +0 -94
  281. package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
  282. package/packages/sdk/dist/transports/types.d.ts +0 -69
  283. package/packages/sdk/dist/transports/types.d.ts.map +0 -1
  284. package/packages/sdk/dist/transports/types.js +0 -10
  285. package/packages/sdk/dist/transports/types.js.map +0 -1
  286. package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
  287. package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
  288. package/packages/sdk/dist/transports/websocket-transport.js +0 -180
  289. package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
  290. package/packages/sdk/src/browser-client.ts +0 -985
  291. package/packages/sdk/src/browser-framing.test.ts +0 -115
  292. package/packages/sdk/src/browser-framing.ts +0 -150
  293. package/packages/sdk/src/standalone.ts +0 -183
  294. package/packages/sdk/src/transports/index.ts +0 -197
  295. package/packages/sdk/src/transports/socket-transport.ts +0 -115
  296. package/packages/sdk/src/transports/types.ts +0 -77
  297. package/packages/sdk/src/transports/websocket-transport.ts +0 -245
@@ -1,19 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "ES2022",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
6
- "lib": ["ES2022"],
7
- "outDir": "dist",
8
- "rootDir": "src",
9
- "strict": true,
10
- "esModuleInterop": true,
11
- "skipLibCheck": true,
12
- "forceConsistentCasingInFileNames": true,
13
- "declaration": true,
14
- "declarationMap": true,
15
- "sourceMap": true
16
- },
17
- "include": ["src/**/*"],
18
- "exclude": ["node_modules", "dist", "**/*.test.ts"]
19
- }
@@ -1,9 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
3
- export default defineConfig({
4
- test: {
5
- globals: true,
6
- environment: 'node',
7
- include: ['src/**/*.test.ts'],
8
- },
9
- });
@@ -1,200 +0,0 @@
1
- # @agent-relay/benchmark
2
-
3
- Performance benchmarking for comparing agent configurations: single agents, sub-agents (hierarchical), and swarms (peer-to-peer).
4
-
5
- ## Overview
6
-
7
- This package provides tools to measure and compare the performance of different agent configurations on the same tasks:
8
-
9
- | Configuration | Description | Communication |
10
- |---------------|-------------|---------------|
11
- | **Single** | One agent handles everything | None |
12
- | **Sub-agent** | Lead spawns and coordinates workers | Hierarchical (parent → child) |
13
- | **Swarm** | Peer agents coordinate as equals | Peer-to-peer via channels |
14
-
15
- ## Installation
16
-
17
- ```bash
18
- npm install @agent-relay/benchmark
19
- ```
20
-
21
- For standalone mode (in-process daemon):
22
- ```bash
23
- npm install @agent-relay/benchmark @agent-relay/daemon
24
- ```
25
-
26
- ## Quick Start
27
-
28
- ### Programmatic Usage
29
-
30
- ```typescript
31
- import { ComparisonBenchmark, type Task } from '@agent-relay/benchmark';
32
-
33
- const task: Task = {
34
- id: 'refactor-auth',
35
- description: 'Refactor authentication to use JWT',
36
- files: ['src/auth/session.ts', 'src/auth/middleware.ts'],
37
- expectedOutcome: 'All tests pass, JWT tokens used',
38
- complexity: 'medium',
39
- };
40
-
41
- const benchmark = new ComparisonBenchmark();
42
- const comparison = await benchmark.runComparison(task);
43
-
44
- console.log(`Winner: ${comparison.winner}`);
45
- benchmark.printComparison(comparison);
46
- ```
47
-
48
- ### CLI Usage
49
-
50
- ```bash
51
- # Run comparison on all configurations
52
- relay-benchmark run --dataset tasks.yaml --config all
53
-
54
- # Run specific configuration
55
- relay-benchmark run --dataset tasks.yaml --config swarm
56
-
57
- # List tasks in a dataset
58
- relay-benchmark list tasks.yaml
59
-
60
- # Output results to JSON
61
- relay-benchmark run --dataset tasks.yaml -o results.json
62
- ```
63
-
64
- ### Harbor Integration
65
-
66
- This package integrates with [Harbor](https://github.com/laude-institute/harbor) for large-scale agent evaluation:
67
-
68
- ```bash
69
- # Install Harbor
70
- pip install harbor-bench
71
-
72
- # Run benchmark via Harbor
73
- harbor run \
74
- --dataset tasks.yaml \
75
- --agent @agent-relay/benchmark/harbor \
76
- --parallel 10
77
-
78
- # Run at scale with cloud providers
79
- harbor run \
80
- --dataset tasks.yaml \
81
- --agent @agent-relay/benchmark/harbor \
82
- --env daytona \
83
- --parallel 100
84
- ```
85
-
86
- ## Task Dataset Format
87
-
88
- Tasks can be defined in YAML or JSON:
89
-
90
- ```yaml
91
- name: My Tasks
92
- description: Tasks for benchmarking
93
-
94
- tasks:
95
- - id: add-feature
96
- description: Add user preferences feature
97
- files:
98
- - src/models/preferences.ts
99
- - src/routes/preferences.ts
100
- - tests/preferences.test.ts
101
- expectedOutcome: Feature working, tests pass
102
- complexity: medium # low, medium, high
103
- timeoutMs: 300000 # optional, default 5 minutes
104
- tags: # optional
105
- - feature
106
- - api
107
- ```
108
-
109
- ## Metrics Collected
110
-
111
- | Metric | Description |
112
- |--------|-------------|
113
- | `totalTimeMs` | Total execution time |
114
- | `timeToFirstActionMs` | Time until first agent action |
115
- | `messageCount` | Inter-agent messages sent |
116
- | `avgLatencyMs` | Average message latency |
117
- | `latencyP50Ms` | 50th percentile latency |
118
- | `latencyP99Ms` | 99th percentile latency |
119
- | `coordinationRounds` | Communication rounds |
120
- | `agentCount` | Agents used |
121
- | `totalTokensUsed` | LLM tokens consumed |
122
- | `peakMemoryMb` | Peak memory usage |
123
- | `success` | Task completed successfully |
124
- | `completionRate` | Partial completion (0-1) |
125
-
126
- ## Scoring
127
-
128
- Results are scored on three components:
129
-
130
- - **Success (50 points)**: Task completion
131
- - **Time (30 points)**: Faster is better
132
- - **Efficiency (20 points)**: Fewer agents is better
133
-
134
- The configuration with the highest total score wins.
135
-
136
- ## Configuration
137
-
138
- ```typescript
139
- interface BenchmarkConfig {
140
- configurations: ConfigurationType[]; // ['single', 'subagent', 'swarm']
141
- cli: string; // CLI to use (default: 'claude')
142
- cwd?: string; // Working directory
143
- quiet: boolean; // Suppress output
144
- cooldownMs: number; // Delay between runs
145
- maxSwarmSize: number; // Max agents in swarm
146
- socketPath?: string; // Custom relay socket
147
- }
148
- ```
149
-
150
- ## Architecture
151
-
152
- ```
153
- ┌─────────────────────────────────────────────────────────────┐
154
- │ ComparisonBenchmark │
155
- │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
156
- │ │ Single │ │ SubAgent │ │ Swarm │ │
157
- │ │ Runner │ │ Runner │ │ Runner │ │
158
- │ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
159
- │ │ │ │ │
160
- │ └────────────────┼────────────────┘ │
161
- │ │ │
162
- │ ▼ │
163
- │ ┌───────────────────────┐ │
164
- │ │ @agent-relay/sdk │ │
165
- │ │ (standalone mode) │ │
166
- │ └───────────────────────┘ │
167
- └─────────────────────────────────────────────────────────────┘
168
- ```
169
-
170
- ## Example Output
171
-
172
- ```
173
- ============================================================
174
- COMPARISON RESULTS
175
- ============================================================
176
- Task: refactor-auth
177
- Winner: SUBAGENT
178
-
179
- +------------+--------+----------+-------+
180
- | Metric | Single | Subagent | Swarm |
181
- +------------+--------+----------+-------+
182
- | Success | ✓ | ✓ | ✓ |
183
- | Time (s) | 45.2 | 28.1 | 32.5 |
184
- | Agents | 1 | 3 | 3 |
185
- | Messages | 0 | 12 | 24 |
186
- | Completion | 100% | 100% | 100% |
187
- | Score | 65.3 | 78.2 | 71.8 |
188
- +------------+--------+----------+-------+
189
- ```
190
-
191
- ## Included Datasets
192
-
193
- The package includes example datasets in `datasets/`:
194
-
195
- - `coding-tasks.yaml` - Standard software engineering tasks
196
- - `coordination-tasks.yaml` - Tasks requiring multi-agent coordination
197
-
198
- ## License
199
-
200
- Apache-2.0
@@ -1,127 +0,0 @@
1
- # Coding Tasks Dataset
2
- # Standard coding tasks for comparing agent configurations
3
-
4
- name: Coding Tasks
5
- description: Common software engineering tasks for benchmarking agent configurations
6
- version: "1.0.0"
7
-
8
- tasks:
9
- # Low complexity tasks
10
- - id: add-logging
11
- description: Add structured logging to the API endpoints
12
- files:
13
- - src/routes/api.ts
14
- - src/utils/logger.ts
15
- expectedOutcome: All API endpoints log requests and responses with timestamps
16
- complexity: low
17
- tags:
18
- - logging
19
- - observability
20
-
21
- - id: fix-typos
22
- description: Fix typos and improve variable names in the codebase
23
- files:
24
- - src/utils/helpers.ts
25
- - src/components/Form.tsx
26
- expectedOutcome: All typos fixed, variable names follow conventions
27
- complexity: low
28
- tags:
29
- - refactoring
30
- - code-quality
31
-
32
- # Medium complexity tasks
33
- - id: add-rate-limiting
34
- description: Add rate limiting middleware to protect API endpoints
35
- files:
36
- - src/middleware/rateLimit.ts
37
- - src/routes/api.ts
38
- - src/config/limits.ts
39
- expectedOutcome: Rate limiting applied to all endpoints, configurable limits per route
40
- complexity: medium
41
- tags:
42
- - security
43
- - middleware
44
-
45
- - id: refactor-auth-jwt
46
- description: Refactor authentication from sessions to JWT tokens
47
- files:
48
- - src/auth/session.ts
49
- - src/auth/jwt.ts
50
- - src/middleware/auth.ts
51
- - src/routes/login.ts
52
- - tests/auth.test.ts
53
- expectedOutcome: JWT-based auth working, all tests pass, sessions removed
54
- complexity: medium
55
- tags:
56
- - auth
57
- - security
58
- - refactoring
59
-
60
- - id: add-caching-layer
61
- description: Add Redis caching to expensive database queries
62
- files:
63
- - src/services/cache.ts
64
- - src/repositories/user.ts
65
- - src/repositories/product.ts
66
- - src/config/redis.ts
67
- expectedOutcome: Caching implemented for user and product queries, cache invalidation working
68
- complexity: medium
69
- tags:
70
- - performance
71
- - caching
72
-
73
- # High complexity tasks
74
- - id: database-migration
75
- description: Migrate from PostgreSQL to MongoDB while maintaining API compatibility
76
- files:
77
- - src/db/postgres/connection.ts
78
- - src/db/mongo/connection.ts
79
- - src/models/user.ts
80
- - src/models/product.ts
81
- - src/models/order.ts
82
- - src/repositories/user.ts
83
- - src/repositories/product.ts
84
- - src/repositories/order.ts
85
- - scripts/migrate-data.ts
86
- - docker-compose.yml
87
- expectedOutcome: All data migrated, API unchanged, tests pass on MongoDB
88
- complexity: high
89
- tags:
90
- - database
91
- - migration
92
- - high-risk
93
-
94
- - id: implement-rbac
95
- description: Implement role-based access control across the application
96
- files:
97
- - src/auth/rbac.ts
98
- - src/auth/permissions.ts
99
- - src/middleware/authorize.ts
100
- - src/routes/admin.ts
101
- - src/routes/user.ts
102
- - src/models/role.ts
103
- - src/models/permission.ts
104
- - tests/rbac.test.ts
105
- expectedOutcome: RBAC fully implemented, admin routes protected, tests pass
106
- complexity: high
107
- tags:
108
- - security
109
- - auth
110
- - permissions
111
-
112
- - id: api-versioning
113
- description: Implement API versioning with backward compatibility
114
- files:
115
- - src/routes/v1/index.ts
116
- - src/routes/v2/index.ts
117
- - src/middleware/version.ts
118
- - src/transformers/v1-to-v2.ts
119
- - src/docs/api-v1.yaml
120
- - src/docs/api-v2.yaml
121
- - tests/versioning.test.ts
122
- expectedOutcome: V1 and V2 APIs working, automatic version negotiation, docs updated
123
- complexity: high
124
- tags:
125
- - api
126
- - versioning
127
- - backward-compatibility
@@ -1,122 +0,0 @@
1
- # Coordination Tasks Dataset
2
- # Tasks specifically designed to test multi-agent coordination
3
-
4
- name: Coordination Tasks
5
- description: Tasks that require significant inter-agent coordination and communication
6
- version: "1.0.0"
7
-
8
- tasks:
9
- # Tasks requiring parallel work
10
- - id: parallel-refactor
11
- description: Refactor 6 service files to use a new error handling pattern
12
- files:
13
- - src/services/userService.ts
14
- - src/services/orderService.ts
15
- - src/services/productService.ts
16
- - src/services/paymentService.ts
17
- - src/services/notificationService.ts
18
- - src/services/analyticsService.ts
19
- expectedOutcome: All services use new error handling, consistent pattern across all files
20
- complexity: medium
21
- tags:
22
- - parallel-work
23
- - refactoring
24
-
25
- - id: parallel-testing
26
- description: Write comprehensive unit tests for 4 independent modules
27
- files:
28
- - src/utils/validators.ts
29
- - src/utils/formatters.ts
30
- - src/utils/transformers.ts
31
- - src/utils/parsers.ts
32
- - tests/validators.test.ts
33
- - tests/formatters.test.ts
34
- - tests/transformers.test.ts
35
- - tests/parsers.test.ts
36
- expectedOutcome: 100% test coverage for all utility modules, all tests pass
37
- complexity: medium
38
- tags:
39
- - testing
40
- - parallel-work
41
-
42
- # Tasks requiring coordination
43
- - id: api-frontend-sync
44
- description: Add a new feature requiring both API endpoints and frontend components
45
- files:
46
- - src/api/notifications.ts
47
- - src/api/routes.ts
48
- - src/components/NotificationBell.tsx
49
- - src/components/NotificationList.tsx
50
- - src/hooks/useNotifications.ts
51
- - src/types/notification.ts
52
- expectedOutcome: Notifications API and UI working together, types shared correctly
53
- complexity: medium
54
- tags:
55
- - full-stack
56
- - coordination
57
-
58
- - id: schema-migration-chain
59
- description: Database schema change requiring coordinated updates across layers
60
- files:
61
- - migrations/add_user_preferences.sql
62
- - src/models/userPreferences.ts
63
- - src/repositories/userPreferences.ts
64
- - src/services/userService.ts
65
- - src/routes/user.ts
66
- - src/types/user.ts
67
- expectedOutcome: Schema migrated, all layers updated, API returns new fields
68
- complexity: high
69
- tags:
70
- - database
71
- - coordination
72
- - layered-architecture
73
-
74
- # Tasks with dependencies
75
- - id: dependency-chain
76
- description: Build a feature where each component depends on the previous
77
- files:
78
- - src/core/eventBus.ts
79
- - src/services/eventHandler.ts
80
- - src/workers/eventProcessor.ts
81
- - src/api/webhooks.ts
82
- - tests/integration/events.test.ts
83
- expectedOutcome: Event system working end-to-end, integration tests pass
84
- complexity: high
85
- tags:
86
- - dependencies
87
- - event-driven
88
-
89
- # Tasks requiring consensus
90
- - id: design-review
91
- description: Review and improve API design across multiple endpoints
92
- files:
93
- - src/api/users.ts
94
- - src/api/products.ts
95
- - src/api/orders.ts
96
- - src/api/payments.ts
97
- - docs/api-design.md
98
- expectedOutcome: Consistent API design patterns, documentation updated
99
- complexity: medium
100
- tags:
101
- - review
102
- - consensus
103
- - documentation
104
-
105
- # Large-scale coordination
106
- - id: monorepo-sync
107
- description: Update shared types across multiple packages in a monorepo
108
- files:
109
- - packages/shared/types/user.ts
110
- - packages/shared/types/product.ts
111
- - packages/api/src/routes/user.ts
112
- - packages/api/src/routes/product.ts
113
- - packages/web/src/api/user.ts
114
- - packages/web/src/api/product.ts
115
- - packages/mobile/src/api/user.ts
116
- - packages/mobile/src/api/product.ts
117
- expectedOutcome: Types consistent across all packages, no type errors
118
- complexity: high
119
- tags:
120
- - monorepo
121
- - types
122
- - coordination
@@ -1,20 +0,0 @@
1
- name: Quick Test
2
- description: Minimal benchmark for testing swarm vs single agent
3
- version: "1.0.0"
4
-
5
- tasks:
6
- - id: count-files
7
- description: Count TypeScript files in packages/benchmark/src and list their exports
8
- files:
9
- - packages/benchmark/src
10
- expectedOutcome: Accurate count and export list
11
- complexity: low
12
- timeoutMs: 60000
13
-
14
- - id: summarize-types
15
- description: Read packages/benchmark/src/types.ts and summarize the main interfaces
16
- files:
17
- - packages/benchmark/src/types.ts
18
- expectedOutcome: Clear summary of Task, RunResult, and ComparisonResult interfaces
19
- complexity: low
20
- timeoutMs: 60000
@@ -1,47 +0,0 @@
1
- /**
2
- * Comparison Benchmark
3
- *
4
- * Main orchestrator for running comparison benchmarks across configurations.
5
- */
6
- import type { ConfigurationType, Task, RunResult, ComparisonResult, BenchmarkConfig } from './types.js';
7
- /**
8
- * Main benchmark orchestrator
9
- */
10
- export declare class ComparisonBenchmark {
11
- private config;
12
- private runners;
13
- constructor(config?: Partial<BenchmarkConfig>);
14
- /**
15
- * Create a runner for a configuration type
16
- */
17
- private createRunner;
18
- /**
19
- * Run a comparison across all configured configurations
20
- */
21
- runComparison(task: Task): Promise<ComparisonResult>;
22
- /**
23
- * Run a single configuration
24
- */
25
- runSingle(task: Task, configType: ConfigurationType): Promise<RunResult>;
26
- /**
27
- * Calculate score breakdown for a result
28
- */
29
- private calculateScore;
30
- /**
31
- * Determine the winning configuration
32
- */
33
- private determineWinner;
34
- /**
35
- * Print a single run result
36
- */
37
- private printRunResult;
38
- /**
39
- * Print comparison table
40
- */
41
- printComparison(comparison: ComparisonResult): void;
42
- }
43
- /**
44
- * Quick helper to run a comparison benchmark
45
- */
46
- export declare function runComparison(task: Task, config?: Partial<BenchmarkConfig>): Promise<ComparisonResult>;
47
- //# sourceMappingURL=benchmark.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../src/benchmark.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,iBAAiB,EACjB,IAAI,EACJ,SAAS,EACT,gBAAgB,EAEhB,eAAe,EAChB,MAAM,YAAY,CAAC;AASpB;;GAEG;AACH,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,OAAO,CAA8C;gBAEjD,MAAM,GAAE,OAAO,CAAC,eAAe,CAAM;IAUjD;;OAEG;IACH,OAAO,CAAC,YAAY;IAapB;;OAEG;IACG,aAAa,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAgE1D;;OAEG;IACG,SAAS,CACb,IAAI,EAAE,IAAI,EACV,UAAU,EAAE,iBAAiB,GAC5B,OAAO,CAAC,SAAS,CAAC;IAarB;;OAEG;IACH,OAAO,CAAC,cAAc;IAwBtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAiBvB;;OAEG;IACH,OAAO,CAAC,cAAc;IAWtB;;OAEG;IACH,eAAe,CAAC,UAAU,EAAE,gBAAgB,GAAG,IAAI;CAiFpD;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,IAAI,EAAE,IAAI,EACV,MAAM,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,GAChC,OAAO,CAAC,gBAAgB,CAAC,CAG3B"}