agent-relay 2.3.4 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. package/README.md +1 -1
  2. package/dist/src/cli/index.js +124 -7
  3. package/dist/src/cli/index.js.map +1 -1
  4. package/package.json +20 -26
  5. package/packages/acp-bridge/package.json +2 -2
  6. package/packages/bridge/package.json +7 -7
  7. package/packages/config/package.json +2 -2
  8. package/packages/continuity/package.json +2 -2
  9. package/packages/daemon/package.json +12 -12
  10. package/packages/hooks/package.json +4 -4
  11. package/packages/mcp/package.json +5 -5
  12. package/packages/memory/package.json +2 -2
  13. package/packages/policy/package.json +2 -2
  14. package/packages/protocol/package.json +1 -1
  15. package/packages/resiliency/package.json +1 -1
  16. package/packages/sdk/dist/index.d.ts +1 -29
  17. package/packages/sdk/dist/index.d.ts.map +1 -1
  18. package/packages/sdk/dist/index.js +1 -38
  19. package/packages/sdk/dist/index.js.map +1 -1
  20. package/packages/sdk/package.json +4 -25
  21. package/packages/sdk/src/index.ts +1 -69
  22. package/packages/sdk-py/README.md +56 -0
  23. package/packages/sdk-py/pyproject.toml +23 -0
  24. package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
  25. package/packages/sdk-py/src/agent_relay/builder.py +367 -0
  26. package/packages/sdk-py/src/agent_relay/types.py +92 -0
  27. package/packages/sdk-py/tests/__init__.py +0 -0
  28. package/packages/sdk-py/tests/test_builder.py +101 -0
  29. package/packages/sdk-ts/dist/index.d.ts +1 -0
  30. package/packages/sdk-ts/dist/index.d.ts.map +1 -1
  31. package/packages/sdk-ts/dist/index.js +1 -0
  32. package/packages/sdk-ts/dist/index.js.map +1 -1
  33. package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
  34. package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
  35. package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
  36. package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
  37. package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
  38. package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
  39. package/packages/sdk-ts/dist/workflows/builder.js +179 -0
  40. package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
  41. package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
  42. package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
  43. package/packages/sdk-ts/dist/workflows/cli.js +82 -0
  44. package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
  45. package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
  46. package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
  47. package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
  48. package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
  49. package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
  50. package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
  51. package/packages/sdk-ts/dist/workflows/index.js +10 -0
  52. package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
  53. package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
  54. package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
  55. package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
  56. package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
  57. package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
  58. package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
  59. package/packages/sdk-ts/dist/workflows/run.js +24 -0
  60. package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
  61. package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
  62. package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
  63. package/packages/sdk-ts/dist/workflows/runner.js +650 -0
  64. package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
  65. package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
  66. package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
  67. package/packages/sdk-ts/dist/workflows/state.js +140 -0
  68. package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
  69. package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
  70. package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
  71. package/packages/sdk-ts/dist/workflows/templates.js +395 -0
  72. package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
  73. package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
  74. package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
  75. package/packages/sdk-ts/dist/workflows/types.js +8 -0
  76. package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
  77. package/packages/sdk-ts/package.json +8 -2
  78. package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
  79. package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
  80. package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
  81. package/packages/sdk-ts/src/index.ts +1 -0
  82. package/packages/sdk-ts/src/workflows/README.md +450 -0
  83. package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
  84. package/packages/sdk-ts/src/workflows/builder.ts +241 -0
  85. package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
  86. package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
  87. package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
  88. package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
  89. package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
  90. package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
  91. package/packages/sdk-ts/src/workflows/cli.ts +93 -0
  92. package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
  93. package/packages/sdk-ts/src/workflows/index.ts +9 -0
  94. package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
  95. package/packages/sdk-ts/src/workflows/run.ts +47 -0
  96. package/packages/sdk-ts/src/workflows/runner.ts +873 -0
  97. package/packages/sdk-ts/src/workflows/schema.json +321 -0
  98. package/packages/sdk-ts/src/workflows/state.ts +279 -0
  99. package/packages/sdk-ts/src/workflows/templates.ts +544 -0
  100. package/packages/sdk-ts/src/workflows/types.ts +178 -0
  101. package/packages/sdk-ts/tsconfig.json +6 -1
  102. package/packages/spawner/package.json +1 -1
  103. package/packages/state/package.json +1 -1
  104. package/packages/storage/package.json +2 -2
  105. package/packages/telemetry/package.json +1 -1
  106. package/packages/trajectory/package.json +2 -2
  107. package/packages/user-directory/package.json +2 -2
  108. package/packages/utils/package.json +3 -3
  109. package/packages/wrapper/package.json +5 -6
  110. package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
  111. package/packages/api-types/.trajectories/index.json +0 -12
  112. package/packages/api-types/dist/index.d.ts +0 -21
  113. package/packages/api-types/dist/index.d.ts.map +0 -1
  114. package/packages/api-types/dist/index.js +0 -22
  115. package/packages/api-types/dist/index.js.map +0 -1
  116. package/packages/api-types/dist/schemas/agent.d.ts +0 -259
  117. package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
  118. package/packages/api-types/dist/schemas/agent.js +0 -102
  119. package/packages/api-types/dist/schemas/agent.js.map +0 -1
  120. package/packages/api-types/dist/schemas/api.d.ts +0 -290
  121. package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
  122. package/packages/api-types/dist/schemas/api.js +0 -162
  123. package/packages/api-types/dist/schemas/api.js.map +0 -1
  124. package/packages/api-types/dist/schemas/decision.d.ts +0 -230
  125. package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
  126. package/packages/api-types/dist/schemas/decision.js +0 -104
  127. package/packages/api-types/dist/schemas/decision.js.map +0 -1
  128. package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
  129. package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
  130. package/packages/api-types/dist/schemas/fleet.js +0 -71
  131. package/packages/api-types/dist/schemas/fleet.js.map +0 -1
  132. package/packages/api-types/dist/schemas/history.d.ts +0 -180
  133. package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
  134. package/packages/api-types/dist/schemas/history.js +0 -72
  135. package/packages/api-types/dist/schemas/history.js.map +0 -1
  136. package/packages/api-types/dist/schemas/index.d.ts +0 -14
  137. package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
  138. package/packages/api-types/dist/schemas/index.js +0 -22
  139. package/packages/api-types/dist/schemas/index.js.map +0 -1
  140. package/packages/api-types/dist/schemas/message.d.ts +0 -456
  141. package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
  142. package/packages/api-types/dist/schemas/message.js +0 -88
  143. package/packages/api-types/dist/schemas/message.js.map +0 -1
  144. package/packages/api-types/dist/schemas/session.d.ts +0 -60
  145. package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
  146. package/packages/api-types/dist/schemas/session.js +0 -36
  147. package/packages/api-types/dist/schemas/session.js.map +0 -1
  148. package/packages/api-types/dist/schemas/task.d.ts +0 -111
  149. package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
  150. package/packages/api-types/dist/schemas/task.js +0 -64
  151. package/packages/api-types/dist/schemas/task.js.map +0 -1
  152. package/packages/api-types/package.json +0 -61
  153. package/packages/api-types/scripts/generate-openapi.ts +0 -106
  154. package/packages/api-types/src/index.ts +0 -22
  155. package/packages/api-types/src/schemas/agent.test.ts +0 -164
  156. package/packages/api-types/src/schemas/agent.ts +0 -110
  157. package/packages/api-types/src/schemas/api.test.ts +0 -372
  158. package/packages/api-types/src/schemas/api.ts +0 -194
  159. package/packages/api-types/src/schemas/decision.test.ts +0 -324
  160. package/packages/api-types/src/schemas/decision.ts +0 -136
  161. package/packages/api-types/src/schemas/fleet.test.ts +0 -212
  162. package/packages/api-types/src/schemas/fleet.ts +0 -83
  163. package/packages/api-types/src/schemas/history.test.ts +0 -242
  164. package/packages/api-types/src/schemas/history.ts +0 -84
  165. package/packages/api-types/src/schemas/index.ts +0 -148
  166. package/packages/api-types/src/schemas/message.test.ts +0 -192
  167. package/packages/api-types/src/schemas/message.ts +0 -98
  168. package/packages/api-types/src/schemas/session.test.ts +0 -104
  169. package/packages/api-types/src/schemas/session.ts +0 -40
  170. package/packages/api-types/src/schemas/task.test.ts +0 -192
  171. package/packages/api-types/src/schemas/task.ts +0 -78
  172. package/packages/api-types/tsconfig.json +0 -19
  173. package/packages/api-types/vitest.config.ts +0 -9
  174. package/packages/benchmark/README.md +0 -200
  175. package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
  176. package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
  177. package/packages/benchmark/datasets/quick-test.yaml +0 -20
  178. package/packages/benchmark/dist/benchmark.d.ts +0 -47
  179. package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
  180. package/packages/benchmark/dist/benchmark.js +0 -224
  181. package/packages/benchmark/dist/benchmark.js.map +0 -1
  182. package/packages/benchmark/dist/cli.d.ts +0 -8
  183. package/packages/benchmark/dist/cli.d.ts.map +0 -1
  184. package/packages/benchmark/dist/cli.js +0 -185
  185. package/packages/benchmark/dist/cli.js.map +0 -1
  186. package/packages/benchmark/dist/harbor.d.ts +0 -53
  187. package/packages/benchmark/dist/harbor.d.ts.map +0 -1
  188. package/packages/benchmark/dist/harbor.js +0 -127
  189. package/packages/benchmark/dist/harbor.js.map +0 -1
  190. package/packages/benchmark/dist/index.d.ts +0 -48
  191. package/packages/benchmark/dist/index.d.ts.map +0 -1
  192. package/packages/benchmark/dist/index.js +0 -50
  193. package/packages/benchmark/dist/index.js.map +0 -1
  194. package/packages/benchmark/dist/runners/base.d.ts +0 -63
  195. package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
  196. package/packages/benchmark/dist/runners/base.js +0 -156
  197. package/packages/benchmark/dist/runners/base.js.map +0 -1
  198. package/packages/benchmark/dist/runners/index.d.ts +0 -10
  199. package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
  200. package/packages/benchmark/dist/runners/index.js +0 -10
  201. package/packages/benchmark/dist/runners/index.js.map +0 -1
  202. package/packages/benchmark/dist/runners/single.d.ts +0 -19
  203. package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
  204. package/packages/benchmark/dist/runners/single.js +0 -111
  205. package/packages/benchmark/dist/runners/single.js.map +0 -1
  206. package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
  207. package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
  208. package/packages/benchmark/dist/runners/subagent.js +0 -212
  209. package/packages/benchmark/dist/runners/subagent.js.map +0 -1
  210. package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
  211. package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
  212. package/packages/benchmark/dist/runners/swarm.js +0 -273
  213. package/packages/benchmark/dist/runners/swarm.js.map +0 -1
  214. package/packages/benchmark/dist/types.d.ts +0 -178
  215. package/packages/benchmark/dist/types.d.ts.map +0 -1
  216. package/packages/benchmark/dist/types.js +0 -16
  217. package/packages/benchmark/dist/types.js.map +0 -1
  218. package/packages/benchmark/package.json +0 -80
  219. package/packages/benchmark/src/benchmark.ts +0 -298
  220. package/packages/benchmark/src/cli.ts +0 -240
  221. package/packages/benchmark/src/harbor.ts +0 -170
  222. package/packages/benchmark/src/index.ts +0 -73
  223. package/packages/benchmark/src/runners/base.ts +0 -205
  224. package/packages/benchmark/src/runners/index.ts +0 -10
  225. package/packages/benchmark/src/runners/single.ts +0 -121
  226. package/packages/benchmark/src/runners/subagent.ts +0 -240
  227. package/packages/benchmark/src/runners/swarm.ts +0 -326
  228. package/packages/benchmark/src/types.ts +0 -205
  229. package/packages/benchmark/tsconfig.json +0 -20
  230. package/packages/cli-tester/README.md +0 -277
  231. package/packages/cli-tester/dist/index.d.ts +0 -21
  232. package/packages/cli-tester/dist/index.d.ts.map +0 -1
  233. package/packages/cli-tester/dist/index.js +0 -21
  234. package/packages/cli-tester/dist/index.js.map +0 -1
  235. package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
  236. package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
  237. package/packages/cli-tester/dist/utils/credential-check.js +0 -230
  238. package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
  239. package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
  240. package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
  241. package/packages/cli-tester/dist/utils/socket-client.js +0 -153
  242. package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
  243. package/packages/cli-tester/docker/Dockerfile +0 -61
  244. package/packages/cli-tester/docker/docker-compose.yml +0 -71
  245. package/packages/cli-tester/docker/entrypoint.sh +0 -58
  246. package/packages/cli-tester/package.json +0 -32
  247. package/packages/cli-tester/scripts/clear-auth.sh +0 -101
  248. package/packages/cli-tester/scripts/inject-message.sh +0 -42
  249. package/packages/cli-tester/scripts/start.sh +0 -71
  250. package/packages/cli-tester/scripts/test-cli.sh +0 -56
  251. package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
  252. package/packages/cli-tester/scripts/test-registration.sh +0 -182
  253. package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
  254. package/packages/cli-tester/scripts/test-spawn.sh +0 -140
  255. package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
  256. package/packages/cli-tester/scripts/verify-auth.sh +0 -112
  257. package/packages/cli-tester/src/index.ts +0 -40
  258. package/packages/cli-tester/src/utils/credential-check.ts +0 -284
  259. package/packages/cli-tester/src/utils/socket-client.ts +0 -211
  260. package/packages/cli-tester/tests/credential-check.test.ts +0 -56
  261. package/packages/cli-tester/tsconfig.json +0 -11
  262. package/packages/sdk/dist/browser-client.d.ts +0 -212
  263. package/packages/sdk/dist/browser-client.d.ts.map +0 -1
  264. package/packages/sdk/dist/browser-client.js +0 -750
  265. package/packages/sdk/dist/browser-client.js.map +0 -1
  266. package/packages/sdk/dist/browser-framing.d.ts +0 -46
  267. package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
  268. package/packages/sdk/dist/browser-framing.js +0 -122
  269. package/packages/sdk/dist/browser-framing.js.map +0 -1
  270. package/packages/sdk/dist/standalone.d.ts +0 -89
  271. package/packages/sdk/dist/standalone.d.ts.map +0 -1
  272. package/packages/sdk/dist/standalone.js +0 -131
  273. package/packages/sdk/dist/standalone.js.map +0 -1
  274. package/packages/sdk/dist/transports/index.d.ts +0 -92
  275. package/packages/sdk/dist/transports/index.d.ts.map +0 -1
  276. package/packages/sdk/dist/transports/index.js +0 -129
  277. package/packages/sdk/dist/transports/index.js.map +0 -1
  278. package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
  279. package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
  280. package/packages/sdk/dist/transports/socket-transport.js +0 -94
  281. package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
  282. package/packages/sdk/dist/transports/types.d.ts +0 -69
  283. package/packages/sdk/dist/transports/types.d.ts.map +0 -1
  284. package/packages/sdk/dist/transports/types.js +0 -10
  285. package/packages/sdk/dist/transports/types.js.map +0 -1
  286. package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
  287. package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
  288. package/packages/sdk/dist/transports/websocket-transport.js +0 -180
  289. package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
  290. package/packages/sdk/src/browser-client.ts +0 -985
  291. package/packages/sdk/src/browser-framing.test.ts +0 -115
  292. package/packages/sdk/src/browser-framing.ts +0 -150
  293. package/packages/sdk/src/standalone.ts +0 -183
  294. package/packages/sdk/src/transports/index.ts +0 -197
  295. package/packages/sdk/src/transports/socket-transport.ts +0 -115
  296. package/packages/sdk/src/transports/types.ts +0 -77
  297. package/packages/sdk/src/transports/websocket-transport.ts +0 -245
@@ -1,205 +0,0 @@
1
- /**
2
- * Benchmark Types
3
- *
4
- * Type definitions for the agent swarm performance benchmark system.
5
- */
6
-
7
- /**
8
- * Configuration type for benchmark runs
9
- */
10
- export type ConfigurationType = 'single' | 'subagent' | 'swarm';
11
-
12
- /**
13
- * Task complexity level
14
- */
15
- export type TaskComplexity = 'low' | 'medium' | 'high';
16
-
17
- /**
18
- * A benchmark task definition
19
- */
20
- export interface Task {
21
- /** Unique task identifier */
22
- id: string;
23
- /** Human-readable task description */
24
- description: string;
25
- /** Files the task operates on */
26
- files: string[];
27
- /** Success criteria for the task */
28
- expectedOutcome: string;
29
- /** Optional Harbor-style success criteria key for compatibility */
30
- success_criteria?: string;
31
- /** Task complexity level */
32
- complexity: TaskComplexity;
33
- /** Optional timeout in milliseconds (default: 300000 = 5 min) */
34
- timeoutMs?: number;
35
- /** Optional tags for categorization */
36
- tags?: string[];
37
- }
38
-
39
- /**
40
- * Result of a single benchmark run
41
- */
42
- export interface RunResult {
43
- /** Task identifier */
44
- taskId: string;
45
- /** Configuration used for this run */
46
- configuration: ConfigurationType;
47
-
48
- // Performance metrics
49
- /** Total time from start to completion in milliseconds */
50
- totalTimeMs: number;
51
- /** Time to first agent action in milliseconds */
52
- timeToFirstActionMs: number;
53
-
54
- // Communication metrics (multi-agent only)
55
- /** Total number of inter-agent messages */
56
- messageCount: number;
57
- /** Average message latency in milliseconds */
58
- avgLatencyMs: number;
59
- /** P50 latency in milliseconds */
60
- latencyP50Ms: number;
61
- /** P99 latency in milliseconds */
62
- latencyP99Ms: number;
63
- /** Number of coordination rounds */
64
- coordinationRounds: number;
65
-
66
- // Resource metrics
67
- /** Number of agents used */
68
- agentCount: number;
69
- /** Total tokens consumed (if available) */
70
- totalTokensUsed: number;
71
- /** Peak memory usage in MB */
72
- peakMemoryMb: number;
73
-
74
- // Outcome metrics
75
- /** Whether the task completed successfully */
76
- success: boolean;
77
- /** Completion rate (0-1) for partial success */
78
- completionRate: number;
79
- /** Error messages if any */
80
- errors: string[];
81
-
82
- // Metadata
83
- /** Timestamp when the run started */
84
- startedAt: number;
85
- /** Timestamp when the run completed */
86
- completedAt: number;
87
- }
88
-
89
- /**
90
- * Comparison result across all configurations
91
- */
92
- export interface ComparisonResult {
93
- /** Task identifier */
94
- taskId: string;
95
- /** Results for each configuration */
96
- results: Map<ConfigurationType, RunResult>;
97
- /** The winning configuration based on scoring */
98
- winner: ConfigurationType;
99
- /** Score breakdown for each configuration */
100
- scores: Map<ConfigurationType, ScoreBreakdown>;
101
- }
102
-
103
- /**
104
- * Score breakdown for a configuration
105
- */
106
- export interface ScoreBreakdown {
107
- /** Total score (0-100) */
108
- total: number;
109
- /** Success component (0-50) */
110
- successScore: number;
111
- /** Time efficiency component (0-30) */
112
- timeScore: number;
113
- /** Resource efficiency component (0-20) */
114
- efficiencyScore: number;
115
- }
116
-
117
- /**
118
- * Benchmark configuration options
119
- */
120
- export interface BenchmarkConfig {
121
- /** Which configurations to run */
122
- configurations: ConfigurationType[];
123
- /** CLI to use for agents (default: 'claude') */
124
- cli: string;
125
- /** Working directory for tasks */
126
- cwd?: string;
127
- /** Suppress console output */
128
- quiet: boolean;
129
- /** Cool-down time between runs in milliseconds */
130
- cooldownMs: number;
131
- /** Maximum concurrent agents for swarm */
132
- maxSwarmSize: number;
133
- /** Custom socket path for relay */
134
- socketPath?: string;
135
- }
136
-
137
- /**
138
- * Default benchmark configuration
139
- */
140
- export const DEFAULT_BENCHMARK_CONFIG: BenchmarkConfig = {
141
- configurations: ['single', 'subagent', 'swarm'],
142
- cli: 'claude',
143
- quiet: false,
144
- cooldownMs: 5000,
145
- maxSwarmSize: 10,
146
- };
147
-
148
- /**
149
- * Metrics collected during a run
150
- */
151
- export interface RunMetrics {
152
- /** Number of messages sent */
153
- messages: number;
154
- /** Message latencies in milliseconds */
155
- latencies: number[];
156
- /** Run start timestamp */
157
- startTime: number;
158
- /** Spawned agent names */
159
- spawnedAgents: string[];
160
- /** Error events */
161
- errors: string[];
162
- }
163
-
164
- /**
165
- * Task dataset definition
166
- */
167
- export interface TaskDataset {
168
- /** Dataset name */
169
- name: string;
170
- /** Dataset description */
171
- description?: string;
172
- /** Version identifier */
173
- version?: string;
174
- /** Tasks in the dataset */
175
- tasks: Task[];
176
- }
177
-
178
- /**
179
- * Harbor-compatible evaluation input
180
- */
181
- export interface HarborTaskInput {
182
- id: string;
183
- description: string;
184
- files?: string[];
185
- success_criteria?: string;
186
- complexity?: TaskComplexity;
187
- agents_required?: number;
188
- [key: string]: unknown;
189
- }
190
-
191
- /**
192
- * Harbor-compatible evaluation output
193
- */
194
- export interface HarborEvaluationOutput {
195
- task_id: string;
196
- configurations: Record<ConfigurationType, RunResult>;
197
- winner: ConfigurationType;
198
- scores: Record<ConfigurationType, ScoreBreakdown>;
199
- metadata: {
200
- benchmark_version: string;
201
- started_at: number;
202
- completed_at: number;
203
- total_duration_ms: number;
204
- };
205
- }
@@ -1,20 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "ES2022",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
6
- "lib": ["ES2022"],
7
- "outDir": "./dist",
8
- "rootDir": "./src",
9
- "strict": true,
10
- "esModuleInterop": true,
11
- "skipLibCheck": true,
12
- "forceConsistentCasingInFileNames": true,
13
- "declaration": true,
14
- "declarationMap": true,
15
- "sourceMap": true,
16
- "resolveJsonModule": true
17
- },
18
- "include": ["src/**/*"],
19
- "exclude": ["node_modules", "dist", "**/*.test.ts"]
20
- }
@@ -1,277 +0,0 @@
1
- # @agent-relay/cli-tester
2
-
3
- Manual interactive testing environment for CLI authentication flows.
4
-
5
- ## Purpose
6
-
7
- This package provides a Docker-based environment for testing CLI authentication with real OAuth providers. It's designed for:
8
-
9
- - **Debugging auth issues** - Isolate problems with specific CLIs (e.g., "Cursor doesn't work")
10
- - **Testing auth flows** - Verify OAuth flows work end-to-end
11
- - **Message injection** - Test relay-pty message delivery
12
- - **Credential verification** - Check that credentials are saved correctly
13
-
14
- ## Quick Start
15
-
16
- From the relay repo root:
17
-
18
- ```bash
19
- # Start the test environment (drops into container shell)
20
- npm run cli-tester:start
21
-
22
- # Start with clean credentials (removes any cached auth)
23
- npm run cli-tester:start:clean
24
-
25
- # Start with daemon for full integration testing
26
- npm run cli-tester:start:daemon
27
- ```
28
-
29
- ## Inside the Container
30
-
31
- ### Test a CLI
32
-
33
- ```bash
34
- # Test Claude CLI with relay-pty
35
- ./scripts/test-cli.sh claude
36
-
37
- # Test Codex with device auth
38
- ./scripts/test-cli.sh codex --device-auth
39
-
40
- # Test with debug output
41
- DEBUG=1 ./scripts/test-cli.sh cursor
42
- ```
43
-
44
- ### Verify Credentials
45
-
46
- ```bash
47
- # Check if credentials exist (after authenticating)
48
- ./scripts/verify-auth.sh claude
49
- ./scripts/verify-auth.sh codex
50
- ./scripts/verify-auth.sh gemini
51
- ```
52
-
53
- ### Inject Messages
54
-
55
- In a second terminal (while CLI is running):
56
-
57
- ```bash
58
- # Send a message via relay-pty socket
59
- ./scripts/inject-message.sh test-claude "What is 2+2?"
60
- ```
61
-
62
- ### Clear Credentials
63
-
64
- ```bash
65
- # Clear credentials for fresh testing
66
- ./scripts/clear-auth.sh claude
67
- ./scripts/clear-auth.sh all # Clear all CLIs
68
- ```
69
-
70
- ## Advanced: Testing Spawn Flow
71
-
72
- The simple `test-cli.sh` tests the CLI in isolation. For debugging issues where the CLI works in isolation but fails when spawned via the application (e.g., registration timeout), use these advanced tests:
73
-
74
- ### Test Spawn Behavior
75
-
76
- Simulates what `AgentSpawner.spawn()` does, including CLI-specific flags:
77
-
78
- ```bash
79
- # Test with same flags as spawner (--force for cursor, --dangerously-skip-permissions for claude)
80
- ./scripts/test-spawn.sh cursor
81
-
82
- # Test in interactive mode (without auto-accept flags)
83
- ./scripts/test-spawn.sh cursor --interactive
84
-
85
- # With verbose debug output
86
- DEBUG_SPAWN=1 ./scripts/test-spawn.sh cursor
87
- ```
88
-
89
- ### Test Registration Flow
90
-
91
- Monitors the registration files that the spawner polls. This is the step that times out:
92
-
93
- ```bash
94
- # Watch registration with 60 second timeout
95
- ./scripts/test-registration.sh cursor 60
96
-
97
- # With debug output
98
- DEBUG_SPAWN=1 ./scripts/test-registration.sh cursor
99
- ```
100
-
101
- ### Full Daemon Integration Test
102
-
103
- Starts a real daemon and tests the complete flow:
104
-
105
- ```bash
106
- # Full end-to-end test with daemon
107
- ./scripts/test-with-daemon.sh cursor
108
-
109
- # With debug output
110
- DEBUG=1 ./scripts/test-with-daemon.sh cursor
111
- ```
112
-
113
- **Note:** Requires the daemon to be built: `cd packages/daemon && npm run build`
114
-
115
- ## Debugging a Broken CLI
116
-
117
- When a CLI isn't working, use this workflow:
118
-
119
- ```bash
120
- # 1. Start fresh
121
- npm run cli-tester:start:clean
122
-
123
- # 2. Test the problematic CLI
124
- ./scripts/test-cli.sh cursor
125
-
126
- # 3. Observe the output for:
127
- # - Auth URLs being printed
128
- # - Error messages
129
- # - Prompt patterns
130
-
131
- # 4. Check credentials
132
- ./scripts/verify-auth.sh cursor
133
- ls -la ~/.cursor/
134
-
135
- # 5. Compare with a working CLI
136
- ./scripts/test-cli.sh claude
137
- ```
138
-
139
- ## Debugging Registration Timeout
140
-
141
- If a CLI works in isolation but times out when spawned ("Agent registration timeout"), the issue is in the daemon registration flow.
142
-
143
- ### Quick Test (Run This First)
144
-
145
- ```bash
146
- # Test the EXACT setup flow - this is what TerminalProviderSetup.tsx does
147
- DEBUG=1 ./scripts/test-full-spawn.sh cursor true
148
- ```
149
-
150
- This simulates:
151
- - `interactive: true` (no --force flag, like setup terminal)
152
- - 30 second registration timeout
153
- - Verbose logging of what's happening
154
-
155
- ### Understanding the Flow
156
-
157
- **Normal spawn (non-interactive):**
158
- ```bash
159
- ./scripts/test-full-spawn.sh cursor # Has --force flag
160
- ```
161
-
162
- **Setup terminal (interactive):**
163
- ```bash
164
- ./scripts/test-full-spawn.sh cursor true # NO --force flag
165
- ```
166
-
167
- The key difference is `interactive: true` **skips auto-accept flags**. Setup terminals expect the user to respond to prompts in the browser terminal.
168
-
169
- ### What the Tests Show
170
-
171
- 1. **test-full-spawn.sh** - Simulates spawner's 30s registration timeout
172
- - Shows poll count (like spawner logs)
173
- - Shows socket status
174
- - Captures CLI output to log file
175
- - Tells you exactly where things fail
176
-
177
- 2. **test-setup-flow.sh** - Identical to what TerminalProviderSetup.tsx does
178
- - Uses `__setup__cursor-xxx` naming
179
- - No CLI flags (interactive mode)
180
-
181
- ### Debugging Steps
182
-
183
- ```bash
184
- # 1. Test in isolation (verify CLI starts)
185
- ./scripts/test-cli.sh cursor
186
-
187
- # 2. Test NON-INTERACTIVE spawn (with --force)
188
- DEBUG=1 ./scripts/test-full-spawn.sh cursor
189
-
190
- # 3. Test INTERACTIVE spawn (setup terminal flow)
191
- DEBUG=1 ./scripts/test-full-spawn.sh cursor true
192
-
193
- # 4. Watch the log file in another terminal
194
- tail -f /tmp/relay-spawn-*.log
195
- ```
196
-
197
- ### Common Causes
198
-
199
- | Symptom | Cause | Fix |
200
- |---------|-------|-----|
201
- | CLI exits immediately | Not installed or crash | Check `which agent` |
202
- | Socket never created | CLI stuck on early prompt | Check log for prompts |
203
- | 30s timeout | CLI waiting for user input | Respond to prompts (trust, etc.) |
204
- | 30s timeout | No daemon to register with | Run with daemon profile |
205
-
206
- ### The Registration Flow
207
-
208
- The spawner waits for TWO conditions:
209
- 1. Agent in `connected-agents.json` (daemon updates this when CLI connects)
210
- 2. Agent in `agents.json` (relay-pty hook updates this)
211
-
212
- Without a running daemon, both files are empty → timeout.
213
-
214
- ## Available CLIs
215
-
216
- The container includes these pre-installed CLIs:
217
-
218
- | CLI | Command | Auth Command | Credential Path |
219
- |-----|---------|--------------|-----------------|
220
- | Claude | `claude` | (auto) | `~/.claude/.credentials.json` |
221
- | Codex | `codex` | `login` | `~/.codex/auth.json` |
222
- | Gemini | `gemini` | (auto) | `~/.gemini/credentials.json` |
223
- | Cursor | `agent` | (auto) | `~/.cursor/auth.json` |
224
- | OpenCode | `opencode` | `auth login` | `~/.local/share/opencode/auth.json` |
225
- | Droid | `droid` | `--login` | `~/.droid/auth.json` |
226
- | Copilot | `copilot` | `auth login` | `~/.config/gh/hosts.yml` |
227
-
228
- **Note:** Cursor CLI installs as `agent`, not `cursor`. The test scripts handle this mapping automatically.
229
-
230
- ## How It Works
231
-
232
- 1. **relay-pty** wraps the CLI and provides:
233
- - Unix socket for message injection
234
- - Output parsing for relay commands
235
- - Idle detection for message timing
236
-
237
- 2. **Docker volumes** persist credentials between runs so you don't have to re-authenticate each time.
238
-
239
- 3. **Shell scripts** provide simple commands for common operations.
240
-
241
- ## TypeScript API
242
-
243
- For programmatic use:
244
-
245
- ```typescript
246
- import { RelayPtyClient, checkCredentials } from '@agent-relay/cli-tester';
247
-
248
- // Check credentials
249
- const result = checkCredentials('claude');
250
- console.log(result.exists, result.valid, result.hasAccessToken);
251
-
252
- // Inject messages via socket
253
- const client = new RelayPtyClient('/tmp/relay-pty-test-claude.sock');
254
- await client.connect();
255
- await client.inject({ from: 'Test', body: 'Hello' });
256
- ```
257
-
258
- ## File Structure
259
-
260
- ```
261
- packages/cli-tester/
262
- ├── docker/
263
- │ ├── Dockerfile # Test environment image
264
- │ └── docker-compose.yml # Container configuration
265
- ├── scripts/
266
- │ ├── start.sh # Start container
267
- │ ├── test-cli.sh # Test a CLI with relay-pty
268
- │ ├── verify-auth.sh # Check credentials
269
- │ ├── inject-message.sh # Send message via socket
270
- │ └── clear-auth.sh # Clear credentials
271
- ├── src/
272
- │ └── utils/
273
- │ ├── socket-client.ts # relay-pty socket communication
274
- │ └── credential-check.ts # Credential file utilities
275
- └── tests/
276
- └── credential-check.test.ts
277
- ```
@@ -1,21 +0,0 @@
1
- /**
2
- * CLI Auth Tester - Manual interactive testing for CLI authentication flows
3
- *
4
- * This package provides utilities for testing CLI authentication in a Docker container.
5
- * Primary use case is debugging auth issues with various CLI tools (Claude, Codex, Gemini, etc.)
6
- *
7
- * @example
8
- * ```bash
9
- * # Start the test environment
10
- * npm run cli-tester:start
11
- *
12
- * # Inside container, test a CLI
13
- * ./scripts/test-cli.sh claude
14
- *
15
- * # Verify credentials
16
- * ./scripts/verify-auth.sh claude
17
- * ```
18
- */
19
- export { RelayPtyClient, createClient, getSocketPath, type InjectRequest, type InjectResponse, type StatusRequest, type StatusResponse, type RelayPtyResponse, } from './utils/socket-client.js';
20
- export { checkCredentials, clearCredentials, checkAllCredentials, clearAllCredentials, getCredentialPath, getConfigPaths, type CLIType, type CredentialCheck, } from './utils/credential-check.js';
21
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EACL,cAAc,EACd,YAAY,EACZ,aAAa,EACb,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,gBAAgB,GACtB,MAAM,0BAA0B,CAAC;AAElC,OAAO,EACL,gBAAgB,EAChB,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,EACnB,iBAAiB,EACjB,cAAc,EACd,KAAK,OAAO,EACZ,KAAK,eAAe,GACrB,MAAM,6BAA6B,CAAC"}
@@ -1,21 +0,0 @@
1
- /**
2
- * CLI Auth Tester - Manual interactive testing for CLI authentication flows
3
- *
4
- * This package provides utilities for testing CLI authentication in a Docker container.
5
- * Primary use case is debugging auth issues with various CLI tools (Claude, Codex, Gemini, etc.)
6
- *
7
- * @example
8
- * ```bash
9
- * # Start the test environment
10
- * npm run cli-tester:start
11
- *
12
- * # Inside container, test a CLI
13
- * ./scripts/test-cli.sh claude
14
- *
15
- * # Verify credentials
16
- * ./scripts/verify-auth.sh claude
17
- * ```
18
- */
19
- export { RelayPtyClient, createClient, getSocketPath, } from './utils/socket-client.js';
20
- export { checkCredentials, clearCredentials, checkAllCredentials, clearAllCredentials, getCredentialPath, getConfigPaths, } from './utils/credential-check.js';
21
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EACL,cAAc,EACd,YAAY,EACZ,aAAa,GAMd,MAAM,0BAA0B,CAAC;AAElC,OAAO,EACL,gBAAgB,EAChB,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,EACnB,iBAAiB,EACjB,cAAc,GAGf,MAAM,6BAA6B,CAAC"}
@@ -1,56 +0,0 @@
1
- /**
2
- * Credential checking utilities for CLI authentication testing
3
- * Verifies and parses credential files for various CLI tools
4
- */
5
- export type CLIType = 'claude' | 'codex' | 'gemini' | 'cursor' | 'opencode' | 'droid';
6
- export interface CredentialCheck {
7
- /** CLI type being checked */
8
- cli: CLIType;
9
- /** Whether the credential file exists */
10
- exists: boolean;
11
- /** Whether the credentials appear valid (have required fields) */
12
- valid: boolean;
13
- /** Whether an access token is present */
14
- hasAccessToken: boolean;
15
- /** Whether a refresh token is present */
16
- hasRefreshToken: boolean;
17
- /** Token expiration date if available */
18
- expiresAt?: Date;
19
- /** Path to the credential file */
20
- filePath: string;
21
- /** Raw credential data (tokens redacted) */
22
- data?: Record<string, unknown>;
23
- /** Error message if check failed */
24
- error?: string;
25
- }
26
- /**
27
- * Get the credential file path for a CLI
28
- */
29
- export declare function getCredentialPath(cli: CLIType): string;
30
- /**
31
- * Get all config paths for a CLI (for clearing)
32
- */
33
- export declare function getConfigPaths(cli: CLIType): string[];
34
- /**
35
- * Check credentials for a specific CLI
36
- */
37
- export declare function checkCredentials(cli: CLIType): CredentialCheck;
38
- /**
39
- * Clear credentials for a specific CLI
40
- */
41
- export declare function clearCredentials(cli: CLIType): {
42
- cleared: string[];
43
- errors: string[];
44
- };
45
- /**
46
- * Clear all CLI credentials
47
- */
48
- export declare function clearAllCredentials(): Record<CLIType, {
49
- cleared: string[];
50
- errors: string[];
51
- }>;
52
- /**
53
- * Check all CLI credentials
54
- */
55
- export declare function checkAllCredentials(): Record<CLIType, CredentialCheck>;
56
- //# sourceMappingURL=credential-check.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"credential-check.d.ts","sourceRoot":"","sources":["../../src/utils/credential-check.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,MAAM,MAAM,OAAO,GAAG,QAAQ,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,UAAU,GAAG,OAAO,CAAC;AAEtF,MAAM,WAAW,eAAe;IAC9B,6BAA6B;IAC7B,GAAG,EAAE,OAAO,CAAC;IACb,yCAAyC;IACzC,MAAM,EAAE,OAAO,CAAC;IAChB,kEAAkE;IAClE,KAAK,EAAE,OAAO,CAAC;IACf,yCAAyC;IACzC,cAAc,EAAE,OAAO,CAAC;IACxB,yCAAyC;IACzC,eAAe,EAAE,OAAO,CAAC;IACzB,yCAAyC;IACzC,SAAS,CAAC,EAAE,IAAI,CAAC;IACjB,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,4CAA4C;IAC5C,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,OAAO,GAAG,MAAM,CAmBtD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,OAAO,GAAG,MAAM,EAAE,CAkCrD;AAoGD;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,OAAO,GAAG,eAAe,CAqC9D;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,OAAO,GAAG;IAAE,OAAO,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAiBtF;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,CAAC,OAAO,EAAE;IAAE,OAAO,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,CAS9F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,CAAC,OAAO,EAAE,eAAe,CAAC,CAStE"}