agent-relay 2.3.4 → 2.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/src/cli/index.js +124 -7
- package/dist/src/cli/index.js.map +1 -1
- package/package.json +23 -26
- package/packages/acp-bridge/package.json +2 -2
- package/packages/bridge/package.json +7 -7
- package/packages/config/package.json +2 -2
- package/packages/continuity/package.json +2 -2
- package/packages/daemon/package.json +12 -12
- package/packages/hooks/package.json +4 -4
- package/packages/mcp/package.json +5 -5
- package/packages/memory/package.json +2 -2
- package/packages/policy/package.json +2 -2
- package/packages/protocol/package.json +1 -1
- package/packages/resiliency/package.json +1 -1
- package/packages/sdk/dist/index.d.ts +1 -29
- package/packages/sdk/dist/index.d.ts.map +1 -1
- package/packages/sdk/dist/index.js +1 -38
- package/packages/sdk/dist/index.js.map +1 -1
- package/packages/sdk/package.json +4 -25
- package/packages/sdk/src/index.ts +1 -69
- package/packages/sdk-py/README.md +56 -0
- package/packages/sdk-py/pyproject.toml +23 -0
- package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
- package/packages/sdk-py/src/agent_relay/builder.py +367 -0
- package/packages/sdk-py/src/agent_relay/types.py +92 -0
- package/packages/sdk-py/tests/__init__.py +0 -0
- package/packages/sdk-py/tests/test_builder.py +101 -0
- package/packages/sdk-ts/dist/index.d.ts +1 -0
- package/packages/sdk-ts/dist/index.d.ts.map +1 -1
- package/packages/sdk-ts/dist/index.js +1 -0
- package/packages/sdk-ts/dist/index.js.map +1 -1
- package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
- package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
- package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
- package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/builder.js +179 -0
- package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
- package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/cli.js +82 -0
- package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
- package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
- package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
- package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/index.js +10 -0
- package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
- package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
- package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
- package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/run.js +24 -0
- package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
- package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/runner.js +650 -0
- package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
- package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/state.js +140 -0
- package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
- package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/templates.js +395 -0
- package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
- package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/types.js +8 -0
- package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
- package/packages/sdk-ts/package.json +8 -2
- package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
- package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
- package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
- package/packages/sdk-ts/src/index.ts +1 -0
- package/packages/sdk-ts/src/workflows/README.md +450 -0
- package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
- package/packages/sdk-ts/src/workflows/builder.ts +241 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
- package/packages/sdk-ts/src/workflows/cli.ts +93 -0
- package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
- package/packages/sdk-ts/src/workflows/index.ts +9 -0
- package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
- package/packages/sdk-ts/src/workflows/run.ts +47 -0
- package/packages/sdk-ts/src/workflows/runner.ts +873 -0
- package/packages/sdk-ts/src/workflows/schema.json +321 -0
- package/packages/sdk-ts/src/workflows/state.ts +279 -0
- package/packages/sdk-ts/src/workflows/templates.ts +544 -0
- package/packages/sdk-ts/src/workflows/types.ts +178 -0
- package/packages/sdk-ts/tsconfig.json +6 -1
- package/packages/spawner/package.json +1 -1
- package/packages/state/package.json +1 -1
- package/packages/storage/package.json +2 -2
- package/packages/telemetry/package.json +1 -1
- package/packages/trajectory/package.json +2 -2
- package/packages/user-directory/package.json +2 -2
- package/packages/utils/package.json +3 -3
- package/packages/wrapper/package.json +5 -6
- package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
- package/packages/api-types/.trajectories/index.json +0 -12
- package/packages/api-types/dist/index.d.ts +0 -21
- package/packages/api-types/dist/index.d.ts.map +0 -1
- package/packages/api-types/dist/index.js +0 -22
- package/packages/api-types/dist/index.js.map +0 -1
- package/packages/api-types/dist/schemas/agent.d.ts +0 -259
- package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/agent.js +0 -102
- package/packages/api-types/dist/schemas/agent.js.map +0 -1
- package/packages/api-types/dist/schemas/api.d.ts +0 -290
- package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/api.js +0 -162
- package/packages/api-types/dist/schemas/api.js.map +0 -1
- package/packages/api-types/dist/schemas/decision.d.ts +0 -230
- package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/decision.js +0 -104
- package/packages/api-types/dist/schemas/decision.js.map +0 -1
- package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
- package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/fleet.js +0 -71
- package/packages/api-types/dist/schemas/fleet.js.map +0 -1
- package/packages/api-types/dist/schemas/history.d.ts +0 -180
- package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/history.js +0 -72
- package/packages/api-types/dist/schemas/history.js.map +0 -1
- package/packages/api-types/dist/schemas/index.d.ts +0 -14
- package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/index.js +0 -22
- package/packages/api-types/dist/schemas/index.js.map +0 -1
- package/packages/api-types/dist/schemas/message.d.ts +0 -456
- package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/message.js +0 -88
- package/packages/api-types/dist/schemas/message.js.map +0 -1
- package/packages/api-types/dist/schemas/session.d.ts +0 -60
- package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/session.js +0 -36
- package/packages/api-types/dist/schemas/session.js.map +0 -1
- package/packages/api-types/dist/schemas/task.d.ts +0 -111
- package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/task.js +0 -64
- package/packages/api-types/dist/schemas/task.js.map +0 -1
- package/packages/api-types/package.json +0 -61
- package/packages/api-types/scripts/generate-openapi.ts +0 -106
- package/packages/api-types/src/index.ts +0 -22
- package/packages/api-types/src/schemas/agent.test.ts +0 -164
- package/packages/api-types/src/schemas/agent.ts +0 -110
- package/packages/api-types/src/schemas/api.test.ts +0 -372
- package/packages/api-types/src/schemas/api.ts +0 -194
- package/packages/api-types/src/schemas/decision.test.ts +0 -324
- package/packages/api-types/src/schemas/decision.ts +0 -136
- package/packages/api-types/src/schemas/fleet.test.ts +0 -212
- package/packages/api-types/src/schemas/fleet.ts +0 -83
- package/packages/api-types/src/schemas/history.test.ts +0 -242
- package/packages/api-types/src/schemas/history.ts +0 -84
- package/packages/api-types/src/schemas/index.ts +0 -148
- package/packages/api-types/src/schemas/message.test.ts +0 -192
- package/packages/api-types/src/schemas/message.ts +0 -98
- package/packages/api-types/src/schemas/session.test.ts +0 -104
- package/packages/api-types/src/schemas/session.ts +0 -40
- package/packages/api-types/src/schemas/task.test.ts +0 -192
- package/packages/api-types/src/schemas/task.ts +0 -78
- package/packages/api-types/tsconfig.json +0 -19
- package/packages/api-types/vitest.config.ts +0 -9
- package/packages/benchmark/README.md +0 -200
- package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
- package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
- package/packages/benchmark/datasets/quick-test.yaml +0 -20
- package/packages/benchmark/dist/benchmark.d.ts +0 -47
- package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
- package/packages/benchmark/dist/benchmark.js +0 -224
- package/packages/benchmark/dist/benchmark.js.map +0 -1
- package/packages/benchmark/dist/cli.d.ts +0 -8
- package/packages/benchmark/dist/cli.d.ts.map +0 -1
- package/packages/benchmark/dist/cli.js +0 -185
- package/packages/benchmark/dist/cli.js.map +0 -1
- package/packages/benchmark/dist/harbor.d.ts +0 -53
- package/packages/benchmark/dist/harbor.d.ts.map +0 -1
- package/packages/benchmark/dist/harbor.js +0 -127
- package/packages/benchmark/dist/harbor.js.map +0 -1
- package/packages/benchmark/dist/index.d.ts +0 -48
- package/packages/benchmark/dist/index.d.ts.map +0 -1
- package/packages/benchmark/dist/index.js +0 -50
- package/packages/benchmark/dist/index.js.map +0 -1
- package/packages/benchmark/dist/runners/base.d.ts +0 -63
- package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/base.js +0 -156
- package/packages/benchmark/dist/runners/base.js.map +0 -1
- package/packages/benchmark/dist/runners/index.d.ts +0 -10
- package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/index.js +0 -10
- package/packages/benchmark/dist/runners/index.js.map +0 -1
- package/packages/benchmark/dist/runners/single.d.ts +0 -19
- package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/single.js +0 -111
- package/packages/benchmark/dist/runners/single.js.map +0 -1
- package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
- package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/subagent.js +0 -212
- package/packages/benchmark/dist/runners/subagent.js.map +0 -1
- package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
- package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/swarm.js +0 -273
- package/packages/benchmark/dist/runners/swarm.js.map +0 -1
- package/packages/benchmark/dist/types.d.ts +0 -178
- package/packages/benchmark/dist/types.d.ts.map +0 -1
- package/packages/benchmark/dist/types.js +0 -16
- package/packages/benchmark/dist/types.js.map +0 -1
- package/packages/benchmark/package.json +0 -80
- package/packages/benchmark/src/benchmark.ts +0 -298
- package/packages/benchmark/src/cli.ts +0 -240
- package/packages/benchmark/src/harbor.ts +0 -170
- package/packages/benchmark/src/index.ts +0 -73
- package/packages/benchmark/src/runners/base.ts +0 -205
- package/packages/benchmark/src/runners/index.ts +0 -10
- package/packages/benchmark/src/runners/single.ts +0 -121
- package/packages/benchmark/src/runners/subagent.ts +0 -240
- package/packages/benchmark/src/runners/swarm.ts +0 -326
- package/packages/benchmark/src/types.ts +0 -205
- package/packages/benchmark/tsconfig.json +0 -20
- package/packages/cli-tester/README.md +0 -277
- package/packages/cli-tester/dist/index.d.ts +0 -21
- package/packages/cli-tester/dist/index.d.ts.map +0 -1
- package/packages/cli-tester/dist/index.js +0 -21
- package/packages/cli-tester/dist/index.js.map +0 -1
- package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
- package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
- package/packages/cli-tester/dist/utils/credential-check.js +0 -230
- package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
- package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
- package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
- package/packages/cli-tester/dist/utils/socket-client.js +0 -153
- package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
- package/packages/cli-tester/docker/Dockerfile +0 -61
- package/packages/cli-tester/docker/docker-compose.yml +0 -71
- package/packages/cli-tester/docker/entrypoint.sh +0 -58
- package/packages/cli-tester/package.json +0 -32
- package/packages/cli-tester/scripts/clear-auth.sh +0 -101
- package/packages/cli-tester/scripts/inject-message.sh +0 -42
- package/packages/cli-tester/scripts/start.sh +0 -71
- package/packages/cli-tester/scripts/test-cli.sh +0 -56
- package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
- package/packages/cli-tester/scripts/test-registration.sh +0 -182
- package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
- package/packages/cli-tester/scripts/test-spawn.sh +0 -140
- package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
- package/packages/cli-tester/scripts/verify-auth.sh +0 -112
- package/packages/cli-tester/src/index.ts +0 -40
- package/packages/cli-tester/src/utils/credential-check.ts +0 -284
- package/packages/cli-tester/src/utils/socket-client.ts +0 -211
- package/packages/cli-tester/tests/credential-check.test.ts +0 -56
- package/packages/cli-tester/tsconfig.json +0 -11
- package/packages/sdk/dist/browser-client.d.ts +0 -212
- package/packages/sdk/dist/browser-client.d.ts.map +0 -1
- package/packages/sdk/dist/browser-client.js +0 -750
- package/packages/sdk/dist/browser-client.js.map +0 -1
- package/packages/sdk/dist/browser-framing.d.ts +0 -46
- package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
- package/packages/sdk/dist/browser-framing.js +0 -122
- package/packages/sdk/dist/browser-framing.js.map +0 -1
- package/packages/sdk/dist/standalone.d.ts +0 -89
- package/packages/sdk/dist/standalone.d.ts.map +0 -1
- package/packages/sdk/dist/standalone.js +0 -131
- package/packages/sdk/dist/standalone.js.map +0 -1
- package/packages/sdk/dist/transports/index.d.ts +0 -92
- package/packages/sdk/dist/transports/index.d.ts.map +0 -1
- package/packages/sdk/dist/transports/index.js +0 -129
- package/packages/sdk/dist/transports/index.js.map +0 -1
- package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
- package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
- package/packages/sdk/dist/transports/socket-transport.js +0 -94
- package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
- package/packages/sdk/dist/transports/types.d.ts +0 -69
- package/packages/sdk/dist/transports/types.d.ts.map +0 -1
- package/packages/sdk/dist/transports/types.js +0 -10
- package/packages/sdk/dist/transports/types.js.map +0 -1
- package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
- package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
- package/packages/sdk/dist/transports/websocket-transport.js +0 -180
- package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
- package/packages/sdk/src/browser-client.ts +0 -985
- package/packages/sdk/src/browser-framing.test.ts +0 -115
- package/packages/sdk/src/browser-framing.ts +0 -150
- package/packages/sdk/src/standalone.ts +0 -183
- package/packages/sdk/src/transports/index.ts +0 -197
- package/packages/sdk/src/transports/socket-transport.ts +0 -115
- package/packages/sdk/src/transports/types.ts +0 -77
- package/packages/sdk/src/transports/websocket-transport.ts +0 -245
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"compilerOptions": {
|
|
3
|
-
"target": "ES2022",
|
|
4
|
-
"module": "NodeNext",
|
|
5
|
-
"moduleResolution": "NodeNext",
|
|
6
|
-
"lib": ["ES2022"],
|
|
7
|
-
"outDir": "dist",
|
|
8
|
-
"rootDir": "src",
|
|
9
|
-
"strict": true,
|
|
10
|
-
"esModuleInterop": true,
|
|
11
|
-
"skipLibCheck": true,
|
|
12
|
-
"forceConsistentCasingInFileNames": true,
|
|
13
|
-
"declaration": true,
|
|
14
|
-
"declarationMap": true,
|
|
15
|
-
"sourceMap": true
|
|
16
|
-
},
|
|
17
|
-
"include": ["src/**/*"],
|
|
18
|
-
"exclude": ["node_modules", "dist", "**/*.test.ts"]
|
|
19
|
-
}
|
|
@@ -1,200 +0,0 @@
|
|
|
1
|
-
# @agent-relay/benchmark
|
|
2
|
-
|
|
3
|
-
Performance benchmarking for comparing agent configurations: single agents, sub-agents (hierarchical), and swarms (peer-to-peer).
|
|
4
|
-
|
|
5
|
-
## Overview
|
|
6
|
-
|
|
7
|
-
This package provides tools to measure and compare the performance of different agent configurations on the same tasks:
|
|
8
|
-
|
|
9
|
-
| Configuration | Description | Communication |
|
|
10
|
-
|---------------|-------------|---------------|
|
|
11
|
-
| **Single** | One agent handles everything | None |
|
|
12
|
-
| **Sub-agent** | Lead spawns and coordinates workers | Hierarchical (parent → child) |
|
|
13
|
-
| **Swarm** | Peer agents coordinate as equals | Peer-to-peer via channels |
|
|
14
|
-
|
|
15
|
-
## Installation
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
npm install @agent-relay/benchmark
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
For standalone mode (in-process daemon):
|
|
22
|
-
```bash
|
|
23
|
-
npm install @agent-relay/benchmark @agent-relay/daemon
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
## Quick Start
|
|
27
|
-
|
|
28
|
-
### Programmatic Usage
|
|
29
|
-
|
|
30
|
-
```typescript
|
|
31
|
-
import { ComparisonBenchmark, type Task } from '@agent-relay/benchmark';
|
|
32
|
-
|
|
33
|
-
const task: Task = {
|
|
34
|
-
id: 'refactor-auth',
|
|
35
|
-
description: 'Refactor authentication to use JWT',
|
|
36
|
-
files: ['src/auth/session.ts', 'src/auth/middleware.ts'],
|
|
37
|
-
expectedOutcome: 'All tests pass, JWT tokens used',
|
|
38
|
-
complexity: 'medium',
|
|
39
|
-
};
|
|
40
|
-
|
|
41
|
-
const benchmark = new ComparisonBenchmark();
|
|
42
|
-
const comparison = await benchmark.runComparison(task);
|
|
43
|
-
|
|
44
|
-
console.log(`Winner: ${comparison.winner}`);
|
|
45
|
-
benchmark.printComparison(comparison);
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
### CLI Usage
|
|
49
|
-
|
|
50
|
-
```bash
|
|
51
|
-
# Run comparison on all configurations
|
|
52
|
-
relay-benchmark run --dataset tasks.yaml --config all
|
|
53
|
-
|
|
54
|
-
# Run specific configuration
|
|
55
|
-
relay-benchmark run --dataset tasks.yaml --config swarm
|
|
56
|
-
|
|
57
|
-
# List tasks in a dataset
|
|
58
|
-
relay-benchmark list tasks.yaml
|
|
59
|
-
|
|
60
|
-
# Output results to JSON
|
|
61
|
-
relay-benchmark run --dataset tasks.yaml -o results.json
|
|
62
|
-
```
|
|
63
|
-
|
|
64
|
-
### Harbor Integration
|
|
65
|
-
|
|
66
|
-
This package integrates with [Harbor](https://github.com/laude-institute/harbor) for large-scale agent evaluation:
|
|
67
|
-
|
|
68
|
-
```bash
|
|
69
|
-
# Install Harbor
|
|
70
|
-
pip install harbor-bench
|
|
71
|
-
|
|
72
|
-
# Run benchmark via Harbor
|
|
73
|
-
harbor run \
|
|
74
|
-
--dataset tasks.yaml \
|
|
75
|
-
--agent @agent-relay/benchmark/harbor \
|
|
76
|
-
--parallel 10
|
|
77
|
-
|
|
78
|
-
# Run at scale with cloud providers
|
|
79
|
-
harbor run \
|
|
80
|
-
--dataset tasks.yaml \
|
|
81
|
-
--agent @agent-relay/benchmark/harbor \
|
|
82
|
-
--env daytona \
|
|
83
|
-
--parallel 100
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
## Task Dataset Format
|
|
87
|
-
|
|
88
|
-
Tasks can be defined in YAML or JSON:
|
|
89
|
-
|
|
90
|
-
```yaml
|
|
91
|
-
name: My Tasks
|
|
92
|
-
description: Tasks for benchmarking
|
|
93
|
-
|
|
94
|
-
tasks:
|
|
95
|
-
- id: add-feature
|
|
96
|
-
description: Add user preferences feature
|
|
97
|
-
files:
|
|
98
|
-
- src/models/preferences.ts
|
|
99
|
-
- src/routes/preferences.ts
|
|
100
|
-
- tests/preferences.test.ts
|
|
101
|
-
expectedOutcome: Feature working, tests pass
|
|
102
|
-
complexity: medium # low, medium, high
|
|
103
|
-
timeoutMs: 300000 # optional, default 5 minutes
|
|
104
|
-
tags: # optional
|
|
105
|
-
- feature
|
|
106
|
-
- api
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
## Metrics Collected
|
|
110
|
-
|
|
111
|
-
| Metric | Description |
|
|
112
|
-
|--------|-------------|
|
|
113
|
-
| `totalTimeMs` | Total execution time |
|
|
114
|
-
| `timeToFirstActionMs` | Time until first agent action |
|
|
115
|
-
| `messageCount` | Inter-agent messages sent |
|
|
116
|
-
| `avgLatencyMs` | Average message latency |
|
|
117
|
-
| `latencyP50Ms` | 50th percentile latency |
|
|
118
|
-
| `latencyP99Ms` | 99th percentile latency |
|
|
119
|
-
| `coordinationRounds` | Communication rounds |
|
|
120
|
-
| `agentCount` | Agents used |
|
|
121
|
-
| `totalTokensUsed` | LLM tokens consumed |
|
|
122
|
-
| `peakMemoryMb` | Peak memory usage |
|
|
123
|
-
| `success` | Task completed successfully |
|
|
124
|
-
| `completionRate` | Partial completion (0-1) |
|
|
125
|
-
|
|
126
|
-
## Scoring
|
|
127
|
-
|
|
128
|
-
Results are scored on three components:
|
|
129
|
-
|
|
130
|
-
- **Success (50 points)**: Task completion
|
|
131
|
-
- **Time (30 points)**: Faster is better
|
|
132
|
-
- **Efficiency (20 points)**: Fewer agents is better
|
|
133
|
-
|
|
134
|
-
The configuration with the highest total score wins.
|
|
135
|
-
|
|
136
|
-
## Configuration
|
|
137
|
-
|
|
138
|
-
```typescript
|
|
139
|
-
interface BenchmarkConfig {
|
|
140
|
-
configurations: ConfigurationType[]; // ['single', 'subagent', 'swarm']
|
|
141
|
-
cli: string; // CLI to use (default: 'claude')
|
|
142
|
-
cwd?: string; // Working directory
|
|
143
|
-
quiet: boolean; // Suppress output
|
|
144
|
-
cooldownMs: number; // Delay between runs
|
|
145
|
-
maxSwarmSize: number; // Max agents in swarm
|
|
146
|
-
socketPath?: string; // Custom relay socket
|
|
147
|
-
}
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
## Architecture
|
|
151
|
-
|
|
152
|
-
```
|
|
153
|
-
┌─────────────────────────────────────────────────────────────┐
|
|
154
|
-
│ ComparisonBenchmark │
|
|
155
|
-
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
|
156
|
-
│ │ Single │ │ SubAgent │ │ Swarm │ │
|
|
157
|
-
│ │ Runner │ │ Runner │ │ Runner │ │
|
|
158
|
-
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
|
|
159
|
-
│ │ │ │ │
|
|
160
|
-
│ └────────────────┼────────────────┘ │
|
|
161
|
-
│ │ │
|
|
162
|
-
│ ▼ │
|
|
163
|
-
│ ┌───────────────────────┐ │
|
|
164
|
-
│ │ @agent-relay/sdk │ │
|
|
165
|
-
│ │ (standalone mode) │ │
|
|
166
|
-
│ └───────────────────────┘ │
|
|
167
|
-
└─────────────────────────────────────────────────────────────┘
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
## Example Output
|
|
171
|
-
|
|
172
|
-
```
|
|
173
|
-
============================================================
|
|
174
|
-
COMPARISON RESULTS
|
|
175
|
-
============================================================
|
|
176
|
-
Task: refactor-auth
|
|
177
|
-
Winner: SUBAGENT
|
|
178
|
-
|
|
179
|
-
+------------+--------+----------+-------+
|
|
180
|
-
| Metric | Single | Subagent | Swarm |
|
|
181
|
-
+------------+--------+----------+-------+
|
|
182
|
-
| Success | ✓ | ✓ | ✓ |
|
|
183
|
-
| Time (s) | 45.2 | 28.1 | 32.5 |
|
|
184
|
-
| Agents | 1 | 3 | 3 |
|
|
185
|
-
| Messages | 0 | 12 | 24 |
|
|
186
|
-
| Completion | 100% | 100% | 100% |
|
|
187
|
-
| Score | 65.3 | 78.2 | 71.8 |
|
|
188
|
-
+------------+--------+----------+-------+
|
|
189
|
-
```
|
|
190
|
-
|
|
191
|
-
## Included Datasets
|
|
192
|
-
|
|
193
|
-
The package includes example datasets in `datasets/`:
|
|
194
|
-
|
|
195
|
-
- `coding-tasks.yaml` - Standard software engineering tasks
|
|
196
|
-
- `coordination-tasks.yaml` - Tasks requiring multi-agent coordination
|
|
197
|
-
|
|
198
|
-
## License
|
|
199
|
-
|
|
200
|
-
Apache-2.0
|
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
# Coding Tasks Dataset
|
|
2
|
-
# Standard coding tasks for comparing agent configurations
|
|
3
|
-
|
|
4
|
-
name: Coding Tasks
|
|
5
|
-
description: Common software engineering tasks for benchmarking agent configurations
|
|
6
|
-
version: "1.0.0"
|
|
7
|
-
|
|
8
|
-
tasks:
|
|
9
|
-
# Low complexity tasks
|
|
10
|
-
- id: add-logging
|
|
11
|
-
description: Add structured logging to the API endpoints
|
|
12
|
-
files:
|
|
13
|
-
- src/routes/api.ts
|
|
14
|
-
- src/utils/logger.ts
|
|
15
|
-
expectedOutcome: All API endpoints log requests and responses with timestamps
|
|
16
|
-
complexity: low
|
|
17
|
-
tags:
|
|
18
|
-
- logging
|
|
19
|
-
- observability
|
|
20
|
-
|
|
21
|
-
- id: fix-typos
|
|
22
|
-
description: Fix typos and improve variable names in the codebase
|
|
23
|
-
files:
|
|
24
|
-
- src/utils/helpers.ts
|
|
25
|
-
- src/components/Form.tsx
|
|
26
|
-
expectedOutcome: All typos fixed, variable names follow conventions
|
|
27
|
-
complexity: low
|
|
28
|
-
tags:
|
|
29
|
-
- refactoring
|
|
30
|
-
- code-quality
|
|
31
|
-
|
|
32
|
-
# Medium complexity tasks
|
|
33
|
-
- id: add-rate-limiting
|
|
34
|
-
description: Add rate limiting middleware to protect API endpoints
|
|
35
|
-
files:
|
|
36
|
-
- src/middleware/rateLimit.ts
|
|
37
|
-
- src/routes/api.ts
|
|
38
|
-
- src/config/limits.ts
|
|
39
|
-
expectedOutcome: Rate limiting applied to all endpoints, configurable limits per route
|
|
40
|
-
complexity: medium
|
|
41
|
-
tags:
|
|
42
|
-
- security
|
|
43
|
-
- middleware
|
|
44
|
-
|
|
45
|
-
- id: refactor-auth-jwt
|
|
46
|
-
description: Refactor authentication from sessions to JWT tokens
|
|
47
|
-
files:
|
|
48
|
-
- src/auth/session.ts
|
|
49
|
-
- src/auth/jwt.ts
|
|
50
|
-
- src/middleware/auth.ts
|
|
51
|
-
- src/routes/login.ts
|
|
52
|
-
- tests/auth.test.ts
|
|
53
|
-
expectedOutcome: JWT-based auth working, all tests pass, sessions removed
|
|
54
|
-
complexity: medium
|
|
55
|
-
tags:
|
|
56
|
-
- auth
|
|
57
|
-
- security
|
|
58
|
-
- refactoring
|
|
59
|
-
|
|
60
|
-
- id: add-caching-layer
|
|
61
|
-
description: Add Redis caching to expensive database queries
|
|
62
|
-
files:
|
|
63
|
-
- src/services/cache.ts
|
|
64
|
-
- src/repositories/user.ts
|
|
65
|
-
- src/repositories/product.ts
|
|
66
|
-
- src/config/redis.ts
|
|
67
|
-
expectedOutcome: Caching implemented for user and product queries, cache invalidation working
|
|
68
|
-
complexity: medium
|
|
69
|
-
tags:
|
|
70
|
-
- performance
|
|
71
|
-
- caching
|
|
72
|
-
|
|
73
|
-
# High complexity tasks
|
|
74
|
-
- id: database-migration
|
|
75
|
-
description: Migrate from PostgreSQL to MongoDB while maintaining API compatibility
|
|
76
|
-
files:
|
|
77
|
-
- src/db/postgres/connection.ts
|
|
78
|
-
- src/db/mongo/connection.ts
|
|
79
|
-
- src/models/user.ts
|
|
80
|
-
- src/models/product.ts
|
|
81
|
-
- src/models/order.ts
|
|
82
|
-
- src/repositories/user.ts
|
|
83
|
-
- src/repositories/product.ts
|
|
84
|
-
- src/repositories/order.ts
|
|
85
|
-
- scripts/migrate-data.ts
|
|
86
|
-
- docker-compose.yml
|
|
87
|
-
expectedOutcome: All data migrated, API unchanged, tests pass on MongoDB
|
|
88
|
-
complexity: high
|
|
89
|
-
tags:
|
|
90
|
-
- database
|
|
91
|
-
- migration
|
|
92
|
-
- high-risk
|
|
93
|
-
|
|
94
|
-
- id: implement-rbac
|
|
95
|
-
description: Implement role-based access control across the application
|
|
96
|
-
files:
|
|
97
|
-
- src/auth/rbac.ts
|
|
98
|
-
- src/auth/permissions.ts
|
|
99
|
-
- src/middleware/authorize.ts
|
|
100
|
-
- src/routes/admin.ts
|
|
101
|
-
- src/routes/user.ts
|
|
102
|
-
- src/models/role.ts
|
|
103
|
-
- src/models/permission.ts
|
|
104
|
-
- tests/rbac.test.ts
|
|
105
|
-
expectedOutcome: RBAC fully implemented, admin routes protected, tests pass
|
|
106
|
-
complexity: high
|
|
107
|
-
tags:
|
|
108
|
-
- security
|
|
109
|
-
- auth
|
|
110
|
-
- permissions
|
|
111
|
-
|
|
112
|
-
- id: api-versioning
|
|
113
|
-
description: Implement API versioning with backward compatibility
|
|
114
|
-
files:
|
|
115
|
-
- src/routes/v1/index.ts
|
|
116
|
-
- src/routes/v2/index.ts
|
|
117
|
-
- src/middleware/version.ts
|
|
118
|
-
- src/transformers/v1-to-v2.ts
|
|
119
|
-
- src/docs/api-v1.yaml
|
|
120
|
-
- src/docs/api-v2.yaml
|
|
121
|
-
- tests/versioning.test.ts
|
|
122
|
-
expectedOutcome: V1 and V2 APIs working, automatic version negotiation, docs updated
|
|
123
|
-
complexity: high
|
|
124
|
-
tags:
|
|
125
|
-
- api
|
|
126
|
-
- versioning
|
|
127
|
-
- backward-compatibility
|
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
# Coordination Tasks Dataset
|
|
2
|
-
# Tasks specifically designed to test multi-agent coordination
|
|
3
|
-
|
|
4
|
-
name: Coordination Tasks
|
|
5
|
-
description: Tasks that require significant inter-agent coordination and communication
|
|
6
|
-
version: "1.0.0"
|
|
7
|
-
|
|
8
|
-
tasks:
|
|
9
|
-
# Tasks requiring parallel work
|
|
10
|
-
- id: parallel-refactor
|
|
11
|
-
description: Refactor 6 service files to use a new error handling pattern
|
|
12
|
-
files:
|
|
13
|
-
- src/services/userService.ts
|
|
14
|
-
- src/services/orderService.ts
|
|
15
|
-
- src/services/productService.ts
|
|
16
|
-
- src/services/paymentService.ts
|
|
17
|
-
- src/services/notificationService.ts
|
|
18
|
-
- src/services/analyticsService.ts
|
|
19
|
-
expectedOutcome: All services use new error handling, consistent pattern across all files
|
|
20
|
-
complexity: medium
|
|
21
|
-
tags:
|
|
22
|
-
- parallel-work
|
|
23
|
-
- refactoring
|
|
24
|
-
|
|
25
|
-
- id: parallel-testing
|
|
26
|
-
description: Write comprehensive unit tests for 4 independent modules
|
|
27
|
-
files:
|
|
28
|
-
- src/utils/validators.ts
|
|
29
|
-
- src/utils/formatters.ts
|
|
30
|
-
- src/utils/transformers.ts
|
|
31
|
-
- src/utils/parsers.ts
|
|
32
|
-
- tests/validators.test.ts
|
|
33
|
-
- tests/formatters.test.ts
|
|
34
|
-
- tests/transformers.test.ts
|
|
35
|
-
- tests/parsers.test.ts
|
|
36
|
-
expectedOutcome: 100% test coverage for all utility modules, all tests pass
|
|
37
|
-
complexity: medium
|
|
38
|
-
tags:
|
|
39
|
-
- testing
|
|
40
|
-
- parallel-work
|
|
41
|
-
|
|
42
|
-
# Tasks requiring coordination
|
|
43
|
-
- id: api-frontend-sync
|
|
44
|
-
description: Add a new feature requiring both API endpoints and frontend components
|
|
45
|
-
files:
|
|
46
|
-
- src/api/notifications.ts
|
|
47
|
-
- src/api/routes.ts
|
|
48
|
-
- src/components/NotificationBell.tsx
|
|
49
|
-
- src/components/NotificationList.tsx
|
|
50
|
-
- src/hooks/useNotifications.ts
|
|
51
|
-
- src/types/notification.ts
|
|
52
|
-
expectedOutcome: Notifications API and UI working together, types shared correctly
|
|
53
|
-
complexity: medium
|
|
54
|
-
tags:
|
|
55
|
-
- full-stack
|
|
56
|
-
- coordination
|
|
57
|
-
|
|
58
|
-
- id: schema-migration-chain
|
|
59
|
-
description: Database schema change requiring coordinated updates across layers
|
|
60
|
-
files:
|
|
61
|
-
- migrations/add_user_preferences.sql
|
|
62
|
-
- src/models/userPreferences.ts
|
|
63
|
-
- src/repositories/userPreferences.ts
|
|
64
|
-
- src/services/userService.ts
|
|
65
|
-
- src/routes/user.ts
|
|
66
|
-
- src/types/user.ts
|
|
67
|
-
expectedOutcome: Schema migrated, all layers updated, API returns new fields
|
|
68
|
-
complexity: high
|
|
69
|
-
tags:
|
|
70
|
-
- database
|
|
71
|
-
- coordination
|
|
72
|
-
- layered-architecture
|
|
73
|
-
|
|
74
|
-
# Tasks with dependencies
|
|
75
|
-
- id: dependency-chain
|
|
76
|
-
description: Build a feature where each component depends on the previous
|
|
77
|
-
files:
|
|
78
|
-
- src/core/eventBus.ts
|
|
79
|
-
- src/services/eventHandler.ts
|
|
80
|
-
- src/workers/eventProcessor.ts
|
|
81
|
-
- src/api/webhooks.ts
|
|
82
|
-
- tests/integration/events.test.ts
|
|
83
|
-
expectedOutcome: Event system working end-to-end, integration tests pass
|
|
84
|
-
complexity: high
|
|
85
|
-
tags:
|
|
86
|
-
- dependencies
|
|
87
|
-
- event-driven
|
|
88
|
-
|
|
89
|
-
# Tasks requiring consensus
|
|
90
|
-
- id: design-review
|
|
91
|
-
description: Review and improve API design across multiple endpoints
|
|
92
|
-
files:
|
|
93
|
-
- src/api/users.ts
|
|
94
|
-
- src/api/products.ts
|
|
95
|
-
- src/api/orders.ts
|
|
96
|
-
- src/api/payments.ts
|
|
97
|
-
- docs/api-design.md
|
|
98
|
-
expectedOutcome: Consistent API design patterns, documentation updated
|
|
99
|
-
complexity: medium
|
|
100
|
-
tags:
|
|
101
|
-
- review
|
|
102
|
-
- consensus
|
|
103
|
-
- documentation
|
|
104
|
-
|
|
105
|
-
# Large-scale coordination
|
|
106
|
-
- id: monorepo-sync
|
|
107
|
-
description: Update shared types across multiple packages in a monorepo
|
|
108
|
-
files:
|
|
109
|
-
- packages/shared/types/user.ts
|
|
110
|
-
- packages/shared/types/product.ts
|
|
111
|
-
- packages/api/src/routes/user.ts
|
|
112
|
-
- packages/api/src/routes/product.ts
|
|
113
|
-
- packages/web/src/api/user.ts
|
|
114
|
-
- packages/web/src/api/product.ts
|
|
115
|
-
- packages/mobile/src/api/user.ts
|
|
116
|
-
- packages/mobile/src/api/product.ts
|
|
117
|
-
expectedOutcome: Types consistent across all packages, no type errors
|
|
118
|
-
complexity: high
|
|
119
|
-
tags:
|
|
120
|
-
- monorepo
|
|
121
|
-
- types
|
|
122
|
-
- coordination
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
name: Quick Test
|
|
2
|
-
description: Minimal benchmark for testing swarm vs single agent
|
|
3
|
-
version: "1.0.0"
|
|
4
|
-
|
|
5
|
-
tasks:
|
|
6
|
-
- id: count-files
|
|
7
|
-
description: Count TypeScript files in packages/benchmark/src and list their exports
|
|
8
|
-
files:
|
|
9
|
-
- packages/benchmark/src
|
|
10
|
-
expectedOutcome: Accurate count and export list
|
|
11
|
-
complexity: low
|
|
12
|
-
timeoutMs: 60000
|
|
13
|
-
|
|
14
|
-
- id: summarize-types
|
|
15
|
-
description: Read packages/benchmark/src/types.ts and summarize the main interfaces
|
|
16
|
-
files:
|
|
17
|
-
- packages/benchmark/src/types.ts
|
|
18
|
-
expectedOutcome: Clear summary of Task, RunResult, and ComparisonResult interfaces
|
|
19
|
-
complexity: low
|
|
20
|
-
timeoutMs: 60000
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Comparison Benchmark
|
|
3
|
-
*
|
|
4
|
-
* Main orchestrator for running comparison benchmarks across configurations.
|
|
5
|
-
*/
|
|
6
|
-
import type { ConfigurationType, Task, RunResult, ComparisonResult, BenchmarkConfig } from './types.js';
|
|
7
|
-
/**
|
|
8
|
-
* Main benchmark orchestrator
|
|
9
|
-
*/
|
|
10
|
-
export declare class ComparisonBenchmark {
|
|
11
|
-
private config;
|
|
12
|
-
private runners;
|
|
13
|
-
constructor(config?: Partial<BenchmarkConfig>);
|
|
14
|
-
/**
|
|
15
|
-
* Create a runner for a configuration type
|
|
16
|
-
*/
|
|
17
|
-
private createRunner;
|
|
18
|
-
/**
|
|
19
|
-
* Run a comparison across all configured configurations
|
|
20
|
-
*/
|
|
21
|
-
runComparison(task: Task): Promise<ComparisonResult>;
|
|
22
|
-
/**
|
|
23
|
-
* Run a single configuration
|
|
24
|
-
*/
|
|
25
|
-
runSingle(task: Task, configType: ConfigurationType): Promise<RunResult>;
|
|
26
|
-
/**
|
|
27
|
-
* Calculate score breakdown for a result
|
|
28
|
-
*/
|
|
29
|
-
private calculateScore;
|
|
30
|
-
/**
|
|
31
|
-
* Determine the winning configuration
|
|
32
|
-
*/
|
|
33
|
-
private determineWinner;
|
|
34
|
-
/**
|
|
35
|
-
* Print a single run result
|
|
36
|
-
*/
|
|
37
|
-
private printRunResult;
|
|
38
|
-
/**
|
|
39
|
-
* Print comparison table
|
|
40
|
-
*/
|
|
41
|
-
printComparison(comparison: ComparisonResult): void;
|
|
42
|
-
}
|
|
43
|
-
/**
|
|
44
|
-
* Quick helper to run a comparison benchmark
|
|
45
|
-
*/
|
|
46
|
-
export declare function runComparison(task: Task, config?: Partial<BenchmarkConfig>): Promise<ComparisonResult>;
|
|
47
|
-
//# sourceMappingURL=benchmark.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../src/benchmark.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,iBAAiB,EACjB,IAAI,EACJ,SAAS,EACT,gBAAgB,EAEhB,eAAe,EAChB,MAAM,YAAY,CAAC;AASpB;;GAEG;AACH,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,OAAO,CAA8C;gBAEjD,MAAM,GAAE,OAAO,CAAC,eAAe,CAAM;IAUjD;;OAEG;IACH,OAAO,CAAC,YAAY;IAapB;;OAEG;IACG,aAAa,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAgE1D;;OAEG;IACG,SAAS,CACb,IAAI,EAAE,IAAI,EACV,UAAU,EAAE,iBAAiB,GAC5B,OAAO,CAAC,SAAS,CAAC;IAarB;;OAEG;IACH,OAAO,CAAC,cAAc;IAwBtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAiBvB;;OAEG;IACH,OAAO,CAAC,cAAc;IAWtB;;OAEG;IACH,eAAe,CAAC,UAAU,EAAE,gBAAgB,GAAG,IAAI;CAiFpD;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,IAAI,EAAE,IAAI,EACV,MAAM,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,GAChC,OAAO,CAAC,gBAAgB,CAAC,CAG3B"}
|