agent-relay 2.3.2 → 2.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.cjs +1 -1
- package/dist/src/cli/index.js +124 -7
- package/dist/src/cli/index.js.map +1 -1
- package/package.json +20 -26
- package/packages/acp-bridge/package.json +2 -2
- package/packages/bridge/package.json +7 -7
- package/packages/config/dist/cloud-config.d.ts +1 -1
- package/packages/config/dist/cloud-config.d.ts.map +1 -1
- package/packages/config/dist/cloud-config.js.map +1 -1
- package/packages/config/dist/schemas.d.ts +5 -5
- package/packages/config/dist/schemas.js +1 -1
- package/packages/config/dist/schemas.js.map +1 -1
- package/packages/config/package.json +2 -2
- package/packages/config/src/cloud-config.ts +2 -2
- package/packages/config/src/schemas.test.ts +48 -0
- package/packages/config/src/schemas.ts +1 -1
- package/packages/continuity/package.json +2 -2
- package/packages/daemon/package.json +12 -12
- package/packages/hooks/package.json +4 -4
- package/packages/mcp/package.json +5 -5
- package/packages/memory/package.json +2 -2
- package/packages/policy/package.json +2 -2
- package/packages/protocol/package.json +1 -1
- package/packages/resiliency/package.json +1 -1
- package/packages/sdk/dist/index.d.ts +1 -29
- package/packages/sdk/dist/index.d.ts.map +1 -1
- package/packages/sdk/dist/index.js +1 -38
- package/packages/sdk/dist/index.js.map +1 -1
- package/packages/sdk/package.json +4 -25
- package/packages/sdk/src/index.ts +1 -69
- package/packages/sdk-py/README.md +56 -0
- package/packages/sdk-py/pyproject.toml +23 -0
- package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
- package/packages/sdk-py/src/agent_relay/builder.py +367 -0
- package/packages/sdk-py/src/agent_relay/types.py +92 -0
- package/packages/sdk-py/tests/__init__.py +0 -0
- package/packages/sdk-py/tests/test_builder.py +101 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.d.ts +2 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.d.ts.map +1 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.js +257 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.js.map +1 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.d.ts +2 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.d.ts.map +1 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.js +124 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.js.map +1 -0
- package/packages/sdk-ts/dist/client.d.ts +2 -0
- package/packages/sdk-ts/dist/client.d.ts.map +1 -1
- package/packages/sdk-ts/dist/client.js +2 -0
- package/packages/sdk-ts/dist/client.js.map +1 -1
- package/packages/sdk-ts/dist/index.d.ts +1 -0
- package/packages/sdk-ts/dist/index.d.ts.map +1 -1
- package/packages/sdk-ts/dist/index.js +1 -0
- package/packages/sdk-ts/dist/index.js.map +1 -1
- package/packages/sdk-ts/dist/protocol.d.ts +1 -0
- package/packages/sdk-ts/dist/protocol.d.ts.map +1 -1
- package/packages/sdk-ts/dist/relay.d.ts +44 -0
- package/packages/sdk-ts/dist/relay.d.ts.map +1 -1
- package/packages/sdk-ts/dist/relay.js +89 -11
- package/packages/sdk-ts/dist/relay.js.map +1 -1
- package/packages/sdk-ts/dist/relaycast.js +2 -2
- package/packages/sdk-ts/dist/relaycast.js.map +1 -1
- package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
- package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
- package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
- package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/builder.js +179 -0
- package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
- package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/cli.js +82 -0
- package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
- package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
- package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
- package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/index.js +10 -0
- package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
- package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
- package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
- package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/run.js +24 -0
- package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
- package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/runner.js +650 -0
- package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
- package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/state.js +140 -0
- package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
- package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/templates.js +395 -0
- package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
- package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/types.js +8 -0
- package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
- package/packages/sdk-ts/package.json +9 -3
- package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
- package/packages/sdk-ts/src/__tests__/facade.test.ts +296 -0
- package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
- package/packages/sdk-ts/src/__tests__/unit.test.ts +152 -0
- package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
- package/packages/sdk-ts/src/client.ts +4 -0
- package/packages/sdk-ts/src/index.ts +1 -0
- package/packages/sdk-ts/src/protocol.ts +1 -1
- package/packages/sdk-ts/src/relay.ts +112 -11
- package/packages/sdk-ts/src/relaycast.ts +2 -2
- package/packages/sdk-ts/src/workflows/README.md +450 -0
- package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
- package/packages/sdk-ts/src/workflows/builder.ts +241 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
- package/packages/sdk-ts/src/workflows/cli.ts +93 -0
- package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
- package/packages/sdk-ts/src/workflows/index.ts +9 -0
- package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
- package/packages/sdk-ts/src/workflows/run.ts +47 -0
- package/packages/sdk-ts/src/workflows/runner.ts +873 -0
- package/packages/sdk-ts/src/workflows/schema.json +321 -0
- package/packages/sdk-ts/src/workflows/state.ts +279 -0
- package/packages/sdk-ts/src/workflows/templates.ts +544 -0
- package/packages/sdk-ts/src/workflows/types.ts +178 -0
- package/packages/sdk-ts/tsconfig.json +6 -1
- package/packages/spawner/package.json +1 -1
- package/packages/state/package.json +1 -1
- package/packages/storage/package.json +2 -2
- package/packages/telemetry/package.json +1 -1
- package/packages/trajectory/package.json +2 -2
- package/packages/user-directory/package.json +2 -2
- package/packages/utils/package.json +3 -3
- package/packages/wrapper/package.json +5 -6
- package/scripts/postinstall.js +106 -2
- package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
- package/packages/api-types/.trajectories/index.json +0 -12
- package/packages/api-types/dist/index.d.ts +0 -21
- package/packages/api-types/dist/index.d.ts.map +0 -1
- package/packages/api-types/dist/index.js +0 -22
- package/packages/api-types/dist/index.js.map +0 -1
- package/packages/api-types/dist/schemas/agent.d.ts +0 -259
- package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/agent.js +0 -102
- package/packages/api-types/dist/schemas/agent.js.map +0 -1
- package/packages/api-types/dist/schemas/api.d.ts +0 -290
- package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/api.js +0 -162
- package/packages/api-types/dist/schemas/api.js.map +0 -1
- package/packages/api-types/dist/schemas/decision.d.ts +0 -230
- package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/decision.js +0 -104
- package/packages/api-types/dist/schemas/decision.js.map +0 -1
- package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
- package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/fleet.js +0 -71
- package/packages/api-types/dist/schemas/fleet.js.map +0 -1
- package/packages/api-types/dist/schemas/history.d.ts +0 -180
- package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/history.js +0 -72
- package/packages/api-types/dist/schemas/history.js.map +0 -1
- package/packages/api-types/dist/schemas/index.d.ts +0 -14
- package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/index.js +0 -22
- package/packages/api-types/dist/schemas/index.js.map +0 -1
- package/packages/api-types/dist/schemas/message.d.ts +0 -456
- package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/message.js +0 -88
- package/packages/api-types/dist/schemas/message.js.map +0 -1
- package/packages/api-types/dist/schemas/session.d.ts +0 -60
- package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/session.js +0 -36
- package/packages/api-types/dist/schemas/session.js.map +0 -1
- package/packages/api-types/dist/schemas/task.d.ts +0 -111
- package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/task.js +0 -64
- package/packages/api-types/dist/schemas/task.js.map +0 -1
- package/packages/api-types/package.json +0 -61
- package/packages/api-types/scripts/generate-openapi.ts +0 -106
- package/packages/api-types/src/index.ts +0 -22
- package/packages/api-types/src/schemas/agent.test.ts +0 -164
- package/packages/api-types/src/schemas/agent.ts +0 -110
- package/packages/api-types/src/schemas/api.test.ts +0 -372
- package/packages/api-types/src/schemas/api.ts +0 -194
- package/packages/api-types/src/schemas/decision.test.ts +0 -324
- package/packages/api-types/src/schemas/decision.ts +0 -136
- package/packages/api-types/src/schemas/fleet.test.ts +0 -212
- package/packages/api-types/src/schemas/fleet.ts +0 -83
- package/packages/api-types/src/schemas/history.test.ts +0 -242
- package/packages/api-types/src/schemas/history.ts +0 -84
- package/packages/api-types/src/schemas/index.ts +0 -148
- package/packages/api-types/src/schemas/message.test.ts +0 -192
- package/packages/api-types/src/schemas/message.ts +0 -98
- package/packages/api-types/src/schemas/session.test.ts +0 -104
- package/packages/api-types/src/schemas/session.ts +0 -40
- package/packages/api-types/src/schemas/task.test.ts +0 -192
- package/packages/api-types/src/schemas/task.ts +0 -78
- package/packages/api-types/tsconfig.json +0 -19
- package/packages/api-types/vitest.config.ts +0 -9
- package/packages/benchmark/README.md +0 -200
- package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
- package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
- package/packages/benchmark/datasets/quick-test.yaml +0 -20
- package/packages/benchmark/dist/benchmark.d.ts +0 -47
- package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
- package/packages/benchmark/dist/benchmark.js +0 -224
- package/packages/benchmark/dist/benchmark.js.map +0 -1
- package/packages/benchmark/dist/cli.d.ts +0 -8
- package/packages/benchmark/dist/cli.d.ts.map +0 -1
- package/packages/benchmark/dist/cli.js +0 -185
- package/packages/benchmark/dist/cli.js.map +0 -1
- package/packages/benchmark/dist/harbor.d.ts +0 -53
- package/packages/benchmark/dist/harbor.d.ts.map +0 -1
- package/packages/benchmark/dist/harbor.js +0 -127
- package/packages/benchmark/dist/harbor.js.map +0 -1
- package/packages/benchmark/dist/index.d.ts +0 -48
- package/packages/benchmark/dist/index.d.ts.map +0 -1
- package/packages/benchmark/dist/index.js +0 -50
- package/packages/benchmark/dist/index.js.map +0 -1
- package/packages/benchmark/dist/runners/base.d.ts +0 -63
- package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/base.js +0 -156
- package/packages/benchmark/dist/runners/base.js.map +0 -1
- package/packages/benchmark/dist/runners/index.d.ts +0 -10
- package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/index.js +0 -10
- package/packages/benchmark/dist/runners/index.js.map +0 -1
- package/packages/benchmark/dist/runners/single.d.ts +0 -19
- package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/single.js +0 -111
- package/packages/benchmark/dist/runners/single.js.map +0 -1
- package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
- package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/subagent.js +0 -212
- package/packages/benchmark/dist/runners/subagent.js.map +0 -1
- package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
- package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/swarm.js +0 -273
- package/packages/benchmark/dist/runners/swarm.js.map +0 -1
- package/packages/benchmark/dist/types.d.ts +0 -178
- package/packages/benchmark/dist/types.d.ts.map +0 -1
- package/packages/benchmark/dist/types.js +0 -16
- package/packages/benchmark/dist/types.js.map +0 -1
- package/packages/benchmark/package.json +0 -80
- package/packages/benchmark/src/benchmark.ts +0 -298
- package/packages/benchmark/src/cli.ts +0 -240
- package/packages/benchmark/src/harbor.ts +0 -170
- package/packages/benchmark/src/index.ts +0 -73
- package/packages/benchmark/src/runners/base.ts +0 -205
- package/packages/benchmark/src/runners/index.ts +0 -10
- package/packages/benchmark/src/runners/single.ts +0 -121
- package/packages/benchmark/src/runners/subagent.ts +0 -240
- package/packages/benchmark/src/runners/swarm.ts +0 -326
- package/packages/benchmark/src/types.ts +0 -205
- package/packages/benchmark/tsconfig.json +0 -20
- package/packages/cli-tester/README.md +0 -277
- package/packages/cli-tester/dist/index.d.ts +0 -21
- package/packages/cli-tester/dist/index.d.ts.map +0 -1
- package/packages/cli-tester/dist/index.js +0 -21
- package/packages/cli-tester/dist/index.js.map +0 -1
- package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
- package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
- package/packages/cli-tester/dist/utils/credential-check.js +0 -230
- package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
- package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
- package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
- package/packages/cli-tester/dist/utils/socket-client.js +0 -153
- package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
- package/packages/cli-tester/docker/Dockerfile +0 -61
- package/packages/cli-tester/docker/docker-compose.yml +0 -71
- package/packages/cli-tester/docker/entrypoint.sh +0 -58
- package/packages/cli-tester/package.json +0 -32
- package/packages/cli-tester/scripts/clear-auth.sh +0 -101
- package/packages/cli-tester/scripts/inject-message.sh +0 -42
- package/packages/cli-tester/scripts/start.sh +0 -71
- package/packages/cli-tester/scripts/test-cli.sh +0 -56
- package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
- package/packages/cli-tester/scripts/test-registration.sh +0 -182
- package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
- package/packages/cli-tester/scripts/test-spawn.sh +0 -140
- package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
- package/packages/cli-tester/scripts/verify-auth.sh +0 -112
- package/packages/cli-tester/src/index.ts +0 -40
- package/packages/cli-tester/src/utils/credential-check.ts +0 -284
- package/packages/cli-tester/src/utils/socket-client.ts +0 -211
- package/packages/cli-tester/tests/credential-check.test.ts +0 -56
- package/packages/cli-tester/tsconfig.json +0 -11
- package/packages/sdk/dist/browser-client.d.ts +0 -212
- package/packages/sdk/dist/browser-client.d.ts.map +0 -1
- package/packages/sdk/dist/browser-client.js +0 -750
- package/packages/sdk/dist/browser-client.js.map +0 -1
- package/packages/sdk/dist/browser-framing.d.ts +0 -46
- package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
- package/packages/sdk/dist/browser-framing.js +0 -122
- package/packages/sdk/dist/browser-framing.js.map +0 -1
- package/packages/sdk/dist/standalone.d.ts +0 -89
- package/packages/sdk/dist/standalone.d.ts.map +0 -1
- package/packages/sdk/dist/standalone.js +0 -131
- package/packages/sdk/dist/standalone.js.map +0 -1
- package/packages/sdk/dist/transports/index.d.ts +0 -92
- package/packages/sdk/dist/transports/index.d.ts.map +0 -1
- package/packages/sdk/dist/transports/index.js +0 -129
- package/packages/sdk/dist/transports/index.js.map +0 -1
- package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
- package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
- package/packages/sdk/dist/transports/socket-transport.js +0 -94
- package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
- package/packages/sdk/dist/transports/types.d.ts +0 -69
- package/packages/sdk/dist/transports/types.d.ts.map +0 -1
- package/packages/sdk/dist/transports/types.js +0 -10
- package/packages/sdk/dist/transports/types.js.map +0 -1
- package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
- package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
- package/packages/sdk/dist/transports/websocket-transport.js +0 -180
- package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
- package/packages/sdk/src/browser-client.ts +0 -985
- package/packages/sdk/src/browser-framing.test.ts +0 -115
- package/packages/sdk/src/browser-framing.ts +0 -150
- package/packages/sdk/src/standalone.ts +0 -183
- package/packages/sdk/src/transports/index.ts +0 -197
- package/packages/sdk/src/transports/socket-transport.ts +0 -115
- package/packages/sdk/src/transports/types.ts +0 -77
- package/packages/sdk/src/transports/websocket-transport.ts +0 -245
|
@@ -1,224 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Comparison Benchmark
|
|
3
|
-
*
|
|
4
|
-
* Main orchestrator for running comparison benchmarks across configurations.
|
|
5
|
-
*/
|
|
6
|
-
import { DEFAULT_BENCHMARK_CONFIG } from './types.js';
|
|
7
|
-
import { SingleAgentRunner, SubAgentRunner, SwarmRunner, } from './runners/index.js';
|
|
8
|
-
/**
|
|
9
|
-
* Main benchmark orchestrator
|
|
10
|
-
*/
|
|
11
|
-
export class ComparisonBenchmark {
|
|
12
|
-
config;
|
|
13
|
-
runners;
|
|
14
|
-
constructor(config = {}) {
|
|
15
|
-
this.config = { ...DEFAULT_BENCHMARK_CONFIG, ...config };
|
|
16
|
-
// Initialize runners for configured configurations
|
|
17
|
-
this.runners = new Map();
|
|
18
|
-
for (const configType of this.config.configurations) {
|
|
19
|
-
this.runners.set(configType, this.createRunner(configType));
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
/**
|
|
23
|
-
* Create a runner for a configuration type
|
|
24
|
-
*/
|
|
25
|
-
createRunner(type) {
|
|
26
|
-
switch (type) {
|
|
27
|
-
case 'single':
|
|
28
|
-
return new SingleAgentRunner(this.config);
|
|
29
|
-
case 'subagent':
|
|
30
|
-
return new SubAgentRunner(this.config);
|
|
31
|
-
case 'swarm':
|
|
32
|
-
return new SwarmRunner(this.config);
|
|
33
|
-
default:
|
|
34
|
-
throw new Error(`Unknown configuration type: ${type}`);
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
/**
|
|
38
|
-
* Run a comparison across all configured configurations
|
|
39
|
-
*/
|
|
40
|
-
async runComparison(task) {
|
|
41
|
-
const results = new Map();
|
|
42
|
-
const scores = new Map();
|
|
43
|
-
for (const [configType, runner] of this.runners) {
|
|
44
|
-
if (!this.config.quiet) {
|
|
45
|
-
console.log(`\n=== Running ${configType} configuration ===`);
|
|
46
|
-
console.log(`Task: ${task.id}`);
|
|
47
|
-
}
|
|
48
|
-
try {
|
|
49
|
-
await runner.setup();
|
|
50
|
-
const result = await runner.run(task);
|
|
51
|
-
await runner.teardown();
|
|
52
|
-
results.set(configType, result);
|
|
53
|
-
scores.set(configType, this.calculateScore(result));
|
|
54
|
-
if (!this.config.quiet) {
|
|
55
|
-
this.printRunResult(result);
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
catch (err) {
|
|
59
|
-
console.error(`Error running ${configType}:`, err.message);
|
|
60
|
-
// Create failed result
|
|
61
|
-
const failedResult = {
|
|
62
|
-
taskId: task.id,
|
|
63
|
-
configuration: configType,
|
|
64
|
-
totalTimeMs: 0,
|
|
65
|
-
timeToFirstActionMs: 0,
|
|
66
|
-
messageCount: 0,
|
|
67
|
-
avgLatencyMs: 0,
|
|
68
|
-
latencyP50Ms: 0,
|
|
69
|
-
latencyP99Ms: 0,
|
|
70
|
-
coordinationRounds: 0,
|
|
71
|
-
agentCount: 0,
|
|
72
|
-
totalTokensUsed: 0,
|
|
73
|
-
peakMemoryMb: 0,
|
|
74
|
-
success: false,
|
|
75
|
-
completionRate: 0,
|
|
76
|
-
errors: [err.message],
|
|
77
|
-
startedAt: Date.now(),
|
|
78
|
-
completedAt: Date.now(),
|
|
79
|
-
};
|
|
80
|
-
results.set(configType, failedResult);
|
|
81
|
-
scores.set(configType, { total: 0, successScore: 0, timeScore: 0, efficiencyScore: 0 });
|
|
82
|
-
}
|
|
83
|
-
// Cool-down between runs
|
|
84
|
-
if (this.config.cooldownMs > 0) {
|
|
85
|
-
await new Promise((r) => setTimeout(r, this.config.cooldownMs));
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
const winner = this.determineWinner(results, scores);
|
|
89
|
-
return {
|
|
90
|
-
taskId: task.id,
|
|
91
|
-
results,
|
|
92
|
-
winner,
|
|
93
|
-
scores,
|
|
94
|
-
};
|
|
95
|
-
}
|
|
96
|
-
/**
|
|
97
|
-
* Run a single configuration
|
|
98
|
-
*/
|
|
99
|
-
async runSingle(task, configType) {
|
|
100
|
-
const runner = this.runners.get(configType);
|
|
101
|
-
if (!runner) {
|
|
102
|
-
throw new Error(`Configuration ${configType} not enabled`);
|
|
103
|
-
}
|
|
104
|
-
await runner.setup();
|
|
105
|
-
const result = await runner.run(task);
|
|
106
|
-
await runner.teardown();
|
|
107
|
-
return result;
|
|
108
|
-
}
|
|
109
|
-
/**
|
|
110
|
-
* Calculate score breakdown for a result
|
|
111
|
-
*/
|
|
112
|
-
calculateScore(result) {
|
|
113
|
-
const maxTimeMs = 300000; // 5 minutes baseline
|
|
114
|
-
// Success component (0-50 points)
|
|
115
|
-
const successScore = result.success ? 50 : result.completionRate * 25;
|
|
116
|
-
// Time component (0-30 points) - faster is better
|
|
117
|
-
const timeScore = result.success
|
|
118
|
-
? 30 * Math.max(0, 1 - result.totalTimeMs / maxTimeMs)
|
|
119
|
-
: 0;
|
|
120
|
-
// Efficiency component (0-20 points) - fewer agents is better for same result
|
|
121
|
-
const efficiencyScore = result.success
|
|
122
|
-
? 20 / Math.max(1, result.agentCount)
|
|
123
|
-
: 0;
|
|
124
|
-
return {
|
|
125
|
-
total: successScore + timeScore + efficiencyScore,
|
|
126
|
-
successScore,
|
|
127
|
-
timeScore,
|
|
128
|
-
efficiencyScore,
|
|
129
|
-
};
|
|
130
|
-
}
|
|
131
|
-
/**
|
|
132
|
-
* Determine the winning configuration
|
|
133
|
-
*/
|
|
134
|
-
determineWinner(results, scores) {
|
|
135
|
-
let best = 'single';
|
|
136
|
-
let bestScore = -1;
|
|
137
|
-
for (const [configType, score] of scores) {
|
|
138
|
-
if (score.total > bestScore) {
|
|
139
|
-
bestScore = score.total;
|
|
140
|
-
best = configType;
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
return best;
|
|
144
|
-
}
|
|
145
|
-
/**
|
|
146
|
-
* Print a single run result
|
|
147
|
-
*/
|
|
148
|
-
printRunResult(result) {
|
|
149
|
-
console.log(`\nResult for ${result.configuration}:`);
|
|
150
|
-
console.log(` Success: ${result.success ? '✓' : '✗'}`);
|
|
151
|
-
console.log(` Time: ${(result.totalTimeMs / 1000).toFixed(1)}s`);
|
|
152
|
-
console.log(` Agents: ${result.agentCount}`);
|
|
153
|
-
console.log(` Messages: ${result.messageCount}`);
|
|
154
|
-
if (result.errors.length > 0) {
|
|
155
|
-
console.log(` Errors: ${result.errors.join(', ')}`);
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
/**
|
|
159
|
-
* Print comparison table
|
|
160
|
-
*/
|
|
161
|
-
printComparison(comparison) {
|
|
162
|
-
console.log('\n' + '='.repeat(60));
|
|
163
|
-
console.log('COMPARISON RESULTS');
|
|
164
|
-
console.log('='.repeat(60));
|
|
165
|
-
console.log(`Task: ${comparison.taskId}`);
|
|
166
|
-
console.log(`Winner: ${comparison.winner.toUpperCase()}`);
|
|
167
|
-
console.log('');
|
|
168
|
-
// Build table data
|
|
169
|
-
const configs = Array.from(comparison.results.keys());
|
|
170
|
-
const headers = ['Metric', ...configs.map((c) => c.charAt(0).toUpperCase() + c.slice(1))];
|
|
171
|
-
const rows = [
|
|
172
|
-
[
|
|
173
|
-
'Success',
|
|
174
|
-
...configs.map((c) => comparison.results.get(c)?.success ? '✓' : '✗'),
|
|
175
|
-
],
|
|
176
|
-
[
|
|
177
|
-
'Time (s)',
|
|
178
|
-
...configs.map((c) => ((comparison.results.get(c)?.totalTimeMs || 0) / 1000).toFixed(1)),
|
|
179
|
-
],
|
|
180
|
-
[
|
|
181
|
-
'Agents',
|
|
182
|
-
...configs.map((c) => String(comparison.results.get(c)?.agentCount || 0)),
|
|
183
|
-
],
|
|
184
|
-
[
|
|
185
|
-
'Messages',
|
|
186
|
-
...configs.map((c) => String(comparison.results.get(c)?.messageCount || 0)),
|
|
187
|
-
],
|
|
188
|
-
[
|
|
189
|
-
'Avg Latency (ms)',
|
|
190
|
-
...configs.map((c) => (comparison.results.get(c)?.avgLatencyMs || 0).toFixed(0)),
|
|
191
|
-
],
|
|
192
|
-
[
|
|
193
|
-
'Completion %',
|
|
194
|
-
...configs.map((c) => ((comparison.results.get(c)?.completionRate || 0) * 100).toFixed(0) + '%'),
|
|
195
|
-
],
|
|
196
|
-
[
|
|
197
|
-
'Score',
|
|
198
|
-
...configs.map((c) => (comparison.scores.get(c)?.total || 0).toFixed(1)),
|
|
199
|
-
],
|
|
200
|
-
];
|
|
201
|
-
// Print table
|
|
202
|
-
const colWidths = headers.map((h, i) => Math.max(h.length, ...rows.map((r) => String(r[i]).length)));
|
|
203
|
-
const separator = colWidths.map((w) => '-'.repeat(w + 2)).join('+');
|
|
204
|
-
console.log(separator);
|
|
205
|
-
console.log('|' +
|
|
206
|
-
headers.map((h, i) => ` ${h.padEnd(colWidths[i])} `).join('|') +
|
|
207
|
-
'|');
|
|
208
|
-
console.log(separator);
|
|
209
|
-
for (const row of rows) {
|
|
210
|
-
console.log('|' +
|
|
211
|
-
row.map((cell, i) => ` ${String(cell).padEnd(colWidths[i])} `).join('|') +
|
|
212
|
-
'|');
|
|
213
|
-
}
|
|
214
|
-
console.log(separator);
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
/**
|
|
218
|
-
* Quick helper to run a comparison benchmark
|
|
219
|
-
*/
|
|
220
|
-
export async function runComparison(task, config) {
|
|
221
|
-
const benchmark = new ComparisonBenchmark(config);
|
|
222
|
-
return benchmark.runComparison(task);
|
|
223
|
-
}
|
|
224
|
-
//# sourceMappingURL=benchmark.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"benchmark.js","sourceRoot":"","sources":["../src/benchmark.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAUH,OAAO,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AACtD,OAAO,EAEL,iBAAiB,EACjB,cAAc,EACd,WAAW,GACZ,MAAM,oBAAoB,CAAC;AAE5B;;GAEG;AACH,MAAM,OAAO,mBAAmB;IACtB,MAAM,CAAkB;IACxB,OAAO,CAA8C;IAE7D,YAAY,SAAmC,EAAE;QAC/C,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,wBAAwB,EAAE,GAAG,MAAM,EAAE,CAAC;QAEzD,mDAAmD;QACnD,IAAI,CAAC,OAAO,GAAG,IAAI,GAAG,EAAE,CAAC;QACzB,KAAK,MAAM,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;YACpD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAuB;QAC1C,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,QAAQ;gBACX,OAAO,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC5C,KAAK,UAAU;gBACb,OAAO,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACzC,KAAK,OAAO;gBACV,OAAO,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACtC;gBACE,MAAM,IAAI,KAAK,CAAC,+BAA+B,IAAI,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa,CAAC,IAAU;QAC5B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAgC,CAAC;QACxD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAqC,CAAC;QAE5D,KAAK,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAChD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;gBACvB,OAAO,CAAC,GAAG,CAAC,iBAAiB,UAAU,oBAAoB,CAAC,CAAC;gBAC7D,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;YAClC,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;gBACrB,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;gBACtC,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAExB,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;gBAChC,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC;gBAEpD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;oBACvB,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,KAAK,CAAC,iBAAiB,UAAU,GAAG,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;gBAEtE,uBAAuB;gBACvB,MAAM,YAAY,GAAc;oBAC9B,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,aAAa,EAAE,UAAU;oBACzB,WAAW,EAAE,CAAC;oBACd,mBAAmB,EAAE,CAAC;oBACtB,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,YAAY,EAAE,CAAC;oBACf,kBAAkB,EAAE,CAAC;oBACrB,UAAU,EAAE,CAAC;oBACb,eAAe,EAAE,CAAC;oBAClB,YAAY,EAAE,CAAC;oBACf,OAAO,EAAE,KAAK;oBACd,cAAc,EAAE,CAAC;oBACjB,MAAM,EAAE,CAAE,GAAa,CAAC,OAAO,CAAC;oBAChC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;oBACrB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;iBACxB,CAAC;gBACF,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC;gBACtC,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,eAAe,EAAE,CAAC,EAAE,CAAC,CAAC;YAC1F,CAAC;YAED,yBAAyB;YACzB,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;gBAC/B,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;YAClE,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAErD,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,OAAO;YACP,MAAM;YACN,MAAM;SACP,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CACb,IAAU,EACV,UAA6B;QAE7B,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,iBAAiB,UAAU,cAAc,CAAC,CAAC;QAC7D,CAAC;QAED,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACrB,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;QAExB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,MAAiB;QACtC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,qBAAqB;QAE/C,kCAAkC;QAClC,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,GAAG,EAAE,CAAC;QAEtE,kDAAkD;QAClD,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO;YAC9B,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,WAAW,GAAG,SAAS,CAAC;YACtD,CAAC,CAAC,CAAC,CAAC;QAEN,8EAA8E;QAC9E,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO;YACpC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC;YACrC,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,KAAK,EAAE,YAAY,GAAG,SAAS,GAAG,eAAe;YACjD,YAAY;YACZ,SAAS;YACT,eAAe;SAChB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,OAA0C,EAC1C,MAA8C;QAE9C,IAAI,IAAI,GAAsB,QAAQ,CAAC;QACvC,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC;QAEnB,KAAK,MAAM,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;YACzC,IAAI,KAAK,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;gBAC5B,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC;gBACxB,IAAI,GAAG,UAAU,CAAC;YACpB,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,MAAiB;QACtC,OAAO,CAAC,GAAG,CAAC,gBAAgB,MAAM,CAAC,aAAa,GAAG,CAAC,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QACxD,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAClE,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,aAAa,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,UAA4B;QAC1C,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,SAAS,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1C,OAAO,CAAC,GAAG,CAAC,WAAW,UAAU,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAEhB,mBAAmB;QACnB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QACtD,MAAM,OAAO,GAAG,CAAC,QAAQ,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAE1F,MAAM,IAAI,GAAG;YACX;gBACE,SAAS;gBACT,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAC/C;aACF;YACD;gBACE,UAAU;gBACV,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAClE;aACF;YACD;gBACE,QAAQ;gBACR,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,UAAU,IAAI,CAAC,CAAC,CACnD;aACF;YACD;gBACE,UAAU;gBACV,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CACrD;aACF;YACD;gBACE,kBAAkB;gBAClB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAC1D;aACF;YACD;gBACE,cAAc;gBACd,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,cAAc,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,CAC1E;aACF;YACD;gBACE,OAAO;gBACP,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACnB,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAClD;aACF;SACF,CAAC;QAEF,cAAc;QACd,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACrC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAC5D,CAAC;QAEF,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEpE,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvB,OAAO,CAAC,GAAG,CACT,GAAG;YACD,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YAC9D,GAAG,CACN,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAEvB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CACT,GAAG;gBACD,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBACxE,GAAG,CACN,CAAC;QACJ,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IACzB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAAU,EACV,MAAiC;IAEjC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC,MAAM,CAAC,CAAC;IAClD,OAAO,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;AACvC,CAAC"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;;;GAIG"}
|
|
@@ -1,185 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Benchmark CLI
|
|
4
|
-
*
|
|
5
|
-
* Command-line interface for running agent swarm benchmarks.
|
|
6
|
-
*/
|
|
7
|
-
import { Command } from 'commander';
|
|
8
|
-
import { readFileSync } from 'node:fs';
|
|
9
|
-
import { parse as parseYaml } from 'yaml';
|
|
10
|
-
import { ComparisonBenchmark } from './benchmark.js';
|
|
11
|
-
const program = new Command();
|
|
12
|
-
program
|
|
13
|
-
.name('relay-benchmark')
|
|
14
|
-
.description('Benchmark agent swarms, sub-agents, and single agents')
|
|
15
|
-
.version('1.0.0');
|
|
16
|
-
program
|
|
17
|
-
.command('run')
|
|
18
|
-
.description('Run a benchmark comparison')
|
|
19
|
-
.option('-d, --dataset <path>', 'Path to task dataset (YAML or JSON)')
|
|
20
|
-
.option('-t, --task <id>', 'Run only a specific task by ID')
|
|
21
|
-
.option('-c, --config <types>', 'Configurations to run (single,subagent,swarm,all)', 'all')
|
|
22
|
-
.option('--cli <name>', 'CLI to use for agents', 'claude')
|
|
23
|
-
.option('--cwd <path>', 'Working directory for tasks')
|
|
24
|
-
.option('-q, --quiet', 'Suppress output', false)
|
|
25
|
-
.option('--cooldown <ms>', 'Cooldown between runs in ms', '5000')
|
|
26
|
-
.option('--max-swarm <n>', 'Maximum swarm size', '10')
|
|
27
|
-
.option('-o, --output <path>', 'Output results to JSON file')
|
|
28
|
-
.action(async (options) => {
|
|
29
|
-
try {
|
|
30
|
-
await runBenchmark(options);
|
|
31
|
-
}
|
|
32
|
-
catch (err) {
|
|
33
|
-
console.error('Error:', err.message);
|
|
34
|
-
process.exit(1);
|
|
35
|
-
}
|
|
36
|
-
});
|
|
37
|
-
program
|
|
38
|
-
.command('list')
|
|
39
|
-
.description('List tasks in a dataset')
|
|
40
|
-
.argument('<dataset>', 'Path to task dataset')
|
|
41
|
-
.action((datasetPath) => {
|
|
42
|
-
const dataset = loadDataset(datasetPath);
|
|
43
|
-
console.log(`\nDataset: ${dataset.name || 'Unnamed'}`);
|
|
44
|
-
if (dataset.description) {
|
|
45
|
-
console.log(`Description: ${dataset.description}`);
|
|
46
|
-
}
|
|
47
|
-
console.log(`\nTasks (${dataset.tasks.length}):\n`);
|
|
48
|
-
for (const task of dataset.tasks) {
|
|
49
|
-
console.log(` ${task.id}`);
|
|
50
|
-
console.log(` Complexity: ${task.complexity}`);
|
|
51
|
-
console.log(` Files: ${task.files.length}`);
|
|
52
|
-
console.log(` ${task.description.substring(0, 60)}...`);
|
|
53
|
-
console.log('');
|
|
54
|
-
}
|
|
55
|
-
});
|
|
56
|
-
async function runBenchmark(options) {
|
|
57
|
-
// Parse configurations
|
|
58
|
-
const configurations = parseConfigurations(options.config);
|
|
59
|
-
// Build benchmark config
|
|
60
|
-
const benchmarkConfig = {
|
|
61
|
-
configurations,
|
|
62
|
-
cli: options.cli,
|
|
63
|
-
cwd: options.cwd,
|
|
64
|
-
quiet: options.quiet,
|
|
65
|
-
cooldownMs: parseInt(options.cooldown, 10),
|
|
66
|
-
maxSwarmSize: parseInt(options.maxSwarm, 10),
|
|
67
|
-
};
|
|
68
|
-
const benchmark = new ComparisonBenchmark(benchmarkConfig);
|
|
69
|
-
// Load tasks
|
|
70
|
-
let tasks;
|
|
71
|
-
if (options.dataset) {
|
|
72
|
-
const dataset = loadDataset(options.dataset);
|
|
73
|
-
tasks = dataset.tasks;
|
|
74
|
-
if (options.task) {
|
|
75
|
-
tasks = tasks.filter((t) => t.id === options.task);
|
|
76
|
-
if (tasks.length === 0) {
|
|
77
|
-
throw new Error(`Task not found: ${options.task}`);
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
else if (options.task) {
|
|
82
|
-
// Create a simple task from command line
|
|
83
|
-
tasks = [
|
|
84
|
-
{
|
|
85
|
-
id: options.task,
|
|
86
|
-
description: options.task,
|
|
87
|
-
files: [],
|
|
88
|
-
expectedOutcome: 'Task completed',
|
|
89
|
-
complexity: 'medium',
|
|
90
|
-
},
|
|
91
|
-
];
|
|
92
|
-
}
|
|
93
|
-
else {
|
|
94
|
-
throw new Error('Either --dataset or --task is required');
|
|
95
|
-
}
|
|
96
|
-
// Run benchmarks
|
|
97
|
-
const results = [];
|
|
98
|
-
for (const task of tasks) {
|
|
99
|
-
if (!options.quiet) {
|
|
100
|
-
console.log(`\n${'='.repeat(60)}`);
|
|
101
|
-
console.log(`Running task: ${task.id}`);
|
|
102
|
-
console.log('='.repeat(60));
|
|
103
|
-
}
|
|
104
|
-
const comparison = await benchmark.runComparison(task);
|
|
105
|
-
if (!options.quiet) {
|
|
106
|
-
benchmark.printComparison(comparison);
|
|
107
|
-
}
|
|
108
|
-
results.push({
|
|
109
|
-
taskId: task.id,
|
|
110
|
-
winner: comparison.winner,
|
|
111
|
-
results: Object.fromEntries(comparison.results),
|
|
112
|
-
scores: Object.fromEntries(comparison.scores),
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
// Output results
|
|
116
|
-
if (options.output) {
|
|
117
|
-
const { writeFileSync } = await import('node:fs');
|
|
118
|
-
writeFileSync(options.output, JSON.stringify(results, null, 2));
|
|
119
|
-
console.log(`\nResults written to: ${options.output}`);
|
|
120
|
-
}
|
|
121
|
-
// Print summary
|
|
122
|
-
if (!options.quiet && results.length > 1) {
|
|
123
|
-
printSummary(results);
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
function parseConfigurations(config) {
|
|
127
|
-
if (config === 'all') {
|
|
128
|
-
return ['single', 'subagent', 'swarm'];
|
|
129
|
-
}
|
|
130
|
-
const configs = config.split(',').map((c) => c.trim());
|
|
131
|
-
const valid = ['single', 'subagent', 'swarm'];
|
|
132
|
-
for (const c of configs) {
|
|
133
|
-
if (!valid.includes(c)) {
|
|
134
|
-
throw new Error(`Invalid configuration: ${c}. Valid: ${valid.join(', ')}`);
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
return configs;
|
|
138
|
-
}
|
|
139
|
-
function loadDataset(path) {
|
|
140
|
-
const content = readFileSync(path, 'utf-8');
|
|
141
|
-
let data;
|
|
142
|
-
if (path.endsWith('.yaml') || path.endsWith('.yml')) {
|
|
143
|
-
data = parseYaml(content);
|
|
144
|
-
}
|
|
145
|
-
else {
|
|
146
|
-
data = JSON.parse(content);
|
|
147
|
-
}
|
|
148
|
-
// Validate and normalize tasks
|
|
149
|
-
if (!data.tasks || !Array.isArray(data.tasks)) {
|
|
150
|
-
throw new Error('Dataset must have a "tasks" array');
|
|
151
|
-
}
|
|
152
|
-
data.tasks = data.tasks.map((t, i) => ({
|
|
153
|
-
id: t.id || `task-${i}`,
|
|
154
|
-
description: t.description || '',
|
|
155
|
-
files: t.files || [],
|
|
156
|
-
expectedOutcome: t.expectedOutcome || t.success_criteria || 'Completed',
|
|
157
|
-
complexity: t.complexity || 'medium',
|
|
158
|
-
timeoutMs: t.timeoutMs || 300000,
|
|
159
|
-
tags: t.tags || [],
|
|
160
|
-
}));
|
|
161
|
-
return data;
|
|
162
|
-
}
|
|
163
|
-
function printSummary(results) {
|
|
164
|
-
console.log('\n' + '='.repeat(60));
|
|
165
|
-
console.log('BENCHMARK SUMMARY');
|
|
166
|
-
console.log('='.repeat(60));
|
|
167
|
-
const wins = {
|
|
168
|
-
single: 0,
|
|
169
|
-
subagent: 0,
|
|
170
|
-
swarm: 0,
|
|
171
|
-
};
|
|
172
|
-
for (const result of results) {
|
|
173
|
-
wins[result.winner]++;
|
|
174
|
-
}
|
|
175
|
-
console.log('\nWins by configuration:');
|
|
176
|
-
for (const [config, count] of Object.entries(wins)) {
|
|
177
|
-
const bar = '█'.repeat(count) + '░'.repeat(results.length - count);
|
|
178
|
-
console.log(` ${config.padEnd(10)} ${bar} ${count}/${results.length}`);
|
|
179
|
-
}
|
|
180
|
-
const overallWinner = Object.entries(wins)
|
|
181
|
-
.sort((a, b) => b[1] - a[1])[0][0];
|
|
182
|
-
console.log(`\nOverall winner: ${overallWinner.toUpperCase()}`);
|
|
183
|
-
}
|
|
184
|
-
program.parse();
|
|
185
|
-
//# sourceMappingURL=cli.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;;;GAIG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAQrD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,iBAAiB,CAAC;KACvB,WAAW,CAAC,uDAAuD,CAAC;KACpE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CAAC,4BAA4B,CAAC;KACzC,MAAM,CAAC,sBAAsB,EAAE,qCAAqC,CAAC;KACrE,MAAM,CAAC,iBAAiB,EAAE,gCAAgC,CAAC;KAC3D,MAAM,CACL,sBAAsB,EACtB,mDAAmD,EACnD,KAAK,CACN;KACA,MAAM,CAAC,cAAc,EAAE,uBAAuB,EAAE,QAAQ,CAAC;KACzD,MAAM,CAAC,cAAc,EAAE,6BAA6B,CAAC;KACrD,MAAM,CAAC,aAAa,EAAE,iBAAiB,EAAE,KAAK,CAAC;KAC/C,MAAM,CAAC,iBAAiB,EAAE,6BAA6B,EAAE,MAAM,CAAC;KAChE,MAAM,CAAC,iBAAiB,EAAE,oBAAoB,EAAE,IAAI,CAAC;KACrD,MAAM,CAAC,qBAAqB,EAAE,6BAA6B,CAAC;KAC5D,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,IAAI,CAAC;QACH,MAAM,YAAY,CAAC,OAAO,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,yBAAyB,CAAC;KACtC,QAAQ,CAAC,WAAW,EAAE,sBAAsB,CAAC;KAC7C,MAAM,CAAC,CAAC,WAAW,EAAE,EAAE;IACtB,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;IACzC,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,IAAI,IAAI,SAAS,EAAE,CAAC,CAAC;IACvD,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,KAAK,CAAC,MAAM,MAAM,CAAC,CAAC;IAEpD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACjC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAC/C,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;QAC3D,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,KAAK,UAAU,YAAY,CAAC,OAU3B;IACC,uBAAuB;IACvB,MAAM,cAAc,GAAG,mBAAmB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAE3D,yBAAyB;IACzB,MAAM,eAAe,GAA6B;QAChD,cAAc;QACd,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,UAAU,EAAE,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1C,YAAY,EAAE,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;KAC7C,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC,eAAe,CAAC,CAAC;IAE3D,aAAa;IACb,IAAI,KAAa,CAAC;IAClB,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC7C,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAEtB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;YACnD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,mBAAmB,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;QACxB,yCAAyC;QACzC,KAAK,GAAG;YACN;gBACE,EAAE,EAAE,OAAO,CAAC,IAAI;gBAChB,WAAW,EAAE,OAAO,CAAC,IAAI;gBACzB,KAAK,EAAE,EAAE;gBACT,eAAe,EAAE,gBAAgB;gBACjC,UAAU,EAAE,QAAQ;aACrB;SACF,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;IAC5D,CAAC;IAED,iBAAiB;IACjB,MAAM,OAAO,GAAG,EAAE,CAAC;IACnB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACnB,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC9B,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAEvD,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACnB,SAAS,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;QACxC,CAAC;QAED,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,OAAO,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAAC;YAC/C,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC;SAC9C,CAAC,CAAC;IACL,CAAC;IAED,iBAAiB;IACjB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QAClD,aAAa,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,CAAC,yBAAyB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,gBAAgB;IAChB,IAAI,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzC,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,MAAc;IACzC,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;QACrB,OAAO,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAwB,CAAC;IAC9E,MAAM,KAAK,GAAwB,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;IAEnE,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,YAAY,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAE5C,IAAI,IAAiB,CAAC;IACtB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACpD,IAAI,GAAG,SAAS,CAAC,OAAO,CAAgB,CAAC;IAC3C,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAgB,CAAC;IAC5C,CAAC;IAED,+BAA+B;IAC/B,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC9C,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QACrC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,QAAQ,CAAC,EAAE;QACvB,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,EAAE;QAChC,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;QACpB,eAAe,EAAE,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,gBAAgB,IAAI,WAAW;QACvE,UAAU,EAAE,CAAC,CAAC,UAAU,IAAI,QAAQ;QACpC,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,MAAM;QAChC,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE;KACnB,CAAC,CAAC,CAAC;IAEJ,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,YAAY,CACnB,OAKE;IAEF,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IACnC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAE5B,MAAM,IAAI,GAAsC;QAC9C,MAAM,EAAE,CAAC;QACT,QAAQ,EAAE,CAAC;QACX,KAAK,EAAE,CAAC;KACT,CAAC;IAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;IACxB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;IACxC,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACnD,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC;QACnE,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,GAAG,IAAI,KAAK,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,aAAa,GAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAmC;SAC1E,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,OAAO,CAAC,GAAG,CAAC,qBAAqB,aAAa,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;AAClE,CAAC;AAED,OAAO,CAAC,KAAK,EAAE,CAAC"}
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Harbor Integration
|
|
3
|
-
*
|
|
4
|
-
* Entry points for Harbor benchmark framework integration.
|
|
5
|
-
* https://github.com/laude-institute/harbor
|
|
6
|
-
*/
|
|
7
|
-
import type { ConfigurationType, HarborTaskInput, HarborEvaluationOutput, BenchmarkConfig } from './types.js';
|
|
8
|
-
/**
|
|
9
|
-
* Main Harbor evaluation entry point
|
|
10
|
-
*
|
|
11
|
-
* This function is called by Harbor to evaluate a task across all configurations.
|
|
12
|
-
*
|
|
13
|
-
* @example Harbor dataset format:
|
|
14
|
-
* ```yaml
|
|
15
|
-
* tasks:
|
|
16
|
-
* - id: refactor-auth
|
|
17
|
-
* description: "Refactor authentication to use JWT"
|
|
18
|
-
* files:
|
|
19
|
-
* - src/auth/session.ts
|
|
20
|
-
* - src/auth/middleware.ts
|
|
21
|
-
* success_criteria: "All tests pass, JWT tokens used"
|
|
22
|
-
* complexity: medium
|
|
23
|
-
* ```
|
|
24
|
-
*
|
|
25
|
-
* @example Running with Harbor:
|
|
26
|
-
* ```bash
|
|
27
|
-
* harbor run \
|
|
28
|
-
* --dataset tasks.yaml \
|
|
29
|
-
* --agent @agent-relay/benchmark/harbor \
|
|
30
|
-
* --parallel 10
|
|
31
|
-
* ```
|
|
32
|
-
*/
|
|
33
|
-
export declare function evaluate(input: HarborTaskInput): Promise<HarborEvaluationOutput>;
|
|
34
|
-
/**
|
|
35
|
-
* Run a single configuration (for targeted Harbor evaluations)
|
|
36
|
-
*
|
|
37
|
-
* @example Running single config with Harbor:
|
|
38
|
-
* ```bash
|
|
39
|
-
* harbor run \
|
|
40
|
-
* --dataset tasks.yaml \
|
|
41
|
-
* --agent "@agent-relay/benchmark/harbor:evaluateSingle" \
|
|
42
|
-
* --env-var CONFIG=swarm
|
|
43
|
-
* ```
|
|
44
|
-
*/
|
|
45
|
-
export declare function evaluateSingle(input: HarborTaskInput & {
|
|
46
|
-
config?: ConfigurationType;
|
|
47
|
-
}): Promise<Record<string, unknown>>;
|
|
48
|
-
/**
|
|
49
|
-
* Evaluate with custom configuration
|
|
50
|
-
*/
|
|
51
|
-
export declare function evaluateCustom(input: HarborTaskInput, config: Partial<BenchmarkConfig>): Promise<HarborEvaluationOutput>;
|
|
52
|
-
export default evaluate;
|
|
53
|
-
//# sourceMappingURL=harbor.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"harbor.d.ts","sourceRoot":"","sources":["../src/harbor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAGV,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,eAAe,EAChB,MAAM,YAAY,CAAC;AAoBpB;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,sBAAsB,CAAC,CAiCjC;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,eAAe,GAAG;IAAE,MAAM,CAAC,EAAE,iBAAiB,CAAA;CAAE,GACtD,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAmBlC;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,eAAe,EACtB,MAAM,EAAE,OAAO,CAAC,eAAe,CAAC,GAC/B,OAAO,CAAC,sBAAsB,CAAC,CA8BjC;AAGD,eAAe,QAAQ,CAAC"}
|
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Harbor Integration
|
|
3
|
-
*
|
|
4
|
-
* Entry points for Harbor benchmark framework integration.
|
|
5
|
-
* https://github.com/laude-institute/harbor
|
|
6
|
-
*/
|
|
7
|
-
import { ComparisonBenchmark } from './benchmark.js';
|
|
8
|
-
const BENCHMARK_VERSION = '1.0.0';
|
|
9
|
-
/**
|
|
10
|
-
* Convert Harbor task input to internal Task format
|
|
11
|
-
*/
|
|
12
|
-
function convertHarborTask(input) {
|
|
13
|
-
return {
|
|
14
|
-
id: input.id,
|
|
15
|
-
description: input.description,
|
|
16
|
-
files: input.files || [],
|
|
17
|
-
expectedOutcome: input.success_criteria || 'Task completed successfully',
|
|
18
|
-
complexity: input.complexity || 'medium',
|
|
19
|
-
timeoutMs: 300000, // 5 minute default
|
|
20
|
-
tags: [],
|
|
21
|
-
};
|
|
22
|
-
}
|
|
23
|
-
/**
|
|
24
|
-
* Main Harbor evaluation entry point
|
|
25
|
-
*
|
|
26
|
-
* This function is called by Harbor to evaluate a task across all configurations.
|
|
27
|
-
*
|
|
28
|
-
* @example Harbor dataset format:
|
|
29
|
-
* ```yaml
|
|
30
|
-
* tasks:
|
|
31
|
-
* - id: refactor-auth
|
|
32
|
-
* description: "Refactor authentication to use JWT"
|
|
33
|
-
* files:
|
|
34
|
-
* - src/auth/session.ts
|
|
35
|
-
* - src/auth/middleware.ts
|
|
36
|
-
* success_criteria: "All tests pass, JWT tokens used"
|
|
37
|
-
* complexity: medium
|
|
38
|
-
* ```
|
|
39
|
-
*
|
|
40
|
-
* @example Running with Harbor:
|
|
41
|
-
* ```bash
|
|
42
|
-
* harbor run \
|
|
43
|
-
* --dataset tasks.yaml \
|
|
44
|
-
* --agent @agent-relay/benchmark/harbor \
|
|
45
|
-
* --parallel 10
|
|
46
|
-
* ```
|
|
47
|
-
*/
|
|
48
|
-
export async function evaluate(input) {
|
|
49
|
-
const startedAt = Date.now();
|
|
50
|
-
const task = convertHarborTask(input);
|
|
51
|
-
const benchmark = new ComparisonBenchmark({
|
|
52
|
-
configurations: ['single', 'subagent', 'swarm'],
|
|
53
|
-
cli: 'claude',
|
|
54
|
-
quiet: true, // Suppress output in Harbor runs
|
|
55
|
-
cooldownMs: 2000,
|
|
56
|
-
});
|
|
57
|
-
const comparison = await benchmark.runComparison(task);
|
|
58
|
-
const completedAt = Date.now();
|
|
59
|
-
return {
|
|
60
|
-
task_id: task.id,
|
|
61
|
-
configurations: Object.fromEntries(comparison.results),
|
|
62
|
-
winner: comparison.winner,
|
|
63
|
-
scores: Object.fromEntries(comparison.scores),
|
|
64
|
-
metadata: {
|
|
65
|
-
benchmark_version: BENCHMARK_VERSION,
|
|
66
|
-
started_at: startedAt,
|
|
67
|
-
completed_at: completedAt,
|
|
68
|
-
total_duration_ms: completedAt - startedAt,
|
|
69
|
-
},
|
|
70
|
-
};
|
|
71
|
-
}
|
|
72
|
-
/**
|
|
73
|
-
* Run a single configuration (for targeted Harbor evaluations)
|
|
74
|
-
*
|
|
75
|
-
* @example Running single config with Harbor:
|
|
76
|
-
* ```bash
|
|
77
|
-
* harbor run \
|
|
78
|
-
* --dataset tasks.yaml \
|
|
79
|
-
* --agent "@agent-relay/benchmark/harbor:evaluateSingle" \
|
|
80
|
-
* --env-var CONFIG=swarm
|
|
81
|
-
* ```
|
|
82
|
-
*/
|
|
83
|
-
export async function evaluateSingle(input) {
|
|
84
|
-
const config = input.config || 'single';
|
|
85
|
-
const task = convertHarborTask(input);
|
|
86
|
-
const benchmark = new ComparisonBenchmark({
|
|
87
|
-
configurations: [config],
|
|
88
|
-
cli: 'claude',
|
|
89
|
-
quiet: true,
|
|
90
|
-
cooldownMs: 0,
|
|
91
|
-
});
|
|
92
|
-
const result = await benchmark.runSingle(task, config);
|
|
93
|
-
return {
|
|
94
|
-
task_id: task.id,
|
|
95
|
-
configuration: config,
|
|
96
|
-
result,
|
|
97
|
-
success: result.success,
|
|
98
|
-
};
|
|
99
|
-
}
|
|
100
|
-
/**
|
|
101
|
-
* Evaluate with custom configuration
|
|
102
|
-
*/
|
|
103
|
-
export async function evaluateCustom(input, config) {
|
|
104
|
-
const startedAt = Date.now();
|
|
105
|
-
const task = convertHarborTask(input);
|
|
106
|
-
const benchmark = new ComparisonBenchmark({
|
|
107
|
-
...config,
|
|
108
|
-
quiet: true,
|
|
109
|
-
});
|
|
110
|
-
const comparison = await benchmark.runComparison(task);
|
|
111
|
-
const completedAt = Date.now();
|
|
112
|
-
return {
|
|
113
|
-
task_id: task.id,
|
|
114
|
-
configurations: Object.fromEntries(comparison.results),
|
|
115
|
-
winner: comparison.winner,
|
|
116
|
-
scores: Object.fromEntries(comparison.scores),
|
|
117
|
-
metadata: {
|
|
118
|
-
benchmark_version: BENCHMARK_VERSION,
|
|
119
|
-
started_at: startedAt,
|
|
120
|
-
completed_at: completedAt,
|
|
121
|
-
total_duration_ms: completedAt - startedAt,
|
|
122
|
-
},
|
|
123
|
-
};
|
|
124
|
-
}
|
|
125
|
-
// Default export for Harbor
|
|
126
|
-
export default evaluate;
|
|
127
|
-
//# sourceMappingURL=harbor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"harbor.js","sourceRoot":"","sources":["../src/harbor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAUH,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAErD,MAAM,iBAAiB,GAAG,OAAO,CAAC;AAElC;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAsB;IAC/C,OAAO;QACL,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,KAAK,EAAE,KAAK,CAAC,KAAK,IAAI,EAAE;QACxB,eAAe,EAAE,KAAK,CAAC,gBAAgB,IAAI,6BAA6B;QACxE,UAAU,EAAG,KAAK,CAAC,UAA6B,IAAI,QAAQ;QAC5D,SAAS,EAAE,MAAM,EAAE,mBAAmB;QACtC,IAAI,EAAE,EAAE;KACT,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,KAAsB;IAEtB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,cAAc,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC;QAC/C,GAAG,EAAE,QAAQ;QACb,KAAK,EAAE,IAAI,EAAE,iCAAiC;QAC9C,UAAU,EAAE,IAAI;KACjB,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAEvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE/B,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,cAAc,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAGpD;QACD,MAAM,EAAE,UAAU,CAAC,MAAM;QACzB,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAG3C;QACD,QAAQ,EAAE;YACR,iBAAiB,EAAE,iBAAiB;YACpC,UAAU,EAAE,SAAS;YACrB,YAAY,EAAE,WAAW;YACzB,iBAAiB,EAAE,WAAW,GAAG,SAAS;SAC3C;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAuD;IAEvD,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,QAAQ,CAAC;IACxC,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,cAAc,EAAE,CAAC,MAAM,CAAC;QACxB,GAAG,EAAE,QAAQ;QACb,KAAK,EAAE,IAAI;QACX,UAAU,EAAE,CAAC;KACd,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAEvD,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,aAAa,EAAE,MAAM;QACrB,MAAM;QACN,OAAO,EAAE,MAAM,CAAC,OAAO;KACxB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAsB,EACtB,MAAgC;IAEhC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,SAAS,GAAG,IAAI,mBAAmB,CAAC;QACxC,GAAG,MAAM;QACT,KAAK,EAAE,IAAI;KACZ,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IACvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE/B,OAAO;QACL,OAAO,EAAE,IAAI,CAAC,EAAE;QAChB,cAAc,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAGpD;QACD,MAAM,EAAE,UAAU,CAAC,MAAM;QACzB,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAG3C;QACD,QAAQ,EAAE;YACR,iBAAiB,EAAE,iBAAiB;YACpC,UAAU,EAAE,SAAS;YACrB,YAAY,EAAE,WAAW;YACzB,iBAAiB,EAAE,WAAW,GAAG,SAAS;SAC3C;KACF,CAAC;AACJ,CAAC;AAED,4BAA4B;AAC5B,eAAe,QAAQ,CAAC"}
|