agent-relay 2.3.2 → 2.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.cjs +1 -1
- package/dist/src/cli/index.js +124 -7
- package/dist/src/cli/index.js.map +1 -1
- package/package.json +20 -26
- package/packages/acp-bridge/package.json +2 -2
- package/packages/bridge/package.json +7 -7
- package/packages/config/dist/cloud-config.d.ts +1 -1
- package/packages/config/dist/cloud-config.d.ts.map +1 -1
- package/packages/config/dist/cloud-config.js.map +1 -1
- package/packages/config/dist/schemas.d.ts +5 -5
- package/packages/config/dist/schemas.js +1 -1
- package/packages/config/dist/schemas.js.map +1 -1
- package/packages/config/package.json +2 -2
- package/packages/config/src/cloud-config.ts +2 -2
- package/packages/config/src/schemas.test.ts +48 -0
- package/packages/config/src/schemas.ts +1 -1
- package/packages/continuity/package.json +2 -2
- package/packages/daemon/package.json +12 -12
- package/packages/hooks/package.json +4 -4
- package/packages/mcp/package.json +5 -5
- package/packages/memory/package.json +2 -2
- package/packages/policy/package.json +2 -2
- package/packages/protocol/package.json +1 -1
- package/packages/resiliency/package.json +1 -1
- package/packages/sdk/dist/index.d.ts +1 -29
- package/packages/sdk/dist/index.d.ts.map +1 -1
- package/packages/sdk/dist/index.js +1 -38
- package/packages/sdk/dist/index.js.map +1 -1
- package/packages/sdk/package.json +4 -25
- package/packages/sdk/src/index.ts +1 -69
- package/packages/sdk-py/README.md +56 -0
- package/packages/sdk-py/pyproject.toml +23 -0
- package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
- package/packages/sdk-py/src/agent_relay/builder.py +367 -0
- package/packages/sdk-py/src/agent_relay/types.py +92 -0
- package/packages/sdk-py/tests/__init__.py +0 -0
- package/packages/sdk-py/tests/test_builder.py +101 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.d.ts +2 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.d.ts.map +1 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.js +257 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.js.map +1 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.d.ts +2 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.d.ts.map +1 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.js +124 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.js.map +1 -0
- package/packages/sdk-ts/dist/client.d.ts +2 -0
- package/packages/sdk-ts/dist/client.d.ts.map +1 -1
- package/packages/sdk-ts/dist/client.js +2 -0
- package/packages/sdk-ts/dist/client.js.map +1 -1
- package/packages/sdk-ts/dist/index.d.ts +1 -0
- package/packages/sdk-ts/dist/index.d.ts.map +1 -1
- package/packages/sdk-ts/dist/index.js +1 -0
- package/packages/sdk-ts/dist/index.js.map +1 -1
- package/packages/sdk-ts/dist/protocol.d.ts +1 -0
- package/packages/sdk-ts/dist/protocol.d.ts.map +1 -1
- package/packages/sdk-ts/dist/relay.d.ts +44 -0
- package/packages/sdk-ts/dist/relay.d.ts.map +1 -1
- package/packages/sdk-ts/dist/relay.js +89 -11
- package/packages/sdk-ts/dist/relay.js.map +1 -1
- package/packages/sdk-ts/dist/relaycast.js +2 -2
- package/packages/sdk-ts/dist/relaycast.js.map +1 -1
- package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
- package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
- package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
- package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/builder.js +179 -0
- package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
- package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/cli.js +82 -0
- package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
- package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
- package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
- package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/index.js +10 -0
- package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
- package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
- package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
- package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/run.js +24 -0
- package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
- package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/runner.js +650 -0
- package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
- package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/state.js +140 -0
- package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
- package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/templates.js +395 -0
- package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
- package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/types.js +8 -0
- package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
- package/packages/sdk-ts/package.json +9 -3
- package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
- package/packages/sdk-ts/src/__tests__/facade.test.ts +296 -0
- package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
- package/packages/sdk-ts/src/__tests__/unit.test.ts +152 -0
- package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
- package/packages/sdk-ts/src/client.ts +4 -0
- package/packages/sdk-ts/src/index.ts +1 -0
- package/packages/sdk-ts/src/protocol.ts +1 -1
- package/packages/sdk-ts/src/relay.ts +112 -11
- package/packages/sdk-ts/src/relaycast.ts +2 -2
- package/packages/sdk-ts/src/workflows/README.md +450 -0
- package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
- package/packages/sdk-ts/src/workflows/builder.ts +241 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
- package/packages/sdk-ts/src/workflows/cli.ts +93 -0
- package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
- package/packages/sdk-ts/src/workflows/index.ts +9 -0
- package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
- package/packages/sdk-ts/src/workflows/run.ts +47 -0
- package/packages/sdk-ts/src/workflows/runner.ts +873 -0
- package/packages/sdk-ts/src/workflows/schema.json +321 -0
- package/packages/sdk-ts/src/workflows/state.ts +279 -0
- package/packages/sdk-ts/src/workflows/templates.ts +544 -0
- package/packages/sdk-ts/src/workflows/types.ts +178 -0
- package/packages/sdk-ts/tsconfig.json +6 -1
- package/packages/spawner/package.json +1 -1
- package/packages/state/package.json +1 -1
- package/packages/storage/package.json +2 -2
- package/packages/telemetry/package.json +1 -1
- package/packages/trajectory/package.json +2 -2
- package/packages/user-directory/package.json +2 -2
- package/packages/utils/package.json +3 -3
- package/packages/wrapper/package.json +5 -6
- package/scripts/postinstall.js +106 -2
- package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
- package/packages/api-types/.trajectories/index.json +0 -12
- package/packages/api-types/dist/index.d.ts +0 -21
- package/packages/api-types/dist/index.d.ts.map +0 -1
- package/packages/api-types/dist/index.js +0 -22
- package/packages/api-types/dist/index.js.map +0 -1
- package/packages/api-types/dist/schemas/agent.d.ts +0 -259
- package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/agent.js +0 -102
- package/packages/api-types/dist/schemas/agent.js.map +0 -1
- package/packages/api-types/dist/schemas/api.d.ts +0 -290
- package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/api.js +0 -162
- package/packages/api-types/dist/schemas/api.js.map +0 -1
- package/packages/api-types/dist/schemas/decision.d.ts +0 -230
- package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/decision.js +0 -104
- package/packages/api-types/dist/schemas/decision.js.map +0 -1
- package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
- package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/fleet.js +0 -71
- package/packages/api-types/dist/schemas/fleet.js.map +0 -1
- package/packages/api-types/dist/schemas/history.d.ts +0 -180
- package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/history.js +0 -72
- package/packages/api-types/dist/schemas/history.js.map +0 -1
- package/packages/api-types/dist/schemas/index.d.ts +0 -14
- package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/index.js +0 -22
- package/packages/api-types/dist/schemas/index.js.map +0 -1
- package/packages/api-types/dist/schemas/message.d.ts +0 -456
- package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/message.js +0 -88
- package/packages/api-types/dist/schemas/message.js.map +0 -1
- package/packages/api-types/dist/schemas/session.d.ts +0 -60
- package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/session.js +0 -36
- package/packages/api-types/dist/schemas/session.js.map +0 -1
- package/packages/api-types/dist/schemas/task.d.ts +0 -111
- package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/task.js +0 -64
- package/packages/api-types/dist/schemas/task.js.map +0 -1
- package/packages/api-types/package.json +0 -61
- package/packages/api-types/scripts/generate-openapi.ts +0 -106
- package/packages/api-types/src/index.ts +0 -22
- package/packages/api-types/src/schemas/agent.test.ts +0 -164
- package/packages/api-types/src/schemas/agent.ts +0 -110
- package/packages/api-types/src/schemas/api.test.ts +0 -372
- package/packages/api-types/src/schemas/api.ts +0 -194
- package/packages/api-types/src/schemas/decision.test.ts +0 -324
- package/packages/api-types/src/schemas/decision.ts +0 -136
- package/packages/api-types/src/schemas/fleet.test.ts +0 -212
- package/packages/api-types/src/schemas/fleet.ts +0 -83
- package/packages/api-types/src/schemas/history.test.ts +0 -242
- package/packages/api-types/src/schemas/history.ts +0 -84
- package/packages/api-types/src/schemas/index.ts +0 -148
- package/packages/api-types/src/schemas/message.test.ts +0 -192
- package/packages/api-types/src/schemas/message.ts +0 -98
- package/packages/api-types/src/schemas/session.test.ts +0 -104
- package/packages/api-types/src/schemas/session.ts +0 -40
- package/packages/api-types/src/schemas/task.test.ts +0 -192
- package/packages/api-types/src/schemas/task.ts +0 -78
- package/packages/api-types/tsconfig.json +0 -19
- package/packages/api-types/vitest.config.ts +0 -9
- package/packages/benchmark/README.md +0 -200
- package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
- package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
- package/packages/benchmark/datasets/quick-test.yaml +0 -20
- package/packages/benchmark/dist/benchmark.d.ts +0 -47
- package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
- package/packages/benchmark/dist/benchmark.js +0 -224
- package/packages/benchmark/dist/benchmark.js.map +0 -1
- package/packages/benchmark/dist/cli.d.ts +0 -8
- package/packages/benchmark/dist/cli.d.ts.map +0 -1
- package/packages/benchmark/dist/cli.js +0 -185
- package/packages/benchmark/dist/cli.js.map +0 -1
- package/packages/benchmark/dist/harbor.d.ts +0 -53
- package/packages/benchmark/dist/harbor.d.ts.map +0 -1
- package/packages/benchmark/dist/harbor.js +0 -127
- package/packages/benchmark/dist/harbor.js.map +0 -1
- package/packages/benchmark/dist/index.d.ts +0 -48
- package/packages/benchmark/dist/index.d.ts.map +0 -1
- package/packages/benchmark/dist/index.js +0 -50
- package/packages/benchmark/dist/index.js.map +0 -1
- package/packages/benchmark/dist/runners/base.d.ts +0 -63
- package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/base.js +0 -156
- package/packages/benchmark/dist/runners/base.js.map +0 -1
- package/packages/benchmark/dist/runners/index.d.ts +0 -10
- package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/index.js +0 -10
- package/packages/benchmark/dist/runners/index.js.map +0 -1
- package/packages/benchmark/dist/runners/single.d.ts +0 -19
- package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/single.js +0 -111
- package/packages/benchmark/dist/runners/single.js.map +0 -1
- package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
- package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/subagent.js +0 -212
- package/packages/benchmark/dist/runners/subagent.js.map +0 -1
- package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
- package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/swarm.js +0 -273
- package/packages/benchmark/dist/runners/swarm.js.map +0 -1
- package/packages/benchmark/dist/types.d.ts +0 -178
- package/packages/benchmark/dist/types.d.ts.map +0 -1
- package/packages/benchmark/dist/types.js +0 -16
- package/packages/benchmark/dist/types.js.map +0 -1
- package/packages/benchmark/package.json +0 -80
- package/packages/benchmark/src/benchmark.ts +0 -298
- package/packages/benchmark/src/cli.ts +0 -240
- package/packages/benchmark/src/harbor.ts +0 -170
- package/packages/benchmark/src/index.ts +0 -73
- package/packages/benchmark/src/runners/base.ts +0 -205
- package/packages/benchmark/src/runners/index.ts +0 -10
- package/packages/benchmark/src/runners/single.ts +0 -121
- package/packages/benchmark/src/runners/subagent.ts +0 -240
- package/packages/benchmark/src/runners/swarm.ts +0 -326
- package/packages/benchmark/src/types.ts +0 -205
- package/packages/benchmark/tsconfig.json +0 -20
- package/packages/cli-tester/README.md +0 -277
- package/packages/cli-tester/dist/index.d.ts +0 -21
- package/packages/cli-tester/dist/index.d.ts.map +0 -1
- package/packages/cli-tester/dist/index.js +0 -21
- package/packages/cli-tester/dist/index.js.map +0 -1
- package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
- package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
- package/packages/cli-tester/dist/utils/credential-check.js +0 -230
- package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
- package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
- package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
- package/packages/cli-tester/dist/utils/socket-client.js +0 -153
- package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
- package/packages/cli-tester/docker/Dockerfile +0 -61
- package/packages/cli-tester/docker/docker-compose.yml +0 -71
- package/packages/cli-tester/docker/entrypoint.sh +0 -58
- package/packages/cli-tester/package.json +0 -32
- package/packages/cli-tester/scripts/clear-auth.sh +0 -101
- package/packages/cli-tester/scripts/inject-message.sh +0 -42
- package/packages/cli-tester/scripts/start.sh +0 -71
- package/packages/cli-tester/scripts/test-cli.sh +0 -56
- package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
- package/packages/cli-tester/scripts/test-registration.sh +0 -182
- package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
- package/packages/cli-tester/scripts/test-spawn.sh +0 -140
- package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
- package/packages/cli-tester/scripts/verify-auth.sh +0 -112
- package/packages/cli-tester/src/index.ts +0 -40
- package/packages/cli-tester/src/utils/credential-check.ts +0 -284
- package/packages/cli-tester/src/utils/socket-client.ts +0 -211
- package/packages/cli-tester/tests/credential-check.test.ts +0 -56
- package/packages/cli-tester/tsconfig.json +0 -11
- package/packages/sdk/dist/browser-client.d.ts +0 -212
- package/packages/sdk/dist/browser-client.d.ts.map +0 -1
- package/packages/sdk/dist/browser-client.js +0 -750
- package/packages/sdk/dist/browser-client.js.map +0 -1
- package/packages/sdk/dist/browser-framing.d.ts +0 -46
- package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
- package/packages/sdk/dist/browser-framing.js +0 -122
- package/packages/sdk/dist/browser-framing.js.map +0 -1
- package/packages/sdk/dist/standalone.d.ts +0 -89
- package/packages/sdk/dist/standalone.d.ts.map +0 -1
- package/packages/sdk/dist/standalone.js +0 -131
- package/packages/sdk/dist/standalone.js.map +0 -1
- package/packages/sdk/dist/transports/index.d.ts +0 -92
- package/packages/sdk/dist/transports/index.d.ts.map +0 -1
- package/packages/sdk/dist/transports/index.js +0 -129
- package/packages/sdk/dist/transports/index.js.map +0 -1
- package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
- package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
- package/packages/sdk/dist/transports/socket-transport.js +0 -94
- package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
- package/packages/sdk/dist/transports/types.d.ts +0 -69
- package/packages/sdk/dist/transports/types.d.ts.map +0 -1
- package/packages/sdk/dist/transports/types.js +0 -10
- package/packages/sdk/dist/transports/types.js.map +0 -1
- package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
- package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
- package/packages/sdk/dist/transports/websocket-transport.js +0 -180
- package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
- package/packages/sdk/src/browser-client.ts +0 -985
- package/packages/sdk/src/browser-framing.test.ts +0 -115
- package/packages/sdk/src/browser-framing.ts +0 -150
- package/packages/sdk/src/standalone.ts +0 -183
- package/packages/sdk/src/transports/index.ts +0 -197
- package/packages/sdk/src/transports/socket-transport.ts +0 -115
- package/packages/sdk/src/transports/types.ts +0 -77
- package/packages/sdk/src/transports/websocket-transport.ts +0 -245
|
@@ -1,240 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Benchmark CLI
|
|
4
|
-
*
|
|
5
|
-
* Command-line interface for running agent swarm benchmarks.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import { Command } from 'commander';
|
|
9
|
-
import { readFileSync } from 'node:fs';
|
|
10
|
-
import { parse as parseYaml } from 'yaml';
|
|
11
|
-
import { ComparisonBenchmark } from './benchmark.js';
|
|
12
|
-
import type {
|
|
13
|
-
Task,
|
|
14
|
-
TaskDataset,
|
|
15
|
-
ConfigurationType,
|
|
16
|
-
BenchmarkConfig,
|
|
17
|
-
} from './types.js';
|
|
18
|
-
|
|
19
|
-
const program = new Command();
|
|
20
|
-
|
|
21
|
-
program
|
|
22
|
-
.name('relay-benchmark')
|
|
23
|
-
.description('Benchmark agent swarms, sub-agents, and single agents')
|
|
24
|
-
.version('1.0.0');
|
|
25
|
-
|
|
26
|
-
program
|
|
27
|
-
.command('run')
|
|
28
|
-
.description('Run a benchmark comparison')
|
|
29
|
-
.option('-d, --dataset <path>', 'Path to task dataset (YAML or JSON)')
|
|
30
|
-
.option('-t, --task <id>', 'Run only a specific task by ID')
|
|
31
|
-
.option(
|
|
32
|
-
'-c, --config <types>',
|
|
33
|
-
'Configurations to run (single,subagent,swarm,all)',
|
|
34
|
-
'all'
|
|
35
|
-
)
|
|
36
|
-
.option('--cli <name>', 'CLI to use for agents', 'claude')
|
|
37
|
-
.option('--cwd <path>', 'Working directory for tasks')
|
|
38
|
-
.option('-q, --quiet', 'Suppress output', false)
|
|
39
|
-
.option('--cooldown <ms>', 'Cooldown between runs in ms', '5000')
|
|
40
|
-
.option('--max-swarm <n>', 'Maximum swarm size', '10')
|
|
41
|
-
.option('-o, --output <path>', 'Output results to JSON file')
|
|
42
|
-
.action(async (options) => {
|
|
43
|
-
try {
|
|
44
|
-
await runBenchmark(options);
|
|
45
|
-
} catch (err) {
|
|
46
|
-
console.error('Error:', (err as Error).message);
|
|
47
|
-
process.exit(1);
|
|
48
|
-
}
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
program
|
|
52
|
-
.command('list')
|
|
53
|
-
.description('List tasks in a dataset')
|
|
54
|
-
.argument('<dataset>', 'Path to task dataset')
|
|
55
|
-
.action((datasetPath) => {
|
|
56
|
-
const dataset = loadDataset(datasetPath);
|
|
57
|
-
console.log(`\nDataset: ${dataset.name || 'Unnamed'}`);
|
|
58
|
-
if (dataset.description) {
|
|
59
|
-
console.log(`Description: ${dataset.description}`);
|
|
60
|
-
}
|
|
61
|
-
console.log(`\nTasks (${dataset.tasks.length}):\n`);
|
|
62
|
-
|
|
63
|
-
for (const task of dataset.tasks) {
|
|
64
|
-
console.log(` ${task.id}`);
|
|
65
|
-
console.log(` Complexity: ${task.complexity}`);
|
|
66
|
-
console.log(` Files: ${task.files.length}`);
|
|
67
|
-
console.log(` ${task.description.substring(0, 60)}...`);
|
|
68
|
-
console.log('');
|
|
69
|
-
}
|
|
70
|
-
});
|
|
71
|
-
|
|
72
|
-
async function runBenchmark(options: {
|
|
73
|
-
dataset?: string;
|
|
74
|
-
task?: string;
|
|
75
|
-
config: string;
|
|
76
|
-
cli: string;
|
|
77
|
-
cwd?: string;
|
|
78
|
-
quiet: boolean;
|
|
79
|
-
cooldown: string;
|
|
80
|
-
maxSwarm: string;
|
|
81
|
-
output?: string;
|
|
82
|
-
}): Promise<void> {
|
|
83
|
-
// Parse configurations
|
|
84
|
-
const configurations = parseConfigurations(options.config);
|
|
85
|
-
|
|
86
|
-
// Build benchmark config
|
|
87
|
-
const benchmarkConfig: Partial<BenchmarkConfig> = {
|
|
88
|
-
configurations,
|
|
89
|
-
cli: options.cli,
|
|
90
|
-
cwd: options.cwd,
|
|
91
|
-
quiet: options.quiet,
|
|
92
|
-
cooldownMs: parseInt(options.cooldown, 10),
|
|
93
|
-
maxSwarmSize: parseInt(options.maxSwarm, 10),
|
|
94
|
-
};
|
|
95
|
-
|
|
96
|
-
const benchmark = new ComparisonBenchmark(benchmarkConfig);
|
|
97
|
-
|
|
98
|
-
// Load tasks
|
|
99
|
-
let tasks: Task[];
|
|
100
|
-
if (options.dataset) {
|
|
101
|
-
const dataset = loadDataset(options.dataset);
|
|
102
|
-
tasks = dataset.tasks;
|
|
103
|
-
|
|
104
|
-
if (options.task) {
|
|
105
|
-
tasks = tasks.filter((t) => t.id === options.task);
|
|
106
|
-
if (tasks.length === 0) {
|
|
107
|
-
throw new Error(`Task not found: ${options.task}`);
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
} else if (options.task) {
|
|
111
|
-
// Create a simple task from command line
|
|
112
|
-
tasks = [
|
|
113
|
-
{
|
|
114
|
-
id: options.task,
|
|
115
|
-
description: options.task,
|
|
116
|
-
files: [],
|
|
117
|
-
expectedOutcome: 'Task completed',
|
|
118
|
-
complexity: 'medium',
|
|
119
|
-
},
|
|
120
|
-
];
|
|
121
|
-
} else {
|
|
122
|
-
throw new Error('Either --dataset or --task is required');
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
// Run benchmarks
|
|
126
|
-
const results = [];
|
|
127
|
-
for (const task of tasks) {
|
|
128
|
-
if (!options.quiet) {
|
|
129
|
-
console.log(`\n${'='.repeat(60)}`);
|
|
130
|
-
console.log(`Running task: ${task.id}`);
|
|
131
|
-
console.log('='.repeat(60));
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
const comparison = await benchmark.runComparison(task);
|
|
135
|
-
|
|
136
|
-
if (!options.quiet) {
|
|
137
|
-
benchmark.printComparison(comparison);
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
results.push({
|
|
141
|
-
taskId: task.id,
|
|
142
|
-
winner: comparison.winner,
|
|
143
|
-
results: Object.fromEntries(comparison.results),
|
|
144
|
-
scores: Object.fromEntries(comparison.scores),
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
// Output results
|
|
149
|
-
if (options.output) {
|
|
150
|
-
const { writeFileSync } = await import('node:fs');
|
|
151
|
-
writeFileSync(options.output, JSON.stringify(results, null, 2));
|
|
152
|
-
console.log(`\nResults written to: ${options.output}`);
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
// Print summary
|
|
156
|
-
if (!options.quiet && results.length > 1) {
|
|
157
|
-
printSummary(results);
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
function parseConfigurations(config: string): ConfigurationType[] {
|
|
162
|
-
if (config === 'all') {
|
|
163
|
-
return ['single', 'subagent', 'swarm'];
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
const configs = config.split(',').map((c) => c.trim()) as ConfigurationType[];
|
|
167
|
-
const valid: ConfigurationType[] = ['single', 'subagent', 'swarm'];
|
|
168
|
-
|
|
169
|
-
for (const c of configs) {
|
|
170
|
-
if (!valid.includes(c)) {
|
|
171
|
-
throw new Error(`Invalid configuration: ${c}. Valid: ${valid.join(', ')}`);
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
return configs;
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
function loadDataset(path: string): TaskDataset {
|
|
179
|
-
const content = readFileSync(path, 'utf-8');
|
|
180
|
-
|
|
181
|
-
let data: TaskDataset;
|
|
182
|
-
if (path.endsWith('.yaml') || path.endsWith('.yml')) {
|
|
183
|
-
data = parseYaml(content) as TaskDataset;
|
|
184
|
-
} else {
|
|
185
|
-
data = JSON.parse(content) as TaskDataset;
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
// Validate and normalize tasks
|
|
189
|
-
if (!data.tasks || !Array.isArray(data.tasks)) {
|
|
190
|
-
throw new Error('Dataset must have a "tasks" array');
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
data.tasks = data.tasks.map((t, i) => ({
|
|
194
|
-
id: t.id || `task-${i}`,
|
|
195
|
-
description: t.description || '',
|
|
196
|
-
files: t.files || [],
|
|
197
|
-
expectedOutcome: t.expectedOutcome || t.success_criteria || 'Completed',
|
|
198
|
-
complexity: t.complexity || 'medium',
|
|
199
|
-
timeoutMs: t.timeoutMs || 300000,
|
|
200
|
-
tags: t.tags || [],
|
|
201
|
-
}));
|
|
202
|
-
|
|
203
|
-
return data;
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
function printSummary(
|
|
207
|
-
results: Array<{
|
|
208
|
-
taskId: string;
|
|
209
|
-
winner: ConfigurationType;
|
|
210
|
-
results: Record<string, unknown>;
|
|
211
|
-
scores: Record<string, unknown>;
|
|
212
|
-
}>
|
|
213
|
-
): void {
|
|
214
|
-
console.log('\n' + '='.repeat(60));
|
|
215
|
-
console.log('BENCHMARK SUMMARY');
|
|
216
|
-
console.log('='.repeat(60));
|
|
217
|
-
|
|
218
|
-
const wins: Record<ConfigurationType, number> = {
|
|
219
|
-
single: 0,
|
|
220
|
-
subagent: 0,
|
|
221
|
-
swarm: 0,
|
|
222
|
-
};
|
|
223
|
-
|
|
224
|
-
for (const result of results) {
|
|
225
|
-
wins[result.winner]++;
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
console.log('\nWins by configuration:');
|
|
229
|
-
for (const [config, count] of Object.entries(wins)) {
|
|
230
|
-
const bar = '█'.repeat(count) + '░'.repeat(results.length - count);
|
|
231
|
-
console.log(` ${config.padEnd(10)} ${bar} ${count}/${results.length}`);
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
const overallWinner = (Object.entries(wins) as [ConfigurationType, number][])
|
|
235
|
-
.sort((a, b) => b[1] - a[1])[0][0];
|
|
236
|
-
|
|
237
|
-
console.log(`\nOverall winner: ${overallWinner.toUpperCase()}`);
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
program.parse();
|
|
@@ -1,170 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Harbor Integration
|
|
3
|
-
*
|
|
4
|
-
* Entry points for Harbor benchmark framework integration.
|
|
5
|
-
* https://github.com/laude-institute/harbor
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type {
|
|
9
|
-
Task,
|
|
10
|
-
TaskComplexity,
|
|
11
|
-
ConfigurationType,
|
|
12
|
-
HarborTaskInput,
|
|
13
|
-
HarborEvaluationOutput,
|
|
14
|
-
BenchmarkConfig,
|
|
15
|
-
} from './types.js';
|
|
16
|
-
import { ComparisonBenchmark } from './benchmark.js';
|
|
17
|
-
|
|
18
|
-
const BENCHMARK_VERSION = '1.0.0';
|
|
19
|
-
|
|
20
|
-
/**
|
|
21
|
-
* Convert Harbor task input to internal Task format
|
|
22
|
-
*/
|
|
23
|
-
function convertHarborTask(input: HarborTaskInput): Task {
|
|
24
|
-
return {
|
|
25
|
-
id: input.id,
|
|
26
|
-
description: input.description,
|
|
27
|
-
files: input.files || [],
|
|
28
|
-
expectedOutcome: input.success_criteria || 'Task completed successfully',
|
|
29
|
-
complexity: (input.complexity as TaskComplexity) || 'medium',
|
|
30
|
-
timeoutMs: 300000, // 5 minute default
|
|
31
|
-
tags: [],
|
|
32
|
-
};
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
/**
|
|
36
|
-
* Main Harbor evaluation entry point
|
|
37
|
-
*
|
|
38
|
-
* This function is called by Harbor to evaluate a task across all configurations.
|
|
39
|
-
*
|
|
40
|
-
* @example Harbor dataset format:
|
|
41
|
-
* ```yaml
|
|
42
|
-
* tasks:
|
|
43
|
-
* - id: refactor-auth
|
|
44
|
-
* description: "Refactor authentication to use JWT"
|
|
45
|
-
* files:
|
|
46
|
-
* - src/auth/session.ts
|
|
47
|
-
* - src/auth/middleware.ts
|
|
48
|
-
* success_criteria: "All tests pass, JWT tokens used"
|
|
49
|
-
* complexity: medium
|
|
50
|
-
* ```
|
|
51
|
-
*
|
|
52
|
-
* @example Running with Harbor:
|
|
53
|
-
* ```bash
|
|
54
|
-
* harbor run \
|
|
55
|
-
* --dataset tasks.yaml \
|
|
56
|
-
* --agent @agent-relay/benchmark/harbor \
|
|
57
|
-
* --parallel 10
|
|
58
|
-
* ```
|
|
59
|
-
*/
|
|
60
|
-
export async function evaluate(
|
|
61
|
-
input: HarborTaskInput
|
|
62
|
-
): Promise<HarborEvaluationOutput> {
|
|
63
|
-
const startedAt = Date.now();
|
|
64
|
-
const task = convertHarborTask(input);
|
|
65
|
-
|
|
66
|
-
const benchmark = new ComparisonBenchmark({
|
|
67
|
-
configurations: ['single', 'subagent', 'swarm'],
|
|
68
|
-
cli: 'claude',
|
|
69
|
-
quiet: true, // Suppress output in Harbor runs
|
|
70
|
-
cooldownMs: 2000,
|
|
71
|
-
});
|
|
72
|
-
|
|
73
|
-
const comparison = await benchmark.runComparison(task);
|
|
74
|
-
|
|
75
|
-
const completedAt = Date.now();
|
|
76
|
-
|
|
77
|
-
return {
|
|
78
|
-
task_id: task.id,
|
|
79
|
-
configurations: Object.fromEntries(comparison.results) as Record<
|
|
80
|
-
ConfigurationType,
|
|
81
|
-
any
|
|
82
|
-
>,
|
|
83
|
-
winner: comparison.winner,
|
|
84
|
-
scores: Object.fromEntries(comparison.scores) as Record<
|
|
85
|
-
ConfigurationType,
|
|
86
|
-
any
|
|
87
|
-
>,
|
|
88
|
-
metadata: {
|
|
89
|
-
benchmark_version: BENCHMARK_VERSION,
|
|
90
|
-
started_at: startedAt,
|
|
91
|
-
completed_at: completedAt,
|
|
92
|
-
total_duration_ms: completedAt - startedAt,
|
|
93
|
-
},
|
|
94
|
-
};
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
/**
 * Benchmark exactly one configuration (for targeted Harbor evaluations).
 *
 * The configuration is read from `input.config`; when that field is absent
 * (or otherwise falsy) `'single'` is used.
 *
 * @example Running single config with Harbor:
 * ```bash
 * harbor run \
 *   --dataset tasks.yaml \
 *   --agent "@agent-relay/benchmark/harbor:evaluateSingle" \
 *   --env-var CONFIG=swarm
 * ```
 *
 * @param input - Harbor task input, optionally carrying a `config` field.
 * @returns An object holding `task_id`, the `configuration` that was run, the
 *   raw `result`, and `success` copied from `result.success`.
 */
export async function evaluateSingle(
  input: HarborTaskInput & { config?: ConfigurationType }
): Promise<Record<string, unknown>> {
  const config = input.config ? input.config : 'single';
  const task = convertHarborTask(input);

  // Only one configuration is run, so no cooldown between runs is configured.
  const result = await new ComparisonBenchmark({
    configurations: [config],
    cli: 'claude',
    quiet: true,
    cooldownMs: 0,
  }).runSingle(task, config);

  const { success } = result;
  return {
    task_id: task.id,
    configuration: config,
    result,
    success,
  };
}
|
|
130
|
-
|
|
131
|
-
/**
 * Run the full comparison with caller-supplied benchmark settings.
 *
 * @param input - Harbor task input, converted with `convertHarborTask`.
 * @param config - Partial benchmark configuration. `quiet` is always forced
 *   to `true`, overriding any value supplied here.
 * @returns The comparison in Harbor output form, including timing metadata.
 */
export async function evaluateCustom(
  input: HarborTaskInput,
  config: Partial<BenchmarkConfig>
): Promise<HarborEvaluationOutput> {
  const startedAt = Date.now();
  const task = convertHarborTask(input);

  // Spread first so the forced `quiet: true` wins over the caller's value.
  const runner = new ComparisonBenchmark({ ...config, quiet: true });
  const outcome = await runner.runComparison(task);
  const completedAt = Date.now();

  const configurations = Object.fromEntries(outcome.results) as Record<
    ConfigurationType,
    any
  >;
  const scores = Object.fromEntries(outcome.scores) as Record<
    ConfigurationType,
    any
  >;

  return {
    task_id: task.id,
    configurations,
    winner: outcome.winner,
    scores,
    metadata: {
      benchmark_version: BENCHMARK_VERSION,
      started_at: startedAt,
      completed_at: completedAt,
      total_duration_ms: completedAt - startedAt,
    },
  };
}
|
|
168
|
-
|
|
169
|
-
// Default export for Harbor
|
|
170
|
-
export default evaluate;
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @agent-relay/benchmark
|
|
3
|
-
*
|
|
4
|
-
* Performance benchmarking for agent swarms, sub-agents, and single agents.
|
|
5
|
-
*
|
|
6
|
-
* ## Quick Start
|
|
7
|
-
*
|
|
8
|
-
* ```typescript
|
|
9
|
-
* import { ComparisonBenchmark, type Task } from '@agent-relay/benchmark';
|
|
10
|
-
*
|
|
11
|
-
* const task: Task = {
|
|
12
|
-
* id: 'refactor-auth',
|
|
13
|
-
* description: 'Refactor authentication to use JWT',
|
|
14
|
-
* files: ['src/auth/session.ts', 'src/auth/middleware.ts'],
|
|
15
|
-
* expectedOutcome: 'All tests pass, JWT tokens used',
|
|
16
|
-
* complexity: 'medium',
|
|
17
|
-
* };
|
|
18
|
-
*
|
|
19
|
-
* const benchmark = new ComparisonBenchmark();
|
|
20
|
-
* const comparison = await benchmark.runComparison(task);
|
|
21
|
-
*
|
|
22
|
-
* console.log(`Winner: ${comparison.winner}`);
|
|
23
|
-
* benchmark.printComparison(comparison);
|
|
24
|
-
* ```
|
|
25
|
-
*
|
|
26
|
-
* ## With Harbor
|
|
27
|
-
*
|
|
28
|
-
* ```bash
|
|
29
|
-
* harbor run \
|
|
30
|
-
* --dataset tasks.yaml \
|
|
31
|
-
* --agent @agent-relay/benchmark/harbor \
|
|
32
|
-
* --parallel 10
|
|
33
|
-
* ```
|
|
34
|
-
*
|
|
35
|
-
* ## CLI Usage
|
|
36
|
-
*
|
|
37
|
-
* ```bash
|
|
38
|
-
* relay-benchmark run --dataset tasks.yaml --config all
|
|
39
|
-
* relay-benchmark run --dataset tasks.yaml --config swarm
|
|
40
|
-
* relay-benchmark list tasks.yaml
|
|
41
|
-
* ```
|
|
42
|
-
*/
|
|
43
|
-
|
|
44
|
-
// Types
|
|
45
|
-
export type {
|
|
46
|
-
ConfigurationType,
|
|
47
|
-
TaskComplexity,
|
|
48
|
-
Task,
|
|
49
|
-
RunResult,
|
|
50
|
-
ComparisonResult,
|
|
51
|
-
ScoreBreakdown,
|
|
52
|
-
BenchmarkConfig,
|
|
53
|
-
RunMetrics,
|
|
54
|
-
TaskDataset,
|
|
55
|
-
HarborTaskInput,
|
|
56
|
-
HarborEvaluationOutput,
|
|
57
|
-
} from './types.js';
|
|
58
|
-
|
|
59
|
-
export { DEFAULT_BENCHMARK_CONFIG } from './types.js';
|
|
60
|
-
|
|
61
|
-
// Main benchmark class
|
|
62
|
-
export { ComparisonBenchmark, runComparison } from './benchmark.js';
|
|
63
|
-
|
|
64
|
-
// Runners
|
|
65
|
-
export {
|
|
66
|
-
ConfigurationRunner,
|
|
67
|
-
SingleAgentRunner,
|
|
68
|
-
SubAgentRunner,
|
|
69
|
-
SwarmRunner,
|
|
70
|
-
} from './runners/index.js';
|
|
71
|
-
|
|
72
|
-
// Harbor integration
|
|
73
|
-
export { evaluate, evaluateSingle, evaluateCustom } from './harbor.js';
|
|
@@ -1,205 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Base Configuration Runner
|
|
3
|
-
*
|
|
4
|
-
* Abstract base class for benchmark configuration runners.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import {
|
|
8
|
-
createRelay,
|
|
9
|
-
RelayClient,
|
|
10
|
-
type MetricsResponsePayload,
|
|
11
|
-
type Relay,
|
|
12
|
-
} from '@agent-relay/sdk';
|
|
13
|
-
import type {
|
|
14
|
-
ConfigurationType,
|
|
15
|
-
Task,
|
|
16
|
-
RunResult,
|
|
17
|
-
RunMetrics,
|
|
18
|
-
BenchmarkConfig,
|
|
19
|
-
} from '../types.js';
|
|
20
|
-
import { DEFAULT_BENCHMARK_CONFIG } from '../types.js';
|
|
21
|
-
|
|
22
|
-
/**
 * Per-agent metrics entry, widened with two optional fields that this file
 * reads when they are present on the payload.
 */
type AgentMetrics = MetricsResponsePayload['agents'][number] & {
  tokens?: number;
  memoryMb?: number;
};

/**
 * Abstract base class for configuration runners.
 *
 * Subclasses supply `configurationType` and `run()`. This class provides the
 * relay/orchestrator lifecycle (`setup` / `teardown`), metric bookkeeping and
 * a few helpers for reading metrics payloads.
 */
export abstract class ConfigurationRunner {
  // Both are assigned in setup(); they are undefined until it has succeeded.
  protected relay!: Relay;
  protected orchestrator!: RelayClient;
  protected config: BenchmarkConfig;
  protected metrics: RunMetrics = {
    messages: 0,
    latencies: [],
    startTime: 0,
    spawnedAgents: [],
    errors: [],
  };

  /**
   * @param config - Overrides merged on top of `DEFAULT_BENCHMARK_CONFIG`.
   */
  constructor(config: Partial<BenchmarkConfig> = {}) {
    this.config = { ...DEFAULT_BENCHMARK_CONFIG, ...config };
  }

  /**
   * Get the configuration type this runner handles
   */
  abstract get configurationType(): ConfigurationType;

  /**
   * Set up the relay and the orchestrator client, then reset the metrics.
   */
  async setup(): Promise<void> {
    this.relay = await createRelay({
      socketPath: this.config.socketPath,
      quiet: this.config.quiet,
      spawnManager: true,
    });
    this.orchestrator = await this.relay.client('Orchestrator', {
      quiet: this.config.quiet,
    });
    this.resetMetrics();
  }

  /**
   * Run a task and return the result
   */
  abstract run(task: Task): Promise<RunResult>;

  /**
   * Clean up resources: release every agent recorded in
   * `metrics.spawnedAgents`, then stop the relay.
   *
   * Safe to call after a failed `setup()`: steps whose target was never
   * created are skipped instead of throwing.
   */
  async teardown(): Promise<void> {
    if (this.orchestrator) {
      for (const agent of this.metrics.spawnedAgents) {
        try {
          await this.orchestrator.release(agent);
        } catch {
          // Ignore release errors during cleanup
        }
      }
    }

    // `relay` is only assigned once createRelay() has resolved.
    if (this.relay) {
      await this.relay.stop();
    }
  }

  /**
   * Reset metrics for a new run; `startTime` is set to the current time.
   */
  protected resetMetrics(): void {
    this.metrics = {
      messages: 0,
      latencies: [],
      startTime: Date.now(),
      spawnedAgents: [],
      errors: [],
    };
  }

  /**
   * Calculate a percentile from an array of values.
   *
   * The input is copied before sorting, so `arr` is not mutated. The selected
   * index is `ceil(p / 100 * n) - 1`, clamped to the valid range so that
   * `p <= 0` yields the smallest value and `p >= 100` the largest.
   *
   * @param arr - Sample values; may be empty.
   * @param p - Percentile, expected in the range 0-100.
   * @returns The selected value, or 0 when `arr` is empty.
   */
  protected percentile(arr: number[], p: number): number {
    if (arr.length === 0) return 0;
    const sorted = [...arr].sort((a, b) => a - b);
    const idx = Math.ceil((p / 100) * sorted.length) - 1;
    const clamped = Math.min(sorted.length - 1, Math.max(0, idx));
    // A NaN `p` yields a NaN index; fall back to 0 rather than undefined.
    return sorted[clamped] ?? 0;
  }

  /**
   * Extract total tokens from a metrics response.
   *
   * @returns The sum of `tokens` over all agents; agents without the field
   *   count as 0, and a payload without `agents` yields 0.
   */
  protected extractTokens(metrics: MetricsResponsePayload): number {
    const agents = metrics.agents as AgentMetrics[] | undefined;
    return (
      agents?.reduce((sum, agent) => sum + (agent.tokens || 0), 0) || 0
    );
  }

  /**
   * Extract peak memory (in MB) from a metrics response.
   *
   * For each agent `memoryMb` is preferred; otherwise `rssBytes` is converted
   * from bytes to MB; otherwise the agent counts as 0.
   *
   * @returns The largest per-agent value, or 0 when there are no agents.
   */
  protected extractMemory(metrics: MetricsResponsePayload): number {
    const agents = metrics.agents as AgentMetrics[] | undefined;
    const memoryValues = (agents ?? []).map((agent) => {
      if (agent.memoryMb != null) return agent.memoryMb;
      if (agent.rssBytes != null) return agent.rssBytes / 1024 / 1024;
      return 0;
    });
    // Seed with 0: Math.max() with no arguments is -Infinity, which is what
    // an empty `agents` array would otherwise produce.
    return Math.max(0, ...memoryValues);
  }

  /**
   * Build a failed result when setup fails.
   *
   * @param task - Task the result is reported for.
   * @param startTime - Epoch milliseconds at which the attempt started.
   * @param errors - Error messages to attach to the result.
   * @returns A `RunResult` with `success: false` and every metric zeroed,
   *   except `totalTimeMs`, which is the time elapsed since `startTime`.
   */
  protected buildFailedResult(
    task: Task,
    startTime: number,
    errors: string[]
  ): RunResult {
    const now = Date.now();
    return {
      taskId: task.id,
      configuration: this.configurationType,
      totalTimeMs: now - startTime,
      timeToFirstActionMs: 0,
      messageCount: 0,
      avgLatencyMs: 0,
      latencyP50Ms: 0,
      latencyP99Ms: 0,
      coordinationRounds: 0,
      agentCount: 0,
      totalTokensUsed: 0,
      peakMemoryMb: 0,
      success: false,
      completionRate: 0,
      errors,
      startedAt: startTime,
      completedAt: now,
    };
  }

  /**
   * Wait for an agent to complete their task.
   *
   * Temporarily wraps `orchestrator.onMessage`. The wait succeeds when a
   * message from `agentName` has a string body starting with `donePrefix`.
   * Any previously installed handler keeps receiving every message.
   *
   * @param agentName - Sender whose completion message is awaited.
   * @param task - Supplies `timeoutMs`; a missing or 0 value means 300000 ms.
   * @param donePrefix - Prefix that marks a completion message.
   * @returns `true` on completion; `false` on timeout, in which case a
   *   timeout entry is also appended to `metrics.errors`.
   */
  protected waitForCompletion(
    agentName: string,
    task: Task,
    donePrefix = 'DONE:'
  ): Promise<boolean> {
    const timeoutMs = task.timeoutMs || 300000;

    return new Promise((resolve) => {
      const originalHandler = this.orchestrator.onMessage;
      let settled = false;

      const finish = (completed: boolean): void => {
        if (settled) return;
        settled = true;
        clearTimeout(timeout);
        // Uninstall only while this wrapper is still the outermost handler.
        // If a later wait has wrapped it, restoring here would drop that
        // wait's handler; the settled wrapper then stays as a pass-through.
        if (this.orchestrator.onMessage === handler) {
          this.orchestrator.onMessage = originalHandler;
        }
        resolve(completed);
      };

      const handler: typeof originalHandler = (
        from,
        payload,
        id,
        meta,
        originalTo
      ) => {
        // Call original handler if exists
        if (originalHandler) {
          originalHandler(from, payload, id, meta, originalTo);
        }
        if (settled) return;

        if (
          from === agentName &&
          typeof payload?.body === 'string' &&
          payload.body.startsWith(donePrefix)
        ) {
          finish(true);
        }
      };

      const timeout = setTimeout(() => {
        this.metrics.errors.push(`Timeout waiting for ${agentName}`);
        finish(false);
      }, timeoutMs);

      this.orchestrator.onMessage = handler;
    });
  }

  /**
   * Log a message, prefixed with the configuration type, unless `config.quiet`
   * is set.
   */
  protected log(message: string): void {
    if (!this.config.quiet) {
      console.log(`[${this.configurationType}] ${message}`);
    }
  }
}
|
|
203
|
-
|
|
204
|
-
// Re-export the DEFAULT_BENCHMARK_CONFIG
|
|
205
|
-
export { DEFAULT_BENCHMARK_CONFIG } from '../types.js';
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Benchmark Runners
|
|
3
|
-
*
|
|
4
|
-
* Export all configuration runners.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
export { ConfigurationRunner, DEFAULT_BENCHMARK_CONFIG } from './base.js';
|
|
8
|
-
export { SingleAgentRunner } from './single.js';
|
|
9
|
-
export { SubAgentRunner } from './subagent.js';
|
|
10
|
-
export { SwarmRunner } from './swarm.js';
|