agent-relay 2.3.2 → 2.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.cjs +1 -1
- package/dist/src/cli/index.js +124 -7
- package/dist/src/cli/index.js.map +1 -1
- package/package.json +20 -26
- package/packages/acp-bridge/package.json +2 -2
- package/packages/bridge/package.json +7 -7
- package/packages/config/dist/cloud-config.d.ts +1 -1
- package/packages/config/dist/cloud-config.d.ts.map +1 -1
- package/packages/config/dist/cloud-config.js.map +1 -1
- package/packages/config/dist/schemas.d.ts +5 -5
- package/packages/config/dist/schemas.js +1 -1
- package/packages/config/dist/schemas.js.map +1 -1
- package/packages/config/package.json +2 -2
- package/packages/config/src/cloud-config.ts +2 -2
- package/packages/config/src/schemas.test.ts +48 -0
- package/packages/config/src/schemas.ts +1 -1
- package/packages/continuity/package.json +2 -2
- package/packages/daemon/package.json +12 -12
- package/packages/hooks/package.json +4 -4
- package/packages/mcp/package.json +5 -5
- package/packages/memory/package.json +2 -2
- package/packages/policy/package.json +2 -2
- package/packages/protocol/package.json +1 -1
- package/packages/resiliency/package.json +1 -1
- package/packages/sdk/dist/index.d.ts +1 -29
- package/packages/sdk/dist/index.d.ts.map +1 -1
- package/packages/sdk/dist/index.js +1 -38
- package/packages/sdk/dist/index.js.map +1 -1
- package/packages/sdk/package.json +4 -25
- package/packages/sdk/src/index.ts +1 -69
- package/packages/sdk-py/README.md +56 -0
- package/packages/sdk-py/pyproject.toml +23 -0
- package/packages/sdk-py/src/agent_relay/__init__.py +27 -0
- package/packages/sdk-py/src/agent_relay/builder.py +367 -0
- package/packages/sdk-py/src/agent_relay/types.py +92 -0
- package/packages/sdk-py/tests/__init__.py +0 -0
- package/packages/sdk-py/tests/test_builder.py +101 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.d.ts +2 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.d.ts.map +1 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.js +257 -0
- package/packages/sdk-ts/dist/__tests__/facade.test.js.map +1 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.d.ts +2 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.d.ts.map +1 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.js +124 -0
- package/packages/sdk-ts/dist/__tests__/unit.test.js.map +1 -0
- package/packages/sdk-ts/dist/client.d.ts +2 -0
- package/packages/sdk-ts/dist/client.d.ts.map +1 -1
- package/packages/sdk-ts/dist/client.js +2 -0
- package/packages/sdk-ts/dist/client.js.map +1 -1
- package/packages/sdk-ts/dist/index.d.ts +1 -0
- package/packages/sdk-ts/dist/index.d.ts.map +1 -1
- package/packages/sdk-ts/dist/index.js +1 -0
- package/packages/sdk-ts/dist/index.js.map +1 -1
- package/packages/sdk-ts/dist/protocol.d.ts +1 -0
- package/packages/sdk-ts/dist/protocol.d.ts.map +1 -1
- package/packages/sdk-ts/dist/relay.d.ts +44 -0
- package/packages/sdk-ts/dist/relay.d.ts.map +1 -1
- package/packages/sdk-ts/dist/relay.js +89 -11
- package/packages/sdk-ts/dist/relay.js.map +1 -1
- package/packages/sdk-ts/dist/relaycast.js +2 -2
- package/packages/sdk-ts/dist/relaycast.js.map +1 -1
- package/packages/sdk-ts/dist/workflows/barrier.d.ts +72 -0
- package/packages/sdk-ts/dist/workflows/barrier.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/barrier.js +162 -0
- package/packages/sdk-ts/dist/workflows/barrier.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/builder.d.ts +101 -0
- package/packages/sdk-ts/dist/workflows/builder.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/builder.js +179 -0
- package/packages/sdk-ts/dist/workflows/builder.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/cli.d.ts +10 -0
- package/packages/sdk-ts/dist/workflows/cli.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/cli.js +82 -0
- package/packages/sdk-ts/dist/workflows/cli.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/coordinator.d.ts +68 -0
- package/packages/sdk-ts/dist/workflows/coordinator.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/coordinator.js +353 -0
- package/packages/sdk-ts/dist/workflows/coordinator.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/index.d.ts +10 -0
- package/packages/sdk-ts/dist/workflows/index.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/index.js +10 -0
- package/packages/sdk-ts/dist/workflows/index.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/memory-db.d.ts +17 -0
- package/packages/sdk-ts/dist/workflows/memory-db.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/memory-db.js +33 -0
- package/packages/sdk-ts/dist/workflows/memory-db.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/run.d.ts +31 -0
- package/packages/sdk-ts/dist/workflows/run.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/run.js +24 -0
- package/packages/sdk-ts/dist/workflows/run.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/runner.d.ts +119 -0
- package/packages/sdk-ts/dist/workflows/runner.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/runner.js +650 -0
- package/packages/sdk-ts/dist/workflows/runner.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/state.d.ts +77 -0
- package/packages/sdk-ts/dist/workflows/state.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/state.js +140 -0
- package/packages/sdk-ts/dist/workflows/state.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/templates.d.ts +47 -0
- package/packages/sdk-ts/dist/workflows/templates.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/templates.js +395 -0
- package/packages/sdk-ts/dist/workflows/templates.js.map +1 -0
- package/packages/sdk-ts/dist/workflows/types.d.ts +126 -0
- package/packages/sdk-ts/dist/workflows/types.d.ts.map +1 -0
- package/packages/sdk-ts/dist/workflows/types.js +8 -0
- package/packages/sdk-ts/dist/workflows/types.js.map +1 -0
- package/packages/sdk-ts/package.json +9 -3
- package/packages/sdk-ts/src/__tests__/error-scenarios.test.ts +682 -0
- package/packages/sdk-ts/src/__tests__/facade.test.ts +296 -0
- package/packages/sdk-ts/src/__tests__/swarm-coordinator.test.ts +416 -0
- package/packages/sdk-ts/src/__tests__/unit.test.ts +152 -0
- package/packages/sdk-ts/src/__tests__/workflow-runner.test.ts +333 -0
- package/packages/sdk-ts/src/client.ts +4 -0
- package/packages/sdk-ts/src/index.ts +1 -0
- package/packages/sdk-ts/src/protocol.ts +1 -1
- package/packages/sdk-ts/src/relay.ts +112 -11
- package/packages/sdk-ts/src/relaycast.ts +2 -2
- package/packages/sdk-ts/src/workflows/README.md +450 -0
- package/packages/sdk-ts/src/workflows/barrier.ts +254 -0
- package/packages/sdk-ts/src/workflows/builder.ts +241 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/bug-fix.yaml +75 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/code-review.yaml +82 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/documentation.yaml +70 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/feature-dev.yaml +76 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/refactor.yaml +82 -0
- package/packages/sdk-ts/src/workflows/builtin-templates/security-audit.yaml +84 -0
- package/packages/sdk-ts/src/workflows/cli.ts +93 -0
- package/packages/sdk-ts/src/workflows/coordinator.ts +520 -0
- package/packages/sdk-ts/src/workflows/index.ts +9 -0
- package/packages/sdk-ts/src/workflows/memory-db.ts +39 -0
- package/packages/sdk-ts/src/workflows/run.ts +47 -0
- package/packages/sdk-ts/src/workflows/runner.ts +873 -0
- package/packages/sdk-ts/src/workflows/schema.json +321 -0
- package/packages/sdk-ts/src/workflows/state.ts +279 -0
- package/packages/sdk-ts/src/workflows/templates.ts +544 -0
- package/packages/sdk-ts/src/workflows/types.ts +178 -0
- package/packages/sdk-ts/tsconfig.json +6 -1
- package/packages/spawner/package.json +1 -1
- package/packages/state/package.json +1 -1
- package/packages/storage/package.json +2 -2
- package/packages/telemetry/package.json +1 -1
- package/packages/trajectory/package.json +2 -2
- package/packages/user-directory/package.json +2 -2
- package/packages/utils/package.json +3 -3
- package/packages/wrapper/package.json +5 -6
- package/scripts/postinstall.js +106 -2
- package/packages/api-types/.trajectories/active/traj_xbsvuzogscey.json +0 -15
- package/packages/api-types/.trajectories/index.json +0 -12
- package/packages/api-types/dist/index.d.ts +0 -21
- package/packages/api-types/dist/index.d.ts.map +0 -1
- package/packages/api-types/dist/index.js +0 -22
- package/packages/api-types/dist/index.js.map +0 -1
- package/packages/api-types/dist/schemas/agent.d.ts +0 -259
- package/packages/api-types/dist/schemas/agent.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/agent.js +0 -102
- package/packages/api-types/dist/schemas/agent.js.map +0 -1
- package/packages/api-types/dist/schemas/api.d.ts +0 -290
- package/packages/api-types/dist/schemas/api.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/api.js +0 -162
- package/packages/api-types/dist/schemas/api.js.map +0 -1
- package/packages/api-types/dist/schemas/decision.d.ts +0 -230
- package/packages/api-types/dist/schemas/decision.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/decision.js +0 -104
- package/packages/api-types/dist/schemas/decision.js.map +0 -1
- package/packages/api-types/dist/schemas/fleet.d.ts +0 -615
- package/packages/api-types/dist/schemas/fleet.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/fleet.js +0 -71
- package/packages/api-types/dist/schemas/fleet.js.map +0 -1
- package/packages/api-types/dist/schemas/history.d.ts +0 -180
- package/packages/api-types/dist/schemas/history.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/history.js +0 -72
- package/packages/api-types/dist/schemas/history.js.map +0 -1
- package/packages/api-types/dist/schemas/index.d.ts +0 -14
- package/packages/api-types/dist/schemas/index.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/index.js +0 -22
- package/packages/api-types/dist/schemas/index.js.map +0 -1
- package/packages/api-types/dist/schemas/message.d.ts +0 -456
- package/packages/api-types/dist/schemas/message.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/message.js +0 -88
- package/packages/api-types/dist/schemas/message.js.map +0 -1
- package/packages/api-types/dist/schemas/session.d.ts +0 -60
- package/packages/api-types/dist/schemas/session.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/session.js +0 -36
- package/packages/api-types/dist/schemas/session.js.map +0 -1
- package/packages/api-types/dist/schemas/task.d.ts +0 -111
- package/packages/api-types/dist/schemas/task.d.ts.map +0 -1
- package/packages/api-types/dist/schemas/task.js +0 -64
- package/packages/api-types/dist/schemas/task.js.map +0 -1
- package/packages/api-types/package.json +0 -61
- package/packages/api-types/scripts/generate-openapi.ts +0 -106
- package/packages/api-types/src/index.ts +0 -22
- package/packages/api-types/src/schemas/agent.test.ts +0 -164
- package/packages/api-types/src/schemas/agent.ts +0 -110
- package/packages/api-types/src/schemas/api.test.ts +0 -372
- package/packages/api-types/src/schemas/api.ts +0 -194
- package/packages/api-types/src/schemas/decision.test.ts +0 -324
- package/packages/api-types/src/schemas/decision.ts +0 -136
- package/packages/api-types/src/schemas/fleet.test.ts +0 -212
- package/packages/api-types/src/schemas/fleet.ts +0 -83
- package/packages/api-types/src/schemas/history.test.ts +0 -242
- package/packages/api-types/src/schemas/history.ts +0 -84
- package/packages/api-types/src/schemas/index.ts +0 -148
- package/packages/api-types/src/schemas/message.test.ts +0 -192
- package/packages/api-types/src/schemas/message.ts +0 -98
- package/packages/api-types/src/schemas/session.test.ts +0 -104
- package/packages/api-types/src/schemas/session.ts +0 -40
- package/packages/api-types/src/schemas/task.test.ts +0 -192
- package/packages/api-types/src/schemas/task.ts +0 -78
- package/packages/api-types/tsconfig.json +0 -19
- package/packages/api-types/vitest.config.ts +0 -9
- package/packages/benchmark/README.md +0 -200
- package/packages/benchmark/datasets/coding-tasks.yaml +0 -127
- package/packages/benchmark/datasets/coordination-tasks.yaml +0 -122
- package/packages/benchmark/datasets/quick-test.yaml +0 -20
- package/packages/benchmark/dist/benchmark.d.ts +0 -47
- package/packages/benchmark/dist/benchmark.d.ts.map +0 -1
- package/packages/benchmark/dist/benchmark.js +0 -224
- package/packages/benchmark/dist/benchmark.js.map +0 -1
- package/packages/benchmark/dist/cli.d.ts +0 -8
- package/packages/benchmark/dist/cli.d.ts.map +0 -1
- package/packages/benchmark/dist/cli.js +0 -185
- package/packages/benchmark/dist/cli.js.map +0 -1
- package/packages/benchmark/dist/harbor.d.ts +0 -53
- package/packages/benchmark/dist/harbor.d.ts.map +0 -1
- package/packages/benchmark/dist/harbor.js +0 -127
- package/packages/benchmark/dist/harbor.js.map +0 -1
- package/packages/benchmark/dist/index.d.ts +0 -48
- package/packages/benchmark/dist/index.d.ts.map +0 -1
- package/packages/benchmark/dist/index.js +0 -50
- package/packages/benchmark/dist/index.js.map +0 -1
- package/packages/benchmark/dist/runners/base.d.ts +0 -63
- package/packages/benchmark/dist/runners/base.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/base.js +0 -156
- package/packages/benchmark/dist/runners/base.js.map +0 -1
- package/packages/benchmark/dist/runners/index.d.ts +0 -10
- package/packages/benchmark/dist/runners/index.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/index.js +0 -10
- package/packages/benchmark/dist/runners/index.js.map +0 -1
- package/packages/benchmark/dist/runners/single.d.ts +0 -19
- package/packages/benchmark/dist/runners/single.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/single.js +0 -111
- package/packages/benchmark/dist/runners/single.js.map +0 -1
- package/packages/benchmark/dist/runners/subagent.d.ts +0 -32
- package/packages/benchmark/dist/runners/subagent.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/subagent.js +0 -212
- package/packages/benchmark/dist/runners/subagent.js.map +0 -1
- package/packages/benchmark/dist/runners/swarm.d.ts +0 -36
- package/packages/benchmark/dist/runners/swarm.d.ts.map +0 -1
- package/packages/benchmark/dist/runners/swarm.js +0 -273
- package/packages/benchmark/dist/runners/swarm.js.map +0 -1
- package/packages/benchmark/dist/types.d.ts +0 -178
- package/packages/benchmark/dist/types.d.ts.map +0 -1
- package/packages/benchmark/dist/types.js +0 -16
- package/packages/benchmark/dist/types.js.map +0 -1
- package/packages/benchmark/package.json +0 -80
- package/packages/benchmark/src/benchmark.ts +0 -298
- package/packages/benchmark/src/cli.ts +0 -240
- package/packages/benchmark/src/harbor.ts +0 -170
- package/packages/benchmark/src/index.ts +0 -73
- package/packages/benchmark/src/runners/base.ts +0 -205
- package/packages/benchmark/src/runners/index.ts +0 -10
- package/packages/benchmark/src/runners/single.ts +0 -121
- package/packages/benchmark/src/runners/subagent.ts +0 -240
- package/packages/benchmark/src/runners/swarm.ts +0 -326
- package/packages/benchmark/src/types.ts +0 -205
- package/packages/benchmark/tsconfig.json +0 -20
- package/packages/cli-tester/README.md +0 -277
- package/packages/cli-tester/dist/index.d.ts +0 -21
- package/packages/cli-tester/dist/index.d.ts.map +0 -1
- package/packages/cli-tester/dist/index.js +0 -21
- package/packages/cli-tester/dist/index.js.map +0 -1
- package/packages/cli-tester/dist/utils/credential-check.d.ts +0 -56
- package/packages/cli-tester/dist/utils/credential-check.d.ts.map +0 -1
- package/packages/cli-tester/dist/utils/credential-check.js +0 -230
- package/packages/cli-tester/dist/utils/credential-check.js.map +0 -1
- package/packages/cli-tester/dist/utils/socket-client.d.ts +0 -76
- package/packages/cli-tester/dist/utils/socket-client.d.ts.map +0 -1
- package/packages/cli-tester/dist/utils/socket-client.js +0 -153
- package/packages/cli-tester/dist/utils/socket-client.js.map +0 -1
- package/packages/cli-tester/docker/Dockerfile +0 -61
- package/packages/cli-tester/docker/docker-compose.yml +0 -71
- package/packages/cli-tester/docker/entrypoint.sh +0 -58
- package/packages/cli-tester/package.json +0 -32
- package/packages/cli-tester/scripts/clear-auth.sh +0 -101
- package/packages/cli-tester/scripts/inject-message.sh +0 -42
- package/packages/cli-tester/scripts/start.sh +0 -71
- package/packages/cli-tester/scripts/test-cli.sh +0 -56
- package/packages/cli-tester/scripts/test-full-spawn.sh +0 -238
- package/packages/cli-tester/scripts/test-registration.sh +0 -182
- package/packages/cli-tester/scripts/test-setup-flow.sh +0 -202
- package/packages/cli-tester/scripts/test-spawn.sh +0 -140
- package/packages/cli-tester/scripts/test-with-daemon.sh +0 -247
- package/packages/cli-tester/scripts/verify-auth.sh +0 -112
- package/packages/cli-tester/src/index.ts +0 -40
- package/packages/cli-tester/src/utils/credential-check.ts +0 -284
- package/packages/cli-tester/src/utils/socket-client.ts +0 -211
- package/packages/cli-tester/tests/credential-check.test.ts +0 -56
- package/packages/cli-tester/tsconfig.json +0 -11
- package/packages/sdk/dist/browser-client.d.ts +0 -212
- package/packages/sdk/dist/browser-client.d.ts.map +0 -1
- package/packages/sdk/dist/browser-client.js +0 -750
- package/packages/sdk/dist/browser-client.js.map +0 -1
- package/packages/sdk/dist/browser-framing.d.ts +0 -46
- package/packages/sdk/dist/browser-framing.d.ts.map +0 -1
- package/packages/sdk/dist/browser-framing.js +0 -122
- package/packages/sdk/dist/browser-framing.js.map +0 -1
- package/packages/sdk/dist/standalone.d.ts +0 -89
- package/packages/sdk/dist/standalone.d.ts.map +0 -1
- package/packages/sdk/dist/standalone.js +0 -131
- package/packages/sdk/dist/standalone.js.map +0 -1
- package/packages/sdk/dist/transports/index.d.ts +0 -92
- package/packages/sdk/dist/transports/index.d.ts.map +0 -1
- package/packages/sdk/dist/transports/index.js +0 -129
- package/packages/sdk/dist/transports/index.js.map +0 -1
- package/packages/sdk/dist/transports/socket-transport.d.ts +0 -30
- package/packages/sdk/dist/transports/socket-transport.d.ts.map +0 -1
- package/packages/sdk/dist/transports/socket-transport.js +0 -94
- package/packages/sdk/dist/transports/socket-transport.js.map +0 -1
- package/packages/sdk/dist/transports/types.d.ts +0 -69
- package/packages/sdk/dist/transports/types.d.ts.map +0 -1
- package/packages/sdk/dist/transports/types.js +0 -10
- package/packages/sdk/dist/transports/types.js.map +0 -1
- package/packages/sdk/dist/transports/websocket-transport.d.ts +0 -55
- package/packages/sdk/dist/transports/websocket-transport.d.ts.map +0 -1
- package/packages/sdk/dist/transports/websocket-transport.js +0 -180
- package/packages/sdk/dist/transports/websocket-transport.js.map +0 -1
- package/packages/sdk/src/browser-client.ts +0 -985
- package/packages/sdk/src/browser-framing.test.ts +0 -115
- package/packages/sdk/src/browser-framing.ts +0 -150
- package/packages/sdk/src/standalone.ts +0 -183
- package/packages/sdk/src/transports/index.ts +0 -197
- package/packages/sdk/src/transports/socket-transport.ts +0 -115
- package/packages/sdk/src/transports/types.ts +0 -77
- package/packages/sdk/src/transports/websocket-transport.ts +0 -245
|
@@ -1,178 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Benchmark Types
|
|
3
|
-
*
|
|
4
|
-
* Type definitions for the agent swarm performance benchmark system.
|
|
5
|
-
*/
|
|
6
|
-
/**
|
|
7
|
-
* Configuration type for benchmark runs
|
|
8
|
-
*/
|
|
9
|
-
export type ConfigurationType = 'single' | 'subagent' | 'swarm';
|
|
10
|
-
/**
|
|
11
|
-
* Task complexity level
|
|
12
|
-
*/
|
|
13
|
-
export type TaskComplexity = 'low' | 'medium' | 'high';
|
|
14
|
-
/**
|
|
15
|
-
* A benchmark task definition
|
|
16
|
-
*/
|
|
17
|
-
export interface Task {
|
|
18
|
-
/** Unique task identifier */
|
|
19
|
-
id: string;
|
|
20
|
-
/** Human-readable task description */
|
|
21
|
-
description: string;
|
|
22
|
-
/** Files the task operates on */
|
|
23
|
-
files: string[];
|
|
24
|
-
/** Success criteria for the task */
|
|
25
|
-
expectedOutcome: string;
|
|
26
|
-
/** Optional Harbor-style success criteria key for compatibility */
|
|
27
|
-
success_criteria?: string;
|
|
28
|
-
/** Task complexity level */
|
|
29
|
-
complexity: TaskComplexity;
|
|
30
|
-
/** Optional timeout in milliseconds (default: 300000 = 5 min) */
|
|
31
|
-
timeoutMs?: number;
|
|
32
|
-
/** Optional tags for categorization */
|
|
33
|
-
tags?: string[];
|
|
34
|
-
}
|
|
35
|
-
/**
|
|
36
|
-
* Result of a single benchmark run
|
|
37
|
-
*/
|
|
38
|
-
export interface RunResult {
|
|
39
|
-
/** Task identifier */
|
|
40
|
-
taskId: string;
|
|
41
|
-
/** Configuration used for this run */
|
|
42
|
-
configuration: ConfigurationType;
|
|
43
|
-
/** Total time from start to completion in milliseconds */
|
|
44
|
-
totalTimeMs: number;
|
|
45
|
-
/** Time to first agent action in milliseconds */
|
|
46
|
-
timeToFirstActionMs: number;
|
|
47
|
-
/** Total number of inter-agent messages */
|
|
48
|
-
messageCount: number;
|
|
49
|
-
/** Average message latency in milliseconds */
|
|
50
|
-
avgLatencyMs: number;
|
|
51
|
-
/** P50 latency in milliseconds */
|
|
52
|
-
latencyP50Ms: number;
|
|
53
|
-
/** P99 latency in milliseconds */
|
|
54
|
-
latencyP99Ms: number;
|
|
55
|
-
/** Number of coordination rounds */
|
|
56
|
-
coordinationRounds: number;
|
|
57
|
-
/** Number of agents used */
|
|
58
|
-
agentCount: number;
|
|
59
|
-
/** Total tokens consumed (if available) */
|
|
60
|
-
totalTokensUsed: number;
|
|
61
|
-
/** Peak memory usage in MB */
|
|
62
|
-
peakMemoryMb: number;
|
|
63
|
-
/** Whether the task completed successfully */
|
|
64
|
-
success: boolean;
|
|
65
|
-
/** Completion rate (0-1) for partial success */
|
|
66
|
-
completionRate: number;
|
|
67
|
-
/** Error messages if any */
|
|
68
|
-
errors: string[];
|
|
69
|
-
/** Timestamp when the run started */
|
|
70
|
-
startedAt: number;
|
|
71
|
-
/** Timestamp when the run completed */
|
|
72
|
-
completedAt: number;
|
|
73
|
-
}
|
|
74
|
-
/**
|
|
75
|
-
* Comparison result across all configurations
|
|
76
|
-
*/
|
|
77
|
-
export interface ComparisonResult {
|
|
78
|
-
/** Task identifier */
|
|
79
|
-
taskId: string;
|
|
80
|
-
/** Results for each configuration */
|
|
81
|
-
results: Map<ConfigurationType, RunResult>;
|
|
82
|
-
/** The winning configuration based on scoring */
|
|
83
|
-
winner: ConfigurationType;
|
|
84
|
-
/** Score breakdown for each configuration */
|
|
85
|
-
scores: Map<ConfigurationType, ScoreBreakdown>;
|
|
86
|
-
}
|
|
87
|
-
/**
|
|
88
|
-
* Score breakdown for a configuration
|
|
89
|
-
*/
|
|
90
|
-
export interface ScoreBreakdown {
|
|
91
|
-
/** Total score (0-100) */
|
|
92
|
-
total: number;
|
|
93
|
-
/** Success component (0-50) */
|
|
94
|
-
successScore: number;
|
|
95
|
-
/** Time efficiency component (0-30) */
|
|
96
|
-
timeScore: number;
|
|
97
|
-
/** Resource efficiency component (0-20) */
|
|
98
|
-
efficiencyScore: number;
|
|
99
|
-
}
|
|
100
|
-
/**
|
|
101
|
-
* Benchmark configuration options
|
|
102
|
-
*/
|
|
103
|
-
export interface BenchmarkConfig {
|
|
104
|
-
/** Which configurations to run */
|
|
105
|
-
configurations: ConfigurationType[];
|
|
106
|
-
/** CLI to use for agents (default: 'claude') */
|
|
107
|
-
cli: string;
|
|
108
|
-
/** Working directory for tasks */
|
|
109
|
-
cwd?: string;
|
|
110
|
-
/** Suppress console output */
|
|
111
|
-
quiet: boolean;
|
|
112
|
-
/** Cool-down time between runs in milliseconds */
|
|
113
|
-
cooldownMs: number;
|
|
114
|
-
/** Maximum concurrent agents for swarm */
|
|
115
|
-
maxSwarmSize: number;
|
|
116
|
-
/** Custom socket path for relay */
|
|
117
|
-
socketPath?: string;
|
|
118
|
-
}
|
|
119
|
-
/**
|
|
120
|
-
* Default benchmark configuration
|
|
121
|
-
*/
|
|
122
|
-
export declare const DEFAULT_BENCHMARK_CONFIG: BenchmarkConfig;
|
|
123
|
-
/**
|
|
124
|
-
* Metrics collected during a run
|
|
125
|
-
*/
|
|
126
|
-
export interface RunMetrics {
|
|
127
|
-
/** Number of messages sent */
|
|
128
|
-
messages: number;
|
|
129
|
-
/** Message latencies in milliseconds */
|
|
130
|
-
latencies: number[];
|
|
131
|
-
/** Run start timestamp */
|
|
132
|
-
startTime: number;
|
|
133
|
-
/** Spawned agent names */
|
|
134
|
-
spawnedAgents: string[];
|
|
135
|
-
/** Error events */
|
|
136
|
-
errors: string[];
|
|
137
|
-
}
|
|
138
|
-
/**
|
|
139
|
-
* Task dataset definition
|
|
140
|
-
*/
|
|
141
|
-
export interface TaskDataset {
|
|
142
|
-
/** Dataset name */
|
|
143
|
-
name: string;
|
|
144
|
-
/** Dataset description */
|
|
145
|
-
description?: string;
|
|
146
|
-
/** Version identifier */
|
|
147
|
-
version?: string;
|
|
148
|
-
/** Tasks in the dataset */
|
|
149
|
-
tasks: Task[];
|
|
150
|
-
}
|
|
151
|
-
/**
|
|
152
|
-
* Harbor-compatible evaluation input
|
|
153
|
-
*/
|
|
154
|
-
export interface HarborTaskInput {
|
|
155
|
-
id: string;
|
|
156
|
-
description: string;
|
|
157
|
-
files?: string[];
|
|
158
|
-
success_criteria?: string;
|
|
159
|
-
complexity?: TaskComplexity;
|
|
160
|
-
agents_required?: number;
|
|
161
|
-
[key: string]: unknown;
|
|
162
|
-
}
|
|
163
|
-
/**
|
|
164
|
-
* Harbor-compatible evaluation output
|
|
165
|
-
*/
|
|
166
|
-
export interface HarborEvaluationOutput {
|
|
167
|
-
task_id: string;
|
|
168
|
-
configurations: Record<ConfigurationType, RunResult>;
|
|
169
|
-
winner: ConfigurationType;
|
|
170
|
-
scores: Record<ConfigurationType, ScoreBreakdown>;
|
|
171
|
-
metadata: {
|
|
172
|
-
benchmark_version: string;
|
|
173
|
-
started_at: number;
|
|
174
|
-
completed_at: number;
|
|
175
|
-
total_duration_ms: number;
|
|
176
|
-
};
|
|
177
|
-
}
|
|
178
|
-
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,MAAM,iBAAiB,GAAG,QAAQ,GAAG,UAAU,GAAG,OAAO,CAAC;AAEhE;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEvD;;GAEG;AACH,MAAM,WAAW,IAAI;IACnB,6BAA6B;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,sCAAsC;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,iCAAiC;IACjC,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,oCAAoC;IACpC,eAAe,EAAE,MAAM,CAAC;IACxB,mEAAmE;IACnE,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,4BAA4B;IAC5B,UAAU,EAAE,cAAc,CAAC;IAC3B,iEAAiE;IACjE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,sCAAsC;IACtC,aAAa,EAAE,iBAAiB,CAAC;IAGjC,0DAA0D;IAC1D,WAAW,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,mBAAmB,EAAE,MAAM,CAAC;IAG5B,2CAA2C;IAC3C,YAAY,EAAE,MAAM,CAAC;IACrB,8CAA8C;IAC9C,YAAY,EAAE,MAAM,CAAC;IACrB,kCAAkC;IAClC,YAAY,EAAE,MAAM,CAAC;IACrB,kCAAkC;IAClC,YAAY,EAAE,MAAM,CAAC;IACrB,oCAAoC;IACpC,kBAAkB,EAAE,MAAM,CAAC;IAG3B,4BAA4B;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,2CAA2C;IAC3C,eAAe,EAAE,MAAM,CAAC;IACxB,8BAA8B;IAC9B,YAAY,EAAE,MAAM,CAAC;IAGrB,8CAA8C;IAC9C,OAAO,EAAE,OAAO,CAAC;IACjB,gDAAgD;IAChD,cAAc,EAAE,MAAM,CAAC;IACvB,4BAA4B;IAC5B,MAAM,EAAE,MAAM,EAAE,CAAC;IAGjB,qCAAqC;IACrC,SAAS,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,qCAAqC;IACrC,OAAO,EAAE,GAAG,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;IAC3C,iDAAiD;IACjD,MAAM,EAAE,iBAAiB,CAAC;IAC1B,6CAA6C;IAC7C,MAAM,EAAE,GAAG,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,0BAA0B;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,+BAA+B;IAC/B,YAAY,EAAE,MAAM,CAAC;IACrB,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;IAClB,2CAA2C;IAC3C,eAAe,EAAE,MAAM,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,kCAAkC;IAClC,cAAc,EAAE,iBAAiB,EAAE,CAAC;IACpC,gDAAgD;IAChD,GAAG,EAAE,MAAM,CAAC;IACZ,kCAAkC;IAClC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,8BAA8B;IAC9B,KAAK,EAAE,OAAO,CAAC;IACf,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,YAAY,EAAE,MAAM,CAAC;IACrB,mCAAmC;IACnC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,eAAO,MAAM,wBAAwB,EAAE,eAMtC,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8BAA8B;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,wCAAwC;IACxC,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,0BAA0B;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,0BAA0B;IAC1B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,mBAAmB;IACnB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,mBAAmB;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,0BAA0B;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2BAA2B;IAC3B,KAAK,EAAE,IAAI,EAAE,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,cAAc,CAAC;IAC5B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;IACrD,MAAM,EAAE,iBAAiB,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;IAClD,QAAQ,EAAE;QACR,iBAAiB,EAAE,MAAM,CAAC;QAC1B,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,MAAM,CAAC;QACrB,iBAAiB,EAAE,MAAM,CAAC;KAC3B,CAAC;CACH"}
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Benchmark Types
|
|
3
|
-
*
|
|
4
|
-
* Type definitions for the agent swarm performance benchmark system.
|
|
5
|
-
*/
|
|
6
|
-
/**
|
|
7
|
-
* Default benchmark configuration
|
|
8
|
-
*/
|
|
9
|
-
export const DEFAULT_BENCHMARK_CONFIG = {
|
|
10
|
-
configurations: ['single', 'subagent', 'swarm'],
|
|
11
|
-
cli: 'claude',
|
|
12
|
-
quiet: false,
|
|
13
|
-
cooldownMs: 5000,
|
|
14
|
-
maxSwarmSize: 10,
|
|
15
|
-
};
|
|
16
|
-
//# sourceMappingURL=types.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAoIH;;GAEG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAoB;IACvD,cAAc,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,CAAC;IAC/C,GAAG,EAAE,QAAQ;IACb,KAAK,EAAE,KAAK;IACZ,UAAU,EAAE,IAAI;IAChB,YAAY,EAAE,EAAE;CACjB,CAAC"}
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@agent-relay/benchmark",
|
|
3
|
-
"version": "2.3.2",
|
|
4
|
-
"description": "Performance benchmarking for agent swarms, sub-agents, and single agents using Harbor",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"main": "dist/index.js",
|
|
7
|
-
"types": "dist/index.d.ts",
|
|
8
|
-
"exports": {
|
|
9
|
-
".": {
|
|
10
|
-
"types": "./dist/index.d.ts",
|
|
11
|
-
"import": "./dist/index.js"
|
|
12
|
-
},
|
|
13
|
-
"./runners": {
|
|
14
|
-
"types": "./dist/runners/index.d.ts",
|
|
15
|
-
"import": "./dist/runners/index.js"
|
|
16
|
-
},
|
|
17
|
-
"./harbor": {
|
|
18
|
-
"types": "./dist/harbor.d.ts",
|
|
19
|
-
"import": "./dist/harbor.js"
|
|
20
|
-
}
|
|
21
|
-
},
|
|
22
|
-
"bin": {
|
|
23
|
-
"relay-benchmark": "dist/cli.js"
|
|
24
|
-
},
|
|
25
|
-
"files": [
|
|
26
|
-
"dist",
|
|
27
|
-
"datasets",
|
|
28
|
-
"README.md"
|
|
29
|
-
],
|
|
30
|
-
"scripts": {
|
|
31
|
-
"build": "tsc",
|
|
32
|
-
"clean": "rm -rf dist",
|
|
33
|
-
"test": "vitest run",
|
|
34
|
-
"test:watch": "vitest",
|
|
35
|
-
"benchmark": "node dist/cli.js",
|
|
36
|
-
"benchmark:single": "node dist/cli.js --config single",
|
|
37
|
-
"benchmark:subagent": "node dist/cli.js --config subagent",
|
|
38
|
-
"benchmark:swarm": "node dist/cli.js --config swarm",
|
|
39
|
-
"benchmark:compare": "node dist/cli.js --config all"
|
|
40
|
-
},
|
|
41
|
-
"keywords": [
|
|
42
|
-
"agent-relay",
|
|
43
|
-
"benchmark",
|
|
44
|
-
"performance",
|
|
45
|
-
"swarm",
|
|
46
|
-
"harbor",
|
|
47
|
-
"ai-agents"
|
|
48
|
-
],
|
|
49
|
-
"author": "Agent Workforce",
|
|
50
|
-
"license": "Apache-2.0",
|
|
51
|
-
"repository": {
|
|
52
|
-
"type": "git",
|
|
53
|
-
"url": "git+https://github.com/AgentWorkforce/relay.git",
|
|
54
|
-
"directory": "packages/benchmark"
|
|
55
|
-
},
|
|
56
|
-
"dependencies": {
|
|
57
|
-
"@agent-relay/sdk": "2.3.2",
|
|
58
|
-
"@agent-relay/protocol": "2.3.2",
|
|
59
|
-
"@agent-relay/spawner": "2.3.2",
|
|
60
|
-
"commander": "^12.1.0",
|
|
61
|
-
"yaml": "^2.3.4"
|
|
62
|
-
},
|
|
63
|
-
"devDependencies": {
|
|
64
|
-
"@agent-relay/daemon": "2.3.2",
|
|
65
|
-
"@types/node": "^22.19.3",
|
|
66
|
-
"typescript": "^5.9.3",
|
|
67
|
-
"vitest": "^3.0.0"
|
|
68
|
-
},
|
|
69
|
-
"peerDependencies": {
|
|
70
|
-
"@agent-relay/daemon": ">=2.0.0"
|
|
71
|
-
},
|
|
72
|
-
"peerDependenciesMeta": {
|
|
73
|
-
"@agent-relay/daemon": {
|
|
74
|
-
"optional": true
|
|
75
|
-
}
|
|
76
|
-
},
|
|
77
|
-
"engines": {
|
|
78
|
-
"node": ">=18.0.0"
|
|
79
|
-
}
|
|
80
|
-
}
|
|
@@ -1,298 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Comparison Benchmark
|
|
3
|
-
*
|
|
4
|
-
* Main orchestrator for running comparison benchmarks across configurations.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import type {
|
|
8
|
-
ConfigurationType,
|
|
9
|
-
Task,
|
|
10
|
-
RunResult,
|
|
11
|
-
ComparisonResult,
|
|
12
|
-
ScoreBreakdown,
|
|
13
|
-
BenchmarkConfig,
|
|
14
|
-
} from './types.js';
|
|
15
|
-
import { DEFAULT_BENCHMARK_CONFIG } from './types.js';
|
|
16
|
-
import {
|
|
17
|
-
ConfigurationRunner,
|
|
18
|
-
SingleAgentRunner,
|
|
19
|
-
SubAgentRunner,
|
|
20
|
-
SwarmRunner,
|
|
21
|
-
} from './runners/index.js';
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* Main benchmark orchestrator
|
|
25
|
-
*/
|
|
26
|
-
export class ComparisonBenchmark {
|
|
27
|
-
private config: BenchmarkConfig;
|
|
28
|
-
private runners: Map<ConfigurationType, ConfigurationRunner>;
|
|
29
|
-
|
|
30
|
-
constructor(config: Partial<BenchmarkConfig> = {}) {
|
|
31
|
-
this.config = { ...DEFAULT_BENCHMARK_CONFIG, ...config };
|
|
32
|
-
|
|
33
|
-
// Initialize runners for configured configurations
|
|
34
|
-
this.runners = new Map();
|
|
35
|
-
for (const configType of this.config.configurations) {
|
|
36
|
-
this.runners.set(configType, this.createRunner(configType));
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
/**
|
|
41
|
-
* Create a runner for a configuration type
|
|
42
|
-
*/
|
|
43
|
-
private createRunner(type: ConfigurationType): ConfigurationRunner {
|
|
44
|
-
switch (type) {
|
|
45
|
-
case 'single':
|
|
46
|
-
return new SingleAgentRunner(this.config);
|
|
47
|
-
case 'subagent':
|
|
48
|
-
return new SubAgentRunner(this.config);
|
|
49
|
-
case 'swarm':
|
|
50
|
-
return new SwarmRunner(this.config);
|
|
51
|
-
default:
|
|
52
|
-
throw new Error(`Unknown configuration type: ${type}`);
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
/**
|
|
57
|
-
* Run a comparison across all configured configurations
|
|
58
|
-
*/
|
|
59
|
-
async runComparison(task: Task): Promise<ComparisonResult> {
|
|
60
|
-
const results = new Map<ConfigurationType, RunResult>();
|
|
61
|
-
const scores = new Map<ConfigurationType, ScoreBreakdown>();
|
|
62
|
-
|
|
63
|
-
for (const [configType, runner] of this.runners) {
|
|
64
|
-
if (!this.config.quiet) {
|
|
65
|
-
console.log(`\n=== Running ${configType} configuration ===`);
|
|
66
|
-
console.log(`Task: ${task.id}`);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
try {
|
|
70
|
-
await runner.setup();
|
|
71
|
-
const result = await runner.run(task);
|
|
72
|
-
await runner.teardown();
|
|
73
|
-
|
|
74
|
-
results.set(configType, result);
|
|
75
|
-
scores.set(configType, this.calculateScore(result));
|
|
76
|
-
|
|
77
|
-
if (!this.config.quiet) {
|
|
78
|
-
this.printRunResult(result);
|
|
79
|
-
}
|
|
80
|
-
} catch (err) {
|
|
81
|
-
console.error(`Error running ${configType}:`, (err as Error).message);
|
|
82
|
-
|
|
83
|
-
// Create failed result
|
|
84
|
-
const failedResult: RunResult = {
|
|
85
|
-
taskId: task.id,
|
|
86
|
-
configuration: configType,
|
|
87
|
-
totalTimeMs: 0,
|
|
88
|
-
timeToFirstActionMs: 0,
|
|
89
|
-
messageCount: 0,
|
|
90
|
-
avgLatencyMs: 0,
|
|
91
|
-
latencyP50Ms: 0,
|
|
92
|
-
latencyP99Ms: 0,
|
|
93
|
-
coordinationRounds: 0,
|
|
94
|
-
agentCount: 0,
|
|
95
|
-
totalTokensUsed: 0,
|
|
96
|
-
peakMemoryMb: 0,
|
|
97
|
-
success: false,
|
|
98
|
-
completionRate: 0,
|
|
99
|
-
errors: [(err as Error).message],
|
|
100
|
-
startedAt: Date.now(),
|
|
101
|
-
completedAt: Date.now(),
|
|
102
|
-
};
|
|
103
|
-
results.set(configType, failedResult);
|
|
104
|
-
scores.set(configType, { total: 0, successScore: 0, timeScore: 0, efficiencyScore: 0 });
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// Cool-down between runs
|
|
108
|
-
if (this.config.cooldownMs > 0) {
|
|
109
|
-
await new Promise((r) => setTimeout(r, this.config.cooldownMs));
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
const winner = this.determineWinner(results, scores);
|
|
114
|
-
|
|
115
|
-
return {
|
|
116
|
-
taskId: task.id,
|
|
117
|
-
results,
|
|
118
|
-
winner,
|
|
119
|
-
scores,
|
|
120
|
-
};
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
/**
|
|
124
|
-
* Run a single configuration
|
|
125
|
-
*/
|
|
126
|
-
async runSingle(
|
|
127
|
-
task: Task,
|
|
128
|
-
configType: ConfigurationType
|
|
129
|
-
): Promise<RunResult> {
|
|
130
|
-
const runner = this.runners.get(configType);
|
|
131
|
-
if (!runner) {
|
|
132
|
-
throw new Error(`Configuration ${configType} not enabled`);
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
await runner.setup();
|
|
136
|
-
const result = await runner.run(task);
|
|
137
|
-
await runner.teardown();
|
|
138
|
-
|
|
139
|
-
return result;
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
/**
|
|
143
|
-
* Calculate score breakdown for a result
|
|
144
|
-
*/
|
|
145
|
-
private calculateScore(result: RunResult): ScoreBreakdown {
|
|
146
|
-
const maxTimeMs = 300000; // 5 minutes baseline
|
|
147
|
-
|
|
148
|
-
// Success component (0-50 points)
|
|
149
|
-
const successScore = result.success ? 50 : result.completionRate * 25;
|
|
150
|
-
|
|
151
|
-
// Time component (0-30 points) - faster is better
|
|
152
|
-
const timeScore = result.success
|
|
153
|
-
? 30 * Math.max(0, 1 - result.totalTimeMs / maxTimeMs)
|
|
154
|
-
: 0;
|
|
155
|
-
|
|
156
|
-
// Efficiency component (0-20 points) - fewer agents is better for same result
|
|
157
|
-
const efficiencyScore = result.success
|
|
158
|
-
? 20 / Math.max(1, result.agentCount)
|
|
159
|
-
: 0;
|
|
160
|
-
|
|
161
|
-
return {
|
|
162
|
-
total: successScore + timeScore + efficiencyScore,
|
|
163
|
-
successScore,
|
|
164
|
-
timeScore,
|
|
165
|
-
efficiencyScore,
|
|
166
|
-
};
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
/**
|
|
170
|
-
* Determine the winning configuration
|
|
171
|
-
*/
|
|
172
|
-
private determineWinner(
|
|
173
|
-
results: Map<ConfigurationType, RunResult>,
|
|
174
|
-
scores: Map<ConfigurationType, ScoreBreakdown>
|
|
175
|
-
): ConfigurationType {
|
|
176
|
-
let best: ConfigurationType = 'single';
|
|
177
|
-
let bestScore = -1;
|
|
178
|
-
|
|
179
|
-
for (const [configType, score] of scores) {
|
|
180
|
-
if (score.total > bestScore) {
|
|
181
|
-
bestScore = score.total;
|
|
182
|
-
best = configType;
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
return best;
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
/**
|
|
190
|
-
* Print a single run result
|
|
191
|
-
*/
|
|
192
|
-
private printRunResult(result: RunResult): void {
|
|
193
|
-
console.log(`\nResult for ${result.configuration}:`);
|
|
194
|
-
console.log(` Success: ${result.success ? '✓' : '✗'}`);
|
|
195
|
-
console.log(` Time: ${(result.totalTimeMs / 1000).toFixed(1)}s`);
|
|
196
|
-
console.log(` Agents: ${result.agentCount}`);
|
|
197
|
-
console.log(` Messages: ${result.messageCount}`);
|
|
198
|
-
if (result.errors.length > 0) {
|
|
199
|
-
console.log(` Errors: ${result.errors.join(', ')}`);
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
/**
|
|
204
|
-
* Print comparison table
|
|
205
|
-
*/
|
|
206
|
-
printComparison(comparison: ComparisonResult): void {
|
|
207
|
-
console.log('\n' + '='.repeat(60));
|
|
208
|
-
console.log('COMPARISON RESULTS');
|
|
209
|
-
console.log('='.repeat(60));
|
|
210
|
-
console.log(`Task: ${comparison.taskId}`);
|
|
211
|
-
console.log(`Winner: ${comparison.winner.toUpperCase()}`);
|
|
212
|
-
console.log('');
|
|
213
|
-
|
|
214
|
-
// Build table data
|
|
215
|
-
const configs = Array.from(comparison.results.keys());
|
|
216
|
-
const headers = ['Metric', ...configs.map((c) => c.charAt(0).toUpperCase() + c.slice(1))];
|
|
217
|
-
|
|
218
|
-
const rows = [
|
|
219
|
-
[
|
|
220
|
-
'Success',
|
|
221
|
-
...configs.map((c) =>
|
|
222
|
-
comparison.results.get(c)?.success ? '✓' : '✗'
|
|
223
|
-
),
|
|
224
|
-
],
|
|
225
|
-
[
|
|
226
|
-
'Time (s)',
|
|
227
|
-
...configs.map((c) =>
|
|
228
|
-
((comparison.results.get(c)?.totalTimeMs || 0) / 1000).toFixed(1)
|
|
229
|
-
),
|
|
230
|
-
],
|
|
231
|
-
[
|
|
232
|
-
'Agents',
|
|
233
|
-
...configs.map((c) =>
|
|
234
|
-
String(comparison.results.get(c)?.agentCount || 0)
|
|
235
|
-
),
|
|
236
|
-
],
|
|
237
|
-
[
|
|
238
|
-
'Messages',
|
|
239
|
-
...configs.map((c) =>
|
|
240
|
-
String(comparison.results.get(c)?.messageCount || 0)
|
|
241
|
-
),
|
|
242
|
-
],
|
|
243
|
-
[
|
|
244
|
-
'Avg Latency (ms)',
|
|
245
|
-
...configs.map((c) =>
|
|
246
|
-
(comparison.results.get(c)?.avgLatencyMs || 0).toFixed(0)
|
|
247
|
-
),
|
|
248
|
-
],
|
|
249
|
-
[
|
|
250
|
-
'Completion %',
|
|
251
|
-
...configs.map((c) =>
|
|
252
|
-
((comparison.results.get(c)?.completionRate || 0) * 100).toFixed(0) + '%'
|
|
253
|
-
),
|
|
254
|
-
],
|
|
255
|
-
[
|
|
256
|
-
'Score',
|
|
257
|
-
...configs.map((c) =>
|
|
258
|
-
(comparison.scores.get(c)?.total || 0).toFixed(1)
|
|
259
|
-
),
|
|
260
|
-
],
|
|
261
|
-
];
|
|
262
|
-
|
|
263
|
-
// Print table
|
|
264
|
-
const colWidths = headers.map((h, i) =>
|
|
265
|
-
Math.max(h.length, ...rows.map((r) => String(r[i]).length))
|
|
266
|
-
);
|
|
267
|
-
|
|
268
|
-
const separator = colWidths.map((w) => '-'.repeat(w + 2)).join('+');
|
|
269
|
-
|
|
270
|
-
console.log(separator);
|
|
271
|
-
console.log(
|
|
272
|
-
'|' +
|
|
273
|
-
headers.map((h, i) => ` ${h.padEnd(colWidths[i])} `).join('|') +
|
|
274
|
-
'|'
|
|
275
|
-
);
|
|
276
|
-
console.log(separator);
|
|
277
|
-
|
|
278
|
-
for (const row of rows) {
|
|
279
|
-
console.log(
|
|
280
|
-
'|' +
|
|
281
|
-
row.map((cell, i) => ` ${String(cell).padEnd(colWidths[i])} `).join('|') +
|
|
282
|
-
'|'
|
|
283
|
-
);
|
|
284
|
-
}
|
|
285
|
-
console.log(separator);
|
|
286
|
-
}
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
/**
|
|
290
|
-
* Quick helper to run a comparison benchmark
|
|
291
|
-
*/
|
|
292
|
-
export async function runComparison(
|
|
293
|
-
task: Task,
|
|
294
|
-
config?: Partial<BenchmarkConfig>
|
|
295
|
-
): Promise<ComparisonResult> {
|
|
296
|
-
const benchmark = new ComparisonBenchmark(config);
|
|
297
|
-
return benchmark.runComparison(task);
|
|
298
|
-
}
|