claw-harness 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -0
- package/dist/agent-client.d.ts +17 -0
- package/dist/agent-client.d.ts.map +1 -0
- package/dist/agent-client.js +79 -0
- package/dist/agent-client.js.map +1 -0
- package/dist/agent-client.test.d.ts +2 -0
- package/dist/agent-client.test.d.ts.map +1 -0
- package/dist/agent-client.test.js +57 -0
- package/dist/agent-client.test.js.map +1 -0
- package/dist/bench.d.ts +45 -0
- package/dist/bench.d.ts.map +1 -0
- package/dist/bench.js +133 -0
- package/dist/bench.js.map +1 -0
- package/dist/bench.test.d.ts +2 -0
- package/dist/bench.test.d.ts.map +1 -0
- package/dist/bench.test.js +95 -0
- package/dist/bench.test.js.map +1 -0
- package/dist/bin/clawbench.d.ts +11 -0
- package/dist/bin/clawbench.d.ts.map +1 -0
- package/dist/bin/clawbench.js +237 -0
- package/dist/bin/clawbench.js.map +1 -0
- package/dist/bot.d.ts +36 -0
- package/dist/bot.d.ts.map +1 -0
- package/dist/bot.js +74 -0
- package/dist/bot.js.map +1 -0
- package/dist/bot.test.d.ts +2 -0
- package/dist/bot.test.d.ts.map +1 -0
- package/dist/bot.test.js +109 -0
- package/dist/bot.test.js.map +1 -0
- package/dist/cost-tracker.d.ts +10 -0
- package/dist/cost-tracker.d.ts.map +1 -0
- package/dist/cost-tracker.js +81 -0
- package/dist/cost-tracker.js.map +1 -0
- package/dist/cost-tracker.test.d.ts +2 -0
- package/dist/cost-tracker.test.d.ts.map +1 -0
- package/dist/cost-tracker.test.js +75 -0
- package/dist/cost-tracker.test.js.map +1 -0
- package/dist/docker-gateway.d.ts +40 -0
- package/dist/docker-gateway.d.ts.map +1 -0
- package/dist/docker-gateway.js +172 -0
- package/dist/docker-gateway.js.map +1 -0
- package/dist/docker-gateway.test.d.ts +2 -0
- package/dist/docker-gateway.test.d.ts.map +1 -0
- package/dist/docker-gateway.test.js +116 -0
- package/dist/docker-gateway.test.js.map +1 -0
- package/dist/gateway.d.ts +32 -0
- package/dist/gateway.d.ts.map +1 -0
- package/dist/gateway.js +142 -0
- package/dist/gateway.js.map +1 -0
- package/dist/gateway.test.d.ts +2 -0
- package/dist/gateway.test.d.ts.map +1 -0
- package/dist/gateway.test.js +19 -0
- package/dist/gateway.test.js.map +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +28 -0
- package/dist/index.js.map +1 -0
- package/dist/runner.d.ts +13 -0
- package/dist/runner.d.ts.map +1 -0
- package/dist/runner.js +213 -0
- package/dist/runner.js.map +1 -0
- package/dist/runner.test.d.ts +2 -0
- package/dist/runner.test.d.ts.map +1 -0
- package/dist/runner.test.js +298 -0
- package/dist/runner.test.js.map +1 -0
- package/dist/scenario-loader.d.ts +6 -0
- package/dist/scenario-loader.d.ts.map +1 -0
- package/dist/scenario-loader.js +77 -0
- package/dist/scenario-loader.js.map +1 -0
- package/dist/scenario-loader.test.d.ts +2 -0
- package/dist/scenario-loader.test.d.ts.map +1 -0
- package/dist/scenario-loader.test.js +213 -0
- package/dist/scenario-loader.test.js.map +1 -0
- package/dist/types.d.ts +155 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +27 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +106 -0
- package/dist/utils.js.map +1 -0
- package/dist/utils.test.d.ts +2 -0
- package/dist/utils.test.d.ts.map +1 -0
- package/dist/utils.test.js +55 -0
- package/dist/utils.test.js.map +1 -0
- package/dist/workspace.d.ts +33 -0
- package/dist/workspace.d.ts.map +1 -0
- package/dist/workspace.js +198 -0
- package/dist/workspace.js.map +1 -0
- package/dist/workspace.test.d.ts +2 -0
- package/dist/workspace.test.d.ts.map +1 -0
- package/dist/workspace.test.js +68 -0
- package/dist/workspace.test.js.map +1 -0
- package/docker/Dockerfile +12 -0
- package/docker/entrypoint.sh +6 -0
- package/package.json +49 -0
- package/presets/configs/default.json5 +28 -0
- package/presets/configs/minimal.json5 +22 -0
- package/presets/personas/curious.md +5 -0
- package/presets/personas/friendly.md +5 -0
- package/presets/personas/terse.md +3 -0
- package/presets/scenarios/example-chat.yaml +71 -0
package/README.md
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# Claw Harness
|
|
2
|
+
|
|
3
|
+
Testing framework for [OpenClaw](https://openclaw.ai) bots. Spin up real agent instances, load skills and personas, drive multi-turn prompts, and capture results.
|
|
4
|
+
|
|
5
|
+
**Can a real AI agent, given only your skill.md, figure out how to use your site?**
|
|
6
|
+
|
|
7
|
+
Unlike API-level test harnesses, Claw Harness tests the full agent experience end-to-end — skill comprehension, API discovery, tool usage, and multi-agent interaction.
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
npm install claw-harness
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
**Prerequisites:**
|
|
16
|
+
- Node.js >= 22
|
|
17
|
+
- OpenClaw installed (`npm install -g openclaw@latest`)
|
|
18
|
+
- `ANTHROPIC_API_KEY` set in environment
|
|
19
|
+
|
|
20
|
+
### Run a scenario
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# Scaffold a new scenario from the example template
|
|
24
|
+
claw-harness init my-test
|
|
25
|
+
|
|
26
|
+
# Run it
|
|
27
|
+
claw-harness run my-test.yaml
|
|
28
|
+
|
|
29
|
+
# Output as JSON
|
|
30
|
+
claw-harness run my-test.yaml --reporter json > results.json
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Programmatic API
|
|
34
|
+
|
|
35
|
+
```ts
|
|
36
|
+
import { ClawHarness } from 'claw-harness'
|
|
37
|
+
|
|
38
|
+
const bench = new ClawHarness({ mode: 'local' })
|
|
39
|
+
|
|
40
|
+
const bot = bench.bot('alpha', {
|
|
41
|
+
preset: 'default',
|
|
42
|
+
skills: [{ url: 'http://localhost:3000/skill.md', name: 'my-app' }],
|
|
43
|
+
userMd: 'You are a friendly bot.',
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
await bench.start()
|
|
47
|
+
const response = await bot.send('Register yourself on the platform')
|
|
48
|
+
console.log(response.text)
|
|
49
|
+
await bench.stop()
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Scenario Format
|
|
53
|
+
|
|
54
|
+
Scenarios are YAML files that define bots and a sequence of steps:
|
|
55
|
+
|
|
56
|
+
```yaml
|
|
57
|
+
name: "Chat Test"
|
|
58
|
+
|
|
59
|
+
bots:
|
|
60
|
+
alpha:
|
|
61
|
+
preset: default
|
|
62
|
+
model: anthropic/claude-haiku-4-5-20251001
|
|
63
|
+
user_md: presets/personas/friendly.md
|
|
64
|
+
skills:
|
|
65
|
+
- url: "http://localhost:3000/skill.md"
|
|
66
|
+
name: target-app
|
|
67
|
+
|
|
68
|
+
beta:
|
|
69
|
+
preset: default
|
|
70
|
+
user_md: presets/personas/curious.md
|
|
71
|
+
skills:
|
|
72
|
+
- url: "http://localhost:3000/skill.md"
|
|
73
|
+
name: target-app
|
|
74
|
+
|
|
75
|
+
steps:
|
|
76
|
+
# Serial steps
|
|
77
|
+
- bot: alpha
|
|
78
|
+
prompt: "Read the skill docs and register yourself."
|
|
79
|
+
timeout: 60s
|
|
80
|
+
|
|
81
|
+
- bot: beta
|
|
82
|
+
prompt: "Register yourself on the platform."
|
|
83
|
+
timeout: 60s
|
|
84
|
+
|
|
85
|
+
# Parallel steps
|
|
86
|
+
- parallel:
|
|
87
|
+
- bot: alpha
|
|
88
|
+
prompt: "Join a lounge and introduce yourself."
|
|
89
|
+
- bot: beta
|
|
90
|
+
prompt: "Find a lounge with another bot and join."
|
|
91
|
+
|
|
92
|
+
# Repeat block
|
|
93
|
+
- repeat: 3
|
|
94
|
+
interval: 15s
|
|
95
|
+
steps:
|
|
96
|
+
- bot: alpha
|
|
97
|
+
prompt: "Check for new messages and respond."
|
|
98
|
+
timeout: 30s
|
|
99
|
+
- bot: beta
|
|
100
|
+
prompt: "Check for new messages and respond."
|
|
101
|
+
timeout: 30s
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## CLI
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
claw-harness run <scenario.yaml> [options] Run a test scenario
|
|
108
|
+
claw-harness init [name] Scaffold a new scenario
|
|
109
|
+
claw-harness presets List available presets
|
|
110
|
+
|
|
111
|
+
Options:
|
|
112
|
+
--model <model> Override model for all bots
|
|
113
|
+
--reporter <format> Output format: console (default) | json
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Presets
|
|
117
|
+
|
|
118
|
+
Claw Harness ships with presets for common configurations:
|
|
119
|
+
|
|
120
|
+
**Configs** — merged into each bot's `openclaw.json`:
|
|
121
|
+
- `default` — Full tools, Haiku model
|
|
122
|
+
- `minimal` — Restricted tools, lower cost
|
|
123
|
+
|
|
124
|
+
**Personas** — `user.md` templates that shape bot behavior:
|
|
125
|
+
- `friendly` — Outgoing, asks follow-up questions
|
|
126
|
+
- `curious` — Thoughtful, explores ideas deeply
|
|
127
|
+
- `terse` — Brief, technical, to the point
|
|
128
|
+
|
|
129
|
+
## How It Works
|
|
130
|
+
|
|
131
|
+
Each bot gets full isolation:
|
|
132
|
+
|
|
133
|
+
1. **Workspace** — A dedicated profile directory (`~/.openclaw-claw-harness-<id>/`) with its own `openclaw.json`, `USER.md`, and skills
|
|
134
|
+
2. **Gateway** — Its own OpenClaw gateway process on a dedicated port
|
|
135
|
+
3. **Communication** — Prompts sent via the OpenAI-compatible HTTP API (`/v1/chat/completions`)
|
|
136
|
+
|
|
137
|
+
Bots have no shared state. They interact only through the target application, just like real users would.
|
|
138
|
+
|
|
139
|
+
## Development
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
git clone https://github.com/dasconnor/claw-harness.git
|
|
143
|
+
cd claw-harness
|
|
144
|
+
npm install
|
|
145
|
+
npm test # Run tests (vitest)
|
|
146
|
+
npm run build # Build to dist/
|
|
147
|
+
npm run lint # Type check
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
MIT
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AgentClient — Communicates with an OpenClaw agent via the HTTP API.
|
|
3
|
+
*
|
|
4
|
+
* Uses the OpenAI-compatible /v1/chat/completions endpoint.
|
|
5
|
+
* Multi-turn sessions are maintained via the `user` field.
|
|
6
|
+
*/
|
|
7
|
+
import type { BotResponse } from './types.js';
|
|
8
|
+
/**
 * HTTP client bound to a single OpenClaw gateway.
 *
 * One instance talks to one gateway; the session a prompt belongs to is
 * chosen per call through `sessionId`.
 */
export declare class AgentClient {
    /** Gateway origin; the implementation builds it as `http://127.0.0.1:<port>`. */
    private baseUrl;
    /** Credential sent as `Authorization: Bearer <token>` with every request. */
    private token;
    /**
     * @param port  Port of the gateway on 127.0.0.1.
     * @param token Bearer token presented to the gateway.
     */
    constructor(port: number, token: string);
    /**
     * POST a single user prompt to `/v1/chat/completions` and resolve with the outcome.
     *
     * HTTP error statuses, timeouts and network failures are reported on the
     * resolved value (`ok: false` plus `error`) rather than by rejecting.
     *
     * @param prompt    Text sent as the content of one `user` message.
     * @param sessionId Sent as the request's `user` field, which keeps turns in the same session.
     * @param timeout   Abort deadline in milliseconds; the implementation uses 120000 when omitted.
     * @returns The reply text, the raw payload, the elapsed time and the success flag.
     */
    send(prompt: string, sessionId: string, timeout?: number): Promise<BotResponse>;
}
|
|
17
|
+
//# sourceMappingURL=agent-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-client.d.ts","sourceRoot":"","sources":["../src/agent-client.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAA;AAI7C,qBAAa,WAAW;IACtB,OAAO,CAAC,OAAO,CAAQ;IACvB,OAAO,CAAC,KAAK,CAAQ;gBAET,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IAKvC;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;CAiEtF"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AgentClient — Communicates with an OpenClaw agent via the HTTP API.
|
|
3
|
+
*
|
|
4
|
+
* Uses the OpenAI-compatible /v1/chat/completions endpoint.
|
|
5
|
+
* Multi-turn sessions are maintained via the `user` field.
|
|
6
|
+
*/
|
|
7
|
+
/** Request deadline in milliseconds used when `send()` is called without a timeout. */
const DEFAULT_TIMEOUT = 120_000;

/**
 * AgentClient — talks to one OpenClaw gateway through its OpenAI-compatible
 * `/v1/chat/completions` endpoint on the loopback interface.
 */
export class AgentClient {
    /** Gateway origin, e.g. `http://127.0.0.1:18800`. */
    baseUrl;
    /** Bearer token sent in the `Authorization` header of every request. */
    token;

    /**
     * @param {number} port  Port of the gateway on 127.0.0.1.
     * @param {string} token Bearer token presented to the gateway.
     */
    constructor(port, token) {
        this.baseUrl = `http://127.0.0.1:${port}`;
        this.token = token;
    }

    /**
     * Send a prompt to the agent and wait for the response.
     *
     * Failures are never thrown: HTTP error statuses, timeouts, network errors
     * and unreadable bodies all resolve to an object with `ok: false` and an
     * `error` message.
     *
     * @param {string} prompt      Text sent as the content of a single `user` message.
     * @param {string} sessionId   Sent as the `user` field so the gateway keeps turns in one session.
     * @param {number} [timeout]   Abort deadline in milliseconds; defaults to DEFAULT_TIMEOUT.
     * @returns {Promise<{text: string, raw: unknown, duration: number, ok: boolean, error?: string}>}
     *   `duration` is the wall-clock time in milliseconds from the start of the
     *   call until the body has been read (or the failure was caught).
     */
    async send(prompt, sessionId, timeout) {
        const startTime = Date.now();
        const timeoutMs = timeout ?? DEFAULT_TIMEOUT;
        // Evaluated at each return site so the figure also covers reading the
        // body, which is subject to the same abort signal as the request itself.
        const elapsed = () => Date.now() - startTime;
        try {
            const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
                method: 'POST',
                headers: {
                    'Authorization': `Bearer ${this.token}`,
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({
                    model: 'openclaw',
                    messages: [{ role: 'user', content: prompt }],
                    stream: false,
                    user: sessionId,
                }),
                signal: AbortSignal.timeout(timeoutMs),
            });
            if (!response.ok) {
                const errorText = await response.text();
                return {
                    text: '',
                    raw: errorText,
                    duration: elapsed(),
                    ok: false,
                    error: `HTTP ${response.status}: ${errorText}`,
                };
            }
            const data = await response.json();
            // Extract text from OpenAI-compatible response format. The leading
            // `?.` keeps a literal `null` JSON body from throwing; it is treated
            // like any other payload without choices (empty text).
            const text = data?.choices?.[0]?.message?.content ?? '';
            return {
                text,
                raw: data,
                duration: elapsed(),
                ok: true,
            };
        }
        catch (error) {
            // Differentiate timeout from network errors
            let message;
            if (error instanceof DOMException && error.name === 'TimeoutError') {
                message = `Request timed out after ${timeoutMs}ms`;
            }
            else if (error instanceof TypeError && error.message.includes('fetch')) {
                message = `Network error: could not connect to ${this.baseUrl}`;
            }
            else {
                message = error instanceof Error ? error.message : String(error);
            }
            return {
                text: '',
                raw: null,
                duration: elapsed(),
                ok: false,
                error: message,
            };
        }
    }
}
|
|
79
|
+
//# sourceMappingURL=agent-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-client.js","sourceRoot":"","sources":["../src/agent-client.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,MAAM,eAAe,GAAG,OAAO,CAAA;AAE/B,MAAM,OAAO,WAAW;IACd,OAAO,CAAQ;IACf,KAAK,CAAQ;IAErB,YAAY,IAAY,EAAE,KAAa;QACrC,IAAI,CAAC,OAAO,GAAG,oBAAoB,IAAI,EAAE,CAAA;QACzC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;IACpB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAc,EAAE,SAAiB,EAAE,OAAgB;QAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;QAE5B,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,sBAAsB,EAAE;gBAClE,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,eAAe,EAAE,UAAU,IAAI,CAAC,KAAK,EAAE;oBACvC,cAAc,EAAE,kBAAkB;iBACnC;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,UAAU;oBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;oBAC7C,MAAM,EAAE,KAAK;oBACb,IAAI,EAAE,SAAS;iBAChB,CAAC;gBACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,IAAI,eAAe,CAAC;aACxD,CAAC,CAAA;YAEF,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;YAEvC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;gBACvC,OAAO;oBACL,IAAI,EAAE,EAAE;oBACR,GAAG,EAAE,SAAS;oBACd,QAAQ;oBACR,EAAE,EAAE,KAAK;oBACT,KAAK,EAAE,QAAQ,QAAQ,CAAC,MAAM,KAAK,SAAS,EAAE;iBAC/C,CAAA;YACH,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAA6D,CAAA;YAE7F,sDAAsD;YACtD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAA;YAEtD,OAAO;gBACL,IAAI;gBACJ,GAAG,EAAE,IAAI;gBACT,QAAQ;gBACR,EAAE,EAAE,IAAI;aACT,CAAA;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;YAEvC,4CAA4C;YAC5C,IAAI,OAAe,CAAA;YACnB,IAAI,KAAK,YAAY,YAAY,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;gBACnE,OAAO,GAAG,2BAA2B,OAAO,IAAI,eAAe,IAAI,CAAA;YACrE,CAAC;iBAAM,IAAI,KAAK,YAAY,SAAS,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzE,OAAO,GAAG,uCAAuC,IAAI,CAAC,OAAO,EAAE,CAAA;YACjE,CAAC;iBAAM,CAAC;gBACN,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YAClE,CAAC;YAED,OAAO;gBACL,IAAI,EAAE,EAAE;gBACR,GAAG,EAAE,IAA
I;gBACT,QAAQ;gBACR,EAAE,EAAE,KAAK;gBACT,KAAK,EAAE,OAAO;aACf,CAAA;QACH,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-client.test.d.ts","sourceRoot":"","sources":["../src/agent-client.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
2
|
+
import { AgentClient } from './agent-client.js';
|
|
3
|
+
// Unit tests for AgentClient. The global `fetch` is swapped for a vitest mock
// in each case, so no gateway has to be running.
describe('AgentClient', () => {
    const PORT = 18800;
    const TOKEN = 'test-token';
    const savedFetch = globalThis.fetch;

    // Install a fetch stub that resolves to the given response-like object.
    const stubFetchResponse = (fakeResponse) => {
        globalThis.fetch = vi.fn().mockResolvedValue(fakeResponse);
    };

    // Build a fresh client and send the same prompt every test uses.
    const sendHello = () => {
        const client = new AgentClient(PORT, TOKEN);
        return client.send('Hello', 'session-1');
    };

    afterEach(() => {
        // Put the real fetch back so one test's stub never leaks into the next.
        globalThis.fetch = savedFetch;
    });

    it('sends a prompt and returns successful response', async () => {
        const payload = {
            choices: [{ message: { content: 'Hello back!' } }],
        };
        stubFetchResponse({
            ok: true,
            json: () => Promise.resolve(payload),
        });

        const result = await sendHello();

        expect(result.ok).toBe(true);
        expect(result.text).toBe('Hello back!');
        expect(result.duration).toBeGreaterThanOrEqual(0);

        const expectedHeaders = expect.objectContaining({
            'Authorization': 'Bearer test-token',
        });
        const expectedInit = expect.objectContaining({
            method: 'POST',
            headers: expectedHeaders,
        });
        expect(vi.mocked(fetch)).toHaveBeenCalledWith('http://127.0.0.1:18800/v1/chat/completions', expectedInit);
    });

    it('handles HTTP error responses', async () => {
        stubFetchResponse({
            ok: false,
            status: 500,
            text: () => Promise.resolve('Internal Server Error'),
        });

        const result = await sendHello();

        expect(result.ok).toBe(false);
        expect(result.error).toContain('HTTP 500');
        expect(result.error).toContain('Internal Server Error');
    });

    it('handles network errors', async () => {
        const failure = new TypeError('fetch failed');
        globalThis.fetch = vi.fn().mockRejectedValue(failure);

        const result = await sendHello();

        expect(result.ok).toBe(false);
        expect(result.error).toContain('Network error');
    });

    it('handles missing choices in response', async () => {
        stubFetchResponse({
            ok: true,
            json: () => Promise.resolve({}),
        });

        const result = await sendHello();

        expect(result.ok).toBe(true);
        expect(result.text).toBe('');
    });
});
|
|
57
|
+
//# sourceMappingURL=agent-client.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-client.test.js","sourceRoot":"","sources":["../src/agent-client.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAc,SAAS,EAAE,MAAM,QAAQ,CAAA;AACxE,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAA;AAE/C,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;IAC3B,MAAM,aAAa,GAAG,UAAU,CAAC,KAAK,CAAA;IAEtC,SAAS,CAAC,GAAG,EAAE;QACb,UAAU,CAAC,KAAK,GAAG,aAAa,CAAA;IAClC,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,UAAU,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YAC3C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC;gBAC1B,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE,CAAC;aACnD,CAAC;SACH,CAAC,CAAA;QAEF,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,KAAK,EAAE,YAAY,CAAC,CAAA;QACnD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;QAEtD,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;QACvC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAA;QAEjD,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,oBAAoB,CAC3C,4CAA4C,EAC5C,MAAM,CAAC,gBAAgB,CAAC;YACtB,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,MAAM,CAAC,gBAAgB,CAAC;gBAC/B,eAAe,EAAE,mBAAmB;aACrC,CAAC;SACH,CAAC,CACH,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;QAC5C,UAAU,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YAC3C,EAAE,EAAE,KAAK;YACT,MAAM,EAAE,GAAG;YACX,IAAI,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,uBAAuB,CAAC;SACrD,CAAC,CAAA;QAEF,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,KAAK,EAAE,YAAY,CAAC,CAAA;QACnD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;QAEtD,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC7B,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAA;QAC1C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAA;IACzD,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,wBAAwB,EAAE,KAAK,IAAI,EAAE;QACtC,UAAU,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,SAAS,CAAC,cAAc,CAAC,CAAC,CAAA;QAE3E,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,
KAAK,EAAE,YAAY,CAAC,CAAA;QACnD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;QAEtD,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC7B,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAA;IACjD,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,qCAAqC,EAAE,KAAK,IAAI,EAAE;QACnD,UAAU,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YAC3C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;SAChC,CAAC,CAAA;QAEF,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,KAAK,EAAE,YAAY,CAAC,CAAA;QACnD,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,CAAC,CAAA;QAEtD,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC9B,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
|
package/dist/bench.d.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claw Harness — Main orchestrator class.
|
|
3
|
+
*
|
|
4
|
+
* Creates and manages bot instances, handles lifecycle.
|
|
5
|
+
*/
|
|
6
|
+
import { Bot } from './bot.js';
|
|
7
|
+
import type { BenchConfig, BotConfig, ScenarioResult, StepResult } from './types.js';
|
|
8
|
+
/**
 * Orchestrator that registers bots, drives their lifecycle and collects the
 * step results recorded during a run.
 */
export declare class ClawHarness {
    /** Configuration with the constructor's defaults already applied. */
    private config;
    /** Registered bots, keyed by the id passed to `bot()`. */
    private bots;
    /** Step results in the order they were passed to `recordStep()`. */
    private stepResults;
    /** ISO-8601 timestamp taken when `start()` is called; unset before that. */
    private startTime?;
    /** Number of bots registered so far; used to derive each bot's port. */
    private nextPortIndex;
    /**
     * @param config Harness settings. In the implementation, omitted API keys
     *               fall back to the matching environment variables.
     */
    constructor(config: BenchConfig);
    /**
     * Create a bot, register it under `id` and return it.
     * Each call is given its own port, spaced 20 apart starting at the base port.
     */
    bot(id: string, config: BotConfig): Bot;
    /**
     * Start every registered bot, one after another.
     * When one fails to start, the bots that had already started are stopped
     * and the original error is rethrown.
     */
    start(): Promise<void>;
    /**
     * Stop every registered bot.
     */
    stop(): Promise<void>;
    /**
     * Look up a registered bot; `undefined` when the id is unknown.
     */
    getBot(id: string): Bot | undefined;
    /**
     * Append a step result to the run log (called by the runner).
     */
    recordStep(result: StepResult): void;
    /**
     * Return a copy of the recorded step results across all bots.
     */
    getConversationLog(): StepResult[];
    /**
     * Build a summary of the run: per-bot totals, assertion counts and timing.
     */
    getResults(): ScenarioResult;
}
|
|
45
|
+
//# sourceMappingURL=bench.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench.d.ts","sourceRoot":"","sources":["../src/bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAA;AAC9B,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAIpF,qBAAa,WAAW;IACtB,OAAO,CAAC,MAAM,CAAuB;IACrC,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,WAAW,CAAmB;IACtC,OAAO,CAAC,SAAS,CAAC,CAAQ;IAC1B,OAAO,CAAC,aAAa,CAAI;gBAEb,MAAM,EAAE,WAAW;IAW/B;;OAEG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,GAAG,GAAG;IAgBvC;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAkB5B;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAK3B;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,GAAG,GAAG,SAAS;IAInC;;OAEG;IACH,UAAU,CAAC,MAAM,EAAE,UAAU,GAAG,IAAI;IAIpC;;OAEG;IACH,kBAAkB,IAAI,UAAU,EAAE;IAIlC;;OAEG;IACH,UAAU,IAAI,cAAc;CAyC7B"}
|
package/dist/bench.js
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claw Harness — Main orchestrator class.
|
|
3
|
+
*
|
|
4
|
+
* Creates and manages bot instances, handles lifecycle.
|
|
5
|
+
*/
|
|
6
|
+
import { Bot } from './bot.js';
|
|
7
|
+
/** First gateway port handed out when the caller does not set `basePort`. */
const DEFAULT_BASE_PORT = 18800;

/**
 * Claw Harness orchestrator: registers bots, starts and stops them, and
 * aggregates the step results recorded during a run.
 */
export class ClawHarness {
    /** Configuration with defaults applied (see constructor). */
    config;
    /** Registered bots keyed by id, in registration order. */
    bots = new Map();
    /** Step results in the order they were recorded. */
    stepResults = [];
    /** ISO-8601 timestamp set by `start()`; undefined until then. */
    startTime;
    /** Number of bots registered so far; drives port allocation. */
    nextPortIndex = 0;

    /**
     * @param {object} config Harness settings. `mode` is taken as-is. `basePort`
     *   and `workspaceDir` fall back to built-in defaults. Each API key falls
     *   back to its environment variable and finally to an empty string.
     */
    constructor(config) {
        this.config = {
            mode: config.mode,
            basePort: config.basePort ?? DEFAULT_BASE_PORT,
            workspaceDir: config.workspaceDir ?? defaultWorkspaceDir(),
            anthropicApiKey: config.anthropicApiKey ?? process.env.ANTHROPIC_API_KEY ?? '',
            openaiApiKey: config.openaiApiKey ?? process.env.OPENAI_API_KEY ?? '',
            anthropicAdminKey: config.anthropicAdminKey ?? process.env.ANTHROPIC_ADMIN_API_KEY ?? '',
        };
    }

    /**
     * Register a bot with the bench.
     *
     * @param {string} id     Key under which the bot is stored and later looked up.
     * @param {object} config Bot configuration, passed through to `Bot` unchanged.
     * @returns {Bot} The newly created bot (not yet started).
     */
    bot(id, config) {
        // Ports are spaced 20 apart: basePort, basePort + 20, basePort + 40, ...
        const port = this.config.basePort + (this.nextPortIndex * 20);
        this.nextPortIndex++;
        const bot = new Bot(id, config, {
            mode: this.config.mode,
            port,
            workspaceDir: this.config.workspaceDir,
            anthropicApiKey: this.config.anthropicApiKey,
            openaiApiKey: this.config.openaiApiKey,
        });
        this.bots.set(id, bot);
        return bot;
    }

    /**
     * Start all registered bots sequentially.
     * If a bot fails to start, all previously started bots are stopped
     * (errors from those stops are ignored) and the start error is rethrown.
     *
     * @returns {Promise<void>}
     */
    async start() {
        this.startTime = new Date().toISOString();
        const startedBots = [];
        for (const bot of this.bots.values()) {
            try {
                await bot.start();
                startedBots.push(bot);
            }
            catch (err) {
                // Clean up already-started bots. The bot whose start() failed is
                // not in the list, so stop() is not called on it here.
                for (const started of startedBots) {
                    // Best effort: a failing stop must not mask the start error.
                    await started.stop().catch(() => { });
                }
                throw err;
            }
        }
    }

    /**
     * Stop all bots and clean up.
     *
     * All bots are stopped concurrently and this method settles only after
     * every one of them has finished stopping, even when some fail. If any
     * stop failed, the first failure (in registration order) is rethrown.
     *
     * @returns {Promise<void>}
     */
    async stop() {
        // The async wrapper turns a synchronous throw from stop() into a
        // rejection, so one misbehaving bot cannot prevent the others from
        // being asked to stop.
        const outcomes = await Promise.allSettled(
            Array.from(this.bots.values(), async (bot) => bot.stop()),
        );
        const failure = outcomes.find((outcome) => outcome.status === 'rejected');
        if (failure) {
            throw failure.reason;
        }
    }

    /**
     * Get a bot by ID.
     *
     * @param {string} id
     * @returns {Bot | undefined} The registered bot, or undefined for an unknown id.
     */
    getBot(id) {
        return this.bots.get(id);
    }

    /**
     * Record a step result (used by the runner).
     *
     * @param {object} result Step result; `getResults()` reads its `botId`,
     *   `response.duration`, `response.ok` and optional `assertions`.
     */
    recordStep(result) {
        this.stepResults.push(result);
    }

    /**
     * Get the full conversation log across all bots.
     *
     * @returns {object[]} A shallow copy of the recorded step results.
     */
    getConversationLog() {
        return [...this.stepResults];
    }

    /**
     * Get a summary of the run.
     *
     * @returns {object} Summary with an empty `name`, start/end timestamps,
     *   `duration` in milliseconds (0 when `start()` was never called), a
     *   snapshot of the recorded steps, per-bot totals and assertion counts.
     */
    getResults() {
        const endTime = new Date().toISOString();
        const botSummaries = {};
        for (const [id] of this.bots) {
            const botSteps = this.stepResults.filter(s => s.botId === id);
            botSummaries[id] = {
                messagesSent: botSteps.length,
                totalDuration: botSteps.reduce((sum, s) => sum + s.response.duration, 0),
                errors: botSteps.filter(s => !s.response.ok).length,
            };
        }
        // Aggregate assertion counts
        let totalAssertions = 0;
        let passedAssertions = 0;
        for (const step of this.stepResults) {
            if (step.assertions) {
                for (const a of step.assertions) {
                    totalAssertions++;
                    if (a.passed)
                        passedAssertions++;
                }
            }
        }
        return {
            name: '',
            startTime: this.startTime ?? endTime,
            endTime,
            duration: this.startTime
                ? new Date(endTime).getTime() - new Date(this.startTime).getTime()
                : 0,
            // Copy, like getConversationLog(), so the summary is a snapshot and
            // callers cannot mutate the harness's internal log through it.
            steps: [...this.stepResults],
            bots: botSummaries,
            assertions: {
                total: totalAssertions,
                passed: passedAssertions,
                failed: totalAssertions - passedAssertions,
            },
        };
    }
}

/**
 * Default workspace root: `<home>/.claw-harness/workspaces`, where `<home>` is
 * `$HOME`, else `$USERPROFILE`, else `/tmp`.
 *
 * @returns {string}
 */
function defaultWorkspaceDir() {
    const home = process.env.HOME ?? process.env.USERPROFILE ?? '/tmp';
    return `${home}/.claw-harness/workspaces`;
}
|
|
133
|
+
//# sourceMappingURL=bench.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench.js","sourceRoot":"","sources":["../src/bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAA;AAG9B,MAAM,iBAAiB,GAAG,KAAK,CAAA;AAE/B,MAAM,OAAO,WAAW;IACd,MAAM,CAAuB;IAC7B,IAAI,GAAqB,IAAI,GAAG,EAAE,CAAA;IAClC,WAAW,GAAiB,EAAE,CAAA;IAC9B,SAAS,CAAS;IAClB,aAAa,GAAG,CAAC,CAAA;IAEzB,YAAY,MAAmB;QAC7B,IAAI,CAAC,MAAM,GAAG;YACZ,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,iBAAiB;YAC9C,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,mBAAmB,EAAE;YAC1D,eAAe,EAAE,MAAM,CAAC,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;YAC9E,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,EAAE;YACrE,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,OAAO,CAAC,GAAG,CAAC,uBAAuB,IAAI,EAAE;SACzF,CAAA;IACH,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,EAAU,EAAE,MAAiB;QAC/B,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,IAAI,CAAC,aAAa,GAAG,EAAE,CAAC,CAAA;QAC7D,IAAI,CAAC,aAAa,EAAE,CAAA;QAEpB,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE;YAC9B,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI;YACtB,IAAI;YACJ,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY;YACtC,eAAe,EAAE,IAAI,CAAC,MAAM,CAAC,eAAe;YAC5C,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY;SACvC,CAAC,CAAA;QAEF,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAA;QACtB,OAAO,GAAG,CAAA;IACZ,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;QACzC,MAAM,WAAW,GAAU,EAAE,CAAA;QAE7B,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;YACrC,IAAI,CAAC;gBACH,MAAM,GAAG,CAAC,KAAK,EAAE,CAAA;gBACjB,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YACvB,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,gCAAgC;gBAChC,KAAK,MAAM,OAAO,IAAI,WAAW,EAAE,CAAC;oBAClC,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;gBACtC,CAAC;gBACD,MAAM,GAAG,CAAA;YACX,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI;QACR,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAA;QAC1E,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;IACjC,CAAC;IAED;;OAEG;IACH,MAAM,C
AAC,EAAU;QACf,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;IAC1B,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,MAAkB;QAC3B,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAC/B,CAAC;IAED;;OAEG;IACH,kBAAkB;QAChB,OAAO,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,CAAA;IAC9B,CAAC;IAED;;OAEG;IACH,UAAU;QACR,MAAM,OAAO,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;QACxC,MAAM,YAAY,GAA2B,EAAE,CAAA;QAE/C,KAAK,MAAM,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,EAAE,CAAC,CAAA;YAC7D,YAAY,CAAC,EAAE,CAAC,GAAG;gBACjB,YAAY,EAAE,QAAQ,CAAC,MAAM;gBAC7B,aAAa,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;gBACxE,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,MAAM;aACpD,CAAA;QACH,CAAC;QAED,6BAA6B;QAC7B,IAAI,eAAe,GAAG,CAAC,CAAA;QACvB,IAAI,gBAAgB,GAAG,CAAC,CAAA;QACxB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACpC,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;gBACpB,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;oBAChC,eAAe,EAAE,CAAA;oBACjB,IAAI,CAAC,CAAC,MAAM;wBAAE,gBAAgB,EAAE,CAAA;gBAClC,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO;YACL,IAAI,EAAE,EAAE;YACR,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,OAAO;YACpC,OAAO;YACP,QAAQ,EAAE,IAAI,CAAC,SAAS;gBACtB,CAAC,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE;gBAClE,CAAC,CAAC,CAAC;YACL,KAAK,EAAE,IAAI,CAAC,WAAW;YACvB,IAAI,EAAE,YAAY;YAClB,UAAU,EAAE;gBACV,KAAK,EAAE,eAAe;gBACtB,MAAM,EAAE,gBAAgB;gBACxB,MAAM,EAAE,eAAe,GAAG,gBAAgB;aAC3C;SACF,CAAA;IACH,CAAC;CACF;AAED,SAAS,mBAAmB;IAC1B,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,MAAM,CAAA;IAClE,OAAO,GAAG,IAAI,2BAA2B,CAAA;AAC3C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench.test.d.ts","sourceRoot":"","sources":["../src/bench.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
|
+
import { ClawHarness } from './bench.js';
|
|
3
|
+
// Mock Bot to avoid spawning real processes
|
|
4
|
+
vi.mock('./bot.js', () => {
  // Everything the stand-in needs is declared inside the factory so the
  // factory does not depend on any module-level binding.
  const buildFakeBot = (id) => {
    // Lifecycle calls resolve immediately with no value.
    const start = vi.fn().mockResolvedValue(undefined);
    const stop = vi.fn().mockResolvedValue(undefined);

    // Every send() resolves with the same canned successful reply.
    const send = vi.fn().mockResolvedValue({
      text: 'response',
      raw: {},
      duration: 100,
      ok: true,
    });

    return { id, start, stop, send };
  };

  // Bot is itself a mock so tests can inspect the arguments it was created with.
  const Bot = vi.fn().mockImplementation((id) => buildFakeBot(id));

  return { Bot };
});
|
|
19
|
+
describe('ClawHarness', () => {
  /**
   * Builds the successful "Hello" step recorded by several tests.
   * A new object (and timestamp) is produced on every call.
   */
  const greetingStep = () => ({
    botId: 'alpha',
    prompt: 'Hello',
    response: { text: 'Hi', raw: {}, duration: 50, ok: true },
    timestamp: new Date().toISOString(),
  });

  beforeEach(() => {
    // Drop recorded mock calls so each test only sees its own interactions.
    vi.clearAllMocks();
  });

  it('registers bots with incremental port allocation', async () => {
    const bench = new ClawHarness({ mode: 'local', basePort: 10000 });
    bench.bot('alpha', { preset: 'default' });
    bench.bot('beta', { preset: 'default' });

    const { Bot } = vi.mocked(await import('./bot.js'));

    // First bot gets port 10000, second gets 10020
    expect(Bot).toHaveBeenCalledWith('alpha', expect.anything(), expect.objectContaining({ port: 10000 }));
    expect(Bot).toHaveBeenCalledWith('beta', expect.anything(), expect.objectContaining({ port: 10020 }));
  });

  it('getBot returns registered bot', () => {
    const bench = new ClawHarness({ mode: 'local' });
    bench.bot('alpha', { preset: 'default' });

    expect(bench.getBot('alpha')).toBeDefined();
    expect(bench.getBot('nonexistent')).toBeUndefined();
  });

  it('start() calls start on all bots sequentially', async () => {
    const bench = new ClawHarness({ mode: 'local' });
    const alpha = bench.bot('alpha', { preset: 'default' });
    const beta = bench.bot('beta', { preset: 'default' });

    // Record when each start() begins and ends. Merely checking that both
    // mocks were called would also pass if the bots were started concurrently.
    const events = [];
    vi.mocked(alpha.start).mockImplementationOnce(async () => {
      events.push('alpha:begin');
      // Yield once: a concurrent implementation would begin beta here.
      await Promise.resolve();
      events.push('alpha:end');
    });
    vi.mocked(beta.start).mockImplementationOnce(async () => {
      events.push('beta:begin');
    });

    await bench.start();

    expect(bench.getBot('alpha').start).toHaveBeenCalled();
    expect(bench.getBot('beta').start).toHaveBeenCalled();
    // beta must not begin until alpha has completely finished starting.
    expect(events).toEqual(['alpha:begin', 'alpha:end', 'beta:begin']);
  });

  it('start() cleans up started bots if one fails', async () => {
    const bench = new ClawHarness({ mode: 'local' });
    const alpha = bench.bot('alpha', { preset: 'default' });
    const beta = bench.bot('beta', { preset: 'default' });

    // Make beta.start() fail
    vi.mocked(beta.start).mockRejectedValueOnce(new Error('gateway failed'));

    // The original failure must surface to the caller.
    await expect(bench.start()).rejects.toThrow('gateway failed');

    // Alpha should have been stopped for cleanup
    expect(alpha.stop).toHaveBeenCalled();
  });

  it('recordStep and getResults work correctly', () => {
    const bench = new ClawHarness({ mode: 'local' });
    bench.bot('alpha', { preset: 'default' });

    // One successful step (50 ms) followed by one failed step (100 ms).
    bench.recordStep(greetingStep());
    bench.recordStep({
      botId: 'alpha',
      prompt: 'Bye',
      response: { text: '', raw: null, duration: 100, ok: false, error: 'timeout' },
      timestamp: new Date().toISOString(),
    });

    const results = bench.getResults();
    expect(results.steps).toHaveLength(2);
    expect(results.bots.alpha.messagesSent).toBe(2);
    expect(results.bots.alpha.errors).toBe(1);
    expect(results.bots.alpha.totalDuration).toBe(150);
  });

  it('getConversationLog returns a copy', () => {
    const bench = new ClawHarness({ mode: 'local' });
    bench.bot('alpha', { preset: 'default' });
    bench.recordStep(greetingStep());

    const log = bench.getConversationLog();
    expect(log).toHaveLength(1);

    // Modifying the copy should not affect internal state
    log.pop();
    expect(bench.getConversationLog()).toHaveLength(1);
  });
});
|
|
95
|
+
//# sourceMappingURL=bench.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench.test.js","sourceRoot":"","sources":["../src/bench.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAA;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAA;AAExC,4CAA4C;AAC5C,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE;IACvB,OAAO;QACL,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,CAAC,EAAU,EAAE,EAAE,CAAC,CAAC;YAC/C,EAAE;YACF,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;YAC3C,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;YAC1C,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;gBAC9B,IAAI,EAAE,UAAU;gBAChB,GAAG,EAAE,EAAE;gBACP,QAAQ,EAAE,GAAG;gBACb,EAAE,EAAE,IAAI;aACT,CAAC;SACH,CAAC,CAAC;KACJ,CAAA;AACH,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;IAC3B,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAA;IACpB,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,KAAK,GAAG,IAAI,WAAW,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAA;QAEjE,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;QACzC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;QAExC,MAAM,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC,CAAA;QAEnD,+CAA+C;QAC/C,MAAM,CAAC,GAAG,CAAC,CAAC,oBAAoB,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,EAAE,EAAE,MAAM,CAAC,gBAAgB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAA;QACtG,MAAM,CAAC,GAAG,CAAC,CAAC,oBAAoB,CAAC,MAAM,EAAE,MAAM,CAAC,QAAQ,EAAE,EAAE,MAAM,CAAC,gBAAgB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAA;IACvG,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,KAAK,GAAG,IAAI,WAAW,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAChD,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;QAEzC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,EAAE,CAAA;QAC3C,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,aAAa,EAAE,CAAA;IACrD,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,KAAK,GAAG,IAAI,WAAW,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAChD,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,MAAM
,EAAE,SAAS,EAAE,CAAC,CAAA;QACzC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;QAExC,MAAM,KAAK,CAAC,KAAK,EAAE,CAAA;QAEnB,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACnC,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;QACjC,MAAM,CAAC,KAAM,CAAC,KAAK,CAAC,CAAC,gBAAgB,EAAE,CAAA;QACvC,MAAM,CAAC,IAAK,CAAC,KAAK,CAAC,CAAC,gBAAgB,EAAE,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC3D,MAAM,KAAK,GAAG,IAAI,WAAW,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAChD,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;QACvD,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;QAErD,yBAAyB;QACzB,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,qBAAqB,CAAC,IAAI,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAA;QAExE,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAA;QAE7D,6CAA6C;QAC7C,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,gBAAgB,EAAE,CAAA;IACvC,CAAC,CAAC,CAAA;IAEF,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,KAAK,GAAG,IAAI,WAAW,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAChD,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;QAEzC,KAAK,CAAC,UAAU,CAAC;YACf,KAAK,EAAE,OAAO;YACd,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE;YACzD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAA;QAEF,KAAK,CAAC,UAAU,CAAC;YACf,KAAK,EAAE,OAAO;YACd,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,SAAS,EAAE;YAC7E,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAA;QAEF,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,EAAE,CAAA;QAClC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACrC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC/C,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACzC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACpD,CAAC,CAAC,C
AAA;IAEF,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,KAAK,GAAG,IAAI,WAAW,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAA;QAChD,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAA;QAEzC,KAAK,CAAC,UAAU,CAAC;YACf,KAAK,EAAE,OAAO;YACd,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE;YACzD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAA;QAEF,MAAM,GAAG,GAAG,KAAK,CAAC,kBAAkB,EAAE,CAAA;QACtC,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC3B,sDAAsD;QACtD,GAAG,CAAC,GAAG,EAAE,CAAA;QACT,MAAM,CAAC,KAAK,CAAC,kBAAkB,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IACpD,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clawbench.d.ts","sourceRoot":"","sources":["../../src/bin/clawbench.ts"],"names":[],"mappings":";AAEA;;;;;;;GAOG"}
|