benchmaker 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {benchmaker-0.1.0/benchmaker.egg-info → benchmaker-0.1.2}/PKG-INFO +74 -18
- benchmaker-0.1.0/PKG-INFO → benchmaker-0.1.2/README.md +63 -35
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/__init__.py +11 -7
- benchmaker-0.1.2/benchmaker/cli.py +215 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/config.py +10 -4
- benchmaker-0.1.2/benchmaker/core/__init__.py +2 -0
- {benchmaker-0.1.0/benchmaker → benchmaker-0.1.2/benchmaker/core}/metrics.py +1 -1
- {benchmaker-0.1.0/benchmaker → benchmaker-0.1.2/benchmaker/core}/runner.py +8 -8
- {benchmaker-0.1.0/benchmaker → benchmaker-0.1.2/benchmaker/core}/trace.py +2 -2
- benchmaker-0.1.2/benchmaker/io/__init__.py +1 -0
- {benchmaker-0.1.0/benchmaker → benchmaker-0.1.2/benchmaker/io}/bundle.py +2 -2
- {benchmaker-0.1.0/benchmaker → benchmaker-0.1.2/benchmaker/io}/collect.py +1 -1
- benchmaker-0.1.2/benchmaker/recipes/__init__.py +46 -0
- benchmaker-0.1.2/benchmaker/recipes/_cli_shared.py +60 -0
- benchmaker-0.1.2/benchmaker/recipes/_factory.py +123 -0
- benchmaker-0.1.2/benchmaker/recipes/base.py +165 -0
- benchmaker-0.1.2/benchmaker/recipes/http.py +73 -0
- benchmaker-0.1.2/benchmaker/recipes/llm.py +144 -0
- benchmaker-0.1.2/benchmaker/recipes/sandbox.py +155 -0
- benchmaker-0.1.2/benchmaker/recipes/sglang.py +109 -0
- benchmaker-0.1.2/benchmaker/recipes/swebench.py +313 -0
- benchmaker-0.1.2/benchmaker/recipes/swebench_replay.py +269 -0
- benchmaker-0.1.2/benchmaker/recipes/trajectory_replay.py +124 -0
- benchmaker-0.1.2/benchmaker/swebench/__init__.py +43 -0
- benchmaker-0.1.2/benchmaker/swebench/_flash_hardening.py +175 -0
- benchmaker-0.1.2/benchmaker/swebench/agent.py +543 -0
- benchmaker-0.1.2/benchmaker/swebench/grading.py +169 -0
- benchmaker-0.1.2/benchmaker/swebench/harbor_agent.py +266 -0
- benchmaker-0.1.2/benchmaker/swebench/harbor_eval.py +375 -0
- benchmaker-0.1.2/benchmaker/swebench/observability.py +621 -0
- benchmaker-0.1.2/benchmaker/swebench/pi_agent.py +564 -0
- benchmaker-0.1.2/benchmaker/swebench/pi_ext/max_turns.js +52 -0
- benchmaker-0.1.2/benchmaker/swebench/pi_ext/remote_exec.js +47 -0
- benchmaker-0.1.2/benchmaker/swebench/replay_server.py +206 -0
- benchmaker-0.1.2/benchmaker/swebench/trajectory.py +289 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/agent.py +2 -2
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/base.py +1 -1
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/eval.py +2 -2
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/http.py +1 -1
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/llm.py +69 -6
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/sandbox.py +319 -47
- benchmaker-0.1.2/benchmaker/workloads/sglang.py +221 -0
- benchmaker-0.1.2/benchmaker/workloads/trajectory.py +209 -0
- benchmaker-0.1.0/README.md → benchmaker-0.1.2/benchmaker.egg-info/PKG-INFO +91 -16
- benchmaker-0.1.2/benchmaker.egg-info/SOURCES.txt +75 -0
- benchmaker-0.1.2/benchmaker.egg-info/requires.txt +23 -0
- benchmaker-0.1.2/pyproject.toml +50 -0
- benchmaker-0.1.2/tests/test_agent_warmup.py +147 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/tests/test_bundle.py +2 -2
- {benchmaker-0.1.0 → benchmaker-0.1.2}/tests/test_coding_agent.py +2 -2
- benchmaker-0.1.2/tests/test_flash_hardening.py +89 -0
- benchmaker-0.1.2/tests/test_observability.py +502 -0
- benchmaker-0.1.2/tests/test_passthrough_meta.py +161 -0
- benchmaker-0.1.2/tests/test_pi_agent.py +162 -0
- benchmaker-0.1.2/tests/test_recipes_cli.py +313 -0
- benchmaker-0.1.2/tests/test_replay_server.py +133 -0
- benchmaker-0.1.2/tests/test_sandbox_duration.py +40 -0
- benchmaker-0.1.2/tests/test_sglang.py +152 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/tests/test_smoke.py +49 -2
- benchmaker-0.1.2/tests/test_swebench_replay_recipe.py +91 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/tests/test_trace.py +2 -2
- benchmaker-0.1.2/tests/test_trajectory.py +223 -0
- benchmaker-0.1.2/tests/test_trajectory_replay.py +300 -0
- benchmaker-0.1.0/benchmaker/cli.py +0 -382
- benchmaker-0.1.0/benchmaker.egg-info/SOURCES.txt +0 -36
- benchmaker-0.1.0/benchmaker.egg-info/requires.txt +0 -13
- benchmaker-0.1.0/pyproject.toml +0 -32
- {benchmaker-0.1.0/benchmaker → benchmaker-0.1.2/benchmaker/core}/load.py +0 -0
- {benchmaker-0.1.0/benchmaker → benchmaker-0.1.2/benchmaker/core}/monitors.py +0 -0
- {benchmaker-0.1.0/benchmaker → benchmaker-0.1.2/benchmaker/core}/types.py +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/env.py +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/__init__.py +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/datasets.py +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker/workloads/hf.py +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker.egg-info/dependency_links.txt +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker.egg-info/entry_points.txt +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/benchmaker.egg-info/top_level.txt +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/setup.cfg +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/tests/test_agent.py +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/tests/test_eval.py +0 -0
- {benchmaker-0.1.0 → benchmaker-0.1.2}/tests/test_hf.py +0 -0
|
@@ -1,23 +1,32 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: benchmaker
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Async HTTP benchmarking utility with pluggable workloads and load models.
|
|
5
5
|
Author: Xiaozhe Yao
|
|
6
6
|
License: MIT
|
|
7
|
-
Requires-Python: >=3.10
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
Requires-Dist: aiohttp>=3.9
|
|
10
10
|
Requires-Dist: click>=8.1
|
|
11
|
+
Requires-Dist: datasets>=4.8.5
|
|
12
|
+
Requires-Dist: huggingface-hub>=1.16.4
|
|
13
|
+
Requires-Dist: pyarrow>=24.0.0
|
|
11
14
|
Requires-Dist: pyyaml>=6.0
|
|
15
|
+
Requires-Dist: swebench>=4.1.0
|
|
12
16
|
Provides-Extra: rich
|
|
13
17
|
Requires-Dist: rich>=13; extra == "rich"
|
|
14
18
|
Provides-Extra: hf
|
|
15
19
|
Requires-Dist: datasets>=2.18; extra == "hf"
|
|
20
|
+
Requires-Dist: transformers>=4.40; extra == "hf"
|
|
16
21
|
Provides-Extra: dev
|
|
17
22
|
Requires-Dist: pytest>=7; extra == "dev"
|
|
18
23
|
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
24
|
+
Provides-Extra: plot
|
|
25
|
+
Requires-Dist: ipykernel>=7.2.0; extra == "plot"
|
|
26
|
+
Requires-Dist: matplotlib>=3.10.9; extra == "plot"
|
|
27
|
+
Requires-Dist: seaborn>=0.13.2; extra == "plot"
|
|
19
28
|
|
|
20
|
-
# bench-maker
|
|
29
|
+
# benchmaker
|
|
21
30
|
|
|
22
31
|
Async HTTP benchmarking with pluggable workload-types (protocols), workloads
|
|
23
32
|
(datasets), load models, hooks, and optional periodic monitors.
|
|
@@ -44,7 +53,7 @@ pip install -e .
|
|
|
44
53
|
pip install -e .[dev] # for tests
|
|
45
54
|
```
|
|
46
55
|
|
|
47
|
-
This installs the `benchmaker` Python package and the `bench-maker` CLI.
|
|
56
|
+
This installs the `benchmaker` Python package and the `benchmaker` CLI.
|
|
48
57
|
|
|
49
58
|
## 30-second tour
|
|
50
59
|
|
|
@@ -63,10 +72,12 @@ async def main():
|
|
|
63
72
|
asyncio.run(main())
|
|
64
73
|
```
|
|
65
74
|
|
|
66
|
-
Or via the CLI:
|
|
75
|
+
Or via the CLI. Workload-specific benchmarks are exposed as **recipes** —
|
|
76
|
+
`benchmaker <recipe> --args` (`http`, `llm`, `sandbox`, `swebench`, `sglang`,
|
|
77
|
+
`trajectory-replay`):
|
|
67
78
|
|
|
68
79
|
```bash
|
|
69
|
-
|
|
80
|
+
benchmaker http --url https://httpbin.org/get --rate poisson:50 --duration 10s
|
|
70
81
|
```
|
|
71
82
|
|
|
72
83
|
## Walkthrough: benchmarking an LLM endpoint with ShareGPT
|
|
@@ -129,16 +140,16 @@ also come from `.env` via `OpenAIChatWorkloadType.from_env(...)`.
|
|
|
129
140
|
|
|
130
141
|
### Rebuild or customize it yourself
|
|
131
142
|
|
|
132
|
-
The published split is produced by `tools/
|
|
143
|
+
The published split is produced by `tools/sharegpt/prepare.py`, which downloads
|
|
133
144
|
the upstream JSON once into `.local/` (gitignored) and converts it to the JSONL
|
|
134
145
|
shape above. Run it when you want a subset, different filtering, or a refresh:
|
|
135
146
|
|
|
136
147
|
```bash
|
|
137
148
|
# Defaults: .local/sharegpt_v3_raw.json -> .local/sharegpt_v3.jsonl
|
|
138
|
-
python tools/
|
|
149
|
+
python tools/sharegpt/prepare.py
|
|
139
150
|
|
|
140
151
|
# A quick subset for smoke tests:
|
|
141
|
-
python tools/
|
|
152
|
+
python tools/sharegpt/prepare.py --max-items 2000
|
|
142
153
|
```
|
|
143
154
|
|
|
144
155
|
The raw download is ~700 MB. Use `--min-chars` / `--max-chars` to drop empty or
|
|
@@ -147,7 +158,7 @@ row). Point any workload at the local file with `JsonlWorkload(path=...,
|
|
|
147
158
|
field="messages")`, or on the CLI:
|
|
148
159
|
|
|
149
160
|
```bash
|
|
150
|
-
|
|
161
|
+
benchmaker llm \
|
|
151
162
|
--url http://localhost:8000/v1/chat/completions \
|
|
152
163
|
--model meta-llama/Llama-3.1-8B-Instruct \
|
|
153
164
|
--prompts-jsonl .local/sharegpt_v3.jsonl \
|
|
@@ -157,7 +168,7 @@ bench-maker llm \
|
|
|
157
168
|
--out-dir ./runs --label dataset=sharegpt
|
|
158
169
|
```
|
|
159
170
|
|
|
160
|
-
To re-publish after regenerating, `tools/
|
|
171
|
+
To re-publish after regenerating, `tools/sharegpt/upload_hf.py` pushes the
|
|
161
172
|
JSONL back to the Hub (needs a write token).
|
|
162
173
|
|
|
163
174
|
## Documentation
|
|
@@ -174,6 +185,41 @@ Full docs live in [`docs/`](docs/):
|
|
|
174
185
|
- [Correctness / accuracy eval](docs/eval.md) — grade responses against references
|
|
175
186
|
- [CLI & YAML reference](docs/cli-and-yaml.md)
|
|
176
187
|
- [ShareGPT benchmark](docs/sharegpt-benchmark.md) — self-contained end-to-end walkthrough
|
|
188
|
+
- `benchmaker sglang` — native SGLang `/generate` benchmark (see [`docs/sglang.md`](docs/sglang.md)).
|
|
189
|
+
- `benchmaker trajectory-replay` — multi-turn prefix-cache parity replay of
|
|
190
|
+
trajectory datasets like SWE-smith (see [`docs/trajectory-replay.md`](docs/trajectory-replay.md)).
|
|
191
|
+
|
|
192
|
+
## Deterministic replay (`swebench-replay`)
|
|
193
|
+
|
|
194
|
+
Re-run a recorded SWE-bench job with the LLM **mocked from its own logs** — the
|
|
195
|
+
real pi + sandbox + verifier pipeline still runs, only the model is served back
|
|
196
|
+
from recorded outputs, so re-runs are deterministic and free of model
|
|
197
|
+
cost/variance. Vary `--concurrency` (or `--sweep`) to study the rest of the
|
|
198
|
+
pipeline without the model's stochasticity as a confound. Still needs
|
|
199
|
+
`FLASH_SANDBOX_URL` (the sandbox + verifier are real).
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
# 1) (optional) convert a job's pi logs to a replay store — the recipe can also
|
|
203
|
+
# do this inline via --job.
|
|
204
|
+
python -m benchmaker.swebench.trajectory jobs/2026-06-08__05-24-01_b352cb \
|
|
205
|
+
-o replay-trajectories.jsonl
|
|
206
|
+
|
|
207
|
+
# 2) replay (host mode, localhost) across a concurrency sweep
|
|
208
|
+
FLASH_SANDBOX_URL=http://localhost:8080 \
|
|
209
|
+
benchmaker swebench-replay --trajectories replay-trajectories.jsonl \
|
|
210
|
+
--mode pi-host --sweep 1,5,25
|
|
211
|
+
|
|
212
|
+
# container mode: bind 0.0.0.0 and tell the sandbox how to reach the server
|
|
213
|
+
FLASH_SANDBOX_URL=http://localhost:8080 \
|
|
214
|
+
benchmaker swebench-replay --job jobs/2026-06-08__05-24-01_b352cb \
|
|
215
|
+
--mode pi-container --host 0.0.0.0 --reachable-host "$(hostname -I | awk '{print $1}')"
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
The replay server is stateless: it picks each response by the task's identity
|
|
219
|
+
(the `# Task:` line, falling back to a hash of the full prompt when the recorded
|
|
220
|
+
run lacked an instance id) plus the count of assistant messages already in the
|
|
221
|
+
request — so it is correct at any concurrency. A `MISSES` column in the summary
|
|
222
|
+
flags any divergence (a request beyond the recorded turns).
|
|
177
223
|
|
|
178
224
|
## Examples
|
|
179
225
|
|
|
@@ -190,19 +236,29 @@ Under [`examples/`](examples/):
|
|
|
190
236
|
- `config.yaml` — generic HTTP YAML config
|
|
191
237
|
- `config_llm.yaml` — LLM YAML config with a Prometheus monitor
|
|
192
238
|
|
|
193
|
-
Helper
|
|
239
|
+
Helper tooling under [`tools/`](tools/), grouped by purpose:
|
|
194
240
|
|
|
195
|
-
- `
|
|
196
|
-
|
|
197
|
-
- `
|
|
241
|
+
- `sharegpt/` — `prepare.py` (fetch ShareGPT V3 → JSONL) + `upload_hf.py`
|
|
242
|
+
(push to the HF Hub with a write token)
|
|
243
|
+
- `swe_images/` — mirror SWE-bench/R2E-Gym container images to ghcr
|
|
244
|
+
(`publish.py`) and list the published refs (`pull.py`)
|
|
245
|
+
- `agent_warmup/` — build the agent-warmup SFT dataset
|
|
246
|
+
(`python -m tools.agent_warmup.cli`)
|
|
247
|
+
- `start_local_llm.sh` — example local SGLang launch command
|
|
198
248
|
|
|
199
249
|
## Project layout
|
|
200
250
|
|
|
201
251
|
```
|
|
202
252
|
benchmaker/ # library code
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
253
|
+
__init__.py # public API (re-exports); cli.py — the `benchmaker` CLI
|
|
254
|
+
config.py env.py # YAML config loading + .env interpolation
|
|
255
|
+
core/ # engine: types, load models, runner, metrics, monitors, trace
|
|
256
|
+
io/ # run output: per-run bundle + cross-run collection
|
|
257
|
+
workloads/ # workload-types (http, llm, sandbox, agent, hf, eval)
|
|
258
|
+
recipes/ # CLI recipes (http, llm, sandbox, swebench, swebench-replay) + registry
|
|
259
|
+
swebench/ # SWE-bench coding agent + grading + harbor adapters
|
|
260
|
+
examples/ # runnable examples (incl. swebench/ coding-agent config)
|
|
261
|
+
tools/ # out-of-tree tooling: sharegpt/, swe_images/, agent_warmup/
|
|
206
262
|
tests/ # pytest smoke tests
|
|
207
263
|
docs/ # reference docs
|
|
208
264
|
```
|
|
@@ -1,23 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
Name: benchmaker
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: Async HTTP benchmarking utility with pluggable workloads and load models.
|
|
5
|
-
Author: Xiaozhe Yao
|
|
6
|
-
License: MIT
|
|
7
|
-
Requires-Python: >=3.10
|
|
8
|
-
Description-Content-Type: text/markdown
|
|
9
|
-
Requires-Dist: aiohttp>=3.9
|
|
10
|
-
Requires-Dist: click>=8.1
|
|
11
|
-
Requires-Dist: pyyaml>=6.0
|
|
12
|
-
Provides-Extra: rich
|
|
13
|
-
Requires-Dist: rich>=13; extra == "rich"
|
|
14
|
-
Provides-Extra: hf
|
|
15
|
-
Requires-Dist: datasets>=2.18; extra == "hf"
|
|
16
|
-
Provides-Extra: dev
|
|
17
|
-
Requires-Dist: pytest>=7; extra == "dev"
|
|
18
|
-
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
19
|
-
|
|
20
|
-
# bench-maker
|
|
1
|
+
# benchmaker
|
|
21
2
|
|
|
22
3
|
Async HTTP benchmarking with pluggable workload-types (protocols), workloads
|
|
23
4
|
(datasets), load models, hooks, and optional periodic monitors.
|
|
@@ -44,7 +25,7 @@ pip install -e .
|
|
|
44
25
|
pip install -e .[dev] # for tests
|
|
45
26
|
```
|
|
46
27
|
|
|
47
|
-
This installs the `benchmaker` Python package and the `bench-maker` CLI.
|
|
28
|
+
This installs the `benchmaker` Python package and the `benchmaker` CLI.
|
|
48
29
|
|
|
49
30
|
## 30-second tour
|
|
50
31
|
|
|
@@ -63,10 +44,12 @@ async def main():
|
|
|
63
44
|
asyncio.run(main())
|
|
64
45
|
```
|
|
65
46
|
|
|
66
|
-
Or via the CLI:
|
|
47
|
+
Or via the CLI. Workload-specific benchmarks are exposed as **recipes** —
|
|
48
|
+
`benchmaker <recipe> --args` (`http`, `llm`, `sandbox`, `swebench`, `sglang`,
|
|
49
|
+
`trajectory-replay`):
|
|
67
50
|
|
|
68
51
|
```bash
|
|
69
|
-
|
|
52
|
+
benchmaker http --url https://httpbin.org/get --rate poisson:50 --duration 10s
|
|
70
53
|
```
|
|
71
54
|
|
|
72
55
|
## Walkthrough: benchmarking an LLM endpoint with ShareGPT
|
|
@@ -129,16 +112,16 @@ also come from `.env` via `OpenAIChatWorkloadType.from_env(...)`.
|
|
|
129
112
|
|
|
130
113
|
### Rebuild or customize it yourself
|
|
131
114
|
|
|
132
|
-
The published split is produced by `tools/
|
|
115
|
+
The published split is produced by `tools/sharegpt/prepare.py`, which downloads
|
|
133
116
|
the upstream JSON once into `.local/` (gitignored) and converts it to the JSONL
|
|
134
117
|
shape above. Run it when you want a subset, different filtering, or a refresh:
|
|
135
118
|
|
|
136
119
|
```bash
|
|
137
120
|
# Defaults: .local/sharegpt_v3_raw.json -> .local/sharegpt_v3.jsonl
|
|
138
|
-
python tools/
|
|
121
|
+
python tools/sharegpt/prepare.py
|
|
139
122
|
|
|
140
123
|
# A quick subset for smoke tests:
|
|
141
|
-
python tools/
|
|
124
|
+
python tools/sharegpt/prepare.py --max-items 2000
|
|
142
125
|
```
|
|
143
126
|
|
|
144
127
|
The raw download is ~700 MB. Use `--min-chars` / `--max-chars` to drop empty or
|
|
@@ -147,7 +130,7 @@ row). Point any workload at the local file with `JsonlWorkload(path=...,
|
|
|
147
130
|
field="messages")`, or on the CLI:
|
|
148
131
|
|
|
149
132
|
```bash
|
|
150
|
-
|
|
133
|
+
benchmaker llm \
|
|
151
134
|
--url http://localhost:8000/v1/chat/completions \
|
|
152
135
|
--model meta-llama/Llama-3.1-8B-Instruct \
|
|
153
136
|
--prompts-jsonl .local/sharegpt_v3.jsonl \
|
|
@@ -157,7 +140,7 @@ bench-maker llm \
|
|
|
157
140
|
--out-dir ./runs --label dataset=sharegpt
|
|
158
141
|
```
|
|
159
142
|
|
|
160
|
-
To re-publish after regenerating, `tools/
|
|
143
|
+
To re-publish after regenerating, `tools/sharegpt/upload_hf.py` pushes the
|
|
161
144
|
JSONL back to the Hub (needs a write token).
|
|
162
145
|
|
|
163
146
|
## Documentation
|
|
@@ -174,6 +157,41 @@ Full docs live in [`docs/`](docs/):
|
|
|
174
157
|
- [Correctness / accuracy eval](docs/eval.md) — grade responses against references
|
|
175
158
|
- [CLI & YAML reference](docs/cli-and-yaml.md)
|
|
176
159
|
- [ShareGPT benchmark](docs/sharegpt-benchmark.md) — self-contained end-to-end walkthrough
|
|
160
|
+
- `benchmaker sglang` — native SGLang `/generate` benchmark (see [`docs/sglang.md`](docs/sglang.md)).
|
|
161
|
+
- `benchmaker trajectory-replay` — multi-turn prefix-cache parity replay of
|
|
162
|
+
trajectory datasets like SWE-smith (see [`docs/trajectory-replay.md`](docs/trajectory-replay.md)).
|
|
163
|
+
|
|
164
|
+
## Deterministic replay (`swebench-replay`)
|
|
165
|
+
|
|
166
|
+
Re-run a recorded SWE-bench job with the LLM **mocked from its own logs** — the
|
|
167
|
+
real pi + sandbox + verifier pipeline still runs, only the model is served back
|
|
168
|
+
from recorded outputs, so re-runs are deterministic and free of model
|
|
169
|
+
cost/variance. Vary `--concurrency` (or `--sweep`) to study the rest of the
|
|
170
|
+
pipeline without the model's stochasticity as a confound. Still needs
|
|
171
|
+
`FLASH_SANDBOX_URL` (the sandbox + verifier are real).
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
# 1) (optional) convert a job's pi logs to a replay store — the recipe can also
|
|
175
|
+
# do this inline via --job.
|
|
176
|
+
python -m benchmaker.swebench.trajectory jobs/2026-06-08__05-24-01_b352cb \
|
|
177
|
+
-o replay-trajectories.jsonl
|
|
178
|
+
|
|
179
|
+
# 2) replay (host mode, localhost) across a concurrency sweep
|
|
180
|
+
FLASH_SANDBOX_URL=http://localhost:8080 \
|
|
181
|
+
benchmaker swebench-replay --trajectories replay-trajectories.jsonl \
|
|
182
|
+
--mode pi-host --sweep 1,5,25
|
|
183
|
+
|
|
184
|
+
# container mode: bind 0.0.0.0 and tell the sandbox how to reach the server
|
|
185
|
+
FLASH_SANDBOX_URL=http://localhost:8080 \
|
|
186
|
+
benchmaker swebench-replay --job jobs/2026-06-08__05-24-01_b352cb \
|
|
187
|
+
--mode pi-container --host 0.0.0.0 --reachable-host "$(hostname -I | awk '{print $1}')"
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
The replay server is stateless: it picks each response by the task's identity
|
|
191
|
+
(the `# Task:` line, falling back to a hash of the full prompt when the recorded
|
|
192
|
+
run lacked an instance id) plus the count of assistant messages already in the
|
|
193
|
+
request — so it is correct at any concurrency. A `MISSES` column in the summary
|
|
194
|
+
flags any divergence (a request beyond the recorded turns).
|
|
177
195
|
|
|
178
196
|
## Examples
|
|
179
197
|
|
|
@@ -190,19 +208,29 @@ Under [`examples/`](examples/):
|
|
|
190
208
|
- `config.yaml` — generic HTTP YAML config
|
|
191
209
|
- `config_llm.yaml` — LLM YAML config with a Prometheus monitor
|
|
192
210
|
|
|
193
|
-
Helper
|
|
211
|
+
Helper tooling under [`tools/`](tools/), grouped by purpose:
|
|
194
212
|
|
|
195
|
-
- `
|
|
196
|
-
|
|
197
|
-
- `
|
|
213
|
+
- `sharegpt/` — `prepare.py` (fetch ShareGPT V3 → JSONL) + `upload_hf.py`
|
|
214
|
+
(push to the HF Hub with a write token)
|
|
215
|
+
- `swe_images/` — mirror SWE-bench/R2E-Gym container images to ghcr
|
|
216
|
+
(`publish.py`) and list the published refs (`pull.py`)
|
|
217
|
+
- `agent_warmup/` — build the agent-warmup SFT dataset
|
|
218
|
+
(`python -m tools.agent_warmup.cli`)
|
|
219
|
+
- `start_local_llm.sh` — example local SGLang launch command
|
|
198
220
|
|
|
199
221
|
## Project layout
|
|
200
222
|
|
|
201
223
|
```
|
|
202
224
|
benchmaker/ # library code
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
225
|
+
__init__.py # public API (re-exports); cli.py — the `benchmaker` CLI
|
|
226
|
+
config.py env.py # YAML config loading + .env interpolation
|
|
227
|
+
core/ # engine: types, load models, runner, metrics, monitors, trace
|
|
228
|
+
io/ # run output: per-run bundle + cross-run collection
|
|
229
|
+
workloads/ # workload-types (http, llm, sandbox, agent, hf, eval)
|
|
230
|
+
recipes/ # CLI recipes (http, llm, sandbox, swebench, swebench-replay) + registry
|
|
231
|
+
swebench/ # SWE-bench coding agent + grading + harbor adapters
|
|
232
|
+
examples/ # runnable examples (incl. swebench/ coding-agent config)
|
|
233
|
+
tools/ # out-of-tree tooling: sharegpt/, swe_images/, agent_warmup/
|
|
206
234
|
tests/ # pytest smoke tests
|
|
207
235
|
docs/ # reference docs
|
|
208
236
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""benchmaker: async HTTP benchmarking with pluggable workload-types + workloads (datasets)."""
|
|
2
2
|
|
|
3
|
-
from benchmaker.types import (
|
|
3
|
+
from benchmaker.core.types import (
|
|
4
4
|
Request,
|
|
5
5
|
Response,
|
|
6
6
|
Sample,
|
|
@@ -19,6 +19,8 @@ from benchmaker.workloads.http import HttpWorkloadType
|
|
|
19
19
|
from benchmaker.workloads.llm import OpenAIChatWorkloadType
|
|
20
20
|
from benchmaker.workloads.sandbox import SandboxWorkloadType
|
|
21
21
|
from benchmaker.workloads.hf import HFDatasetWorkload
|
|
22
|
+
from benchmaker.workloads.sglang import SGLangGenerateWorkloadType
|
|
23
|
+
from benchmaker.workloads.trajectory import TrajectoryReplayWorkload
|
|
22
24
|
from benchmaker.workloads.agent import (
|
|
23
25
|
Agent,
|
|
24
26
|
AgentContext,
|
|
@@ -41,7 +43,7 @@ from benchmaker.workloads.eval import (
|
|
|
41
43
|
judge_llm,
|
|
42
44
|
openai_chat_judge,
|
|
43
45
|
)
|
|
44
|
-
from benchmaker.load import (
|
|
46
|
+
from benchmaker.core.load import (
|
|
45
47
|
LoadModel,
|
|
46
48
|
ConstantRPS,
|
|
47
49
|
PoissonRPS,
|
|
@@ -51,21 +53,21 @@ from benchmaker.load import (
|
|
|
51
53
|
parse_rate_spec,
|
|
52
54
|
)
|
|
53
55
|
from benchmaker.env import interpolate, load_dotenv
|
|
54
|
-
from benchmaker.monitors import (
|
|
56
|
+
from benchmaker.core.monitors import (
|
|
55
57
|
Monitor,
|
|
56
58
|
FunctionMonitor,
|
|
57
59
|
PrometheusMonitor,
|
|
58
60
|
parse_prometheus,
|
|
59
61
|
)
|
|
60
|
-
from benchmaker.runner import BenchRunner, BenchConfig, BenchResult
|
|
61
|
-
from benchmaker.trace import (
|
|
62
|
+
from benchmaker.core.runner import BenchRunner, BenchConfig, BenchResult
|
|
63
|
+
from benchmaker.core.trace import (
|
|
62
64
|
ReplayWorkloadType,
|
|
63
65
|
TracePacedLoad,
|
|
64
66
|
TraceRecorder,
|
|
65
67
|
TraceWorkload,
|
|
66
68
|
load_trace,
|
|
67
69
|
)
|
|
68
|
-
from benchmaker.bundle import (
|
|
70
|
+
from benchmaker.io.bundle import (
|
|
69
71
|
BUNDLE_VERSION,
|
|
70
72
|
RunMeta,
|
|
71
73
|
default_run_id,
|
|
@@ -87,6 +89,8 @@ __all__ = [
|
|
|
87
89
|
"OpenAIChatWorkloadType",
|
|
88
90
|
"SandboxWorkloadType",
|
|
89
91
|
"HFDatasetWorkload",
|
|
92
|
+
"SGLangGenerateWorkloadType",
|
|
93
|
+
"TrajectoryReplayWorkload",
|
|
90
94
|
# agent workload (pluggable user-defined agents)
|
|
91
95
|
"Agent",
|
|
92
96
|
"AgentContext",
|
|
@@ -149,4 +153,4 @@ __all__ = [
|
|
|
149
153
|
"write_bundle",
|
|
150
154
|
]
|
|
151
155
|
|
|
152
|
-
__version__ = "0.1.
|
|
156
|
+
__version__ = "0.1.1"
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""benchmaker CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import sys
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
import yaml
|
|
12
|
+
|
|
13
|
+
from benchmaker.config import build_config
|
|
14
|
+
from benchmaker.core.runner import BenchRunner
|
|
15
|
+
from benchmaker.recipes import all_recipes
|
|
16
|
+
from benchmaker.recipes._cli_shared import (
|
|
17
|
+
output_options as _output_options,
|
|
18
|
+
parse_headers as _parse_headers,
|
|
19
|
+
write_bundle_if_requested as _write_bundle_if_requested,
|
|
20
|
+
)
|
|
21
|
+
from benchmaker.recipes._factory import make_command
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------- main
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@click.group()
@click.option(
    "--log-level",
    default="INFO",
    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"], case_sensitive=False),
    help="Logging level (default: INFO).",
)
def main(log_level: str) -> None:
    """[benchmaker]: async HTTP benchmarking with pluggable workloads."""
    # NOTE: click renders the docstring above as the group's --help text, so it
    # is user-facing output and is kept verbatim.
    #
    # --log-level is case-insensitive on the command line; normalise it once so
    # both basicConfig and the DEBUG comparison below see the canonical name.
    level_name = log_level.upper()
    logging.basicConfig(
        level=level_name,
        format="%(asctime)s [%(name)s] %(message)s",
        datefmt="%H:%M:%S",
    )

    # With DEBUG requested, every logger stays at the root level.
    if level_name == "DEBUG":
        return

    # Otherwise pin the chatty third-party loggers (one INFO line per HTTP
    # request / hub fetch) to WARNING so they do not drown out our own output.
    chatty_loggers = (
        "httpx", "httpcore", "urllib3", "huggingface_hub",
        "filelock", "fsspec", "datasets", "aiohttp",
    )
    for logger_name in chatty_loggers:
        logging.getLogger(logger_name).setLevel(logging.WARNING)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@main.command()
@click.argument("config_path", type=click.Path(exists=True, dir_okay=False))
@_output_options
@click.option("--dotenv", type=click.Path(), default=".env",
              help="Path to .env file to load (default: .env). "
                   "Use --dotenv '' to disable.")
@click.option("--record", "record_path", type=click.Path(), default=None,
              help="Write a JSONL request trace (with relative timestamps) to "
                   "this path. A later run can replay it deterministically via "
                   "a 'replay:' config block. Overrides any 'record:' in YAML.")
@click.option("--replay", "replay_path", type=click.Path(exists=True, dir_okay=False),
              default=None,
              help="Replay a previously recorded trace at the same relative "
                   "timings. Overrides 'workload_type' / 'workload' / 'load' "
                   "(and any 'replay:' in YAML).")
@click.option("--replay-speed", type=float, default=None,
              help="Speed multiplier for --replay (default 1.0).")
@click.option("--quiet", is_flag=True, help="Suppress progress output.")
def run(config_path: str, out_dir: str | None, run_id: str | None,
        labels: tuple[str, ...], notes: str, dotenv: str,
        record_path: str | None, replay_path: str | None,
        replay_speed: float | None, quiet: bool) -> None:
    """Run a benchmark from a YAML config file.

    Environment variables (loaded from `.env` by default) are interpolated
    into the YAML using `${VAR}` or `${VAR:-default}` syntax.
    """
    # NOTE: the docstring above is rendered by click as this command's --help
    # text, so it stays user-facing; implementation notes live in comments.

    # YAML is UTF-8 by default; do not depend on the platform locale encoding.
    with open(config_path, encoding="utf-8") as f:
        raw_cfg = yaml.safe_load(f)

    # safe_load returns None for an empty file and a list/scalar for a file
    # that is not a mapping. The dict merges below would then fail with a bare
    # TypeError, so report the problem as a usage error instead.
    if not isinstance(raw_cfg, dict):
        raise click.UsageError(
            f"{config_path}: expected a YAML mapping at the top level, "
            f"got {type(raw_cfg).__name__}."
        )

    # Command-line flags take precedence over the matching YAML blocks. The
    # merges build new dicts instead of mutating the loaded config in place.
    if record_path is not None:
        raw_cfg = {**raw_cfg, "record": {"path": record_path}}
    if replay_path is not None:
        replay_cfg: dict = {"path": replay_path}
        if replay_speed is not None:
            replay_cfg["speed"] = replay_speed
        raw_cfg = {**raw_cfg, "replay": replay_cfg}

    # `--dotenv ''` arrives as an empty string; pass None so no file is loaded.
    bench_cfg = build_config(raw_cfg, dotenv_path=(dotenv or None))
    if quiet:
        # --quiet is documented as suppressing progress output.
        bench_cfg.progress_every_s = 0.0

    runner = BenchRunner(bench_cfg)
    asyncio.run(runner.run())
    runner.metrics.render(sys.stdout)
    _write_bundle_if_requested(runner, raw_cfg, out_dir, run_id, labels, notes)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@main.command()
@click.option("--url", required=True, help="Target URL.")
@click.option("--method", default="GET")
@click.option("--header", "-H", multiple=True, help="Header 'Name: value'.")
@click.option("--json-body", default=None, help="JSON body string.")
@click.option("--data", default=None, help="Raw body string.")
@click.option("--rate", default="10", help="Load spec, e.g. '100', 'poisson:100', "
                                           "'closed:32', 'ramp:10..500:30s'.")
@click.option("--duration", default="10s", help="Run duration (e.g. '30s', '2m').")
@click.option("--max-requests", type=int, default=None)
@click.option("--timeout", "timeout_s", default=60.0, type=float)
@click.option("--connection-limit", default=1000, type=int)
@_output_options
@click.option("--quiet", is_flag=True)
def quick(url: str, method: str, header: tuple[str, ...], json_body: str | None,
          data: str | None, rate: str, duration: str, max_requests: int | None,
          timeout_s: float, connection_limit: int,
          out_dir: str | None, run_id: str | None,
          labels: tuple[str, ...], notes: str, quiet: bool) -> None:
    """[deprecated] One-liner HTTP benchmark — use `benchmaker http` instead."""
    # NOTE: the docstring above is rendered by click as this command's --help
    # text, so it stays user-facing; implementation notes live in comments.
    sys.stderr.write(
        "[benchmaker] 'quick' is deprecated; use 'benchmaker http'.\n"
    )

    # Build the same dict shape that a YAML config file would produce.
    cfg: dict = {
        "workload_type": {
            "type": "http",
            "url": url,
            "method": method,
            "headers": _parse_headers(header),
            "timeout_s": timeout_s,
        },
        "load": rate,
        "duration": duration,
        "max_requests": max_requests,
        "timeout_s": timeout_s,
        "connection_limit": connection_limit,
    }

    # --json-body wins over --data when both are given.
    if json_body is not None:
        try:
            payload = json.loads(json_body)
        except json.JSONDecodeError as exc:
            # Report malformed JSON as a normal click parameter error instead
            # of letting a raw traceback escape from the CLI.
            raise click.BadParameter(
                f"not valid JSON: {exc}", param_hint="--json-body"
            ) from exc
        cfg["workload"] = {"type": "static", "items": [payload]}
    elif data is not None:
        # NOTE(review): this puts a bytes item into `cfg`, which is later
        # passed to the bundle writer — confirm the writer can serialise it.
        cfg["workload"] = {"type": "static", "items": [data.encode("utf-8")]}

    bench_cfg = build_config(cfg)
    if quiet:
        bench_cfg.progress_every_s = 0.0

    runner = BenchRunner(bench_cfg)
    asyncio.run(runner.run())
    runner.metrics.render(sys.stdout)
    _write_bundle_if_requested(runner, cfg, out_dir, run_id, labels, notes)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ---------------------------------------------------------------- collect
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@main.command()
@click.argument(
    "paths",
    nargs=-1,
    required=True,
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["md", "csv", "json"]),
    default="md",
    show_default=True,
    help="Output format. 'md' is a Markdown table, 'csv' is comma-separated, "
         "'json' is a JSON array of row dicts.",
)
@click.option(
    "--metric",
    "metrics",
    multiple=True,
    help="Extra dotted-path metric to add as a column "
         "(e.g. 'workload_metrics.ttft_s.p50'). Repeatable.",
)
@click.option(
    "--columns",
    default=None,
    help="Comma-separated list of column names to keep (after metrics are added). "
         "Overrides the default column set.",
)
@click.option(
    "--sort-by",
    default=None,
    help="Column name to sort rows by (ascending).",
)
@click.option(
    "--label",
    "label_keys",
    multiple=True,
    help="Promote a meta.labels[<key>] entry into its own column. Repeatable.",
)
@click.option(
    "--recursive/--no-recursive",
    default=True,
    help="When a path is a directory of run-dirs, descend one level to find them.",
)
def collect(
    paths: tuple[str, ...],
    fmt: str,
    metrics: tuple[str, ...],
    columns: str | None,
    sort_by: str | None,
    label_keys: tuple[str, ...],
    recursive: bool,
) -> None:
    """Collect summaries from one or more run-dirs into a table.

    Each PATH may be a run directory (containing meta.json + summary.json) or a
    directory of such run-dirs. With --recursive (default), a non-bundle
    directory is scanned for immediate subdirectories that are bundles.
    """
    # NOTE: the docstring above doubles as the `--help` text click prints, so
    # it is user-facing output rather than internal documentation.

    # Imported here rather than at module level (as in the original), so the
    # collect machinery is only loaded when this subcommand actually runs.
    from benchmaker.io.collect import collect_table, find_bundles, format_table

    # Resolve every PATH argument to the bundle directories it contains,
    # preserving the order in which the paths were given.
    bundle_dirs: list[str] = [
        bundle
        for path in paths
        for bundle in find_bundles(path, recursive=recursive)
    ]
    if not bundle_dirs:
        raise click.UsageError(
            f"No run bundles found under: {', '.join(paths)}. "
            "Run bundles must contain meta.json and summary.json."
        )

    rows, column_names = collect_table(
        bundle_dirs,
        extra_metrics=list(metrics),
        label_keys=list(label_keys),
    )

    if columns:
        # An explicit --columns list replaces the default column set; blank
        # entries (e.g. from a trailing comma) are dropped.
        stripped = (name.strip() for name in columns.split(","))
        column_names = [name for name in stripped if name]

    if sort_by:
        def _missing_last(row):
            # Rows lacking the sort column (value None) sort after all others.
            value = row.get(sort_by)
            return (value is None, value)

        rows.sort(key=_missing_last)

    rendered = format_table(rows, column_names, fmt)
    if fmt != "json":
        # Non-JSON output gets a trailing newline appended; JSON is written
        # exactly as format_table returned it.
        rendered += "\n"
    sys.stdout.write(rendered)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# ---------------------------------------------------------------- recipes
#
# Each registered recipe (http, llm, sandbox, swebench, ...) is exposed as a
# `benchmaker <recipe> --args` subcommand, built from the recipe's options plus
# the shared load/output options. See benchmaker/recipes/.

# One click command is built per recipe returned by all_recipes() and attached
# to the `main` group.
# NOTE(review): this appears to run at import time (module top level), so the
# subcommands exist before `main()` parses argv — confirm against the full file.
for _recipe in all_recipes():
    main.add_command(make_command(_recipe))


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -20,9 +20,9 @@ import importlib
|
|
|
20
20
|
from typing import Any, Callable, Optional
|
|
21
21
|
|
|
22
22
|
from benchmaker.env import interpolate, load_dotenv
|
|
23
|
-
from benchmaker.load import parse_duration, parse_rate_spec
|
|
24
|
-
from benchmaker.monitors import FunctionMonitor, Monitor, PrometheusMonitor
|
|
25
|
-
from benchmaker.runner import BenchConfig
|
|
23
|
+
from benchmaker.core.load import parse_duration, parse_rate_spec
|
|
24
|
+
from benchmaker.core.monitors import FunctionMonitor, Monitor, PrometheusMonitor
|
|
25
|
+
from benchmaker.core.runner import BenchConfig
|
|
26
26
|
from benchmaker.workloads.base import WorkloadType
|
|
27
27
|
from benchmaker.workloads.datasets import (
|
|
28
28
|
CallableWorkload,
|
|
@@ -47,7 +47,7 @@ from benchmaker.workloads.eval import (
|
|
|
47
47
|
openai_chat_judge,
|
|
48
48
|
regex_match,
|
|
49
49
|
)
|
|
50
|
-
from benchmaker.trace import (
|
|
50
|
+
from benchmaker.core.trace import (
|
|
51
51
|
ReplayWorkloadType,
|
|
52
52
|
TracePacedLoad,
|
|
53
53
|
TraceRecorder,
|
|
@@ -88,6 +88,9 @@ def build_workload_type(spec: dict) -> WorkloadType:
|
|
|
88
88
|
return HttpWorkloadType(**kwargs)
|
|
89
89
|
if t in ("openai", "openai-chat", "llm-chat", "llm"):
|
|
90
90
|
return OpenAIChatWorkloadType(**kwargs)
|
|
91
|
+
if t in ("sglang", "sglang-generate"):
|
|
92
|
+
from benchmaker.workloads.sglang import SGLangGenerateWorkloadType
|
|
93
|
+
return SGLangGenerateWorkloadType(**kwargs)
|
|
91
94
|
if t in ("sandbox", "flash-sandbox"):
|
|
92
95
|
return SandboxWorkloadType(**kwargs)
|
|
93
96
|
if t == "agent":
|
|
@@ -151,6 +154,9 @@ def build_workload(spec: Any) -> Workload:
|
|
|
151
154
|
return CallableWorkload(fn=fn, **kwargs)
|
|
152
155
|
if t in ("hf", "huggingface"):
|
|
153
156
|
return HFDatasetWorkload(**kwargs)
|
|
157
|
+
if t == "trajectory":
|
|
158
|
+
from benchmaker.workloads.trajectory import TrajectoryReplayWorkload
|
|
159
|
+
return TrajectoryReplayWorkload(**kwargs)
|
|
154
160
|
raise ValueError(f"Unknown workload type {t!r}")
|
|
155
161
|
|
|
156
162
|
|
|
@@ -9,7 +9,7 @@ from collections import Counter, defaultdict
|
|
|
9
9
|
from dataclasses import dataclass, field
|
|
10
10
|
from typing import Optional, TextIO
|
|
11
11
|
|
|
12
|
-
from benchmaker.types import Sample
|
|
12
|
+
from benchmaker.core.types import Sample
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def _pct(xs: list[float], p: float) -> float:
|