benchmaker 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
benchmaker/__init__.py ADDED
@@ -0,0 +1,152 @@
1
+ """benchmaker: async HTTP benchmarking with pluggable workload-types + workloads (datasets)."""
2
+
3
+ from benchmaker.types import (
4
+ Request,
5
+ Response,
6
+ Sample,
7
+ PreRequestHook,
8
+ PostResponseHook,
9
+ )
10
+ from benchmaker.workloads.base import WorkloadType
11
+ from benchmaker.workloads.datasets import (
12
+ Workload,
13
+ StaticWorkload,
14
+ JsonlWorkload,
15
+ CallableWorkload,
16
+ IterableWorkload,
17
+ )
18
+ from benchmaker.workloads.http import HttpWorkloadType
19
+ from benchmaker.workloads.llm import OpenAIChatWorkloadType
20
+ from benchmaker.workloads.sandbox import SandboxWorkloadType
21
+ from benchmaker.workloads.hf import HFDatasetWorkload
22
+ from benchmaker.workloads.agent import (
23
+ Agent,
24
+ AgentContext,
25
+ AgentResult,
26
+ AgentWorkloadType,
27
+ CallableAgent,
28
+ )
29
+ from benchmaker.workloads.eval import (
30
+ EvalWorkloadType,
31
+ Scorer,
32
+ correctness_hook,
33
+ extract_openai_text,
34
+ extract_raw_text,
35
+ extract_text,
36
+ exact_match,
37
+ contains,
38
+ regex_match,
39
+ json_valid,
40
+ multiple_choice,
41
+ judge_llm,
42
+ openai_chat_judge,
43
+ )
44
+ from benchmaker.load import (
45
+ LoadModel,
46
+ ConstantRPS,
47
+ PoissonRPS,
48
+ ClosedLoop,
49
+ Sweep,
50
+ Ramp,
51
+ parse_rate_spec,
52
+ )
53
+ from benchmaker.env import interpolate, load_dotenv
54
+ from benchmaker.monitors import (
55
+ Monitor,
56
+ FunctionMonitor,
57
+ PrometheusMonitor,
58
+ parse_prometheus,
59
+ )
60
+ from benchmaker.runner import BenchRunner, BenchConfig, BenchResult
61
+ from benchmaker.trace import (
62
+ ReplayWorkloadType,
63
+ TracePacedLoad,
64
+ TraceRecorder,
65
+ TraceWorkload,
66
+ load_trace,
67
+ )
68
+ from benchmaker.bundle import (
69
+ BUNDLE_VERSION,
70
+ RunMeta,
71
+ default_run_id,
72
+ is_bundle_dir,
73
+ iter_jsonl,
74
+ read_bundle,
75
+ write_bundle,
76
+ )
77
+
78
# Public API of the package: every name below is imported at the top of this
# module and re-exported from the `benchmaker` namespace.
__all__ = [
    # core types and hook signatures (imported from benchmaker.types)
    "Request",
    "Response",
    "Sample",
    "PreRequestHook",
    "PostResponseHook",
    # workload-types (protocols)
    "WorkloadType",
    "HttpWorkloadType",
    "OpenAIChatWorkloadType",
    "SandboxWorkloadType",
    # NOTE(review): HFDatasetWorkload is named like a dataset workload rather
    # than a workload-type; confirm it is grouped under the right heading.
    "HFDatasetWorkload",
    # agent workload (pluggable user-defined agents)
    "Agent",
    "AgentContext",
    "AgentResult",
    "AgentWorkloadType",
    "CallableAgent",
    # eval / correctness
    "EvalWorkloadType",
    "Scorer",
    "correctness_hook",
    "extract_openai_text",
    "extract_raw_text",
    "extract_text",
    "exact_match",
    "contains",
    "regex_match",
    "json_valid",
    "multiple_choice",
    "judge_llm",
    "openai_chat_judge",
    # workloads (datasets / input sources)
    "Workload",
    "StaticWorkload",
    "JsonlWorkload",
    "CallableWorkload",
    "IterableWorkload",
    # load models
    "LoadModel",
    "ConstantRPS",
    "PoissonRPS",
    "ClosedLoop",
    "Sweep",
    "Ramp",
    "parse_rate_spec",
    # monitors
    "Monitor",
    "FunctionMonitor",
    "PrometheusMonitor",
    "parse_prometheus",
    # env
    "load_dotenv",
    "interpolate",
    # runner
    "BenchRunner",
    "BenchConfig",
    "BenchResult",
    # trace: record & replay
    "TraceRecorder",
    "ReplayWorkloadType",
    "TraceWorkload",
    "TracePacedLoad",
    "load_trace",
    # bundle / output layout
    "BUNDLE_VERSION",
    "RunMeta",
    "default_run_id",
    "is_bundle_dir",
    "iter_jsonl",
    "read_bundle",
    "write_bundle",
]

# Package version. benchmaker.bundle.write_bundle imports this value and
# records it as "bench_maker_version" in each run's meta.json.
__version__ = "0.1.0"
benchmaker/bundle.py ADDED
@@ -0,0 +1,193 @@
1
+ """Per-run output bundle.
2
+
3
+ Each benchmark run writes a directory:
4
+
5
+ <out_dir>/<run_id>/
6
+ meta.json # run identifiers, timestamps, resolved config
7
+ summary.json # aggregated metrics
8
+ samples.jsonl # one record per request
9
+ monitors.jsonl # one record per monitor tick
10
+
11
+ The bundle is the unit of result interchange. `bench-maker collect` walks
12
+ one or more such directories and pivots the summaries into a table.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import os
19
+ import platform
20
+ import socket
21
+ import time
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime, timezone
24
+ from typing import Any, Optional
25
+
26
+ from benchmaker.metrics import MetricsAggregator, _safe_meta
27
+
28
+
29
# Version number of the bundle layout; RunMeta stores it as "bundle_version".
BUNDLE_VERSION = 1
# File names used inside a run directory (see the module docstring).
META_FILENAME = "meta.json"
SUMMARY_FILENAME = "summary.json"
SAMPLES_FILENAME = "samples.jsonl"
MONITORS_FILENAME = "monitors.jsonl"
34
+
35
+
36
def default_run_id(prefix: Optional[str] = None) -> str:
    """Return a filesystem-safe run id derived from the current UTC time.

    The id is a compact timestamp such as `20260526T142233Z`. When `prefix`
    is truthy it is prepended with a dash, e.g. `prefix-20260526T142233Z`;
    `None` and the empty string both yield the bare timestamp.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    if not prefix:
        return stamp
    return f"{prefix}-{stamp}"
40
+
41
+
42
@dataclass
class RunMeta:
    """Provenance record stored alongside a run's results.

    The first six fields are required and identify the run, its time span and
    what was benchmarked. The remaining fields default to facts about the
    current host and interpreter, or to empty values.
    """

    run_id: str
    started_at: str  # ISO-8601 UTC
    ended_at: str  # ISO-8601 UTC
    wall_time_s: float
    workload_type: str  # e.g. "http", "openai-chat"
    workload: str  # e.g. "static", "jsonl"
    hostname: str = field(default_factory=socket.gethostname)
    python_version: str = field(default_factory=platform.python_version)
    bench_maker_version: str = ""  # filled in by write_bundle
    bundle_version: int = BUNDLE_VERSION
    source_config: dict = field(default_factory=dict)
    labels: dict = field(default_factory=dict)
    notes: str = ""

    def to_dict(self) -> dict:
        """Return the record as a plain dict in the key order used for meta.json.

        All fields are copied as-is except `source_config`, which is passed
        through `_safe_meta` first and emitted last.
        """
        # Order matters: it is the key order of the serialized output.
        verbatim_keys = (
            "run_id",
            "bundle_version",
            "bench_maker_version",
            "started_at",
            "ended_at",
            "wall_time_s",
            "hostname",
            "python_version",
            "workload_type",
            "workload",
            "labels",
            "notes",
        )
        payload = {key: getattr(self, key) for key in verbatim_keys}
        payload["source_config"] = _safe_meta(self.source_config)
        return payload
76
+
77
+
78
def write_bundle(
    out_dir: str,
    metrics: MetricsAggregator,
    *,
    workload_type_name: str,
    workload_name: str,
    run_id: Optional[str] = None,
    source_config: Optional[dict] = None,
    labels: Optional[dict] = None,
    notes: str = "",
    started_wall: Optional[float] = None,
    ended_wall: Optional[float] = None,
) -> str:
    """Persist a finished run as a bundle directory and return its absolute path.

    `out_dir` is the parent directory; the run itself is written to
    `<out_dir>/<run_id>/`. When `run_id` is None a timestamp id is generated
    with `default_run_id()`. Pass `run_id=""` when `out_dir` already is the
    run directory: no extra level is created and the recorded run id is the
    directory's base name.

    `metrics` is finalized first if its `end_time` is still None. Explicit
    `started_wall` / `ended_wall` values take precedence over the wall-clock
    values carried by `metrics`.

    Files written: meta.json, summary.json, samples.jsonl and, only when
    `metrics.monitor_samples` is non-empty, monitors.jsonl.
    """
    # Resolved at call time: benchmaker/__init__.py imports this module before
    # it defines __version__, so a module-level import here would not work.
    from benchmaker import __version__ as bm_version

    if metrics.end_time is None:
        metrics.finalize()

    if run_id is None:
        run_id = default_run_id()
    target = os.path.join(out_dir, run_id) if run_id else out_dir
    os.makedirs(target, exist_ok=True)

    if metrics.end_time:
        elapsed = metrics.end_time - metrics.start_time
    else:
        elapsed = 0.0

    # Wall-clock end: explicit argument, else the aggregator's value, else now.
    if ended_wall is not None:
        finish_ts = ended_wall
    else:
        finish_ts = metrics.end_wall or time.time()

    # Wall-clock start: explicit argument, else the aggregator's value, else
    # derived by subtracting the measured duration from the end.
    if started_wall is not None:
        begin_ts = started_wall
    else:
        begin_ts = metrics.start_wall or (finish_ts - elapsed)

    recorded_id = run_id or os.path.basename(os.path.abspath(target))
    meta = RunMeta(
        run_id=recorded_id,
        started_at=_iso(begin_ts),
        ended_at=_iso(finish_ts),
        wall_time_s=elapsed,
        workload_type=workload_type_name,
        workload=workload_name,
        bench_maker_version=bm_version,
        source_config=source_config or {},
        labels=labels or {},
        notes=notes,
    )

    meta_file = os.path.join(target, META_FILENAME)
    with open(meta_file, "w") as meta_out:
        json.dump(meta.to_dict(), meta_out, indent=2, default=_json_default)

    summary_file = os.path.join(target, SUMMARY_FILENAME)
    with open(summary_file, "w") as summary_out:
        json.dump(metrics.summary(), summary_out, indent=2, default=_json_default)

    metrics.dump_samples_jsonl(os.path.join(target, SAMPLES_FILENAME))
    if metrics.monitor_samples:
        metrics.dump_monitor_jsonl(os.path.join(target, MONITORS_FILENAME))

    return os.path.abspath(target)
134
+
135
+
136
def read_bundle(run_dir: str) -> dict:
    """Read a run directory and return its metadata and summary.

    The returned dict has the keys:

    * `meta`: decoded contents of meta.json
    * `summary`: decoded contents of summary.json
    * `samples_path`: path to samples.jsonl, or None if the file is absent
    * `monitors_path`: path to monitors.jsonl, or None if the file is absent
    * `dir`: absolute path of `run_dir`

    Samples and monitor ticks are returned as file paths rather than loaded, so
    a `collect` over many runs stays cheap. Use `iter_jsonl` to stream rows.

    Raises:
        FileNotFoundError: if meta.json or summary.json is missing.
    """
    meta_path = os.path.join(run_dir, META_FILENAME)
    summary_path = os.path.join(run_dir, SUMMARY_FILENAME)
    if not (os.path.isfile(meta_path) and os.path.isfile(summary_path)):
        raise FileNotFoundError(
            f"{run_dir!r} is not a run bundle (missing meta.json or summary.json)"
        )
    # Decode as UTF-8 explicitly: the default text encoding is locale-dependent
    # and would mis-read non-ASCII JSON on platforms where it is not UTF-8.
    with open(meta_path, encoding="utf-8") as f:
        meta = json.load(f)
    with open(summary_path, encoding="utf-8") as f:
        summary = json.load(f)
    samples = os.path.join(run_dir, SAMPLES_FILENAME)
    monitors = os.path.join(run_dir, MONITORS_FILENAME)
    return {
        "meta": meta,
        "summary": summary,
        "samples_path": samples if os.path.isfile(samples) else None,
        "monitors_path": monitors if os.path.isfile(monitors) else None,
        "dir": os.path.abspath(run_dir),
    }
161
+
162
+
163
def is_bundle_dir(path: str) -> bool:
    """Return True when `path` is a directory holding both meta.json and summary.json."""
    if not os.path.isdir(path):
        return False
    # Checked in this order, stopping at the first missing file.
    required = (META_FILENAME, SUMMARY_FILENAME)
    return all(os.path.isfile(os.path.join(path, name)) for name in required)
169
+
170
+
171
def iter_jsonl(path: str):
    """Yield decoded JSON values from a JSONL file, one per non-blank line.

    Lines are stripped of surrounding whitespace; lines that are empty after
    stripping are skipped. The file is decoded as UTF-8 regardless of the
    platform's default encoding, so non-ASCII content reads the same
    everywhere.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            yield json.loads(line)
179
+
180
+
181
def _iso(wall_seconds: float) -> str:
    """Format a POSIX timestamp (seconds) as an ISO-8601 string in UTC."""
    moment = datetime.fromtimestamp(wall_seconds, tz=timezone.utc)
    return moment.isoformat()
183
+
184
+
185
def _json_default(obj: Any) -> Any:
    """Best-effort fallback used as the `default=` hook of `json.dump`.

    * A `set` becomes a sorted list so the output is deterministic. If the
      elements cannot be compared with each other (e.g. `{1, "a"}`), they are
      ordered by their `repr` instead of letting the whole dump fail.
    * An object exposing `to_dict` is replaced by the result of calling it.
    * Anything else, including an object whose `to_dict` call raises, is
      rendered as `repr(obj)`.
    """
    if isinstance(obj, set):
        try:
            return sorted(obj)
        except TypeError:
            # Mutually unorderable elements: still emit a stable ordering.
            return sorted(obj, key=repr)
    if hasattr(obj, "to_dict"):
        try:
            return obj.to_dict()
        except Exception:
            # Deliberate best-effort: a failing to_dict must not abort the
            # serialization, so fall through to the repr fallback below.
            pass
    return repr(obj)