benchmaker 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
benchmaker/load.py ADDED
@@ -0,0 +1,326 @@
1
+ """Load models.
2
+
3
+ A LoadModel yields admission "tickets" — each ticket means "fire one request
4
+ now". Open-loop models yield based on a target arrival schedule; closed-loop
5
+ models yield based on completions (the runner returns a ticket to the model
6
+ when a request finishes).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+ import random
13
+ import re
14
+ import time
15
+ from abc import ABC, abstractmethod
16
+ from dataclasses import dataclass
17
+ from typing import AsyncIterator, Optional, Union
18
+
19
+
20
class LoadModel(ABC):
    """Drives the timing of request admissions.

    Concrete models implement :meth:`tickets` as an async generator. The
    runner iterates it with ``async for`` and fires one request per item.
    """

    @abstractmethod
    def tickets(self) -> AsyncIterator[None]:
        """Yield None each time a request should be admitted.

        Declared as a plain ``def`` on purpose: an ``async def`` without a
        ``yield`` is a coroutine function, so its annotation would describe
        "a coroutine that returns an iterator" rather than the async
        iterator that the overriding async generators actually produce.
        """
        raise NotImplementedError

    def on_complete(self) -> None:
        """Called by the runner when a request completes. Closed-loop uses this."""
        # Default is a no-op; models that do not react to completions can
        # leave this untouched.
        return None

    @property
    def is_open_loop(self) -> bool:
        """Whether the model reports itself as open-loop (default: True)."""
        return True
35
+
36
+
37
+ # -------- Open-loop --------
38
+
39
class ConstantRPS(LoadModel):
    """Fire requests at a constant target rate (rps), regardless of in-flight count.

    Tickets follow an absolute schedule (``start + k / rps``), so time spent
    by the consumer between tickets does not lower the achieved rate; if the
    consumer falls behind, overdue tickets are emitted without sleeping.
    """

    def __init__(self, rps: float, duration_s: Optional[float] = None,
                 max_requests: Optional[int] = None):
        """Configure the model.

        Args:
            rps: Target arrival rate in requests per second; must be > 0.
            duration_s: Stop after this many seconds (None = no time limit).
            max_requests: Stop after this many tickets (None = no limit).

        Raises:
            ValueError: If ``rps`` is not strictly positive.
        """
        if rps <= 0:
            raise ValueError("rps must be > 0")
        self.rps = rps
        self.duration_s = duration_s
        self.max_requests = max_requests

    async def tickets(self):
        """Yield None once per scheduled arrival until a limit is reached."""
        interval = 1.0 / self.rps
        start = time.monotonic()
        deadline = None if self.duration_s is None else start + self.duration_s
        next_fire = start
        fired = 0
        while self.max_requests is None or fired < self.max_requests:
            now = time.monotonic()
            if deadline is not None:
                if now >= deadline:
                    return
                if next_fire >= deadline:
                    # The next arrival falls outside the window. Wait out the
                    # remainder instead of firing a request after the
                    # configured duration has already elapsed.
                    await asyncio.sleep(deadline - now)
                    return
            if next_fire > now:
                await asyncio.sleep(next_fire - now)
            yield None
            fired += 1
            next_fire += interval
67
+
68
+
69
class PoissonRPS(LoadModel):
    """Open-loop Poisson arrivals with mean rate `rps`.

    Inter-arrival gaps are exponentially distributed and are accumulated on
    an absolute timeline, so time spent by the consumer between tickets does
    not stretch the gaps and drag the achieved rate below the target.
    """

    def __init__(self, rps: float, duration_s: Optional[float] = None,
                 max_requests: Optional[int] = None, seed: Optional[int] = None):
        """Configure the model.

        Args:
            rps: Mean arrival rate in requests per second; must be > 0.
            duration_s: Stop after this many seconds (None = no time limit).
            max_requests: Stop after this many tickets (None = no limit).
            seed: Seed for the private random generator that draws the gaps.

        Raises:
            ValueError: If ``rps`` is not strictly positive.
        """
        if rps <= 0:
            raise ValueError("rps must be > 0")
        self.rps = rps
        self.duration_s = duration_s
        self.max_requests = max_requests
        self._rng = random.Random(seed)

    async def tickets(self):
        """Yield None at exponentially spaced times until a limit is reached."""
        start = time.monotonic()
        deadline = None if self.duration_s is None else start + self.duration_s
        next_fire = start
        fired = 0
        while self.max_requests is None or fired < self.max_requests:
            now = time.monotonic()
            if deadline is not None and now >= deadline:
                return
            # Exponential inter-arrival time with mean 1/rps.
            next_fire += self._rng.expovariate(self.rps)
            if deadline is not None and next_fire >= deadline:
                # The next arrival would land after the window closes: wait
                # out the remainder rather than firing a late request.
                await asyncio.sleep(deadline - now)
                return
            if next_fire > now:
                await asyncio.sleep(next_fire - now)
            yield None
            fired += 1
95
+
96
+
97
+ # -------- Closed-loop --------
98
+
99
class ClosedLoop(LoadModel):
    """N concurrent workers; each fires the next request as soon as the previous
    completes. Total in-flight is bounded by `concurrency`.

    This is implemented as: yield up to `concurrency` tickets initially, then
    one more for every completion the runner reports.
    """

    def __init__(self, concurrency: int, duration_s: Optional[float] = None,
                 max_requests: Optional[int] = None):
        """Configure the model.

        Args:
            concurrency: Number of tickets handed out before any completion
                is required; must be > 0.
            duration_s: Stop after this many seconds (None = no time limit).
            max_requests: Stop after this many tickets (None = no limit).

        Raises:
            ValueError: If ``concurrency`` is not strictly positive.
        """
        if concurrency <= 0:
            raise ValueError("concurrency must be > 0")
        self.concurrency = concurrency
        self.duration_s = duration_s
        self.max_requests = max_requests
        # Created inside tickets() so the queue belongs to the running loop;
        # None means "not currently running".
        self._completions: Optional[asyncio.Queue] = None

    @property
    def is_open_loop(self) -> bool:
        """Always False: admissions are driven by completions."""
        return False

    def on_complete(self) -> None:
        """Release one ticket; ignored while the model is not running."""
        if self._completions is not None:
            self._completions.put_nowait(None)

    async def tickets(self):
        """Yield None for each free slot until a limit is reached."""
        queue: asyncio.Queue = asyncio.Queue()
        # Seed: fire N concurrent immediately.
        for _ in range(self.concurrency):
            queue.put_nowait(None)
        self._completions = queue

        start = time.monotonic()
        deadline = None if self.duration_s is None else start + self.duration_s
        issued = 0
        try:
            while self.max_requests is None or issued < self.max_requests:
                if deadline is None:
                    await queue.get()
                else:
                    remaining = deadline - time.monotonic()
                    if remaining <= 0:
                        return
                    try:
                        # Bound the wait: without the timeout a stalled
                        # backend would keep this generator blocked well past
                        # the deadline, and the next completion would still
                        # admit one more request.
                        await asyncio.wait_for(queue.get(), timeout=remaining)
                    except asyncio.TimeoutError:
                        return
                yield None
                issued += 1
        finally:
            # Stop collecting completions that arrive after the run ended.
            if self._completions is queue:
                self._completions = None
142
+
143
+
144
+ # -------- Composite: Sweep / Ramp --------
145
+
146
@dataclass
class _Stage:
    """A load model paired with a human-readable label."""

    # The model that produces tickets for this stage.
    model: LoadModel
    # Display name for the stage.
    label: str
150
+
151
+
152
class Sweep(LoadModel):
    """Run a sequence of sub-load-models in order (each for its own duration).

    Useful for: sweep across RPS values to find saturation, e.g.
    Sweep([ConstantRPS(10, 30), ConstantRPS(50, 30), ConstantRPS(100, 30)])

    A stage ends when its own ``tickets()`` generator is exhausted, so every
    stage needs its own duration or request limit for the sweep to advance.
    """

    def __init__(self, stages: list[LoadModel], labels: Optional[list[str]] = None):
        """Configure the sweep.

        Args:
            stages: Load models to run back to back; must be non-empty.
            labels: Optional names, one per stage. Missing entries default to
                ``stage_<index>``.

        Raises:
            ValueError: If ``stages`` is empty.
        """
        if not stages:
            raise ValueError("Sweep requires at least one stage")
        self.stages = stages
        # Pad rather than trust the caller: tickets() pairs labels and stages
        # with zip(), which would silently skip every stage beyond the last
        # label if the list were shorter.
        names = list(labels) if labels else []
        names.extend(f"stage_{i}" for i in range(len(names), len(stages)))
        self.labels = names
        self.current_label: Optional[str] = None

    @property
    def is_open_loop(self) -> bool:
        # Open-loop only when every stage is; a single closed-loop stage makes
        # the whole sweep report itself as closed-loop.
        return all(s.is_open_loop for s in self.stages)

    def on_complete(self) -> None:
        """Forward the completion to every stage."""
        for s in self.stages:
            s.on_complete()

    async def tickets(self):
        """Yield each stage's tickets in order, exposing the active label."""
        try:
            for label, stage in zip(self.labels, self.stages):
                self.current_label = label
                async for ticket in stage.tickets():
                    yield ticket
        finally:
            # Also runs when the consumer stops iterating early, so a stale
            # label is never left behind.
            self.current_label = None
181
+
182
+
183
class Ramp(LoadModel):
    """Linearly ramp RPS from `start_rps` to `end_rps` over `duration_s`.

    The rate used for each arrival is the interpolated rate at the moment the
    arrival is scheduled. With ``poisson=True`` the gaps are exponentially
    distributed around that rate; otherwise they are exactly ``1 / rate``.
    """

    def __init__(self, start_rps: float, end_rps: float, duration_s: float,
                 poisson: bool = False, seed: Optional[int] = None):
        """Configure the ramp.

        Args:
            start_rps: Rate at the beginning of the ramp; must be > 0.
            end_rps: Rate at the end of the ramp; must be > 0.
            duration_s: Length of the ramp in seconds; must be > 0.
            poisson: Draw exponential gaps instead of fixed ones.
            seed: Seed for the private random generator (Poisson mode).

        Raises:
            ValueError: If any of the three numeric arguments is not > 0.
        """
        if start_rps <= 0 or end_rps <= 0 or duration_s <= 0:
            raise ValueError("start_rps, end_rps, duration_s must all be > 0")
        self.start_rps = start_rps
        self.end_rps = end_rps
        self.duration_s = duration_s
        self.poisson = poisson
        self._rng = random.Random(seed)

    def _rps_at(self, t: float) -> float:
        """Return the interpolated rate ``t`` seconds into the ramp (clamped)."""
        frac = min(max(t / self.duration_s, 0.0), 1.0)
        return self.start_rps + (self.end_rps - self.start_rps) * frac

    async def tickets(self):
        """Yield None per arrival until `duration_s` has elapsed."""
        start = time.monotonic()
        deadline = start + self.duration_s
        next_fire = start
        while True:
            now = time.monotonic()
            if now >= deadline:
                return
            rps = self._rps_at(now - start)
            if self.poisson:
                # Accumulate gaps on an absolute timeline so consumer time
                # between tickets does not stretch the inter-arrival gaps.
                next_fire += self._rng.expovariate(rps)
            if next_fire >= deadline:
                # The next arrival falls after the ramp ends: wait out the
                # remainder instead of firing a request past the window.
                await asyncio.sleep(deadline - now)
                return
            if next_fire > now:
                await asyncio.sleep(next_fire - now)
            yield None
            if not self.poisson:
                next_fire += 1.0 / rps
219
+
220
+
221
+ # -------- User-friendly spec parser --------
222
+
223
# Number, optional whitespace, optional unit. Matched against lowercased text.
_DURATION_RE = re.compile(r"^([0-9]*\.?[0-9]+)\s*(ms|s|m|h)?$")

# Seconds per unit accepted by parse_duration.
_DURATION_UNITS = {"ms": 0.001, "s": 1.0, "m": 60.0, "h": 3600.0}


def parse_duration(s: Union[str, int, float]) -> float:
    """Parse '30s', '500ms', '2m', '1h', or a bare number (seconds).

    Units are case-insensitive and may be separated from the number by
    whitespace ('30S' and '30 s' both mean thirty seconds).

    Args:
        s: A duration string, or a number already expressed in seconds.

    Returns:
        The duration in seconds.

    Raises:
        ValueError: If a string does not look like ``<number>[unit]``.
    """
    if isinstance(s, (int, float)):
        return float(s)
    # Lowercase before matching: the pattern only knows lowercase units, so
    # normalising afterwards would reject inputs such as "30S".
    m = _DURATION_RE.match(s.strip().lower())
    if not m:
        raise ValueError(f"Cannot parse duration {s!r}")
    val = float(m.group(1))
    unit = m.group(2) or "s"
    return val * _DURATION_UNITS[unit]
236
+
237
+
238
def parse_rate_spec(
    spec: Union[str, int, float, dict],
    duration_s: Optional[float] = None,
    max_requests: Optional[int] = None,
) -> LoadModel:
    """Friendly load-model factory.

    Accepted forms:
        100                           -> ConstantRPS(100)
        "100"                         -> ConstantRPS(100)
        "100rps"                      -> ConstantRPS(100)
        "poisson:100"                 -> PoissonRPS(100)
        "closed:32" or "concurrency:32" -> ClosedLoop(32)
        "ramp:10..500:30s"            -> Ramp(10, 500, 30)
        "ramp:10..500"                -> Ramp over the top-level duration
        "ramp-poisson:10..500:30s"    -> Ramp(..., poisson=True)
        "sweep:10,50,100,500@30s"     -> Sweep of ConstantRPS, each 30s
        "sweep:10,50,100"             -> Sweep splitting the top-level duration
        {"type": "constant", "rps": 100, "duration": "60s"}   (dict form)

    Args:
        spec: The specification in one of the forms above. String specs are
            case-insensitive.
        duration_s: Top-level duration in seconds. Passed to the constant,
            poisson and closed-loop models; used as the fallback duration for
            ramp and sweep specs that do not carry their own. Ignored for the
            dict form, which is parsed from the dict alone.
        max_requests: Request cap passed to the constant, poisson and
            closed-loop models. Ramp and sweep specs do not use it.

    Returns:
        The configured load model.

    Raises:
        ValueError: If the spec is malformed, or a ramp/sweep has no duration.
    """
    if isinstance(spec, dict):
        return _parse_rate_dict(spec)

    if isinstance(spec, (int, float)):
        return ConstantRPS(float(spec), duration_s=duration_s, max_requests=max_requests)

    s = spec.strip().lower()
    kind, has_prefix, rest = s.partition(":")

    if has_prefix and kind == "poisson":
        return PoissonRPS(float(rest), duration_s=duration_s, max_requests=max_requests)

    if has_prefix and kind in ("closed", "concurrency"):
        return ClosedLoop(int(rest), duration_s=duration_s, max_requests=max_requests)

    if has_prefix and kind in ("ramp", "ramp-poisson"):
        # rest like "10..500:30s", or "10..500" with a top-level duration.
        if ":" in rest:
            span, dur = rest.rsplit(":", 1)
            ramp_s = parse_duration(dur)
        elif duration_s is not None:
            span, ramp_s = rest, float(duration_s)
        else:
            raise ValueError("ramp: needs ':duration' or a top-level duration")
        low, has_range, high = span.partition("..")
        if not has_range:
            raise ValueError(f"ramp: expected 'START..END', got {span!r}")
        return Ramp(float(low), float(high), ramp_s, poisson=(kind == "ramp-poisson"))

    if has_prefix and kind == "sweep":
        # e.g. "10,50,100,500@30s"
        if "@" in rest:
            vals, dur = rest.split("@", 1)
            per_stage = parse_duration(dur)
        else:
            vals = rest
            if duration_s is None:
                raise ValueError("sweep: needs '@duration' or a top-level duration")
            # Without '@', the top-level duration is shared evenly.
            per_stage = duration_s / len(vals.split(","))
        rates = [float(v) for v in vals.split(",")]
        stages = [ConstantRPS(r, duration_s=per_stage) for r in rates]
        labels = [f"{r:g}rps" for r in rates]
        return Sweep(stages, labels)

    # Plain number with optional 'rps' suffix. Unknown "prefix:" forms also
    # land here and fail in float() with a ValueError.
    if s.endswith("rps"):
        s = s[:-3]
    return ConstantRPS(float(s), duration_s=duration_s, max_requests=max_requests)
300
+
301
+
302
def _parse_rate_dict(d: dict) -> LoadModel:
    """Build a load model from the dict form of a rate spec.

    Keys read: ``type`` (default ``"constant"``), ``duration`` (string or
    number of seconds), ``max_requests``, plus per-type keys: ``rps``,
    ``concurrency``, ``start_rps``/``end_rps``/``poisson``, ``seed``,
    ``stages``.

    Raises:
        ValueError: For an unknown ``type``, an unparsable duration, or a
            ramp without a duration.
        KeyError: If a key required by the chosen type is missing.
    """
    kind = d.get("type", "constant").lower()
    duration = d.get("duration")
    if isinstance(duration, str):
        duration = parse_duration(duration)
    max_requests = d.get("max_requests")

    if kind == "constant":
        return ConstantRPS(float(d["rps"]), duration_s=duration, max_requests=max_requests)
    if kind == "poisson":
        return PoissonRPS(float(d["rps"]), duration_s=duration, max_requests=max_requests,
                          seed=d.get("seed"))
    if kind in ("closed", "closed-loop", "concurrency"):
        return ClosedLoop(int(d["concurrency"]), duration_s=duration, max_requests=max_requests)
    if kind == "ramp":
        # A ramp has no meaning without a length; fail with a clear message
        # instead of letting a None duration blow up further down.
        if duration is None:
            raise ValueError("ramp requires a 'duration'")
        return Ramp(float(d["start_rps"]), float(d["end_rps"]),
                    float(duration),
                    poisson=d.get("poisson", False), seed=d.get("seed"))
    if kind == "sweep":
        stages_spec = d["stages"]
        # NOTE(review): the sweep's own "duration"/"max_requests" are not
        # passed down, so a stage with no limits of its own would never end.
        # Confirm whether they should apply per stage or be split.
        stages = [parse_rate_spec(stage) for stage in stages_spec]
        labels = [
            (stage.get("label") if isinstance(stage, dict) else None) or f"stage_{i}"
            for i, stage in enumerate(stages_spec)
        ]
        return Sweep(stages, labels)
    raise ValueError(f"Unknown load model type: {kind}")
benchmaker/metrics.py ADDED
@@ -0,0 +1,234 @@
1
+ """Aggregation + reporting of Samples."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import statistics
7
+ import time
8
+ from collections import Counter, defaultdict
9
+ from dataclasses import dataclass, field
10
+ from typing import Optional, TextIO
11
+
12
+ from benchmaker.types import Sample
13
+
14
+
15
def _pct(xs: list[float], p: float) -> float:
    """Return the `p`-th percentile of `xs` using linear interpolation.

    Args:
        xs: Values in any order; the input list is not modified.
        p: Percentile in the range 0-100. Out-of-range values are clamped.

    Returns:
        The interpolated percentile, or 0.0 when `xs` is empty.
    """
    if not xs:
        return 0.0
    ordered = sorted(xs)
    last = len(ordered) - 1
    # Clamp: p > 100 would index past the end, and p < 0 would silently wrap
    # around to the largest values through negative indexing.
    rank = last * (min(max(p, 0.0), 100.0) / 100.0)
    lo = int(rank)
    hi = min(lo + 1, last)
    if lo == hi:
        return ordered[lo]
    return ordered[lo] + (ordered[hi] - ordered[lo]) * (rank - lo)
25
+
26
+
27
@dataclass
class MetricsAggregator:
    """Accumulates samples and produces a summary.

    Samples are objects from ``benchmaker.types`` (not visible here); this
    class reads their ``ok``, ``request_ok``, ``latency_s``, ``status``,
    ``error``, ``bytes_sent``, ``bytes_recv``, ``extra``, ``start_ts``,
    ``workload`` and ``meta`` attributes.
    """

    samples: list[Sample] = field(default_factory=list)
    start_time: float = field(default_factory=time.monotonic)
    end_time: Optional[float] = None
    # Wall-clock (time.time) markers, only used when writing the run bundle.
    start_wall: float = field(default_factory=time.time)
    end_wall: Optional[float] = None
    # name -> list of (elapsed_s, {metric_name: value})
    monitor_samples: dict[str, list[tuple[float, dict[str, float]]]] = field(default_factory=dict)

    def add(self, sample: Sample) -> None:
        """Record one finished request."""
        self.samples.append(sample)

    def monitor_buffer(self, name: str) -> list[tuple[float, dict[str, float]]]:
        """Return the tick list for monitor `name`, creating it if needed."""
        return self.monitor_samples.setdefault(name, [])

    def finalize(self) -> None:
        """Stamp the end of the run on both the monotonic and wall clocks."""
        self.end_time = time.monotonic()
        self.end_wall = time.time()

    def summary(self) -> dict:
        """Aggregate everything recorded so far into a plain dict.

        Uses the finalized end time when available, otherwise "now". Latency
        and workload metrics are computed over successful samples only.
        """
        # `is not None` rather than truthiness: 0.0 is a valid clock reading.
        end = self.end_time if self.end_time is not None else time.monotonic()
        wall_s = max(end - self.start_time, 1e-9)
        total = len(self.samples)
        ok = [s for s in self.samples if s.ok]
        fail = [s for s in self.samples if not s.ok]
        # Split fail into transport failures vs. delivered-but-graded-wrong.
        wrong = [s for s in fail if s.request_ok]
        request_failed = [s for s in fail if not s.request_ok]
        latencies = [s.latency_s for s in ok]

        status_counts = Counter(s.status for s in self.samples)
        error_counts = Counter(s.error for s in fail if s.error)

        out: dict = {
            "wall_time_s": wall_s,
            "total_requests": total,
            "success": len(ok),
            "failed": len(fail),
            "request_failed": len(request_failed),
            "wrong_output": len(wrong),
            "error_rate": (len(fail) / total) if total else 0.0,
            "request_failure_rate": (len(request_failed) / total) if total else 0.0,
            "throughput_rps": total / wall_s,
            "goodput_rps": len(ok) / wall_s,
            "bytes_sent": sum(s.bytes_sent for s in self.samples),
            "bytes_recv": sum(s.bytes_recv for s in self.samples),
            "status_codes": dict(status_counts),
            "errors": dict(error_counts),
        }
        if latencies:
            out["latency_s"] = {
                "mean": statistics.mean(latencies),
                "min": min(latencies),
                "max": max(latencies),
                "p50": _pct(latencies, 50),
                "p90": _pct(latencies, 90),
                "p95": _pct(latencies, 95),
                "p99": _pct(latencies, 99),
                "p999": _pct(latencies, 99.9),
            }

        # Aggregate workload-specific `extra` metrics generically: mean + percentiles.
        extras: dict[str, list[float]] = defaultdict(list)
        for s in ok:
            for k, v in s.extra.items():
                if isinstance(v, (int, float)):
                    extras[k].append(float(v))
        if extras:
            out["workload_metrics"] = {
                k: {
                    "mean": statistics.mean(vals),
                    "p50": _pct(vals, 50),
                    "p90": _pct(vals, 90),
                    "p99": _pct(vals, 99),
                    "min": min(vals),
                    "max": max(vals),
                }
                for k, vals in extras.items()
            }

        # Monitor time-series: summarize each metric per monitor.
        monitors_summary: dict[str, dict] = {}
        for mon_name, ticks in self.monitor_samples.items():
            if not ticks:
                continue
            by_metric: dict[str, list[float]] = defaultdict(list)
            for _t, values in ticks:
                for k, v in values.items():
                    if isinstance(v, (int, float)):
                        by_metric[k].append(float(v))
            per_metric = {
                k: {
                    "mean": statistics.mean(vals),
                    "min": min(vals),
                    "max": max(vals),
                    "p50": _pct(vals, 50),
                    "p90": _pct(vals, 90),
                    "p99": _pct(vals, 99),
                    # Ticks are kept in recording order.
                    "first": vals[0],
                    "last": vals[-1],
                }
                for k, vals in by_metric.items()
            }
            monitors_summary[mon_name] = {
                "tick_count": len(ticks),
                "metrics": per_metric,
            }
        if monitors_summary:
            out["monitors"] = monitors_summary
        return out

    @staticmethod
    def _status_sort_key(item):
        """Order status entries: numeric codes ascending, then everything else."""
        code = item[0]
        if isinstance(code, (int, float)):
            return (0, code, "")
        return (1, 0, str(code))

    def render(self, out: TextIO) -> None:
        """Write a human-readable report of `summary()` to `out` and flush it."""
        s = self.summary()
        lines: list[str] = []
        lines.append("=" * 60)
        lines.append(f"[benchmaker] results ({s['total_requests']} requests, "
                     f"{s['wall_time_s']:.2f}s wall)")
        lines.append("=" * 60)
        lines.append(f" throughput : {s['throughput_rps']:>10.2f} req/s")
        lines.append(f" goodput : {s['goodput_rps']:>10.2f} req/s")
        lines.append(f" success : {s['success']}")
        lines.append(f" failed : {s['failed']} ({s['error_rate']*100:.2f}%)")
        lines.append(
            f" request failed : {s['request_failed']} "
            f"({s['request_failure_rate']*100:.2f}%)"
        )
        lines.append(f" wrong output : {s['wrong_output']}")
        if s.get("latency_s"):
            lat = s["latency_s"]
            lines.append("")
            lines.append(" latency (s)")
            for k in ("mean", "p50", "p90", "p95", "p99", "p999", "max"):
                lines.append(f" {k:<6}: {lat[k]:.4f}")
        if s["status_codes"]:
            lines.append("")
            lines.append(" status codes")
            # A status may be non-numeric (presumably None when no response
            # was received - confirm against Sample). Plain sorted() and an
            # alignment format spec both raise on such values, so sort with
            # an explicit key and format the code as text.
            for code, n in sorted(s["status_codes"].items(), key=self._status_sort_key):
                lines.append(f" {str(code):<4} : {n}")
        if s["errors"]:
            lines.append("")
            lines.append(" errors")
            # Ten most frequent error strings.
            for err, n in sorted(s["errors"].items(), key=lambda kv: -kv[1])[:10]:
                lines.append(f" {n:<4} x {err}")
        if s.get("workload_metrics"):
            lines.append("")
            lines.append(" workload metrics")
            for k, v in s["workload_metrics"].items():
                lines.append(f" {k}")
                for kk in ("mean", "p50", "p90", "p99", "max"):
                    lines.append(f" {kk:<6}: {v[kk]:.4f}")
        if s.get("monitors"):
            for mon_name, mon in s["monitors"].items():
                lines.append("")
                lines.append(f" monitor: {mon_name} ({mon['tick_count']} ticks)")
                for k, v in mon["metrics"].items():
                    lines.append(f" {k}")
                    for kk in ("mean", "min", "max", "p50", "p99", "last"):
                        lines.append(f" {kk:<6}: {v[kk]:.4f}")
        lines.append("=" * 60)
        out.write("\n".join(lines) + "\n")
        out.flush()

    def dump_samples_jsonl(self, path: str) -> None:
        """Write per-request records for offline analysis."""
        with open(path, "w", encoding="utf-8") as f:
            for s in self.samples:
                f.write(json.dumps({
                    "start_ts": s.start_ts,
                    "latency_s": s.latency_s,
                    "status": s.status,
                    "ok": s.ok,
                    "request_ok": s.request_ok,
                    "bytes_sent": s.bytes_sent,
                    "bytes_recv": s.bytes_recv,
                    "error": s.error,
                    "workload": s.workload,
                    "meta": _safe_meta(s.meta),
                    # Workload-provided values get the same sanitising as
                    # meta, so one unserialisable entry cannot abort the dump.
                    "extra": _safe_meta(s.extra),
                }) + "\n")

    def dump_monitor_jsonl(self, path: str) -> None:
        """Write monitor time-series ticks as JSONL for plotting/analysis."""
        with open(path, "w", encoding="utf-8") as f:
            for mon_name, ticks in self.monitor_samples.items():
                for t, values in ticks:
                    f.write(json.dumps({
                        "monitor": mon_name,
                        "elapsed_s": t,
                        "values": values,
                    }) + "\n")
224
+
225
+
226
def _safe_meta(meta: dict) -> dict:
    """Return a copy of `meta` that `json.dumps` can always serialise.

    Values that cannot be JSON-encoded are replaced by their ``repr``. Keys
    of a type JSON does not accept are replaced by their ``repr`` too. A
    missing (None) or empty mapping yields an empty dict.
    """
    if not meta:
        return {}
    out = {}
    for k, v in meta.items():
        # json.dumps only accepts str, int, float, bool or None as keys;
        # anything else would make the caller's dump raise TypeError.
        key = k if k is None or isinstance(k, (str, int, float, bool)) else repr(k)
        try:
            json.dumps(v)
        except (TypeError, ValueError):
            out[key] = repr(v)
        else:
            out[key] = v
    return out