benchcaddy 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
benchcaddy/__init__.py ADDED
@@ -0,0 +1,5 @@
1
"""Public API surface of the benchcaddy package.

Re-exports the core sweep runner, the observation decorator, and the
reporter implementations so callers can import everything from the
package root.
"""

from .core import Sweep
from .observability import observe
from .reporting import RichSweepReporter, SweepReporter

__all__ = ["RichSweepReporter", "Sweep", "SweepReporter", "observe"]
benchcaddy/__main__.py ADDED
@@ -0,0 +1,5 @@
1
"""Module entry point: supports `python -m benchcaddy`."""

from .cli import main


if __name__ == "__main__":
    main()
benchcaddy/cli.py ADDED
@@ -0,0 +1,473 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ import typer
7
+ from rich.console import Console
8
+ from rich.panel import Panel
9
+ from rich.table import Table
10
+ from rich.text import Text
11
+
12
+ from .db import compare_runs, compare_suite_runs, get_database_path, get_run_details, get_selected_run_details, get_suite_details, list_suite_summaries
13
+ from .observability import summarize_observations
14
+ from .presentation import dump_json, json_panel, render_table
15
+
16
# Typer application object (exported as `main` at the bottom of this module)
# and the shared rich console every command prints through.
app = typer.Typer(help="Inspect BenchCaddy benchmark suites.")
console = Console()
18
+
19
+
20
@dataclass
class CLIState:
    """Mutable process-wide flags captured by the top-level Typer callback."""

    # True when --verbose/-v was passed; read by the display helpers below.
    verbose: bool = False


# Single shared state instance for the lifetime of the CLI process.
_STATE = CLIState()
26
+
27
+
28
def _parse_compare_operands(values: list[str], strict: bool) -> tuple[str | None, list[str]]:
    """Split trailing `compare` arguments into (reference operand, strict keys).

    In strict mode the first operand is expected to be a run ID; when it does
    not parse as one, every operand is instead treated as a strict config key.
    Outside strict mode only a single reference operand is allowed; any extras
    abort the command with exit code 2.
    """
    if not values:
        return None, []

    right, *extra = values
    if strict and _as_run_id(right) is None:
        # No parseable run ID up front: interpret all operands as strict keys,
        # de-duplicated while preserving order (dict.fromkeys idiom).
        return None, list(dict.fromkeys(values))
    if extra and not strict:
        console.print(f"Unexpected arguments: {' '.join(extra)}")
        raise typer.Exit(code=2)
    return right, list(dict.fromkeys(extra)) if strict else []
39
+
40
+
41
+ def _comparison_title(comparison: dict[str, object]) -> str:
42
+ strict_keys = comparison.get("strict_keys") or []
43
+ if not strict_keys:
44
+ return f"Comparison: {comparison['suite_name']}"
45
+ return f"Comparison: {comparison['suite_name']} (strict: {', '.join(strict_keys)})"
46
+
47
+
48
+ def _as_run_id(value: str) -> int | tuple[int, int] | None:
49
+ if "." in value:
50
+ left, dot, right = value.partition(".")
51
+ if dot and left.isdigit() and right.isdigit():
52
+ return (int(left), int(right))
53
+ try:
54
+ return int(value)
55
+ except ValueError:
56
+ return None
57
+
58
+
59
def _style_delta(percent_change: float | None) -> Text:
    """Render a median percent change as rich Text.

    Improvements of at least 5% are coloured green, regressions of at least
    5% red; anything inside that band is left unstyled. Returns plain "n/a"
    when no percent change could be computed.
    """
    # Fix: the original crammed a statement after the `if` colon and nested
    # two conditional expressions on one line (PEP 8 readability violations).
    if percent_change is None:
        return Text("n/a")
    if percent_change <= -5.0:
        style = "green"  # faster by at least 5%
    elif percent_change >= 5.0:
        style = "red"  # slower by at least 5%
    else:
        style = None  # within the noise band: no colour
    return Text(f"{percent_change:+.2f}%", style=style)
62
+
63
+
64
+ def _format_time(mean_seconds: float | None, std_seconds: float | None) -> str:
65
+ mean_value = 0.0 if mean_seconds is None else mean_seconds
66
+ std_value = 0.0 if std_seconds is None else std_seconds
67
+ return f"{mean_value:.6f} +- {std_value:.6f}"
68
+
69
+
70
def _styled(value: object, style: str | None = None) -> Text:
    """Coerce *value* to its string form wrapped in a rich Text, optionally styled."""
    rendered = str(value)
    return Text(rendered, style=style)
72
+
73
+
74
def _style_row(values: tuple[object, ...], style: str | None = None) -> tuple[object, ...]:
    """Apply *style* to every cell of a table row; pass cells through untouched
    when no style is given."""
    if not style:
        return tuple(values)
    return tuple(_styled(cell, style) for cell in values)
76
+
77
+
78
+ def _suite_row_style(comparison: dict[str, object], run: dict[str, object]) -> str | None:
79
+ basis_run = comparison.get("basis_run")
80
+ if basis_run is None:
81
+ return None
82
+
83
+ best_run = min(comparison["runs"], key=lambda candidate: (candidate["median_seconds"], candidate["id"]))
84
+ if run["id"] == best_run["id"]:
85
+ return "green"
86
+
87
+ if comparison.get("basis_metric_label") == "Reference Median (s)" and run["id"] == basis_run["id"]:
88
+ return "yellow"
89
+
90
+ return None
91
+
92
+
93
+ def _format_optional_seconds(value: float | None) -> str:
94
+ return "-" if value is None else f"{value:.6f}"
95
+
96
+
97
def _render_observation_table(observations: list[dict[str, object]], title: str) -> Table:
    """Build a table summarising observed timings, one row per label."""
    summary = summarize_observations(observations)

    columns = [
        "Label",
        ("Calls", "right"),
        ("Mean +- Std (s)", "right"),
        ("Total (s)", "right"),
    ]
    rows = []
    for label, stats in summary.items():
        rows.append(
            (
                label,
                stats.calls,
                _format_time(stats.mean_seconds, stats.std_seconds),
                f"{stats.total_seconds:.6f}",
            )
        )
    return render_table(title, columns, rows)
113
+
114
+
115
def _show_run(run: dict[str, object]) -> None:
    """Print a detailed field/value table for a single run, followed by its
    observed-timing summary and environment JSON panel."""
    console.print(
        render_table(
            f"Run: {run['display_id']}",
            ["Field", "Value"],
            [
                ("Run ID", run["display_id"]),
                ("Record ID", run["id"]),
                ("Sweep ID", run["sweep_id"]),
                ("Run Index", run["run_index"]),
                ("Suite", run["suite_name"]),
                ("Target", run["target_name"]),
                ("Configuration", dump_json(run["configuration"])),
                ("Mean +- Std (s)", _format_time(run.get("mean_seconds"), run.get("std_seconds"))),
                ("Min (s)", _format_optional_seconds(run.get("min_seconds"))),
                ("Max (s)", _format_optional_seconds(run.get("max_seconds"))),
                ("Samples", len(run["samples"])),
                ("Recorded At", run["created_at"]),
            ],
        )
    )
    console.print(_render_observation_table(run["observations"], title="Observed Timings"))
    console.print(json_panel("Environment", run["environment"], indent=2))
138
+
139
+
140
def _show_suite(details: dict[str, object]) -> None:
    """Print a per-run summary table for a suite, its observed timings, the
    shared environment panel, and (in verbose mode) one observation table
    per run."""
    console.print(render_table(
        f"Suite: {details['suite_name']}",
        [("Run ID", "right"), ("Record ID", "right"), "Configuration", ("Mean +- Std (s)", "right"), ("Samples", "right"), "Recorded At"],
        [
            (
                run["display_id"],
                run["id"],
                dump_json(run["configuration"]),
                _format_time(run.get("mean_seconds"), run.get("std_seconds")),
                len(run["samples"]),
                run["created_at"],
            )
            for run in details["runs"]
        ],
    ))
    # One row per (run, observation label) pair.
    console.print(
        render_table(
            f"Observed Timings: {details['suite_name']}",
            [("Run ID", "right"), "Label", ("Calls", "right"), ("Mean +- Std (s)", "right")],
            [
                (
                    run["display_id"],
                    label,
                    stats.calls,
                    _format_time(stats.mean_seconds, stats.std_seconds),
                )
                for run in details["runs"]
                for label, stats in summarize_observations(run["observations"]).items()
            ],
        )
    )
    if details["environment"] is not None:
        console.print(json_panel("Environment", details["environment"], indent=2))
    if _STATE.verbose:
        # Verbose: repeat the full observation breakdown for every run.
        for run in details["runs"]:
            console.print(_render_observation_table(run["observations"], title=f"Observed Timings for Run {run['display_id']}"))
177
+
178
+
179
def _show_selected_runs(runs: list[dict[str, object]]) -> None:
    """Print a summary table plus an observed-timing table for an explicit
    selection of runs (possibly spanning multiple suites)."""
    console.print(render_table(
        "Selected Runs",
        [
            ("Run ID", "right"),
            ("Record ID", "right"),
            "Suite",
            "Target",
            "Configuration",
            ("Mean +- Std (s)", "right"),
            ("Samples", "right"),
            "Recorded At",
        ],
        [
            (
                run["display_id"],
                run["id"],
                run["suite_name"],
                run["target_name"],
                dump_json(run["configuration"]),
                _format_time(run.get("mean_seconds"), run.get("std_seconds")),
                len(run["samples"]),
                run["created_at"],
            )
            for run in runs
        ],
    ))
    # One row per (run, observation label) pair.
    console.print(
        render_table(
            "Observed Timings: Selected Runs",
            [("Run ID", "right"), ("Record ID", "right"), "Label", ("Calls", "right"), ("Mean +- Std (s)", "right")],
            [
                (
                    run["display_id"],
                    run["id"],
                    label,
                    stats.calls,
                    _format_time(stats.mean_seconds, stats.std_seconds),
                )
                for run in runs
                for label, stats in summarize_observations(run["observations"]).items()
            ],
        )
    )
223
+
224
+
225
def _print_run_comparison(
    comparison: dict[str, object],
) -> None:
    """Print a side-by-side baseline/candidate table for a two-run comparison,
    plus a per-label observed-timing diff when observation rows exist."""
    baseline = comparison["baseline"]
    candidate = comparison["candidate"]
    # Colour whichever run has the lower (or equal) median green; ties colour both.
    baseline_style = "green" if baseline["median_seconds"] <= candidate["median_seconds"] else None
    candidate_style = "green" if candidate["median_seconds"] <= baseline["median_seconds"] else None
    console.print(
        render_table(
            f"Run Comparison: {baseline['display_id']} -> {candidate['display_id']}",
            ["Field", "Baseline", "Candidate"],
            [
                ("Run ID", _styled(baseline["display_id"], baseline_style), _styled(candidate["display_id"], candidate_style)),
                ("Record ID", _styled(baseline["id"], baseline_style), _styled(candidate["id"], candidate_style)),
                # One row per configuration key present on either run; missing
                # keys render as JSON null via dict.get.
                *[
                    (
                        key,
                        _styled(dump_json(baseline["configuration"].get(key)), baseline_style),
                        _styled(dump_json(candidate["configuration"].get(key)), candidate_style),
                    )
                    for key in sorted(set(baseline["configuration"]) | set(candidate["configuration"]))
                ],
                ("Median (s)", _styled(f"{baseline['median_seconds']:.6f}", baseline_style), _styled(f"{candidate['median_seconds']:.6f}", candidate_style)),
                ("Mean +- Std (s)", _styled(_format_time(baseline.get("mean_seconds"), baseline.get("std_seconds")), baseline_style), _styled(_format_time(candidate.get("mean_seconds"), candidate.get("std_seconds")), candidate_style)),
                ("Min (s)", _styled(_format_optional_seconds(baseline.get("min_seconds")), baseline_style), _styled(_format_optional_seconds(candidate.get("min_seconds")), candidate_style)),
                ("Max (s)", _styled(_format_optional_seconds(baseline.get("max_seconds")), baseline_style), _styled(_format_optional_seconds(candidate.get("max_seconds")), candidate_style)),
                # Delta rows only fill the candidate column.
                ("Median Delta (s)", "", f"{comparison['delta_seconds']:.6f}"),
                ("Median Percent Change", "", _style_delta(comparison["percent_change"])),
            ],
        )
    )

    if comparison["observation_rows"]:
        console.print(
            render_table(
                "Observed Timing Diff",
                ["Label", ("Baseline (s)", "right"), ("Candidate (s)", "right"), ("Delta (s)", "right")],
                [
                    (
                        row["label"],
                        "-" if row["baseline_mean_seconds"] is None else _format_time(row["baseline_mean_seconds"], row["baseline_std_seconds"]),
                        "-" if row["candidate_mean_seconds"] is None else _format_time(row["candidate_mean_seconds"], row["candidate_std_seconds"]),
                        _format_optional_seconds(row["delta_seconds"]),
                    )
                    for row in comparison["observation_rows"]
                ],
            )
        )
273
+
274
+
275
def _print_suite_comparison(comparison: dict[str, object]) -> None:
    """Print a per-run delta/ratio table for a whole suite, then a panel
    describing the basis run the deltas were computed against."""
    console.print(
        render_table(
            _comparison_title(comparison),
            # Verbose mode appends sample-count and timestamp columns.
            [("Run ID", "right"), ("Record ID", "right"), "Configuration", ("Mean +- Std (s)", "right"), (comparison["delta_column_label"], "right"), (comparison["ratio_column_label"], "right"), *([("Samples", "right"), "Recorded At"] if _STATE.verbose else [])],
            [
                _style_row(
                    (
                        run["display_id"],
                        run["id"],
                        dump_json(run["configuration"]),
                        _format_time(run.get("mean_seconds"), run.get("std_seconds")),
                        f"{run['delta_seconds']:.6f}",
                        "n/a" if run["slowdown_factor"] is None else f"{run['slowdown_factor']:.2f}x",
                        *([run["sample_count"], run["created_at"]] if _STATE.verbose else []),
                    ),
                    _suite_row_style(comparison, run),
                )
                for run in comparison["runs"]
            ],
        )
    )

    # Only show the basis panel when a basis median was actually computed.
    if comparison["basis_median_seconds"] is not None:
        best_run = comparison["basis_run"]
        console.print(
            Panel.fit(
                " | ".join(
                    [
                        f"Run ID: {best_run['display_id']}",
                        f"Record ID: {best_run['id']}",
                        f"{comparison['basis_metric_label']}: {best_run['median_seconds']:.6f}",
                        f"Mean +- Std (s): {_format_time(best_run.get('mean_seconds'), best_run.get('std_seconds'))}",
                    ]
                ),
                title="Comparison Basis",
            )
        )
313
+
314
+
315
@app.callback()
def callback(
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Show additional detail in command output.",
    ),
) -> None:
    """Top-level CLI callback: captures global flags on the shared state."""
    _STATE.verbose = verbose
325
+
326
+
327
@app.command("list")
def list_command(
    database: Path = typer.Option(
        None,
        "--database",
        "-d",
        exists=False,
        dir_okay=False,
        help="Path to the BenchCaddy SQLite database.",
    ),
) -> None:
    """List every benchmark suite recorded in the database.

    Exits cleanly (code 0) with a message when the database holds no suites.
    """
    database_path = get_database_path(database)
    summaries = list_suite_summaries(database_path)
    if not summaries:
        console.print(f"No suites found in {database_path}.")
        raise typer.Exit()

    console.print(
        render_table(
            f"BenchCaddy suites ({database_path})",
            ["Suite", "Target", "Observation Labels", ("Runs", "right"), "Last Run"],
            [
                (
                    summary["suite_name"],
                    summary["target_name"],
                    # "-" placeholder when a suite recorded no observation labels.
                    ", ".join(summary["observation_labels"]) or "-",
                    summary["run_count"],
                    summary["last_run_at"],
                )
                for summary in summaries
            ],
        )
    )
    if _STATE.verbose:
        console.print(Panel.fit(str(database_path), title="Database"))
362
+
363
+
364
@app.command("show")
def show_command(
    identifiers: list[str] = typer.Argument(..., help="Suite name or one or more run IDs to inspect (for example 3.2 5 7.1)."),
    database: Path = typer.Option(
        None,
        "--database",
        "-d",
        exists=False,
        dir_okay=False,
        help="Path to the BenchCaddy SQLite database.",
    ),
) -> None:
    """Show a suite, a single run, or a selection of runs.

    A single identifier is tried as a run ID first and falls back to a suite
    name; multiple identifiers must all be run IDs. Exits with code 1 when
    anything cannot be found or parsed.
    """
    database_path = get_database_path(database)

    if len(identifiers) == 1:
        identifier = identifiers[0]
        run_id = _as_run_id(identifier)
        if run_id is not None:
            run = get_run_details(run_id, database_path)
            if run is None:
                console.print(f"Run '{identifier}' was not found in {database_path}.")
                raise typer.Exit(code=1)
            _show_run(run)
            return

        # Not a run ID: treat the identifier as a suite name.
        details = get_suite_details(identifier, database_path)
        if details is None:
            console.print(f"Suite '{identifier}' was not found in {database_path}.")
            raise typer.Exit(code=1)
        _show_suite(details)
        return

    # Multiple identifiers: every one must parse as a run ID.
    run_ids: list[int | tuple[int, int]] = []
    for identifier in identifiers:
        run_id = _as_run_id(identifier)
        if run_id is None:
            console.print(f"'{identifier}' is not a valid run ID.")
            raise typer.Exit(code=1)
        run_ids.append(run_id)

    runs = get_selected_run_details(run_ids, database_path)
    if runs is None:
        console.print(f"One or more runs were not found in {database_path}.")
        raise typer.Exit(code=1)
    _show_selected_runs(runs)
409
+
410
+
411
@app.command("compare")
def compare_command(
    left: str = typer.Argument(..., help="Suite name or baseline run ID."),
    operands: list[str] = typer.Argument(None, help="Optional reference run ID followed by strict config keys."),
    strict: bool = typer.Option(
        False,
        "--strict",
        "-s",
        help="Restrict suite comparison to runs whose configuration matches the reference run for the given trailing config keys.",
    ),
    database: Path = typer.Option(
        None,
        "--database",
        "-d",
        exists=False,
        dir_okay=False,
        help="Path to the BenchCaddy SQLite database.",
    ),
) -> None:
    """Compare two runs, or every run of a suite against a basis run.

    When both operands parse as run IDs a direct run-vs-run comparison is
    printed; otherwise `left` is taken as a suite name. Exits with code 1 for
    missing data and code 2 for invalid argument combinations.
    """
    right, strict_keys = _parse_compare_operands(operands, strict)
    database_path = get_database_path(database)
    left_run_id = _as_run_id(left)
    right_run_id = _as_run_id(right) if right is not None else None
    if left_run_id is not None and right_run_id is not None:
        # Run-vs-run comparison; strict keys make no sense here.
        if strict_keys:
            console.print("--strict is only supported for suite comparisons with a reference run.")
            raise typer.Exit(code=2)
        comparison = compare_runs(left_run_id, right_run_id, database_path)
        if comparison is None:
            console.print(f"Run comparison {left_run_id} vs {right_run_id} was not found in {database_path}.")
            raise typer.Exit(code=1)
        _print_run_comparison(comparison)
        return

    if strict_keys and right_run_id is None:
        console.print("--strict requires a suite comparison with a reference run ID.")
        raise typer.Exit(code=2)

    # Suite comparison path; compare_suite_runs reports problems via an
    # "error" code in the returned mapping rather than raising.
    comparison = compare_suite_runs(left, right_run_id, strict_keys, database_path)
    if comparison is None:
        console.print(f"Suite '{left}' was not found in {database_path}.")
        raise typer.Exit(code=1)
    if comparison.get("error") == "reference_run_not_found":
        console.print(f"Reference run '{right}' was not found in {database_path}.")
        raise typer.Exit(code=1)
    if comparison.get("error") == "reference_run_wrong_suite":
        console.print(
            f"Reference run '{right}' belongs to suite '{comparison['reference_run_suite_name']}', not '{left}'."
        )
        raise typer.Exit(code=1)
    if comparison.get("error") == "strict_requires_reference_run":
        console.print("--strict requires a suite comparison with a reference run ID.")
        raise typer.Exit(code=2)
    if comparison.get("error") == "strict_keys_not_found":
        missing_keys = ", ".join(comparison["missing_strict_keys"])
        console.print(
            f"Strict key(s) {missing_keys} were not found on reference run {comparison['reference_run_display_id']}."
        )
        raise typer.Exit(code=1)
    _print_suite_comparison(comparison)
471
+
472
+
473
# Console-script entry point alias, imported by `benchcaddy.__main__`.
main = app