acenly-bench 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acenly_bench-0.1.0/PKG-INFO +128 -0
- acenly_bench-0.1.0/README.md +117 -0
- acenly_bench-0.1.0/acenly_bench/__init__.py +2 -0
- acenly_bench-0.1.0/acenly_bench/__main__.py +4 -0
- acenly_bench-0.1.0/acenly_bench/bench.py +725 -0
- acenly_bench-0.1.0/acenly_bench/db.py +89 -0
- acenly_bench-0.1.0/acenly_bench/hooks/pre-push.sh +7 -0
- acenly_bench-0.1.0/acenly_bench.egg-info/PKG-INFO +128 -0
- acenly_bench-0.1.0/acenly_bench.egg-info/SOURCES.txt +13 -0
- acenly_bench-0.1.0/acenly_bench.egg-info/dependency_links.txt +1 -0
- acenly_bench-0.1.0/acenly_bench.egg-info/entry_points.txt +2 -0
- acenly_bench-0.1.0/acenly_bench.egg-info/requires.txt +1 -0
- acenly_bench-0.1.0/acenly_bench.egg-info/top_level.txt +1 -0
- acenly_bench-0.1.0/pyproject.toml +25 -0
- acenly_bench-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: acenly-bench
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Track Python function performance across commits. Catch regressions before they ship.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://acenly.com
|
|
7
|
+
Project-URL: Repository, https://github.com/YannWeiler/acenly_bench
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: pyyaml
|
|
11
|
+
|
|
12
|
+
# ACENLY Bench
|
|
13
|
+
|
|
14
|
+
Track Python function performance across commits. Catch regressions before they ship.
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
ACENLY Bench · main@a3f9c1b2 · Python 3.12.3
|
|
18
|
+
|
|
19
|
+
Function Median P95 vs last commit
|
|
20
|
+
──────────────────────────────────────────────────────────────
|
|
21
|
+
deduplicate_users 1.24 ms 1.51 ms ▼ 38.1% faster
|
|
22
|
+
build_index 84.21 ms 97.13 ms ▲ 12.4% slower ⚠
|
|
23
|
+
filter_records 0.88 ms 0.92 ms ~ +0.2%
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## What it does
|
|
29
|
+
|
|
30
|
+
- **Precise timing** — runs each function in an isolated subprocess with warmup, batch timing, and outlier trimming
|
|
31
|
+
- **Regression detection** — compares against the last stored result and flags slowdowns
|
|
32
|
+
- **Git hook integration** — blocks pushes automatically when a regression exceeds your threshold
|
|
33
|
+
- **History tracking** — stores all runs in a local SQLite database so you can see trends over time
|
|
34
|
+
|
|
35
|
+
No external services. No accounts. Runs entirely on your machine.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install pyyaml
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Then copy `bench.py` and `bench_db.py` into your project root.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Quick start
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Benchmark a specific function right now
|
|
53
|
+
python3 bench.py myfile.py::my_function
|
|
54
|
+
|
|
55
|
+
# Show history for tracked functions
|
|
56
|
+
python3 bench.py --history
|
|
57
|
+
|
|
58
|
+
# High-precision mode (batch timing, 3s measurement window)
|
|
59
|
+
python3 bench.py --precise
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Track functions automatically
|
|
65
|
+
|
|
66
|
+
Create `acenly.yml` in your project root (see `acenly.example.yml`):
|
|
67
|
+
|
|
68
|
+
```yaml
|
|
69
|
+
benchmark:
|
|
70
|
+
track:
|
|
71
|
+
- file: mymodule/utils.py
|
|
72
|
+
function: process_batch
|
|
73
|
+
- file: mymodule/search.py
|
|
74
|
+
function: find_duplicates
|
|
75
|
+
|
|
76
|
+
regression_warn: 0.10 # warn if 10% slower
|
|
77
|
+
regression_block: 0.25 # block push if 25% slower
|
|
78
|
+
noise_floor: 0.05 # ignore changes smaller than 5%
|
|
79
|
+
trials: 5
|
|
80
|
+
warmup: 2
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Then install the git hook:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
python3 bench.py --install-hooks
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
From now on, every `git push` runs the benchmark automatically. If a function regresses past the block threshold, the push is stopped.
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
python3 bench.py --skip-hooks # bypass when needed
|
|
93
|
+
python3 bench.py --uninstall-hooks
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Options
|
|
99
|
+
|
|
100
|
+
| Flag | Description |
|
|
101
|
+
|------|-------------|
|
|
102
|
+
| `file.py::func` | Benchmark a specific function |
|
|
103
|
+
| `--compare` | Show diff vs last stored result |
|
|
104
|
+
| `--history` | Print run history for tracked functions |
|
|
105
|
+
| `--precise` | High-precision mode: adaptive warmup, batch timing |
|
|
106
|
+
| `--repeat N` | Run N times, keep the best result |
|
|
107
|
+
| `--install-hooks` | Install git pre-push hook |
|
|
108
|
+
| `--uninstall-hooks` | Remove git pre-push hook |
|
|
109
|
+
| `--skip-hooks` | Run benchmark without enforcing regression block |
|
|
110
|
+
| `--hook-mode` | Used internally by the git hook |
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## How timing works
|
|
115
|
+
|
|
116
|
+
Each benchmark runs in a **separate subprocess** to avoid interference from the parent process. In normal mode, each function is called `trials` times with `warmup` discarded runs first. In `--precise` mode:
|
|
117
|
+
|
|
118
|
+
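1. **Adaptive warmup** — keeps calling the function until the coefficient of variation (standard deviation ÷ mean) of the last 20 call times drops below 3%, or 2 seconds have elapsed (CPU caches settled)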
|
|
119
|
+
2. **Batch calibration** — finds a batch size so each measurement window takes ~50ms, then divides the window's elapsed time by the batch size to get a per-call time — this averages out the OS scheduler jitter that would distort individual timings
|
|
120
|
+
3. **3-second window** — collects ~60 batch measurements, reports the minimum (least OS interference)
|
|
121
|
+
|
|
122
|
+
Results are stored in `bench.db` alongside your project.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
MIT
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# ACENLY Bench
|
|
2
|
+
|
|
3
|
+
Track Python function performance across commits. Catch regressions before they ship.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
ACENLY Bench · main@a3f9c1b2 · Python 3.12.3
|
|
7
|
+
|
|
8
|
+
Function Median P95 vs last commit
|
|
9
|
+
──────────────────────────────────────────────────────────────
|
|
10
|
+
deduplicate_users 1.24 ms 1.51 ms ▼ 38.1% faster
|
|
11
|
+
build_index 84.21 ms 97.13 ms ▲ 12.4% slower ⚠
|
|
12
|
+
filter_records 0.88 ms 0.92 ms ~ +0.2%
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## What it does
|
|
18
|
+
|
|
19
|
+
- **Precise timing** — runs each function in an isolated subprocess with warmup, batch timing, and outlier trimming
|
|
20
|
+
- **Regression detection** — compares against the last stored result and flags slowdowns
|
|
21
|
+
- **Git hook integration** — blocks pushes automatically when a regression exceeds your threshold
|
|
22
|
+
- **History tracking** — stores all runs in a local SQLite database so you can see trends over time
|
|
23
|
+
|
|
24
|
+
No external services. No accounts. Runs entirely on your machine.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Install
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install pyyaml
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Then copy `bench.py` and `bench_db.py` into your project root.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Quick start
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Benchmark a specific function right now
|
|
42
|
+
python3 bench.py myfile.py::my_function
|
|
43
|
+
|
|
44
|
+
# Show history for tracked functions
|
|
45
|
+
python3 bench.py --history
|
|
46
|
+
|
|
47
|
+
# High-precision mode (batch timing, 3s measurement window)
|
|
48
|
+
python3 bench.py --precise
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Track functions automatically
|
|
54
|
+
|
|
55
|
+
Create `acenly.yml` in your project root (see `acenly.example.yml`):
|
|
56
|
+
|
|
57
|
+
```yaml
|
|
58
|
+
benchmark:
|
|
59
|
+
track:
|
|
60
|
+
- file: mymodule/utils.py
|
|
61
|
+
function: process_batch
|
|
62
|
+
- file: mymodule/search.py
|
|
63
|
+
function: find_duplicates
|
|
64
|
+
|
|
65
|
+
regression_warn: 0.10 # warn if 10% slower
|
|
66
|
+
regression_block: 0.25 # block push if 25% slower
|
|
67
|
+
noise_floor: 0.05 # ignore changes smaller than 5%
|
|
68
|
+
trials: 5
|
|
69
|
+
warmup: 2
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Then install the git hook:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
python3 bench.py --install-hooks
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
From now on, every `git push` runs the benchmark automatically. If a function regresses past the block threshold, the push is stopped.
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
python3 bench.py --skip-hooks # bypass when needed
|
|
82
|
+
python3 bench.py --uninstall-hooks
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Options
|
|
88
|
+
|
|
89
|
+
| Flag | Description |
|
|
90
|
+
|------|-------------|
|
|
91
|
+
| `file.py::func` | Benchmark a specific function |
|
|
92
|
+
| `--compare` | Show diff vs last stored result |
|
|
93
|
+
| `--history` | Print run history for tracked functions |
|
|
94
|
+
| `--precise` | High-precision mode: adaptive warmup, batch timing |
|
|
95
|
+
| `--repeat N` | Run N times, keep the best result |
|
|
96
|
+
| `--install-hooks` | Install git pre-push hook |
|
|
97
|
+
| `--uninstall-hooks` | Remove git pre-push hook |
|
|
98
|
+
| `--skip-hooks` | Run benchmark without enforcing regression block |
|
|
99
|
+
| `--hook-mode` | Used internally by the git hook |
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## How timing works
|
|
104
|
+
|
|
105
|
+
Each benchmark runs in a **separate subprocess** to avoid interference from the parent process. In normal mode, each function is called `trials` times with `warmup` discarded runs first. In `--precise` mode:
|
|
106
|
+
|
|
107
|
+
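1. **Adaptive warmup** — keeps calling the function until the coefficient of variation (standard deviation ÷ mean) of the last 20 call times drops below 3%, or 2 seconds have elapsed (CPU caches settled)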
|
|
108
|
+
2. **Batch calibration** — finds a batch size so each measurement window takes ~50ms, then divides the window's elapsed time by the batch size to get a per-call time — this averages out the OS scheduler jitter that would distort individual timings
|
|
109
|
+
3. **3-second window** — collects ~60 batch measurements, reports the minimum (least OS interference)
|
|
110
|
+
|
|
111
|
+
Results are stored in `bench.db` alongside your project.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## License
|
|
116
|
+
|
|
117
|
+
MIT
|
|
@@ -0,0 +1,725 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
ACENLY Bench
|
|
4
|
+
────────────
|
|
5
|
+
Track Python function performance across commits.
|
|
6
|
+
Catch regressions before they ship.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python3 bench.py # benchmark all tracked functions
|
|
10
|
+
python3 bench.py myfile.py::my_function # benchmark one specific function
|
|
11
|
+
python3 bench.py --compare # show diff vs last stored result
|
|
12
|
+
python3 bench.py --history # show benchmark history
|
|
13
|
+
python3 bench.py --install-hooks # install git pre-push hook
|
|
14
|
+
python3 bench.py --uninstall-hooks # remove git hook
|
|
15
|
+
python3 bench.py --hook-mode # used internally by git hook
|
|
16
|
+
python3 bench.py --skip-hooks # run benchmark, skip git enforcement
|
|
17
|
+
python3 bench.py --precise # high-precision mode (batch timing)
|
|
18
|
+
python3 bench.py --repeat N # repeat N times, keep best result
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import os
|
|
24
|
+
import sys
|
|
25
|
+
import ast
|
|
26
|
+
import time
|
|
27
|
+
import stat
|
|
28
|
+
import shutil
|
|
29
|
+
import subprocess
|
|
30
|
+
import statistics
|
|
31
|
+
import platform
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import Optional
|
|
34
|
+
|
|
35
|
+
import yaml
|
|
36
|
+
|
|
37
|
+
from acenly_bench.db import (
|
|
38
|
+
init_db,
|
|
39
|
+
save_benchmark,
|
|
40
|
+
get_benchmark_last,
|
|
41
|
+
get_benchmark_history,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
init_db()
|
|
45
|
+
|
|
46
|
+
# ── paths ──────────────────────────────────────────────────────────────────
|
|
47
|
+
# Directory the tool was launched from; treated as the user's project root.
ROOT = Path.cwd()
# Hook scripts are bundled with the package, next to this module.
HOOKS_DIR = Path(__file__).parent / "hooks"
# Use the project's own acenly.yml when present, otherwise point at the
# example config (which may itself be absent; load_config() checks again).
CFG_FILE = (
    ROOT / "acenly.yml"
    if (ROOT / "acenly.yml").exists()
    else ROOT / "acenly.example.yml"
)
|
|
52
|
+
|
|
53
|
+
# ── precise mode defaults ──────────────────────────────────────────────────
|
|
54
|
+
# Values run_benchmark() substitutes for the caller's settings when
# precise=True. The harness's precise branch uses its own adaptive warmup and
# time-boxed measurement loop, so of these only the timeout changes behaviour
# there; the others are still written into the generated script.
PRECISE_TRIALS = 50     # trial count passed to the harness
PRECISE_WARMUP = 10     # warmup count passed to the harness
PRECISE_TRIM = 0.10     # trim fraction passed to the harness
PRECISE_TIMEOUT = 30.0  # subprocess timeout in seconds
|
|
58
|
+
|
|
59
|
+
# ── terminal colours ───────────────────────────────────────────────────────
|
|
60
|
+
def _c(code: str, text: str) -> str:
    """Wrap *text* in the ANSI SGR sequence *code* when stdout is a terminal.

    When stdout is not a TTY (pipe, file, captured output) the text is
    returned unchanged so that no escape codes leak into logs.
    """
    if not sys.stdout.isatty():
        return text
    return f"\033[{code}m{text}\033[0m"


# Named colour helpers. These are real functions rather than lambdas bound to
# names (PEP 8), so they carry proper names in tracebacks.
def RED(t: str) -> str:
    """Render *t* in red."""
    return _c("31", t)


def GREEN(t: str) -> str:
    """Render *t* in green."""
    return _c("32", t)


def YELLOW(t: str) -> str:
    """Render *t* in yellow."""
    return _c("33", t)


def CYAN(t: str) -> str:
    """Render *t* in cyan."""
    return _c("36", t)


def BOLD(t: str) -> str:
    """Render *t* in bold."""
    return _c("1", t)


def DIM(t: str) -> str:
    """Render *t* dimmed."""
    return _c("2", t)


def WHITE(t: str) -> str:
    """Render *t* in bright white."""
    return _c("97", t)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
75
|
+
# GIT HELPERS
|
|
76
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
77
|
+
|
|
78
|
+
def _git(*args) -> str:
    """Run ``git <args>`` in ROOT and return its stripped stdout.

    Returns an empty string when the command cannot be run or fails: a
    non-zero exit status, a missing ``git`` executable, or an unusable
    working directory. Callers treat "" as "no information".
    """
    try:
        return subprocess.check_output(
            ["git", *args],
            stderr=subprocess.DEVNULL,
            cwd=ROOT,
        ).decode().strip()
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError when git is not installed; without
        # it the whole tool would crash on machines that have no git.
        return ""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def git_commit_hash() -> str:
    """Return the hash of HEAD, or ``"unknown"`` when git yields nothing."""
    sha = _git("rev-parse", "HEAD")
    if not sha:
        return "unknown"
    return sha
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def git_branch() -> str:
    """Return the current branch name, or ``"unknown"`` when git reports none."""
    name = _git("branch", "--show-current")
    return name if name else "unknown"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def git_changed_files() -> set[str]:
    """Return the paths listed by ``git diff HEAD~1 --name-only``.

    An empty set is returned when git prints nothing (no changes, or the
    command failed).
    """
    listing = _git("diff", "HEAD~1", "--name-only")
    if not listing:
        return set()
    return set(listing.splitlines())
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
103
|
+
# CONFIG
|
|
104
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
105
|
+
|
|
106
|
+
def load_config() -> dict:
    """Load the YAML config at CFG_FILE and return it as a dict.

    Returns ``{}`` when the file does not exist, is empty, or its top-level
    document is not a mapping. The accessor helpers call ``.get`` on the
    result, so a list or scalar document would otherwise crash them.
    """
    if not CFG_FILE.exists():
        return {}
    # Read as UTF-8 explicitly instead of relying on the platform locale.
    with open(CFG_FILE, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    return data if isinstance(data, dict) else {}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def bench_config(cfg: dict) -> dict:
    """Return the ``benchmark`` section of *cfg*, or ``{}`` when absent.

    A ``benchmark:`` key with no value parses to ``None`` in YAML; that is
    treated like a missing section so callers can always use ``.get``.
    """
    return cfg.get("benchmark") or {}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def tracked_functions(cfg: dict) -> list[dict]:
    """Return the ``benchmark.track`` entries of *cfg*, or ``[]`` when absent.

    A ``track:`` key with no value parses to ``None`` in YAML; that is
    treated like a missing list so callers can always iterate the result.
    """
    return bench_config(cfg).get("track") or []
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def regression_warn(cfg: dict) -> float:
    """Return ``benchmark.regression_warn`` as a float (default 0.10)."""
    section = bench_config(cfg)
    raw = section.get("regression_warn", 0.10)
    return float(raw)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def regression_block(cfg: dict) -> float:
    """Return ``benchmark.regression_block`` as a float (default 0.25)."""
    section = bench_config(cfg)
    raw = section.get("regression_block", 0.25)
    return float(raw)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def noise_floor(cfg: dict) -> float:
    """Return ``benchmark.noise_floor`` as a float (default 0.05)."""
    section = bench_config(cfg)
    raw = section.get("noise_floor", 0.05)
    return float(raw)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def bench_trials(cfg: dict) -> int:
    """Return ``benchmark.trials`` as an int (default 5)."""
    section = bench_config(cfg)
    raw = section.get("trials", 5)
    return int(raw)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def bench_warmup(cfg: dict) -> int:
    """Return ``benchmark.warmup`` as an int (default 2)."""
    section = bench_config(cfg)
    raw = section.get("warmup", 2)
    return int(raw)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
142
|
+
# BENCHMARK RUNNER
|
|
143
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
144
|
+
|
|
145
|
+
# Source of the throw-away script that performs one benchmark run in a child
# interpreter. It is a str.format() template: the placeholders file_path,
# func_name, user_call, trials, warmup, trim_pct and precise are filled in by
# run_benchmark(); literal braces in the script are therefore doubled.
#
# The script loads the target module from FILE_PATH, builds a zero-argument
# _call() (either the user's expression or the function applied to synthetic
# arguments guessed from annotations / parameter names), warms up, measures,
# and prints a single line starting with "RESULT " holding key=value
# statistics in milliseconds.
#
# Two details matter for correctness of the numbers:
#   * the user expression is compiled once, so parsing it is not timed on
#     every call;
#   * mutation detection runs the function on a probe copy and compares that
#     probe with the pristine arguments, so functions that modify their
#     inputs get a fresh deep copy per call.
#
# NOTE(review): the parameter-name heuristics use substring tests, so the
# single-letter keys "s", "n" and "k" match many unrelated names (e.g. "size"
# is caught by "s" before the integer rule is reached). Left as-is because
# changing it would alter the inputs existing baselines were recorded with.
_HARNESS_TEMPLATE = """\
import sys, time, statistics, importlib.util, copy

FILE_PATH = {file_path!r}
FUNC_NAME = {func_name!r}
USER_CALL = {user_call!r}
TRIALS = {trials}
WARMUP = {warmup}
TRIM_PCT = {trim_pct}
PRECISE = {precise}

spec = importlib.util.spec_from_file_location("_target_mod", FILE_PATH)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
func = getattr(mod, FUNC_NAME)

# ── build _call() — either user-supplied expression or synthetic inputs ───
if USER_CALL is not None:
    # Evaluate in the module's namespace so helpers/imports are available
    _ns = {{**vars(mod), '__builtins__': __builtins__}}
    # Compile once so parsing the expression is not part of every timed call
    _user_code = compile(USER_CALL, "<user_call>", "eval")
    def _call():
        eval(_user_code, _ns)
else:
    import inspect, random, string
    sig = inspect.signature(func)
    params = list(sig.parameters.values())

    def _make_arg(p):
        ann = p.annotation
        if ann == inspect.Parameter.empty:
            n = p.name.lower()
            if any(k in n for k in ("item", "list", "arr", "seq", "data", "elems")):
                return [random.randint(0, 50) for _ in range(200)]
            if any(k in n for k in ("str", "text", "s", "word")):
                return ''.join(random.choices(string.ascii_lowercase, k=100))
            if any(k in n for k in ("n", "count", "size", "k")):
                return 100
            return [random.randint(0, 50) for _ in range(200)]
        if ann in (list, "list"):
            return [random.randint(0, 50) for _ in range(200)]
        if ann in (str, "str"):
            return ''.join(random.choices(string.ascii_lowercase, k=100))
        if ann in (int, "int"):
            return 100
        if ann in (dict, "dict"):
            return {{str(i): i for i in range(100)}}
        return [random.randint(0, 50) for _ in range(200)]

    args = tuple(_make_arg(p) for p in params if p.default is inspect.Parameter.empty)

    _MUTATES = False
    if args:
        # Run the function on a probe copy, then compare the probe with the
        # untouched originals to see whether the function changed its inputs.
        _probe = copy.deepcopy(args)
        try:
            func(*_probe)
        except Exception:
            pass
        try:
            _MUTATES = list(_probe) != list(args)
        except Exception:
            _MUTATES = True

    def _call():
        a = copy.deepcopy(args) if _MUTATES and args else args
        func(*a)

# ── warmup ───────────────────────────────────────────────────────────────
if PRECISE:
    _STAB_WIN, _STAB_CV = 20, 0.03
    _recent, _wdl = [], time.perf_counter() + 2.0
    while time.perf_counter() < _wdl:
        t0 = time.perf_counter()
        _call()
        _recent.append((time.perf_counter() - t0) * 1000)
        if len(_recent) > _STAB_WIN:
            _recent.pop(0)
        if len(_recent) == _STAB_WIN:
            _m = sum(_recent) / _STAB_WIN
            if _m > 0 and statistics.stdev(_recent) / _m < _STAB_CV:
                break
else:
    for _ in range(WARMUP):
        try:
            _call()
        except Exception as _e:
            raise RuntimeError(f"Call failed during warmup: {{_e}}")

# ── calibrate batch size (precise mode) ──────────────────────────────────
if PRECISE:
    _TARGET_S = 0.050
    _n = 1
    while True:
        _t0 = time.perf_counter()
        for _ in range(_n):
            _call()
        _el = time.perf_counter() - _t0
        if _el >= 0.005 or _n >= 500_000:
            BATCH = max(1, int(_n * _TARGET_S / max(_el, 1e-9)))
            BATCH = min(BATCH, 500_000)
            break
        _n = min(_n * 10, 500_000)
else:
    BATCH = 1

# ── measure ───────────────────────────────────────────────────────────────
timings = []

if PRECISE:
    deadline = time.perf_counter() + 3.0
    while time.perf_counter() < deadline:
        t0 = time.perf_counter()
        for _ in range(BATCH):
            _call()
        timings.append((time.perf_counter() - t0) * 1000 / BATCH)
    while len(timings) < 20:
        t0 = time.perf_counter()
        for _ in range(BATCH):
            _call()
        timings.append((time.perf_counter() - t0) * 1000 / BATCH)
else:
    for _ in range(TRIALS):
        t0 = time.perf_counter()
        _call()
        timings.append((time.perf_counter() - t0) * 1000)

# ── stats ─────────────────────────────────────────────────────────────────
n_raw = len(timings)
sorted_t = sorted(timings)

if PRECISE:
    cut = max(0, int(n_raw * 0.05))
    trimmed = sorted_t[:-cut] if cut else sorted_t
    mn = trimmed[0]
    median = statistics.median(trimmed)
    p95_idx = max(0, int(len(trimmed) * 0.95) - 1)
    p95 = trimmed[p95_idx]
    mx = trimmed[-1]
    stdev = statistics.stdev(trimmed) if len(trimmed) > 1 else 0.0
    cv = (stdev / mn * 100) if mn > 0 else 0.0
    report_median = mn
else:
    cut = max(1, int(n_raw * TRIM_PCT))
    trimmed = sorted_t[cut:-cut] if TRIM_PCT > 0 and n_raw > cut * 2 else sorted_t
    mn = trimmed[0]
    report_median = statistics.median(trimmed)
    median = report_median
    p95_idx = max(0, int(len(trimmed) * 0.95) - 1)
    p95 = trimmed[p95_idx]
    mx = trimmed[-1]
    stdev = statistics.stdev(trimmed) if len(trimmed) > 1 else 0.0
    cv = (stdev / median * 100) if median > 0 else 0.0

print(f"RESULT median={{report_median:.6f}} p95={{p95:.6f}} min={{mn:.6f}} max={{mx:.6f}} "
      f"stdev={{stdev:.6f}} cv={{cv:.3f}} trials={{n_raw}} trimmed={{len(trimmed)}}")
"""
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _run_harness_once(harness: str, timeout: float) -> Optional[dict]:
    """Execute one harness script in a child interpreter and parse its result.

    The script is written to ``_bench_harness_tmp.py`` inside ROOT and run
    with ROOT as working directory using the current interpreter. The first
    stdout line starting with ``"RESULT "`` is parsed as ``key=value`` pairs.

    Args:
        harness: Complete Python source of the harness script.
        timeout: Seconds to wait for the child process.

    Returns:
        A dict with ``median_ms``, ``p95_ms``, ``min_ms``, ``max_ms``,
        ``stdev_ms``, ``cv_pct``, ``trials`` and ``trimmed``, or ``None`` when
        the child timed out, could not be started, printed no RESULT line, or
        printed one that could not be parsed.
    """
    tmp = ROOT / "_bench_harness_tmp.py"
    # The harness contains non-ASCII characters and Python reads source files
    # as UTF-8, so never fall back to the platform's locale encoding here.
    tmp.write_text(harness, encoding="utf-8")
    try:
        result = subprocess.run(
            [sys.executable, str(tmp)],
            capture_output=True, text=True,
            timeout=timeout, cwd=ROOT,
        )
        for line in result.stdout.splitlines():
            if not line.startswith("RESULT "):
                continue
            # Split on the first "=" only so a value can never break parsing.
            parts = dict(p.split("=", 1) for p in line[7:].split())
            return {
                "median_ms": float(parts["median"]),
                "p95_ms": float(parts["p95"]),
                "min_ms": float(parts["min"]),
                "max_ms": float(parts["max"]),
                "stdev_ms": float(parts.get("stdev", 0)),
                "cv_pct": float(parts.get("cv", 0)),
                "trials": int(parts["trials"]),
                "trimmed": int(parts.get("trimmed", int(parts["trials"]))),
            }
        return None
    except (subprocess.TimeoutExpired, OSError, ValueError, KeyError):
        # Timeout, failure to spawn, undecodable output or a malformed RESULT
        # line all mean "no measurement"; the caller reports the error.
        return None
    finally:
        # Single cleanup point for every exit path.
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            pass
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def run_benchmark(
    file_path: str,
    func_name: str,
    trials: int = 5,
    warmup: int = 2,
    timeout: float = 30.0,
    precise: bool = False,
    repeat: int = 1,
    user_call: Optional[str] = None,
) -> Optional[dict]:
    """Benchmark one function in a subprocess and return the best attempt.

    Args:
        file_path: Path of the module defining the function; resolved to an
            absolute path before it is written into the harness.
        func_name: Name of the function inside that module.
        trials: Number of timed calls (normal mode).
        warmup: Number of discarded calls (normal mode).
        timeout: Seconds allowed per harness run.
        precise: When true, ``trials``, ``warmup`` and ``timeout`` are
            replaced by the PRECISE_* module constants and PRECISE_TRIM is
            passed as the trim fraction; otherwise the trim fraction is 0.0.
        repeat: Number of harness runs; values below 1 are treated as 1.
        user_call: Optional expression the harness evaluates instead of
            calling the function with synthetic arguments.

    Returns:
        The statistics dict of the run with the lowest ``median_ms`` (the
        earliest one on ties) extended with ``precise`` and ``repeat``, or
        ``None`` when every run failed.
    """
    trim = 0.0
    if precise:
        trials, warmup = PRECISE_TRIALS, PRECISE_WARMUP
        timeout, trim = PRECISE_TIMEOUT, PRECISE_TRIM

    script = _HARNESS_TEMPLATE.format(
        file_path=str(Path(file_path).resolve()),
        func_name=func_name,
        user_call=user_call,
        trials=trials,
        warmup=warmup,
        trim_pct=trim,
        precise=precise,
    )

    # Run every attempt in order, then drop the failed ones.
    attempts = [_run_harness_once(script, timeout) for _ in range(max(1, repeat))]
    successes = [a for a in attempts if a is not None]
    if not successes:
        return None

    # min() keeps the first of several equal medians.
    fastest = min(successes, key=lambda a: a["median_ms"])
    return {**fastest, "precise": precise, "repeat": repeat}
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
376
|
+
# CORE LOGIC
|
|
377
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
378
|
+
|
|
379
|
+
def benchmark_function(
    file_path: str,
    func_name: str,
    cfg: dict,
    compare: bool = True,
    hook_mode: bool = False,
    precise: bool = False,
    repeat: int = 1,
    user_call: Optional[str] = None,
) -> dict:
    """Benchmark one function, store the result, and classify the change.

    Args:
        file_path: Path of the module defining the function.
        func_name: Name of the function to benchmark.
        cfg: Loaded configuration; supplies trials, warmup and thresholds.
        compare: When true, the last stored result is looked up and used to
            compute the speed-up ratio.
        hook_mode: Accepted for call-site compatibility; not used here.
        precise: Forwarded to run_benchmark().
        repeat: Forwarded to run_benchmark().
        user_call: Forwarded to run_benchmark().

    Returns:
        ``{"error": True, "file", "func"}`` when the benchmark produced no
        result. Otherwise a dict with ``error`` False, ``file``, ``func``,
        ``median_ms``, ``p95_ms``, ``stdev_ms``, ``cv_pct``,
        ``speedup_vs_prev`` (previous median / current median, or ``None``),
        ``prev_median_ms`` and ``status``. ``status`` is ``"baseline"`` when
        no ratio is available, ``"regression_block"`` / ``"regression_warn"``
        when the ratio is below ``1 - noise_floor - threshold``,
        ``"improvement"`` when it is above ``1 + noise_floor``, else ``"ok"``.
    """
    result = run_benchmark(
        file_path, func_name,
        trials=bench_trials(cfg), warmup=bench_warmup(cfg),
        precise=precise, repeat=repeat,
        user_call=user_call,
    )
    if result is None:
        return {"error": True, "file": file_path, "func": func_name}

    commit = git_commit_hash()
    branch = git_branch()
    py_ver = platform.python_version()

    prev = get_benchmark_last(func_name, file_path) if compare else None
    speedup_vs_prev = None
    # A current median of exactly 0 ms (below timer resolution) cannot be
    # compared as a ratio; treat it like a first run instead of dividing by 0.
    if prev and result["median_ms"] > 0:
        speedup_vs_prev = prev["median_ms"] / result["median_ms"]

    save_benchmark(
        function_name=func_name,
        file_path=file_path,
        commit_hash=commit,
        branch=branch,
        median_ms=result["median_ms"],
        p95_ms=result["p95_ms"],
        min_ms=result["min_ms"],
        max_ms=result["max_ms"],
        trials=result["trials"],
        speedup_vs_prev=speedup_vs_prev,
        python_version=py_ver,
    )

    outcome = {
        "error": False,
        "file": file_path,
        "func": func_name,
        "median_ms": result["median_ms"],
        "p95_ms": result["p95_ms"],
        "stdev_ms": result.get("stdev_ms", 0.0),
        "cv_pct": result.get("cv_pct", 0.0),
        "speedup_vs_prev": speedup_vs_prev,
        "prev_median_ms": prev["median_ms"] if prev else None,
    }

    nf = noise_floor(cfg)
    if speedup_vs_prev is None:
        outcome["status"] = "baseline"
    elif speedup_vs_prev < (1.0 - nf - regression_block(cfg)):
        outcome["status"] = "regression_block"
    elif speedup_vs_prev < (1.0 - nf - regression_warn(cfg)):
        outcome["status"] = "regression_warn"
    elif speedup_vs_prev > (1.0 + nf):
        outcome["status"] = "improvement"
    else:
        outcome["status"] = "ok"

    return outcome
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
452
|
+
# OUTPUT FORMATTING
|
|
453
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
454
|
+
|
|
455
|
+
def _fmt_ms(ms: float) -> str:
    """Format a duration given in milliseconds with a readable unit.

    Values below 1 ms are shown in microseconds with one decimal, values
    below 1000 ms in milliseconds with three decimals, everything else in
    seconds with two decimals.
    """
    if ms < 1:
        value, digits, unit = ms * 1000, 1, "µs"
    elif ms < 1000:
        value, digits, unit = ms, 3, "ms"
    else:
        value, digits, unit = ms / 1000, 2, "s"
    return f"{value:.{digits}f} {unit}"
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _fmt_change(speedup: Optional[float]) -> str:
    """Describe a speed-up ratio as a coloured change label.

    ``speedup`` is previous median / current median. ``None`` yields the
    baseline marker. Ratios below 0.75 are shown in red as SLOWER, below
    0.90 in yellow as slower, above 1.10 in green as faster, and anything
    else as a dimmed signed percentage.
    """
    if speedup is None:
        return DIM("─ baseline")

    delta = (speedup - 1.0) * 100
    magnitude = abs(delta)
    if speedup < 0.75:
        label = RED(f"▲ {magnitude:.1f}% SLOWER")
    elif speedup < 0.90:
        label = YELLOW(f"▲ {magnitude:.1f}% slower")
    elif speedup > 1.10:
        label = GREEN(f"▼ {delta:.1f}% faster")
    else:
        label = DIM(f"~ {delta:+.1f}%")
    return label
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def print_table(outcomes: list[dict], commit: str, branch: str, precise: bool = False):
    """Print the benchmark results table and report blocking regressions.

    Args:
        outcomes: Result dicts as produced by benchmark_function().
        commit: Commit hash; only the first 8 characters are shown.
        branch: Branch name shown in the title line.
        precise: Adds the StdDev and CV% columns when true.

    Returns:
        True when at least one outcome has a status containing ``"block"``,
        otherwise False. An empty ``outcomes`` list prints only the header.
    """
    print()
    mode_tag = DIM(" [precise mode]") if precise else ""
    print(BOLD(f" ACENLY Bench") + DIM(f" · {branch}@{commit[:8]}") + mode_tag)
    print()

    # default= keeps an empty result list from raising ValueError in max().
    col1 = max(
        (len(o.get("func", "?")) for o in outcomes),
        default=len("Function"),
    ) + 2
    if precise:
        header = (
            f" {'Function':<{col1}} {'Median':>10} {'P95':>10} "
            f"{'StdDev':>10} {'CV%':>6} {'vs last'}"
        )
    else:
        header = f" {'Function':<{col1}} {'Median':>10} {'P95':>10} {'vs last commit'}"
    print(DIM(header))
    print(DIM(" " + "─" * (len(header) - 2)))

    any_block = False
    for o in outcomes:
        # Cells are padded BEFORE colouring: ANSI escape codes count towards
        # the string length and would otherwise eat the column padding.
        if o.get("error"):
            name_cell = f"{o['func']:<{col1}}"
            print(f" {RED(name_cell)} {'ERROR':>10}")
            continue

        name_cell = f"{o['func']:<{col1}}"
        median = _fmt_ms(o["median_ms"])
        p95 = _fmt_ms(o["p95_ms"])
        change = _fmt_change(o.get("speedup_vs_prev"))
        status = o.get("status", "ok")

        if "block" in status:
            fn_col = RED(name_cell)
        elif "warn" in status:
            fn_col = YELLOW(name_cell)
        else:
            fn_col = WHITE(name_cell)

        if precise:
            stdev = _fmt_ms(o.get("stdev_ms", 0))
            cv_pct = o.get("cv_pct", 0)
            cv_cell = f"{f'{cv_pct:.2f}%':>6}"
            if cv_pct < 5:
                cv_col = GREEN(cv_cell)
            elif cv_pct < 15:
                cv_col = YELLOW(cv_cell)
            else:
                cv_col = RED(cv_cell)
            print(f" {fn_col} {median:>10} {p95:>10} {stdev:>10} {cv_col} {change}")
        else:
            print(f" {fn_col} {median:>10} {p95:>10} {change}")

        if "block" in status:
            any_block = True

    print()
    return any_block
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
528
|
+
# HISTORY VIEW
|
|
529
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
530
|
+
|
|
531
|
+
def print_history(cfg: dict):
    """Print the most recent stored runs for every tracked function.

    For each tracked entry up to 10 history rows are requested and printed as
    timestamp, median, change label and short commit hash. Entries without
    stored rows get a "no history yet" line. Prints a notice and returns
    early when no functions are tracked.
    """
    tracked = tracked_functions(cfg)
    if not tracked:
        print(YELLOW(" No tracked functions in acenly.yml"))
        return

    for spec in tracked:
        path = spec.get("file", "")
        name = spec.get("function", "")
        runs = get_benchmark_history(name, path, limit=10)
        if runs:
            print(f"\n {BOLD(name)} {DIM(path)}")
            for run in runs:
                change = _fmt_change(run.get("speedup_vs_prev"))
                # Keep "YYYY-MM-DD HH:MM" from the ISO-style timestamp.
                stamp = run["created_at"][:16].replace("T", " ")
                median = _fmt_ms(run["median_ms"])
                short_hash = DIM(run["commit_hash"][:8])
                print(f" {DIM(stamp)} {median:>10} {change} {short_hash}")
        else:
            print(DIM(f"\n {name} — no history yet"))
    print()
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
554
|
+
# GIT HOOKS
|
|
555
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
556
|
+
|
|
557
|
+
def install_hooks():
    """Copy the bundled ``pre-push.sh`` script to ``ROOT/.git/hooks/pre-push``.

    The copied file is made executable for user, group and others. An
    existing ``pre-push`` file at the destination is overwritten.

    Nothing is installed (and a red message is printed) when:
      * ``ROOT/.git`` does not exist,
      * ``ROOT/.git`` exists but is not a directory,
      * the bundled hook script is missing from ``HOOKS_DIR``.
    """
    git_dir = ROOT / ".git"
    if not git_dir.exists():
        print(RED(" Not a git repository."))
        return

    # In linked worktrees and submodules ``.git`` is a plain file pointing at
    # the real git directory; copying into it would fail with a traceback.
    if not git_dir.is_dir():
        print(RED(f" {git_dir} is not a directory (worktree or submodule?); cannot install hook."))
        return

    hook_src = HOOKS_DIR / "pre-push.sh"
    hook_dst = git_dir / "hooks" / "pre-push"

    if not hook_src.exists():
        print(RED(f" Hook script not found: {hook_src}"))
        return

    # The hooks directory is not guaranteed to exist (e.g. a repository
    # initialised from an empty template), so create it before copying.
    hook_dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(hook_src, hook_dst)

    # Add the execute bits on top of whatever mode the copy ended up with.
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    hook_dst.chmod(hook_dst.stat().st_mode | exec_bits)

    print(GREEN(" ✓ pre-push hook installed"))
    print(DIM(" Benchmarks will run automatically before every push."))
    print(DIM(" Use --skip-hooks to bypass when needed."))
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def uninstall_hooks():
    """Delete ``ROOT/.git/hooks/pre-push`` if it exists.

    Prints a green confirmation after removing the file, or a dimmed notice
    when there is no such file.
    """
    hook_path = ROOT / ".git" / "hooks" / "pre-push"

    if not hook_path.exists():
        print(DIM(" No hook installed."))
        return

    hook_path.unlink()
    print(GREEN(" ✓ pre-push hook removed"))
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
588
|
+
# MAIN
|
|
589
|
+
# ══════════════════════════════════════════════════════════════════════════
|
|
590
|
+
|
|
591
|
+
def _show_waitlist_once():
    """Print the optimizer waitlist banner unless the marker file exists.

    The marker ``ROOT/.acenly_waitlist_seen`` is created after printing so
    the banner is not shown again on later runs.
    """
    seen_flag = ROOT / ".acenly_waitlist_seen"
    if seen_flag.exists():
        return

    print()
    print(DIM(" ─────────────────────────────────────────────────────"))
    print(YELLOW(" Found something slow? ACENLY can rewrite it for you."))
    print()
    print(" The Optimizer rewrites functions algorithmically —")
    print(" O(n²) → O(n), correctness-verified, pure Python output.")
    print()
    print(BOLD(" → acenly.com/optimize") + DIM(" (join the waitlist)"))
    print(DIM(" ─────────────────────────────────────────────────────"))
    print()

    try:
        seen_flag.touch()
    except OSError:
        # Best effort only: a read-only project directory must not turn a
        # finished benchmark run into a crash. The banner will simply be
        # shown again next time.
        pass


def main():
    """Command-line entry point.

    Recognised arguments (read from ``sys.argv[1:]``):
      * ``file.py::func`` — explicit benchmark target (may be repeated)
      * ``--compare`` — passed to ``benchmark_function``; implied by
        ``--hook-mode``
      * ``--history`` — print stored history and return
      * ``--install-hooks`` / ``--uninstall-hooks`` — manage the pre-push
        hook and return
      * ``--precise`` — precise mode; defaults ``repeat`` to 5
      * ``--repeat N`` — number of repeats (values below 1 are raised to 1;
        a missing or non-integer value falls back to 5)
      * ``--call EXPR`` — call expression forwarded as ``user_call``
      * ``--hook-mode`` — quiet mode used by the git hook; restricts targets
        to changed files when ``git_changed_files()`` reports any
      * ``--skip-hooks`` — in hook mode, disables the changed-file filter and
        the blocking exit status

    Exit status: 1 when running in hook mode without ``--skip-hooks`` and
    ``print_table`` reports a blocking result, or on malformed ``--call`` /
    target arguments; otherwise 0. The install/uninstall/history actions
    return normally instead of calling ``sys.exit``.
    """
    args = sys.argv[1:]
    cfg = load_config()

    hook_mode = "--hook-mode" in args
    compare = "--compare" in args or hook_mode
    history = "--history" in args
    skip_hooks = "--skip-hooks" in args
    do_install = "--install-hooks" in args
    do_uninstall = "--uninstall-hooks" in args
    precise = "--precise" in args

    repeat = 1
    if "--repeat" in args:
        idx = args.index("--repeat")
        try:
            # Zero or negative repeats would leave nothing to measure.
            repeat = max(1, int(args[idx + 1]))
        except (IndexError, ValueError):
            repeat = 5
    elif precise:
        repeat = 5

    user_call = None
    if "--call" in args:
        idx = args.index("--call")
        try:
            user_call = args[idx + 1]
        except IndexError:
            print(RED(" --call requires an expression, e.g. --call \"my_func(data, 100)\""))
            sys.exit(1)

    # Positional targets are every non-flag argument, except the value that
    # follows a value-taking option.
    skip_next = False
    targets_arg = []
    for a in args:
        if skip_next:
            skip_next = False
            continue
        if a in ("--repeat", "--call"):
            skip_next = True
            continue
        if not a.startswith("--"):
            targets_arg.append(a)

    if do_install:
        install_hooks()
        return

    if do_uninstall:
        uninstall_hooks()
        return

    if history:
        print_history(cfg)
        return

    if targets_arg:
        targets = []
        for t in targets_arg:
            if "::" not in t:
                print(RED(f" Invalid target format '{t}'. Use file.py::func_name"))
                sys.exit(1)
            fp, fn = t.split("::", 1)
            targets.append({"file": fp, "function": fn})
    else:
        targets = tracked_functions(cfg)
        if not targets:
            print(YELLOW(" No tracked functions found in acenly.yml"))
            print(DIM(" Add functions under benchmark.track or run:"))
            print(DIM(" python3 bench.py myfile.py::my_function"))
            sys.exit(0)

    if hook_mode and not skip_hooks:
        changed = git_changed_files()
        if changed:
            # Only benchmark functions whose file is part of the change set.
            targets = [t for t in targets if t.get("file", "") in changed]
            if not targets:
                sys.exit(0)

    commit = git_commit_hash()
    branch = git_branch()

    if not hook_mode:
        print()
        print(BOLD(" ACENLY Bench"))
        print(DIM(f" {branch}@{commit[:8]} · Python {platform.python_version()}"))
        if precise:
            print(YELLOW(f" ⚡ Precise mode (repeat={repeat} · 3 s window per run)"))
        print()

    outcomes = []
    for entry in targets:
        fp = entry.get("file", "")
        fn = entry.get("function", "")
        if not hook_mode:
            hint = " (precise)" if precise else ""
            # No newline: the line is overwritten with the result via "\r".
            print(DIM(f" Benchmarking {fn}{hint}..."), end="", flush=True)

        outcome = benchmark_function(
            fp, fn, cfg,
            compare=compare, hook_mode=hook_mode,
            precise=precise, repeat=repeat,
            user_call=user_call,
        )
        outcomes.append(outcome)

        if not hook_mode:
            status = outcome.get("status", "error")
            icon = "✓" if status in ("ok", "baseline", "improvement") else "⚠" if "warn" in status else "✗"
            col = GREEN if icon == "✓" else YELLOW if icon == "⚠" else RED
            print(f"\r {col(icon)} {fn}")

    any_block = print_table(outcomes, commit, branch, precise=precise)

    # Optimizer waitlist banner: shown once per project, never in hook mode.
    if not hook_mode:
        _show_waitlist_once()

    if hook_mode and not skip_hooks and any_block:
        sys.exit(1)

    sys.exit(0)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""ACENLY Bench — SQLite storage for benchmark results."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import uuid
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
# The database file lives in the directory the tool is run from (the user's
# project), not inside the installed package. Resolved once, at import time.
DB_PATH = Path.cwd().joinpath("bench.db")


def _db():
    """Open and return a new SQLite connection to ``DB_PATH``."""
    connection = sqlite3.connect(DB_PATH)
    return connection
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def init_db() -> None:
    """Create the ``benchmarks`` table if it does not already exist.

    The connection is closed explicitly: using a ``sqlite3`` connection as a
    context manager only commits or rolls back the transaction, it does not
    close the connection.
    """
    conn = _db()
    try:
        with conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS benchmarks (
                    id TEXT PRIMARY KEY,
                    created_at TEXT NOT NULL,
                    commit_hash TEXT NOT NULL,
                    branch TEXT,
                    function_name TEXT NOT NULL,
                    file_path TEXT NOT NULL,
                    median_ms REAL NOT NULL,
                    p95_ms REAL,
                    min_ms REAL,
                    max_ms REAL,
                    trials INTEGER,
                    speedup_vs_prev REAL,
                    python_version TEXT
                );
            """)
    finally:
        conn.close()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def save_benchmark(
    *,
    function_name: str,
    file_path: str,
    commit_hash: str,
    branch: str,
    median_ms: float,
    p95_ms: Optional[float] = None,
    min_ms: Optional[float] = None,
    max_ms: Optional[float] = None,
    trials: Optional[int] = None,
    speedup_vs_prev: Optional[float] = None,
    python_version: Optional[str] = None,
) -> None:
    """Insert one benchmark result row into the ``benchmarks`` table.

    A random UUID is generated for ``id`` and the current UTC time (ISO 8601)
    is stored in ``created_at``. All arguments are keyword-only and map
    one-to-one onto the columns of the same name.

    The connection is closed explicitly: ``with conn`` only commits (or rolls
    back on error) and leaves the connection open.
    """
    row = (
        str(uuid.uuid4()),
        datetime.now(timezone.utc).isoformat(),
        commit_hash, branch, function_name, file_path,
        median_ms, p95_ms, min_ms, max_ms, trials,
        speedup_vs_prev, python_version,
    )
    conn = _db()
    try:
        with conn:
            conn.execute(
                """INSERT INTO benchmarks
                (id, created_at, commit_hash, branch, function_name, file_path,
                median_ms, p95_ms, min_ms, max_ms, trials, speedup_vs_prev, python_version)
                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                row,
            )
    finally:
        conn.close()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_benchmark_last(function_name: str, file_path: str) -> Optional[dict]:
    """Return the newest stored row for a function, or ``None`` if there is none.

    Rows are matched on both ``function_name`` and ``file_path`` and ordered
    by ``created_at`` descending. The row is returned as a plain ``dict``
    keyed by column name.

    The connection is closed explicitly, because the ``sqlite3`` connection
    context manager does not close it.
    """
    conn = _db()
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            """SELECT * FROM benchmarks
            WHERE function_name = ? AND file_path = ?
            ORDER BY created_at DESC LIMIT 1""",
            (function_name, file_path),
        ).fetchone()
    finally:
        conn.close()
    return dict(row) if row is not None else None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_benchmark_history(function_name: str, file_path: str, limit: int = 20) -> list[dict]:
    """Return up to ``limit`` stored rows for a function, newest first.

    Rows are matched on both ``function_name`` and ``file_path`` and ordered
    by ``created_at`` descending. Each row is returned as a plain ``dict``
    keyed by column name; the list is empty when nothing matches.

    The connection is closed explicitly, because the ``sqlite3`` connection
    context manager does not close it.
    """
    conn = _db()
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            """SELECT * FROM benchmarks
            WHERE function_name = ? AND file_path = ?
            ORDER BY created_at DESC LIMIT ?""",
            (function_name, file_path, limit),
        ).fetchall()
    finally:
        conn.close()
    return [dict(r) for r in rows]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: acenly-bench
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Track Python function performance across commits. Catch regressions before they ship.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://acenly.com
|
|
7
|
+
Project-URL: Repository, https://github.com/YannWeiler/acenly_bench
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: pyyaml
|
|
11
|
+
|
|
12
|
+
# ACENLY Bench
|
|
13
|
+
|
|
14
|
+
Track Python function performance across commits. Catch regressions before they ship.
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
ACENLY Bench · main@a3f9c1b2 · Python 3.12.3
|
|
18
|
+
|
|
19
|
+
Function Median P95 vs last commit
|
|
20
|
+
──────────────────────────────────────────────────────────────
|
|
21
|
+
deduplicate_users 1.24 ms 1.51 ms ▼ 38.1% faster
|
|
22
|
+
build_index 84.21 ms 97.13 ms ▲ 12.4% slower ⚠
|
|
23
|
+
filter_records 0.88 ms 0.92 ms ~ +0.2%
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## What it does
|
|
29
|
+
|
|
30
|
+
- **Precise timing** — runs each function in an isolated subprocess with warmup, batch timing, and outlier trimming
|
|
31
|
+
- **Regression detection** — compares against the last stored result and flags slowdowns
|
|
32
|
+
- **Git hook integration** — blocks pushes automatically when a regression exceeds your threshold
|
|
33
|
+
- **History tracking** — stores all runs in a local SQLite database so you can see trends over time
|
|
34
|
+
|
|
35
|
+
No external services. No accounts. Runs entirely on your machine.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install pyyaml
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
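Then copy `bench.py` and `bench_db.py` into your project root. Alternatively, install the packaged release with `pip install acenly-bench`, which provides the `acenly-bench` command.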
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Quick start
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Benchmark a specific function right now
|
|
53
|
+
python3 bench.py myfile.py::my_function
|
|
54
|
+
|
|
55
|
+
# Show history for tracked functions
|
|
56
|
+
python3 bench.py --history
|
|
57
|
+
|
|
58
|
+
# High-precision mode (batch timing, 3s measurement window)
|
|
59
|
+
python3 bench.py --precise
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Track functions automatically
|
|
65
|
+
|
|
66
|
+
Create `acenly.yml` in your project root (see `acenly.example.yml`):
|
|
67
|
+
|
|
68
|
+
```yaml
|
|
69
|
+
benchmark:
|
|
70
|
+
track:
|
|
71
|
+
- file: mymodule/utils.py
|
|
72
|
+
function: process_batch
|
|
73
|
+
- file: mymodule/search.py
|
|
74
|
+
function: find_duplicates
|
|
75
|
+
|
|
76
|
+
regression_warn: 0.10 # warn if 10% slower
|
|
77
|
+
regression_block: 0.25 # block push if 25% slower
|
|
78
|
+
noise_floor: 0.05 # ignore changes smaller than 5%
|
|
79
|
+
trials: 5
|
|
80
|
+
warmup: 2
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Then install the git hook:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
python3 bench.py --install-hooks
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
From now on, every `git push` runs the benchmark automatically. If a function regresses past the block threshold, the push is stopped.
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
python3 bench.py --skip-hooks # bypass when needed
|
|
93
|
+
python3 bench.py --uninstall-hooks
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Options
|
|
99
|
+
|
|
100
|
+
| Flag | Description |
|
|
101
|
+
|------|-------------|
|
|
102
|
+
| `file.py::func` | Benchmark a specific function |
|
|
103
|
+
| `--compare` | Show diff vs last stored result |
|
|
104
|
+
| `--history` | Print run history for tracked functions |
|
|
105
|
+
| `--precise` | High-precision mode: adaptive warmup, batch timing |
|
|
106
|
+
| `--repeat N` | Run N times, keep the best result |
|
|
107
|
+
| `--install-hooks` | Install git pre-push hook |
|
|
108
|
+
| `--uninstall-hooks` | Remove git pre-push hook |
|
|
109
|
+
| `--skip-hooks` | Run benchmark without enforcing regression block |
|
|
110
|
+
| `--hook-mode` | Used internally by the git hook |
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## How timing works
|
|
115
|
+
|
|
116
|
+
Each benchmark runs in a **separate subprocess** to avoid interference from the parent process. In normal mode, each function is called `trials` times with `warmup` discarded runs first. In `--precise` mode:
|
|
117
|
+
|
|
118
|
+
1. **Adaptive warmup** — keeps running until timing variance drops below 3% (CPU caches settled)
|
|
119
|
+
2. **Batch calibration** — finds a batch size so each measurement window takes ~50ms, then divides — this eliminates OS scheduler jitter from individual timings
|
|
120
|
+
3. **3-second window** — collects ~60 batch measurements, reports the minimum (least OS interference)
|
|
121
|
+
|
|
122
|
+
Results are stored in `bench.db` alongside your project.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
acenly_bench/__init__.py
|
|
4
|
+
acenly_bench/__main__.py
|
|
5
|
+
acenly_bench/bench.py
|
|
6
|
+
acenly_bench/db.py
|
|
7
|
+
acenly_bench.egg-info/PKG-INFO
|
|
8
|
+
acenly_bench.egg-info/SOURCES.txt
|
|
9
|
+
acenly_bench.egg-info/dependency_links.txt
|
|
10
|
+
acenly_bench.egg-info/entry_points.txt
|
|
11
|
+
acenly_bench.egg-info/requires.txt
|
|
12
|
+
acenly_bench.egg-info/top_level.txt
|
|
13
|
+
acenly_bench/hooks/pre-push.sh
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pyyaml
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
acenly_bench
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "acenly-bench"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Track Python function performance across commits. Catch regressions before they ship."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
dependencies = ["pyyaml"]
|
|
13
|
+
|
|
14
|
+
[project.urls]
|
|
15
|
+
Homepage = "https://acenly.com"
|
|
16
|
+
Repository = "https://github.com/YannWeiler/acenly_bench"
|
|
17
|
+
|
|
18
|
+
[project.scripts]
|
|
19
|
+
acenly-bench = "acenly_bench.bench:main"
|
|
20
|
+
|
|
21
|
+
[tool.setuptools.packages.find]
|
|
22
|
+
include = ["acenly_bench*"]
|
|
23
|
+
|
|
24
|
+
[tool.setuptools.package-data]
|
|
25
|
+
acenly_bench = ["hooks/*"]
|