macsmart 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- macsmart/__init__.py +3 -0
- macsmart/benchmark/__init__.py +1 -0
- macsmart/benchmark/energy.py +140 -0
- macsmart/benchmark/energy_compare.py +182 -0
- macsmart/benchmark/report.py +253 -0
- macsmart/benchmark/runner.py +148 -0
- macsmart/cli.py +569 -0
- macsmart/dashboard/__init__.py +1 -0
- macsmart/dashboard/server.py +159 -0
- macsmart/dashboard/static/dashboard.js +174 -0
- macsmart/dashboard/static/index.html +60 -0
- macsmart/dashboard/static/style.css +138 -0
- macsmart/data/__init__.py +0 -0
- macsmart/data/registry.yaml +216 -0
- macsmart/manager/__init__.py +1 -0
- macsmart/manager/download.py +224 -0
- macsmart/manager/runtime.py +101 -0
- macsmart/manager/session.py +201 -0
- macsmart/manager/swapper.py +153 -0
- macsmart/manager/watchdog.py +194 -0
- macsmart/profiler/__init__.py +1 -0
- macsmart/profiler/battery.py +65 -0
- macsmart/profiler/memory.py +99 -0
- macsmart/profiler/system.py +226 -0
- macsmart/profiler/thermal.py +103 -0
- macsmart/recommender/__init__.py +1 -0
- macsmart/recommender/engine.py +114 -0
- macsmart/recommender/models_db.py +94 -0
- macsmart/recommender/task_router.py +100 -0
- macsmart/ui/__init__.py +1 -0
- macsmart/ui/terminal.py +675 -0
- macsmart-0.1.0.dist-info/METADATA +153 -0
- macsmart-0.1.0.dist-info/RECORD +37 -0
- macsmart-0.1.0.dist-info/WHEEL +5 -0
- macsmart-0.1.0.dist-info/entry_points.txt +2 -0
- macsmart-0.1.0.dist-info/licenses/LICENSE +21 -0
- macsmart-0.1.0.dist-info/top_level.txt +1 -0
macsmart/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Benchmark runner for measuring LLM inference performance."""
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Energy measurement using powermetrics wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import platform
|
|
6
|
+
import re
|
|
7
|
+
import subprocess
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class EnergyMeasurement:
    """Power and energy readings taken over one sampling window.

    Power values are in watts and energy is in joules. Every power/energy
    field may be ``None`` when no reading could be obtained.
    """

    # CPU power in watts, or None when not reported.
    cpu_power_watts: float | None
    # GPU power in watts, or None when not reported.
    gpu_power_watts: float | None
    # Apple Neural Engine power in watts, or None when not reported.
    ane_power_watts: float | None
    # Overall power in watts, or None when it could not be determined.
    total_power_watts: float | None
    # Length of the sampling window in seconds.
    duration_sec: float
    # Energy used over the window in joules, or None when unknown.
    total_energy_joules: float | None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _parse_powermetrics_output(output: str) -> dict[str, float]:
    """Extract per-component power readings from powermetrics text output.

    Recognises lines shaped like:
        CPU Power: 1234 mW
        GPU Power: 567 mW
        ANE Power: 89 mW
        Combined Power (CPU + GPU + ANE): 1890 mW

    The text before the word "Power" becomes the (lower-cased, stripped)
    key, so the lines above yield ``cpu``, ``gpu``, ``ane`` and ``combined``.
    When the same key appears on several lines, the last one wins.

    Args:
        output: Raw stdout from powermetrics.

    Returns:
        Dict mapping component names to power in watts. Lines that do not
        match, or whose numeric part is not a valid number, are ignored.
    """
    # Compiled once per call instead of being looked up for every line.
    line_pattern = re.compile(
        r"\s*([\w\s\+\(\)]+?)\s*Power[^:]*:\s*([\d.]+)\s*mW",
        re.IGNORECASE,
    )

    powers: dict[str, float] = {}
    for line in output.splitlines():
        match = line_pattern.match(line)
        if match is None:
            continue
        try:
            milliwatts = float(match.group(2))
        except ValueError:
            # "[\d.]+" also accepts text such as "1.2.3" or "..."; skip the
            # malformed reading rather than aborting the whole parse.
            continue
        component = match.group(1).strip().lower()
        powers[component] = milliwatts / 1000.0  # mW -> W
    return powers
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def measure_energy(duration_sec: float = 10.0) -> EnergyMeasurement:
    """Take one powermetrics sample and summarise the power draw.

    powermetrics is invoked through ``sudo -n`` with the ``cpu_power``
    sampler, asking for a single sample (``-n 1``) whose interval (``-i``)
    is ``duration_sec`` converted to milliseconds.

    Args:
        duration_sec: How long to measure in seconds.

    Returns:
        EnergyMeasurement with the parsed readings. All power/energy fields
        are None when powermetrics is unavailable, times out, cannot be
        started, or produces no parsable data.
    """

    def _no_data() -> EnergyMeasurement:
        # Result returned whenever no reading could be taken.
        return EnergyMeasurement(
            cpu_power_watts=None,
            gpu_power_watts=None,
            ane_power_watts=None,
            total_power_watts=None,
            duration_sec=duration_sec,
            total_energy_joules=None,
        )

    if not is_powermetrics_available():
        return _no_data()

    command = [
        "sudo", "-n", "powermetrics",
        "--samplers", "cpu_power",
        "-i", str(int(duration_sec * 1000)),
        "-n", "1",
    ]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            # Leave headroom beyond the sampling window itself.
            timeout=duration_sec + 10,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return _no_data()

    readings = _parse_powermetrics_output(completed.stdout)
    cpu_w, gpu_w, ane_w = (readings.get(name) for name in ("cpu", "gpu", "ane"))

    # Prefer a reading that powermetrics itself labels as combined/total.
    total_w: float | None = next(
        (
            watts
            for label, watts in readings.items()
            if "combined" in label or "total" in label
        ),
        None,
    )

    # Otherwise fall back to adding up whichever components were reported.
    if total_w is None:
        reported = [w for w in (cpu_w, gpu_w, ane_w) if w is not None]
        if reported:
            total_w = sum(reported)

    if total_w is None:
        joules = None
    else:
        joules = round(total_w * duration_sec, 2)

    return EnergyMeasurement(
        cpu_power_watts=cpu_w,
        gpu_power_watts=gpu_w,
        ane_power_watts=ane_w,
        total_power_watts=total_w,
        duration_sec=duration_sec,
        total_energy_joules=joules,
    )
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def is_powermetrics_available() -> bool:
    """Report whether powermetrics can be run through passwordless sudo.

    On any platform other than Darwin this returns False without running
    anything. On Darwin a minimal probe (one sample, 1 ms interval) is run
    via ``sudo -n``.

    Returns:
        True if the probe exits with status 0; False if it fails, times out
        after 5 seconds, or the executable cannot be found.
    """
    if platform.system() != "Darwin":
        return False

    probe = ["sudo", "-n", "powermetrics", "--samplers", "cpu_power", "-i", "1", "-n", "1"]
    try:
        completed = subprocess.run(probe, capture_output=True, timeout=5)
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return False
    return completed.returncode == 0
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Energy benchmark comparison between battery and AC power."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import asdict, dataclass
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from macsmart.benchmark.energy import EnergyMeasurement, measure_energy
|
|
11
|
+
from macsmart.benchmark.runner import BenchmarkResult, run_benchmark
|
|
12
|
+
|
|
13
|
+
# Default directory for energy result files: "benchmarks" inside the third
# parent directory of this module's resolved path.
_DEFAULT_RESULTS_DIR = Path(__file__).resolve().parent.parent.parent / "benchmarks"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class EnergyBenchmarkResult:
    """A benchmark run bundled with its energy reading and power source."""

    # Inference performance figures for the run.
    benchmark: BenchmarkResult
    # Power/energy reading associated with the run.
    energy: EnergyMeasurement
    # Which power source the machine was on: "battery" or "ac".
    power_source: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class EnergyComparison:
    """Battery and AC energy benchmarks placed side by side, with derived metrics."""

    # Result recorded while running on battery.
    battery: EnergyBenchmarkResult
    # Result recorded while running on AC power.
    ac: EnergyBenchmarkResult
    # ac.tokens_per_sec / battery.tokens_per_sec (None when not computable).
    speed_ratio: float | None
    # ac.ttft - battery.ttft, in milliseconds.
    ttft_delta_ms: float
    # ac.total_energy / battery.total_energy (None when not computable).
    energy_ratio: float | None
    # Tokens per joule on battery (None when energy is unknown).
    efficiency_battery: float | None
    # Tokens per joule on AC (None when energy is unknown).
    efficiency_ac: float | None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def run_energy_benchmark(
    model_repo: str,
    power_source: str,
    prompt: str | None = None,
    max_tokens: int = 256,
    energy_duration_sec: float = 10.0,
) -> EnergyBenchmarkResult:
    """Benchmark a model and attach an energy measurement to the result.

    Args:
        model_repo: HuggingFace repo ID.
        power_source: "battery" or "ac"; stored on the result as given.
        prompt: Custom prompt, forwarded to the benchmark runner.
        max_tokens: Maximum tokens to generate.
        energy_duration_sec: Duration for energy sampling.

    Returns:
        Combined benchmark + energy result.
    """
    # NOTE(review): the energy sample is taken after run_benchmark() has
    # returned, not while it is running — confirm this is intended.
    performance = run_benchmark(model_repo, prompt=prompt, max_tokens=max_tokens)
    return EnergyBenchmarkResult(
        benchmark=performance,
        energy=measure_energy(duration_sec=energy_duration_sec),
        power_source=power_source,
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def compare_energy(
    battery: EnergyBenchmarkResult,
    ac: EnergyBenchmarkResult,
) -> EnergyComparison:
    """Derive comparison metrics from a battery run and an AC run.

    Args:
        battery: Benchmark result on battery power.
        ac: Benchmark result on AC power.

    Returns:
        EnergyComparison holding both inputs plus:
        the AC/battery throughput ratio (None unless battery throughput is
        positive), the TTFT difference in ms, the AC/battery energy ratio
        (None unless battery energy is positive and AC energy is known), and
        tokens-per-joule for each side (None unless that side's energy is
        positive). Ratios are rounded to 3 decimals, the TTFT delta to 2.
    """

    def _tokens_per_joule(run: EnergyBenchmarkResult) -> float | None:
        # Efficiency is only defined for a known, positive energy figure.
        joules = run.energy.total_energy_joules
        if joules and joules > 0:
            return round(run.benchmark.generation_tokens / joules, 3)
        return None

    battery_tps = battery.benchmark.tokens_per_sec
    ac_tps = ac.benchmark.tokens_per_sec
    if battery_tps > 0:
        speed_ratio = round(ac_tps / battery_tps, 3)
    else:
        speed_ratio = None

    ttft_delta = round(ac.benchmark.ttft_ms - battery.benchmark.ttft_ms, 2)

    battery_joules = battery.energy.total_energy_joules
    ac_joules = ac.energy.total_energy_joules
    energy_ratio: float | None = None
    if battery_joules and battery_joules > 0 and ac_joules is not None:
        energy_ratio = round(ac_joules / battery_joules, 3)

    return EnergyComparison(
        battery=battery,
        ac=ac,
        speed_ratio=speed_ratio,
        ttft_delta_ms=ttft_delta,
        energy_ratio=energy_ratio,
        efficiency_battery=_tokens_per_joule(battery),
        efficiency_ac=_tokens_per_joule(ac),
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def save_energy_result(
    result: EnergyBenchmarkResult,
    output_dir: Path | None = None,
) -> Path:
    """Write an energy benchmark result to a timestamped JSON file.

    The file is named ``energy_<power_source>_<model>_<timestamp>.json``,
    where ``/`` and spaces in the model name are replaced by underscores and
    the timestamp is the current UTC time formatted as ``YYYYMMDD_HHMMSS``.

    Args:
        result: The energy benchmark result.
        output_dir: Output directory, created if missing. Defaults to benchmarks/.

    Returns:
        Path to the saved JSON file.
    """
    target_dir = output_dir or _DEFAULT_RESULTS_DIR
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    model_slug = result.benchmark.model_name.replace("/", "_").replace(" ", "_")
    destination = target_dir / f"energy_{result.power_source}_{model_slug}_{stamp}.json"

    payload = {
        "benchmark": asdict(result.benchmark),
        "energy": asdict(result.energy),
        "power_source": result.power_source,
        "timestamp": stamp,
    }
    with open(destination, "w") as handle:
        json.dump(payload, handle, indent=2)

    return destination
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def load_energy_results(
    results_dir: Path | None = None,
) -> list[EnergyBenchmarkResult]:
    """Load all energy benchmark results from disk.

    Only files matching ``energy_*.json`` are considered. They are visited
    in reverse lexicographic path order. Loading is best-effort: a file that
    cannot be read, is not valid JSON, does not hold a JSON object, or does
    not match the expected structure is skipped rather than aborting the
    whole load.

    Args:
        results_dir: Directory to load from. Defaults to benchmarks/.

    Returns:
        List of EnergyBenchmarkResult objects (empty if the directory does
        not exist).
    """
    directory = results_dir or _DEFAULT_RESULTS_DIR
    if not directory.exists():
        return []

    results: list[EnergyBenchmarkResult] = []
    for path in sorted(directory.glob("energy_*.json"), reverse=True):
        try:
            with open(path) as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and undecodable bytes.
            continue

        # Valid JSON is not necessarily an object (e.g. a bare string or
        # number); such payloads cannot be a saved result.
        if not isinstance(data, dict):
            continue

        try:
            results.append(
                EnergyBenchmarkResult(
                    benchmark=BenchmarkResult(**data["benchmark"]),
                    energy=EnergyMeasurement(**data["energy"]),
                    power_source=data["power_source"],
                )
            )
        except (TypeError, KeyError):
            # Missing sections or unexpected/missing fields.
            continue

    return results
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
"""Generate benchmark reports in JSON and markdown formats."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import asdict
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from macsmart.benchmark.runner import BenchmarkResult
|
|
11
|
+
|
|
12
|
+
# Default output directory for benchmark results.
|
|
13
|
+
# Resolves to "benchmarks" inside the third parent directory of this module's
# resolved path.
_DEFAULT_RESULTS_DIR = Path(__file__).resolve().parent.parent.parent / "benchmarks"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def save_result(result: BenchmarkResult, output_dir: Path | None = None) -> Path:
    """Persist a benchmark result as a timestamped JSON file.

    The file is named ``<model>_<quantization>_<timestamp>.json``, where
    ``/`` and spaces in the model name are replaced by underscores and the
    timestamp is the current UTC time formatted as ``YYYYMMDD_HHMMSS``. The
    same timestamp is stored in the JSON under the ``timestamp`` key.

    Args:
        result: The benchmark result to save.
        output_dir: Optional output directory, created if missing.
            Defaults to benchmarks/.

    Returns:
        Path to the saved JSON file.
    """
    target_dir = output_dir or _DEFAULT_RESULTS_DIR
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    model_slug = result.model_name.replace("/", "_").replace(" ", "_")
    destination = target_dir / f"{model_slug}_{result.quantization}_{stamp}.json"

    payload = asdict(result)
    payload["timestamp"] = stamp

    with open(destination, "w") as handle:
        json.dump(payload, handle, indent=2)

    return destination
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def generate_markdown_report(results: list[BenchmarkResult]) -> str:
    """Render benchmark results as a markdown comparison table.

    The report has a title, a "Generated" line with the current UTC time,
    and one table row per result in the order given.

    Args:
        results: List of benchmark results to compare.

    Returns:
        Markdown-formatted report string, or a short placeholder message
        when ``results`` is empty.
    """
    if not results:
        return "No benchmark results to report."

    generated_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    table_header = (
        "| Model | Quant | TTFT (ms) | Tokens/s | Peak Mem (GB) | "
        "Swap (GB) | Gen Tokens | Duration (s) |"
    )
    # Numeric columns are right-aligned (trailing colon).
    table_divider = (
        "|-------|-------|----------:|----------:|--------------:|"
        "----------:|-----------:|-------------:|"
    )
    table_rows = [
        (
            f"| {entry.model_name} | {entry.quantization} | "
            f"{entry.ttft_ms:.1f} | {entry.tokens_per_sec:.1f} | "
            f"{entry.peak_memory_gb:.2f} | {entry.swap_used_gb:.2f} | "
            f"{entry.generation_tokens} | {entry.duration_sec:.1f} |"
        )
        for entry in results
    ]

    document = [
        "# Benchmark Report",
        "",
        f"Generated: {generated_at}",
        "",
        table_header,
        table_divider,
        *table_rows,
        "",  # trailing newline after the table
    ]
    return "\n".join(document)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def generate_energy_comparison_report(comparison: object) -> str:
    """Generate a markdown report comparing battery vs AC energy usage.

    The report contains a metric table (tokens/s, TTFT, peak memory, power,
    energy) with battery, AC and delta (AC minus battery) columns, followed
    by the efficiency and speed-ratio figures that are available.

    Power and energy readings that are missing (``None``) are shown as
    ``N/A``, and the delta is ``N/A`` unless both sides were measured, so an
    unmeasured run is never reported as drawing 0.0 W / 0.0 J.

    Args:
        comparison: EnergyComparison object.

    Returns:
        Markdown-formatted report string.
    """

    def _optional_row(label: str, batt: float | None, mains: float | None) -> str:
        # Format one table row whose readings may be missing.
        batt_text = "N/A" if batt is None else f"{batt:.1f}"
        mains_text = "N/A" if mains is None else f"{mains:.1f}"
        if batt is None or mains is None:
            delta_text = "N/A"
        else:
            delta_text = f"{mains - batt:+.1f}"
        return f"| {label} | {batt_text} | {mains_text} | {delta_text} |"

    lines: list[str] = []
    lines.append("# Energy Comparison Report")
    lines.append("")
    lines.append(f"Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}")
    lines.append("")
    lines.append(f"**Model:** {comparison.battery.benchmark.model_name}")
    lines.append("")

    lines.append("| Metric | Battery | AC | Delta |")
    lines.append("|--------|--------:|---:|------:|")

    b = comparison.battery.benchmark
    a = comparison.ac.benchmark

    lines.append(
        f"| Tokens/s | {b.tokens_per_sec:.1f} | {a.tokens_per_sec:.1f} | "
        f"{a.tokens_per_sec - b.tokens_per_sec:+.1f} |"
    )
    lines.append(
        f"| TTFT (ms) | {b.ttft_ms:.1f} | {a.ttft_ms:.1f} | "
        f"{a.ttft_ms - b.ttft_ms:+.1f} |"
    )
    lines.append(
        f"| Peak Memory (GB) | {b.peak_memory_gb:.2f} | {a.peak_memory_gb:.2f} | "
        f"{a.peak_memory_gb - b.peak_memory_gb:+.2f} |"
    )

    be = comparison.battery.energy
    ae = comparison.ac.energy
    lines.append(_optional_row("Power (W)", be.total_power_watts, ae.total_power_watts))
    lines.append(_optional_row("Energy (J)", be.total_energy_joules, ae.total_energy_joules))

    lines.append("")

    # Summary figures are listed only when they could be computed.
    if comparison.efficiency_battery is not None:
        lines.append(f"**Battery efficiency:** {comparison.efficiency_battery:.3f} tokens/J")
    if comparison.efficiency_ac is not None:
        lines.append(f"**AC efficiency:** {comparison.efficiency_ac:.3f} tokens/J")
    if comparison.speed_ratio is not None:
        lines.append(f"**Speed ratio (AC/Battery):** {comparison.speed_ratio:.3f}x")

    lines.append("")
    return "\n".join(lines)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def load_results(results_dir: Path | None = None) -> list[BenchmarkResult]:
    """Load all stored benchmark results from the benchmarks/ directory.

    Every ``*.json`` file in the directory is tried. Loading is best-effort:
    files that cannot be read, are not valid JSON, do not contain a JSON
    object, or whose fields do not match BenchmarkResult are skipped.

    Args:
        results_dir: Optional directory to load from. Defaults to benchmarks/.

    Returns:
        List of previously saved BenchmarkResult objects, sorted by their
        stored ``timestamp`` value (newest first). Empty if the directory
        does not exist.
    """
    directory = results_dir or _DEFAULT_RESULTS_DIR
    if not directory.exists():
        return []

    stamped: list[tuple[str, BenchmarkResult]] = []
    for path in directory.glob("*.json"):
        try:
            with open(path) as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and undecodable bytes.
            continue

        # A JSON array or scalar cannot be a saved result; without this
        # guard the pop() below would raise and abort the whole load.
        if not isinstance(data, dict):
            continue

        # Coerce to str so the sort key stays comparable across files.
        timestamp = str(data.pop("timestamp", ""))
        try:
            stamped.append((timestamp, BenchmarkResult(**data)))
        except TypeError:
            # Unexpected or missing fields (e.g. a different result format).
            continue

    # Sort newest first
    stamped.sort(key=lambda item: item[0], reverse=True)
    return [result for _, result in stamped]
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def load_result_from_file(path: Path) -> BenchmarkResult:
    """Load a single benchmark result from a JSON file.

    Any ``timestamp`` key in the file is discarded before the remaining
    fields are passed to BenchmarkResult.

    Args:
        path: Path to the JSON result file.

    Returns:
        BenchmarkResult loaded from the file.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file is not valid JSON, does not contain a JSON
            object, or its fields do not match BenchmarkResult.
    """
    if not path.exists():
        raise FileNotFoundError(f"Result file not found: {path}")

    try:
        with open(path) as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in {path}: {e}") from e

    # Valid JSON may still be an array or scalar; report that as the
    # documented ValueError instead of failing on dict-only operations.
    if not isinstance(data, dict):
        raise ValueError(
            f"Invalid benchmark result in {path}: "
            f"expected a JSON object, got {type(data).__name__}"
        )

    data.pop("timestamp", None)
    try:
        return BenchmarkResult(**data)
    except TypeError as e:
        raise ValueError(f"Invalid benchmark result in {path}: {e}") from e
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def get_latest_result_paths(
    n: int = 2,
    results_dir: Path | None = None,
) -> list[Path]:
    """Return the N most recent non-energy benchmark result file paths.

    Energy result files are recognised by their ``energy_`` filename prefix
    (the naming used when energy results are saved and loaded). Recency is
    determined by file modification time.

    Args:
        n: Number of result paths to return.
        results_dir: Directory to search. Defaults to benchmarks/.

    Returns:
        List of Path objects sorted newest-first.

    Raises:
        ValueError: If the directory does not exist or fewer than N result
            files exist.
    """
    directory = results_dir or _DEFAULT_RESULTS_DIR
    if not directory.exists():
        raise ValueError(f"Results directory does not exist: {directory}")

    # Match on the prefix only: a substring test would also discard regular
    # results whose model name happens to contain "energy".
    candidates = [
        p
        for p in directory.glob("*.json")
        if not p.name.lower().startswith("energy_")
    ]
    candidates.sort(key=lambda p: p.stat().st_mtime, reverse=True)

    if len(candidates) < n:
        raise ValueError(
            f"Need at least {n} result files, found {len(candidates)} in {directory}"
        )

    return candidates[:n]
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def results_to_json_api(results_dir: Path | None = None) -> list[dict]:
    """Convert stored benchmark results into plain dicts for the dashboard JSON API.

    Args:
        results_dir: Directory to load results from.

    Returns:
        One dict per loaded result, in the order returned by load_results(),
        ready for JSON serialization.
    """
    return [asdict(entry) for entry in load_results(results_dir=results_dir)]
|