macsmart 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
macsmart/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """MacSmart LLM — Memory-intelligent local LLM orchestration for 16GB Apple Silicon Macs."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """Benchmark runner for measuring LLM inference performance."""
@@ -0,0 +1,140 @@
1
+ """Energy measurement using powermetrics wrapper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import platform
6
+ import re
7
+ import subprocess
8
+ from dataclasses import dataclass
9
+
10
+
11
@dataclass
class EnergyMeasurement:
    """Power and energy readings for one sampling window.

    Every power/energy field may be ``None`` when no reading is available.
    """

    # Per-component power, in watts.
    cpu_power_watts: float | None
    gpu_power_watts: float | None
    ane_power_watts: float | None  # ANE = Apple Neural Engine
    # Combined power, in watts.
    total_power_watts: float | None
    # Length of the sampling window, in seconds.
    duration_sec: float
    # Energy over the sampling window, in joules.
    total_energy_joules: float | None
21
+
22
+
23
+ def _parse_powermetrics_output(output: str) -> dict[str, float]:
24
+ """Parse power values from powermetrics output.
25
+
26
+ Looks for lines like:
27
+ CPU Power: 1234 mW
28
+ GPU Power: 567 mW
29
+ ANE Power: 89 mW
30
+ Combined Power (CPU + GPU + ANE): 1890 mW
31
+
32
+ Args:
33
+ output: Raw stdout from powermetrics.
34
+
35
+ Returns:
36
+ Dict mapping component names to power in watts.
37
+ """
38
+ powers: dict[str, float] = {}
39
+ for line in output.splitlines():
40
+ # Match lines like "CPU Power: 1234 mW"
41
+ match = re.match(r"\s*([\w\s\+\(\)]+?)\s*Power[^:]*:\s*([\d.]+)\s*mW", line, re.IGNORECASE)
42
+ if match:
43
+ name = match.group(1).strip().lower()
44
+ mw = float(match.group(2))
45
+ powers[name] = mw / 1000.0 # Convert mW to W
46
+ return powers
47
+
48
+
49
def measure_energy(duration_sec: float = 10.0) -> EnergyMeasurement:
    """Sample power with powermetrics and derive the energy used.

    Runs ``sudo -n powermetrics`` for a single sample whose interval is
    ``duration_sec``, so passwordless sudo is required.

    Args:
        duration_sec: How long to measure in seconds.

    Returns:
        EnergyMeasurement with power data. Power and energy fields are None
        when powermetrics is unavailable, the command times out or cannot
        be found, or nothing could be parsed from its output.
    """
    # Handed back on every path where no reading can be taken.
    no_reading = EnergyMeasurement(
        cpu_power_watts=None,
        gpu_power_watts=None,
        ane_power_watts=None,
        total_power_watts=None,
        duration_sec=duration_sec,
        total_energy_joules=None,
    )
    if not is_powermetrics_available():
        return no_reading

    command = [
        "sudo", "-n", "powermetrics",
        "--samplers", "cpu_power",
        "-i", str(int(duration_sec * 1000)),
        "-n", "1",
    ]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=duration_sec + 10,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return no_reading

    readings = _parse_powermetrics_output(completed.stdout)
    cpu_w, gpu_w, ane_w = (readings.get(part) for part in ("cpu", "gpu", "ane"))

    # Prefer a combined/total figure reported by powermetrics itself.
    total_w = next(
        (
            watts
            for label, watts in readings.items()
            if "combined" in label or "total" in label
        ),
        None,
    )
    if total_w is None:
        # Otherwise add up whichever components were reported.
        reported = [w for w in (cpu_w, gpu_w, ane_w) if w is not None]
        if reported:
            total_w = sum(reported)

    if total_w is None:
        energy_j = None
    else:
        energy_j = round(total_w * duration_sec, 2)

    return EnergyMeasurement(
        cpu_power_watts=cpu_w,
        gpu_power_watts=gpu_w,
        ane_power_watts=ane_w,
        total_power_watts=total_w,
        duration_sec=duration_sec,
        total_energy_joules=energy_j,
    )
121
+
122
+
123
def is_powermetrics_available() -> bool:
    """Report whether powermetrics can be run through passwordless sudo.

    Always False off macOS. On macOS a one-sample probe is executed and
    its exit status decides the answer.

    Returns:
        True if energy measurement is possible.
    """
    if platform.system() == "Darwin":
        probe = ["sudo", "-n", "powermetrics", "--samplers", "cpu_power", "-i", "1", "-n", "1"]
        try:
            exit_code = subprocess.run(probe, capture_output=True, timeout=5).returncode
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return False
        return exit_code == 0
    return False
@@ -0,0 +1,182 @@
1
+ """Energy benchmark comparison between battery and AC power."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import asdict, dataclass
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+
10
+ from macsmart.benchmark.energy import EnergyMeasurement, measure_energy
11
+ from macsmart.benchmark.runner import BenchmarkResult, run_benchmark
12
+
13
+ _DEFAULT_RESULTS_DIR = Path(__file__).resolve().parent.parent.parent / "benchmarks"
14
+
15
+
16
@dataclass
class EnergyBenchmarkResult:
    """One benchmark run paired with its energy reading and power source."""

    # Inference benchmark metrics.
    benchmark: BenchmarkResult
    # Power/energy reading associated with the run.
    energy: EnergyMeasurement
    # Either "battery" or "ac".
    power_source: str
23
+
24
+
25
@dataclass
class EnergyComparison:
    """Battery and AC energy benchmarks with the metrics derived from them."""

    battery: EnergyBenchmarkResult
    ac: EnergyBenchmarkResult
    # ac.tokens_per_sec / battery.tokens_per_sec
    speed_ratio: float | None
    # ac.ttft - battery.ttft
    ttft_delta_ms: float
    # ac.total_energy / battery.total_energy
    energy_ratio: float | None
    # tokens / joule on battery
    efficiency_battery: float | None
    # tokens / joule on AC
    efficiency_ac: float | None
36
+
37
+
38
def run_energy_benchmark(
    model_repo: str,
    power_source: str,
    prompt: str | None = None,
    max_tokens: int = 256,
    energy_duration_sec: float = 10.0,
) -> EnergyBenchmarkResult:
    """Run a benchmark with energy measurement.

    Power is sampled on a worker thread while the benchmark runs. Sampling
    only after the benchmark has returned would record the power draw of a
    machine that has already finished generating.

    Args:
        model_repo: HuggingFace repo ID.
        power_source: "battery" or "ac".
        prompt: Custom prompt.
        max_tokens: Maximum tokens to generate.
        energy_duration_sec: Duration for energy sampling. The sampling
            window starts together with the benchmark and is independent of
            how long the benchmark itself takes.

    Returns:
        Combined benchmark + energy result.
    """
    # Local import keeps this block self-contained.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        # Start sampling first so the window overlaps the inference work.
        energy_future = pool.submit(measure_energy, duration_sec=energy_duration_sec)
        benchmark = run_benchmark(model_repo, prompt=prompt, max_tokens=max_tokens)
        energy = energy_future.result()

    return EnergyBenchmarkResult(
        benchmark=benchmark,
        energy=energy,
        power_source=power_source,
    )
65
+
66
+
67
def compare_energy(
    battery: EnergyBenchmarkResult,
    ac: EnergyBenchmarkResult,
) -> EnergyComparison:
    """Derive speed, latency and energy comparisons between two runs.

    Ratios are AC relative to battery. A ratio or efficiency is None when
    the value it would divide by is missing or not positive.

    Args:
        battery: Benchmark result on battery power.
        ac: Benchmark result on AC power.

    Returns:
        EnergyComparison with computed ratios and deltas.
    """
    battery_run, ac_run = battery.benchmark, ac.benchmark

    speed_ratio: float | None = None
    if battery_run.tokens_per_sec > 0:
        speed_ratio = round(ac_run.tokens_per_sec / battery_run.tokens_per_sec, 3)
    ttft_delta = round(ac_run.ttft_ms - battery_run.ttft_ms, 2)

    battery_joules = battery.energy.total_energy_joules
    ac_joules = ac.energy.total_energy_joules
    battery_usable = bool(battery_joules) and battery_joules > 0
    ac_usable = bool(ac_joules) and ac_joules > 0

    if battery_usable and ac_joules is not None:
        energy_ratio = round(ac_joules / battery_joules, 3)
    else:
        energy_ratio = None

    if battery_usable:
        eff_battery = round(battery_run.generation_tokens / battery_joules, 3)
    else:
        eff_battery = None

    if ac_usable:
        eff_ac = round(ac_run.generation_tokens / ac_joules, 3)
    else:
        eff_ac = None

    return EnergyComparison(
        battery=battery,
        ac=ac,
        speed_ratio=speed_ratio,
        ttft_delta_ms=ttft_delta,
        energy_ratio=energy_ratio,
        efficiency_battery=eff_battery,
        efficiency_ac=eff_ac,
    )
110
+
111
+
112
def save_energy_result(
    result: EnergyBenchmarkResult,
    output_dir: Path | None = None,
) -> Path:
    """Write an energy benchmark result to a timestamped JSON file.

    The file is named ``energy_<power_source>_<model>_<UTC timestamp>.json``,
    with "/" and " " in the model name replaced by "_". The output
    directory is created if needed.

    Args:
        result: The energy benchmark result.
        output_dir: Output directory. Defaults to benchmarks/.

    Returns:
        Path to the saved JSON file.
    """
    target_dir = output_dir or _DEFAULT_RESULTS_DIR
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    model_slug = result.benchmark.model_name.translate(str.maketrans("/ ", "__"))
    destination = target_dir / f"energy_{result.power_source}_{model_slug}_{stamp}.json"

    payload = {
        "benchmark": asdict(result.benchmark),
        "energy": asdict(result.energy),
        "power_source": result.power_source,
        "timestamp": stamp,
    }
    destination.write_text(json.dumps(payload, indent=2))
    return destination
144
+
145
+
146
def load_energy_results(
    results_dir: Path | None = None,
) -> list[EnergyBenchmarkResult]:
    """Load all energy benchmark results from disk.

    Only files matching ``energy_*.json`` are read, in reverse filename
    order. Files that cannot be read or decoded, that do not hold a JSON
    object, or whose contents do not fit the result dataclasses are skipped
    rather than aborting the whole load.

    Args:
        results_dir: Directory to load from. Defaults to benchmarks/.

    Returns:
        List of EnergyBenchmarkResult objects.
    """
    directory = results_dir or _DEFAULT_RESULTS_DIR
    if not directory.exists():
        return []

    results: list[EnergyBenchmarkResult] = []
    for path in sorted(directory.glob("energy_*.json"), reverse=True):
        try:
            with open(path) as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers both malformed JSON and undecodable bytes.
            continue

        # Valid JSON need not be an object; a bare string or number has no
        # keys to read and used to escape as an AttributeError.
        if not isinstance(data, dict):
            continue

        try:
            benchmark_fields = data["benchmark"]
            energy_fields = data["energy"]
            power_source = data["power_source"]
            result = EnergyBenchmarkResult(
                benchmark=BenchmarkResult(**benchmark_fields),
                energy=EnergyMeasurement(**energy_fields),
                power_source=power_source,
            )
        except (TypeError, KeyError):
            continue
        results.append(result)

    return results
@@ -0,0 +1,253 @@
1
+ """Generate benchmark reports in JSON and markdown formats."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import asdict
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+
10
+ from macsmart.benchmark.runner import BenchmarkResult
11
+
12
+ # Default output directory for benchmark results.
13
+ _DEFAULT_RESULTS_DIR = Path(__file__).resolve().parent.parent.parent / "benchmarks"
14
+
15
+
16
def save_result(result: BenchmarkResult, output_dir: Path | None = None) -> Path:
    """Write a benchmark result to a timestamped JSON file.

    The file is named ``<model>_<quantization>_<UTC timestamp>.json``, with
    "/" and " " in the model name replaced by "_". The timestamp is also
    stored in the JSON under "timestamp". The output directory is created
    if needed.

    Args:
        result: The benchmark result to save.
        output_dir: Optional output directory. Defaults to benchmarks/.

    Returns:
        Path to the saved JSON file.
    """
    target_dir = output_dir or _DEFAULT_RESULTS_DIR
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    model_slug = result.model_name.translate(str.maketrans("/ ", "__"))
    destination = target_dir / f"{model_slug}_{result.quantization}_{stamp}.json"

    payload = {**asdict(result), "timestamp": stamp}
    destination.write_text(json.dumps(payload, indent=2))
    return destination
43
+
44
+
45
def generate_markdown_report(results: list[BenchmarkResult]) -> str:
    """Render benchmark results as a markdown comparison table.

    Args:
        results: List of benchmark results to compare.

    Returns:
        Markdown-formatted report string: a title, a UTC generation time
        and one table row per result, or a short notice when ``results``
        is empty.
    """
    if not results:
        return "No benchmark results to report."

    generated_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    preamble = [
        "# Benchmark Report",
        "",
        f"Generated: {generated_at}",
        "",
        "| Model | Quant | TTFT (ms) | Tokens/s | Peak Mem (GB) | "
        "Swap (GB) | Gen Tokens | Duration (s) |",
        "|-------|-------|----------:|----------:|--------------:|"
        "----------:|-----------:|-------------:|",
    ]
    rows = [
        f"| {entry.model_name} | {entry.quantization} | "
        f"{entry.ttft_ms:.1f} | {entry.tokens_per_sec:.1f} | "
        f"{entry.peak_memory_gb:.2f} | {entry.swap_used_gb:.2f} | "
        f"{entry.generation_tokens} | {entry.duration_sec:.1f} |"
        for entry in results
    ]
    # The trailing empty item makes the report end with a newline.
    return "\n".join([*preamble, *rows, ""])
81
+
82
+
83
def generate_energy_comparison_report(comparison: object) -> str:
    """Generate a markdown report comparing battery vs AC energy usage.

    Missing power or energy readings are shown as ``n/a``, and so is any
    delta that depends on one. Rendering them as ``0.0`` would present an
    absent measurement as a real zero-watt reading.

    Args:
        comparison: EnergyComparison object.

    Returns:
        Markdown-formatted report string.
    """

    def cell(value: float | None, spec: str) -> str:
        # Format one table cell, marking absent values explicitly.
        return "n/a" if value is None else format(value, spec)

    def row(label: str, batt: float | None, ac: float | None, spec: str) -> str:
        # Build a "| label | battery | ac | delta |" table row.
        delta = None if batt is None or ac is None else ac - batt
        return f"| {label} | {cell(batt, spec)} | {cell(ac, spec)} | {cell(delta, '+' + spec)} |"

    b = comparison.battery.benchmark
    a = comparison.ac.benchmark
    be = comparison.battery.energy
    ae = comparison.ac.energy

    lines: list[str] = [
        "# Energy Comparison Report",
        "",
        f"Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}",
        "",
        f"**Model:** {b.model_name}",
        "",
        "| Metric | Battery | AC | Delta |",
        "|--------|--------:|---:|------:|",
        row("Tokens/s", b.tokens_per_sec, a.tokens_per_sec, ".1f"),
        row("TTFT (ms)", b.ttft_ms, a.ttft_ms, ".1f"),
        row("Peak Memory (GB)", b.peak_memory_gb, a.peak_memory_gb, ".2f"),
        row("Power (W)", be.total_power_watts, ae.total_power_watts, ".1f"),
        row("Energy (J)", be.total_energy_joules, ae.total_energy_joules, ".1f"),
        "",
    ]

    if comparison.efficiency_battery is not None:
        lines.append(f"**Battery efficiency:** {comparison.efficiency_battery:.3f} tokens/J")
    if comparison.efficiency_ac is not None:
        lines.append(f"**AC efficiency:** {comparison.efficiency_ac:.3f} tokens/J")
    if comparison.speed_ratio is not None:
        lines.append(f"**Speed ratio (AC/Battery):** {comparison.speed_ratio:.3f}x")

    lines.append("")
    return "\n".join(lines)
144
+
145
+
146
def load_results(results_dir: Path | None = None) -> list[BenchmarkResult]:
    """Load all stored benchmark results from the benchmarks/ directory.

    Every ``*.json`` file in the directory is tried. Files that cannot be
    read or decoded, that do not hold a JSON object, or whose keys do not
    match BenchmarkResult are skipped rather than aborting the whole load.

    Args:
        results_dir: Optional directory to load from. Defaults to benchmarks/.

    Returns:
        List of previously saved BenchmarkResult objects, sorted by
        timestamp (newest first).
    """
    directory = results_dir or _DEFAULT_RESULTS_DIR
    if not directory.exists():
        return []

    stamped: list[tuple[str, BenchmarkResult]] = []
    for path in directory.glob("*.json"):
        try:
            with open(path) as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers both malformed JSON and undecodable bytes.
            continue

        # Valid JSON need not be an object; lists, strings and numbers
        # cannot be expanded into BenchmarkResult fields.
        if not isinstance(data, dict):
            continue

        # Coerce to str so that an odd timestamp type cannot break the sort.
        timestamp = str(data.pop("timestamp", ""))
        try:
            stamped.append((timestamp, BenchmarkResult(**data)))
        except TypeError:
            continue

    # Sort newest first
    stamped.sort(key=lambda item: item[0], reverse=True)
    return [result for _, result in stamped]
178
+
179
+
180
def load_result_from_file(path: Path) -> BenchmarkResult:
    """Load a single benchmark result from a JSON file.

    Args:
        path: Path to the JSON result file.

    Returns:
        BenchmarkResult loaded from the file.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file is not valid benchmark JSON. This covers
            malformed JSON, a top-level value that is not a JSON object,
            and keys that do not match BenchmarkResult.
    """
    if not path.exists():
        raise FileNotFoundError(f"Result file not found: {path}")

    try:
        with open(path) as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in {path}: {e}") from e

    # A list, string or number is valid JSON but not a result. Reject it
    # here so callers get the documented ValueError, not a TypeError or
    # AttributeError from the code below.
    if not isinstance(data, dict):
        raise ValueError(
            f"Invalid benchmark result in {path}: expected a JSON object, "
            f"got {type(data).__name__}"
        )

    data.pop("timestamp", None)
    try:
        return BenchmarkResult(**data)
    except TypeError as e:
        raise ValueError(f"Invalid benchmark result in {path}: {e}") from e
207
+
208
+
209
def get_latest_result_paths(
    n: int = 2,
    results_dir: Path | None = None,
) -> list[Path]:
    """Return the N most recent non-energy benchmark result file paths.

    Energy results are recognised by their ``energy_`` filename prefix
    (matched case-insensitively). A benchmark file whose model name merely
    contains the word "energy" is still treated as a regular result.
    Recency is decided by file modification time.

    Args:
        n: Number of result paths to return. Must not be negative.
        results_dir: Directory to search. Defaults to benchmarks/.

    Returns:
        List of Path objects sorted newest-first.

    Raises:
        ValueError: If ``n`` is negative, the directory does not exist, or
            fewer than N result files exist.
    """
    if n < 0:
        # A negative n would otherwise slice from the wrong end of the list.
        raise ValueError(f"n must be non-negative, got {n}")

    directory = results_dir or _DEFAULT_RESULTS_DIR
    if not directory.exists():
        raise ValueError(f"Results directory does not exist: {directory}")

    candidates = [
        p
        for p in directory.glob("*.json")
        if not p.name.lower().startswith("energy_")
    ]
    candidates.sort(key=lambda p: p.stat().st_mtime, reverse=True)

    if len(candidates) < n:
        raise ValueError(
            f"Need at least {n} result files, found {len(candidates)} in {directory}"
        )

    return candidates[:n]
241
+
242
+
243
def results_to_json_api(results_dir: Path | None = None) -> list[dict]:
    """Return stored benchmark results as plain dicts for the dashboard API.

    Args:
        results_dir: Directory to load results from.

    Returns:
        List of result dicts ready for JSON serialization, in the order
        produced by ``load_results``.
    """
    loaded = load_results(results_dir=results_dir)
    return list(map(asdict, loaded))