darwin-perf 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- darwin_perf-0.2.0/LICENSE +21 -0
- darwin_perf-0.2.0/PKG-INFO +271 -0
- darwin_perf-0.2.0/README.md +236 -0
- darwin_perf-0.2.0/pyproject.toml +44 -0
- darwin_perf-0.2.0/setup.cfg +4 -0
- darwin_perf-0.2.0/setup.py +14 -0
- darwin_perf-0.2.0/src/darwin_perf/__init__.py +381 -0
- darwin_perf-0.2.0/src/darwin_perf/__main__.py +4 -0
- darwin_perf-0.2.0/src/darwin_perf/_native.c +1394 -0
- darwin_perf-0.2.0/src/darwin_perf/cli.py +174 -0
- darwin_perf-0.2.0/src/darwin_perf/gui.py +331 -0
- darwin_perf-0.2.0/src/darwin_perf/py.typed +0 -0
- darwin_perf-0.2.0/src/darwin_perf/tui.py +332 -0
- darwin_perf-0.2.0/src/darwin_perf.egg-info/PKG-INFO +271 -0
- darwin_perf-0.2.0/src/darwin_perf.egg-info/SOURCES.txt +18 -0
- darwin_perf-0.2.0/src/darwin_perf.egg-info/dependency_links.txt +1 -0
- darwin_perf-0.2.0/src/darwin_perf.egg-info/entry_points.txt +2 -0
- darwin_perf-0.2.0/src/darwin_perf.egg-info/requires.txt +10 -0
- darwin_perf-0.2.0/src/darwin_perf.egg-info/top_level.txt +1 -0
- darwin_perf-0.2.0/tests/test_basic.py +79 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Adam Mikulis
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: darwin-perf
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: System performance monitoring for macOS Apple Silicon — GPU, CPU, memory, energy, disk I/O via Mach APIs. No sudo needed.
|
|
5
|
+
Author: Adam Mikulis
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/adammikulis/darwin-perf
|
|
8
|
+
Project-URL: Issues, https://github.com/adammikulis/darwin-perf/issues
|
|
9
|
+
Keywords: macos,darwin,gpu,cpu,monitoring,performance,apple-silicon,metal,mlx,pytorch,mps,m1,m2,m3,m4,m5
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: MacOS
|
|
15
|
+
Classifier: Programming Language :: C
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: System :: Monitoring
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Requires-Python: >=3.9
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Provides-Extra: tui
|
|
28
|
+
Requires-Dist: textual>=0.40; extra == "tui"
|
|
29
|
+
Provides-Extra: gui
|
|
30
|
+
Requires-Dist: pywebview>=5.0; extra == "gui"
|
|
31
|
+
Provides-Extra: all
|
|
32
|
+
Requires-Dist: textual>=0.40; extra == "all"
|
|
33
|
+
Requires-Dist: pywebview>=5.0; extra == "all"
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# darwin-perf
|
|
37
|
+
|
|
38
|
+
Per-process GPU utilization, CPU, memory, and energy monitoring for macOS Apple Silicon. **No sudo needed.**
|
|
39
|
+
|
|
40
|
+
Reads GPU client data directly from the IORegistry — the same data source Activity Monitor uses. Auto-discovers every process using the GPU.
|
|
41
|
+
|
|
42
|
+
## Install
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install darwin-perf
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from darwin_perf import snapshot
|
|
52
|
+
|
|
53
|
+
# One call — auto-discovers all GPU processes, returns utilization %
|
|
54
|
+
for proc in snapshot():
|
|
55
|
+
print(f"{proc['name']:20s} GPU {proc['gpu_percent']:5.1f}% "
|
|
56
|
+
f"CPU {proc['cpu_percent']:5.1f}% {proc['memory_mb']:.0f}MB "
|
|
57
|
+
f"{proc['energy_w']:.1f}W")
|
|
58
|
+
|
|
59
|
+
# Example output:
|
|
60
|
+
# python3.12 GPU 85.7% CPU 102.3% 2048MB 12.3W
|
|
61
|
+
# WindowServer GPU 3.2% CPU 0.1% 45MB 0.4W
|
|
62
|
+
# Code Helper (GPU GPU 1.9% CPU 0.1% 670MB 0.1W
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Each dict in the list contains: `pid`, `name`, `gpu_percent`, `cpu_percent`, `memory_mb`, `energy_w`, `threads`, and `gpu_ns`.
|
|
66
|
+
|
|
67
|
+
### GPU Power & Frequency
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from darwin_perf import gpu_power
|
|
71
|
+
|
|
72
|
+
power = gpu_power(interval=1.0) # samples over 1 second
|
|
73
|
+
print(f"GPU Power: {power['gpu_power_w']:.2f}W")
|
|
74
|
+
print(f"GPU Freq: {power['gpu_freq_mhz']} MHz (weighted avg)")
|
|
75
|
+
print(f"Throttled: {power['throttled']}")
|
|
76
|
+
for state in power['frequency_states']:
|
|
77
|
+
print(f" {state['state']}: {state['freq_mhz']}MHz ({state['residency_pct']:.1f}%)")
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Uses `libIOReport.dylib` (the same data source as `powermetrics`). No sudo needed.
|
|
81
|
+
|
|
82
|
+
### GPU DVFS Frequency Table
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from darwin_perf import gpu_freq_table
|
|
86
|
+
|
|
87
|
+
for i, freq in enumerate(gpu_freq_table()):
|
|
88
|
+
print(f"P{i+1}: {freq} MHz")
|
|
89
|
+
# P1: 338 MHz, P2: 618 MHz, ..., P15: 1578 MHz
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### System-Wide GPU
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from darwin_perf import system_gpu_stats
|
|
96
|
+
|
|
97
|
+
stats = system_gpu_stats()
|
|
98
|
+
print(f"{stats['model']} ({stats['gpu_core_count']} cores)")
|
|
99
|
+
print(f"Device utilization: {stats['device_utilization']}%")
|
|
100
|
+
print(f"GPU VRAM in use: {stats['in_use_system_memory']/1e9:.1f}GB")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### GpuMonitor (continuous monitoring)
|
|
104
|
+
|
|
105
|
+
Monitor your own training process — no PID lookup needed:
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from darwin_perf import GpuMonitor
|
|
109
|
+
|
|
110
|
+
mon = GpuMonitor() # monitors the current process
|
|
111
|
+
for batch in dataloader:
|
|
112
|
+
train(batch)
|
|
113
|
+
print(f"GPU: {mon.sample():.1f}%")
|
|
114
|
+
|
|
115
|
+
# Or as a context manager:
|
|
116
|
+
with GpuMonitor() as mon:
|
|
117
|
+
mon.start(interval=2.0) # background sampling
|
|
118
|
+
train()
|
|
119
|
+
print(mon.summary()) # {'gpu_pct_avg': 42.1, 'gpu_pct_max': 87.3, ...}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Low-Level Access
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from darwin_perf import gpu_clients, gpu_time_ns, proc_info
|
|
126
|
+
|
|
127
|
+
# All GPU clients (raw cumulative data)
|
|
128
|
+
for c in gpu_clients():
|
|
129
|
+
print(f"PID {c['pid']} ({c['name']}): {c['gpu_ns']/1e9:.1f}s GPU time")
|
|
130
|
+
|
|
131
|
+
# Per-process stats (CPU, memory, energy, disk I/O, threads)
|
|
132
|
+
info = proc_info(1234)
|
|
133
|
+
print(f"Memory: {info['memory']/1e6:.0f}MB, Energy: {info['energy_nj']/1e9:.1f}J")
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## CLI
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
darwin-perf                 # live per-process GPU monitor — auto-discovers all GPU processes
|
|
140
|
+
darwin-perf --once          # single snapshot
|
|
141
|
+
darwin-perf --tui           # rich terminal UI with sparkline graphs (pip install darwin-perf[tui])
|
|
142
|
+
darwin-perf --gui           # native floating window monitor (pip install darwin-perf[gui])
|
|
143
|
+
darwin-perf -i 1            # 1-second update interval
|
|
144
|
+
darwin-perf --pid 1234      # monitor specific PID
|
|
145
|
+
python -m darwin_perf       # alternative entry point (same as darwin-perf)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## API Reference
|
|
149
|
+
|
|
150
|
+
### Python API
|
|
151
|
+
|
|
152
|
+
| Function | Description |
|
|
153
|
+
|----------|-------------|
|
|
154
|
+
| `snapshot(interval=1.0)` | **One call does it all** — returns `[{'pid', 'name', 'gpu_percent', 'cpu_percent', 'memory_mb', 'energy_w', ...}]` |
|
|
155
|
+
| `snapshot(detailed=True)` | Extended fields: IPC, wakeups, peak memory, neural engine, disk I/O |
|
|
156
|
+
|
|
157
|
+
### C Extension Functions
|
|
158
|
+
|
|
159
|
+
| Function | Description |
|
|
160
|
+
|----------|-------------|
|
|
161
|
+
| `gpu_clients()` | Auto-discover all GPU-active processes: `[{'pid', 'name', 'gpu_ns'}, ...]` |
|
|
162
|
+
| `gpu_time_ns(pid)` | Cumulative GPU nanoseconds for a PID |
|
|
163
|
+
| `gpu_time_ns_multi(pids)` | Batch GPU ns for multiple PIDs (single IORegistry scan) |
|
|
164
|
+
| `cpu_time_ns(pid)` | Cumulative CPU nanoseconds (user + system) |
|
|
165
|
+
| `proc_info(pid)` | Full process stats (CPU, memory, energy, disk, threads) |
|
|
166
|
+
| `system_gpu_stats()` | System GPU: utilization %, VRAM, model, core count |
|
|
167
|
+
| `gpu_power(interval)` | GPU power (watts), frequency (MHz), P-state residency, thermal throttling |
|
|
168
|
+
| `gpu_freq_table()` | GPU DVFS frequency table (MHz per P-state) from pmgr |
|
|
169
|
+
| `ppid(pid)` | Parent process ID for a PID (-1 on error) |
|
|
170
|
+
|
|
171
|
+
### proc_info fields
|
|
172
|
+
|
|
173
|
+
| Field | Description |
|
|
174
|
+
|-------|-------------|
|
|
175
|
+
| **CPU** | |
|
|
176
|
+
| `cpu_ns` | Cumulative CPU time (user + system) in nanoseconds |
|
|
177
|
+
| `cpu_user_ns` | User CPU time |
|
|
178
|
+
| `cpu_system_ns` | System/kernel CPU time |
|
|
179
|
+
| `instructions` | Retired instructions (for IPC calculation) |
|
|
180
|
+
| `cycles` | CPU cycles (for IPC calculation) |
|
|
181
|
+
| `runnable_time` | Time process was runnable but not running (ns) |
|
|
182
|
+
| `billed_system_time` | Billed CPU time (ns) |
|
|
183
|
+
| `serviced_system_time` | Serviced CPU time (ns) |
|
|
184
|
+
| **Memory** | |
|
|
185
|
+
| `memory` | Physical memory footprint (bytes) |
|
|
186
|
+
| `real_memory` | Resident memory (bytes) |
|
|
187
|
+
| `wired_size` | Wired (non-pageable) memory (bytes) |
|
|
188
|
+
| `peak_memory` | Lifetime peak physical footprint (bytes) |
|
|
189
|
+
| `neural_footprint` | Neural Engine memory (bytes) |
|
|
190
|
+
| `pageins` | Page-in count (memory pressure indicator) |
|
|
191
|
+
| **Disk** | |
|
|
192
|
+
| `disk_read_bytes` | Cumulative disk reads |
|
|
193
|
+
| `disk_write_bytes` | Cumulative disk writes |
|
|
194
|
+
| `logical_writes` | Logical writes including CoW (bytes) |
|
|
195
|
+
| **Energy** | |
|
|
196
|
+
| `energy_nj` | Cumulative energy (nanojoules) — delta over time = watts |
|
|
197
|
+
| `idle_wakeups` | Package idle wakeups (energy efficiency metric) |
|
|
198
|
+
| `interrupt_wakeups` | Interrupt wakeups |
|
|
199
|
+
| **Other** | |
|
|
200
|
+
| `threads` | Current thread count |
|
|
201
|
+
|
|
202
|
+
### system_gpu_stats fields
|
|
203
|
+
|
|
204
|
+
| Field | Description |
|
|
205
|
+
|-------|-------------|
|
|
206
|
+
| `model` | GPU model name (e.g., "Apple M4 Max") |
|
|
207
|
+
| `gpu_core_count` | Number of GPU cores |
|
|
208
|
+
| `device_utilization` | Device utilization % (0-100) |
|
|
209
|
+
| `tiler_utilization` | Tiler utilization % |
|
|
210
|
+
| `renderer_utilization` | Renderer utilization % |
|
|
211
|
+
| `alloc_system_memory` | Total GPU-allocated system memory |
|
|
212
|
+
| `in_use_system_memory` | Currently used GPU memory |
|
|
213
|
+
| `in_use_system_memory_driver` | Driver-side in-use memory |
|
|
214
|
+
| `allocated_pb_size` | Parameter buffer allocation (bytes) |
|
|
215
|
+
| `recovery_count` | GPU recovery (crash) count |
|
|
216
|
+
| `last_recovery_time` | Timestamp of last GPU recovery |
|
|
217
|
+
| `split_scene_count` | Tiler split scene events |
|
|
218
|
+
| `tiled_scene_bytes` | Current tiled scene buffer size |
|
|
219
|
+
|
|
220
|
+
## How It Works
|
|
221
|
+
|
|
222
|
+
Apple doesn't provide a public API for per-process GPU metrics on Apple Silicon. The commonly referenced `task_info(TASK_POWER_INFO_V2)` has a `task_gpu_utilisation` field, but Apple never populates it — it always returns 0.
|
|
223
|
+
|
|
224
|
+
The data *does* exist, in the IORegistry. Every Metal command queue creates an `AGXDeviceUserClient` object as a child of the GPU accelerator. You can see them with:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
ioreg -c AGXDeviceUserClient -r -d 0
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Each entry carries:
|
|
231
|
+
|
|
232
|
+
```
|
|
233
|
+
"IOUserClientCreator" = "pid 4245, python3.12"
|
|
234
|
+
"AppUsage" = ({"API"="Metal", "accumulatedGPUTime"=123632000000})
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
`accumulatedGPUTime` is cumulative GPU nanoseconds — sample twice, divide by elapsed time, and you have utilization %. This is world-readable, no sudo or SIP changes needed.
|
|
238
|
+
|
|
239
|
+
**The catch:** `IOServiceGetMatchingServices("AGXDeviceUserClient")` returns 0 results because user client objects are `!registered` in the IOKit matching system. You have to find the parent accelerator first and walk its children:
|
|
240
|
+
|
|
241
|
+
```c
|
|
242
|
+
// Find the AGX accelerator
|
|
243
|
+
IOServiceGetMatchingServices(kIOMainPortDefault,
|
|
244
|
+
IOServiceMatching("AGXAccelerator"), &iter);
|
|
245
|
+
|
|
246
|
+
// Iterate its children in the IOService plane
|
|
247
|
+
io_service_t accel = IOIteratorNext(iter);
|
|
248
|
+
IORegistryEntryGetChildIterator(accel, kIOServicePlane, &child_iter);
|
|
249
|
+
|
|
250
|
+
// Each child is an AGXDeviceUserClient with AppUsage data
|
|
251
|
+
while ((child = IOIteratorNext(child_iter))) {
|
|
252
|
+
CFStringRef creator = IORegistryEntryCreateCFProperty(child,
|
|
253
|
+
CFSTR("IOUserClientCreator"), ...); // "pid 4245, python3.12"
|
|
254
|
+
CFArrayRef usage = IORegistryEntryCreateCFProperty(child,
|
|
255
|
+
CFSTR("AppUsage"), ...); // [{accumulatedGPUTime: ns}]
|
|
256
|
+
}
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
System-wide GPU utilization comes from the accelerator's `PerformanceStatistics` property (`Device Utilization %`, `Tiler Utilization %`, `Renderer Utilization %`).
|
|
260
|
+
|
|
261
|
+
CPU, memory, energy, disk I/O, and thread stats come from `proc_pid_rusage(RUSAGE_INFO_V6)` and `proc_pidinfo(PROC_PIDTASKINFO)` — both unprivileged for same-user processes.
|
|
262
|
+
|
|
263
|
+
## Requirements
|
|
264
|
+
|
|
265
|
+
- macOS with Apple Silicon (M1/M2/M3/M4/M5)
|
|
266
|
+
- Python 3.9+
|
|
267
|
+
- Zero dependencies
|
|
268
|
+
|
|
269
|
+
## License
|
|
270
|
+
|
|
271
|
+
MIT
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# darwin-perf
|
|
2
|
+
|
|
3
|
+
Per-process GPU utilization, CPU, memory, and energy monitoring for macOS Apple Silicon. **No sudo needed.**
|
|
4
|
+
|
|
5
|
+
Reads GPU client data directly from the IORegistry — the same data source Activity Monitor uses. Auto-discovers every process using the GPU.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install darwin-perf
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from darwin_perf import snapshot
|
|
17
|
+
|
|
18
|
+
# One call — auto-discovers all GPU processes, returns utilization %
|
|
19
|
+
for proc in snapshot():
|
|
20
|
+
print(f"{proc['name']:20s} GPU {proc['gpu_percent']:5.1f}% "
|
|
21
|
+
f"CPU {proc['cpu_percent']:5.1f}% {proc['memory_mb']:.0f}MB "
|
|
22
|
+
f"{proc['energy_w']:.1f}W")
|
|
23
|
+
|
|
24
|
+
# Example output:
|
|
25
|
+
# python3.12 GPU 85.7% CPU 102.3% 2048MB 12.3W
|
|
26
|
+
# WindowServer GPU 3.2% CPU 0.1% 45MB 0.4W
|
|
27
|
+
# Code Helper (GPU GPU 1.9% CPU 0.1% 670MB 0.1W
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Each dict in the list contains: `pid`, `name`, `gpu_percent`, `cpu_percent`, `memory_mb`, `energy_w`, `threads`, and `gpu_ns`.
|
|
31
|
+
|
|
32
|
+
### GPU Power & Frequency
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from darwin_perf import gpu_power
|
|
36
|
+
|
|
37
|
+
power = gpu_power(interval=1.0) # samples over 1 second
|
|
38
|
+
print(f"GPU Power: {power['gpu_power_w']:.2f}W")
|
|
39
|
+
print(f"GPU Freq: {power['gpu_freq_mhz']} MHz (weighted avg)")
|
|
40
|
+
print(f"Throttled: {power['throttled']}")
|
|
41
|
+
for state in power['frequency_states']:
|
|
42
|
+
print(f" {state['state']}: {state['freq_mhz']}MHz ({state['residency_pct']:.1f}%)")
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Uses `libIOReport.dylib` (the same data source as `powermetrics`). No sudo needed.
|
|
46
|
+
|
|
47
|
+
### GPU DVFS Frequency Table
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from darwin_perf import gpu_freq_table
|
|
51
|
+
|
|
52
|
+
for i, freq in enumerate(gpu_freq_table()):
|
|
53
|
+
print(f"P{i+1}: {freq} MHz")
|
|
54
|
+
# P1: 338 MHz, P2: 618 MHz, ..., P15: 1578 MHz
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### System-Wide GPU
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from darwin_perf import system_gpu_stats
|
|
61
|
+
|
|
62
|
+
stats = system_gpu_stats()
|
|
63
|
+
print(f"{stats['model']} ({stats['gpu_core_count']} cores)")
|
|
64
|
+
print(f"Device utilization: {stats['device_utilization']}%")
|
|
65
|
+
print(f"GPU VRAM in use: {stats['in_use_system_memory']/1e9:.1f}GB")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### GpuMonitor (continuous monitoring)
|
|
69
|
+
|
|
70
|
+
Monitor your own training process — no PID lookup needed:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from darwin_perf import GpuMonitor
|
|
74
|
+
|
|
75
|
+
mon = GpuMonitor() # monitors the current process
|
|
76
|
+
for batch in dataloader:
|
|
77
|
+
train(batch)
|
|
78
|
+
print(f"GPU: {mon.sample():.1f}%")
|
|
79
|
+
|
|
80
|
+
# Or as a context manager:
|
|
81
|
+
with GpuMonitor() as mon:
|
|
82
|
+
mon.start(interval=2.0) # background sampling
|
|
83
|
+
train()
|
|
84
|
+
print(mon.summary()) # {'gpu_pct_avg': 42.1, 'gpu_pct_max': 87.3, ...}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Low-Level Access
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from darwin_perf import gpu_clients, gpu_time_ns, proc_info
|
|
91
|
+
|
|
92
|
+
# All GPU clients (raw cumulative data)
|
|
93
|
+
for c in gpu_clients():
|
|
94
|
+
print(f"PID {c['pid']} ({c['name']}): {c['gpu_ns']/1e9:.1f}s GPU time")
|
|
95
|
+
|
|
96
|
+
# Per-process stats (CPU, memory, energy, disk I/O, threads)
|
|
97
|
+
info = proc_info(1234)
|
|
98
|
+
print(f"Memory: {info['memory']/1e6:.0f}MB, Energy: {info['energy_nj']/1e9:.1f}J")
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## CLI
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
darwin-perf                 # live per-process GPU monitor — auto-discovers all GPU processes
|
|
105
|
+
darwin-perf --once          # single snapshot
|
|
106
|
+
darwin-perf --tui           # rich terminal UI with sparkline graphs (pip install darwin-perf[tui])
|
|
107
|
+
darwin-perf --gui           # native floating window monitor (pip install darwin-perf[gui])
|
|
108
|
+
darwin-perf -i 1            # 1-second update interval
|
|
109
|
+
darwin-perf --pid 1234      # monitor specific PID
|
|
110
|
+
python -m darwin_perf       # alternative entry point (same as darwin-perf)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## API Reference
|
|
114
|
+
|
|
115
|
+
### Python API
|
|
116
|
+
|
|
117
|
+
| Function | Description |
|
|
118
|
+
|----------|-------------|
|
|
119
|
+
| `snapshot(interval=1.0)` | **One call does it all** — returns `[{'pid', 'name', 'gpu_percent', 'cpu_percent', 'memory_mb', 'energy_w', ...}]` |
|
|
120
|
+
| `snapshot(detailed=True)` | Extended fields: IPC, wakeups, peak memory, neural engine, disk I/O |
|
|
121
|
+
|
|
122
|
+
### C Extension Functions
|
|
123
|
+
|
|
124
|
+
| Function | Description |
|
|
125
|
+
|----------|-------------|
|
|
126
|
+
| `gpu_clients()` | Auto-discover all GPU-active processes: `[{'pid', 'name', 'gpu_ns'}, ...]` |
|
|
127
|
+
| `gpu_time_ns(pid)` | Cumulative GPU nanoseconds for a PID |
|
|
128
|
+
| `gpu_time_ns_multi(pids)` | Batch GPU ns for multiple PIDs (single IORegistry scan) |
|
|
129
|
+
| `cpu_time_ns(pid)` | Cumulative CPU nanoseconds (user + system) |
|
|
130
|
+
| `proc_info(pid)` | Full process stats (CPU, memory, energy, disk, threads) |
|
|
131
|
+
| `system_gpu_stats()` | System GPU: utilization %, VRAM, model, core count |
|
|
132
|
+
| `gpu_power(interval)` | GPU power (watts), frequency (MHz), P-state residency, thermal throttling |
|
|
133
|
+
| `gpu_freq_table()` | GPU DVFS frequency table (MHz per P-state) from pmgr |
|
|
134
|
+
| `ppid(pid)` | Parent process ID for a PID (-1 on error) |
|
|
135
|
+
|
|
136
|
+
### proc_info fields
|
|
137
|
+
|
|
138
|
+
| Field | Description |
|
|
139
|
+
|-------|-------------|
|
|
140
|
+
| **CPU** | |
|
|
141
|
+
| `cpu_ns` | Cumulative CPU time (user + system) in nanoseconds |
|
|
142
|
+
| `cpu_user_ns` | User CPU time |
|
|
143
|
+
| `cpu_system_ns` | System/kernel CPU time |
|
|
144
|
+
| `instructions` | Retired instructions (for IPC calculation) |
|
|
145
|
+
| `cycles` | CPU cycles (for IPC calculation) |
|
|
146
|
+
| `runnable_time` | Time process was runnable but not running (ns) |
|
|
147
|
+
| `billed_system_time` | Billed CPU time (ns) |
|
|
148
|
+
| `serviced_system_time` | Serviced CPU time (ns) |
|
|
149
|
+
| **Memory** | |
|
|
150
|
+
| `memory` | Physical memory footprint (bytes) |
|
|
151
|
+
| `real_memory` | Resident memory (bytes) |
|
|
152
|
+
| `wired_size` | Wired (non-pageable) memory (bytes) |
|
|
153
|
+
| `peak_memory` | Lifetime peak physical footprint (bytes) |
|
|
154
|
+
| `neural_footprint` | Neural Engine memory (bytes) |
|
|
155
|
+
| `pageins` | Page-in count (memory pressure indicator) |
|
|
156
|
+
| **Disk** | |
|
|
157
|
+
| `disk_read_bytes` | Cumulative disk reads |
|
|
158
|
+
| `disk_write_bytes` | Cumulative disk writes |
|
|
159
|
+
| `logical_writes` | Logical writes including CoW (bytes) |
|
|
160
|
+
| **Energy** | |
|
|
161
|
+
| `energy_nj` | Cumulative energy (nanojoules) — delta over time = watts |
|
|
162
|
+
| `idle_wakeups` | Package idle wakeups (energy efficiency metric) |
|
|
163
|
+
| `interrupt_wakeups` | Interrupt wakeups |
|
|
164
|
+
| **Other** | |
|
|
165
|
+
| `threads` | Current thread count |
|
|
166
|
+
|
|
167
|
+
### system_gpu_stats fields
|
|
168
|
+
|
|
169
|
+
| Field | Description |
|
|
170
|
+
|-------|-------------|
|
|
171
|
+
| `model` | GPU model name (e.g., "Apple M4 Max") |
|
|
172
|
+
| `gpu_core_count` | Number of GPU cores |
|
|
173
|
+
| `device_utilization` | Device utilization % (0-100) |
|
|
174
|
+
| `tiler_utilization` | Tiler utilization % |
|
|
175
|
+
| `renderer_utilization` | Renderer utilization % |
|
|
176
|
+
| `alloc_system_memory` | Total GPU-allocated system memory |
|
|
177
|
+
| `in_use_system_memory` | Currently used GPU memory |
|
|
178
|
+
| `in_use_system_memory_driver` | Driver-side in-use memory |
|
|
179
|
+
| `allocated_pb_size` | Parameter buffer allocation (bytes) |
|
|
180
|
+
| `recovery_count` | GPU recovery (crash) count |
|
|
181
|
+
| `last_recovery_time` | Timestamp of last GPU recovery |
|
|
182
|
+
| `split_scene_count` | Tiler split scene events |
|
|
183
|
+
| `tiled_scene_bytes` | Current tiled scene buffer size |
|
|
184
|
+
|
|
185
|
+
## How It Works
|
|
186
|
+
|
|
187
|
+
Apple doesn't provide a public API for per-process GPU metrics on Apple Silicon. The commonly referenced `task_info(TASK_POWER_INFO_V2)` has a `task_gpu_utilisation` field, but Apple never populates it — it always returns 0.
|
|
188
|
+
|
|
189
|
+
The data *does* exist, in the IORegistry. Every Metal command queue creates an `AGXDeviceUserClient` object as a child of the GPU accelerator. You can see them with:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
ioreg -c AGXDeviceUserClient -r -d 0
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
Each entry carries:
|
|
196
|
+
|
|
197
|
+
```
|
|
198
|
+
"IOUserClientCreator" = "pid 4245, python3.12"
|
|
199
|
+
"AppUsage" = ({"API"="Metal", "accumulatedGPUTime"=123632000000})
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
`accumulatedGPUTime` is cumulative GPU nanoseconds — sample twice, divide by elapsed time, and you have utilization %. This is world-readable, no sudo or SIP changes needed.
|
|
203
|
+
|
|
204
|
+
**The catch:** `IOServiceGetMatchingServices("AGXDeviceUserClient")` returns 0 results because user client objects are `!registered` in the IOKit matching system. You have to find the parent accelerator first and walk its children:
|
|
205
|
+
|
|
206
|
+
```c
|
|
207
|
+
// Find the AGX accelerator
|
|
208
|
+
IOServiceGetMatchingServices(kIOMainPortDefault,
|
|
209
|
+
IOServiceMatching("AGXAccelerator"), &iter);
|
|
210
|
+
|
|
211
|
+
// Iterate its children in the IOService plane
|
|
212
|
+
io_service_t accel = IOIteratorNext(iter);
|
|
213
|
+
IORegistryEntryGetChildIterator(accel, kIOServicePlane, &child_iter);
|
|
214
|
+
|
|
215
|
+
// Each child is an AGXDeviceUserClient with AppUsage data
|
|
216
|
+
while ((child = IOIteratorNext(child_iter))) {
|
|
217
|
+
CFStringRef creator = IORegistryEntryCreateCFProperty(child,
|
|
218
|
+
CFSTR("IOUserClientCreator"), ...); // "pid 4245, python3.12"
|
|
219
|
+
CFArrayRef usage = IORegistryEntryCreateCFProperty(child,
|
|
220
|
+
CFSTR("AppUsage"), ...); // [{accumulatedGPUTime: ns}]
|
|
221
|
+
}
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
System-wide GPU utilization comes from the accelerator's `PerformanceStatistics` property (`Device Utilization %`, `Tiler Utilization %`, `Renderer Utilization %`).
|
|
225
|
+
|
|
226
|
+
CPU, memory, energy, disk I/O, and thread stats come from `proc_pid_rusage(RUSAGE_INFO_V6)` and `proc_pidinfo(PROC_PIDTASKINFO)` — both unprivileged for same-user processes.
|
|
227
|
+
|
|
228
|
+
## Requirements
|
|
229
|
+
|
|
230
|
+
- macOS with Apple Silicon (M1/M2/M3/M4/M5)
|
|
231
|
+
- Python 3.9+
|
|
232
|
+
- Zero dependencies
|
|
233
|
+
|
|
234
|
+
## License
|
|
235
|
+
|
|
236
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "darwin-perf"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "System performance monitoring for macOS Apple Silicon — GPU, CPU, memory, energy, disk I/O via Mach APIs. No sudo needed."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{name = "Adam Mikulis"}]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: MacOS",
|
|
19
|
+
"Programming Language :: C",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: System :: Monitoring",
|
|
27
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
28
|
+
]
|
|
29
|
+
keywords = ["macos", "darwin", "gpu", "cpu", "monitoring", "performance", "apple-silicon", "metal", "mlx", "pytorch", "mps", "m1", "m2", "m3", "m4", "m5"]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
tui = ["textual>=0.40"]
|
|
33
|
+
gui = ["pywebview>=5.0"]
|
|
34
|
+
all = ["textual>=0.40", "pywebview>=5.0"]
|
|
35
|
+
|
|
36
|
+
[project.scripts]
|
|
37
|
+
darwin-perf = "darwin_perf.cli:main"
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://github.com/adammikulis/darwin-perf"
|
|
41
|
+
Issues = "https://github.com/adammikulis/darwin-perf/issues"
|
|
42
|
+
|
|
43
|
+
[tool.setuptools.packages.find]
|
|
44
|
+
where = ["src"]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Build script for the native C extension."""
|
|
2
|
+
|
|
3
|
+
from setuptools import Extension, setup
|
|
4
|
+
|
|
5
|
+
setup(
|
|
6
|
+
ext_modules=[
|
|
7
|
+
Extension(
|
|
8
|
+
"darwin_perf._native",
|
|
9
|
+
sources=["src/darwin_perf/_native.c"],
|
|
10
|
+
extra_link_args=["-framework", "IOKit", "-framework", "CoreFoundation"],
|
|
11
|
+
language="c",
|
|
12
|
+
),
|
|
13
|
+
],
|
|
14
|
+
)
|