lindoze 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lindoze/__init__.py +0 -0
- lindoze/__main__.py +40 -0
- lindoze/gpu_backends.py +512 -0
- lindoze/graphs.py +152 -0
- lindoze/pages/__init__.py +0 -0
- lindoze/pages/cpu.py +167 -0
- lindoze/pages/gpu.py +92 -0
- lindoze/pages/io.py +101 -0
- lindoze/pages/memory.py +63 -0
- lindoze/pages/performance.py +213 -0
- lindoze/pages/processes.py +553 -0
- lindoze/pages/startup.py +373 -0
- lindoze/process_sampler.py +116 -0
- lindoze/sampler.py +154 -0
- lindoze/window.py +123 -0
- lindoze-0.2.2.dist-info/METADATA +133 -0
- lindoze-0.2.2.dist-info/RECORD +21 -0
- lindoze-0.2.2.dist-info/WHEEL +5 -0
- lindoze-0.2.2.dist-info/entry_points.txt +2 -0
- lindoze-0.2.2.dist-info/licenses/LICENSE +674 -0
- lindoze-0.2.2.dist-info/top_level.txt +1 -0
lindoze/__init__.py
ADDED
|
File without changes
|
lindoze/__main__.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _parse_cli() -> argparse.Namespace:
    """Parse the command line; ``--dump-gpu`` is the only flag defined."""
    cli = argparse.ArgumentParser(prog="lindoze")
    cli.add_argument(
        "--dump-gpu",
        action="store_true",
        help="Dump raw GPU detection state (sysfs paths, PMU fds/values) to "
             "stderr and exit. Paste the output when filing an Intel GPU bug.",
    )
    return cli.parse_args()


def main() -> int:
    """Entry point for ``python -m lindoze``.

    With ``--dump-gpu`` the GPU diagnostic dump is written and 0 is returned.
    Otherwise the Qt application is started and the value of its event loop
    (``QApplication.exec()``) is returned as the exit status.
    """
    if _parse_cli().dump_gpu:
        from .gpu_backends import dump_gpus

        dump_gpus()
        return 0

    # These imports happen only on the GUI path, after the --dump-gpu branch
    # has already returned.
    import setproctitle
    from PySide6.QtWidgets import QApplication
    from .window import MainWindow

    # Replace the interpreter's argv[0] / /proc/<pid>/comm so the process is
    # listed as "lindoze" (ps, htop, our own Processes tab) rather than
    # "python". The kernel truncates the comm name to 15 characters.
    setproctitle.setproctitle("lindoze")

    qt_app = QApplication(sys.argv)
    qt_app.setOrganizationName("Lindoze")
    qt_app.setOrganizationDomain("lindoze.local")
    qt_app.setApplicationName("Lindoze Process Manager")

    main_window = MainWindow()
    main_window.show()
    return qt_app.exec()


if __name__ == "__main__":
    sys.exit(main())
|
lindoze/gpu_backends.py
ADDED
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
"""GPU backend abstraction. Detects all GPUs on the system and provides a
|
|
2
|
+
uniform sample() interface per device.
|
|
3
|
+
|
|
4
|
+
Supported vendors:
|
|
5
|
+
- NVIDIA via NVML (pynvml)
|
|
6
|
+
- AMD via sysfs (/sys/class/drm/cardN/device/...)
|
|
7
|
+
- Intel (experimental, i915 driver) via sysfs/hwmon for static stats and the
|
|
8
|
+
i915 perf PMU for live engine utilization. Xe-driver GPUs degrade to
|
|
9
|
+
temp/freq only until a v0.3 tester reports back.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import ctypes
|
|
14
|
+
import ctypes.util
|
|
15
|
+
import os
|
|
16
|
+
import subprocess
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Keys present in the dict returned by every backend's sample().
_SAMPLE_KEYS = (
    "name",
    "util",
    "mem_used",
    "mem_total",
    "temp",
    "power",
    "clk_core",
    "clk_mem",
    "enc",
    "dec",
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _read_int(path: Path) -> Optional[int]:
    """Return the integer stored in *path*, or None.

    None is returned when the file cannot be read (OSError) or when its
    stripped contents are not a valid integer literal (ValueError).
    """
    try:
        # Reading and parsing share one handler: a decode failure while
        # reading is a ValueError too and must be swallowed the same way.
        contents = path.read_text()
        value = int(contents.strip())
    except (ValueError, OSError):
        return None
    return value
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _read_text(path: Path) -> Optional[str]:
    """Return the contents of *path* with surrounding whitespace removed.

    Returns None when the file cannot be read (OSError).
    """
    try:
        contents = path.read_text()
    except OSError:
        return None
    return contents.strip()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---- NVIDIA backend
|
|
42
|
+
|
|
43
|
+
# Probe NVML once at import time. _NVML_OK records whether both the pynvml
# import and nvmlInit() succeeded; any failure disables NVIDIA detection.
try:
    import pynvml
    pynvml.nvmlInit()
except Exception:
    _NVML_OK = False
else:
    _NVML_OK = True
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class NVIDIABackend:
    """One NVIDIA GPU, addressed by NVML index and sampled through pynvml."""

    vendor = "nvidia"

    def __init__(self, index: int) -> None:
        """Look up the NVML handle and device name for GPU *index*.

        Raises:
            pynvml.NVMLError: if NVML cannot return the handle or the name.
        """
        self.index = index
        self._handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        name = pynvml.nvmlDeviceGetName(self._handle)
        # The name is handled as either bytes or str (presumably this varies
        # with the pynvml version).
        self.name = name.decode() if isinstance(name, bytes) else name

    @staticmethod
    def _query(func, *args):
        """Call one NVML getter, returning None if it raises NVMLError."""
        try:
            return func(*args)
        except pynvml.NVMLError:
            return None

    def sample(self) -> dict:
        """Return the current readings for this GPU.

        Every metric is queried independently, so one unsupported NVML call
        no longer discards a sibling reading that succeeded (previously the
        two clocks shared one ``try`` block, as did encoder/decoder).

        Returns:
            dict with keys name, util, mem_used, mem_total, temp, power,
            clk_core, clk_mem, enc, dec. ``util``, ``mem_used`` and
            ``mem_total`` fall back to 0 on error; the others to None.
            ``power`` is the NVML power reading divided by 1000.
        """
        h = self._handle
        query = self._query

        rates = query(pynvml.nvmlDeviceGetUtilizationRates, h)
        util = rates.gpu if rates is not None else 0

        mem = query(pynvml.nvmlDeviceGetMemoryInfo, h)
        if mem is not None:
            mem_used, mem_total = mem.used, mem.total
        else:
            mem_used = mem_total = 0

        temp = query(pynvml.nvmlDeviceGetTemperature, h, pynvml.NVML_TEMPERATURE_GPU)

        power_raw = query(pynvml.nvmlDeviceGetPowerUsage, h)
        power = power_raw / 1000.0 if power_raw is not None else None

        clk_core = query(pynvml.nvmlDeviceGetClockInfo, h, pynvml.NVML_CLOCK_GRAPHICS)
        clk_mem = query(pynvml.nvmlDeviceGetClockInfo, h, pynvml.NVML_CLOCK_MEM)

        # The utilisation getters return a sequence; element 0 is the value
        # reported to callers.
        enc_info = query(pynvml.nvmlDeviceGetEncoderUtilization, h)
        enc = enc_info[0] if enc_info is not None else None
        dec_info = query(pynvml.nvmlDeviceGetDecoderUtilization, h)
        dec = dec_info[0] if dec_info is not None else None

        return dict(
            name=self.name, util=util, mem_used=mem_used, mem_total=mem_total,
            temp=temp, power=power, clk_core=clk_core, clk_mem=clk_mem,
            enc=enc, dec=dec,
        )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ---- AMD backend (sysfs)
|
|
97
|
+
|
|
98
|
+
def _amd_name_from_lspci(out: str) -> Optional[str]:
    """Extract an "AMD <model>" label from ``lspci -d`` output, or None."""
    # Several cards with the same vendor:device ID yield one line each; they
    # share a name, so only the first non-empty line is used.
    line = next((ln.strip() for ln in out.splitlines() if ln.strip()), "")
    # Line shape: "<slot> <class>: <vendor> <model> (rev xx)". Splitting on
    # ": " (not ":") stays correct when lspci prints a PCI domain prefix
    # such as "0000:04:00.0".
    _, sep, model = line.partition(": ")
    if not sep:
        return None
    model = model.strip()
    # Drop a trailing parenthesised group such as "(rev d8)".
    if model.endswith(")") and "(" in model:
        model = model[: model.rindex("(")].rstrip()
    # Strip the vendor boilerplate that precedes the model name.
    for prefix in ("Advanced Micro Devices, Inc. [AMD/ATI] ",
                   "Advanced Micro Devices, Inc. ",
                   "[AMD/ATI] "):
        if model.startswith(prefix):
            model = model[len(prefix):]
            break
    return f"AMD {model}" if model else None


def _lspci_name(pci_vendor: str, pci_device: str) -> str:
    """Friendly name from lspci output, e.g. 'AMD Raphael' for 1002:164e.

    Args:
        pci_vendor: PCI vendor ID as read from sysfs (with or without "0x").
        pci_device: PCI device ID in the same form.

    Returns:
        "AMD <model>" on success, or the generic "AMD GPU" when either ID is
        empty, lspci is missing/fails/times out, or its output cannot be
        parsed.
    """
    vendor_id = pci_vendor.removeprefix("0x")
    device_id = pci_device.removeprefix("0x")
    # An empty ID would turn the filter into a wildcard ("lspci -d :") and
    # the first unrelated PCI device would be reported as the GPU.
    if not vendor_id or not device_id:
        return "AMD GPU"
    try:
        out = subprocess.check_output(
            ["lspci", "-d", f"{vendor_id}:{device_id}"],
            text=True, timeout=2,
        )
    except (subprocess.SubprocessError, OSError):
        # OSError already covers FileNotFoundError (lspci not installed).
        return "AMD GPU"
    return _amd_name_from_lspci(out) or "AMD GPU"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class AMDBackend:
    """One AMD GPU, sampled from the sysfs files under ``<card>/device``."""

    vendor = "amd"

    def __init__(self, card_path: Path) -> None:
        """Record the sysfs paths for *card_path* and resolve a display name."""
        self.card_path = card_path
        self.device_path = card_path / "device"
        pci_vendor = _read_text(self.device_path / "vendor") or ""
        pci_device = _read_text(self.device_path / "device") or ""
        self.name = _lspci_name(pci_vendor, pci_device)
        # First hwmon directory found under the device, or None.
        hwmon_dirs = [*(self.device_path / "hwmon").glob("hwmon*")]
        self.hwmon = hwmon_dirs[0] if hwmon_dirs else None

    def _sample_hwmon(self) -> tuple:
        """Return (temp, power, clk_core) from hwmon; None for missing ones."""
        hw = self.hwmon
        if hw is None:
            return None, None, None

        millideg = _read_int(hw / "temp1_input")
        temp = None if millideg is None else millideg // 1000  # milli-°C to °C

        # Some amdgpu builds expose power1_average, others only power1_input;
        # the first one that reads successfully wins.
        power = None
        for power_field in ("power1_average", "power1_input"):
            microwatts = _read_int(hw / power_field)
            if microwatts is None:
                continue
            power = microwatts / 1_000_000  # microwatts to watts
            break

        hertz = _read_int(hw / "freq1_input")
        clk_core = None if hertz is None else hertz // 1_000_000  # Hz to MHz
        return temp, power, clk_core

    def sample(self) -> dict:
        """Return the current readings for this GPU as a dict.

        ``util``, ``mem_used`` and ``mem_total`` fall back to 0 when their
        sysfs file is unreadable; ``temp``, ``power``, ``clk_core``, ``enc``
        and ``dec`` fall back to None. ``clk_mem`` is always None.
        """
        dev = self.device_path
        busy = _read_int(dev / "gpu_busy_percent") or 0
        vram_total = _read_int(dev / "mem_info_vram_total") or 0
        vram_used = _read_int(dev / "mem_info_vram_used") or 0
        # vcn_busy_percent covers encode and decode together, so the same
        # number is reported under both enc and dec (the GPU page shows them
        # separately).
        vcn_busy = _read_int(dev / "vcn_busy_percent")

        temp, power, clk_core = self._sample_hwmon()

        # AMD APUs share one power budget across the socket, so the hwmon
        # power reading covers CPU+iGPU+uncore. Labeling heuristic: a non-zero
        # VRAM total below 4 GiB is treated as an integrated GPU.
        integrated = 0 < vram_total < 4 * 1024**3

        return dict(
            name=self.name, util=busy, mem_used=vram_used, mem_total=vram_total,
            temp=temp, power=power, clk_core=clk_core, clk_mem=None,
            enc=vcn_busy, dec=vcn_busy, is_integrated=integrated,
        )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# ---- Intel backend (sysfs + hwmon + i915 perf PMU)
|
|
172
|
+
#
|
|
173
|
+
# i915 doesn't expose AMD-style aggregate gpu_busy_percent. For live util we
|
|
174
|
+
# open the i915 perf PMU (one fd per engine) via perf_event_open(). The PMU
|
|
175
|
+
# returns a monotonically-increasing busy-ns counter per engine; sampling the
|
|
176
|
+
# delta against wall time gives a per-engine busy %. We average across engines
|
|
177
|
+
# for an overall figure, matching how intel_gpu_top labels "overall".
|
|
178
|
+
#
|
|
179
|
+
# Static stats (temp/power/freq) come from sysfs+hwmon and work even if the
|
|
180
|
+
# PMU path fails (sandboxed env, perf_event_paranoid sysctl, etc.). In that
|
|
181
|
+
# case util reports None and the page draws "—".
|
|
182
|
+
|
|
183
|
+
# x86_64 syscall number for perf_event_open. ARM64 is 241; we'd add a lookup
|
|
184
|
+
# table when someone runs Lindoze on aarch64 Intel hw (basically never).
|
|
185
|
+
# Passed as the syscall number to libc.syscall() in _open_perf_event().
_SYS_PERF_EVENT_OPEN = 298
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class _PerfEventAttr(ctypes.Structure):
    """Leading fields of ``struct perf_event_attr`` followed by zero padding.

    Only ``type``, ``size`` and ``config`` are set by callers. The padding
    brings the structure to exactly 128 bytes (PERF_ATTR_SIZE_VER7), the size
    reported to the kernel through the ``size`` field. The previous 96-byte
    pad produced a 144-byte structure, contradicting that intent.
    """

    _fields_ = [
        ("type", ctypes.c_uint32),
        ("size", ctypes.c_uint32),
        ("config", ctypes.c_uint64),
        ("sample_period_or_freq", ctypes.c_uint64),
        ("sample_type", ctypes.c_uint64),
        ("read_format", ctypes.c_uint64),
        ("flags", ctypes.c_uint64),
        # 48 bytes of named fields + 80 bytes of padding = 128 bytes.
        ("_pad", ctypes.c_uint8 * 80),
    ]
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _open_perf_event(pmu_type: int, config: int, cpu: int) -> Optional[int]:
    """perf_event_open for a single i915 PMU event. Returns fd or None.

    Args:
        pmu_type: value written to ``perf_event_attr.type``.
        config: value written to ``perf_event_attr.config``.
        cpu: CPU number passed as the syscall's ``cpu`` argument.

    Returns:
        The new file descriptor, or None when the syscall fails, libc cannot
        be loaded, or the process is not 64-bit x86_64.
    """
    import platform

    # _SYS_PERF_EVENT_OPEN is the x86_64 number. On any other ABI that number
    # is a different syscall, so refuse instead of invoking it blindly.
    if platform.machine() != "x86_64" or ctypes.sizeof(ctypes.c_void_p) != 8:
        return None
    try:
        libc = ctypes.CDLL(ctypes.util.find_library("c") or "libc.so.6", use_errno=True)
    except OSError:
        # PMU access is best-effort: no libc handle means no counter.
        return None

    attr = _PerfEventAttr()
    attr.type = pmu_type
    attr.size = ctypes.sizeof(_PerfEventAttr)
    attr.config = config
    # pid=-1, cpu=N → system-wide counter on that CPU. Group fd=-1, flags=0.
    fd = libc.syscall(
        ctypes.c_long(_SYS_PERF_EVENT_OPEN),
        ctypes.byref(attr), ctypes.c_int(-1), ctypes.c_int(cpu),
        ctypes.c_int(-1), ctypes.c_ulong(0),
    )
    return fd if fd >= 0 else None
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _read_pmu_counter(fd: int) -> Optional[int]:
    """Read one unsigned 64-bit little-endian counter value from *fd*.

    Returns None when the read raises OSError or yields anything other than
    exactly 8 bytes.
    """
    try:
        raw = os.read(fd, 8)
    except OSError:
        return None
    return int.from_bytes(raw, "little", signed=False) if len(raw) == 8 else None
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _intel_name_from_lspci(out: str) -> Optional[str]:
    """Extract an "Intel <model>" label from ``lspci -d`` output, or None."""
    # Identical devices yield one line each; only the first non-empty line
    # is used so later lines cannot leak into the name.
    line = next((ln.strip() for ln in out.splitlines() if ln.strip()), "")
    # Line shape: "<slot> <class>: <vendor> <model> (rev xx)". Splitting on
    # ": " (not ":") stays correct when a PCI domain prefix is printed.
    _, sep, model = line.partition(": ")
    if not sep:
        return None
    model = model.strip()
    # Drop a trailing parenthesised group such as "(rev 0c)".
    if model.endswith(")") and "(" in model:
        model = model[: model.rindex("(")].rstrip()
    # Strip the vendor prefix; "Intel" is re-added in a uniform form below.
    for prefix in ("Intel Corporation ", "Intel Corp. ", "Intel "):
        if model.startswith(prefix):
            model = model[len(prefix):]
            break
    return f"Intel {model}" if model else None


def _lspci_name_intel(pci_vendor: str, pci_device: str) -> str:
    """Friendly Intel GPU name from lspci, with vendor prefixes removed.

    Args:
        pci_vendor: PCI vendor ID as read from sysfs (with or without "0x").
        pci_device: PCI device ID in the same form.

    Returns:
        "Intel <model>" on success, or the generic "Intel GPU" when either ID
        is empty, lspci is missing/fails/times out, or its output cannot be
        parsed.
    """
    vendor_id = pci_vendor.removeprefix("0x")
    device_id = pci_device.removeprefix("0x")
    # An empty ID would turn the filter into a wildcard ("lspci -d :") and
    # the first unrelated PCI device would be reported as the GPU.
    if not vendor_id or not device_id:
        return "Intel GPU"
    try:
        out = subprocess.check_output(
            ["lspci", "-d", f"{vendor_id}:{device_id}"],
            text=True, timeout=2,
        )
    except (subprocess.SubprocessError, OSError):
        # OSError already covers FileNotFoundError (lspci not installed).
        return "Intel GPU"
    return _intel_name_from_lspci(out) or "Intel GPU"
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class IntelBackend:
    """One Intel GPU: sysfs/hwmon for static stats, i915 perf PMU for util.

    Temperature, power and frequency are read from sysfs and hwmon. Live
    utilisation comes from per-engine busy counters opened on the i915 perf
    PMU; if that is unavailable, ``sample()["util"]`` is None.

    The backend owns the PMU file descriptors it opens. They are released by
    ``close()``, which is also invoked when the object is garbage-collected.
    """

    vendor = "intel"

    # Directory under which the kernel lists perf PMUs.
    _PMU_ROOT = Path("/sys/bus/event_source/devices")

    def __init__(self, card_path: Path) -> None:
        """Resolve sysfs paths for *card_path* and try to open the PMU."""
        self.card_path = card_path
        self.device_path = card_path / "device"
        vendor = _read_text(self.device_path / "vendor") or ""
        device = _read_text(self.device_path / "device") or ""
        self.name = _lspci_name_intel(vendor, device)

        # hwmon: i915 driver registers a hwmon device under device/hwmon/.
        hwmon_root = self.device_path / "hwmon"
        hwmons = list(hwmon_root.glob("hwmon*")) if hwmon_root.exists() else []
        self.hwmon = hwmons[0] if hwmons else None

        # i915 frequency sysfs nodes (MHz). Modern kernels expose these under
        # card_path/ directly; older Gen7 (Ivy Bridge) instead places them at
        # device_path/gt_*_freq_mhz. Xe uses different paths again; if none of
        # the candidates exist we report clk_core=None.
        self._freq_cur = self._pick_existing(
            self.card_path / "gt_cur_freq_mhz",
            self.device_path / "gt_cur_freq_mhz",
        )
        self._freq_max = self._pick_existing(
            self.card_path / "gt_max_freq_mhz",
            self.device_path / "gt_max_freq_mhz",
        )

        # PMU setup — best-effort. Any failure → no util reading.
        self._pmu_fds: list[int] = []
        self._pmu_prev_ns: list[int] = []
        self._pmu_prev_wall: Optional[int] = None
        self._setup_pmu()

    def close(self) -> None:
        """Close every PMU fd held by this backend. Safe to call repeatedly.

        After closing, util sampling is disabled (``util`` becomes None).
        """
        # getattr: __del__ may run on an object whose __init__ failed early.
        fds = getattr(self, "_pmu_fds", None) or []
        self._pmu_fds = []
        self._pmu_prev_ns = []
        self._pmu_prev_wall = None
        for fd in fds:
            try:
                os.close(fd)
            except OSError:
                pass  # already closed or invalid; nothing left to release

    def __del__(self) -> None:
        # Finalizers must never raise (e.g. during interpreter shutdown).
        try:
            self.close()
        except Exception:
            pass

    def _find_pmu_dir(self) -> Optional[Path]:
        """Return the perf PMU directory belonging to this card, or None.

        Only cards bound to the i915 driver are considered, so an xe-driven
        card never picks up another GPU's counters. Two names are tried: the
        per-device "i915_<pci address with ':' replaced by '_'>" first, then
        the bare "i915". NOTE(review): this naming scheme follows the kernel's
        i915_pmu.c as I recall it; confirm against the target kernels.
        """
        try:
            driver = (self.device_path / "driver").resolve().name
            pci_addr = self.device_path.resolve().name
        except (OSError, RuntimeError):
            return None
        if driver != "i915":
            return None
        return self._pick_existing(
            self._PMU_ROOT / ("i915_" + pci_addr.replace(":", "_")),
            self._PMU_ROOT / "i915",
        )

    def _setup_pmu(self) -> None:
        """Open one busy counter per engine and record baseline values."""
        pmu_dir = self._find_pmu_dir()
        if pmu_dir is None:
            return
        pmu_type = _read_int(pmu_dir / "type")
        if pmu_type is None:
            return
        # Pick the first CPU named in the PMU's cpumask ("0", "0-3", "0,4").
        cpumask_text = _read_text(pmu_dir / "cpumask") or "0"
        try:
            cpu = int(cpumask_text.split("-")[0].split(",")[0])
        except ValueError:
            cpu = 0
        # Each events/*-busy file holds the perf config for one engine.
        events_dir = pmu_dir / "events"
        if not events_dir.exists():
            return
        for ev_file in sorted(events_dir.glob("*-busy")):
            config = self._parse_event_config(ev_file)
            if config is None:
                continue
            fd = _open_perf_event(pmu_type, config, cpu)
            if fd is None:
                continue
            self._pmu_fds.append(fd)
            self._pmu_prev_ns.append(0)
        # Prime initial reads so the first sample() reports a real delta.
        if self._pmu_fds:
            for i, fd in enumerate(self._pmu_fds):
                v = _read_pmu_counter(fd)
                if v is not None:
                    self._pmu_prev_ns[i] = v
            self._pmu_prev_wall = self._monotonic_ns()

    @staticmethod
    def _pick_existing(*candidates: Path) -> Optional[Path]:
        """Return the first candidate path that exists, or None."""
        for p in candidates:
            if p.exists():
                return p
        return None

    @staticmethod
    def _parse_event_config(ev_file: Path) -> Optional[int]:
        """Parse the perf config number out of a PMU ``events/`` file.

        The file holds comma-separated ``key=value`` terms. The value of the
        first ``config`` or ``event`` term is returned; ``config=0x...`` is
        the form i915 uses as I recall it (TODO confirm against the kernel),
        while ``event=`` was the only key accepted before. Returns None if
        the file is unreadable, has no such term, or the value is not an
        integer literal.
        """
        text = _read_text(ev_file)
        if not text:
            return None
        for part in text.split(","):
            kv = part.strip().split("=", 1)
            if len(kv) == 2 and kv[0] in ("config", "event"):
                try:
                    return int(kv[1], 0)
                except ValueError:
                    return None
        return None

    @staticmethod
    def _monotonic_ns() -> int:
        """Monotonic timestamp in ns, used as the wall-time base for deltas."""
        import time
        return time.monotonic_ns()

    def _sample_util(self) -> Optional[int]:
        """Average engine busy % (0-100) since the previous call, or None.

        None is returned when no PMU fd is open, no time has elapsed, or no
        counter could be read.
        """
        if not self._pmu_fds or self._pmu_prev_wall is None:
            return None
        now_wall = self._monotonic_ns()
        elapsed = now_wall - self._pmu_prev_wall
        if elapsed <= 0:
            return None
        busy_sum = 0
        n = 0
        for i, fd in enumerate(self._pmu_fds):
            v = _read_pmu_counter(fd)
            if v is None:
                continue
            delta = v - self._pmu_prev_ns[i]
            self._pmu_prev_ns[i] = v
            if delta < 0:
                delta = 0  # counter rolled or reset
            busy_sum += delta
            n += 1
        self._pmu_prev_wall = now_wall
        if n == 0:
            return None
        # Average engine busy % across the engines we managed to read.
        avg = (busy_sum * 100) // (elapsed * n)
        return max(0, min(100, int(avg)))

    def sample(self) -> dict:
        """Return the current readings for this GPU as a dict.

        ``mem_used``/``mem_total`` are always 0, ``clk_mem``/``enc``/``dec``
        always None and ``is_integrated`` always True. ``util``, ``temp``,
        ``power`` and ``clk_core`` are None when their source is unavailable.
        """
        util = self._sample_util()

        temp = power = None
        if self.hwmon is not None:
            t = _read_int(self.hwmon / "temp1_input")
            if t is not None:
                temp = t // 1000  # milli-°C to °C
            # i915 hwmon publishes energy1_input (microjoules cumulative), not
            # instantaneous power. We'd need to delta it; for now we only use
            # a direct power1_average if it exists (some platforms).
            p = _read_int(self.hwmon / "power1_average")
            if p is not None:
                power = p / 1_000_000  # microwatts to watts

        clk_core = _read_int(self._freq_cur) if self._freq_cur else None

        # Intel iGPUs share system RAM; no dedicated VRAM sysfs surface
        # without debugfs/root. Mark as integrated so the page labels power
        # consistently with AMD APUs.
        return dict(
            name=self.name, util=util, mem_used=0, mem_total=0,
            temp=temp, power=power, clk_core=clk_core, clk_mem=None,
            enc=None, dec=None, is_integrated=True,
        )
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
# ---- Detection
|
|
401
|
+
|
|
402
|
+
def _detect_intel() -> list[IntelBackend]:
    """Return an IntelBackend for every Intel DRM card bound to i915 or xe.

    Cards are visited in sorted ``/sys/class/drm/card*`` order. Connector
    entries (names containing "-") are ignored.
    """
    drm_root = Path("/sys/class/drm")
    if not drm_root.exists():
        return []

    def is_intel_gpu(card: Path) -> bool:
        if "-" in card.name:
            return False
        if _read_text(card / "device" / "vendor") != "0x8086":
            return False
        link = card / "device" / "driver"
        bound_driver = link.resolve().name if link.exists() else ""
        return bound_driver in ("i915", "xe")

    return [
        IntelBackend(card)
        for card in sorted(drm_root.glob("card*"))
        if is_intel_gpu(card)
    ]
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _detect_amd() -> list[AMDBackend]:
    """Return an AMDBackend for every AMD DRM card exposing gpu_busy_percent.

    Cards are visited in sorted ``/sys/class/drm/card*`` order. Connector
    entries such as card0-DP-1 (names containing "-") are ignored.
    """
    drm_root = Path("/sys/class/drm")
    if not drm_root.exists():
        return []

    def is_usable_amd(card: Path) -> bool:
        if "-" in card.name:  # connector, not a GPU
            return False
        if _read_text(card / "device" / "vendor") != "0x1002":
            return False
        # Require the bare minimum sysfs surface that sampling relies on.
        return (card / "device" / "gpu_busy_percent").exists()

    return [
        AMDBackend(card)
        for card in sorted(drm_root.glob("card*"))
        if is_usable_amd(card)
    ]
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _detect_nvidia() -> list[NVIDIABackend]:
    """Return an NVIDIABackend per NVML device, in NVML index order.

    Returns an empty list when NVML did not initialise or the device count
    cannot be read. A device whose handle or name lookup raises NVMLError is
    skipped, so one faulty GPU no longer aborts detection of the others.
    """
    if not _NVML_OK:
        return []
    try:
        count = pynvml.nvmlDeviceGetCount()
    except Exception:
        return []
    backends: list[NVIDIABackend] = []
    for index in range(count):
        try:
            backends.append(NVIDIABackend(index))
        except pynvml.NVMLError:
            # Best-effort: leave this device out, keep the rest.
            continue
    return backends
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def detect_gpus() -> list:
    """Return all detected GPU backends: NVIDIA, then AMD, then Intel.

    Within a vendor the order follows that vendor's detector (NVML index for
    NVIDIA, sorted sysfs cardN entries for AMD and Intel).
    """
    gpus: list = []
    for detect in (_detect_nvidia, _detect_amd, _detect_intel):
        gpus.extend(detect())
    return gpus
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
# ---- Diagnostics
|
|
453
|
+
|
|
454
|
+
def dump_gpus(stream=None) -> None:
    """Write a verbose GPU-detection report for bug reports.

    The report lists the DRM cards found in sysfs, the i915 PMU directory
    contents, the perf_event_paranoid setting, and one sample per detected
    backend (two samples 250 ms apart for Intel backends).

    Args:
        stream: text stream to write to; defaults to ``sys.stderr``.
    """
    import sys
    import time

    out = sys.stderr if stream is None else stream

    def emit(line: str = "") -> None:
        print(line, file=out)

    def value_of(node: Optional[Path]) -> Optional[int]:
        # Integer content of a sysfs node, or None when no node was found.
        return _read_int(node) if node else None

    emit("=== Lindoze GPU diagnostic dump ===")

    # --- DRM cards as sysfs lists them
    drm_root = Path("/sys/class/drm")
    emit(f"/sys/class/drm exists: {drm_root.exists()}")
    if drm_root.exists():
        for card in sorted(drm_root.glob("card*")):
            if "-" in card.name:
                continue  # connector entry, not a GPU
            pci = card / "device"
            vendor = _read_text(pci / "vendor")
            device = _read_text(pci / "device")
            link = pci / "driver"
            driver = link.resolve().name if link.exists() else "(none)"
            emit(f" {card.name}: vendor={vendor} device={device} driver={driver}")

    # --- i915 perf PMU
    pmu_dir = Path("/sys/bus/event_source/devices/i915")
    emit(f"\ni915 PMU dir exists: {pmu_dir.exists()}")
    if pmu_dir.exists():
        emit(f" type = {_read_text(pmu_dir / 'type')}")
        emit(f" cpumask = {_read_text(pmu_dir / 'cpumask')}")
        events_dir = pmu_dir / "events"
        if events_dir.exists():
            for event_file in sorted(events_dir.glob("*")):
                emit(f" events/{event_file.name} = {_read_text(event_file)}")

    paranoid = _read_text(Path("/proc/sys/kernel/perf_event_paranoid"))
    emit(f"\nperf_event_paranoid = {paranoid}")

    # --- Backends and live samples
    backends = detect_gpus()
    emit(f"\nDetected backends: {len(backends)}")
    for backend in backends:
        emit(f"\n--- {backend.vendor}: {backend.name}")
        if not isinstance(backend, IntelBackend):
            emit(f" sample = {backend.sample()}")
            continue
        emit(f" card_path = {backend.card_path}")
        emit(f" device_path = {backend.device_path}")
        emit(f" freq_cur = {backend._freq_cur} -> {value_of(backend._freq_cur)}")
        emit(f" freq_max = {backend._freq_max} -> {value_of(backend._freq_max)}")
        emit(f" hwmon = {backend.hwmon}")
        emit(f" pmu_fds = {backend._pmu_fds}")
        emit(f" pmu_prev_ns = {backend._pmu_prev_ns}")
        # Take two samples ~250ms apart so the user sees a real util read.
        first = backend.sample()
        time.sleep(0.25)
        second = backend.sample()
        emit(f" sample[0] = {first}")
        emit(f" sample[1] = {second}")
    emit("\n=== end dump ===")
|