amd-debug-tools 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of amd-debug-tools might be problematic. Click here for more details.
- amd_debug/__init__.py +45 -0
- amd_debug/acpi.py +107 -0
- amd_debug/bash/amd-s2idle +89 -0
- amd_debug/battery.py +87 -0
- amd_debug/bios.py +138 -0
- amd_debug/common.py +324 -0
- amd_debug/database.py +331 -0
- amd_debug/failures.py +588 -0
- amd_debug/installer.py +404 -0
- amd_debug/kernel.py +389 -0
- amd_debug/prerequisites.py +1215 -0
- amd_debug/pstate.py +314 -0
- amd_debug/s2idle-hook +72 -0
- amd_debug/s2idle.py +406 -0
- amd_debug/sleep_report.py +453 -0
- amd_debug/templates/html +427 -0
- amd_debug/templates/md +39 -0
- amd_debug/templates/stdout +13 -0
- amd_debug/templates/txt +23 -0
- amd_debug/validator.py +863 -0
- amd_debug/wake.py +111 -0
- amd_debug_tools-0.2.0.dist-info/METADATA +180 -0
- amd_debug_tools-0.2.0.dist-info/RECORD +27 -0
- amd_debug_tools-0.2.0.dist-info/WHEEL +5 -0
- amd_debug_tools-0.2.0.dist-info/entry_points.txt +4 -0
- amd_debug_tools-0.2.0.dist-info/licenses/LICENSE +19 -0
- amd_debug_tools-0.2.0.dist-info/top_level.txt +1 -0
amd_debug/validator.py
ADDED
|
@@ -0,0 +1,863 @@
|
|
|
1
|
+
#!/usr/bin/python3
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
import glob
|
|
5
|
+
import math
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import random
|
|
9
|
+
import subprocess
|
|
10
|
+
import time
|
|
11
|
+
from datetime import timedelta, datetime
|
|
12
|
+
from packaging import version
|
|
13
|
+
from pyudev import Context
|
|
14
|
+
|
|
15
|
+
from amd_debug.sleep_report import SleepReport
|
|
16
|
+
from amd_debug.database import SleepDatabase
|
|
17
|
+
from amd_debug.battery import Batteries
|
|
18
|
+
from amd_debug.kernel import get_kernel_log, get_kernel_command_line, sscanf_bios_args
|
|
19
|
+
from amd_debug.common import (
|
|
20
|
+
print_color,
|
|
21
|
+
read_file,
|
|
22
|
+
check_lockdown,
|
|
23
|
+
run_countdown,
|
|
24
|
+
BIT,
|
|
25
|
+
AmdTool,
|
|
26
|
+
)
|
|
27
|
+
from amd_debug.acpi import AcpicaTracer
|
|
28
|
+
from amd_debug.failures import (
|
|
29
|
+
AcpiBiosError,
|
|
30
|
+
Irq1Workaround,
|
|
31
|
+
LowHardwareSleepResidency,
|
|
32
|
+
SpuriousWakeup,
|
|
33
|
+
RtcAlarmWrong,
|
|
34
|
+
IommuPageFault,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Headers:
    """Fixed marker strings shared by the code that writes the debug output"""

    # prefixes of debug messages recorded while a cycle is running
    SuspendDuration = "Suspend timer programmed for"
    WokeFromIrq = "Woke up from IRQ"

    # headings printed to the user
    CycleCount = "Suspend cycle"
    LastCycleResults = "Results from last s2idle cycle"

    # text that is searched for in kernel log lines
    Irq1Workaround = "Disabling IRQ1 wakeup source to avoid platform firmware bug"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def soc_needs_irq1_wa(family, model, smu_version):
    """Tell whether a SoC needs the IRQ1 workaround.

    Family 0x17 models 0x60 and 0x68 always need it.  Family 0x19 model 0x50
    needs it only while the SMU firmware version is older than 64.66.0.
    Every other family/model combination does not.
    """
    if family == 0x17:
        return model in (0x60, 0x68)
    if family == 0x19 and model == 0x50:
        # smu_version is only parsed on this path
        return version.parse(smu_version) < version.parse("64.66.0")
    return False
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def toggle_pm_debug(enable):
    """Write "1" (truthy *enable*) or "0" to /sys/power/pm_debug_messages"""
    value = "1" if enable else "0"
    sysfs_node = os.path.join("/", "sys", "power", "pm_debug_messages")
    with open(sysfs_node, "w", encoding="utf-8") as handle:
        handle.write(value)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def pm_debugging(func):
    """Decorator that enables pm_debug_messages for the duration of *func*.

    The wrapper calls ``toggle_pm_debug(True)``, runs *func* with the given
    arguments and returns its result.  ``toggle_pm_debug(False)`` runs in a
    ``finally`` clause so the messages are switched off again even when
    *func* raises.  The wrapper keeps the name and docstring of *func*.
    """
    import functools

    @functools.wraps(func)
    def runner(*args, **kwargs):
        toggle_pm_debug(True)
        try:
            return func(*args, **kwargs)
        finally:
            # do not leave the messages enabled when func fails
            toggle_pm_debug(False)

    return runner
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class SleepValidator(AmdTool):
    """Validator for suspend (sleep) cycles"""

    def __init__(self, tool_debug, bios_debug):
        """Create the helper objects and reset all bookkeeping.

        tool_debug: when truthy the base class receives the log prefix
            "s2idle" (otherwise None); the value is also kept as
            ``display_debug``.
        bios_debug: stored unchanged as ``bios_debug``.
        """
        super().__init__("s2idle" if tool_debug else None)

        # helper objects
        self.pyudev = Context()
        self.kernel_log = get_kernel_log()
        self.db = SleepDatabase()
        self.batteries = Batteries()
        self.acpica = AcpicaTracer()

        # options supplied by the caller
        self.bios_debug = bios_debug
        self.display_debug = tool_debug

        # CPU / SMU identification, empty until something fills it in
        self.cpu_family = ""
        self.cpu_model = ""
        self.cpu_model_string = ""
        self.smu_version = ""
        self.smu_program = ""

        # timing of the current cycle
        self.last_suspend = datetime.now()
        self.requested_duration = 0
        self.userspace_duration = 0
        self.kernel_duration = 0
        self.hw_sleep_duration = 0

        # environment
        self.lockdown = check_lockdown()
        self.logind = False

        # findings gathered from the kernel log
        self.cycle_count = 0
        self.upep = False
        self.upep_microsoft = False
        self.irq1_workaround = False
        self.wakeup_irqs = []
        self.idle_masks = []
        self.acpi_errors = []
        self.active_gpios = []
        self.page_faults = []
        self.notify_devices = []

        # values remembered between captures so changes can be reported
        self.gpes = {}
        self.thermal = {}
        self.wakeup_count = {}

        # failure objects collected by the checks
        self.failures = []
|
|
121
|
+
|
|
122
|
+
def capture_running_compositors(self):
    """Record a debug message for every known compositor that is running.

    The executable of each process is taken from the ``/proc/<pid>/exe``
    link.  Links that cannot be read (the process exited in the meantime,
    it has no executable, or access is denied) are skipped instead of
    raising.  Only the first whitespace-separated word of the executable's
    base name is compared, so a trailing suffix after the name (such as the
    " (deleted)" marker of a replaced binary) does not hide the process.
    """
    known_compositors = {
        "kwin_wayland",
        "gnome-shell",
        "cosmic-session",
        "hyprland",
    }

    for proc_dir in glob.glob("/proc/[0-9]*"):
        try:
            target = os.readlink(os.path.join(proc_dir, "exe"))
        except OSError:
            # reading the link directly avoids the exists()/readlink() race
            continue
        words = os.path.basename(target).split()
        if words and words[0] in known_compositors:
            self.db.record_debug(f"{words[0]} compositor is running")
|
|
143
|
+
|
|
144
|
+
def capture_power_profile(self):
    """Record the output of ``powerprofilesctl`` as a small tree.

    Nothing happens when /usr/bin/powerprofilesctl does not exist.  Blank
    output lines are dropped; every remaining line is recorded stripped,
    prefixed with "│ " except for the final one which gets "└─".  When the
    command exits with an error, a single message containing its output is
    recorded instead.
    """
    cmd = ["/usr/bin/powerprofilesctl"]
    if not os.path.exists(cmd[0]):
        return

    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode(
            "utf-8"
        )
    except subprocess.CalledProcessError as e:
        failure = e.output
        if isinstance(failure, bytes):
            failure = failure.decode("utf-8", errors="replace")
        # Build the text here: record_debug does no %-interpolation, and its
        # second argument is used as a priority elsewhere in this file.
        self.db.record_debug(f"Failed to run powerprofilesctl: {failure}")
        return

    self.db.record_debug("Power Profiles:")
    lines = [line for line in output.split("\n") if line.strip()]
    last = len(lines) - 1
    for idx, line in enumerate(lines):
        # decide by position; identical lines can occur more than once
        prefix = "└─" if idx == last else "│ "
        self.db.record_debug(f"{prefix}{line.strip()}")
|
|
160
|
+
|
|
161
|
+
def capture_battery(self):
    """Record the energy level of every battery.

    For each battery a debug message with the current energy and its unit
    is recorded, followed by a battery-energy record holding the current
    and full energy.  The unit stored in that record is "W" when the
    battery reports "µWh" and "A" otherwise.
    """
    batteries = self.batteries
    for name in batteries.get_batteries():
        unit = batteries.get_energy_unit(name)
        energy = batteries.get_energy(name)
        full = batteries.get_energy_full(name)
        self.db.record_debug(f"{name} energy level is {energy} {unit}")
        if unit == "µWh":
            report_unit = "W"
        else:
            report_unit = "A"
        self.db.record_battery_energy(name, energy, full, report_unit)
|
|
170
|
+
|
|
171
|
+
def check_rtc_cmos(self):
    """Flag a failure when rtc_cmos is not set up to use the ACPI alarm.

    Reads the ``use_acpi_alarm`` parameter of the ``rtc_cmos`` module; only
    the value "N" results in a cycle-data entry and an ``RtcAlarmWrong``
    failure.
    """
    parameter = os.path.join(
        "/", "sys", "module", "rtc_cmos", "parameters", "use_acpi_alarm"
    )
    if read_file(parameter) != "N":
        return
    self.db.record_cycle_data("`rtc_cmos` not configured to use ACPI alarm", "🚦")
    self.failures.append(RtcAlarmWrong())
|
|
182
|
+
|
|
183
|
+
def check_gpes(self):
    """Read the ACPI GPE counters and report the ones that changed.

    Every file below /sys/firmware/acpi/interrupts whose name starts with
    "gpe" (except "gpe_all") is read; the first whitespace-separated field
    is the counter.  A debug message is recorded when a counter differs
    from the value remembered from the previous call, then the new value is
    remembered in ``self.gpes``.
    """
    interrupts = os.path.join("/", "sys", "firmware", "acpi", "interrupts")
    for root, _dirs, files in os.walk(interrupts, topdown=False):
        for fname in files:
            if fname == "gpe_all" or not fname.startswith("gpe"):
                continue
            with open(os.path.join(root, fname), "r", encoding="utf-8") as handle:
                count = int(handle.read().split()[0])
            if fname in self.gpes:
                previous = self.gpes[fname]
                if previous != count:
                    self.db.record_debug(
                        f"{fname} increased from {previous} to {count}"
                    )
            self.gpes[fname] = count
|
|
199
|
+
|
|
200
|
+
def capture_wake_sources(self):
    """Record every possible wakeup source and its wakeup setting.

    For each device of the "wakeup" subsystem that has a
    ``device/power/wakeup`` attribute, a line "<name> [<sys name>]: <value>"
    is built.  The name and sys name come from the first parent found in
    this order: i2c, thunderbolt device, thunderbolt domain, serio, rtc,
    mhi, hid, pci, acpi, pnp.  When no such parent exists the name is empty
    and the sys path of the wakeup device is used.  The lines are sorted
    and recorded as a tree below a "Possible wakeup sources:" heading.
    """

    def get_input_sibling_name(pyudev, parent):
        """Return NAME of the first input device below *parent*, or ''."""
        for inp in pyudev.list_devices(subsystem="input", parent=parent):
            if "NAME" in inp.properties:
                return inp.properties["NAME"]
        return ""

    devices = []
    for wake_dev in self.pyudev.list_devices(subsystem="wakeup"):
        p = os.path.join(wake_dev.sys_path, "device", "power", "wakeup")
        if not os.path.exists(p):
            continue
        wake_en = read_file(p)
        name = ""
        sys_name = wake_dev.sys_path
        # determine the type of device it hangs off of
        acpi = wake_dev.find_parent(subsystem="acpi")
        serio = wake_dev.find_parent(subsystem="serio")
        rtc = wake_dev.find_parent(subsystem="rtc")
        pci = wake_dev.find_parent(subsystem="pci")
        mhi = wake_dev.find_parent(subsystem="mhi")
        pnp = wake_dev.find_parent(subsystem="pnp")
        hid = wake_dev.find_parent(subsystem="hid")
        thunderbolt_device = wake_dev.find_parent(
            subsystem="thunderbolt", device_type="thunderbolt_device"
        )
        thunderbolt_domain = wake_dev.find_parent(
            subsystem="thunderbolt", device_type="thunderbolt_domain"
        )
        i2c = wake_dev.find_parent(subsystem="i2c")
        if i2c is not None:
            sys_name = i2c.sys_name
            name = get_input_sibling_name(self.pyudev, i2c)
        elif thunderbolt_device is not None:
            if "USB4_TYPE" in thunderbolt_device.properties:
                name = f'USB4 {thunderbolt_device.properties["USB4_TYPE"]} controller'
            sys_name = thunderbolt_device.sys_name
        elif thunderbolt_domain is not None:
            name = "Thunderbolt domain"
            sys_name = thunderbolt_domain.sys_name
        elif serio is not None:
            sys_name = serio.sys_name
            name = get_input_sibling_name(self.pyudev, serio)
        elif rtc is not None:
            sys_name = rtc.sys_name
            for _parent in self.pyudev.list_devices(
                subsystem="platform", parent=rtc, DRIVER="alarmtimer"
            ):
                name = "Real Time Clock alarm timer"
                break
        elif mhi is not None:
            sys_name = mhi.sys_name
            name = "Mobile Broadband host interface"
        elif hid is not None:
            # a device without HID_NAME keeps an empty name instead of
            # aborting the whole capture with KeyError
            name = hid.properties.get("HID_NAME", "")
            sys_name = hid.sys_name
        elif pci is not None:
            sys_name = pci.sys_name
            if (
                "ID_PCI_SUBCLASS_FROM_DATABASE" in pci.properties
                and "ID_VENDOR_FROM_DATABASE" in pci.properties
            ):
                name = f'{pci.properties["ID_VENDOR_FROM_DATABASE"]} {pci.properties["ID_PCI_SUBCLASS_FROM_DATABASE"]}'
            else:
                name = f"PCI {pci.properties['PCI_CLASS']}"
        elif acpi is not None:
            sys_name = acpi.sys_name
            if acpi.driver == "button":
                for inp in self.pyudev.list_devices(subsystem="input", parent=acpi):
                    if "NAME" not in inp.properties:
                        continue
                    name = f"ACPI {inp.properties['NAME']}"
                    break
            elif acpi.driver in ["battery", "ac"]:
                for ps in self.pyudev.list_devices(
                    subsystem="power_supply", parent=acpi
                ):
                    props = ps.properties
                    # guard the key that is actually read below as well
                    if (
                        "POWER_SUPPLY_NAME" not in props
                        or "POWER_SUPPLY_TYPE" not in props
                    ):
                        continue
                    name = f"ACPI {props['POWER_SUPPLY_TYPE']}"
        elif pnp is not None:
            name = "Plug-n-play"
            if pnp.driver == "rtc_cmos":
                name = f"{name} Real Time Clock"
            sys_name = pnp.sys_name

        name = name.replace('"', "")
        devices.append(f"{name} [{sys_name}]: {wake_en}")

    devices.sort()
    self.db.record_debug("Possible wakeup sources:")
    last = len(devices) - 1
    for idx, dev in enumerate(devices):
        # decide by position: identical entries may appear more than once
        prefix = "└─" if idx == last else "│ "
        self.db.record_debug(f"{prefix}{dev}")
|
|
300
|
+
|
|
301
|
+
def capture_lid(self) -> None:
    """Record the ACPI lid state.

    Each file found below /proc/acpi/button/lid is read; the text between
    the first and second ":" (stripped) is recorded together with the path
    of the file.
    """
    lid_root = os.path.join("/", "proc", "acpi", "button", "lid")
    for root, _dirs, files in os.walk(lid_root):
        for fname in files:
            state_file = os.path.join(root, fname)
            fields = read_file(state_file).split(":")
            state = fields[1].strip()
            self.db.record_debug(f"ACPI Lid ({state_file}): {state}")
|
|
309
|
+
|
|
310
|
+
def capture_wakeup_irq_data(self) -> bool:
    """Record which IRQ woke the system.

    The IRQ number is read from /sys/power/pm_wakeup_irq and its
    ``chip_name``, ``name``, ``hwirq`` and ``actions`` attributes from
    /sys/kernel/irq/<number>.  Any OSError while reading is ignored and
    nothing is recorded in that case.  Always returns True.
    """
    wakeup_irq = os.path.join("/", "sys", "power", "pm_wakeup_irq")
    try:
        n = read_file(wakeup_irq)
        irq_dir = os.path.join("/", "sys", "kernel", "irq", n)
        chip_name, name, hw, actions = [
            read_file(os.path.join(irq_dir, attribute))
            for attribute in ("chip_name", "name", "hwirq", "actions")
        ]
        self.db.record_debug(
            f"{Headers.WokeFromIrq} {n} ({chip_name} {hw}-{name} {actions})"
        )
    except OSError:
        # best effort: no wakeup IRQ information available
        pass
    return True
|
|
325
|
+
|
|
326
|
+
def capture_amdgpu_ips_status(self):
    """Record the amdgpu IPS status of matching PCI devices.

    Looks at PCI devices with PCI_CLASS "38000" whose PCI_ID starts with
    "1002".  For each one the debugfs file
    /sys/kernel/debug/dri/<slot>/amdgpu_dm_ips_status is recorded line by
    line as a tree, if it exists.  Devices lacking the PCI_ID or
    PCI_SLOT_NAME property are skipped.  A PermissionError while reading is
    recorded as a debug message (worded differently under kernel lockdown).
    """
    for device in self.pyudev.list_devices(subsystem="pci", PCI_CLASS="38000"):
        pci_id = device.properties.get("PCI_ID")
        # .get() may return None; guard before calling str methods on it
        if not pci_id or not pci_id.startswith("1002"):
            continue
        slot = device.properties.get("PCI_SLOT_NAME")
        if not slot:
            continue
        p = os.path.join(
            "/", "sys", "kernel", "debug", "dri", slot, "amdgpu_dm_ips_status"
        )
        if not os.path.exists(p):
            continue
        self.db.record_debug("IPS status")
        try:
            lines = read_file(p).split("\n")
            last = len(lines) - 1
            for idx, line in enumerate(lines):
                # decide by position; identical lines can occur repeatedly
                prefix = "└─" if idx == last else "│ "
                self.db.record_debug(f"{prefix}{line}")
        except PermissionError:
            if self.lockdown:
                self.db.record_debug(
                    "Unable to gather IPS state data due to kernel lockdown."
                )
            else:
                self.db.record_debug("Failed to read IPS state data")
|
|
351
|
+
|
|
352
|
+
def capture_thermal(self):
    """Record the temperature and trip points of the ACPI thermal zones.

    For every acpi device bound to the "thermal" driver the current
    temperature is recorded; on a later call the previously remembered
    temperature is shown next to the new one.  Trip points are listed only
    the first time a zone is seen.  When the temperature is above a trip
    point a prerequisite entry is recorded and the method returns False
    right away.
    """
    zones = list(self.pyudev.list_devices(subsystem="acpi", DRIVER="thermal"))
    if not zones:
        return

    self.db.record_debug("Thermal zones")
    for zone in zones:
        if zone != zones[-1]:
            prefix, detail_prefix = "├─ ", "│ \t"
        else:
            prefix, detail_prefix = "└─", " \t"
        name = os.path.basename(zone.device_path)
        zone_dir = os.path.join(zone.sys_path, "thermal_zone")
        # the sysfs value is divided by 1000 before being shown as °C
        temp = int(read_file(os.path.join(zone_dir, "temp"))) / 1000

        self.db.record_debug(f"{prefix}{name}")
        first_seen = name not in self.thermal
        if first_seen:
            self.db.record_debug(f"{detail_prefix} temp: {temp}°C")
        else:
            self.db.record_debug(
                f"{detail_prefix} {self.thermal[name]}°C -> {temp}°C"
            )

        # one trip point per "trip_point_*temp*" entry
        trip_count = sum(
            1
            for entry in os.listdir(zone_dir)
            if "trip_point" in entry and "temp" in entry
        )

        for i in range(trip_count):
            trip_type = read_file(os.path.join(zone_dir, f"trip_point_{i}_type"))
            trip = int(read_file(os.path.join(zone_dir, f"trip_point_{i}_temp"))) / 1000

            if first_seen:
                self.db.record_debug(f"{detail_prefix} {trip_type} trip: {trip}°C")

            if temp > trip:
                self.db.record_prereq(
                    f"Thermal zone {name} past trip point {trip_type}: {trip}°C",
                    "🌡️",
                )
                # NOTE(review): this leaves the remaining zones unprocessed
                # and is the only path returning a value - confirm intended
                return False
        self.thermal[name] = temp
|
|
401
|
+
|
|
402
|
+
def capture_input_wakeup_count(self):
    """Report input related devices whose wakeup count changed.

    For every input device the wakeup count of the device itself, or of
    the nearest parent that has one, is collected.  Counts that differ from
    the ones remembered by the previous call are recorded, then the new
    counts replace the remembered ones.
    """

    def get_wakeup_count(device):
        """Return the wakeup_count text of *device*, or None without one"""
        power_dir = os.path.join(device.sys_path, "power")
        if not os.path.exists(os.path.join(power_dir, "wakeup")):
            return None
        counter = os.path.join(power_dir, "wakeup_count")
        if not os.path.exists(counter):
            return None
        return read_file(counter)

    wakeup_count = {}
    for device in self.pyudev.list_devices(subsystem="input"):
        # walk from the device up through its parents until a count is found
        node = device
        while node is not None:
            count = get_wakeup_count(node)
            if count is not None:
                wakeup_count[node.sys_path] = count
                break
            node = node.parent

    # diff the count
    for path, count in wakeup_count.items():
        if path not in self.wakeup_count:
            continue
        previous = self.wakeup_count[path]
        if previous == count:
            continue
        # NOTE(review): a symbol is passed as second argument here, while
        # other record_debug calls in this file pass a numeric priority -
        # confirm whether record_cycle_data was intended
        self.db.record_debug(
            f"Woke up from input source {path} ({previous}->{count})",
            "💤",
        )
    self.wakeup_count = wakeup_count
|
|
442
|
+
|
|
443
|
+
def capture_hw_sleep(self) -> bool:
    """Determine the time spent in the hardware sleep state.

    Nothing is read when ``self.hw_sleep_duration`` is already non-zero.
    Otherwise /sys/power/suspend_stats/last_hw_sleep is tried first and,
    if that yields nothing, the "Time (in us) in S0i3" line of the amd_pmc
    ``smu_fw_info`` debugfs file.  Both values are divided by 10**6 before
    being stored (presumably microseconds to seconds - the debugfs label
    says "us", the sysfs unit is not visible here).

    Returns:
        False when the debugfs file is missing or unreadable, or when no
        hardware sleep time could be determined; True otherwise.
    """
    # try from kernel 6.4's suspend stats interface first because it works
    # even with kernel lockdown
    if not self.hw_sleep_duration:
        p = os.path.join("/", "sys", "power", "suspend_stats", "last_hw_sleep")
        if os.path.exists(p):
            self.hw_sleep_duration = int(read_file(p)) / 10**6
    if not self.hw_sleep_duration:
        p = os.path.join("/", "sys", "kernel", "debug", "amd_pmc", "smu_fw_info")
        try:
            val = read_file(p)
        except PermissionError:
            if self.lockdown:
                self.db.record_cycle_data(
                    "Unable to gather hardware sleep data with lockdown engaged",
                    "🚦",
                )
            else:
                self.db.record_cycle_data(
                    "Failed to read hardware sleep data", "🚦"
                )
            return False
        except FileNotFoundError:
            self.db.record_cycle_data("HW sleep statistics file missing", "❌")
            return False
        else:
            for line in val.split("\n"):
                if "Time (in us) in S0i3" in line:
                    self.hw_sleep_duration = int(line.split(":")[1]) / 10**6
    if not self.hw_sleep_duration:
        self.db.record_cycle_data("Did not reach hardware sleep state", "❌")

    # the duration is never None, so an "is not None" test was always True
    return bool(self.hw_sleep_duration)
|
|
478
|
+
|
|
479
|
+
def capture_command_line(self):
    """Record the kernel command line as a debug message"""
    cmdline = get_kernel_command_line()
    self.db.record_debug(f"/proc/cmdline: {cmdline}")
|
|
482
|
+
|
|
483
|
+
def _analyze_kernel_log_line(self, line, priority):
    """Inspect one kernel log line, update the findings and record the line.

    This is the callback handed to ``self.kernel_log.process_callback``.
    At most one branch of the ``elif`` chain below applies to a line, so
    the order of the branches matters.  Unless the BIOS-argument branch
    returns early, the (possibly rewritten) line is recorded as a debug
    message with the (possibly adjusted) priority.
    """
    bios_args = sscanf_bios_args(line)
    if bios_args:
        # a string result replaces the line and is recorded with priority 7;
        # any other truthy result means the line is not recorded at all
        if isinstance(bios_args, str):
            line = bios_args
            priority = 7
        else:
            return
    elif "Timekeeping suspended for" in line:
        self.cycle_count += 1
        # every token of the line that parses as a float is added up
        for f in line.split():
            try:
                self.kernel_duration += float(f)
            except ValueError:
                pass
    elif "Successfully transitioned to state" in line:
        self.upep = True
        if "Successfully transitioned to state lps0 ms entry" in line:
            self.upep_microsoft = True
    elif "_DSM function" in line:
        self.upep = True
        if "_DSM function 7" in line:
            self.upep_microsoft = True
    elif "Last suspend in deepest state for" in line:
        # tokens ending in "us" are divided by 10**6 and accumulated
        for f in line.split():
            if not f.endswith("us"):
                continue
            try:
                self.hw_sleep_duration += float(f.strip("us")) / 10**6
            except ValueError:
                pass
    elif "Triggering wakeup from IRQ" in line:
        # the IRQ number is the last word; 0 and duplicates are not stored
        irq = int(line.split()[-1])
        if irq and irq not in self.wakeup_irqs:
            self.wakeup_irqs += [irq]
    elif "SMU idlemask s0i3" in line:
        # the mask is kept as text (last word of the line)
        self.idle_masks += [line.split()[-1]]
    elif "ACPI BIOS Error" in line or "ACPI Error" in line:
        self.acpi_errors += [line]
    elif re.search("GPIO.*is active", line):
        # every number inside the matched text is stored (as a string)
        self.active_gpios += re.findall(
            r"\d+", re.search("GPIO.*is active", line).group()
        )
    elif Headers.Irq1Workaround in line:
        self.irq1_workaround = True
    # AMD-Vi: Event logged [IO_PAGE_FAULT device=0000:00:0c.0 domain=0x0000 address=0x7e800000 flags=0x0050]
    elif "Event logged [IO_PAGE_FAULT" in line:
        # get the device from string
        device = re.search(r"device=(.*?) domain", line)
        if device:
            device = device.group(1)
            if device not in self.page_faults:
                self.page_faults += [device]

    # This check is outside the chain above, so it also runs for lines that
    # already matched one of its branches.
    # evmisc-0132 ev_queue_notify_reques: Dispatching Notify on [UBTC] (Device) Value 0x80 (Status Change) Node 0000000080144eee
    if "Dispatching Notify on" in line:
        # add device without the [] to notify_devices if it's not already there
        device = re.search(r"\[(.*?)\]", line)
        if device:
            device = device.group(1)
            if device not in self.notify_devices:
                self.notify_devices += [device]
        # NOTE(review): nesting of this assignment was reconstructed from a
        # listing without indentation - confirm it applies to every Notify line
        priority = 7

    self.db.record_debug(line, priority)
|
|
548
|
+
|
|
549
|
+
def analyze_kernel_log(self):
    """Process the kernel log of the last cycle and record the findings.

    All per-cycle findings are reset, every kernel log line is passed
    through ``_analyze_kernel_log_line`` and the collected results are then
    turned into cycle data, debug messages and failure objects:

    * the number of hardware sleep cycles
    * IRQ1 wakeups on SoCs that need the IRQ1 workaround
    * idle mask bits that were set in one sample and cleared in a later one
    * which uPEP GUID was used
    * ACPI errors, IOMMU page faults and ACPI notify devices
    """
    self.cycle_count = 0
    self.upep = False
    self.upep_microsoft = False
    self.wakeup_irqs = []
    self.idle_masks = []
    self.acpi_errors = []
    self.active_gpios = []
    self.notify_devices = []
    self.page_faults = []
    self.irq1_workaround = False
    self.kernel_log.process_callback(self._analyze_kernel_log_line)

    if self.cycle_count:
        self.db.record_cycle_data(
            f"Hardware sleep cycle count: {self.cycle_count}",
            "💤",
        )
    if 1 in self.wakeup_irqs and soc_needs_irq1_wa(
        self.cpu_family, self.cpu_model, self.smu_version
    ):
        if self.irq1_workaround:
            self.db.record_cycle_data(
                "Kernel workaround for IRQ1 issue utilized", "○"
            )
        else:
            self.db.record_cycle_data("IRQ1 found during wakeup", "🚦")
            self.failures += [Irq1Workaround()]
    if self.idle_masks:
        bit_changed = 0
        # compare every mask with each later one and collect the bits that
        # were set earlier but cleared later
        for i, mask_i in enumerate(self.idle_masks):
            for mask_j in self.idle_masks[i:]:
                if mask_i != mask_j:
                    bit_changed |= int(mask_i, 16) & ~int(mask_j, 16)
        # Walk every bit that can be set; a fixed range(0, 31) silently
        # skipped bit 31 and anything above it.
        for bit in range(bit_changed.bit_length()):
            if bit_changed & BIT(bit):
                # NOTE(review): the symbol is passed where other calls in
                # this file pass a numeric priority - confirm
                self.db.record_debug(
                    f"Idle mask bit {bit} (0x{BIT(bit):x}) changed during suspend",
                    "○",
                )
    if self.upep:
        if self.upep_microsoft:
            self.db.record_debug("Used Microsoft uPEP GUID in LPS0 _DSM")
        else:
            self.db.record_debug("Used AMD uPEP GUID in LPS0 _DSM")
    if self.acpi_errors:
        self.db.record_cycle_data("ACPI BIOS errors found", "❌")
        self.failures += [AcpiBiosError(self.acpi_errors)]
    if self.page_faults:
        self.db.record_cycle_data("Page faults found", "❌")
        self.failures += [IommuPageFault(self.page_faults)]
    if self.notify_devices:
        self.db.record_cycle_data(
            f"Notify devices {self.notify_devices} found during suspend", "💤"
        )
|
|
607
|
+
|
|
608
|
+
def analyze_duration(self, t0, t1, requested, kernel, hw):
    """Print how long userspace, the kernel and the hardware were asleep.

    t0, t1: start and end of the cycle; their difference is the userspace
        duration.
    requested: requested suspend time in seconds.  Waking before 90% of it
        has elapsed adds a ``SpuriousWakeup`` failure.
    kernel, hw: seconds spent suspended in the kernel / in hardware sleep.
        For cycles of at least 60 seconds a hardware share of 90% or less
        adds a ``LowHardwareSleepResidency`` failure.
    """
    userspace_duration = t1 - t0
    min_suspend_duration = timedelta(seconds=requested * 0.9)
    woke_early = not (t1 > t0 + min_suspend_duration)
    if woke_early:
        print_color(
            f"Userspace suspended for {userspace_duration} (< minimum expected {min_suspend_duration})",
            "❌",
        )
        self.failures.append(SpuriousWakeup(requested, userspace_duration))
    else:
        print_color(
            f"Userspace suspended for {userspace_duration}",
            "✅",
        )

    kernel_share = float(kernel) / userspace_duration.total_seconds()
    print_color(
        f"Kernel suspended for total of {timedelta(seconds=kernel)} ({kernel_share:.2%})",
        "✅",
    )

    hw_share = float(hw / userspace_duration.total_seconds())
    symbol = "✅"
    if userspace_duration.total_seconds() >= 60 and not (hw_share > 0.9):
        symbol = "❌"
        self.failures.append(LowHardwareSleepResidency(userspace_duration, hw_share))

    # the percentage is left out when it is zero
    percent_msg = f"({hw_share:.2%})" if hw_share else ""
    print_color(
        f"In a hardware sleep state for {timedelta(seconds=hw)} {percent_msg}",
        symbol,
    )
|
|
648
|
+
|
|
649
|
+
def post(self):
    """Gather all data after resume and store the cycle in the database.

    The checks run one after another in a fixed order, finishing with
    restoring the ACPICA tracer; afterwards the cycle is recorded with the
    requested duration, active GPIOs, wakeup IRQs, kernel duration and
    hardware sleep duration.
    """
    self.analyze_kernel_log()
    self.capture_wakeup_irq_data()
    self.check_gpes()
    self.capture_lid()
    self.check_rtc_cmos()
    self.capture_hw_sleep()
    self.capture_battery()
    self.capture_amdgpu_ips_status()
    self.capture_thermal()
    self.capture_input_wakeup_count()
    self.acpica.restore()

    self.db.record_cycle(
        self.requested_duration,
        self.active_gpios,
        self.wakeup_irqs,
        self.kernel_duration,
        self.hw_sleep_duration,
    )
|
|
673
|
+
|
|
674
|
+
def prep(self):
    """Start a new cycle and capture the system state before suspending.

    Remembers the current time as ``last_suspend``, moves the kernel log to
    its tail, starts a database cycle, zeroes the kernel and hardware sleep
    durations, runs all pre-suspend captures, starts ACPICA tracing (BIOS
    tracing when ``bios_debug`` is set, notify tracing otherwise) and
    finally records the cycle.
    """
    self.last_suspend = datetime.now()
    self.kernel_log.seek_tail()
    self.db.start_cycle(self.last_suspend)
    self.kernel_duration = 0
    self.hw_sleep_duration = 0

    captures = (
        self.capture_battery,
        self.check_gpes,
        self.capture_lid,
        self.capture_command_line,
        self.capture_wake_sources,
        self.capture_running_compositors,
        self.capture_power_profile,
        self.capture_amdgpu_ips_status,
        self.capture_thermal,
        self.capture_input_wakeup_count,
    )
    for capture in captures:
        capture()

    start_trace = self.acpica.trace_bios if self.bios_debug else self.acpica.trace_notify
    start_trace()
    self.db.record_cycle()
|
|
696
|
+
|
|
697
|
+
def program_wakealarm(self):
    """Program the RTC wakealarm to fire after ``self.requested_duration``.

    Of all rtc devices, the last one that actually exposes a ``wakealarm``
    attribute is used.  "0" is written first and then the relative alarm
    "+<seconds>" (presumably the first write clears an already programmed
    alarm - see the kernel RTC sysfs documentation).  When no usable RTC is
    found a hint to wake the system manually is printed instead.
    """
    wakealarm = None
    for device in self.pyudev.list_devices(subsystem="rtc"):
        candidate = os.path.join(device.sys_path, "wakealarm")
        # skip RTCs without the attribute instead of failing on open()
        if os.path.exists(candidate):
            wakealarm = candidate

    if wakealarm is None:
        print_color("No RTC device found, please manually wake system", "🚦")
        return

    with open(wakealarm, "w", encoding="utf-8") as w:
        w.write("0")
    with open(wakealarm, "w", encoding="utf-8") as w:
        w.write(f"+{self.requested_duration}\n")
|
|
709
|
+
|
|
710
|
+
@pm_debugging
def suspend_system(self):
    """Suspend the system through logind (dbus) or the sysfs interface.

    With ``self.logind`` set, logind is asked to suspend and the call
    waits until its ``PreparingForSleep`` property is false again.
    Otherwise "mem" is written to /sys/power/state.

    Returns:
        True when the suspend request went through; False when dbus is
        missing, logind cannot suspend or cannot be reached, or the sysfs
        write fails.  Each failure is recorded as cycle data.
    """

    def get_wakeup_count():
        """Return the value of /sys/power/wakeup_count, or 0 if unreadable"""
        p = os.path.join("/", "sys", "power", "wakeup_count")
        if not os.path.exists(p):
            return 0
        try:
            with open(p, "r", encoding="utf-8") as r:
                return int(r.read())
        except OSError:
            return 0

    if self.logind:
        # Import separately: if the import fails, "dbus" is unbound and an
        # "except dbus.exceptions.DBusException" clause evaluated first
        # would raise UnboundLocalError instead of reaching ImportError.
        try:
            import dbus
        except ImportError:
            self.db.record_cycle_data("Missing dbus", "❌")
            return False

        try:
            bus = dbus.SystemBus()
            obj = bus.get_object(
                "org.freedesktop.login1", "/org/freedesktop/login1"
            )
            intf = dbus.Interface(obj, "org.freedesktop.login1.Manager")
            propf = dbus.Interface(obj, "org.freedesktop.DBus.Properties")
            if intf.CanSuspend() != "yes":
                self.db.record_cycle_data("Unable to suspend", "❌")
                return False
            intf.Suspend(True)
            while propf.Get("org.freedesktop.login1.Manager", "PreparingForSleep"):
                time.sleep(1)
            return True
        except dbus.exceptions.DBusException as e:
            self.db.record_cycle_data(
                f"Unable to communicate with logind: {e}", "❌"
            )
            return False

    old = get_wakeup_count()
    try:
        p = os.path.join("/", "sys", "power", "state")
        with open(p, "w", encoding="utf-8") as w:
            w.write("mem")
    except OSError as e:
        # include the wakeup counts from before and after the attempt
        new = get_wakeup_count()
        self.db.record_cycle_data(
            f"Failed to set suspend state ({old} -> {new}): {e}", "❌"
        )
        return False
    return True
|
|
763
|
+
|
|
764
|
+
def unlock_session(self):
    """Ask logind to unlock all sessions.

    Does nothing and returns True unless ``self.logind`` is set.

    Returns:
        False when dbus is missing or logind cannot be reached (recorded as
        cycle data), True otherwise.
    """
    if not self.logind:
        return True

    # Import separately: with a failed import "dbus" is unbound, and an
    # "except dbus.exceptions.DBusException" clause would itself raise
    # UnboundLocalError while the ImportError is being handled.
    try:
        import dbus
    except ImportError:
        self.db.record_cycle_data("Missing dbus", "❌")
        return False

    try:
        bus = dbus.SystemBus()
        obj = bus.get_object("org.freedesktop.login1", "/org/freedesktop/login1")
        intf = dbus.Interface(obj, "org.freedesktop.login1.Manager")
        intf.UnlockSessions()
    except dbus.exceptions.DBusException as e:
        self.db.record_cycle_data(f"Unable to communicate with logind: {e}", "❌")
        return False
    return True
|
|
782
|
+
|
|
783
|
+
def run(self, duration, count, wait, rand, logind):
    """Run *count* suspend cycles.

    duration: suspend time in seconds (upper bound when *rand* is set).
    count: number of cycles; nothing happens for a falsy count.
    wait: seconds of countdown per cycle, half before suspending and half
        before collecting data (upper bound when *rand* is set).
    rand: pick a random duration and wait between 1 and the given values
        for every cycle.
    logind: suspend through logind instead of sysfs.

    Returns:
        False as soon as a suspend attempt fails, True otherwise.
    """
    if not count:
        return True

    if logind:
        self.logind = True

    multiple = count > 1
    if rand:
        print_color(
            f"Running {count} cycle random test with max duration of {duration}s and a max wait of {wait}s",
            "🗣️",
        )
    elif multiple:
        length = timedelta(seconds=(duration + wait) * count)
        print_color(
            f"Running {count} cycles (Test finish expected @ {datetime.now() + length})",
            "🗣️",
        )

    for i in range(1, count + 1):
        if rand:
            self.requested_duration = random.randint(1, duration)
            requested_wait = random.randint(1, wait)
        else:
            self.requested_duration = duration
            requested_wait = wait
        half_wait = math.ceil(requested_wait / 2)

        run_countdown("Suspending system", half_wait)
        self.prep()
        self.db.record_debug(
            f"{Headers.SuspendDuration} {timedelta(seconds=self.requested_duration)}",
        )
        header = f"{Headers.CycleCount} {i}: " if multiple else ""
        print_color(
            f"{header}Started at {self.last_suspend} (cycle finish expected @ {datetime.now() + timedelta(seconds=self.requested_duration + requested_wait)})",
            "🗣️",
        )
        self.program_wakealarm()
        suspended = self.suspend_system()
        if suspended:
            run_countdown("Collecting data", half_wait)
            self.post()
        self.db.sync()
        self.report_cycle()
        if not suspended:
            return False
        self.unlock_session()
    return True
|
|
833
|
+
|
|
834
|
+
def systemd_pre_hook(self):
    """Called before suspend.

    Captures the pre-suspend state via ``prep()``, syncs the database and
    only then enables pm_debug_messages.
    """
    self.prep()
    self.db.sync()
    toggle_pm_debug(True)
|
|
839
|
+
|
|
840
|
+
def systemd_post_hook(self):
    """Called after resume.

    Disables pm_debug_messages, picks up the start time of the last cycle
    from the database (first field, format YYYYmmddHHMMSS), positions the
    kernel log at that time, re-opens the cycle, runs the post-resume
    checks and syncs the database.
    """
    toggle_pm_debug(False)
    last_cycle = self.db.get_last_cycle()
    started = datetime.strptime(str(last_cycle[0]), "%Y%m%d%H%M%S")
    self.last_suspend = started
    self.kernel_log.seek_tail(self.last_suspend)
    self.db.start_cycle(self.last_suspend)
    self.post()
    self.db.sync()
|
|
849
|
+
|
|
850
|
+
def report_cycle(self):
    """Print the report for the last cycle to stdout.

    The report covers exactly ``self.last_suspend`` (used as both start
    and end), without prerequisites and without report debugging.
    """
    print_color(Headers.LastCycleResults, "🗣️")

    report = SleepReport(
        since=self.last_suspend,
        until=self.last_suspend,
        fname=None,
        fmt="stdout",
        tool_debug=self.display_debug,
        report_debug=False,
    )
    report.run(inc_prereq=False)
|