amd-debug-tools 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of amd-debug-tools might be problematic. Click here for more details.

amd_debug/validator.py ADDED
@@ -0,0 +1,863 @@
1
+ #!/usr/bin/python3
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ import glob
5
+ import math
6
+ import os
7
+ import re
8
+ import random
9
+ import subprocess
10
+ import time
11
+ from datetime import timedelta, datetime
12
+ from packaging import version
13
+ from pyudev import Context
14
+
15
+ from amd_debug.sleep_report import SleepReport
16
+ from amd_debug.database import SleepDatabase
17
+ from amd_debug.battery import Batteries
18
+ from amd_debug.kernel import get_kernel_log, get_kernel_command_line, sscanf_bios_args
19
+ from amd_debug.common import (
20
+ print_color,
21
+ read_file,
22
+ check_lockdown,
23
+ run_countdown,
24
+ BIT,
25
+ AmdTool,
26
+ )
27
+ from amd_debug.acpi import AcpicaTracer
28
+ from amd_debug.failures import (
29
+ AcpiBiosError,
30
+ Irq1Workaround,
31
+ LowHardwareSleepResidency,
32
+ SpuriousWakeup,
33
+ RtcAlarmWrong,
34
+ IommuPageFault,
35
+ )
36
+
37
+
38
class Headers:
    """Fixed marker strings used to tag entries in the debug output."""

    # Per-cycle bookkeeping markers.
    CycleCount = "Suspend cycle"
    SuspendDuration = "Suspend timer programmed for"
    LastCycleResults = "Results from last s2idle cycle"

    # Wakeup related markers.
    WokeFromIrq = "Woke up from IRQ"
    Irq1Workaround = "Disabling IRQ1 wakeup source to avoid platform firmware bug"
46
+
47
+
48
def soc_needs_irq1_wa(family, model, smu_version):
    """Tell whether a given SoC requires the IRQ1 workaround.

    Family 0x17 models 0x60 and 0x68 always need it.  Family 0x19 model 0x50
    needs it only when `smu_version` parses as older than 64.66.0.  Every
    other family/model combination does not.
    """
    if family == 0x17:
        return model in (0x60, 0x68)
    if family == 0x19 and model == 0x50:
        fixed_in = version.parse("64.66.0")
        return version.parse(smu_version) < fixed_in
    return False
57
+
58
+
59
def toggle_pm_debug(enable):
    """Write "1" (truthy `enable`) or "0" to /sys/power/pm_debug_messages."""
    value = "1" if enable else "0"
    target = os.path.join("/", "sys", "power", "pm_debug_messages")
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(value)
64
+
65
+
66
def pm_debugging(func):
    """Decorator that enables pm_debug_messages around a call to `func`.

    The messages are switched on before `func` runs and switched off again
    afterwards, including when `func` raises, so a failed call cannot leave
    the kernel setting enabled.

    Returns:
        A wrapper with the same call signature and return value as `func`.
    """
    # Local import keeps this block self-contained; functools is stdlib.
    import functools

    @functools.wraps(func)
    def runner(*args, **kwargs):
        toggle_pm_debug(True)
        try:
            return func(*args, **kwargs)
        finally:
            # Always restore the setting, even on an exception.
            toggle_pm_debug(False)

    return runner
77
+
78
+
79
class SleepValidator(AmdTool):
    """Run suspend cycles and record diagnostics about each one.

    `prep` samples system state before a cycle and `post` samples it again
    afterwards; both write what they find to a `SleepDatabase`.  Problems
    detected along the way are appended to `self.failures`.
    """

    def __init__(self, tool_debug, bios_debug):
        """Create the helper objects and reset all per-cycle state.

        Args:
            tool_debug: when truthy, "s2idle" is passed to the base class as
                the log prefix and debug output is enabled in cycle reports.
            bios_debug: when truthy, `prep` calls `acpica.trace_bios()`
                instead of `acpica.trace_notify()`.
        """
        log_prefix = "s2idle" if tool_debug else None
        super().__init__(log_prefix)

        self.pyudev = Context()

        self.kernel_log = get_kernel_log()
        self.db = SleepDatabase()
        self.batteries = Batteries()
        self.acpica = AcpicaTracer()
        self.bios_debug = bios_debug
        self.cpu_family = ""
        self.cpu_model = ""
        self.cpu_model_string = ""
        self.smu_version = ""
        self.smu_program = ""
        self.last_suspend = datetime.now()
        self.requested_duration = 0
        self.userspace_duration = 0
        self.kernel_duration = 0
        self.hw_sleep_duration = 0
        self.failures = []
        # last seen counter per GPE file name, used to report changes
        self.gpes = {}
        self.display_debug = tool_debug
        self.lockdown = check_lockdown()
        self.logind = False
        self.cycle_count = 0
        self.upep = False
        self.upep_microsoft = False
        self.wakeup_irqs = []
        self.idle_masks = []
        self.acpi_errors = []
        self.active_gpios = []
        self.irq1_workaround = False
        # last seen temperature per thermal zone name
        self.thermal = {}
        # last seen wakeup count per device sysfs path
        self.wakeup_count = {}
        self.page_faults = []
        self.notify_devices = []

    def capture_running_compositors(self):
        """Record a debug entry for every known compositor found in /proc."""

        known_compositors = [
            "kwin_wayland",
            "gnome-shell",
            "cosmic-session",
            "hyprland",
        ]

        for proc_dir in glob.glob("/proc/[0-9]*"):
            try:
                target = os.readlink(os.path.join(proc_dir, "exe"))
            except OSError:
                # The link is missing or unreadable (for instance the process
                # exited between the glob and the readlink); skip it.
                continue
            parts = os.path.basename(target).split()
            if parts and parts[0] in known_compositors:
                self.db.record_debug(f"{parts[0]} compositor is running")

    def capture_power_profile(self):
        """Record the output of powerprofilesctl, if the tool is installed."""
        cmd = ["/usr/bin/powerprofilesctl"]
        if not os.path.exists(cmd[0]):
            return
        try:
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode(
                "utf-8"
            )
        except subprocess.CalledProcessError as e:
            detail = e.output
            if isinstance(detail, bytes):
                detail = detail.decode("utf-8", errors="replace")
            self.db.record_debug(f"Failed to run powerprofilesctl: {detail}")
            return

        self.db.record_debug("Power Profiles:")
        lines = [line for line in output.split("\n") if line.strip()]
        last = len(lines) - 1
        for idx, line in enumerate(lines):
            # Decide by position so a repeated line is not mistaken for the
            # final one.
            prefix = "│ " if idx != last else "└─"
            self.db.record_debug(f"{prefix}{line.strip()}")

    def capture_battery(self):
        """Record the current and full energy level of every battery."""
        for name in self.batteries.get_batteries():
            unit = self.batteries.get_energy_unit(name)
            energy = self.batteries.get_energy(name)
            full = self.batteries.get_energy_full(name)
            self.db.record_debug(f"{name} energy level is {energy} {unit}")
            report_unit = "W" if unit == "µWh" else "A"
            self.db.record_battery_energy(name, energy, full, report_unit)

    def check_rtc_cmos(self):
        """Flag a failure when rtc_cmos is not using the ACPI alarm."""
        p = os.path.join(
            "/", "sys", "module", "rtc_cmos", "parameters", "use_acpi_alarm"
        )
        val = read_file(p)
        if val == "N":
            self.db.record_cycle_data(
                "`rtc_cmos` not configured to use ACPI alarm", "🚦"
            )
            self.failures += [RtcAlarmWrong()]

    def check_gpes(self):
        """Read every GPE counter and log those that differ from the last read."""
        base = os.path.join("/", "sys", "firmware", "acpi", "interrupts")
        for root, _dirs, files in os.walk(base, topdown=False):
            for fname in files:
                if not fname.startswith("gpe") or fname == "gpe_all":
                    continue
                target = os.path.join(root, fname)
                with open(target, "r", encoding="utf-8") as r:
                    # the first whitespace separated field is parsed as the count
                    val = int(r.read().split()[0])
                if fname in self.gpes and self.gpes[fname] != val:
                    self.db.record_debug(
                        f"{fname} increased from {self.gpes[fname]} to {val}",
                    )
                self.gpes[fname] = val

    def capture_wake_sources(self):
        """Record each udev "wakeup" device with a readable name and its state.

        The name is derived from the first matching parent subsystem, checked
        in this order: i2c, thunderbolt device, thunderbolt domain, serio,
        rtc, mhi, hid, pci, acpi, pnp.
        """

        def get_input_sibling_name(pyudev, parent):
            """Return NAME of the first named input device below `parent`."""
            for inp in pyudev.list_devices(subsystem="input", parent=parent):
                if "NAME" in inp.properties:
                    return inp.properties["NAME"]
            return ""

        devices = []
        for wake_dev in self.pyudev.list_devices(subsystem="wakeup"):
            p = os.path.join(wake_dev.sys_path, "device", "power", "wakeup")
            if not os.path.exists(p):
                continue
            wake_en = read_file(p)
            name = ""
            sys_name = wake_dev.sys_path
            # determine the type of device it hangs off of
            acpi = wake_dev.find_parent(subsystem="acpi")
            serio = wake_dev.find_parent(subsystem="serio")
            rtc = wake_dev.find_parent(subsystem="rtc")
            pci = wake_dev.find_parent(subsystem="pci")
            mhi = wake_dev.find_parent(subsystem="mhi")
            pnp = wake_dev.find_parent(subsystem="pnp")
            hid = wake_dev.find_parent(subsystem="hid")
            thunderbolt_device = wake_dev.find_parent(
                subsystem="thunderbolt", device_type="thunderbolt_device"
            )
            thunderbolt_domain = wake_dev.find_parent(
                subsystem="thunderbolt", device_type="thunderbolt_domain"
            )
            i2c = wake_dev.find_parent(subsystem="i2c")
            if i2c is not None:
                sys_name = i2c.sys_name
                name = get_input_sibling_name(self.pyudev, i2c)
            elif thunderbolt_device is not None:
                if "USB4_TYPE" in thunderbolt_device.properties:
                    name = (
                        f'USB4 {thunderbolt_device.properties["USB4_TYPE"]} controller'
                    )
                sys_name = thunderbolt_device.sys_name
            elif thunderbolt_domain is not None:
                name = "Thunderbolt domain"
                sys_name = thunderbolt_domain.sys_name
            elif serio is not None:
                sys_name = serio.sys_name
                name = get_input_sibling_name(self.pyudev, serio)
            elif rtc is not None:
                sys_name = rtc.sys_name
                for _parent in self.pyudev.list_devices(
                    subsystem="platform", parent=rtc, DRIVER="alarmtimer"
                ):
                    name = "Real Time Clock alarm timer"
                    break
            elif mhi is not None:
                sys_name = mhi.sys_name
                name = "Mobile Broadband host interface"
            elif hid is not None:
                # fall back to an empty name rather than raising KeyError
                name = hid.properties.get("HID_NAME", "")
                sys_name = hid.sys_name
            elif pci is not None:
                sys_name = pci.sys_name
                if (
                    "ID_PCI_SUBCLASS_FROM_DATABASE" in pci.properties
                    and "ID_VENDOR_FROM_DATABASE" in pci.properties
                ):
                    name = f'{pci.properties["ID_VENDOR_FROM_DATABASE"]} {pci.properties["ID_PCI_SUBCLASS_FROM_DATABASE"]}'
                else:
                    name = f"PCI {pci.properties['PCI_CLASS']}"
            elif acpi is not None:
                sys_name = acpi.sys_name
                if acpi.driver == "button":
                    for inp in self.pyudev.list_devices(subsystem="input", parent=acpi):
                        if "NAME" not in inp.properties:
                            continue
                        name = f"ACPI {inp.properties['NAME']}"
                        break
                elif acpi.driver in ["battery", "ac"]:
                    for ps in self.pyudev.list_devices(
                        subsystem="power_supply", parent=acpi
                    ):
                        # require the property that is actually read below
                        if (
                            "POWER_SUPPLY_NAME" not in ps.properties
                            or "POWER_SUPPLY_TYPE" not in ps.properties
                        ):
                            continue
                        name = f"ACPI {ps.properties['POWER_SUPPLY_TYPE']}"
            elif pnp is not None:
                name = "Plug-n-play"
                if pnp.driver == "rtc_cmos":
                    name = f"{name} Real Time Clock"
                sys_name = pnp.sys_name

            name = name.replace('"', "")
            devices.append(f"{name} [{sys_name}]: {wake_en}")
        devices.sort()
        self.db.record_debug("Possible wakeup sources:")
        last = len(devices) - 1
        for idx, dev in enumerate(devices):
            # the final entry closes the tree; chosen by position so that
            # identical entries are not all drawn as the last one
            prefix = "│ " if idx != last else "└─"
            self.db.record_debug(f"{prefix}{dev}")

    def capture_lid(self) -> None:
        """Record the state reported by every file under /proc/acpi/button/lid."""
        base = os.path.join("/", "proc", "acpi", "button", "lid")
        for root, _dirs, files in os.walk(base):
            for fname in files:
                path = os.path.join(root, fname)
                fields = read_file(path).split(":")
                if len(fields) < 2:
                    # no "key: value" content to report
                    continue
                state = fields[1].strip()
                self.db.record_debug(f"ACPI Lid ({path}): {state}")

    def capture_wakeup_irq_data(self) -> bool:
        """Record details of the IRQ named in /sys/power/pm_wakeup_irq.

        Read errors are ignored.  Always returns True.
        """
        p = os.path.join("/", "sys", "power", "pm_wakeup_irq")
        try:
            n = read_file(p)
            p = os.path.join("/", "sys", "kernel", "irq", n)
            chip_name = read_file(os.path.join(p, "chip_name"))
            name = read_file(os.path.join(p, "name"))
            hw = read_file(os.path.join(p, "hwirq"))
            actions = read_file(os.path.join(p, "actions"))
            message = f"{Headers.WokeFromIrq} {n} ({chip_name} {hw}-{name} {actions})"
            self.db.record_debug(message)
        except OSError:
            pass
        return True

    def capture_amdgpu_ips_status(self):
        """Record amdgpu_dm_ips_status from debugfs for matching PCI devices.

        Only PCI class 38000 devices whose PCI_ID starts with "1002" are
        considered.
        """
        for device in self.pyudev.list_devices(subsystem="pci", PCI_CLASS="38000"):
            pci_id = device.properties.get("PCI_ID")
            if not pci_id or not pci_id.startswith("1002"):
                continue
            slot = device.properties.get("PCI_SLOT_NAME")
            if not slot:
                continue
            p = os.path.join(
                "/", "sys", "kernel", "debug", "dri", slot, "amdgpu_dm_ips_status"
            )
            if not os.path.exists(p):
                continue
            self.db.record_debug("IPS status")
            try:
                lines = read_file(p).split("\n")
                last = len(lines) - 1
                for idx, line in enumerate(lines):
                    prefix = "│ " if idx != last else "└─"
                    self.db.record_debug(f"{prefix}{line}")
            except PermissionError:
                if self.lockdown:
                    self.db.record_debug(
                        "Unable to gather IPS state data due to kernel lockdown."
                    )
                else:
                    self.db.record_debug("Failed to read IPS state data")

    def capture_thermal(self):
        """Record the temperature and trip points of each ACPI thermal zone.

        Trip points are only listed the first time a zone is seen; on later
        calls the previous and current temperature are shown instead.

        Returns:
            False as soon as a zone is found above one of its trip points
            (remaining zones are not examined); otherwise None.
        """
        devs = list(self.pyudev.list_devices(subsystem="acpi", DRIVER="thermal"))
        if not devs:
            return

        self.db.record_debug("Thermal zones")
        last = len(devs) - 1
        for idx, dev in enumerate(devs):
            is_last = idx == last
            prefix = "└─" if is_last else "├─ "
            detail_prefix = " \t" if is_last else "│ \t"
            name = os.path.basename(dev.device_path)
            p = os.path.join(dev.sys_path, "thermal_zone")
            # the sysfs value is divided by 1000 before being shown as °C
            temp = int(read_file(os.path.join(p, "temp"))) / 1000

            self.db.record_debug(f"{prefix}{name}")
            if name not in self.thermal:
                self.db.record_debug(f"{detail_prefix} temp: {temp}°C")
            else:
                self.db.record_debug(
                    f"{detail_prefix} {self.thermal[name]}°C -> {temp}°C"
                )

            # handle all trip points
            trip_count = sum(
                1 for f in os.listdir(p) if "trip_point" in f and "temp" in f
            )

            for i in range(trip_count):
                trip_type = read_file(os.path.join(p, f"trip_point_{i}_type"))
                trip = int(read_file(os.path.join(p, f"trip_point_{i}_temp"))) / 1000

                if name not in self.thermal:
                    self.db.record_debug(f"{detail_prefix} {trip_type} trip: {trip}°C")

                if temp > trip:
                    self.db.record_prereq(
                        f"Thermal zone {name} past trip point {trip_type}: {trip}°C",
                        "🌡️",
                    )
                    return False
            self.thermal[name] = temp

    def capture_input_wakeup_count(self):
        """Log input related devices whose wakeup count changed since last call."""

        def get_wakeup_count(device):
            """Return the device's wakeup_count, or None if it has none."""
            p = os.path.join(device.sys_path, "power", "wakeup")
            if not os.path.exists(p):
                return None
            p = os.path.join(device.sys_path, "power", "wakeup_count")
            if not os.path.exists(p):
                return None
            return read_file(p)

        wakeup_count = {}
        for device in self.pyudev.list_devices(subsystem="input"):
            count = get_wakeup_count(device)
            if count is not None:
                wakeup_count[device.sys_path] = count
                continue
            # iterate parents until finding one with a wakeup count
            # or no more parents
            parent = device.parent
            while parent is not None:
                count = get_wakeup_count(parent)
                if count is not None:
                    wakeup_count[parent.sys_path] = count
                    break
                parent = parent.parent

        # diff the count
        for device, count in wakeup_count.items():
            if device not in self.wakeup_count:
                continue
            if self.wakeup_count[device] == count:
                continue
            # NOTE(review): other callers pass a numeric priority as the
            # second argument of record_debug - confirm a symbol is accepted.
            self.db.record_debug(
                f"Woke up from input source {device} ({self.wakeup_count[device]}->{count})",
                "💤",
            )
        self.wakeup_count = wakeup_count

    def capture_hw_sleep(self) -> bool:
        """Determine how long the last cycle spent in hardware sleep.

        If the duration is still unset it is read from
        /sys/power/suspend_stats/last_hw_sleep, then from the amd_pmc debugfs
        file.  The value read is divided by 10**6 before being stored.

        Returns:
            False when the debugfs file cannot be read or is missing,
            otherwise True (also when the duration is zero).
        """
        # try from kernel 6.4's suspend stats interface first because it works
        # even with kernel lockdown
        if not self.hw_sleep_duration:
            p = os.path.join("/", "sys", "power", "suspend_stats", "last_hw_sleep")
            if os.path.exists(p):
                self.hw_sleep_duration = int(read_file(p)) / 10**6
        if not self.hw_sleep_duration:
            p = os.path.join("/", "sys", "kernel", "debug", "amd_pmc", "smu_fw_info")
            try:
                val = read_file(p)
                for line in val.split("\n"):
                    if "Last S0i3 Status" in line:
                        continue
                    if "Time (in us) in S0i3" in line:
                        self.hw_sleep_duration = int(line.split(":")[1]) / 10**6
            except PermissionError:
                if self.lockdown:
                    self.db.record_cycle_data(
                        "Unable to gather hardware sleep data with lockdown engaged",
                        "🚦",
                    )
                else:
                    self.db.record_cycle_data(
                        "Failed to read hardware sleep data", "🚦"
                    )
                return False
            except FileNotFoundError:
                self.db.record_cycle_data("HW sleep statistics file missing", "❌")
                return False
        if not self.hw_sleep_duration:
            self.db.record_cycle_data("Did not reach hardware sleep state", "❌")

        # NOTE(review): the duration is never None in this class, so this is
        # always True; kept as-is to avoid changing what callers observe.
        return self.hw_sleep_duration is not None

    def capture_command_line(self):
        """Record the kernel command line as a debug entry."""
        self.db.record_debug(f"/proc/cmdline: {get_kernel_command_line()}")

    def _analyze_kernel_log_line(self, line, priority):
        """Update per-cycle state from one kernel log line, then record it.

        Args:
            line: the kernel log message text.
            priority: priority forwarded to `record_debug`; replaced by 7 for
                decoded BIOS messages and ACPI notify dispatch lines.
        """
        bios_args = sscanf_bios_args(line)
        if bios_args:
            if isinstance(bios_args, str):
                line = bios_args
                priority = 7
            else:
                # a truthy non-string result means the line is not recorded
                return
        elif "Timekeeping suspended for" in line:
            self.cycle_count += 1
            for f in line.split():
                try:
                    self.kernel_duration += float(f)
                except ValueError:
                    pass
        elif "Successfully transitioned to state" in line:
            self.upep = True
            if "Successfully transitioned to state lps0 ms entry" in line:
                self.upep_microsoft = True
        elif "_DSM function" in line:
            self.upep = True
            if "_DSM function 7" in line:
                self.upep_microsoft = True
        elif "Last suspend in deepest state for" in line:
            for f in line.split():
                if not f.endswith("us"):
                    continue
                try:
                    # drop exactly the trailing "us" suffix
                    self.hw_sleep_duration += float(f[:-2]) / 10**6
                except ValueError:
                    pass
        elif "Triggering wakeup from IRQ" in line:
            irq = int(line.split()[-1])
            if irq and irq not in self.wakeup_irqs:
                self.wakeup_irqs += [irq]
        elif "SMU idlemask s0i3" in line:
            self.idle_masks += [line.split()[-1]]
        elif "ACPI BIOS Error" in line or "ACPI Error" in line:
            self.acpi_errors += [line]
        elif re.search("GPIO.*is active", line):
            self.active_gpios += re.findall(
                r"\d+", re.search("GPIO.*is active", line).group()
            )
        elif Headers.Irq1Workaround in line:
            self.irq1_workaround = True
        # AMD-Vi: Event logged [IO_PAGE_FAULT device=0000:00:0c.0 domain=0x0000 address=0x7e800000 flags=0x0050]
        elif "Event logged [IO_PAGE_FAULT" in line:
            # get the device from string
            device = re.search(r"device=(.*?) domain", line)
            if device:
                device = device.group(1)
                if device not in self.page_faults:
                    self.page_faults += [device]

        # evmisc-0132 ev_queue_notify_reques: Dispatching Notify on [UBTC] (Device) Value 0x80 (Status Change) Node 0000000080144eee
        if "Dispatching Notify on" in line:
            # add device without the [] to notify_devices if it's not already there
            device = re.search(r"\[(.*?)\]", line)
            if device:
                device = device.group(1)
                if device not in self.notify_devices:
                    self.notify_devices += [device]
            priority = 7

        self.db.record_debug(line, priority)

    def analyze_kernel_log(self):
        """Process the kernel log for the last cycle and record the findings.

        Resets the per-cycle kernel log state, feeds every log line through
        `_analyze_kernel_log_line`, then records cycle data and appends any
        detected failures to `self.failures`.
        """
        self.cycle_count = 0
        self.upep = False
        self.upep_microsoft = False
        self.wakeup_irqs = []
        self.idle_masks = []
        self.acpi_errors = []
        self.active_gpios = []
        self.notify_devices = []
        self.page_faults = []
        self.irq1_workaround = False
        self.kernel_log.process_callback(self._analyze_kernel_log_line)

        if self.cycle_count:
            self.db.record_cycle_data(
                f"Hardware sleep cycle count: {self.cycle_count}",
                "💤",
            )
        if self.wakeup_irqs:
            if 1 in self.wakeup_irqs and soc_needs_irq1_wa(
                self.cpu_family, self.cpu_model, self.smu_version
            ):
                if self.irq1_workaround:
                    self.db.record_cycle_data(
                        "Kernel workaround for IRQ1 issue utilized", "○"
                    )
                else:
                    self.db.record_cycle_data("IRQ1 found during wakeup", "🚦")
                    self.failures += [Irq1Workaround()]
        if self.idle_masks:
            # collect bits that were set in a mask and clear in a later one
            bit_changed = 0
            for i, mask_i in enumerate(self.idle_masks):
                for mask_j in self.idle_masks[i + 1 :]:
                    if mask_i != mask_j:
                        bit_changed |= int(mask_i, 16) & ~int(mask_j, 16)
            # walk every bit position that can be set, including the top one
            for bit in range(bit_changed.bit_length()):
                if bit_changed & BIT(bit):
                    # NOTE(review): second argument is a symbol here while
                    # other record_debug calls pass a priority - confirm.
                    self.db.record_debug(
                        f"Idle mask bit {bit} (0x{BIT(bit):x}) changed during suspend",
                        "○",
                    )
        if self.upep:
            if self.upep_microsoft:
                self.db.record_debug("Used Microsoft uPEP GUID in LPS0 _DSM")
            else:
                self.db.record_debug("Used AMD uPEP GUID in LPS0 _DSM")
        if self.acpi_errors:
            self.db.record_cycle_data("ACPI BIOS errors found", "❌")
            self.failures += [AcpiBiosError(self.acpi_errors)]
        if self.page_faults:
            self.db.record_cycle_data("Page faults found", "❌")
            self.failures += [IommuPageFault(self.page_faults)]
        if self.notify_devices:
            self.db.record_cycle_data(
                f"Notify devices {self.notify_devices} found during suspend", "💤"
            )

    def analyze_duration(self, t0, t1, requested, kernel, hw):
        """Print how the last cycle's time was spent and flag short cycles.

        Args:
            t0: datetime at which the cycle started.
            t1: datetime at which the cycle ended.
            requested: requested suspend duration in seconds.
            kernel: seconds the kernel was suspended.
            hw: seconds spent in a hardware sleep state.

        A `SpuriousWakeup` failure is added when the cycle lasted less than
        90% of `requested`.  A `LowHardwareSleepResidency` failure is added
        when the cycle lasted at least 60 seconds and no more than 90% of it
        was spent in hardware sleep.
        """
        userspace_duration = t1 - t0
        total_seconds = userspace_duration.total_seconds()
        min_suspend_duration = timedelta(seconds=requested * 0.9)
        expected_wake_time = t0 + min_suspend_duration
        if t1 > expected_wake_time:
            print_color(
                f"Userspace suspended for {userspace_duration}",
                "✅",
            )
        else:
            print_color(
                f"Userspace suspended for {userspace_duration} (< minimum expected {min_suspend_duration})",
                "❌",
            )
            self.failures += [SpuriousWakeup(requested, userspace_duration)]

        # a zero length cycle would otherwise divide by zero
        percent = float(kernel) / total_seconds if total_seconds else 0.0
        print_color(
            f"Kernel suspended for total of {timedelta(seconds=kernel)} ({percent:.2%})",
            "✅",
        )

        percent = float(hw / total_seconds) if total_seconds else 0.0
        symbol = "✅"
        if total_seconds >= 60 and not percent > 0.9:
            symbol = "❌"
            self.failures += [LowHardwareSleepResidency(userspace_duration, percent)]
        percent_msg = f"({percent:.2%})" if percent else ""
        print_color(
            f"In a hardware sleep state for {timedelta(seconds=hw)} {percent_msg}",
            symbol,
        )

    def post(self):
        """Run every post-resume check, then record the cycle in the database."""
        checks = [
            self.analyze_kernel_log,
            self.capture_wakeup_irq_data,
            self.check_gpes,
            self.capture_lid,
            self.check_rtc_cmos,
            self.capture_hw_sleep,
            self.capture_battery,
            self.capture_amdgpu_ips_status,
            self.capture_thermal,
            self.capture_input_wakeup_count,
            self.acpica.restore,
        ]
        # return values of the individual checks are not used here
        for check in checks:
            check()
        self.db.record_cycle(
            self.requested_duration,
            self.active_gpios,
            self.wakeup_irqs,
            self.kernel_duration,
            self.hw_sleep_duration,
        )

    def prep(self):
        """Start a new cycle and capture the pre-suspend system state."""
        self.last_suspend = datetime.now()
        self.kernel_log.seek_tail()
        self.db.start_cycle(self.last_suspend)
        self.kernel_duration = 0
        self.hw_sleep_duration = 0
        self.capture_battery()
        self.check_gpes()
        self.capture_lid()
        self.capture_command_line()
        self.capture_wake_sources()
        self.capture_running_compositors()
        self.capture_power_profile()
        self.capture_amdgpu_ips_status()
        self.capture_thermal()
        self.capture_input_wakeup_count()
        if self.bios_debug:
            self.acpica.trace_bios()
        else:
            self.acpica.trace_notify()
        self.db.record_cycle()

    def program_wakealarm(self):
        """Program an RTC wakealarm for `self.requested_duration` seconds ahead.

        The last RTC device that exposes a `wakealarm` file is used.  When
        none is found a message asks the user to wake the system manually.
        """
        wakealarm = None
        for device in self.pyudev.list_devices(subsystem="rtc"):
            candidate = os.path.join(device.sys_path, "wakealarm")
            # skip RTC devices that have no wakealarm file to write to
            if os.path.exists(candidate):
                wakealarm = candidate
        if wakealarm is None:
            print_color("No RTC device found, please manually wake system", "🚦")
            return
        # write "0" first, then the new relative alarm
        with open(wakealarm, "w", encoding="utf-8") as w:
            w.write("0")
        with open(wakealarm, "w", encoding="utf-8") as w:
            w.write(f"+{self.requested_duration}\n")

    @pm_debugging
    def suspend_system(self):
        """Suspend the system through logind (dbus) or /sys/power/state.

        Returns:
            True when the suspend request was issued, False when it could
            not be (the reason is recorded as cycle data).
        """

        def get_wakeup_count():
            """Return /sys/power/wakeup_count as int, or 0 if unavailable."""
            p = os.path.join("/", "sys", "power", "wakeup_count")
            if not os.path.exists(p):
                return 0
            try:
                with open(p, "r", encoding="utf-8") as r:
                    return int(r.read())
            except OSError:
                return 0

        if self.logind:
            # Import separately: an except clause that names dbus cannot be
            # evaluated when the import itself is what failed.
            try:
                import dbus
            except ImportError:
                self.db.record_cycle_data("Missing dbus", "❌")
                return False
            try:
                bus = dbus.SystemBus()
                obj = bus.get_object(
                    "org.freedesktop.login1", "/org/freedesktop/login1"
                )
                intf = dbus.Interface(obj, "org.freedesktop.login1.Manager")
                propf = dbus.Interface(obj, "org.freedesktop.DBus.Properties")
                if intf.CanSuspend() != "yes":
                    self.db.record_cycle_data("Unable to suspend", "❌")
                    return False
                intf.Suspend(True)
                # poll once a second until logind clears PreparingForSleep
                while propf.Get("org.freedesktop.login1.Manager", "PreparingForSleep"):
                    time.sleep(1)
                return True
            except dbus.exceptions.DBusException as e:
                self.db.record_cycle_data(
                    f"Unable to communicate with logind: {e}", "❌"
                )
                return False

        old = get_wakeup_count()
        try:
            p = os.path.join("/", "sys", "power", "state")
            with open(p, "w", encoding="utf-8") as w:
                w.write("mem")
        except OSError as e:
            new = get_wakeup_count()
            self.db.record_cycle_data(
                f"Failed to set suspend state ({old} -> {new}): {e}", "❌"
            )
            return False
        return True

    def unlock_session(self):
        """Ask logind to unlock all sessions when logind mode is active.

        Returns:
            False when dbus is missing or logind cannot be reached, True
            otherwise (including when logind mode is not in use).
        """
        if not self.logind:
            return True
        # Import separately so a missing module is reported instead of
        # surfacing as an unbound name in the except clause below.
        try:
            import dbus
        except ImportError:
            self.db.record_cycle_data("Missing dbus", "❌")
            return False
        try:
            bus = dbus.SystemBus()
            obj = bus.get_object("org.freedesktop.login1", "/org/freedesktop/login1")
            intf = dbus.Interface(obj, "org.freedesktop.login1.Manager")
            intf.UnlockSessions()
        except dbus.exceptions.DBusException as e:
            self.db.record_cycle_data(
                f"Unable to communicate with logind: {e}", "❌"
            )
            return False
        return True

    def run(self, duration, count, wait, rand, logind):
        """Run `count` suspend cycles.

        Args:
            duration: suspend length in seconds (upper bound when `rand`).
            count: number of cycles; nothing is done when falsy.
            wait: seconds between cycles (upper bound when `rand`); half is
                spent before suspending and half after resuming.
            rand: when truthy, pick a random duration and wait per cycle.
            logind: when truthy, suspend through logind instead of sysfs.

        Returns:
            True when all cycles ran, False as soon as a suspend fails.
        """
        if not count:
            return True

        if logind:
            self.logind = True

        if rand:
            print_color(
                f"Running {count} cycle random test with max duration of {duration}s and a max wait of {wait}s",
                "🗣️",
            )
        elif count > 1:
            length = timedelta(seconds=(duration + wait) * count)
            print_color(
                f"Running {count} cycles (Test finish expected @ {datetime.now() + length})",
                "🗣️",
            )
        for i in range(1, count + 1):
            if rand:
                # randint(1, x) raises for x < 1, so only randomise when
                # there is a range to pick from
                self.requested_duration = (
                    random.randint(1, duration) if duration >= 1 else duration
                )
                requested_wait = random.randint(1, wait) if wait >= 1 else wait
            else:
                self.requested_duration = duration
                requested_wait = wait
            run_countdown("Suspending system", math.ceil(requested_wait / 2))
            self.prep()
            self.db.record_debug(
                f"{Headers.SuspendDuration} {timedelta(seconds=self.requested_duration)}",
            )
            header = f"{Headers.CycleCount} {i}: " if count > 1 else ""
            print_color(
                f"{header}Started at {self.last_suspend} (cycle finish expected @ {datetime.now() + timedelta(seconds=self.requested_duration + requested_wait)})",
                "🗣️",
            )
            self.program_wakealarm()
            if not self.suspend_system():
                self.db.sync()
                self.report_cycle()
                return False
            run_countdown("Collecting data", math.ceil(requested_wait / 2))
            self.post()
            self.db.sync()
            self.report_cycle()
        self.unlock_session()
        return True

    def systemd_pre_hook(self):
        """Called before suspend: capture state, sync, enable pm debug."""
        self.prep()
        self.db.sync()
        toggle_pm_debug(True)

    def systemd_post_hook(self):
        """Called after resume: disable pm debug and process the last cycle."""
        toggle_pm_debug(False)
        t0 = self.db.get_last_cycle()
        # the first field is parsed as a YYYYmmddHHMMSS timestamp
        self.last_suspend = datetime.strptime(str(t0[0]), "%Y%m%d%H%M%S")
        self.kernel_log.seek_tail(self.last_suspend)
        self.db.start_cycle(self.last_suspend)
        self.post()
        self.db.sync()

    def report_cycle(self):
        """Print a report covering only the most recent cycle to stdout."""
        print_color(Headers.LastCycleResults, "🗣️")

        app = SleepReport(
            since=self.last_suspend,
            until=self.last_suspend,
            fname=None,
            fmt="stdout",
            tool_debug=self.display_debug,
            report_debug=False,
        )
        app.run(inc_prereq=False)