amd-node-scraper 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amd_node_scraper-0.0.1.dist-info/LICENSE +21 -0
- amd_node_scraper-0.0.1.dist-info/METADATA +424 -0
- amd_node_scraper-0.0.1.dist-info/RECORD +197 -0
- amd_node_scraper-0.0.1.dist-info/WHEEL +5 -0
- amd_node_scraper-0.0.1.dist-info/entry_points.txt +2 -0
- amd_node_scraper-0.0.1.dist-info/top_level.txt +1 -0
- nodescraper/__init__.py +32 -0
- nodescraper/base/__init__.py +34 -0
- nodescraper/base/inbandcollectortask.py +118 -0
- nodescraper/base/inbanddataplugin.py +39 -0
- nodescraper/base/regexanalyzer.py +120 -0
- nodescraper/cli/__init__.py +29 -0
- nodescraper/cli/cli.py +511 -0
- nodescraper/cli/constants.py +27 -0
- nodescraper/cli/dynamicparserbuilder.py +171 -0
- nodescraper/cli/helper.py +517 -0
- nodescraper/cli/inputargtypes.py +129 -0
- nodescraper/configbuilder.py +123 -0
- nodescraper/configregistry.py +66 -0
- nodescraper/configs/node_status.json +19 -0
- nodescraper/connection/__init__.py +25 -0
- nodescraper/connection/inband/__init__.py +46 -0
- nodescraper/connection/inband/inband.py +171 -0
- nodescraper/connection/inband/inbandlocal.py +93 -0
- nodescraper/connection/inband/inbandmanager.py +151 -0
- nodescraper/connection/inband/inbandremote.py +173 -0
- nodescraper/connection/inband/sshparams.py +43 -0
- nodescraper/constants.py +26 -0
- nodescraper/enums/__init__.py +40 -0
- nodescraper/enums/eventcategory.py +89 -0
- nodescraper/enums/eventpriority.py +42 -0
- nodescraper/enums/executionstatus.py +44 -0
- nodescraper/enums/osfamily.py +34 -0
- nodescraper/enums/systeminteraction.py +41 -0
- nodescraper/enums/systemlocation.py +33 -0
- nodescraper/generictypes.py +36 -0
- nodescraper/interfaces/__init__.py +44 -0
- nodescraper/interfaces/connectionmanager.py +143 -0
- nodescraper/interfaces/dataanalyzertask.py +138 -0
- nodescraper/interfaces/datacollectortask.py +185 -0
- nodescraper/interfaces/dataplugin.py +356 -0
- nodescraper/interfaces/plugin.py +127 -0
- nodescraper/interfaces/resultcollator.py +56 -0
- nodescraper/interfaces/task.py +164 -0
- nodescraper/interfaces/taskresulthook.py +39 -0
- nodescraper/models/__init__.py +48 -0
- nodescraper/models/analyzerargs.py +93 -0
- nodescraper/models/collectorargs.py +30 -0
- nodescraper/models/connectionconfig.py +34 -0
- nodescraper/models/datamodel.py +171 -0
- nodescraper/models/datapluginresult.py +39 -0
- nodescraper/models/event.py +158 -0
- nodescraper/models/pluginconfig.py +38 -0
- nodescraper/models/pluginresult.py +39 -0
- nodescraper/models/systeminfo.py +44 -0
- nodescraper/models/taskresult.py +185 -0
- nodescraper/models/timerangeargs.py +38 -0
- nodescraper/pluginexecutor.py +274 -0
- nodescraper/pluginregistry.py +152 -0
- nodescraper/plugins/__init__.py +25 -0
- nodescraper/plugins/inband/__init__.py +25 -0
- nodescraper/plugins/inband/amdsmi/__init__.py +28 -0
- nodescraper/plugins/inband/amdsmi/amdsmi_analyzer.py +821 -0
- nodescraper/plugins/inband/amdsmi/amdsmi_collector.py +1313 -0
- nodescraper/plugins/inband/amdsmi/amdsmi_plugin.py +43 -0
- nodescraper/plugins/inband/amdsmi/amdsmidata.py +1002 -0
- nodescraper/plugins/inband/amdsmi/analyzer_args.py +50 -0
- nodescraper/plugins/inband/amdsmi/cper.py +65 -0
- nodescraper/plugins/inband/bios/__init__.py +29 -0
- nodescraper/plugins/inband/bios/analyzer_args.py +64 -0
- nodescraper/plugins/inband/bios/bios_analyzer.py +93 -0
- nodescraper/plugins/inband/bios/bios_collector.py +93 -0
- nodescraper/plugins/inband/bios/bios_plugin.py +43 -0
- nodescraper/plugins/inband/bios/biosdata.py +30 -0
- nodescraper/plugins/inband/cmdline/__init__.py +25 -0
- nodescraper/plugins/inband/cmdline/analyzer_args.py +80 -0
- nodescraper/plugins/inband/cmdline/cmdline_analyzer.py +113 -0
- nodescraper/plugins/inband/cmdline/cmdline_collector.py +77 -0
- nodescraper/plugins/inband/cmdline/cmdline_plugin.py +43 -0
- nodescraper/plugins/inband/cmdline/cmdlinedata.py +30 -0
- nodescraper/plugins/inband/device_enumeration/__init__.py +29 -0
- nodescraper/plugins/inband/device_enumeration/analyzer_args.py +73 -0
- nodescraper/plugins/inband/device_enumeration/device_enumeration_analyzer.py +81 -0
- nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py +176 -0
- nodescraper/plugins/inband/device_enumeration/device_enumeration_plugin.py +45 -0
- nodescraper/plugins/inband/device_enumeration/deviceenumdata.py +36 -0
- nodescraper/plugins/inband/dimm/__init__.py +25 -0
- nodescraper/plugins/inband/dimm/collector_args.py +31 -0
- nodescraper/plugins/inband/dimm/dimm_collector.py +151 -0
- nodescraper/plugins/inband/dimm/dimm_plugin.py +40 -0
- nodescraper/plugins/inband/dimm/dimmdata.py +30 -0
- nodescraper/plugins/inband/dkms/__init__.py +25 -0
- nodescraper/plugins/inband/dkms/analyzer_args.py +85 -0
- nodescraper/plugins/inband/dkms/dkms_analyzer.py +106 -0
- nodescraper/plugins/inband/dkms/dkms_collector.py +76 -0
- nodescraper/plugins/inband/dkms/dkms_plugin.py +43 -0
- nodescraper/plugins/inband/dkms/dkmsdata.py +33 -0
- nodescraper/plugins/inband/dmesg/__init__.py +28 -0
- nodescraper/plugins/inband/dmesg/analyzer_args.py +33 -0
- nodescraper/plugins/inband/dmesg/collector_args.py +39 -0
- nodescraper/plugins/inband/dmesg/dmesg_analyzer.py +503 -0
- nodescraper/plugins/inband/dmesg/dmesg_collector.py +164 -0
- nodescraper/plugins/inband/dmesg/dmesg_plugin.py +44 -0
- nodescraper/plugins/inband/dmesg/dmesgdata.py +116 -0
- nodescraper/plugins/inband/fabrics/__init__.py +28 -0
- nodescraper/plugins/inband/fabrics/fabrics_collector.py +726 -0
- nodescraper/plugins/inband/fabrics/fabrics_plugin.py +37 -0
- nodescraper/plugins/inband/fabrics/fabricsdata.py +140 -0
- nodescraper/plugins/inband/journal/__init__.py +28 -0
- nodescraper/plugins/inband/journal/collector_args.py +33 -0
- nodescraper/plugins/inband/journal/journal_collector.py +107 -0
- nodescraper/plugins/inband/journal/journal_plugin.py +40 -0
- nodescraper/plugins/inband/journal/journaldata.py +44 -0
- nodescraper/plugins/inband/kernel/__init__.py +25 -0
- nodescraper/plugins/inband/kernel/analyzer_args.py +64 -0
- nodescraper/plugins/inband/kernel/kernel_analyzer.py +91 -0
- nodescraper/plugins/inband/kernel/kernel_collector.py +129 -0
- nodescraper/plugins/inband/kernel/kernel_plugin.py +43 -0
- nodescraper/plugins/inband/kernel/kerneldata.py +32 -0
- nodescraper/plugins/inband/kernel_module/__init__.py +25 -0
- nodescraper/plugins/inband/kernel_module/analyzer_args.py +59 -0
- nodescraper/plugins/inband/kernel_module/kernel_module_analyzer.py +211 -0
- nodescraper/plugins/inband/kernel_module/kernel_module_collector.py +264 -0
- nodescraper/plugins/inband/kernel_module/kernel_module_data.py +60 -0
- nodescraper/plugins/inband/kernel_module/kernel_module_plugin.py +43 -0
- nodescraper/plugins/inband/memory/__init__.py +25 -0
- nodescraper/plugins/inband/memory/analyzer_args.py +45 -0
- nodescraper/plugins/inband/memory/memory_analyzer.py +98 -0
- nodescraper/plugins/inband/memory/memory_collector.py +330 -0
- nodescraper/plugins/inband/memory/memory_plugin.py +43 -0
- nodescraper/plugins/inband/memory/memorydata.py +90 -0
- nodescraper/plugins/inband/network/__init__.py +28 -0
- nodescraper/plugins/inband/network/network_collector.py +1828 -0
- nodescraper/plugins/inband/network/network_plugin.py +37 -0
- nodescraper/plugins/inband/network/networkdata.py +319 -0
- nodescraper/plugins/inband/nvme/__init__.py +28 -0
- nodescraper/plugins/inband/nvme/nvme_collector.py +167 -0
- nodescraper/plugins/inband/nvme/nvme_plugin.py +37 -0
- nodescraper/plugins/inband/nvme/nvmedata.py +45 -0
- nodescraper/plugins/inband/os/__init__.py +25 -0
- nodescraper/plugins/inband/os/analyzer_args.py +64 -0
- nodescraper/plugins/inband/os/os_analyzer.py +73 -0
- nodescraper/plugins/inband/os/os_collector.py +131 -0
- nodescraper/plugins/inband/os/os_plugin.py +43 -0
- nodescraper/plugins/inband/os/osdata.py +31 -0
- nodescraper/plugins/inband/package/__init__.py +25 -0
- nodescraper/plugins/inband/package/analyzer_args.py +48 -0
- nodescraper/plugins/inband/package/package_analyzer.py +253 -0
- nodescraper/plugins/inband/package/package_collector.py +273 -0
- nodescraper/plugins/inband/package/package_plugin.py +43 -0
- nodescraper/plugins/inband/package/packagedata.py +41 -0
- nodescraper/plugins/inband/pcie/__init__.py +29 -0
- nodescraper/plugins/inband/pcie/analyzer_args.py +63 -0
- nodescraper/plugins/inband/pcie/pcie_analyzer.py +1081 -0
- nodescraper/plugins/inband/pcie/pcie_collector.py +690 -0
- nodescraper/plugins/inband/pcie/pcie_data.py +2017 -0
- nodescraper/plugins/inband/pcie/pcie_plugin.py +43 -0
- nodescraper/plugins/inband/process/__init__.py +25 -0
- nodescraper/plugins/inband/process/analyzer_args.py +45 -0
- nodescraper/plugins/inband/process/collector_args.py +31 -0
- nodescraper/plugins/inband/process/process_analyzer.py +91 -0
- nodescraper/plugins/inband/process/process_collector.py +115 -0
- nodescraper/plugins/inband/process/process_plugin.py +46 -0
- nodescraper/plugins/inband/process/processdata.py +34 -0
- nodescraper/plugins/inband/rocm/__init__.py +25 -0
- nodescraper/plugins/inband/rocm/analyzer_args.py +66 -0
- nodescraper/plugins/inband/rocm/rocm_analyzer.py +100 -0
- nodescraper/plugins/inband/rocm/rocm_collector.py +205 -0
- nodescraper/plugins/inband/rocm/rocm_plugin.py +43 -0
- nodescraper/plugins/inband/rocm/rocmdata.py +62 -0
- nodescraper/plugins/inband/storage/__init__.py +25 -0
- nodescraper/plugins/inband/storage/analyzer_args.py +38 -0
- nodescraper/plugins/inband/storage/collector_args.py +31 -0
- nodescraper/plugins/inband/storage/storage_analyzer.py +152 -0
- nodescraper/plugins/inband/storage/storage_collector.py +110 -0
- nodescraper/plugins/inband/storage/storage_plugin.py +44 -0
- nodescraper/plugins/inband/storage/storagedata.py +70 -0
- nodescraper/plugins/inband/sysctl/__init__.py +29 -0
- nodescraper/plugins/inband/sysctl/analyzer_args.py +67 -0
- nodescraper/plugins/inband/sysctl/sysctl_analyzer.py +81 -0
- nodescraper/plugins/inband/sysctl/sysctl_collector.py +101 -0
- nodescraper/plugins/inband/sysctl/sysctl_plugin.py +43 -0
- nodescraper/plugins/inband/sysctl/sysctldata.py +42 -0
- nodescraper/plugins/inband/syslog/__init__.py +28 -0
- nodescraper/plugins/inband/syslog/syslog_collector.py +121 -0
- nodescraper/plugins/inband/syslog/syslog_plugin.py +37 -0
- nodescraper/plugins/inband/syslog/syslogdata.py +46 -0
- nodescraper/plugins/inband/uptime/__init__.py +25 -0
- nodescraper/plugins/inband/uptime/uptime_collector.py +88 -0
- nodescraper/plugins/inband/uptime/uptime_plugin.py +37 -0
- nodescraper/plugins/inband/uptime/uptimedata.py +31 -0
- nodescraper/resultcollators/__init__.py +25 -0
- nodescraper/resultcollators/tablesummary.py +159 -0
- nodescraper/taskresulthooks/__init__.py +28 -0
- nodescraper/taskresulthooks/filesystemloghook.py +88 -0
- nodescraper/typeutils.py +171 -0
- nodescraper/utils.py +412 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
26
|
+
from typing import Optional
|
|
27
|
+
|
|
28
|
+
from nodescraper.base import InBandDataCollector
|
|
29
|
+
from nodescraper.connection.inband import TextFileArtifact
|
|
30
|
+
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily
|
|
31
|
+
from nodescraper.models import TaskResult
|
|
32
|
+
from nodescraper.utils import strip_ansi_codes
|
|
33
|
+
|
|
34
|
+
from .rocmdata import RocmDataModel
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class RocmCollector(InBandDataCollector[RocmDataModel, None]):
    """Collect ROCm version data and related ROCm installation details.

    The ROCm version is read from the first readable file in
    ``CMD_VERSION_PATHS``. When a version is found, further commands are run
    to record ROCm directories, ``rocminfo``/``clinfo`` output, loader
    configuration entries, registered libraries, environment variables and
    the entries under the KFD ``proc`` directory.
    """

    SUPPORTED_OS_FAMILY: set[OSFamily] = {OSFamily.LINUX}

    DATA_MODEL = RocmDataModel
    CMD_VERSION_PATHS = [
        "/opt/rocm/.info/version-rocm",
        "/opt/rocm/.info/version",
    ]
    CMD_ROCMINFO = "{rocm_path}/bin/rocminfo"
    CMD_ROCM_LATEST = "ls -v -d /opt/rocm-[3-7]* | tail -1"
    CMD_ROCM_DIRS = "ls -v -d /opt/rocm*"
    CMD_LD_CONF = "grep -i -E 'rocm' /etc/ld.so.conf.d/*"
    CMD_ROCM_LIBS = "ldconfig -p | grep -i -E 'rocm'"
    CMD_ENV_VARS = "env | grep -Ei 'rocm|hsa|hip|mpi|openmp|ucx|miopen'"
    CMD_CLINFO = "{rocm_path}/opencl/bin/*/clinfo"
    CMD_KFD_PROC = "ls /sys/class/kfd/kfd/proc/"

    @staticmethod
    def _non_empty_lines(text: str) -> list[str]:
        """Split text into stripped lines, dropping lines that are blank."""
        return [line.strip() for line in text.strip().split("\n") if line.strip()]

    @staticmethod
    def _section_header(title: str) -> str:
        """Build the banner that introduces a section of the artifact file."""
        rule = "=" * 80
        return f"{rule}\n{title}\n{rule}\n\n"

    def collect_data(self, args=None) -> tuple[TaskResult, Optional[RocmDataModel]]:
        """Collect ROCm version data from the system.

        Args:
            args: Unused by this collector.

        Returns:
            tuple[TaskResult, Optional[RocmDataModel]]: tuple containing the task result and ROCm data model if available.
        """
        rocm_data = None
        for path in self.CMD_VERSION_PATHS:
            res = self._run_sut_cmd(f"grep . {path}")
            if res.exit_code == 0:
                try:
                    rocm_data = RocmDataModel(rocm_version=res.stdout)
                    self._log_event(
                        category="ROCM_VERSION_READ",
                        description="ROCm version data collected",
                        data=rocm_data.model_dump(include={"rocm_version"}),
                        priority=EventPriority.INFO,
                    )
                    self.result.message = f"ROCm version: {rocm_data.rocm_version}"
                    self.result.status = ExecutionStatus.OK
                    break
                except ValueError as e:
                    # The file was readable but its content failed model
                    # validation; stop here instead of trying other paths.
                    self._log_event(
                        category=EventCategory.OS,
                        description=f"Invalid ROCm version format: {res.stdout}",
                        data={"version": res.stdout, "error": str(e)},
                        priority=EventPriority.ERROR,
                        console_log=True,
                    )
                    self.result.message = f"Invalid ROCm version format: {res.stdout}"
                    self.result.status = ExecutionStatus.ERROR
                    return self.result, None
        else:
            # Reached only when the loop finished without ``break``, i.e. no
            # candidate path produced a version.
            self._log_event(
                category=EventCategory.OS,
                description=f"Unable to read ROCm version from {self.CMD_VERSION_PATHS}",
                data={"raw_output": res.stdout},
                priority=EventPriority.ERROR,
            )

        # Collect additional ROCm data if version was found
        if rocm_data:
            self._collect_rocm_details(rocm_data)

        if not rocm_data:
            self._log_event(
                category=EventCategory.OS,
                description="Error checking ROCm version",
                data={
                    "command": res.command,
                    "exit_code": res.exit_code,
                    "stderr": res.stderr,
                },
                priority=EventPriority.ERROR,
                console_log=True,
            )
            self.result.message = "ROCm version not found"
            self.result.status = ExecutionStatus.ERROR

        return self.result, rocm_data

    def _collect_rocm_details(self, rocm_data: RocmDataModel) -> None:
        """Populate the optional fields of ``rocm_data`` and attach the log artifact.

        Each command is best-effort: a non-zero exit code leaves the
        corresponding field at its default value.

        Args:
            rocm_data (RocmDataModel): model that already holds the ROCm version.
        """
        # Collect latest versioned ROCm path (rocm-[3-7]*)
        versioned_path_res = self._run_sut_cmd(self.CMD_ROCM_LATEST)
        if versioned_path_res.exit_code == 0:
            rocm_data.rocm_latest_versioned_path = versioned_path_res.stdout.strip()

        # Collect all ROCm paths as list
        all_paths_res = self._run_sut_cmd(self.CMD_ROCM_DIRS)
        if all_paths_res.exit_code == 0:
            rocm_data.rocm_all_paths = self._non_empty_lines(all_paths_res.stdout)

        # Determine ROCm path for commands that need it; fall back to the
        # unversioned location when no versioned directory was recorded.
        rocm_path = rocm_data.rocm_latest_versioned_path or "/opt/rocm"

        # Collect rocminfo output as list of lines with ANSI codes stripped
        rocminfo_res = self._run_sut_cmd(self.CMD_ROCMINFO.format(rocm_path=rocm_path))
        artifact_content = ""
        if rocminfo_res.exit_code == 0:
            rocm_data.rocminfo = [
                strip_ansi_codes(line) for line in rocminfo_res.stdout.strip().split("\n")
            ]
            # The artifact keeps the raw command output, not the cleaned lines.
            artifact_content += self._section_header("ROCMINFO OUTPUT")
            artifact_content += rocminfo_res.stdout

        # Collect ld.so.conf ROCm entries
        ld_conf_res = self._run_sut_cmd(self.CMD_LD_CONF)
        if ld_conf_res.exit_code == 0:
            rocm_data.ld_conf_rocm = self._non_empty_lines(ld_conf_res.stdout)

        # Collect ROCm libraries from ldconfig
        rocm_libs_res = self._run_sut_cmd(self.CMD_ROCM_LIBS)
        if rocm_libs_res.exit_code == 0:
            rocm_data.rocm_libs = self._non_empty_lines(rocm_libs_res.stdout)

        # Collect ROCm-related environment variables
        env_vars_res = self._run_sut_cmd(self.CMD_ENV_VARS)
        if env_vars_res.exit_code == 0:
            rocm_data.env_vars = self._non_empty_lines(env_vars_res.stdout)

        # Collect clinfo output
        clinfo_res = self._run_sut_cmd(self.CMD_CLINFO.format(rocm_path=rocm_path))

        # Always append clinfo section to artifact, even if empty or failed
        if artifact_content:
            artifact_content += "\n\n"
        artifact_content += self._section_header("CLINFO OUTPUT")

        if clinfo_res.exit_code == 0:
            rocm_data.clinfo = [
                strip_ansi_codes(line) for line in clinfo_res.stdout.strip().split("\n")
            ]
            artifact_content += clinfo_res.stdout
        else:
            # Add error information if clinfo failed
            artifact_content += f"Command: {clinfo_res.command}\n"
            artifact_content += f"Exit Code: {clinfo_res.exit_code}\n"
            if clinfo_res.stderr:
                artifact_content += f"Error: {clinfo_res.stderr}\n"
            if clinfo_res.stdout:
                artifact_content += f"Output: {clinfo_res.stdout}\n"

        # Add combined rocminfo and clinfo output as a text file artifact.
        # The content is never empty here because the clinfo header is
        # always written above.
        self.result.artifacts.append(
            TextFileArtifact(filename="rocminfo.log", contents=artifact_content)
        )

        # Collect KFD process list
        kfd_proc_res = self._run_sut_cmd(self.CMD_KFD_PROC)
        if kfd_proc_res.exit_code == 0:
            rocm_data.kfd_proc = self._non_empty_lines(kfd_proc_res.stdout)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
26
|
+
from nodescraper.base import InBandDataPlugin
|
|
27
|
+
|
|
28
|
+
from .analyzer_args import RocmAnalyzerArgs
|
|
29
|
+
from .rocm_analyzer import RocmAnalyzer
|
|
30
|
+
from .rocm_collector import RocmCollector
|
|
31
|
+
from .rocmdata import RocmDataModel
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RocmPlugin(InBandDataPlugin[RocmDataModel, None, RocmAnalyzerArgs]):
    """Plugin for collection and analysis of rocm version data"""

    # Data model class associated with this plugin
    DATA_MODEL = RocmDataModel

    # Collector class associated with this plugin
    COLLECTOR = RocmCollector

    # Analyzer class associated with this plugin
    ANALYZER = RocmAnalyzer

    # Argument model class for the analyzer
    ANALYZER_ARGS = RocmAnalyzerArgs
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
26
|
+
import re
|
|
27
|
+
from typing import List
|
|
28
|
+
|
|
29
|
+
from pydantic import field_validator
|
|
30
|
+
|
|
31
|
+
from nodescraper.models import DataModel
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RocmDataModel(DataModel):
    """Data model for the ROCm version and related ROCm system details.

    Only ``rocm_version`` is required; every other field has an empty default.
    """

    rocm_version: str
    rocminfo: List[str] = []
    rocm_latest_versioned_path: str = ""
    rocm_all_paths: List[str] = []
    ld_conf_rocm: List[str] = []
    rocm_libs: List[str] = []
    env_vars: List[str] = []
    clinfo: List[str] = []
    kfd_proc: List[str] = []

    @field_validator("rocm_version")
    @classmethod
    def validate_rocm_version(cls, rocm_version: str) -> str:
        """
        Check that the ROCm version string has the expected shape.

        Accepted values are one to four dot-separated numeric components,
        optionally followed by a hyphen and a numeric suffix.

        Args:
            rocm_version (str): The ROCm version string to validate.

        Raises:
            ValueError: If the ROCm version does not match the expected format.

        Returns:
            str: The validated ROCm version string, unchanged.
        """
        version_pattern = r"^\d+(?:\.\d+){0,3}(-\d+)?$"
        if re.match(version_pattern, rocm_version) is None:
            raise ValueError(f"ROCm version has invalid format: {rocm_version}")
        return rocm_version
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
26
|
+
from typing import Optional
|
|
27
|
+
|
|
28
|
+
from pydantic import Field
|
|
29
|
+
|
|
30
|
+
from nodescraper.models.analyzerargs import AnalyzerArgs
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class StorageAnalyzerArgs(AnalyzerArgs):
    """Arguments accepted by the storage analyzer.

    All fields are optional; the list fields default to a new empty list.
    """

    # NOTE(review): presumably an absolute free-space threshold given as a
    # size string - confirm the accepted format against the analyzer.
    min_required_free_space_abs: Optional[str] = None
    # NOTE(review): presumably a free-space threshold in percent - confirm.
    min_required_free_space_prct: Optional[int] = None
    # NOTE(review): presumably device names to skip / to restrict checks to -
    # confirm against the analyzer.
    ignore_devices: Optional[list[str]] = Field(default_factory=list)
    check_devices: Optional[list[str]] = Field(default_factory=list)
    # NOTE(review): presumably switches device-name comparison to regex
    # matching - confirm against the analyzer.
    regex_match: bool = False
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
26
|
+
|
|
27
|
+
from nodescraper.models import CollectorArgs
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class StorageCollectorArgs(CollectorArgs):
    """Arguments accepted by the storage collector."""

    # Defaults to False.
    # NOTE(review): presumably tells the collector to avoid commands that
    # need sudo - confirm against the collector.
    skip_sudo: bool = False
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
26
|
+
import re
|
|
27
|
+
from typing import Optional
|
|
28
|
+
|
|
29
|
+
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus
|
|
30
|
+
from nodescraper.interfaces import DataAnalyzer
|
|
31
|
+
from nodescraper.models import TaskResult
|
|
32
|
+
from nodescraper.utils import bytes_to_human_readable, convert_to_bytes
|
|
33
|
+
|
|
34
|
+
from .analyzer_args import StorageAnalyzerArgs
|
|
35
|
+
from .storagedata import StorageDataModel
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class StorageAnalyzer(DataAnalyzer[StorageDataModel, StorageAnalyzerArgs]):
    """Check storage usage"""

    DATA_MODEL = StorageDataModel

    def _matches_device_filter(
        self, device_name: str, exp_devices: list[str], regex_match: bool
    ) -> bool:
        """Check if the device name matches any of the expected devices

        With ``regex_match`` enabled each entry is compiled as a regular
        expression and applied with ``match``, i.e. it only has to match at the
        start of the device name. An entry that is not a valid pattern is
        reported as an ERROR event and skipped.

        Args:
            device_name (str): device name to check
            exp_devices (list[str]): list of expected devices to match against
            regex_match (bool): if True, use regex matching; otherwise, use exact match

        Returns:
            bool: True if the device name matches any of the expected devices, False otherwise
        """
        for exp_device in exp_devices:
            if not regex_match:
                if device_name == exp_device:
                    return True
                continue

            try:
                device_regex = re.compile(exp_device)
            except re.error:
                self._log_event(
                    category=EventCategory.STORAGE,
                    description=f"Invalid regex pattern: {exp_device}",
                    priority=EventPriority.ERROR,
                )
                continue
            if device_regex.match(device_name):
                return True
        return False

    def analyze_data(
        self, data: StorageDataModel, args: Optional[StorageAnalyzerArgs] = None
    ) -> TaskResult:
        """Analyze the storage data to check if there is enough free space

        A device passes when it satisfies every configured threshold: free
        bytes above ``min_required_free_space_abs`` and/or free percentage
        above ``min_required_free_space_prct``. When neither threshold is
        given, a 10% free-space threshold is used.

        Device selection: when ``check_devices`` is non-empty only matching
        devices are analyzed and ``ignore_devices`` is not consulted;
        otherwise devices matching ``ignore_devices`` are skipped.

        Args:
            data (StorageDataModel): storage data to analyze
            args (Optional[StorageAnalyzerArgs], optional): storage analysis arguments.
                Defaults to None. The object is not modified.

        Returns:
            TaskResult: Result of the storage analysis containing the status and message.
        """
        if args is None:
            args = StorageAnalyzerArgs(min_required_free_space_prct=10)

        # Work on local copies of the thresholds so the caller's args object is
        # left untouched when the default has to be applied.
        min_free_abs_spec = args.min_required_free_space_abs
        min_free_prct = args.min_required_free_space_prct
        if min_free_abs_spec is None and min_free_prct is None:
            min_free_prct = 10
            self.logger.warning(
                "No thresholds provided for storage analyzer arguments; defaulting to 10% free"
            )

        if not data.storage_data:
            self.result.message = "No storage data available"
            self.result.status = ExecutionStatus.NOT_RAN
            return self.result

        # The absolute threshold is the same for every device: convert it once.
        check_abs = bool(min_free_abs_spec)
        min_free_abs = convert_to_bytes(min_free_abs_spec) if check_abs else None

        self.result.status = ExecutionStatus.OK
        passing_devices = []
        failing_devices = []
        for device_name, device_data in data.storage_data.items():
            if args.check_devices:
                if not self._matches_device_filter(
                    device_name, args.check_devices, args.regex_match
                ):
                    continue
            elif args.ignore_devices:
                if self._matches_device_filter(device_name, args.ignore_devices, args.regex_match):
                    continue

            if check_abs:
                free_abs = convert_to_bytes(str(device_data.free))
                # A falsy conversion result (e.g. 0 bytes free) never passes.
                has_space = bool(free_abs) and free_abs > min_free_abs
            else:
                has_space = True

            if min_free_prct:
                # device_data.percent is the used percentage.
                free_prct = 100 - device_data.percent
                has_space = has_space and (free_prct > min_free_prct)

            if has_space:
                passing_devices.append(device_name)
                continue

            total_bytes = convert_to_bytes(str(device_data.total))
            prct = device_data.percent
            failing_devices.append(device_name)
            event_data = {
                "offending_device": {
                    "device": device_name,
                    "total": device_data.total,
                    "free": device_data.free,
                    "percent": device_data.percent,
                },
            }
            self._log_event(
                category=EventCategory.STORAGE,
                description=f"Insufficient disk space: {bytes_to_human_readable(total_bytes)} and {prct}%, used on {device_name}",
                data=event_data,
                priority=EventPriority.CRITICAL,
                console_log=True,
            )

        if failing_devices:
            self.result.message = f"Insufficient disk space on [{', '.join(failing_devices)}]"
            self.result.status = ExecutionStatus.ERROR
        elif passing_devices:
            self.result.message = (
                f"Sufficient disk space available on [{', '.join(passing_devices)}]"
            )
        else:
            # Every device was excluded by check_devices / ignore_devices, so
            # nothing was actually verified; do not report a pass.
            self.result.message = "No storage devices matched the configured device filters"
            # Only downgrade a plain OK, in case event logging while filtering
            # already set a different status on the result.
            if self.result.status == ExecutionStatus.OK:
                self.result.status = ExecutionStatus.NOT_RAN
        return self.result
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
26
|
+
import re
|
|
27
|
+
from typing import Optional
|
|
28
|
+
|
|
29
|
+
from nodescraper.base import InBandDataCollector
|
|
30
|
+
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily
|
|
31
|
+
from nodescraper.models import TaskResult
|
|
32
|
+
|
|
33
|
+
from .collector_args import StorageCollectorArgs
|
|
34
|
+
from .storagedata import DeviceStorageData, StorageDataModel
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class StorageCollector(InBandDataCollector[StorageDataModel, None]):
    """Collect disk usage details"""

    DATA_MODEL = StorageDataModel
    CMD_WINDOWS = """wmic LogicalDisk Where DriveType="3" Get DeviceId,Size,FreeSpace"""
    CMD = """sh -c 'df -lH -B1 | grep -v 'boot''"""

    @staticmethod
    def _parse_windows_output(stdout: str) -> tuple[dict[str, DeviceStorageData], list[str]]:
        """Parse the CMD_WINDOWS output into per-device storage data.

        The first line is treated as the header. Each remaining non-blank line
        is expected to hold exactly three columns: device id, free space and
        size.

        Args:
            stdout (str): raw command output

        Returns:
            tuple[dict[str, DeviceStorageData], list[str]]: parsed devices keyed by
            device id, and the lines that could not be parsed
        """
        parsed = {}
        skipped = []
        for line in stdout.splitlines()[1:]:
            fields = line.split()
            if not fields:
                # blank or whitespace-only padding line
                continue
            try:
                device_id, free_space, size = fields
                total = int(size)
                free = int(free_space)
                used = total - free
                percent = round(used / total * 100, 2)
            except (ValueError, ZeroDivisionError):
                # wrong column count, non-numeric column or zero-sized volume
                skipped.append(line)
                continue
            parsed[device_id] = DeviceStorageData(
                total=total,
                free=free,
                used=used,
                percent=percent,
            )
        return parsed, skipped

    @staticmethod
    def _parse_linux_output(stdout: str) -> tuple[dict[str, DeviceStorageData], list[str]]:
        """Parse the CMD (df) output into per-device storage data.

        The first line is treated as the header. The first five columns of
        every other non-blank line are read as device, size, used, available
        and use percentage. Entries whose device is ``tmpfs`` or ``overlay``
        are ignored.

        Args:
            stdout (str): raw command output

        Returns:
            tuple[dict[str, DeviceStorageData], list[str]]: parsed devices keyed by
            device name, and the lines that could not be parsed
        """
        parsed = {}
        skipped = []
        for line in stdout.splitlines()[1:]:
            fields = line.split()
            if not fields:
                continue
            if fields[0] in ("tmpfs", "overlay"):
                continue
            try:
                device_id, size, used, available, percent = fields[:5]
                total = int(size)
                free = int(available)
                used_bytes = int(used)
                used_prct = float(percent.replace("%", ""))
            except ValueError:
                # fewer than five columns or a non-numeric value
                skipped.append(line)
                continue
            parsed[device_id] = DeviceStorageData(
                total=total,
                free=free,
                used=used_bytes,
                percent=used_prct,
            )
        return parsed, skipped

    def collect_data(
        self, args: Optional[StorageCollectorArgs] = None
    ) -> tuple[TaskResult, Optional[StorageDataModel]]:
        """Read storage usage data from the system under test.

        On Windows CMD_WINDOWS is run; on any other OS family CMD is run with
        sudo, unless ``args.skip_sudo`` is set, in which case nothing is run.
        Output lines that cannot be parsed are skipped and reported in a single
        INFO event rather than aborting the collection.

        Args:
            args (Optional[StorageCollectorArgs], optional): collector arguments.
                Defaults to None, which is treated as ``StorageCollectorArgs()``.

        Returns:
            tuple[TaskResult, Optional[StorageDataModel]]: the task result and the
            collected storage data (devices ordered by total size), or None when
            nothing was collected.
        """
        if args is None:
            args = StorageCollectorArgs()

        storage_data = {}
        skipped_lines = []
        if self.system_info.os_family == OSFamily.WINDOWS:
            res = self._run_sut_cmd(self.CMD_WINDOWS)
            if res.exit_code == 0:
                storage_data, skipped_lines = self._parse_windows_output(res.stdout)
        else:
            if args.skip_sudo:
                self.result.message = "Skipping sudo plugin"
                self.result.status = ExecutionStatus.NOT_RAN
                return self.result, None
            res = self._run_sut_cmd(self.CMD, sudo=True)
            if res.exit_code == 0:
                storage_data, skipped_lines = self._parse_linux_output(res.stdout)

        if res.exit_code != 0:
            self._log_event(
                category=EventCategory.OS,
                description="Error checking available storage",
                data={
                    "command": res.command,
                    "exit_code": res.exit_code,
                    "stderr": res.stderr,
                },
                priority=EventPriority.ERROR,
                console_log=True,
            )

        if skipped_lines:
            self._log_event(
                category=EventCategory.OS,
                description="Skipped unparsable storage output lines",
                data={
                    "command": res.command,
                    "lines": skipped_lines,
                },
                priority=EventPriority.INFO,
            )

        if storage_data:
            # order devices from smallest to largest total capacity
            storage_data = dict(sorted(storage_data.items(), key=lambda x: x[1].total))
            storage_model = StorageDataModel(storage_data=storage_data)
            self._log_event(
                category="STORAGE_READ",
                description="Available storage read",
                data=storage_model.model_dump(),
                priority=EventPriority.INFO,
            )
            self.result.message = f"{len(storage_model.storage_data)} storage devices collected"
            self.result.status = ExecutionStatus.OK
        else:
            storage_model = None
            self.result.message = "Storage info not found"
            self.result.status = ExecutionStatus.ERROR
        return self.result, storage_model
|