amd-node-scraper 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amd_node_scraper-0.0.1.dist-info/LICENSE +21 -0
- amd_node_scraper-0.0.1.dist-info/METADATA +424 -0
- amd_node_scraper-0.0.1.dist-info/RECORD +197 -0
- amd_node_scraper-0.0.1.dist-info/WHEEL +5 -0
- amd_node_scraper-0.0.1.dist-info/entry_points.txt +2 -0
- amd_node_scraper-0.0.1.dist-info/top_level.txt +1 -0
- nodescraper/__init__.py +32 -0
- nodescraper/base/__init__.py +34 -0
- nodescraper/base/inbandcollectortask.py +118 -0
- nodescraper/base/inbanddataplugin.py +39 -0
- nodescraper/base/regexanalyzer.py +120 -0
- nodescraper/cli/__init__.py +29 -0
- nodescraper/cli/cli.py +511 -0
- nodescraper/cli/constants.py +27 -0
- nodescraper/cli/dynamicparserbuilder.py +171 -0
- nodescraper/cli/helper.py +517 -0
- nodescraper/cli/inputargtypes.py +129 -0
- nodescraper/configbuilder.py +123 -0
- nodescraper/configregistry.py +66 -0
- nodescraper/configs/node_status.json +19 -0
- nodescraper/connection/__init__.py +25 -0
- nodescraper/connection/inband/__init__.py +46 -0
- nodescraper/connection/inband/inband.py +171 -0
- nodescraper/connection/inband/inbandlocal.py +93 -0
- nodescraper/connection/inband/inbandmanager.py +151 -0
- nodescraper/connection/inband/inbandremote.py +173 -0
- nodescraper/connection/inband/sshparams.py +43 -0
- nodescraper/constants.py +26 -0
- nodescraper/enums/__init__.py +40 -0
- nodescraper/enums/eventcategory.py +89 -0
- nodescraper/enums/eventpriority.py +42 -0
- nodescraper/enums/executionstatus.py +44 -0
- nodescraper/enums/osfamily.py +34 -0
- nodescraper/enums/systeminteraction.py +41 -0
- nodescraper/enums/systemlocation.py +33 -0
- nodescraper/generictypes.py +36 -0
- nodescraper/interfaces/__init__.py +44 -0
- nodescraper/interfaces/connectionmanager.py +143 -0
- nodescraper/interfaces/dataanalyzertask.py +138 -0
- nodescraper/interfaces/datacollectortask.py +185 -0
- nodescraper/interfaces/dataplugin.py +356 -0
- nodescraper/interfaces/plugin.py +127 -0
- nodescraper/interfaces/resultcollator.py +56 -0
- nodescraper/interfaces/task.py +164 -0
- nodescraper/interfaces/taskresulthook.py +39 -0
- nodescraper/models/__init__.py +48 -0
- nodescraper/models/analyzerargs.py +93 -0
- nodescraper/models/collectorargs.py +30 -0
- nodescraper/models/connectionconfig.py +34 -0
- nodescraper/models/datamodel.py +171 -0
- nodescraper/models/datapluginresult.py +39 -0
- nodescraper/models/event.py +158 -0
- nodescraper/models/pluginconfig.py +38 -0
- nodescraper/models/pluginresult.py +39 -0
- nodescraper/models/systeminfo.py +44 -0
- nodescraper/models/taskresult.py +185 -0
- nodescraper/models/timerangeargs.py +38 -0
- nodescraper/pluginexecutor.py +274 -0
- nodescraper/pluginregistry.py +152 -0
- nodescraper/plugins/__init__.py +25 -0
- nodescraper/plugins/inband/__init__.py +25 -0
- nodescraper/plugins/inband/amdsmi/__init__.py +28 -0
- nodescraper/plugins/inband/amdsmi/amdsmi_analyzer.py +821 -0
- nodescraper/plugins/inband/amdsmi/amdsmi_collector.py +1313 -0
- nodescraper/plugins/inband/amdsmi/amdsmi_plugin.py +43 -0
- nodescraper/plugins/inband/amdsmi/amdsmidata.py +1002 -0
- nodescraper/plugins/inband/amdsmi/analyzer_args.py +50 -0
- nodescraper/plugins/inband/amdsmi/cper.py +65 -0
- nodescraper/plugins/inband/bios/__init__.py +29 -0
- nodescraper/plugins/inband/bios/analyzer_args.py +64 -0
- nodescraper/plugins/inband/bios/bios_analyzer.py +93 -0
- nodescraper/plugins/inband/bios/bios_collector.py +93 -0
- nodescraper/plugins/inband/bios/bios_plugin.py +43 -0
- nodescraper/plugins/inband/bios/biosdata.py +30 -0
- nodescraper/plugins/inband/cmdline/__init__.py +25 -0
- nodescraper/plugins/inband/cmdline/analyzer_args.py +80 -0
- nodescraper/plugins/inband/cmdline/cmdline_analyzer.py +113 -0
- nodescraper/plugins/inband/cmdline/cmdline_collector.py +77 -0
- nodescraper/plugins/inband/cmdline/cmdline_plugin.py +43 -0
- nodescraper/plugins/inband/cmdline/cmdlinedata.py +30 -0
- nodescraper/plugins/inband/device_enumeration/__init__.py +29 -0
- nodescraper/plugins/inband/device_enumeration/analyzer_args.py +73 -0
- nodescraper/plugins/inband/device_enumeration/device_enumeration_analyzer.py +81 -0
- nodescraper/plugins/inband/device_enumeration/device_enumeration_collector.py +176 -0
- nodescraper/plugins/inband/device_enumeration/device_enumeration_plugin.py +45 -0
- nodescraper/plugins/inband/device_enumeration/deviceenumdata.py +36 -0
- nodescraper/plugins/inband/dimm/__init__.py +25 -0
- nodescraper/plugins/inband/dimm/collector_args.py +31 -0
- nodescraper/plugins/inband/dimm/dimm_collector.py +151 -0
- nodescraper/plugins/inband/dimm/dimm_plugin.py +40 -0
- nodescraper/plugins/inband/dimm/dimmdata.py +30 -0
- nodescraper/plugins/inband/dkms/__init__.py +25 -0
- nodescraper/plugins/inband/dkms/analyzer_args.py +85 -0
- nodescraper/plugins/inband/dkms/dkms_analyzer.py +106 -0
- nodescraper/plugins/inband/dkms/dkms_collector.py +76 -0
- nodescraper/plugins/inband/dkms/dkms_plugin.py +43 -0
- nodescraper/plugins/inband/dkms/dkmsdata.py +33 -0
- nodescraper/plugins/inband/dmesg/__init__.py +28 -0
- nodescraper/plugins/inband/dmesg/analyzer_args.py +33 -0
- nodescraper/plugins/inband/dmesg/collector_args.py +39 -0
- nodescraper/plugins/inband/dmesg/dmesg_analyzer.py +503 -0
- nodescraper/plugins/inband/dmesg/dmesg_collector.py +164 -0
- nodescraper/plugins/inband/dmesg/dmesg_plugin.py +44 -0
- nodescraper/plugins/inband/dmesg/dmesgdata.py +116 -0
- nodescraper/plugins/inband/fabrics/__init__.py +28 -0
- nodescraper/plugins/inband/fabrics/fabrics_collector.py +726 -0
- nodescraper/plugins/inband/fabrics/fabrics_plugin.py +37 -0
- nodescraper/plugins/inband/fabrics/fabricsdata.py +140 -0
- nodescraper/plugins/inband/journal/__init__.py +28 -0
- nodescraper/plugins/inband/journal/collector_args.py +33 -0
- nodescraper/plugins/inband/journal/journal_collector.py +107 -0
- nodescraper/plugins/inband/journal/journal_plugin.py +40 -0
- nodescraper/plugins/inband/journal/journaldata.py +44 -0
- nodescraper/plugins/inband/kernel/__init__.py +25 -0
- nodescraper/plugins/inband/kernel/analyzer_args.py +64 -0
- nodescraper/plugins/inband/kernel/kernel_analyzer.py +91 -0
- nodescraper/plugins/inband/kernel/kernel_collector.py +129 -0
- nodescraper/plugins/inband/kernel/kernel_plugin.py +43 -0
- nodescraper/plugins/inband/kernel/kerneldata.py +32 -0
- nodescraper/plugins/inband/kernel_module/__init__.py +25 -0
- nodescraper/plugins/inband/kernel_module/analyzer_args.py +59 -0
- nodescraper/plugins/inband/kernel_module/kernel_module_analyzer.py +211 -0
- nodescraper/plugins/inband/kernel_module/kernel_module_collector.py +264 -0
- nodescraper/plugins/inband/kernel_module/kernel_module_data.py +60 -0
- nodescraper/plugins/inband/kernel_module/kernel_module_plugin.py +43 -0
- nodescraper/plugins/inband/memory/__init__.py +25 -0
- nodescraper/plugins/inband/memory/analyzer_args.py +45 -0
- nodescraper/plugins/inband/memory/memory_analyzer.py +98 -0
- nodescraper/plugins/inband/memory/memory_collector.py +330 -0
- nodescraper/plugins/inband/memory/memory_plugin.py +43 -0
- nodescraper/plugins/inband/memory/memorydata.py +90 -0
- nodescraper/plugins/inband/network/__init__.py +28 -0
- nodescraper/plugins/inband/network/network_collector.py +1828 -0
- nodescraper/plugins/inband/network/network_plugin.py +37 -0
- nodescraper/plugins/inband/network/networkdata.py +319 -0
- nodescraper/plugins/inband/nvme/__init__.py +28 -0
- nodescraper/plugins/inband/nvme/nvme_collector.py +167 -0
- nodescraper/plugins/inband/nvme/nvme_plugin.py +37 -0
- nodescraper/plugins/inband/nvme/nvmedata.py +45 -0
- nodescraper/plugins/inband/os/__init__.py +25 -0
- nodescraper/plugins/inband/os/analyzer_args.py +64 -0
- nodescraper/plugins/inband/os/os_analyzer.py +73 -0
- nodescraper/plugins/inband/os/os_collector.py +131 -0
- nodescraper/plugins/inband/os/os_plugin.py +43 -0
- nodescraper/plugins/inband/os/osdata.py +31 -0
- nodescraper/plugins/inband/package/__init__.py +25 -0
- nodescraper/plugins/inband/package/analyzer_args.py +48 -0
- nodescraper/plugins/inband/package/package_analyzer.py +253 -0
- nodescraper/plugins/inband/package/package_collector.py +273 -0
- nodescraper/plugins/inband/package/package_plugin.py +43 -0
- nodescraper/plugins/inband/package/packagedata.py +41 -0
- nodescraper/plugins/inband/pcie/__init__.py +29 -0
- nodescraper/plugins/inband/pcie/analyzer_args.py +63 -0
- nodescraper/plugins/inband/pcie/pcie_analyzer.py +1081 -0
- nodescraper/plugins/inband/pcie/pcie_collector.py +690 -0
- nodescraper/plugins/inband/pcie/pcie_data.py +2017 -0
- nodescraper/plugins/inband/pcie/pcie_plugin.py +43 -0
- nodescraper/plugins/inband/process/__init__.py +25 -0
- nodescraper/plugins/inband/process/analyzer_args.py +45 -0
- nodescraper/plugins/inband/process/collector_args.py +31 -0
- nodescraper/plugins/inband/process/process_analyzer.py +91 -0
- nodescraper/plugins/inband/process/process_collector.py +115 -0
- nodescraper/plugins/inband/process/process_plugin.py +46 -0
- nodescraper/plugins/inband/process/processdata.py +34 -0
- nodescraper/plugins/inband/rocm/__init__.py +25 -0
- nodescraper/plugins/inband/rocm/analyzer_args.py +66 -0
- nodescraper/plugins/inband/rocm/rocm_analyzer.py +100 -0
- nodescraper/plugins/inband/rocm/rocm_collector.py +205 -0
- nodescraper/plugins/inband/rocm/rocm_plugin.py +43 -0
- nodescraper/plugins/inband/rocm/rocmdata.py +62 -0
- nodescraper/plugins/inband/storage/__init__.py +25 -0
- nodescraper/plugins/inband/storage/analyzer_args.py +38 -0
- nodescraper/plugins/inband/storage/collector_args.py +31 -0
- nodescraper/plugins/inband/storage/storage_analyzer.py +152 -0
- nodescraper/plugins/inband/storage/storage_collector.py +110 -0
- nodescraper/plugins/inband/storage/storage_plugin.py +44 -0
- nodescraper/plugins/inband/storage/storagedata.py +70 -0
- nodescraper/plugins/inband/sysctl/__init__.py +29 -0
- nodescraper/plugins/inband/sysctl/analyzer_args.py +67 -0
- nodescraper/plugins/inband/sysctl/sysctl_analyzer.py +81 -0
- nodescraper/plugins/inband/sysctl/sysctl_collector.py +101 -0
- nodescraper/plugins/inband/sysctl/sysctl_plugin.py +43 -0
- nodescraper/plugins/inband/sysctl/sysctldata.py +42 -0
- nodescraper/plugins/inband/syslog/__init__.py +28 -0
- nodescraper/plugins/inband/syslog/syslog_collector.py +121 -0
- nodescraper/plugins/inband/syslog/syslog_plugin.py +37 -0
- nodescraper/plugins/inband/syslog/syslogdata.py +46 -0
- nodescraper/plugins/inband/uptime/__init__.py +25 -0
- nodescraper/plugins/inband/uptime/uptime_collector.py +88 -0
- nodescraper/plugins/inband/uptime/uptime_plugin.py +37 -0
- nodescraper/plugins/inband/uptime/uptimedata.py +31 -0
- nodescraper/resultcollators/__init__.py +25 -0
- nodescraper/resultcollators/tablesummary.py +159 -0
- nodescraper/taskresulthooks/__init__.py +28 -0
- nodescraper/taskresulthooks/filesystemloghook.py +88 -0
- nodescraper/typeutils.py +171 -0
- nodescraper/utils.py +412 -0
|
@@ -0,0 +1,726 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
#
|
|
3
|
+
# MIT License
|
|
4
|
+
#
|
|
5
|
+
# Copyright (c) 2025 Advanced Micro Devices, Inc.
|
|
6
|
+
#
|
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
# furnished to do so, subject to the following conditions:
|
|
13
|
+
#
|
|
14
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
# copies or substantial portions of the Software.
|
|
16
|
+
#
|
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
# SOFTWARE.
|
|
24
|
+
#
|
|
25
|
+
###############################################################################
|
|
26
|
+
import re
|
|
27
|
+
from typing import Dict, List, Optional, Tuple
|
|
28
|
+
|
|
29
|
+
from nodescraper.base import InBandDataCollector
|
|
30
|
+
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus
|
|
31
|
+
from nodescraper.models import TaskResult
|
|
32
|
+
|
|
33
|
+
from .fabricsdata import (
|
|
34
|
+
FabricsDataModel,
|
|
35
|
+
IbdevNetdevMapping,
|
|
36
|
+
IbstatDevice,
|
|
37
|
+
IbvDeviceInfo,
|
|
38
|
+
MstDevice,
|
|
39
|
+
MstStatus,
|
|
40
|
+
OfedInfo,
|
|
41
|
+
RdmaDevice,
|
|
42
|
+
RdmaInfo,
|
|
43
|
+
RdmaLink,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class FabricsCollector(InBandDataCollector[FabricsDataModel, None]):
|
|
48
|
+
"""Collect InfiniBand/RDMA fabrics configuration details"""
|
|
49
|
+
|
|
50
|
+
DATA_MODEL = FabricsDataModel
|
|
51
|
+
CMD_IBSTAT = "ibstat"
|
|
52
|
+
CMD_IBV_DEVINFO = "ibv_devinfo"
|
|
53
|
+
CMD_IB_DEV_NETDEVS = "ls -l /sys/class/infiniband/*/device/net"
|
|
54
|
+
CMD_OFED_INFO = "ofed_info -s"
|
|
55
|
+
CMD_MST_START = "mst start"
|
|
56
|
+
CMD_MST_STATUS = "mst status -v"
|
|
57
|
+
CMD_RDMA_DEV = "rdma dev"
|
|
58
|
+
CMD_RDMA_LINK = "rdma link"
|
|
59
|
+
|
|
60
|
+
def _parse_ibstat(self, output: str) -> List[IbstatDevice]:
    """Parse 'ibstat' output into IbstatDevice objects.

    Section headers ("CA '<name>'" and "Port <n>:") are matched against the
    whitespace-stripped line, so an indented port header opens a port section
    instead of being treated as an ordinary "key: value" attribute. The port
    that is still open when the next CA header (or the end of the output) is
    reached is stored on the device it belongs to.

    Args:
        output: Raw output from 'ibstat' command

    Returns:
        List of IbstatDevice objects, one per CA header, in output order
    """
    ca_header = re.compile(r"CA\s+'([^']+)'")
    # Requires a port number, so "Port GUID: ..." stays a plain attribute line
    port_header = re.compile(r"Port\s+(\d+):")

    # Device-level keys that are copied verbatim onto a string field
    device_fields = {
        "CA type": "ca_type",
        "Firmware version": "firmware_version",
        "Hardware version": "hardware_version",
        "Node GUID": "node_guid",
        "System image GUID": "system_image_guid",
    }

    devices: List[IbstatDevice] = []
    current_device: Optional[IbstatDevice] = None
    current_port: Optional[int] = None
    current_port_attrs: Dict[str, str] = {}

    for line in output.splitlines():
        stripped = line.strip()
        if not stripped:
            continue

        # CA name line (e.g., "CA 'mlx5_0'")
        ca_match = ca_header.match(stripped)
        if ca_match:
            if current_device is not None:
                # Close the previous device, including its still-open port
                if current_port is not None:
                    current_device.ports[current_port] = current_port_attrs
                devices.append(current_device)

            current_device = IbstatDevice(ca_name=ca_match.group(1), raw_output=output)
            current_port = None
            current_port_attrs = {}
            continue

        # Nothing can be attributed until the first CA header has been seen
        if current_device is None:
            continue

        # Port line (e.g., "Port 1:")
        port_match = port_header.match(stripped)
        if port_match:
            if current_port is not None:
                current_device.ports[current_port] = current_port_attrs
            current_port = int(port_match.group(1))
            current_port_attrs = {}
            continue

        # Attribute lines ("key: value")
        key, separator, value = stripped.partition(":")
        if not separator:
            continue
        key = key.strip()
        value = value.strip()

        if current_port is not None:
            # Inside a port section every attribute belongs to that port
            current_port_attrs[key] = value
        elif key == "Number of ports":
            try:
                current_device.number_of_ports = int(value)
            except ValueError:
                # Best effort: a non-numeric count leaves the field unset
                pass
        elif key in device_fields:
            setattr(current_device, device_fields[key], value)

    # Save last device and port
    if current_device is not None:
        if current_port is not None:
            current_device.ports[current_port] = current_port_attrs
        devices.append(current_device)

    return devices
|
|
138
|
+
|
|
139
|
+
def _parse_ibv_devinfo(self, output: str) -> List[IbvDeviceInfo]:
    """Parse 'ibv_devinfo' output into IbvDeviceInfo objects.

    A device section starts at an unindented "hca_id:" line and a port
    section at a "port:" line. The port that is still open when the next
    device header (or the end of the output) is reached is stored on the
    device it belongs to, so every device keeps all of its ports.

    Args:
        output: Raw output from 'ibv_devinfo' command

    Returns:
        List of IbvDeviceInfo objects, one per "hca_id:" header, in output order
    """
    # Device-level keys and the model field each one is stored in
    device_fields = {
        "node_guid": "node_guid",
        "sys_image_guid": "sys_image_guid",
        "vendor_id": "vendor_id",
        "vendor_part_id": "vendor_part_id",
        "hw_ver": "hw_ver",
        "fw_ver": "fw_ver",
        "node_type": "node_type",
        "transport_type": "transport_type",
        "transport": "transport_type",
    }

    devices: List[IbvDeviceInfo] = []
    current_device: Optional[IbvDeviceInfo] = None
    current_port: Optional[int] = None
    current_port_attrs: Dict[str, str] = {}

    for line in output.splitlines():
        stripped = line.strip()

        # Device header (e.g., "hca_id: mlx5_0")
        if line.startswith("hca_id:"):
            if current_device is not None:
                # Close the previous device, including its still-open port
                if current_port is not None:
                    current_device.ports[current_port] = current_port_attrs
                devices.append(current_device)

            device_name = line.split(":", 1)[1].strip()
            current_device = IbvDeviceInfo(device=device_name, raw_output=output)
            current_port = None
            current_port_attrs = {}
            continue

        # Everything else needs an open device and a "key: value" shape
        if current_device is None or ":" not in stripped:
            continue

        key, _, value = stripped.partition(":")
        key = key.strip()
        value = value.strip()

        # Port line (e.g., "port: 1")
        if stripped.startswith("port:"):
            if current_port is not None:
                current_device.ports[current_port] = current_port_attrs
            try:
                port_number = int(value)
            except ValueError:
                # Unparseable port number: keep collecting into the current port
                continue
            current_port = port_number
            current_port_attrs = {}
            continue

        if current_port is not None:
            # Inside a port section every attribute belongs to that port
            current_port_attrs[key] = value
        elif key in device_fields:
            setattr(current_device, device_fields[key], value)

    # Save last device and port
    if current_device is not None:
        if current_port is not None:
            current_device.ports[current_port] = current_port_attrs
        devices.append(current_device)

    return devices
|
|
219
|
+
|
|
220
|
+
def _parse_ib_dev_netdevs(self, output: str) -> List[IbdevNetdevMapping]:
    """Build IB-device-to-netdev mappings from an 'ls -l' listing.

    The listing comes from 'ls -l /sys/class/infiniband/*/device/net'. A
    "<path>:" header names the IB device; each directory entry listed under
    it becomes one mapping.

    Args:
        output: Raw output from 'ls -l /sys/class/infiniband/*/device/net' command

    Returns:
        List of IbdevNetdevMapping objects
    """
    results = []
    ib_name = None

    for raw_line in output.splitlines():
        entry = raw_line.strip()
        if not entry:
            continue

        # Section header, e.g. "/sys/class/infiniband/rocep105s0/device/net:"
        if entry.startswith("/sys/class/infiniband/") and entry.endswith(":"):
            header = re.search(r"/sys/class/infiniband/([^/]+)/device/net:", entry)
            if header:
                ib_name = header.group(1)
            continue

        # "total N" summary rows carry no device information
        if entry.startswith("total"):
            continue

        # Only directory rows under a known IB device are of interest,
        # e.g. "drwxr-xr-x 5 root root 0 Jan 8 18:01 benic5p1"
        if not ib_name or not entry.startswith("d"):
            continue

        columns = entry.split()
        if len(columns) < 9:
            continue

        # The final column is the netdev name. The listing carries neither a
        # port number nor a link state, so port 1 and state None are recorded.
        results.append(
            IbdevNetdevMapping(ib_device=ib_name, port=1, netdev=columns[-1], state=None)
        )

    return results
|
|
266
|
+
|
|
267
|
+
def _parse_ofed_info(self, output: str) -> OfedInfo:
    """Turn 'ofed_info -s' output into an OfedInfo object.

    The whole trimmed output is taken as the version string, with any
    trailing colons removed (e.g. "OFED-internal-25.10-1.7.1:"). Blank
    output leaves the version as None.

    Args:
        output: Raw output from 'ofed_info -s' command

    Returns:
        OfedInfo object
    """
    trimmed = output.strip()
    release = trimmed.rstrip(":") if trimmed else None
    return OfedInfo(version=release, raw_output=output)
|
|
286
|
+
|
|
287
|
+
def _parse_mst_status(self, output: str) -> MstStatus:
    """Parse 'mst status -v' output into MstStatus object.

    Two line layouts are handled:

    * old format, where the line starts with the "/dev/mst/..." path and is
      followed by free text and key=value tokens;
    * tabular format "DEVICE_TYPE MST PCI RDMA NET NUMA [VFIO]", where the
      "/dev/mst/..." path is a later column.

    PCI addresses are accepted both with a domain ("0000:ec:00.0") and in the
    short bus:device.function form ("ec:00.0").

    Args:
        output: Raw output from 'mst status -v' command

    Returns:
        MstStatus object
    """
    # Optional "<domain>:" prefix, then "<bus>:<device>.<function>"
    pci_address = re.compile(r"(?:[0-9a-f]{2,4}:)?[0-9a-f]{2}:[0-9a-f]{2}\.[0-9]")
    header_prefixes = (
        "MST modules:",
        "PCI devices:",
        "---",
        "DEVICE_TYPE",
        "MST PCI module",
    )

    mst_status = MstStatus(raw_output=output)
    devices = []

    # Any of these section titles is taken as a sign that MST is started
    if "MST modules:" in output or "MST devices:" in output or "PCI devices:" in output:
        mst_status.mst_started = True

    for line in output.splitlines():
        line = line.strip()
        if not line or line.startswith(header_prefixes):
            continue

        # Only lines that mention an MST device node describe a device
        if "/dev/mst/" not in line:
            continue

        parts = line.split()

        # Old format: "/dev/mst/device_path" at the beginning
        if line.startswith("/dev/mst/"):
            device = MstDevice(device=parts[0])

            for part in parts[1:]:
                if "=" in part:
                    key, value = part.split("=", 1)
                    if key == "rdma":
                        device.rdma_device = value
                    elif key == "net":
                        device.net_device = value
                    elif ":" in value and "." in value:
                        # Any other key whose value looks like bus:dev.fn
                        device.pci_address = value
                    else:
                        device.attributes[key] = value
                elif pci_address.match(part):
                    device.pci_address = part

            devices.append(device)
            continue

        # Tabular format, e.g.
        # ConnectX7(rev:0) /dev/mst/mt4129_pciconf9 ec:00.0 mlx5_4 net-enp235s0np0 1
        mst_idx = next(
            (i for i, part in enumerate(parts) if part.startswith("/dev/mst/")),
            None,
        )
        # Require at least the PCI and RDMA columns after the MST path
        if mst_idx is None or len(parts) < mst_idx + 3:
            continue

        device = MstDevice(device=parts[mst_idx])

        # Everything before the MST path is the device type
        if mst_idx > 0:
            device.attributes["device_type"] = " ".join(parts[:mst_idx])

        # Columns after the MST path: PCI, RDMA, NET, NUMA, then VFIO/others
        columns = parts[mst_idx + 1 :]

        if pci_address.match(columns[0]):
            device.pci_address = columns[0]

        # "-" marks an empty column
        if columns[1] != "-":
            device.rdma_device = columns[1]

        if len(columns) > 2:
            net_dev = columns[2]
            # Remove "net-" prefix if present
            if net_dev.startswith("net-"):
                net_dev = net_dev[4:]
            if net_dev != "-":
                device.net_device = net_dev

        if len(columns) > 3 and columns[3].isdigit():
            device.attributes["numa_node"] = columns[3]

        if len(columns) > 4:
            device.attributes["vfio"] = " ".join(columns[4:])

        devices.append(device)

    mst_status.devices = devices
    return mst_status
|
|
398
|
+
|
|
399
|
+
def _parse_rdma_dev(self, output: str) -> List[RdmaDevice]:
    """Parse 'rdma dev' output into RdmaDevice objects.

    Each non-empty line describes one device, for example:
        0: mlx5_0: node_type ca fw 16.28.2006 node_guid 0c42:a103:00b3:bfa0 sys_image_guid 0c42:a103:00b3:bfa0
        0: rocep9s0: node_type ca fw 1.117.1-a-63 node_guid 0690:81ff:fe4a:6c40 sys_image_guid 0690:81ff:fe4a:6c40

    The leading "<index>:" token is optional: it is skipped only when it is
    numeric, so a line that starts directly with "<device>:" is still
    attributed to the right device name.

    Args:
        output: Raw output from 'rdma dev' command

    Returns:
        List of RdmaDevice objects
    """
    # Keywords whose following token is stored on a dedicated model field
    known_fields = {
        "node_type": "node_type",
        "node_guid": "node_guid",
        "sys_image_guid": "sys_image_guid",
        "state": "state",
    }

    devices = []

    for line in output.splitlines():
        parts = line.split()
        if len(parts) < 2:
            continue

        # Skip the index (e.g., "0:") only when the first token really is one
        first = parts[0]
        start_idx = 1 if first.endswith(":") and first[:-1].isdigit() else 0

        device_name = parts[start_idx].rstrip(":")
        if not device_name:
            continue

        device = RdmaDevice(device=device_name)

        # The rest of the line is a flat sequence of "keyword value" pairs
        i = start_idx + 1
        while i < len(parts):
            keyword = parts[i]
            has_value = i + 1 < len(parts)

            if has_value and keyword == "fw":
                device.attributes["fw_version"] = parts[i + 1]
                i += 2
            elif has_value and keyword in known_fields:
                setattr(device, known_fields[keyword], parts[i + 1])
                i += 2
            elif has_value and not parts[i + 1].startswith("-"):
                # Unknown keyword: keep it as a generic attribute
                device.attributes[keyword] = parts[i + 1]
                i += 2
            else:
                # Trailing token, or next token starts with "-": skip it
                i += 1

        devices.append(device)

    return devices
|
|
467
|
+
|
|
468
|
+
def _parse_rdma_link(self, output: str) -> List[RdmaLink]:
    """Parse 'rdma link' output into RdmaLink objects.

    Supported line layouts:
        link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev ib0
        link rocep9s0/1 state DOWN physical_state POLLING netdev benic8p1
        0/1: mlx5_0/1: state ACTIVE physical_state LINK_UP

    In the last layout the line starts with a numeric "<index>/<port>:" pair.
    The device is therefore taken from the first "<name>/<port>" token whose
    name part is not purely numeric. If there is no such token, the first
    "<x>/<port>" token is used.

    Args:
        output: Raw output from 'rdma link' command

    Returns:
        List of RdmaLink objects
    """
    link_id = re.compile(r"(\S+)/(\d+)")
    links = []

    for line in output.splitlines():
        line = line.strip()
        if not line:
            continue

        candidates = list(link_id.finditer(line))
        if not candidates:
            continue

        # Prefer a named device over a leading numeric index pair
        match = next(
            (m for m in candidates if not m.group(1).isdigit()),
            candidates[0],
        )

        link = RdmaLink(device=match.group(1), port=int(match.group(2)))

        # Walk the whole line as "keyword value" pairs. Tokens are consumed
        # from the start of the line, so the leading identifier tokens also
        # end up in the generic attributes (e.g. "link" -> "mlx5_0/1").
        parts = line.split()
        i = 0
        while i < len(parts):
            keyword = parts[i]
            has_value = i + 1 < len(parts)

            if has_value and keyword == "state":
                link.state = parts[i + 1]
                i += 2
            elif has_value and keyword == "physical_state":
                link.physical_state = parts[i + 1]
                i += 2
            elif has_value and keyword == "netdev":
                link.netdev = parts[i + 1]
                i += 2
            elif has_value and not parts[i + 1].startswith("-"):
                # Store as generic attribute if it's a key-value pair
                link.attributes[keyword] = parts[i + 1]
                i += 2
            else:
                i += 1

        links.append(link)

    return links
|
|
520
|
+
|
|
521
|
+
def collect_data(
    self,
    args=None,
) -> Tuple[TaskResult, Optional[FabricsDataModel]]:
    """Gather InfiniBand/RDMA fabrics information from the system.

    The commands held in the ``CMD_*`` attributes (ibstat, ibv_devinfo,
    IB-device-to-netdev mapping, OFED info, MST start, MST status, RDMA dev
    and RDMA link) are run one after another. A command that exits non-zero
    is logged and skipped; it does not abort the remaining collection steps.

    Args:
        args: Not referenced by this method.

    Returns:
        Tuple[TaskResult, Optional[FabricsDataModel]]: the task result plus a
        FabricsDataModel when at least one source produced data; otherwise the
        result is marked as an error and the second element is None.
    """

    def log_network_event(description, priority=EventPriority.INFO, data=None):
        # Forward ``data`` only when there is a payload, so that _log_event's
        # own default applies for events that carry none.
        payload = {} if data is None else {"data": data}
        self._log_event(
            category=EventCategory.NETWORK,
            description=description,
            priority=priority,
            **payload,
        )

    def failure_payload(cmd_result):
        # Context attached to every "command failed" event.
        return {"command": cmd_result.command, "exit_code": cmd_result.exit_code}

    # Defaults used when the matching command fails.
    ibstat_devices = []
    ibv_devices = []
    ibdev_netdev_mappings = []
    ofed_info = None
    mst_status = None
    rdma_info = None
    rdma_devices = []
    rdma_links = []

    # --- ibstat ---------------------------------------------------------
    ibstat_res = self._run_sut_cmd(self.CMD_IBSTAT)
    if ibstat_res.exit_code != 0:
        log_network_event(
            "Error collecting ibstat information",
            priority=EventPriority.WARNING,
            data=failure_payload(ibstat_res),
        )
    else:
        ibstat_devices = self._parse_ibstat(ibstat_res.stdout)
        log_network_event(f"Collected {len(ibstat_devices)} IB devices from ibstat")

    # --- ibv_devinfo ----------------------------------------------------
    ibv_res = self._run_sut_cmd(self.CMD_IBV_DEVINFO)
    if ibv_res.exit_code != 0:
        log_network_event(
            "ibv_devinfo command not available or failed",
            data=failure_payload(ibv_res),
        )
    else:
        ibv_devices = self._parse_ibv_devinfo(ibv_res.stdout)
        log_network_event(f"Collected {len(ibv_devices)} IB devices from ibv_devinfo")

    # --- IB device -> netdev mappings -----------------------------------
    netdevs_res = self._run_sut_cmd(self.CMD_IB_DEV_NETDEVS)
    if netdevs_res.exit_code != 0:
        log_network_event(
            "No InfiniBand devices found in sysfs",
            data=failure_payload(netdevs_res),
        )
    else:
        ibdev_netdev_mappings = self._parse_ib_dev_netdevs(netdevs_res.stdout)
        log_network_event(f"Collected {len(ibdev_netdev_mappings)} IB to netdev mappings")

    # --- OFED version ---------------------------------------------------
    ofed_res = self._run_sut_cmd(self.CMD_OFED_INFO)
    if ofed_res.exit_code != 0:
        log_network_event(
            "OFED not installed or ofed_info command not available",
            data=failure_payload(ofed_res),
        )
    else:
        ofed_info = self._parse_ofed_info(ofed_res.stdout)
        log_network_event(f"Collected OFED version: {ofed_info.version}")

    # --- MST: start the service first, then query its status ------------
    mst_start_res = self._run_sut_cmd(self.CMD_MST_START, sudo=True)
    if mst_start_res.exit_code != 0:
        log_network_event(
            "MST tools not available (Mellanox-specific)",
            data=failure_payload(mst_start_res),
        )
    else:
        # A zero exit code alone is not treated as proof of success; the
        # output is also checked for a known success marker.
        start_text = mst_start_res.stdout.lower()
        if any(marker in start_text for marker in ("success", "loading mst")):
            log_network_event("MST service started successfully")
        else:
            log_network_event(
                "MST service command completed but status unclear",
                data={"output": mst_start_res.stdout},
            )

    mst_status_res = self._run_sut_cmd(self.CMD_MST_STATUS, sudo=True)
    if mst_status_res.exit_code != 0:
        log_network_event(
            "MST status not available (Mellanox-specific)",
            data=failure_payload(mst_status_res),
        )
    else:
        mst_status = self._parse_mst_status(mst_status_res.stdout)
        log_network_event(f"Collected MST status: {len(mst_status.devices)} devices")

    # --- RDMA devices ---------------------------------------------------
    rdma_dev_res = self._run_sut_cmd(self.CMD_RDMA_DEV)
    if rdma_dev_res.exit_code != 0:
        log_network_event(
            "Error collecting RDMA device information",
            priority=EventPriority.WARNING,
            data=failure_payload(rdma_dev_res),
        )
    else:
        rdma_devices = self._parse_rdma_dev(rdma_dev_res.stdout)
        log_network_event(f"Collected {len(rdma_devices)} RDMA devices")

    # --- RDMA links -----------------------------------------------------
    rdma_link_res = self._run_sut_cmd(self.CMD_RDMA_LINK)
    if rdma_link_res.exit_code != 0:
        log_network_event(
            "Error collecting RDMA link information",
            priority=EventPriority.WARNING,
            data=failure_payload(rdma_link_res),
        )
    else:
        rdma_links = self._parse_rdma_link(rdma_link_res.stdout)
        log_network_event(f"Collected {len(rdma_links)} RDMA links")

    # Merge both RDMA views, but only when at least one of them has entries.
    if rdma_devices or rdma_links:
        combined_raw = rdma_dev_res.stdout + "\n" + rdma_link_res.stdout
        rdma_info = RdmaInfo(
            devices=rdma_devices,
            links=rdma_links,
            raw_output=combined_raw,
        )

    # Without data from any source there is nothing to build a model from.
    nothing_collected = not (
        ibstat_devices
        or ibv_devices
        or ibdev_netdev_mappings
        or ofed_info
        or mst_status
        or rdma_info
    )
    if nothing_collected:
        self.result.message = "No InfiniBand/RDMA fabrics hardware detected on this system"
        self.result.status = ExecutionStatus.ERROR
        return self.result, None

    fabrics_data = FabricsDataModel(
        ibstat_devices=ibstat_devices,
        ibv_devices=ibv_devices,
        ibdev_netdev_mappings=ibdev_netdev_mappings,
        ofed_info=ofed_info,
        mst_status=mst_status,
        rdma_info=rdma_info,
    )

    # Summary figures for the result message; optional sources fall back to
    # a placeholder when they were not collected.
    ofed_version = ofed_info.version if ofed_info else "N/A"
    mst_device_count = len(mst_status.devices) if mst_status else 0
    rdma_device_count = len(rdma_info.devices) if rdma_info else 0
    self.result.message = (
        f"Collected fabrics data: {len(ibstat_devices)} ibstat devices, "
        f"{len(ibv_devices)} ibv devices, {len(ibdev_netdev_mappings)} mappings, "
        f"OFED: {ofed_version}, "
        f"MST devices: {mst_device_count}, "
        f"RDMA devices: {rdma_device_count}"
    )
    self.result.status = ExecutionStatus.OK
    return self.result, fabrics_data
|