gpustack-runtime 0.1.39.post2__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. gpustack_runtime/__main__.py +7 -3
  2. gpustack_runtime/_version.py +2 -2
  3. gpustack_runtime/_version_appendix.py +1 -1
  4. gpustack_runtime/cmds/__init__.py +2 -0
  5. gpustack_runtime/cmds/deployer.py +84 -2
  6. gpustack_runtime/cmds/images.py +2 -0
  7. gpustack_runtime/deployer/__init__.py +2 -0
  8. gpustack_runtime/deployer/__types__.py +52 -28
  9. gpustack_runtime/deployer/__utils__.py +99 -112
  10. gpustack_runtime/deployer/cdi/__init__.py +81 -0
  11. gpustack_runtime/deployer/cdi/__types__.py +667 -0
  12. gpustack_runtime/deployer/cdi/thead.py +103 -0
  13. gpustack_runtime/deployer/docker.py +42 -24
  14. gpustack_runtime/deployer/kuberentes.py +8 -4
  15. gpustack_runtime/deployer/podman.py +41 -23
  16. gpustack_runtime/detector/__init__.py +62 -3
  17. gpustack_runtime/detector/__types__.py +11 -0
  18. gpustack_runtime/detector/__utils__.py +23 -0
  19. gpustack_runtime/detector/amd.py +17 -9
  20. gpustack_runtime/detector/hygon.py +6 -1
  21. gpustack_runtime/detector/iluvatar.py +20 -5
  22. gpustack_runtime/detector/mthreads.py +8 -12
  23. gpustack_runtime/detector/nvidia.py +365 -168
  24. gpustack_runtime/detector/pyacl/__init__.py +9 -1
  25. gpustack_runtime/detector/pyamdgpu/__init__.py +8 -0
  26. gpustack_runtime/detector/pycuda/__init__.py +9 -1
  27. gpustack_runtime/detector/pydcmi/__init__.py +9 -2
  28. gpustack_runtime/detector/pyhgml/__init__.py +5879 -0
  29. gpustack_runtime/detector/pyhgml/libhgml.so +0 -0
  30. gpustack_runtime/detector/pyhgml/libuki.so +0 -0
  31. gpustack_runtime/detector/pyhsa/__init__.py +9 -0
  32. gpustack_runtime/detector/pyixml/__init__.py +89 -164
  33. gpustack_runtime/detector/pyrocmcore/__init__.py +42 -24
  34. gpustack_runtime/detector/pyrocmsmi/__init__.py +141 -138
  35. gpustack_runtime/detector/thead.py +733 -0
  36. gpustack_runtime/envs.py +128 -55
  37. {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.40.dist-info}/METADATA +4 -2
  38. gpustack_runtime-0.1.40.dist-info/RECORD +55 -0
  39. gpustack_runtime/detector/pymtml/__init__.py +0 -770
  40. gpustack_runtime-0.1.39.post2.dist-info/RECORD +0 -49
  41. {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.40.dist-info}/WHEEL +0 -0
  42. {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.40.dist-info}/entry_points.txt +0 -0
  43. {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.40.dist-info}/licenses/LICENSE +0 -0
@@ -331,10 +331,16 @@ def convertStrBytes(func):
331
331
 
332
332
 
333
333
  def _LoadHsaLibrary():
334
+ """
335
+ Load the library if it isn't loaded already.
336
+ """
334
337
  global hsaLib
338
+
335
339
  if hsaLib is None:
340
+ # lock to ensure only one caller loads the library
336
341
  libLoadLock.acquire()
337
342
  try:
343
+ # ensure the library still isn't loaded
338
344
  if hsaLib is None:
339
345
  if sys.platform.startswith("win"):
340
346
  # Do not support Windows yet.
@@ -360,7 +366,10 @@ def _LoadHsaLibrary():
360
366
  break
361
367
  except OSError:
362
368
  pass
369
+ if hsaLib is None:
370
+ raise HSAError(HSA_STATUS_ERROR_LIBRARY_NOT_FOUND)
363
371
  finally:
372
+ # lock is always released
364
373
  libLoadLock.release()
365
374
 
366
375
 
@@ -1,7 +1,6 @@
1
1
  ##
2
2
  # Python bindings for the IXML library
3
3
  ##
4
-
5
4
  #####
6
5
  # Copyright (c) 2011-2023, NVIDIA Corporation. All rights reserved.
7
6
  #
@@ -36,9 +35,9 @@
36
35
  from ctypes import *
37
36
  from functools import wraps
38
37
  import sys
39
- import os
40
38
  import threading
41
39
  import string
40
+ from pathlib import Path
42
41
 
43
42
  ## C Type mappings ##
44
43
  ## Enums
@@ -1076,7 +1075,7 @@ def _nvmlGetFunctionPointer(name):
1076
1075
  libLoadLock.acquire()
1077
1076
  try:
1078
1077
  # ensure library was loaded
1079
- if nvmlLib == None:
1078
+ if nvmlLib is None:
1080
1079
  raise NVMLError(NVML_ERROR_UNINITIALIZED)
1081
1080
  try:
1082
1081
  _nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name)
@@ -2143,24 +2142,28 @@ def _LoadNvmlLibrary():
2143
2142
  """
2144
2143
  global nvmlLib
2145
2144
 
2146
- if nvmlLib == None:
2145
+ if nvmlLib is None:
2147
2146
  # lock to ensure only one caller loads the library
2148
2147
  libLoadLock.acquire()
2149
-
2150
2148
  try:
2151
2149
  # ensure the library still isn't loaded
2152
- if nvmlLib == None:
2153
- try:
2154
- if sys.platform.startswith("win"):
2155
- # IXML is typically used on Linux, but for completeness,
2156
- # Windows support would require different path handling.
2157
- _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND)
2158
- else:
2159
- nvmlLib = CDLL("libixml.so")
2160
- except OSError as ose:
2161
- _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND)
2162
- if nvmlLib == None:
2163
- _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND)
2150
+ if nvmlLib is None:
2151
+ if sys.platform.startswith("win"):
2152
+ # Do not support Windows yet.
2153
+ raise NVMLError(NVML_ERROR_LIBRARY_NOT_FOUND)
2154
+ # Linux path
2155
+ locs = [
2156
+ "libixml.so",
2157
+ str(Path(__file__).resolve().parent / "libixml.so"),
2158
+ ]
2159
+ for loc in locs:
2160
+ try:
2161
+ nvmlLib = CDLL(loc)
2162
+ break
2163
+ except OSError:
2164
+ pass
2165
+ if nvmlLib is None:
2166
+ raise NVMLError(NVML_ERROR_LIBRARY_NOT_FOUND)
2164
2167
  finally:
2165
2168
  # lock is always freed
2166
2169
  libLoadLock.release()
@@ -5020,153 +5023,75 @@ def nvmlDeviceGetMemClkMinMaxVfOffset(device, minOffset, maxOffset):
5020
5023
  ## Enums/defines
5021
5024
 
5022
5025
  #### GPM Metric Identifiers
5023
- NVML_GPM_METRIC_GRAPHICS_UTIL = (
5024
- 1 # Percentage of time any compute/graphics app was active on the GPU. 0.0 - 100.0
5025
- )
5026
- NVML_GPM_METRIC_SM_UTIL = 2 # Percentage of SMs that were busy. 0.0 - 100.0
5027
- NVML_GPM_METRIC_SM_OCCUPANCY = (
5028
- 3 # Percentage of warps that were active vs theoretical maximum. 0.0 - 100.0
5029
- )
5030
- NVML_GPM_METRIC_INTEGER_UTIL = (
5031
- 4 # Percentage of time the GPU's SMs were doing integer operations. 0.0 - 100.0
5032
- )
5033
- NVML_GPM_METRIC_ANY_TENSOR_UTIL = (
5034
- 5 # Percentage of time the GPU's SMs were doing ANY tensor operations. 0.0 - 100.0
5035
- )
5036
- NVML_GPM_METRIC_DFMA_TENSOR_UTIL = (
5037
- 6 # Percentage of time the GPU's SMs were doing DFMA tensor operations. 0.0 - 100.0
5038
- )
5039
- NVML_GPM_METRIC_HMMA_TENSOR_UTIL = (
5040
- 7 # Percentage of time the GPU's SMs were doing HMMA tensor operations. 0.0 - 100.0
5041
- )
5042
- NVML_GPM_METRIC_IMMA_TENSOR_UTIL = (
5043
- 9 # Percentage of time the GPU's SMs were doing IMMA tensor operations. 0.0 - 100.0
5044
- )
5045
- NVML_GPM_METRIC_DRAM_BW_UTIL = (
5046
- 10 # Percentage of DRAM bw used vs theoretical maximum. 0.0 - 100.0
5047
- )
5048
- NVML_GPM_METRIC_FP64_UTIL = (
5049
- 11 # Percentage of time the GPU's SMs were doing non-tensor FP64 math. 0.0 - 100.0
5050
- )
5051
- NVML_GPM_METRIC_FP32_UTIL = (
5052
- 12 # Percentage of time the GPU's SMs were doing non-tensor FP32 math. 0.0 - 100.0
5053
- )
5054
- NVML_GPM_METRIC_FP16_UTIL = (
5055
- 13 # Percentage of time the GPU's SMs were doing non-tensor FP16 math. 0.0 - 100.0
5056
- )
5057
- NVML_GPM_METRIC_PCIE_TX_PER_SEC = 20 # PCIe traffic from this GPU in MiB/sec
5058
- NVML_GPM_METRIC_PCIE_RX_PER_SEC = 21 # PCIe traffic to this GPU in MiB/sec
5059
- NVML_GPM_METRIC_NVDEC_0_UTIL = 30 # Percent utilization of NVDEC 0. 0.0 - 100.0
5060
- NVML_GPM_METRIC_NVDEC_1_UTIL = 31 # Percent utilization of NVDEC 1. 0.0 - 100.0
5061
- NVML_GPM_METRIC_NVDEC_2_UTIL = 32 # Percent utilization of NVDEC 2. 0.0 - 100.0
5062
- NVML_GPM_METRIC_NVDEC_3_UTIL = 33 # Percent utilization of NVDEC 3. 0.0 - 100.0
5063
- NVML_GPM_METRIC_NVDEC_4_UTIL = 34 # Percent utilization of NVDEC 4. 0.0 - 100.0
5064
- NVML_GPM_METRIC_NVDEC_5_UTIL = 35 # Percent utilization of NVDEC 5. 0.0 - 100.0
5065
- NVML_GPM_METRIC_NVDEC_6_UTIL = 36 # Percent utilization of NVDEC 6. 0.0 - 100.0
5066
- NVML_GPM_METRIC_NVDEC_7_UTIL = 37 # Percent utilization of NVDEC 7. 0.0 - 100.0
5067
- NVML_GPM_METRIC_NVJPG_0_UTIL = 40 # Percent utilization of NVJPG 0. 0.0 - 100.0
5068
- NVML_GPM_METRIC_NVJPG_1_UTIL = 41 # Percent utilization of NVJPG 1. 0.0 - 100.0
5069
- NVML_GPM_METRIC_NVJPG_2_UTIL = 42 # Percent utilization of NVJPG 2. 0.0 - 100.0
5070
- NVML_GPM_METRIC_NVJPG_3_UTIL = 43 # Percent utilization of NVJPG 3. 0.0 - 100.0
5071
- NVML_GPM_METRIC_NVJPG_4_UTIL = 44 # Percent utilization of NVJPG 4. 0.0 - 100.0
5072
- NVML_GPM_METRIC_NVJPG_5_UTIL = 45 # Percent utilization of NVJPG 5. 0.0 - 100.0
5073
- NVML_GPM_METRIC_NVJPG_6_UTIL = 46 # Percent utilization of NVJPG 6. 0.0 - 100.0
5074
- NVML_GPM_METRIC_NVJPG_7_UTIL = 47 # Percent utilization of NVJPG 7. 0.0 - 100.0
5075
- NVML_GPM_METRIC_NVOFA_0_UTIL = 50 # Percent utilization of NVOFA 0. 0.0 - 100.0
5076
- NVML_GPM_METRIC_NVLINK_TOTAL_RX_PER_SEC = (
5077
- 60 # NvLink read bandwidth for all links in MiB/sec
5078
- )
5079
- NVML_GPM_METRIC_NVLINK_TOTAL_TX_PER_SEC = (
5080
- 61 # NvLink write bandwidth for all links in MiB/sec
5081
- )
5082
- NVML_GPM_METRIC_NVLINK_L0_RX_PER_SEC = 62 # NvLink read bandwidth for link 0 in MiB/sec
5083
- NVML_GPM_METRIC_NVLINK_L0_TX_PER_SEC = (
5084
- 63 # NvLink write bandwidth for link 0 in MiB/sec
5085
- )
5086
- NVML_GPM_METRIC_NVLINK_L1_RX_PER_SEC = 64 # NvLink read bandwidth for link 1 in MiB/sec
5087
- NVML_GPM_METRIC_NVLINK_L1_TX_PER_SEC = (
5088
- 65 # NvLink write bandwidth for link 1 in MiB/sec
5089
- )
5090
- NVML_GPM_METRIC_NVLINK_L2_RX_PER_SEC = 66 # NvLink read bandwidth for link 2 in MiB/sec
5091
- NVML_GPM_METRIC_NVLINK_L2_TX_PER_SEC = (
5092
- 67 # NvLink write bandwidth for link 2 in MiB/sec
5093
- )
5094
- NVML_GPM_METRIC_NVLINK_L3_RX_PER_SEC = 68 # NvLink read bandwidth for link 3 in MiB/sec
5095
- NVML_GPM_METRIC_NVLINK_L3_TX_PER_SEC = (
5096
- 69 # NvLink write bandwidth for link 3 in MiB/sec
5097
- )
5098
- NVML_GPM_METRIC_NVLINK_L4_RX_PER_SEC = 70 # NvLink read bandwidth for link 4 in MiB/sec
5099
- NVML_GPM_METRIC_NVLINK_L4_TX_PER_SEC = (
5100
- 71 # NvLink write bandwidth for link 4 in MiB/sec
5101
- )
5102
- NVML_GPM_METRIC_NVLINK_L5_RX_PER_SEC = 72 # NvLink read bandwidth for link 5 in MiB/sec
5103
- NVML_GPM_METRIC_NVLINK_L5_TX_PER_SEC = (
5104
- 73 # NvLink write bandwidth for link 5 in MiB/sec
5105
- )
5106
- NVML_GPM_METRIC_NVLINK_L6_RX_PER_SEC = 74 # NvLink read bandwidth for link 6 in MiB/sec
5107
- NVML_GPM_METRIC_NVLINK_L6_TX_PER_SEC = (
5108
- 75 # NvLink write bandwidth for link 6 in MiB/sec
5109
- )
5110
- NVML_GPM_METRIC_NVLINK_L7_RX_PER_SEC = 76 # NvLink read bandwidth for link 7 in MiB/sec
5111
- NVML_GPM_METRIC_NVLINK_L7_TX_PER_SEC = (
5112
- 77 # NvLink write bandwidth for link 7 in MiB/sec
5113
- )
5114
- NVML_GPM_METRIC_NVLINK_L8_RX_PER_SEC = 78 # NvLink read bandwidth for link 8 in MiB/sec
5115
- NVML_GPM_METRIC_NVLINK_L8_TX_PER_SEC = (
5116
- 79 # NvLink write bandwidth for link 8 in MiB/sec
5117
- )
5118
- NVML_GPM_METRIC_NVLINK_L9_RX_PER_SEC = 80 # NvLink read bandwidth for link 9 in MiB/sec
5119
- NVML_GPM_METRIC_NVLINK_L9_TX_PER_SEC = (
5120
- 81 # NvLink write bandwidth for link 9 in MiB/sec
5121
- )
5122
- NVML_GPM_METRIC_NVLINK_L10_RX_PER_SEC = (
5123
- 82 # NvLink read bandwidth for link 10 in MiB/sec
5124
- )
5125
- NVML_GPM_METRIC_NVLINK_L10_TX_PER_SEC = (
5126
- 83 # NvLink write bandwidth for link 10 in MiB/sec
5127
- )
5128
- NVML_GPM_METRIC_NVLINK_L11_RX_PER_SEC = (
5129
- 84 # NvLink read bandwidth for link 11 in MiB/sec
5130
- )
5131
- NVML_GPM_METRIC_NVLINK_L11_TX_PER_SEC = (
5132
- 85 # NvLink write bandwidth for link 11 in MiB/sec
5133
- )
5134
- NVML_GPM_METRIC_NVLINK_L12_RX_PER_SEC = (
5135
- 86 # NvLink read bandwidth for link 12 in MiB/sec
5136
- )
5137
- NVML_GPM_METRIC_NVLINK_L12_TX_PER_SEC = (
5138
- 87 # NvLink write bandwidth for link 12 in MiB/sec
5139
- )
5140
- NVML_GPM_METRIC_NVLINK_L13_RX_PER_SEC = (
5141
- 88 # NvLink read bandwidth for link 13 in MiB/sec
5142
- )
5143
- NVML_GPM_METRIC_NVLINK_L13_TX_PER_SEC = (
5144
- 89 # NvLink write bandwidth for link 13 in MiB/sec
5145
- )
5146
- NVML_GPM_METRIC_NVLINK_L14_RX_PER_SEC = (
5147
- 90 # NvLink read bandwidth for link 14 in MiB/sec
5148
- )
5149
- NVML_GPM_METRIC_NVLINK_L14_TX_PER_SEC = (
5150
- 91 # NvLink write bandwidth for link 14 in MiB/sec
5151
- )
5152
- NVML_GPM_METRIC_NVLINK_L15_RX_PER_SEC = (
5153
- 92 # NvLink read bandwidth for link 15 in MiB/sec
5154
- )
5155
- NVML_GPM_METRIC_NVLINK_L15_TX_PER_SEC = (
5156
- 93 # NvLink write bandwidth for link 15 in MiB/sec
5157
- )
5158
- NVML_GPM_METRIC_NVLINK_L16_RX_PER_SEC = (
5159
- 94 # NvLink read bandwidth for link 16 in MiB/sec
5160
- )
5161
- NVML_GPM_METRIC_NVLINK_L16_TX_PER_SEC = (
5162
- 95 # NvLink write bandwidth for link 16 in MiB/sec
5163
- )
5164
- NVML_GPM_METRIC_NVLINK_L17_RX_PER_SEC = (
5165
- 96 # NvLink read bandwidth for link 17 in MiB/sec
5166
- )
5167
- NVML_GPM_METRIC_NVLINK_L17_TX_PER_SEC = (
5168
- 97 # NvLink write bandwidth for link 17 in MiB/sec
5169
- )
5026
+ NVML_GPM_METRIC_GRAPHICS_UTIL = 1
5027
+ NVML_GPM_METRIC_SM_UTIL = 2
5028
+ NVML_GPM_METRIC_SM_OCCUPANCY = 3
5029
+ NVML_GPM_METRIC_INTEGER_UTIL = 4
5030
+ NVML_GPM_METRIC_ANY_TENSOR_UTIL = 5
5031
+ NVML_GPM_METRIC_DFMA_TENSOR_UTIL = 6
5032
+ NVML_GPM_METRIC_HMMA_TENSOR_UTIL = 7
5033
+ NVML_GPM_METRIC_IMMA_TENSOR_UTIL = 9
5034
+ NVML_GPM_METRIC_DRAM_BW_UTIL = 10
5035
+ NVML_GPM_METRIC_FP64_UTIL = 11
5036
+ NVML_GPM_METRIC_FP32_UTIL = 12
5037
+ NVML_GPM_METRIC_FP16_UTIL = 13
5038
+ NVML_GPM_METRIC_PCIE_TX_PER_SEC = 20
5039
+ NVML_GPM_METRIC_PCIE_RX_PER_SEC = 21
5040
+ NVML_GPM_METRIC_NVDEC_0_UTIL = 30
5041
+ NVML_GPM_METRIC_NVDEC_1_UTIL = 31
5042
+ NVML_GPM_METRIC_NVDEC_2_UTIL = 32
5043
+ NVML_GPM_METRIC_NVDEC_3_UTIL = 33
5044
+ NVML_GPM_METRIC_NVDEC_4_UTIL = 34
5045
+ NVML_GPM_METRIC_NVDEC_5_UTIL = 35
5046
+ NVML_GPM_METRIC_NVDEC_6_UTIL = 36
5047
+ NVML_GPM_METRIC_NVDEC_7_UTIL = 37
5048
+ NVML_GPM_METRIC_NVJPG_0_UTIL = 40
5049
+ NVML_GPM_METRIC_NVJPG_1_UTIL = 41
5050
+ NVML_GPM_METRIC_NVJPG_2_UTIL = 42
5051
+ NVML_GPM_METRIC_NVJPG_3_UTIL = 43
5052
+ NVML_GPM_METRIC_NVJPG_4_UTIL = 44
5053
+ NVML_GPM_METRIC_NVJPG_5_UTIL = 45
5054
+ NVML_GPM_METRIC_NVJPG_6_UTIL = 46
5055
+ NVML_GPM_METRIC_NVJPG_7_UTIL = 47
5056
+ NVML_GPM_METRIC_NVOFA_0_UTIL = 50
5057
+ NVML_GPM_METRIC_NVLINK_TOTAL_RX_PER_SEC = 60
5058
+ NVML_GPM_METRIC_NVLINK_TOTAL_TX_PER_SEC = 61
5059
+ NVML_GPM_METRIC_NVLINK_L0_RX_PER_SEC = 62
5060
+ NVML_GPM_METRIC_NVLINK_L0_TX_PER_SEC = 63
5061
+ NVML_GPM_METRIC_NVLINK_L1_RX_PER_SEC = 64
5062
+ NVML_GPM_METRIC_NVLINK_L1_TX_PER_SEC = 65
5063
+ NVML_GPM_METRIC_NVLINK_L2_RX_PER_SEC = 66
5064
+ NVML_GPM_METRIC_NVLINK_L2_TX_PER_SEC = 67
5065
+ NVML_GPM_METRIC_NVLINK_L3_RX_PER_SEC = 68
5066
+ NVML_GPM_METRIC_NVLINK_L3_TX_PER_SEC = 69
5067
+ NVML_GPM_METRIC_NVLINK_L4_RX_PER_SEC = 70
5068
+ NVML_GPM_METRIC_NVLINK_L4_TX_PER_SEC = 71
5069
+ NVML_GPM_METRIC_NVLINK_L5_RX_PER_SEC = 72
5070
+ NVML_GPM_METRIC_NVLINK_L5_TX_PER_SEC = 73
5071
+ NVML_GPM_METRIC_NVLINK_L6_RX_PER_SEC = 74
5072
+ NVML_GPM_METRIC_NVLINK_L6_TX_PER_SEC = 75
5073
+ NVML_GPM_METRIC_NVLINK_L7_RX_PER_SEC = 76
5074
+ NVML_GPM_METRIC_NVLINK_L7_TX_PER_SEC = 77
5075
+ NVML_GPM_METRIC_NVLINK_L8_RX_PER_SEC = 78
5076
+ NVML_GPM_METRIC_NVLINK_L8_TX_PER_SEC = 79
5077
+ NVML_GPM_METRIC_NVLINK_L9_RX_PER_SEC = 80
5078
+ NVML_GPM_METRIC_NVLINK_L9_TX_PER_SEC = 81
5079
+ NVML_GPM_METRIC_NVLINK_L10_RX_PER_SEC = 82
5080
+ NVML_GPM_METRIC_NVLINK_L10_TX_PER_SEC = 83
5081
+ NVML_GPM_METRIC_NVLINK_L11_RX_PER_SEC = 84
5082
+ NVML_GPM_METRIC_NVLINK_L11_TX_PER_SEC = 85
5083
+ NVML_GPM_METRIC_NVLINK_L12_RX_PER_SEC = 86
5084
+ NVML_GPM_METRIC_NVLINK_L12_TX_PER_SEC = 87
5085
+ NVML_GPM_METRIC_NVLINK_L13_RX_PER_SEC = 88
5086
+ NVML_GPM_METRIC_NVLINK_L13_TX_PER_SEC = 89
5087
+ NVML_GPM_METRIC_NVLINK_L14_RX_PER_SEC = 90
5088
+ NVML_GPM_METRIC_NVLINK_L14_TX_PER_SEC = 91
5089
+ NVML_GPM_METRIC_NVLINK_L15_RX_PER_SEC = 92
5090
+ NVML_GPM_METRIC_NVLINK_L15_TX_PER_SEC = 93
5091
+ NVML_GPM_METRIC_NVLINK_L16_RX_PER_SEC = 94
5092
+ NVML_GPM_METRIC_NVLINK_L16_TX_PER_SEC = 95
5093
+ NVML_GPM_METRIC_NVLINK_L17_RX_PER_SEC = 96
5094
+ NVML_GPM_METRIC_NVLINK_L17_TX_PER_SEC = 97
5170
5095
  NVML_GPM_METRIC_MAX = 98
5171
5096
 
5172
5097
  ## Structs
@@ -1,42 +1,58 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import os
4
+ import sys
4
5
  import threading
5
6
  from ctypes import *
6
7
  from pathlib import Path
7
8
 
9
+ # Example ROCM_SMI_LIB_PATH
10
+ # - /opt/dtk-24.04.3/rocm_smi/lib
11
+ # - /opt/rocm/rocm_smi/lib
12
+ rocmcore_lib_path = os.getenv("ROCM_CORE_LIB_PATH")
13
+ if not rocmcore_lib_path:
14
+ # Example ROCM_PATH/ROCM_HOME
15
+ # - /opt/dtk-24.04.3
16
+ # - /opt/rocm
17
+ rocm_path = Path(os.getenv("ROCM_HOME", os.getenv("ROCM_PATH") or "/opt/rocm"))
18
+ rocmcore_lib_path = str(rocm_path / "lib")
19
+ else:
20
+ rocm_path = Path(
21
+ os.getenv(
22
+ "ROCM_HOME",
23
+ os.getenv("ROCM_PATH") or str(Path(rocmcore_lib_path).parent.parent),
24
+ )
25
+ )
26
+
27
+ rocmcore_lib_loc = Path(rocmcore_lib_path) / "librocm-core.so"
28
+
8
29
  ## Lib loading ##
9
30
  rocmcoreLib = None
10
31
  libLoadLock = threading.Lock()
11
32
 
12
- if rocmcoreLib is None:
13
- # Example ROCM_SMI_LIB_PATH
14
- # - /opt/dtk-24.04.3/rocm_smi/lib
15
- # - /opt/rocm/rocm_smi/lib
16
- rocmcore_lib_path = os.getenv("ROCM_CORE_LIB_PATH")
17
- if not rocmcore_lib_path:
18
- # Example ROCM_PATH/ROCM_HOME
19
- # - /opt/dtk-24.04.3
20
- # - /opt/rocm
21
- rocm_path = Path(os.getenv("ROCM_HOME", os.getenv("ROCM_PATH") or "/opt/rocm"))
22
- rocmcore_lib_path = str(rocm_path / "lib")
23
- else:
24
- rocm_path = Path(
25
- os.getenv(
26
- "ROCM_HOME",
27
- os.getenv("ROCM_PATH") or str(Path(rocmcore_lib_path).parent.parent),
28
- )
29
- )
30
33
 
31
- rocmcore_lib_loc = Path(rocmcore_lib_path) / "librocm-core.so"
32
- if rocmcore_lib_loc.exists():
34
+ def _LoadRocmCoreLibrary():
35
+ """
36
+ Load the library if it isn't loaded already.
37
+ """
38
+ global rocmcoreLib
39
+
40
+ if rocmcoreLib is None:
41
+ # lock to ensure only one caller loads the library
33
42
  libLoadLock.acquire()
34
43
  try:
35
- if not rocmcoreLib:
36
- rocmcoreLib = CDLL(rocmcore_lib_loc)
37
- except OSError:
38
- pass
44
+ # ensure the library still isn't loaded
45
+ if (
46
+ rocmcoreLib is None
47
+ and not sys.platform.startswith("win")
48
+ and rocmcore_lib_loc.is_file()
49
+ ):
50
+ try:
51
+ rocmcoreLib = CDLL(str(rocmcore_lib_loc))
52
+ except OSError:
53
+ pass
39
54
  finally:
55
+ # lock is always released
40
56
  libLoadLock.release()
41
57
 
42
58
 
@@ -59,6 +75,8 @@ def getROCmVersion() -> str | None:
59
75
  except OSError:
60
76
  continue
61
77
 
78
+ _LoadRocmCoreLibrary()
79
+
62
80
  if rocmcoreLib:
63
81
  try:
64
82
  major = c_uint32()