returnn 1.20260105.192646__py3-none-any.whl → 1.20260119.15400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. returnn/PKG-INFO +1 -1
  2. returnn/__old_mod_loader__.py +26 -2
  3. returnn/_setup_info_generated.py +2 -2
  4. returnn/datasets/lm.py +110 -42
  5. returnn/frontend/__init__.py +1 -0
  6. returnn/frontend/_backend.py +41 -0
  7. returnn/frontend/_native/__init__.py +22 -0
  8. returnn/frontend/_numpy_backend.py +7 -0
  9. returnn/frontend/_utils.py +1 -1
  10. returnn/frontend/array_.py +6 -5
  11. returnn/frontend/assert_.py +35 -0
  12. returnn/frontend/device.py +14 -1
  13. returnn/frontend/encoder/conformer.py +19 -0
  14. returnn/frontend/loss.py +183 -3
  15. returnn/frontend/math_.py +54 -14
  16. returnn/native_op.cpp +104 -174
  17. returnn/native_op.py +36 -31
  18. returnn/tensor/_dim_extra.py +7 -7
  19. returnn/tensor/_tensor_extra.py +10 -10
  20. returnn/tensor/utils.py +1 -1
  21. returnn/tf/frontend_layers/_backend.py +3 -1
  22. returnn/tf/layers/basic.py +13 -2
  23. returnn/tf/native_op.py +16 -5
  24. returnn/tf/util/basic.py +7 -201
  25. returnn/torch/engine.py +120 -3
  26. returnn/torch/frontend/_backend.py +166 -22
  27. returnn/torch/frontend/bridge.py +61 -0
  28. returnn/torch/frontend/compile_helper.py +106 -0
  29. returnn/torch/util/array_.py +30 -0
  30. returnn/torch/util/assert_.py +122 -0
  31. returnn/torch/util/native_op.py +885 -0
  32. returnn/torch/util/native_op_code_compiler.py +308 -0
  33. returnn/util/basic.py +3 -1
  34. returnn/util/cuda_env.py +332 -0
  35. returnn/util/debug.py +1 -0
  36. returnn/util/fsa.py +17 -13
  37. returnn/util/native_code_compiler.py +104 -47
  38. {returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/METADATA +1 -1
  39. {returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/RECORD +42 -36
  40. {returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/WHEEL +1 -1
  41. {returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/LICENSE +0 -0
  42. {returnn-1.20260105.192646.dist-info → returnn-1.20260119.15400.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,308 @@
1
+ """
2
+ Helper to compile Torch ops on-the-fly, similar to Theano / :class:`returnn.tf.util.basic.OpCodeCompiler`,
3
+ similar to :mod:`torch.utils.cpp_extension`.
4
+
5
+ See :class:`OpCodeCompiler`.
6
+ """
7
+
8
+ from __future__ import annotations
9
+ from typing import Union, Optional, Sequence, Dict, List
10
+ import os
11
+ import sysconfig
12
+
13
+ import torch
14
+ from torch.utils import cpp_extension
15
+
16
+ from returnn.util.basic import NativeCodeCompiler
17
+ from returnn.util.cuda_env import CudaEnv as _CudaEnvBase, get_best_nvcc_path_for_cuda_version
18
+
19
+
20
class OpCodeCompiler(NativeCodeCompiler):
    """
    Helper class to compile Torch ops on-the-fly, similar to Theano,
    and similar to :class:`returnn.tf.util.basic.OpCodeCompiler`.

    Note that PyTorch already has its own code for this,
    see :mod:`torch.utils.cpp_extension`, :func:`torch.utils.cpp_extension.load_inline`, etc.
    However, there are some shortcomings there that we try to do better:

    * The way we find CUDA/nvcc is more robust.
    * The way we find the C/C++ compiler is more robust.
    * The automatic selection of options for nvcc is more robust.
      E.g. the compute version is not higher than what the selected CUDA supports.

    https://docs.pytorch.org/tutorials/advanced/cpp_custom_ops.html
    """

    CacheDirName = "returnn_torch_cache/ops"

    def __init__(
        self,
        base_name: str,
        *,
        code: str,
        use_cuda_if_available: bool = True,
        cuda_auto_min_compute_capability: bool = True,
        include_paths: Sequence[str] = (),
        ld_flags: Sequence[str] = (),
        c_macro_defines: Optional[Dict[str, Union[str, int, None]]] = None,
        is_python_module: bool = False,
        **kwargs,
    ):
        """
        :param base_name: name of the op library. Also the default value of the ``TORCH_EXTENSION_NAME`` macro,
            and the attribute looked up on ``torch.ops`` in :func:`load_module`.
        :param code: source code, passed on to :class:`NativeCodeCompiler`
        :param use_cuda_if_available: if set and ``torch.cuda.is_available()``,
            a usable CUDA environment is required (asserted), and the CUDA compiler is used
        :param cuda_auto_min_compute_capability: if set and CUDA is used, adds ``-arch compute_XY`` for nvcc,
            using the lowest compute capability over all visible GPUs,
            capped by the max compute capability reported by the CUDA environment
        :param include_paths: extra include dirs. The Torch and Python include dirs are appended.
        :param ld_flags: extra linker flags. The flags for the Torch libs (and cudart with CUDA) are appended.
        :param c_macro_defines: extra macro defines. The passed dict is not modified, a copy is extended.
        :param is_python_module: how :func:`load_module` loads the compiled lib:
            as Python extension module if set, otherwise via ``torch.ops.load_library``
        :param kwargs: passed on to :class:`NativeCodeCompiler`
        """
        self._cuda_env = None
        if use_cuda_if_available and torch.cuda.is_available():
            self._cuda_env = CudaEnv.get_instance()
            # Currently we assume that if we provide CUDA code (thus set use_cuda_if_available=True),
            # that if there is a GPU available (as PyTorch reports it),
            # we also expect that we find CUDA.
            # Otherwise you would end up with ops compiled for CPU only although they support CUDA
            # and the user expects them to run on GPU.
            assert self._with_cuda(), "OpCodeCompiler: use_cuda_if_available=True but no CUDA found"

        self._nvcc_opts = []
        if self._with_cuda() and cuda_auto_min_compute_capability:
            # Get the lowest CUDA compute capability over all visible GPU devices.
            min_compute_capability = _get_available_gpu_cuda_min_compute_capability()
            if min_compute_capability:
                min_compute_capability = min(min_compute_capability, self._cuda_env.get_max_compute_capability())
                # round() instead of int(): the float product (e.g. 8.6 * 10) must not be truncated
                # to the next lower integer in case of a floating point representation error.
                self._nvcc_opts += ["-arch", "compute_%i" % round(min_compute_capability * 10)]

        if self._with_cuda():
            self._nvcc_opts += cpp_extension.COMMON_NVCC_FLAGS

        # Example call from torch.utils.cpp_extension:
        # /usr/local/cuda-11.0/bin/nvcc
        # --generate-dependencies-with-compile --dependency-output cuda.cuda.o.d
        # -DTORCH_EXTENSION_NAME=async_assert_ext
        # -DTORCH_API_INCLUDE_EXTENSION_H
        # -isystem /home/az/py-venv/py3.12-torch2.9/lib/python3.12/site-packages/torch/include
        # -isystem /home/az/py-venv/py3.12-torch2.9/lib/python3.12/site-packages/torch/include/torch/csrc/api/include
        # -isystem /usr/local/cuda-11.0/include -isystem /usr/include/python3.12
        # -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__
        # -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__
        # --expt-relaxed-constexpr
        # -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86
        # --compiler-options '-fPIC' -std=c++17
        # -c /home/az/.cache/torch_extensions/py312_cu128/async_assert_ext/cuda.cu -o cuda.cuda.o

        torch_path = os.path.dirname(torch.__file__)
        torch_include = torch_path + "/include"
        assert os.path.isdir(torch_include)

        python_include = sysconfig.get_path("include", scheme="posix_prefix")

        include_paths = list(include_paths) + [torch_include, torch_include + "/torch/csrc/api/include", python_include]

        # Work on a copy, to not modify the dict of the caller.
        c_macro_defines = {} if c_macro_defines is None else c_macro_defines.copy()
        c_macro_defines.setdefault("TORCH_EXTENSION_NAME", base_name)
        c_macro_defines.setdefault("TORCH_API_INCLUDE_EXTENSION_H", "")
        # We have some assert in our kernels that we want to disable.
        c_macro_defines.setdefault("NDEBUG", 1)

        ld_flags = list(ld_flags)
        ld_flags.append("--no-as-needed")
        ld_flags.append(f"-L{cpp_extension.TORCH_LIB_PATH}")
        ld_flags.append("-lc10")
        if self._with_cuda():
            ld_flags.append("-lc10_cuda")
        ld_flags.append("-ltorch_cpu")
        if self._with_cuda():
            ld_flags.append("-ltorch_cuda")
        ld_flags.append("-ltorch")
        ld_flags.append("-ltorch_python")

        if self._with_cuda():
            ld_flags.append(self._cuda_env.get_ld_flag_for_linking_cudart())
            # maybe add CUDNN?

        # noinspection PyUnresolvedReferences,PyProtectedMember
        use_cxx11_abi = torch._C._GLIBCXX_USE_CXX11_ABI

        super().__init__(
            base_name=base_name,
            code=code,
            include_paths=include_paths,
            c_macro_defines=c_macro_defines,
            ld_flags=ld_flags,
            use_cxx11_abi=use_cxx11_abi,
            **kwargs,
        )
        self.is_python_module = is_python_module
        self._mod = None  # cache for load_module()

    def __repr__(self):
        return "<%s %r CUDA %s in %r>" % (self.__class__.__name__, self.base_name, self._with_cuda(), self._mod_path)

    # Extends the keys of the base class by the Torch/CUDA specific entries of _make_info_dict().
    _relevant_info_keys = NativeCodeCompiler._relevant_info_keys + (
        "torch_version",
        "with_cuda",
        "cuda_path",
        "nvcc_opts",
    )

    def _make_info_dict(self):
        """
        :return: info dict of the base class, extended by the Torch version, whether CUDA is used,
            the CUDA path and the nvcc options (the latter two are None without CUDA)
        """
        from returnn.util.basic import describe_torch_version

        d = super()._make_info_dict()
        d.update(
            {
                "torch_version": describe_torch_version(),
                "with_cuda": self._with_cuda(),
                "cuda_path": self._cuda_env.cuda_path if self._with_cuda() else None,
                "nvcc_opts": (
                    (tuple(self._cuda_env.get_compiler_opts()) + tuple(self._nvcc_opts)) if self._with_cuda() else None
                ),
            }
        )
        return d

    @classmethod
    def cuda_available(cls):
        """
        :return: whether CUDA is available. if True, and you initiate with use_cuda_if_available=True,
            then _with_cuda() should also be True.
        :rtype: bool
        """
        if not torch.cuda.is_available():
            return False
        cuda_env = CudaEnv.get_instance()
        return cuda_env.is_available()

    def _with_cuda(self):
        """
        :return: whether a CUDA environment was selected in the constructor and reports itself as available
        :rtype: bool
        """
        return bool(self._cuda_env and self._cuda_env.is_available())

    cpp_version = 17

    def _get_compiler_bin(self):
        """
        :return: the compiler of the CUDA environment if CUDA is used, otherwise the one of the base class
        """
        if self._with_cuda():
            return self._cuda_env.get_compiler_bin()
        return super()._get_compiler_bin()

    def _transform_compiler_opts(self, opts: List[str]) -> List[str]:
        """
        :param opts: compiler options
        :return: with CUDA: the options of the CUDA environment, then every given option
            forwarded via ``-Xcompiler``, then our own nvcc options.
            Without CUDA: whatever the base class returns.
        """
        if self._with_cuda():
            nvcc_opts = self._cuda_env.get_compiler_opts()
            for opt in opts:
                nvcc_opts += ["-Xcompiler", opt]
            nvcc_opts += self._nvcc_opts
            return nvcc_opts
        return super()._transform_compiler_opts(opts)

    def _transform_ld_flags(self, opts: Sequence[str]) -> Sequence[str]:
        """
        :param opts: linker flags
        :return: with CUDA: ``-L...``/``-l...`` flags are kept as-is,
            all other flags are forwarded via ``-Xlinker``.
            Without CUDA: whatever the base class returns.
        """
        if self._with_cuda():
            res = []
            for opt in opts:
                if opt.startswith("-L") or opt.startswith("-l"):
                    res.append(opt)
                else:
                    res += ["-Xlinker", opt]
            return res
        return super()._transform_ld_flags(opts)

    def load_module(self):
        """
        Compiles the code if needed, and loads the resulting lib.
        The result is cached, i.e. further calls return the same object.

        :return: module
        """
        if self._mod:
            return self._mod
        self._maybe_compile()

        if self.is_python_module:
            # Load as a Python module.
            # E.g. PYBIND11_MODULE or so was used in the code.
            # importlib.abc is imported explicitly:
            # "import importlib.util" alone does not guarantee that importlib.abc is loaded.
            import importlib.abc
            import importlib.util

            # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
            spec = importlib.util.spec_from_file_location(self.base_name, self._so_filename)
            assert spec is not None
            module = importlib.util.module_from_spec(spec)
            assert isinstance(spec.loader, importlib.abc.Loader)
            spec.loader.exec_module(module)

        else:
            # Load as a Torch extension.
            # TORCH_LIBRARY / TORCH_LIBRARY_IMPL was used in the code.
            torch.ops.load_library(self._so_filename)
            module = getattr(torch.ops, self.base_name)

        self._mod = module
        return module
229
+
230
+
231
class CudaEnv(_CudaEnvBase):
    """specialized CudaEnv for PyTorch"""

    # If cudart is loaded (e.g. via Torch), we really want to use that one.
    _runtime_libcudart_path_must_be_valid = True

    def __init__(self):
        """
        Additionally to the base class, this remembers the libcudart found at runtime,
        and selects the nvcc binary: the one inside :attr:`cuda_path` if it exists there,
        otherwise the best match for the CUDA version of :attr:`cuda_path`.
        """
        super().__init__()

        from returnn.util.basic import find_libcudart_from_runtime

        self._runtime_libcudart = find_libcudart_from_runtime()
        self._compiler_bin = None
        if self.cuda_path:
            if os.path.exists(f"{self.cuda_path}/bin/nvcc"):
                self._compiler_bin = f"{self.cuda_path}/bin/nvcc"
            else:
                # E.g. the pip package: lib + headers but no nvcc. Search for a matching nvcc elsewhere.
                self._compiler_bin = get_best_nvcc_path_for_cuda_version(self.get_cuda_version())

    @classmethod
    def _check_valid_cuda_path(cls, p: str) -> bool:
        """
        :param p: path to CUDA, e.g. "/usr/local/cuda-8.0"
        :return: whether this is a valid CUDA path, i.e. we find all what we need
        """
        if cls.verbose_find_cuda:
            print("check valid CUDA path: %s" % p)
        # Don't check nvcc here yet.
        # The pip package might not have it, but otherwise provides lib + headers
        # that we want to use, as this is likely the same that PyTorch uses.
        if not os.path.exists("%s/include/cuda.h" % p):
            return False
        if p.endswith("/site-packages/nvidia/cuda_runtime"):
            # special case for the nvidia CUDA pip package
            # NOTE(review): newer NVIDIA pip packages may use another dir name (e.g. "nvidia/cu13"),
            # confirm whether those need the same special case.
            lib_dir = p + "/lib"
            # A missing lib dir means "not valid". os.listdir would raise in that case.
            if not os.path.isdir(lib_dir):
                return False
            if not any(name.startswith("libcudart.") for name in os.listdir(lib_dir)):
                return False
        else:
            if not os.path.exists("%s/%s/libcudart.so" % (p, cls._get_lib_dir_name(p))):
                return False
        return True

    def get_lib_dir_path(self) -> str:
        """
        :return: dir of the libcudart found at runtime if there is one, otherwise the path from the base class
        """
        if self._runtime_libcudart:
            return os.path.dirname(self._runtime_libcudart)
        return super().get_lib_dir_path()

    def get_ld_flag_for_linking_cudart(self) -> str:
        """
        :return: ld flag. Links exactly the file name of the libcudart found at runtime (``-l:<name>``)
            if there is one, otherwise the generic ``-lcudart``.
        """
        if self._runtime_libcudart:
            return f"-l:{os.path.basename(self._runtime_libcudart)}"
        return "-lcudart"

    def get_compiler_bin(self) -> Optional[str]:
        """
        :return: path to nvcc as selected in the constructor. None if no CUDA path was found.
        """
        return self._compiler_bin
291
+
292
+
293
def _get_available_gpu_cuda_min_compute_capability() -> Optional[float]:
    """
    Queries all CUDA devices visible to PyTorch (:func:`torch.cuda.device_count`,
    :func:`torch.cuda.get_device_properties`).

    :return: the lowest compute capability over all devices, e.g. 3.0, or 5.0, etc,
        or None if there is no device
    """
    capabilities = []
    for device_idx in range(torch.cuda.device_count()):
        device_props = torch.cuda.get_device_properties(device_idx)
        # "<major>.<minor>" as float, e.g. major=8, minor=6 -> 8.6
        capabilities.append(float(f"{device_props.major}.{device_props.minor}"))
    return min(capabilities) if capabilities else None
returnn/util/basic.py CHANGED
@@ -3816,6 +3816,8 @@ def should_write_to_disk(config):
3816
3816
  return False
3817
3817
  if config.is_true("dry_run"):
3818
3818
  return False
3819
+ if config.is_true("torch_profile"):
3820
+ return False
3819
3821
  return True
3820
3822
 
3821
3823
 
@@ -4502,7 +4504,7 @@ _find_libcudart_from_runtime_cached = None
4502
4504
  def find_libcudart_from_runtime():
4503
4505
  """
4504
4506
  Looks through all libs via :func:`collect_proc_maps_exec_files`,
4505
- and searches for all which have the ``sgemm`` symbol.
4507
+ and searches for libcudart.
4506
4508
  Currently only works on Linux (because collect_proc_maps_exec_files).
4507
4509
 
4508
4510
  :return: list of libs (their path)
@@ -0,0 +1,332 @@
1
+ """
2
+ CUDA environment detection and information.
3
+ """
4
+
5
+ from __future__ import annotations
6
+ from typing import Dict, Tuple, List
7
+ import os
8
+ import re
9
+
10
+
11
class CudaEnv:
    """
    Information about the Nvidia CUDA environment, and library.
    Also path to ``nvcc``, the CUDA compiler.
    """

    # One cached instance per (sub)class, see get_instance().
    _instance_per_cls: Dict[type, "CudaEnv"] = {}
    # If enabled, the search for CUDA prints what it checks and what it finds.
    verbose_find_cuda = False

    def __init__(self):
        """
        Determines :attr:`cuda_path`.
        It is None if CUDA is disabled via the env var ``DISABLE_CUDA``,
        or via ``CUDA_VISIBLE_DEVICES`` being ``""`` or ``"-1"``,
        or if no valid CUDA path was found.
        """
        from returnn.util.basic import to_bool

        if to_bool(os.environ.get("DISABLE_CUDA", "0")):
            self.cuda_path = None
            if self.verbose_find_cuda:
                print("CUDA disabled via env DISABLE_CUDA.")
        elif os.environ.get("CUDA_VISIBLE_DEVICES", None) in ["", "-1"]:
            self.cuda_path = None
            if self.verbose_find_cuda:
                print(f"CUDA disabled via env CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']!r}.")
        else:
            self.cuda_path = self._find_cuda_path()
            if self.verbose_find_cuda:
                print("CUDA path:", self.cuda_path)

        # Lazily filled caches, see get_max_compute_capability() and get_cuda_version().
        self._max_compute_capability = None
        self._cuda_version = None

    @classmethod
    def _find_nvcc_in_path(cls):
        """
        :return: yields full path to nvcc, for every dir in the env var ``PATH`` which contains it
        :rtype: typing.Iterator[str]
        """
        # PATH might be unset. Then there is just nothing to search.
        for p in os.environ.get("PATH", "").split(os.pathsep):
            if not p:
                continue
            pp = "%s/nvcc" % p
            if os.path.exists(pp):
                yield pp

    @classmethod
    def _find_lib_in_ld_path(cls):
        """
        :return: yields full path to libcudart.so, for every LD path which contains it
        :rtype: typing.Iterator[str]
        """
        from returnn.util.basic import get_ld_paths

        for p in get_ld_paths():
            pp = "%s/libcudart.so" % p
            if os.path.exists(pp):
                yield pp

    @classmethod
    def _get_lib_dir_name(cls, base_path):
        """
        :param str base_path: CUDA base path
        :return: dir name in base path
        :rtype: str
        """
        from returnn.util.basic import is_64bit_platform, get_ld_paths

        for ld_path in get_ld_paths():
            # We also want to allow "lib/x86_64-linux-gnu" for "/usr".
            # However, this logic should not be triggered for incorrect cases.
            # E.g. base_path="/usr" would be the prefix for most LD paths.
            if ld_path.startswith(base_path + "/lib") and os.path.exists("%s/libcudart.so" % ld_path):
                return ld_path[len(base_path) + 1 :]
        if is_64bit_platform():
            return "lib64"
        return "lib"

    # If set, it is an error (assertion) when the libcudart found at runtime
    # is not inside a valid CUDA path.
    _runtime_libcudart_path_must_be_valid: bool = False

    @classmethod
    def _cuda_path_candidate_via_proc_map_libcudart(cls):
        """
        :return: CUDA path derived from the libcudart which is currently loaded, or None
        :rtype: str|None
        """
        from returnn.util.basic import find_libcudart_from_runtime

        fn = find_libcudart_from_runtime()
        if cls.verbose_find_cuda:
            print("libcudart.so found from /proc/maps:", fn)
        if not fn:
            return None
        # fn is e.g. '/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcudart.so.8.0.61',
        # or maybe '/usr/local/cuda-8.0/lib64/libcudart.so'
        # or maybe ".../site-packages/nvidia/cuda_runtime/lib/libcudart.so.12"
        # or ".../site-packages/nvidia/cu13/lib/libcudart.so.13"
        p = os.path.dirname(os.path.dirname(fn))
        # Walk up the dir tree until we find a valid CUDA path, or reach the root.
        while not cls._check_valid_cuda_path(p):
            p = os.path.dirname(p)
            if p in ["", "/"]:
                if cls.verbose_find_cuda:
                    print(f"Loaded lib {fn} does not seem to be in valid CUDA path.")
                assert not cls._runtime_libcudart_path_must_be_valid
                return None
        assert cls._check_valid_cuda_path(p)
        return p

    @classmethod
    def _cuda_path_candidates(cls):
        """
        :return: yields CUDA path candidates (not necessarily valid), in this order:
            via the loaded libcudart, env ``CUDA_HOME``, env ``CUDA_PATH``,
            nvcc in ``PATH``, libcudart.so in the LD paths, common install locations
        :rtype: typing.Iterator[str]
        """
        p = cls._cuda_path_candidate_via_proc_map_libcudart()
        if p:
            yield p
        if os.environ.get("CUDA_HOME"):
            yield os.environ.get("CUDA_HOME")
        if os.environ.get("CUDA_PATH"):
            yield os.environ.get("CUDA_PATH")
        for p in cls._find_nvcc_in_path():
            # Expect p == "/usr/local/cuda-8.0/bin/nvcc" or so.
            postfix = "/bin/nvcc"
            if cls.verbose_find_cuda:
                print("found cuda nvcc (wanted postfix: %r): %s" % (postfix, p))
            if not p.endswith(postfix):
                continue
            yield p[: -len(postfix)] or "/"
        for p in cls._find_lib_in_ld_path():
            # Expect p == "/usr/local/cuda-8.0/lib64/libcudart.so" or so.
            d = "/".join(p.split("/")[:-2]) or "/"  # Get "/usr/local/cuda-8.0".
            if cls.verbose_find_cuda:
                print("found cuda lib: %s (path %s)" % (p, d))
            yield d
        # Check common installation location.
        for path in get_cuda_path_candidates_from_common_install_locations():
            yield path

    @classmethod
    def _check_valid_cuda_path(cls, p):
        """
        :param str p: path to CUDA, e.g. "/usr/local/cuda-8.0"
        :return: whether this is a valid CUDA path, i.e. we find all what we need
        :rtype: bool
        """
        if cls.verbose_find_cuda:
            print("check valid CUDA path: %s" % p)
        if not os.path.exists("%s/bin/nvcc" % p):
            return False
        if not os.path.exists("%s/include/cuda.h" % p):
            return False
        if not os.path.exists("%s/%s/libcudart.so" % (p, cls._get_lib_dir_name(p))):
            return False
        return True

    @classmethod
    def _find_cuda_path(cls):
        """
        :return: base CUDA path if we find one, otherwise None
        :rtype: str|None
        """
        for p in cls._cuda_path_candidates():
            if cls._check_valid_cuda_path(p):
                return p
        return None

    def is_available(self):
        """
        :return: whether a CUDA path was found
        :rtype: bool
        """
        return bool(self.cuda_path)

    def get_cuda_version(self) -> Tuple[int, int]:
        """
        Get CUDA version as (major, minor).
        """
        if self._cuda_version:
            return self._cuda_version
        assert self.cuda_path
        # Parse CUDA_VERSION from cuda.h.
        cuda_h_path = f"{self.cuda_path}/include/cuda.h"
        self._cuda_version = _parse_cuda_version_from_cuda_h(cuda_h_path)
        return self._cuda_version

    def get_max_compute_capability(self):
        """
        :return: the highest compute capability supported by nvcc, or float("inf") if not known
        :rtype: float
        """
        if self._max_compute_capability is None:
            cuda_occupancy_path = "%s/include/cuda_occupancy.h" % self.cuda_path
            if os.path.exists(cuda_occupancy_path):
                major, minor = None, 0
                # Context manager, such that the file handle is closed right away.
                with open(cuda_occupancy_path) as f:
                    lines = f.read().splitlines()
                for line in lines:
                    m = re.match("^#define\\s+__CUDA_OCC_(MAJOR|MINOR)__\\s+([0-9]+)$", line)
                    if m:
                        s, v = m.groups()
                        v = int(v)
                        if s == "MAJOR":
                            major = v
                        else:
                            minor = v
                if major:
                    self._max_compute_capability = float(major) + float(minor) * 0.1
            if self._max_compute_capability is None:
                self._max_compute_capability = float("inf")
        return self._max_compute_capability

    @staticmethod
    def get_cc_bin() -> str:
        """
        :return: path
        """
        from .native_code_compiler import get_cc_bin

        return get_cc_bin()

    def get_compiler_opts(self):
        """
        :return: options for nvcc: the host compiler (``-ccbin``), the CUDA include dirs,
            the CUDA lib dir, ``-x cu`` and ``-v``. This is a new list on every call.
        :rtype: list[str]
        """
        return [
            "-ccbin",
            self.get_cc_bin(),
            "-I",
            "%s/targets/x86_64-linux/include" % self.cuda_path,
            "-I",
            "%s/include" % self.cuda_path,
            "-L",
            self.get_lib_dir_path(),
            "-x",
            "cu",
            "-v",
        ]

    def get_lib_dir_path(self) -> str:
        """library path"""
        return "%s/%s" % (self.cuda_path, self._get_lib_dir_name(self.cuda_path))

    def get_compiler_bin(self):
        """
        :return: path
        :rtype: str
        """
        assert self.cuda_path
        return "%s/bin/nvcc" % self.cuda_path

    @classmethod
    def get_instance(cls) -> "CudaEnv":
        """
        :return: instance for this class
        """
        if cls._instance_per_cls.get(cls) is not None:
            return cls._instance_per_cls[cls]
        cls._instance_per_cls[cls] = cls()
        return cls._instance_per_cls[cls]
252
+
253
+
254
def get_cuda_path_candidates_from_common_install_locations() -> List[str]:
    """
    Looks for ``cuda`` and ``cuda-*`` entries in ``/usr/local``
    which have both nvcc and cuda.h.

    :return: list of possible CUDA installation paths from common locations,
        sorted by CUDA version, highest version first
    """
    base_dir = "/usr/local"
    if not os.path.exists(base_dir):
        return []

    versioned_paths = []
    for entry in sorted(os.listdir(base_dir)):
        if entry != "cuda" and not entry.startswith("cuda-"):
            continue
        candidate = f"{base_dir}/{entry}"
        if not _check_valid_cuda_path_with_nvcc(candidate):
            continue
        cuda_version = _parse_cuda_version_from_cuda_h(f"{candidate}/include/cuda.h")
        versioned_paths.append((cuda_version, candidate))

    # Stable sort: entries with the same version keep their (name-sorted) order.
    ranked = sorted(versioned_paths, key=lambda item: item[0], reverse=True)
    return [path for _version, path in ranked]
271
+
272
+
273
def get_best_nvcc_path_for_cuda_version(cuda_version: Tuple[int, int]) -> str:
    """
    Searches all CUDA path candidates which provide nvcc and cuda.h.
    Preference: exact version match, then the lowest higher version, then the highest lower version.

    :param cuda_version: wanted CUDA version as (major, minor)
    :return: path to nvcc
    :rtype: str
    :raises RuntimeError: if no CUDA installation with nvcc was found at all
    """
    cuda_paths = []

    # noinspection PyProtectedMember
    for p in CudaEnv._cuda_path_candidates():
        if _check_valid_cuda_path_with_nvcc(p):
            version = _parse_cuda_version_from_cuda_h(f"{p}/include/cuda.h")
            if version == cuda_version:
                # if we found a matching one, directly return it
                return f"{p}/bin/nvcc"
            cuda_paths.append((version, p))

    if not cuda_paths:
        raise RuntimeError(f"No valid CUDA installation found for version {cuda_version}.")

    only_higher_versions = [(version, p) for (version, p) in cuda_paths if version >= cuda_version]
    if only_higher_versions:
        only_higher_versions.sort(key=lambda x: x[0])
        # return the lowest higher version
        best_version, best_path = only_higher_versions[0]
        # Compare the major numbers. best_version itself is a (major, minor) tuple.
        if best_version[0] != cuda_version[0]:  # major version differs
            print(
                f"Warning: No exact match for CUDA version {cuda_version}, "
                f"using version {best_version} instead."
            )
        return f"{best_path}/bin/nvcc"

    cuda_paths.sort(key=lambda x: x[0])
    # return the highest lower version
    print(f"Warning: No exact match for CUDA version {cuda_version}, using lower version {cuda_paths[-1][0]} instead.")
    return f"{cuda_paths[-1][1]}/bin/nvcc"
307
+
308
+
309
def _check_valid_cuda_path_with_nvcc(p: str) -> bool:
    """
    :param str p: path to CUDA, e.g. "/usr/local/cuda-8.0"
    :return: whether both ``bin/nvcc`` and ``include/cuda.h`` exist in this path
    :rtype: bool
    """
    required_files = ("%s/bin/nvcc" % p, "%s/include/cuda.h" % p)
    return all(os.path.exists(fn) for fn in required_files)
320
+
321
+
322
def _parse_cuda_version_from_cuda_h(cuda_h_path: str) -> Tuple[int, int]:
    """
    Parses the ``CUDA_VERSION`` define from the given ``cuda.h``.

    :param cuda_h_path: path to ``cuda.h``. Must exist.
    :return: CUDA version as (major, minor), e.g. (12, 8) for ``#define CUDA_VERSION 12080``
    :raises RuntimeError: if the file has no ``CUDA_VERSION`` define
    """
    assert os.path.exists(cuda_h_path)
    # Context manager, such that the file handle is closed right away.
    with open(cuda_h_path) as f:
        lines = f.read().splitlines()
    for line in lines:
        # Like: #define CUDA_VERSION 12080
        m = re.match(r"^#define\s+CUDA_VERSION\s+([0-9]+)$", line)
        if m:
            version_num = int(m.group(1))
            # The number is major * 1000 + minor * 10.
            major = version_num // 1000
            minor = (version_num % 1000) // 10
            return major, minor
    raise RuntimeError(f"Could not determine CUDA version from {cuda_h_path}.")
returnn/util/debug.py CHANGED
@@ -309,6 +309,7 @@ def _get_native_signal_handler_lib_filename() -> str:
309
309
  old_signal_handler[SIGILL] = signal(SIGILL, signal_handler);
310
310
  old_signal_handler[SIGABRT] = signal(SIGABRT, signal_handler);
311
311
  old_signal_handler[SIGFPE] = signal(SIGFPE, signal_handler);
312
+ old_signal_handler[SIGUSR1] = signal(SIGUSR1, signal_handler);
312
313
  }
313
314
  """
314
315
  ),