numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (44)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +51 -16
  3. numba_cuda/numba/cuda/codegen.py +11 -9
  4. numba_cuda/numba/cuda/compiler.py +3 -39
  5. numba_cuda/numba/cuda/cuda_paths.py +20 -22
  6. numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
  7. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  8. numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
  9. numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
  10. numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
  11. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
  12. numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
  13. numba_cuda/numba/cuda/decorators.py +18 -0
  14. numba_cuda/numba/cuda/dispatcher.py +1 -0
  15. numba_cuda/numba/cuda/flags.py +36 -0
  16. numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
  17. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
  18. numba_cuda/numba/cuda/target.py +55 -2
  19. numba_cuda/numba/cuda/testing.py +0 -22
  20. numba_cuda/numba/cuda/tests/__init__.py +0 -2
  21. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
  22. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
  23. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
  24. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
  25. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
  26. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
  27. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
  28. numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
  29. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
  30. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
  31. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
  32. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
  33. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  34. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
  35. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
  36. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
  37. numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
  38. numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
  39. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
  40. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
  41. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
  42. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
  43. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
  44. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
- 0.15.1
+ 0.16.0
numba_cuda/numba/cuda/__init__.py CHANGED
@@ -2,24 +2,28 @@ import importlib
  from numba import runtests
  from numba.core import config
  from .utils import _readenv
+ import warnings
 
- # Enable pynvjitlink if the environment variables NUMBA_CUDA_ENABLE_PYNVJITLINK
- # or CUDA_ENABLE_PYNVJITLINK are set, or if the pynvjitlink module is found. If
- # explicitly disabled, do not use pynvjitlink, even if present in the env.
- _pynvjitlink_enabled_in_env = _readenv(
-     "NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
- )
- _pynvjitlink_enabled_in_cfg = getattr(config, "CUDA_ENABLE_PYNVJITLINK", None)
 
- if _pynvjitlink_enabled_in_env is not None:
-     ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
- elif _pynvjitlink_enabled_in_cfg is not None:
-     ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_cfg
- else:
-     ENABLE_PYNVJITLINK = importlib.util.find_spec("pynvjitlink") is not None
+ # Enable pynvjitlink based on the following precedence:
+ # 1. Config setting "CUDA_ENABLE_PYNVJITLINK" (highest priority)
+ # 2. Environment variable "NUMBA_CUDA_ENABLE_PYNVJITLINK"
+ # 3. Auto-detection of pynvjitlink module (lowest priority)
+
+ pynvjitlink_auto_enabled = False
 
- if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
-     config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
+ if getattr(config, "CUDA_ENABLE_PYNVJITLINK", None) is None:
+     if (
+         _pynvjitlink_enabled_in_env := _readenv(
+             "NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
+         )
+     ) is not None:
+         config.CUDA_ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
+     else:
+         pynvjitlink_auto_enabled = (
+             importlib.util.find_spec("pynvjitlink") is not None
+         )
+         config.CUDA_ENABLE_PYNVJITLINK = pynvjitlink_auto_enabled
 
  # Upstream numba sets CUDA_USE_NVIDIA_BINDING to 0 by default, so it always
  # exists. Override, but not if explicitly set to 0 in the envioronment.
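
For orientation, here is a minimal standalone sketch of the new precedence (an explicit config setting, then the environment variable, then auto-detection). The helper name resolve_pynvjitlink is hypothetical and only illustrates the decision order; it is not part of the package.

    # Hypothetical helper illustrating the precedence; not numba-cuda code.
    import importlib.util
    import os

    def resolve_pynvjitlink(config_setting=None):
        if config_setting is not None:      # 1. explicit config setting wins
            return bool(config_setting)
        env = os.environ.get("NUMBA_CUDA_ENABLE_PYNVJITLINK")
        if env is not None:                 # 2. then the environment variable
            return env not in ("0", "false", "False")
        # 3. finally, auto-enable only if the module can be imported
        return importlib.util.find_spec("pynvjitlink") is not None
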
@@ -32,7 +36,10 @@ else:
      USE_NV_BINDING = True
  config.CUDA_USE_NVIDIA_BINDING = USE_NV_BINDING
  if config.CUDA_USE_NVIDIA_BINDING:
-     if not importlib.util.find_spec("cuda.bindings"):
+     if not (
+         importlib.util.find_spec("cuda")
+         and importlib.util.find_spec("cuda.bindings")
+     ):
          raise ImportError(
              "CUDA bindings not found. Please pip install the "
              "cuda-bindings package. Alternatively, install "
@@ -43,6 +50,21 @@ if config.CUDA_USE_NVIDIA_BINDING:
              "bindings."
          )
 
+ if config.CUDA_ENABLE_PYNVJITLINK:
+     if USE_NV_BINDING:
+         warnings.warn(
+             "Explicitly enabling pynvjitlink is no longer necessary. "
+             "NVIDIA bindings are enabled. cuda.core will be used "
+             "in place of pynvjitlink."
+         )
+     elif pynvjitlink_auto_enabled:
+         # Ignore the fact that pynvjitlink is enabled, because that was an
+         # automatic decision based on discovering pynvjitlink was present; the
+         # user didn't ask for it
+         pass
+     else:
+         raise RuntimeError("nvJitLink requires the NVIDIA CUDA bindings. ")
+
  if config.ENABLE_CUDASIM:
      from .simulator_init import *
  else:
@@ -61,6 +83,19 @@ from numba.cuda.compiler import (
  implementation = "NVIDIA"
 
 
+ # The default compute capability as set by the upstream Numba implementation.
+ config_default_cc = config.CUDA_DEFAULT_PTX_CC
+
+ # The default compute capability for Numba-CUDA. This will usually override the
+ # upstream Numba built-in default of 5.0, unless the user has set it even
+ # higher, in which case we should use the user-specified value. This default is
+ # aligned with recent toolkit versions.
+ numba_cuda_default_ptx_cc = (7, 5)
+
+ if numba_cuda_default_ptx_cc > config_default_cc:
+     config.CUDA_DEFAULT_PTX_CC = numba_cuda_default_ptx_cc
+
+
  def test(*args, **kwargs):
      if not is_available():
          raise cuda_error()
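
Compute capabilities are plain tuples, so ordinary tuple comparison decides whether the new (7, 5) default replaces the configured value. A small illustration with made-up values (effective_default_cc is hypothetical, not package code):

    def effective_default_cc(configured_cc, numba_cuda_default=(7, 5)):
        # Equivalent to the compare-and-assign in the hunk above.
        return max(configured_cc, numba_cuda_default)

    assert effective_default_cc((5, 0)) == (7, 5)   # upstream default is raised
    assert effective_default_cc((8, 6)) == (8, 6)   # a higher user setting is kept
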
numba_cuda/numba/cuda/codegen.py CHANGED
@@ -2,7 +2,7 @@ from llvmlite import ir
 
  from numba.core import config, serialize
  from numba.core.codegen import Codegen, CodeLibrary
- from .cudadrv import devices, driver, nvvm, runtime
+ from .cudadrv import devices, driver, nvrtc, nvvm, runtime
  from numba.cuda.cudadrv.libs import get_cudalib
  from numba.cuda.cudadrv.linkable_code import LinkableCode
  from numba.cuda.memory_management.nrt import NRT_LIBRARY
@@ -22,7 +22,10 @@ def run_nvdisasm(cubin, flags):
      try:
          fd, fname = tempfile.mkstemp()
          with open(fname, "wb") as f:
-             f.write(cubin)
+             if config.CUDA_USE_NVIDIA_BINDING:
+                 f.write(cubin.code)
+             else:
+                 f.write(cubin)
 
          try:
              cp = subprocess.run(
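
With the NVIDIA bindings enabled, the linked cubin arrives as an object whose raw bytes are exposed on a .code attribute (as the branch above shows), rather than as a plain bytes value. A hedged sketch of the same dispatch in isolation (write_cubin is hypothetical):

    def write_cubin(f, cubin, use_nvidia_binding):
        # Unwrap the object form under the NVIDIA bindings; otherwise the
        # value is already raw bytes.
        payload = cubin.code if use_nvidia_binding else cubin
        f.write(payload)
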
@@ -208,7 +211,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          if ptxes:
              return ptxes
 
-         arch = nvvm.get_arch_option(*cc)
+         arch = nvrtc.get_arch_option(*cc)
          options = self._nvvm_options.copy()
          options["arch"] = arch
 
@@ -237,7 +240,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          if ltoir is not None:
              return ltoir
 
-         arch = nvvm.get_arch_option(*cc)
+         arch = nvrtc.get_arch_option(*cc)
          options = self._nvvm_options.copy()
          options["arch"] = arch
          options["gen-lto"] = None
@@ -271,7 +274,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
              return cubin
 
          if self._lto and config.DUMP_ASSEMBLY:
-             linker = driver.Linker.new(
+             linker = driver._Linker.new(
                  max_registers=self._max_registers,
                  cc=cc,
                  additional_flags=["-ptx"],
@@ -280,14 +283,14 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
              # `-ptx` flag is meant to view the optimized PTX for LTO objects.
              # Non-LTO objects are not passed to linker.
              self._link_all(linker, cc, ignore_nonlto=True)
-
-             ptx = linker.get_linked_ptx().decode("utf-8")
+             ptx = linker.get_linked_ptx()
+             ptx = ptx.decode("utf-8")
 
              print(("ASSEMBLY (AFTER LTO) %s" % self._name).center(80, "-"))
              print(ptx)
              print("=" * 80)
 
-         linker = driver.Linker.new(
+         linker = driver._Linker.new(
              max_registers=self._max_registers, cc=cc, lto=self._lto
          )
          self._link_all(linker, cc, ignore_nonlto=False)
@@ -312,7 +315,6 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          cufunc = self._cufunc_cache.get(device.id, None)
          if cufunc:
              return cufunc
-
          cubin = self.get_cubin(cc=device.compute_capability)
          module = ctx.create_module_image(
              cubin, self._setup_functions, self._teardown_functions
numba_cuda/numba/cuda/compiler.py CHANGED
@@ -14,8 +14,6 @@ from numba.core.compiler import (
      sanitize_compile_result_entries,
      CompilerBase,
      DefaultPassBuilder,
-     Flags,
-     Option,
      CompileResult,
  )
  from numba.core.compiler_lock import global_compiler_lock
@@ -37,47 +35,13 @@ from warnings import warn
  from numba.cuda import nvvmutils
  from numba.cuda.api import get_current_device
  from numba.cuda.codegen import ExternalCodeLibrary
- from numba.cuda.cudadrv import nvvm
+ from numba.cuda.cudadrv import nvvm, nvrtc
  from numba.cuda.descriptor import cuda_target
+ from numba.cuda.flags import CUDAFlags
  from numba.cuda.target import CUDACABICallConv
  from numba.cuda import lowering
 
 
- def _nvvm_options_type(x):
-     if x is None:
-         return None
-
-     else:
-         assert isinstance(x, dict)
-         return x
-
-
- def _optional_int_type(x):
-     if x is None:
-         return None
-
-     else:
-         assert isinstance(x, int)
-         return x
-
-
- class CUDAFlags(Flags):
-     nvvm_options = Option(
-         type=_nvvm_options_type,
-         default=None,
-         doc="NVVM options",
-     )
-     compute_capability = Option(
-         type=tuple,
-         default=None,
-         doc="Compute Capability",
-     )
-     max_registers = Option(
-         type=_optional_int_type, default=None, doc="Max registers"
-     )
-     lto = Option(type=bool, default=False, doc="Enable Link-time Optimization")
-
-
  # The CUDACompileResult (CCR) has a specially-defined entry point equal to its
  # id. This is because the entry point is used as a key into a dict of
  # overloads by the base dispatcher. The id of the CCR is the only small and
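
The CUDAFlags class removed here is not gone: per the file list, it now lives in the new numba_cuda/numba/cuda/flags.py module and is imported above. A hedged usage sketch under that assumption, with field names taken from the removed definition:

    from numba.cuda.flags import CUDAFlags

    flags = CUDAFlags()
    flags.nvvm_options = {"opt": 3}     # dict, per the removed options type check
    flags.compute_capability = (7, 5)   # compute capability as a tuple
    flags.lto = False                   # link-time optimization off by default
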
@@ -676,7 +640,7 @@ def compile(
      # If the user has used the config variable to specify a non-default that is
      # greater than the lowest non-deprecated one, then we should default to
      # their specified CC instead of the lowest non-deprecated one.
-     MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvvm.LOWEST_CURRENT_CC)
+     MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvrtc.get_lowest_supported_cc())
      cc = cc or MIN_CC
 
      cres = compile_cuda(
numba_cuda/numba/cuda/cuda_paths.py CHANGED
@@ -132,16 +132,9 @@ def _get_nvvm_wheel():
      return None
 
 
- def get_major_cuda_version():
-     # TODO: remove once cuda-python is
-     # a hard dependency
-     from numba.cuda.cudadrv.runtime import get_version
-
-     return get_version()[0]
-
-
  def get_nvrtc_dso_path():
      site_paths = [site.getusersitepackages()] + site.getsitepackages()
+
      for sp in site_paths:
          lib_dir = os.path.join(
              sp,
@@ -150,23 +143,28 @@ def get_nvrtc_dso_path():
              ("bin" if IS_WIN32 else "lib") if sp else None,
          )
          if lib_dir and os.path.exists(lib_dir):
-             try:
-                 major = get_major_cuda_version()
-                 if major == 11:
-                     cu_ver = "112" if IS_WIN32 else "11.2"
-                 elif major == 12:
-                     cu_ver = "120" if IS_WIN32 else "12"
-                 else:
-                     raise NotImplementedError(f"CUDA {major} is not supported")
-
-                 return os.path.join(
+             chosen_path = None
+
+             # Check for each version of the NVRTC DLL, preferring the most
+             # recent.
+             versions = (
+                 "112" if IS_WIN32 else "11.2",
+                 "120" if IS_WIN32 else "12",
+                 "130" if IS_WIN32 else "13",
+             )
+
+             for version in versions:
+                 dso_path = os.path.join(
                      lib_dir,
-                     f"nvrtc64_{cu_ver}_0.dll"
+                     f"nvrtc64_{version}_0.dll"
                      if IS_WIN32
-                     else f"libnvrtc.so.{cu_ver}",
+                     else f"libnvrtc.so.{version}",
                  )
-             except RuntimeError:
-                 continue
+
+                 if os.path.exists(dso_path) and os.path.isfile(dso_path):
+                     chosen_path = dso_path
+
+             return chosen_path
 
 
  def _get_nvrtc_wheel():
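
In the new search, the versions tuple is ordered oldest to newest and chosen_path is overwritten by every library found on disk, so the newest installed NVRTC wins. A standalone sketch of that selection rule (pick_newest_existing and the example paths are hypothetical):

    import os

    def pick_newest_existing(candidates):
        # Later entries are newer; the last one that exists is returned.
        chosen = None
        for path in candidates:
            if os.path.isfile(path):
                chosen = path
        return chosen

    # e.g. pick_newest_existing(["libnvrtc.so.11.2", "libnvrtc.so.12", "libnvrtc.so.13"])
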