numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +51 -16
- numba_cuda/numba/cuda/codegen.py +11 -9
- numba_cuda/numba/cuda/compiler.py +3 -39
- numba_cuda/numba/cuda/cuda_paths.py +20 -22
- numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
- numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
- numba_cuda/numba/cuda/decorators.py +18 -0
- numba_cuda/numba/cuda/dispatcher.py +1 -0
- numba_cuda/numba/cuda/flags.py +36 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
- numba_cuda/numba/cuda/target.py +55 -2
- numba_cuda/numba/cuda/testing.py +0 -22
- numba_cuda/numba/cuda/tests/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.15.1
|
1
|
+
0.16.0
|
numba_cuda/numba/cuda/__init__.py
CHANGED
@@ -2,24 +2,28 @@ import importlib
|
|
2
2
|
from numba import runtests
|
3
3
|
from numba.core import config
|
4
4
|
from .utils import _readenv
|
5
|
+
import warnings
|
5
6
|
|
6
|
-
# Enable pynvjitlink if the environment variables NUMBA_CUDA_ENABLE_PYNVJITLINK
|
7
|
-
# or CUDA_ENABLE_PYNVJITLINK are set, or if the pynvjitlink module is found. If
|
8
|
-
# explicitly disabled, do not use pynvjitlink, even if present in the env.
|
9
|
-
_pynvjitlink_enabled_in_env = _readenv(
|
10
|
-
"NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
|
11
|
-
)
|
12
|
-
_pynvjitlink_enabled_in_cfg = getattr(config, "CUDA_ENABLE_PYNVJITLINK", None)
|
13
7
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
8
|
+
# Enable pynvjitlink based on the following precedence:
|
9
|
+
# 1. Config setting "CUDA_ENABLE_PYNVJITLINK" (highest priority)
|
10
|
+
# 2. Environment variable "NUMBA_CUDA_ENABLE_PYNVJITLINK"
|
11
|
+
# 3. Auto-detection of pynvjitlink module (lowest priority)
|
12
|
+
|
13
|
+
pynvjitlink_auto_enabled = False
|
20
14
|
|
21
|
-
if
|
22
|
-
|
15
|
+
if getattr(config, "CUDA_ENABLE_PYNVJITLINK", None) is None:
|
16
|
+
if (
|
17
|
+
_pynvjitlink_enabled_in_env := _readenv(
|
18
|
+
"NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
|
19
|
+
)
|
20
|
+
) is not None:
|
21
|
+
config.CUDA_ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
|
22
|
+
else:
|
23
|
+
pynvjitlink_auto_enabled = (
|
24
|
+
importlib.util.find_spec("pynvjitlink") is not None
|
25
|
+
)
|
26
|
+
config.CUDA_ENABLE_PYNVJITLINK = pynvjitlink_auto_enabled
|
23
27
|
|
24
28
|
# Upstream numba sets CUDA_USE_NVIDIA_BINDING to 0 by default, so it always
|
25
29
|
# exists. Override, but not if explicitly set to 0 in the environment.
|
@@ -32,7 +36,10 @@ else:
|
|
32
36
|
USE_NV_BINDING = True
|
33
37
|
config.CUDA_USE_NVIDIA_BINDING = USE_NV_BINDING
|
34
38
|
if config.CUDA_USE_NVIDIA_BINDING:
|
35
|
-
if not
|
39
|
+
if not (
|
40
|
+
importlib.util.find_spec("cuda")
|
41
|
+
and importlib.util.find_spec("cuda.bindings")
|
42
|
+
):
|
36
43
|
raise ImportError(
|
37
44
|
"CUDA bindings not found. Please pip install the "
|
38
45
|
"cuda-bindings package. Alternatively, install "
|
@@ -43,6 +50,21 @@ if config.CUDA_USE_NVIDIA_BINDING:
|
|
43
50
|
"bindings."
|
44
51
|
)
|
45
52
|
|
53
|
+
if config.CUDA_ENABLE_PYNVJITLINK:
|
54
|
+
if USE_NV_BINDING:
|
55
|
+
warnings.warn(
|
56
|
+
"Explicitly enabling pynvjitlink is no longer necessary. "
|
57
|
+
"NVIDIA bindings are enabled. cuda.core will be used "
|
58
|
+
"in place of pynvjitlink."
|
59
|
+
)
|
60
|
+
elif pynvjitlink_auto_enabled:
|
61
|
+
# Ignore the fact that pynvjitlink is enabled, because that was an
|
62
|
+
# automatic decision based on discovering pynvjitlink was present; the
|
63
|
+
# user didn't ask for it
|
64
|
+
pass
|
65
|
+
else:
|
66
|
+
raise RuntimeError("nvJitLink requires the NVIDIA CUDA bindings. ")
|
67
|
+
|
46
68
|
if config.ENABLE_CUDASIM:
|
47
69
|
from .simulator_init import *
|
48
70
|
else:
|
@@ -61,6 +83,19 @@ from numba.cuda.compiler import (
|
|
61
83
|
implementation = "NVIDIA"
|
62
84
|
|
63
85
|
|
86
|
+
# The default compute capability as set by the upstream Numba implementation.
|
87
|
+
config_default_cc = config.CUDA_DEFAULT_PTX_CC
|
88
|
+
|
89
|
+
# The default compute capability for Numba-CUDA. This will usually override the
|
90
|
+
# upstream Numba built-in default of 5.0, unless the user has set it even
|
91
|
+
# higher, in which case we should use the user-specified value. This default is
|
92
|
+
# aligned with recent toolkit versions.
|
93
|
+
numba_cuda_default_ptx_cc = (7, 5)
|
94
|
+
|
95
|
+
if numba_cuda_default_ptx_cc > config_default_cc:
|
96
|
+
config.CUDA_DEFAULT_PTX_CC = numba_cuda_default_ptx_cc
|
97
|
+
|
98
|
+
|
64
99
|
def test(*args, **kwargs):
|
65
100
|
if not is_available():
|
66
101
|
raise cuda_error()
|
numba_cuda/numba/cuda/codegen.py
CHANGED
@@ -2,7 +2,7 @@ from llvmlite import ir
|
|
2
2
|
|
3
3
|
from numba.core import config, serialize
|
4
4
|
from numba.core.codegen import Codegen, CodeLibrary
|
5
|
-
from .cudadrv import devices, driver, nvvm, runtime
|
5
|
+
from .cudadrv import devices, driver, nvrtc, nvvm, runtime
|
6
6
|
from numba.cuda.cudadrv.libs import get_cudalib
|
7
7
|
from numba.cuda.cudadrv.linkable_code import LinkableCode
|
8
8
|
from numba.cuda.memory_management.nrt import NRT_LIBRARY
|
@@ -22,7 +22,10 @@ def run_nvdisasm(cubin, flags):
|
|
22
22
|
try:
|
23
23
|
fd, fname = tempfile.mkstemp()
|
24
24
|
with open(fname, "wb") as f:
|
25
|
-
|
25
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
26
|
+
f.write(cubin.code)
|
27
|
+
else:
|
28
|
+
f.write(cubin)
|
26
29
|
|
27
30
|
try:
|
28
31
|
cp = subprocess.run(
|
@@ -208,7 +211,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
208
211
|
if ptxes:
|
209
212
|
return ptxes
|
210
213
|
|
211
|
-
arch =
|
214
|
+
arch = nvrtc.get_arch_option(*cc)
|
212
215
|
options = self._nvvm_options.copy()
|
213
216
|
options["arch"] = arch
|
214
217
|
|
@@ -237,7 +240,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
237
240
|
if ltoir is not None:
|
238
241
|
return ltoir
|
239
242
|
|
240
|
-
arch =
|
243
|
+
arch = nvrtc.get_arch_option(*cc)
|
241
244
|
options = self._nvvm_options.copy()
|
242
245
|
options["arch"] = arch
|
243
246
|
options["gen-lto"] = None
|
@@ -271,7 +274,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
271
274
|
return cubin
|
272
275
|
|
273
276
|
if self._lto and config.DUMP_ASSEMBLY:
|
274
|
-
linker = driver.
|
277
|
+
linker = driver._Linker.new(
|
275
278
|
max_registers=self._max_registers,
|
276
279
|
cc=cc,
|
277
280
|
additional_flags=["-ptx"],
|
@@ -280,14 +283,14 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
280
283
|
# `-ptx` flag is meant to view the optimized PTX for LTO objects.
|
281
284
|
# Non-LTO objects are not passed to linker.
|
282
285
|
self._link_all(linker, cc, ignore_nonlto=True)
|
283
|
-
|
284
|
-
ptx =
|
286
|
+
ptx = linker.get_linked_ptx()
|
287
|
+
ptx = ptx.decode("utf-8")
|
285
288
|
|
286
289
|
print(("ASSEMBLY (AFTER LTO) %s" % self._name).center(80, "-"))
|
287
290
|
print(ptx)
|
288
291
|
print("=" * 80)
|
289
292
|
|
290
|
-
linker = driver.
|
293
|
+
linker = driver._Linker.new(
|
291
294
|
max_registers=self._max_registers, cc=cc, lto=self._lto
|
292
295
|
)
|
293
296
|
self._link_all(linker, cc, ignore_nonlto=False)
|
@@ -312,7 +315,6 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
312
315
|
cufunc = self._cufunc_cache.get(device.id, None)
|
313
316
|
if cufunc:
|
314
317
|
return cufunc
|
315
|
-
|
316
318
|
cubin = self.get_cubin(cc=device.compute_capability)
|
317
319
|
module = ctx.create_module_image(
|
318
320
|
cubin, self._setup_functions, self._teardown_functions
|
numba_cuda/numba/cuda/compiler.py
CHANGED
@@ -14,8 +14,6 @@ from numba.core.compiler import (
|
|
14
14
|
sanitize_compile_result_entries,
|
15
15
|
CompilerBase,
|
16
16
|
DefaultPassBuilder,
|
17
|
-
Flags,
|
18
|
-
Option,
|
19
17
|
CompileResult,
|
20
18
|
)
|
21
19
|
from numba.core.compiler_lock import global_compiler_lock
|
@@ -37,47 +35,13 @@ from warnings import warn
|
|
37
35
|
from numba.cuda import nvvmutils
|
38
36
|
from numba.cuda.api import get_current_device
|
39
37
|
from numba.cuda.codegen import ExternalCodeLibrary
|
40
|
-
from numba.cuda.cudadrv import nvvm
|
38
|
+
from numba.cuda.cudadrv import nvvm, nvrtc
|
41
39
|
from numba.cuda.descriptor import cuda_target
|
40
|
+
from numba.cuda.flags import CUDAFlags
|
42
41
|
from numba.cuda.target import CUDACABICallConv
|
43
42
|
from numba.cuda import lowering
|
44
43
|
|
45
44
|
|
46
|
-
def _nvvm_options_type(x):
|
47
|
-
if x is None:
|
48
|
-
return None
|
49
|
-
|
50
|
-
else:
|
51
|
-
assert isinstance(x, dict)
|
52
|
-
return x
|
53
|
-
|
54
|
-
|
55
|
-
def _optional_int_type(x):
|
56
|
-
if x is None:
|
57
|
-
return None
|
58
|
-
|
59
|
-
else:
|
60
|
-
assert isinstance(x, int)
|
61
|
-
return x
|
62
|
-
|
63
|
-
|
64
|
-
class CUDAFlags(Flags):
|
65
|
-
nvvm_options = Option(
|
66
|
-
type=_nvvm_options_type,
|
67
|
-
default=None,
|
68
|
-
doc="NVVM options",
|
69
|
-
)
|
70
|
-
compute_capability = Option(
|
71
|
-
type=tuple,
|
72
|
-
default=None,
|
73
|
-
doc="Compute Capability",
|
74
|
-
)
|
75
|
-
max_registers = Option(
|
76
|
-
type=_optional_int_type, default=None, doc="Max registers"
|
77
|
-
)
|
78
|
-
lto = Option(type=bool, default=False, doc="Enable Link-time Optimization")
|
79
|
-
|
80
|
-
|
81
45
|
# The CUDACompileResult (CCR) has a specially-defined entry point equal to its
|
82
46
|
# id. This is because the entry point is used as a key into a dict of
|
83
47
|
# overloads by the base dispatcher. The id of the CCR is the only small and
|
@@ -676,7 +640,7 @@ def compile(
|
|
676
640
|
# If the user has used the config variable to specify a non-default that is
|
677
641
|
# greater than the lowest non-deprecated one, then we should default to
|
678
642
|
# their specified CC instead of the lowest non-deprecated one.
|
679
|
-
MIN_CC = max(config.CUDA_DEFAULT_PTX_CC,
|
643
|
+
MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvrtc.get_lowest_supported_cc())
|
680
644
|
cc = cc or MIN_CC
|
681
645
|
|
682
646
|
cres = compile_cuda(
|
numba_cuda/numba/cuda/cuda_paths.py
CHANGED
@@ -132,16 +132,9 @@ def _get_nvvm_wheel():
|
|
132
132
|
return None
|
133
133
|
|
134
134
|
|
135
|
-
def get_major_cuda_version():
|
136
|
-
# TODO: remove once cuda-python is
|
137
|
-
# a hard dependency
|
138
|
-
from numba.cuda.cudadrv.runtime import get_version
|
139
|
-
|
140
|
-
return get_version()[0]
|
141
|
-
|
142
|
-
|
143
135
|
def get_nvrtc_dso_path():
|
144
136
|
site_paths = [site.getusersitepackages()] + site.getsitepackages()
|
137
|
+
|
145
138
|
for sp in site_paths:
|
146
139
|
lib_dir = os.path.join(
|
147
140
|
sp,
|
@@ -150,23 +143,28 @@ def get_nvrtc_dso_path():
|
|
150
143
|
("bin" if IS_WIN32 else "lib") if sp else None,
|
151
144
|
)
|
152
145
|
if lib_dir and os.path.exists(lib_dir):
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
else
|
160
|
-
|
161
|
-
|
162
|
-
|
146
|
+
chosen_path = None
|
147
|
+
|
148
|
+
# Check for each version of the NVRTC DLL, preferring the most
|
149
|
+
# recent.
|
150
|
+
versions = (
|
151
|
+
"112" if IS_WIN32 else "11.2",
|
152
|
+
"120" if IS_WIN32 else "12",
|
153
|
+
"130" if IS_WIN32 else "13",
|
154
|
+
)
|
155
|
+
|
156
|
+
for version in versions:
|
157
|
+
dso_path = os.path.join(
|
163
158
|
lib_dir,
|
164
|
-
f"nvrtc64_{
|
159
|
+
f"nvrtc64_{version}_0.dll"
|
165
160
|
if IS_WIN32
|
166
|
-
else f"libnvrtc.so.{
|
161
|
+
else f"libnvrtc.so.{version}",
|
167
162
|
)
|
168
|
-
|
169
|
-
|
163
|
+
|
164
|
+
if os.path.exists(dso_path) and os.path.isfile(dso_path):
|
165
|
+
chosen_path = dso_path
|
166
|
+
|
167
|
+
return chosen_path
|
170
168
|
|
171
169
|
|
172
170
|
def _get_nvrtc_wheel():
|