numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +51 -16
- numba_cuda/numba/cuda/codegen.py +11 -9
- numba_cuda/numba/cuda/compiler.py +3 -39
- numba_cuda/numba/cuda/cuda_paths.py +20 -22
- numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
- numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
- numba_cuda/numba/cuda/decorators.py +18 -0
- numba_cuda/numba/cuda/dispatcher.py +1 -0
- numba_cuda/numba/cuda/flags.py +36 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
- numba_cuda/numba/cuda/target.py +55 -2
- numba_cuda/numba/cuda/testing.py +0 -22
- numba_cuda/numba/cuda/tests/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
@@ -1,147 +1,16 @@
|
|
1
1
|
"""
|
2
|
-
CUDA Runtime wrapper.
|
2
|
+
Former CUDA Runtime wrapper.
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
The toolkit version can now be obtained from NVRTC, so we don't use a binding
|
5
|
+
to the runtime anymore. This file is provided to maintain the existing API.
|
6
6
|
"""
|
7
7
|
|
8
|
-
import ctypes
|
9
|
-
import functools
|
10
|
-
import sys
|
11
|
-
|
12
|
-
from numba.core import config
|
13
|
-
from numba.cuda.cudadrv.driver import ERROR_MAP, make_logger
|
14
|
-
from numba.cuda.cudadrv.error import CudaSupportError, CudaRuntimeError
|
15
|
-
from numba.cuda.cudadrv.libs import open_cudalib
|
16
|
-
from numba.cuda.cudadrv.rtapi import API_PROTOTYPES
|
17
|
-
from numba.cuda.cudadrv import enums
|
18
|
-
|
19
|
-
|
20
|
-
class CudaRuntimeAPIError(CudaRuntimeError):
|
21
|
-
"""
|
22
|
-
Raised when there is an error accessing a C API from the CUDA Runtime.
|
23
|
-
"""
|
24
|
-
|
25
|
-
def __init__(self, code, msg):
|
26
|
-
self.code = code
|
27
|
-
self.msg = msg
|
28
|
-
super().__init__(code, msg)
|
29
|
-
|
30
|
-
def __str__(self):
|
31
|
-
return "[%s] %s" % (self.code, self.msg)
|
8
|
+
from numba.cuda.cudadrv.nvrtc import NVRTC
|
32
9
|
|
33
10
|
|
34
11
|
class Runtime:
|
35
|
-
"""
|
36
|
-
Runtime object that lazily binds runtime API functions.
|
37
|
-
"""
|
38
|
-
|
39
|
-
def __init__(self):
|
40
|
-
self.is_initialized = False
|
41
|
-
|
42
|
-
def _initialize(self):
|
43
|
-
# lazily initialize logger
|
44
|
-
global _logger
|
45
|
-
_logger = make_logger()
|
46
|
-
|
47
|
-
if config.DISABLE_CUDA:
|
48
|
-
msg = (
|
49
|
-
"CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1 "
|
50
|
-
"in the environment, or because CUDA is unsupported on "
|
51
|
-
"32-bit systems."
|
52
|
-
)
|
53
|
-
raise CudaSupportError(msg)
|
54
|
-
self.lib = open_cudalib("cudart")
|
55
|
-
|
56
|
-
self.is_initialized = True
|
57
|
-
|
58
|
-
def __getattr__(self, fname):
|
59
|
-
# First request of a runtime API function
|
60
|
-
try:
|
61
|
-
proto = API_PROTOTYPES[fname]
|
62
|
-
except KeyError:
|
63
|
-
raise AttributeError(fname)
|
64
|
-
restype = proto[0]
|
65
|
-
argtypes = proto[1:]
|
66
|
-
|
67
|
-
if not self.is_initialized:
|
68
|
-
self._initialize()
|
69
|
-
|
70
|
-
# Find function in runtime library
|
71
|
-
libfn = self._find_api(fname)
|
72
|
-
libfn.restype = restype
|
73
|
-
libfn.argtypes = argtypes
|
74
|
-
|
75
|
-
safe_call = self._wrap_api_call(fname, libfn)
|
76
|
-
setattr(self, fname, safe_call)
|
77
|
-
return safe_call
|
78
|
-
|
79
|
-
def _wrap_api_call(self, fname, libfn):
|
80
|
-
@functools.wraps(libfn)
|
81
|
-
def safe_cuda_api_call(*args):
|
82
|
-
_logger.debug("call runtime api: %s", libfn.__name__)
|
83
|
-
retcode = libfn(*args)
|
84
|
-
self._check_error(fname, retcode)
|
85
|
-
|
86
|
-
return safe_cuda_api_call
|
87
|
-
|
88
|
-
def _check_error(self, fname, retcode):
|
89
|
-
if retcode != enums.CUDA_SUCCESS:
|
90
|
-
errname = ERROR_MAP.get(retcode, "cudaErrorUnknown")
|
91
|
-
msg = "Call to %s results in %s" % (fname, errname)
|
92
|
-
_logger.error(msg)
|
93
|
-
raise CudaRuntimeAPIError(retcode, msg)
|
94
|
-
|
95
|
-
def _find_api(self, fname):
|
96
|
-
try:
|
97
|
-
return getattr(self.lib, fname)
|
98
|
-
except AttributeError:
|
99
|
-
pass
|
100
|
-
|
101
|
-
# Not found.
|
102
|
-
# Delay missing function error to use
|
103
|
-
def absent_function(*args, **kws):
|
104
|
-
msg = "runtime missing function: %s."
|
105
|
-
raise CudaRuntimeError(msg % fname)
|
106
|
-
|
107
|
-
setattr(self, fname, absent_function)
|
108
|
-
return absent_function
|
109
|
-
|
110
12
|
def get_version(self):
|
111
|
-
|
112
|
-
Returns the CUDA Runtime version as a tuple (major, minor).
|
113
|
-
"""
|
114
|
-
rtver = ctypes.c_int()
|
115
|
-
self.cudaRuntimeGetVersion(ctypes.byref(rtver))
|
116
|
-
# The version is encoded as (1000 * major) + (10 * minor)
|
117
|
-
major = rtver.value // 1000
|
118
|
-
minor = (rtver.value - (major * 1000)) // 10
|
119
|
-
return (major, minor)
|
120
|
-
|
121
|
-
def is_supported_version(self):
|
122
|
-
"""
|
123
|
-
Returns True if the CUDA Runtime is a supported version.
|
124
|
-
"""
|
125
|
-
|
126
|
-
return self.get_version() in self.supported_versions
|
127
|
-
|
128
|
-
@property
|
129
|
-
def supported_versions(self):
|
130
|
-
"""A tuple of all supported CUDA toolkit versions. Versions are given in
|
131
|
-
the form ``(major_version, minor_version)``."""
|
132
|
-
if sys.platform not in ("linux", "win32") or config.MACHINE_BITS != 64:
|
133
|
-
# Only 64-bit Linux and Windows are supported
|
134
|
-
return ()
|
135
|
-
return (
|
136
|
-
(11, 0),
|
137
|
-
(11, 1),
|
138
|
-
(11, 2),
|
139
|
-
(11, 3),
|
140
|
-
(11, 4),
|
141
|
-
(11, 5),
|
142
|
-
(11, 6),
|
143
|
-
(11, 7),
|
144
|
-
)
|
13
|
+
return NVRTC().get_version()
|
145
14
|
|
146
15
|
|
147
16
|
runtime = Runtime()
|
@@ -4,6 +4,7 @@ from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
|
|
4
4
|
from numba.cuda.compiler import declare_device_function
|
5
5
|
from numba.cuda.dispatcher import CUDADispatcher
|
6
6
|
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
7
|
+
from numba.cuda.cudadrv.driver import _have_nvjitlink
|
7
8
|
|
8
9
|
|
9
10
|
_msg_deprecated_signature_arg = (
|
@@ -24,6 +25,7 @@ def jit(
|
|
24
25
|
lineinfo=False,
|
25
26
|
cache=False,
|
26
27
|
launch_bounds=None,
|
28
|
+
lto=None,
|
27
29
|
**kws,
|
28
30
|
):
|
29
31
|
"""
|
@@ -83,6 +85,10 @@ def jit(
|
|
83
85
|
If a scalar is provided, it is used as the maximum
|
84
86
|
number of threads per block.
|
85
87
|
:type launch_bounds: int | tuple[int]
|
88
|
+
:param lto: Whether to enable LTO. If unspecified, LTO is enabled by
|
89
|
+
default when pynvjitlink is available, except for kernels where
|
90
|
+
``debug=True``.
|
91
|
+
:type lto: bool
|
86
92
|
"""
|
87
93
|
|
88
94
|
if link and config.ENABLE_CUDASIM:
|
@@ -136,6 +142,16 @@ def jit(
|
|
136
142
|
if device and kws.get("link"):
|
137
143
|
raise ValueError("link keyword invalid for device function")
|
138
144
|
|
145
|
+
if lto is None:
|
146
|
+
# Default to using LTO if pynvjitlink is available and we're not debugging
|
147
|
+
lto = _have_nvjitlink() and not debug
|
148
|
+
else:
|
149
|
+
if lto and not _have_nvjitlink():
|
150
|
+
raise RuntimeError(
|
151
|
+
"LTO requires nvjitlink, which is not available"
|
152
|
+
"or not sufficiently recent (>=12.3)"
|
153
|
+
)
|
154
|
+
|
139
155
|
if sigutils.is_signature(func_or_sig):
|
140
156
|
signatures = [func_or_sig]
|
141
157
|
specialized = True
|
@@ -165,6 +181,7 @@ def jit(
|
|
165
181
|
targetoptions["forceinline"] = forceinline
|
166
182
|
targetoptions["extensions"] = extensions
|
167
183
|
targetoptions["launch_bounds"] = launch_bounds
|
184
|
+
targetoptions["lto"] = lto
|
168
185
|
|
169
186
|
disp = CUDADispatcher(func, targetoptions=targetoptions)
|
170
187
|
|
@@ -235,6 +252,7 @@ def jit(
|
|
235
252
|
targetoptions["forceinline"] = forceinline
|
236
253
|
targetoptions["extensions"] = extensions
|
237
254
|
targetoptions["launch_bounds"] = launch_bounds
|
255
|
+
targetoptions["lto"] = lto
|
238
256
|
disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
|
239
257
|
|
240
258
|
if cache:
|
@@ -0,0 +1,36 @@
|
|
1
|
+
from numba.core.compiler import Flags, Option
|
2
|
+
|
3
|
+
|
4
|
+
def _nvvm_options_type(x):
|
5
|
+
if x is None:
|
6
|
+
return None
|
7
|
+
|
8
|
+
else:
|
9
|
+
assert isinstance(x, dict)
|
10
|
+
return x
|
11
|
+
|
12
|
+
|
13
|
+
def _optional_int_type(x):
|
14
|
+
if x is None:
|
15
|
+
return None
|
16
|
+
|
17
|
+
else:
|
18
|
+
assert isinstance(x, int)
|
19
|
+
return x
|
20
|
+
|
21
|
+
|
22
|
+
class CUDAFlags(Flags):
|
23
|
+
nvvm_options = Option(
|
24
|
+
type=_nvvm_options_type,
|
25
|
+
default=None,
|
26
|
+
doc="NVVM options",
|
27
|
+
)
|
28
|
+
compute_capability = Option(
|
29
|
+
type=tuple,
|
30
|
+
default=None,
|
31
|
+
doc="Compute Capability",
|
32
|
+
)
|
33
|
+
max_registers = Option(
|
34
|
+
type=_optional_int_type, default=None, doc="Max registers"
|
35
|
+
)
|
36
|
+
lto = Option(type=bool, default=False, doc="Enable Link-time Optimization")
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
from numba import cuda, config
|
7
7
|
from numba.core.runtime.nrt import _nrt_mstats
|
8
8
|
from numba.cuda.cudadrv.driver import (
|
9
|
-
|
9
|
+
_Linker,
|
10
10
|
driver,
|
11
11
|
launch_kernel,
|
12
12
|
USE_NV_BINDING,
|
@@ -80,7 +80,7 @@ class _Runtime:
|
|
80
80
|
cc = get_current_device().compute_capability
|
81
81
|
|
82
82
|
# Create a new linker instance and add the cu file
|
83
|
-
linker = Linker.new(cc=cc)
|
83
|
+
linker = _Linker.new(cc=cc)
|
84
84
|
linker.add_cu_file(memsys_mod)
|
85
85
|
|
86
86
|
# Complete the linker and create a module from it
|
@@ -34,10 +34,10 @@ class FakeDriver(object):
|
|
34
34
|
driver = FakeDriver()
|
35
35
|
|
36
36
|
|
37
|
-
class Linker:
|
37
|
+
class _Linker:
|
38
38
|
@classmethod
|
39
39
|
def new(cls, max_registers=0, lineinfo=False, cc=None):
|
40
|
-
return Linker()
|
40
|
+
return _Linker()
|
41
41
|
|
42
42
|
@property
|
43
43
|
def lto(self):
|
@@ -67,3 +67,7 @@ PyNvJitLinker = None
|
|
67
67
|
|
68
68
|
if config.ENABLE_CUDASIM:
|
69
69
|
config.CUDA_ENABLE_PYNVJITLINK = False
|
70
|
+
|
71
|
+
|
72
|
+
def _have_nvjitlink():
|
73
|
+
return False
|
numba_cuda/numba/cuda/target.py
CHANGED
@@ -2,9 +2,20 @@ import re
|
|
2
2
|
from functools import cached_property
|
3
3
|
import llvmlite.binding as ll
|
4
4
|
from llvmlite import ir
|
5
|
-
|
6
|
-
|
5
|
+
import warnings
|
6
|
+
|
7
|
+
from numba.core import (
|
8
|
+
cgutils,
|
9
|
+
compiler,
|
10
|
+
config,
|
11
|
+
itanium_mangler,
|
12
|
+
targetconfig,
|
13
|
+
types,
|
14
|
+
typing,
|
15
|
+
)
|
16
|
+
from numba.core.compiler_lock import global_compiler_lock
|
7
17
|
from numba.core.dispatcher import Dispatcher
|
18
|
+
from numba.core.errors import NumbaWarning
|
8
19
|
from numba.core.base import BaseContext
|
9
20
|
from numba.core.callconv import BaseCallConv, MinimalCallConv
|
10
21
|
from numba.core.typing import cmathdecl
|
@@ -13,6 +24,7 @@ from numba.core import datamodel
|
|
13
24
|
from .cudadrv import nvvm
|
14
25
|
from numba.cuda import codegen, ufuncs
|
15
26
|
from numba.cuda.debuginfo import CUDADIBuilder
|
27
|
+
from numba.cuda.flags import CUDAFlags
|
16
28
|
from numba.cuda.models import cuda_data_manager
|
17
29
|
|
18
30
|
# -----------------------------------------------------------------------------
|
@@ -288,6 +300,47 @@ class CUDATargetContext(BaseContext):
|
|
288
300
|
def get_ufunc_info(self, ufunc_key):
|
289
301
|
return ufuncs.get_ufunc_info(ufunc_key)
|
290
302
|
|
303
|
+
def _compile_subroutine_no_cache(
|
304
|
+
self, builder, impl, sig, locals=None, flags=None
|
305
|
+
):
|
306
|
+
# Overrides numba.core.base.BaseContext._compile_subroutine_no_cache().
|
307
|
+
# Modified to use flags from the context stack if they are not provided
|
308
|
+
# (pending a fix in Numba upstream).
|
309
|
+
|
310
|
+
if locals is None:
|
311
|
+
locals = {}
|
312
|
+
|
313
|
+
with global_compiler_lock:
|
314
|
+
codegen = self.codegen()
|
315
|
+
library = codegen.create_library(impl.__name__)
|
316
|
+
if flags is None:
|
317
|
+
cstk = targetconfig.ConfigStack()
|
318
|
+
if cstk:
|
319
|
+
flags = cstk.top().copy()
|
320
|
+
else:
|
321
|
+
msg = "There should always be a context stack; none found."
|
322
|
+
warnings.warn(msg, NumbaWarning)
|
323
|
+
flags = CUDAFlags()
|
324
|
+
|
325
|
+
flags.no_compile = True
|
326
|
+
flags.no_cpython_wrapper = True
|
327
|
+
flags.no_cfunc_wrapper = True
|
328
|
+
|
329
|
+
cres = compiler.compile_internal(
|
330
|
+
self.typing_context,
|
331
|
+
self,
|
332
|
+
library,
|
333
|
+
impl,
|
334
|
+
sig.args,
|
335
|
+
sig.return_type,
|
336
|
+
flags,
|
337
|
+
locals=locals,
|
338
|
+
)
|
339
|
+
|
340
|
+
# Allow inlining the function inside callers
|
341
|
+
self.active_code_library.add_linking_library(cres.library)
|
342
|
+
return cres
|
343
|
+
|
291
344
|
|
292
345
|
class CUDACallConv(MinimalCallConv):
|
293
346
|
def decorate_function(self, fn, args, fe_argtypes, noalias=False):
|
numba_cuda/numba/cuda/testing.py
CHANGED
@@ -35,14 +35,6 @@ class CUDATestCase(SerialMixin, TestCase):
|
|
35
35
|
config.CUDA_LOW_OCCUPANCY_WARNINGS = self._low_occupancy_warnings
|
36
36
|
config.CUDA_WARN_ON_IMPLICIT_COPY = self._warn_on_implicit_copy
|
37
37
|
|
38
|
-
def skip_if_lto(self, reason):
|
39
|
-
# Some linkers need the compute capability to be specified, so we
|
40
|
-
# always specify it here.
|
41
|
-
cc = devices.get_context().device.compute_capability
|
42
|
-
linker = driver.Linker.new(cc=cc)
|
43
|
-
if linker.lto:
|
44
|
-
self.skipTest(reason)
|
45
|
-
|
46
38
|
|
47
39
|
class ContextResettingTestCase(CUDATestCase):
|
48
40
|
"""
|
@@ -59,20 +51,6 @@ class ContextResettingTestCase(CUDATestCase):
|
|
59
51
|
reset()
|
60
52
|
|
61
53
|
|
62
|
-
def ensure_supported_ccs_initialized():
|
63
|
-
from numba.cuda import is_available as cuda_is_available
|
64
|
-
from numba.cuda.cudadrv import nvvm
|
65
|
-
|
66
|
-
if cuda_is_available():
|
67
|
-
# Ensure that cudart.so is loaded and the list of supported compute
|
68
|
-
# capabilities in the nvvm module is populated before a fork. This is
|
69
|
-
# needed because some compilation tests don't require a CUDA context,
|
70
|
-
# but do use NVVM, and it is required that libcudart.so should be
|
71
|
-
# loaded before a fork (note that the requirement is not explicitly
|
72
|
-
# documented).
|
73
|
-
nvvm.get_supported_ccs()
|
74
|
-
|
75
|
-
|
76
54
|
def skip_on_cudasim(reason):
|
77
55
|
"""Skip this test if running on the CUDA simulator"""
|
78
56
|
return unittest.skipIf(config.ENABLE_CUDASIM, reason)
|
@@ -1,5 +1,4 @@
|
|
1
1
|
from fnmatch import fnmatch
|
2
|
-
from numba.cuda.testing import ensure_supported_ccs_initialized
|
3
2
|
from numba.testing import unittest
|
4
3
|
from numba import cuda
|
5
4
|
from os.path import dirname, isfile, join, normpath, relpath, splitext
|
@@ -42,7 +41,6 @@ def load_testsuite(loader, dir):
|
|
42
41
|
def load_tests(loader, tests, pattern):
|
43
42
|
suite = unittest.TestSuite()
|
44
43
|
this_dir = dirname(__file__)
|
45
|
-
ensure_supported_ccs_initialized()
|
46
44
|
suite.addTests(load_testsuite(loader, join(this_dir, "nocuda")))
|
47
45
|
if cuda.is_available():
|
48
46
|
suite.addTests(load_testsuite(loader, join(this_dir, "cudasim")))
|
@@ -109,7 +109,21 @@ class Test3rdPartyContext(CUDATestCase):
|
|
109
109
|
if driver.USE_NV_BINDING:
|
110
110
|
flags = 0
|
111
111
|
dev = driver.binding.CUdevice(0)
|
112
|
-
|
112
|
+
|
113
|
+
result, version = driver.binding.cuDriverGetVersion()
|
114
|
+
self.assertEqual(
|
115
|
+
result,
|
116
|
+
driver.binding.CUresult.CUDA_SUCCESS,
|
117
|
+
"Error getting CUDA driver version",
|
118
|
+
)
|
119
|
+
|
120
|
+
# CUDA 13's cuCtxCreate has an optional parameter prepended
|
121
|
+
if version >= 13000:
|
122
|
+
args = (None, flags, dev)
|
123
|
+
else:
|
124
|
+
args = (flags, dev)
|
125
|
+
|
126
|
+
hctx = the_driver.cuCtxCreate(*args)
|
113
127
|
else:
|
114
128
|
hctx = driver.drvapi.cu_context()
|
115
129
|
the_driver.cuCtxCreate(byref(hctx), 0, 0)
|
@@ -1,14 +1,14 @@
|
|
1
1
|
import numpy as np
|
2
2
|
import warnings
|
3
|
+
from numba import config
|
3
4
|
from numba.cuda.testing import unittest
|
4
5
|
from numba.cuda.testing import skip_on_cudasim, skip_if_cuda_includes_missing
|
5
6
|
from numba.cuda.testing import CUDATestCase, test_data_dir
|
6
|
-
from numba.cuda.cudadrv.driver import CudaAPIError, Linker, LinkerError
|
7
|
-
from numba.cuda.cudadrv.error import NvrtcError
|
7
|
+
from numba.cuda.cudadrv.driver import CudaAPIError, _Linker, LinkerError
|
8
8
|
from numba.cuda import require_context
|
9
9
|
from numba.tests.support import ignore_internal_warnings
|
10
10
|
from numba import cuda, void, float64, int64, int32, typeof, float32
|
11
|
-
|
11
|
+
from numba.cuda.cudadrv.error import NvrtcError
|
12
12
|
|
13
13
|
CONST1D = np.arange(10, dtype=np.float64)
|
14
14
|
|
@@ -107,7 +107,7 @@ class TestLinker(CUDATestCase):
|
|
107
107
|
@require_context
|
108
108
|
def test_linker_basic(self):
|
109
109
|
"""Simply go through the constructor and destructor"""
|
110
|
-
linker = Linker.new(cc=(7, 5))
|
110
|
+
linker = _Linker.new(cc=(7, 5))
|
111
111
|
del linker
|
112
112
|
|
113
113
|
def _test_linking(self, eager):
|
@@ -183,7 +183,13 @@ class TestLinker(CUDATestCase):
|
|
183
183
|
|
184
184
|
link = str(test_data_dir / "error.cu")
|
185
185
|
|
186
|
-
|
186
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
187
|
+
from cuda.core.experimental._utils.cuda_utils import NVRTCError
|
188
|
+
|
189
|
+
errty = NVRTCError
|
190
|
+
else:
|
191
|
+
errty = NvrtcError
|
192
|
+
with self.assertRaises(errty) as e:
|
187
193
|
|
188
194
|
@cuda.jit("void(int32)", link=[link])
|
189
195
|
def kernel(x):
|
@@ -191,7 +197,12 @@ class TestLinker(CUDATestCase):
|
|
191
197
|
|
192
198
|
msg = e.exception.args[0]
|
193
199
|
# Check the error message refers to the NVRTC compile
|
194
|
-
|
200
|
+
nvrtc_err_str = (
|
201
|
+
"NVRTC_ERROR_COMPILATION"
|
202
|
+
if config.CUDA_USE_NVIDIA_BINDING
|
203
|
+
else "NVRTC Compilation failure"
|
204
|
+
)
|
205
|
+
self.assertIn(nvrtc_err_str, msg)
|
195
206
|
# Check the expected error in the CUDA source is reported
|
196
207
|
self.assertIn('identifier "SYNTAX" is undefined', msg)
|
197
208
|
# Check the filename is reported correctly
|