numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +51 -16
- numba_cuda/numba/cuda/codegen.py +11 -9
- numba_cuda/numba/cuda/compiler.py +3 -39
- numba_cuda/numba/cuda/cuda_paths.py +20 -22
- numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
- numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
- numba_cuda/numba/cuda/decorators.py +18 -0
- numba_cuda/numba/cuda/dispatcher.py +1 -0
- numba_cuda/numba/cuda/flags.py +36 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
- numba_cuda/numba/cuda/target.py +55 -2
- numba_cuda/numba/cuda/testing.py +0 -22
- numba_cuda/numba/cuda/tests/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
@@ -1,22 +1,12 @@
|
|
1
1
|
from numba.cuda.testing import unittest
|
2
2
|
from numba.cuda.testing import skip_on_cudasim
|
3
3
|
from numba.cuda.testing import CUDATestCase
|
4
|
-
from numba.cuda.cudadrv.driver import PyNvJitLinker
|
5
4
|
from numba.cuda import get_current_device
|
5
|
+
from numba.cuda.cudadrv.driver import _Linker, _have_nvjitlink
|
6
6
|
|
7
7
|
from numba import cuda
|
8
8
|
from numba import config
|
9
|
-
from numba.tests.support import run_in_subprocess, override_config
|
10
9
|
|
11
|
-
try:
|
12
|
-
import pynvjitlink # noqa: F401
|
13
|
-
|
14
|
-
PYNVJITLINK_INSTALLED = True
|
15
|
-
except ImportError:
|
16
|
-
PYNVJITLINK_INSTALLED = False
|
17
|
-
|
18
|
-
|
19
|
-
import itertools
|
20
10
|
import os
|
21
11
|
import io
|
22
12
|
import contextlib
|
@@ -52,85 +42,13 @@ if TEST_BIN_DIR:
|
|
52
42
|
|
53
43
|
|
54
44
|
@unittest.skipIf(
|
55
|
-
not config.
|
56
|
-
|
45
|
+
not config.CUDA_USE_NVIDIA_BINDING
|
46
|
+
or not TEST_BIN_DIR
|
47
|
+
or not _have_nvjitlink(),
|
48
|
+
"NVIDIA cuda bindings not enabled or nvJitLink not installed or new enough (>12.3)",
|
57
49
|
)
|
58
50
|
@skip_on_cudasim("Linking unsupported in the simulator")
|
59
51
|
class TestLinker(CUDATestCase):
|
60
|
-
def test_nvjitlink_create(self):
|
61
|
-
patched_linker = PyNvJitLinker(cc=(7, 5))
|
62
|
-
assert "-arch=sm_75" in patched_linker.options
|
63
|
-
|
64
|
-
def test_nvjitlink_create_no_cc_error(self):
|
65
|
-
# nvJitLink expects at least the architecture to be specified.
|
66
|
-
with self.assertRaisesRegex(
|
67
|
-
RuntimeError, "PyNvJitLinker requires CC to be specified"
|
68
|
-
):
|
69
|
-
PyNvJitLinker()
|
70
|
-
|
71
|
-
def test_nvjitlink_invalid_arch_error(self):
|
72
|
-
from pynvjitlink.api import NvJitLinkError
|
73
|
-
|
74
|
-
# CC 0.0 is not a valid compute capability
|
75
|
-
with self.assertRaisesRegex(
|
76
|
-
NvJitLinkError, "NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"
|
77
|
-
):
|
78
|
-
PyNvJitLinker(cc=(0, 0))
|
79
|
-
|
80
|
-
def test_nvjitlink_invalid_cc_type_error(self):
|
81
|
-
with self.assertRaisesRegex(
|
82
|
-
TypeError, "`cc` must be a list or tuple of length 2"
|
83
|
-
):
|
84
|
-
PyNvJitLinker(cc=0)
|
85
|
-
|
86
|
-
def test_nvjitlink_ptx_compile_options(self):
|
87
|
-
max_registers = (None, 32)
|
88
|
-
lineinfo = (False, True)
|
89
|
-
lto = (False, True)
|
90
|
-
additional_flags = (None, ("-g",), ("-g", "-time"))
|
91
|
-
for (
|
92
|
-
max_registers_i,
|
93
|
-
line_info_i,
|
94
|
-
lto_i,
|
95
|
-
additional_flags_i,
|
96
|
-
) in itertools.product(max_registers, lineinfo, lto, additional_flags):
|
97
|
-
with self.subTest(
|
98
|
-
max_registers=max_registers_i,
|
99
|
-
lineinfo=line_info_i,
|
100
|
-
lto=lto_i,
|
101
|
-
additional_flags=additional_flags_i,
|
102
|
-
):
|
103
|
-
patched_linker = PyNvJitLinker(
|
104
|
-
cc=(7, 5),
|
105
|
-
max_registers=max_registers_i,
|
106
|
-
lineinfo=line_info_i,
|
107
|
-
lto=lto_i,
|
108
|
-
additional_flags=additional_flags_i,
|
109
|
-
)
|
110
|
-
assert "-arch=sm_75" in patched_linker.options
|
111
|
-
|
112
|
-
if max_registers_i:
|
113
|
-
assert (
|
114
|
-
f"-maxrregcount={max_registers_i}"
|
115
|
-
in patched_linker.options
|
116
|
-
)
|
117
|
-
else:
|
118
|
-
assert "-maxrregcount" not in patched_linker.options
|
119
|
-
|
120
|
-
if line_info_i:
|
121
|
-
assert "-lineinfo" in patched_linker.options
|
122
|
-
else:
|
123
|
-
assert "-lineinfo" not in patched_linker.options
|
124
|
-
|
125
|
-
if lto_i:
|
126
|
-
assert "-lto" in patched_linker.options
|
127
|
-
else:
|
128
|
-
assert "-lto" not in patched_linker.options
|
129
|
-
|
130
|
-
if additional_flags_i:
|
131
|
-
for flag in additional_flags_i:
|
132
|
-
assert flag in patched_linker.options
|
133
|
-
|
134
52
|
def test_nvjitlink_add_file_guess_ext_linkable_code(self):
|
135
53
|
files = (
|
136
54
|
test_device_functions_a,
|
@@ -142,24 +60,20 @@ class TestLinker(CUDATestCase):
|
|
142
60
|
)
|
143
61
|
for file in files:
|
144
62
|
with self.subTest(file=file):
|
145
|
-
|
146
|
-
|
147
|
-
)
|
148
|
-
patched_linker.add_file_guess_ext(file)
|
63
|
+
linker = _Linker(cc=get_current_device().compute_capability)
|
64
|
+
linker.add_file_guess_ext(file)
|
149
65
|
|
150
66
|
def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
|
151
67
|
with open(test_device_functions_cubin, "rb") as f:
|
152
68
|
content = f.read()
|
153
69
|
|
154
|
-
|
155
|
-
cc=get_current_device().compute_capability
|
156
|
-
)
|
70
|
+
linker = _Linker(cc=get_current_device().compute_capability)
|
157
71
|
with self.assertRaisesRegex(
|
158
72
|
TypeError, "Expected path to file or a LinkableCode"
|
159
73
|
):
|
160
74
|
# Feeding raw data as bytes to add_file_guess_ext should raise,
|
161
75
|
# because there's no way to know what kind of file to treat it as
|
162
|
-
|
76
|
+
linker.add_file_guess_ext(content)
|
163
77
|
|
164
78
|
def test_nvjitlink_jit_with_linkable_code(self):
|
165
79
|
files = (
|
@@ -261,77 +175,5 @@ class TestLinker(CUDATestCase):
|
|
261
175
|
pass
|
262
176
|
|
263
177
|
|
264
|
-
@unittest.skipIf(
|
265
|
-
not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
|
266
|
-
reason="pynvjitlink not enabled",
|
267
|
-
)
|
268
|
-
@skip_on_cudasim("Linking unsupported in the simulator")
|
269
|
-
class TestLinkerUsage(CUDATestCase):
|
270
|
-
"""Test that whether pynvjitlink can be enabled by both environment variable
|
271
|
-
and modification of config at runtime.
|
272
|
-
"""
|
273
|
-
|
274
|
-
src = """if 1:
|
275
|
-
import os
|
276
|
-
from numba import cuda, config
|
277
|
-
|
278
|
-
{config}
|
279
|
-
|
280
|
-
TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
|
281
|
-
if TEST_BIN_DIR:
|
282
|
-
test_device_functions_cubin = os.path.join(
|
283
|
-
TEST_BIN_DIR, "test_device_functions.cubin"
|
284
|
-
)
|
285
|
-
|
286
|
-
sig = "uint32(uint32, uint32)"
|
287
|
-
add_from_numba = cuda.declare_device("add_from_numba", sig)
|
288
|
-
|
289
|
-
@cuda.jit(link=[test_device_functions_cubin], lto=True)
|
290
|
-
def kernel(result):
|
291
|
-
result[0] = add_from_numba(1, 2)
|
292
|
-
|
293
|
-
result = cuda.device_array(1)
|
294
|
-
kernel[1, 1](result)
|
295
|
-
assert result[0] == 3
|
296
|
-
"""
|
297
|
-
|
298
|
-
def test_linker_enabled_envvar(self):
|
299
|
-
env = os.environ.copy()
|
300
|
-
env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
|
301
|
-
run_in_subprocess(self.src.format(config=""), env=env)
|
302
|
-
|
303
|
-
def test_linker_disabled_envvar(self):
|
304
|
-
env = os.environ.copy()
|
305
|
-
env["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "0"
|
306
|
-
with self.assertRaisesRegex(
|
307
|
-
AssertionError, "LTO and additional flags require PyNvJitLinker"
|
308
|
-
):
|
309
|
-
# Actual error raised is `ValueError`, but `run_in_subprocess`
|
310
|
-
# reraises as AssertionError.
|
311
|
-
run_in_subprocess(self.src.format(config=""), env=env)
|
312
|
-
|
313
|
-
def test_linker_enabled_config(self):
|
314
|
-
env = os.environ.copy()
|
315
|
-
env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
|
316
|
-
run_in_subprocess(
|
317
|
-
self.src.format(config="config.CUDA_ENABLE_PYNVJITLINK = True"),
|
318
|
-
env=env,
|
319
|
-
)
|
320
|
-
|
321
|
-
def test_linker_disabled_config(self):
|
322
|
-
env = os.environ.copy()
|
323
|
-
env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
|
324
|
-
with override_config("CUDA_ENABLE_PYNVJITLINK", False):
|
325
|
-
with self.assertRaisesRegex(
|
326
|
-
AssertionError, "LTO and additional flags require PyNvJitLinker"
|
327
|
-
):
|
328
|
-
run_in_subprocess(
|
329
|
-
self.src.format(
|
330
|
-
config="config.CUDA_ENABLE_PYNVJITLINK = False"
|
331
|
-
),
|
332
|
-
env=env,
|
333
|
-
)
|
334
|
-
|
335
|
-
|
336
178
|
if __name__ == "__main__":
|
337
179
|
unittest.main()
|
@@ -0,0 +1,27 @@
|
|
1
|
+
from numba.cuda.cudadrv import nvrtc
|
2
|
+
from numba.cuda.testing import skip_on_cudasim
|
3
|
+
|
4
|
+
import unittest
|
5
|
+
|
6
|
+
|
7
|
+
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
|
8
|
+
class TestArchOption(unittest.TestCase):
|
9
|
+
def test_get_arch_option(self):
|
10
|
+
# Test returning the nearest lowest arch.
|
11
|
+
self.assertEqual(nvrtc.get_arch_option(7, 5), "compute_75")
|
12
|
+
self.assertEqual(nvrtc.get_arch_option(7, 7), "compute_75")
|
13
|
+
self.assertEqual(nvrtc.get_arch_option(8, 5), "compute_80")
|
14
|
+
self.assertEqual(nvrtc.get_arch_option(9, 1), "compute_90")
|
15
|
+
# Test known arch.
|
16
|
+
supported_cc = nvrtc.NVRTC().get_supported_archs()
|
17
|
+
for arch in supported_cc:
|
18
|
+
self.assertEqual(
|
19
|
+
nvrtc.get_arch_option(*arch), "compute_%d%d" % arch
|
20
|
+
)
|
21
|
+
self.assertEqual(
|
22
|
+
nvrtc.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1]
|
23
|
+
)
|
24
|
+
|
25
|
+
|
26
|
+
if __name__ == "__main__":
|
27
|
+
unittest.main()
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import warnings
|
2
2
|
|
3
3
|
from llvmlite import ir
|
4
|
-
from numba.cuda.cudadrv import nvvm, runtime
|
4
|
+
from numba.cuda.cudadrv import nvrtc, nvvm, runtime
|
5
5
|
from numba.cuda.testing import unittest
|
6
6
|
from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM
|
7
7
|
from numba.cuda.testing import skip_on_cudasim
|
@@ -30,7 +30,7 @@ class TestNvvmDriver(unittest.TestCase):
|
|
30
30
|
self.skipTest("-gen-lto unavailable in this toolkit version")
|
31
31
|
|
32
32
|
nvvmir = self.get_nvvmir()
|
33
|
-
arch = "compute_%d%d" %
|
33
|
+
arch = "compute_%d%d" % nvrtc.get_lowest_supported_cc()
|
34
34
|
ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch=arch)
|
35
35
|
|
36
36
|
# Verify we correctly passed the option by checking if we got LTOIR
|
@@ -110,7 +110,7 @@ class TestNvvmDriver(unittest.TestCase):
|
|
110
110
|
|
111
111
|
def test_nvvm_support(self):
|
112
112
|
"""Test supported CC by NVVM"""
|
113
|
-
for arch in
|
113
|
+
for arch in nvrtc.get_supported_ccs():
|
114
114
|
self._test_nvvm_support(arch=arch)
|
115
115
|
|
116
116
|
def test_nvvm_warning(self):
|
@@ -135,22 +135,6 @@ class TestNvvmDriver(unittest.TestCase):
|
|
135
135
|
self.assertIn("overriding noinline attribute", str(w[0]))
|
136
136
|
|
137
137
|
|
138
|
-
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
|
139
|
-
class TestArchOption(unittest.TestCase):
|
140
|
-
def test_get_arch_option(self):
|
141
|
-
# Test returning the nearest lowest arch.
|
142
|
-
self.assertEqual(nvvm.get_arch_option(7, 5), "compute_75")
|
143
|
-
self.assertEqual(nvvm.get_arch_option(7, 7), "compute_75")
|
144
|
-
self.assertEqual(nvvm.get_arch_option(8, 8), "compute_87")
|
145
|
-
# Test known arch.
|
146
|
-
supported_cc = nvvm.get_supported_ccs()
|
147
|
-
for arch in supported_cc:
|
148
|
-
self.assertEqual(nvvm.get_arch_option(*arch), "compute_%d%d" % arch)
|
149
|
-
self.assertEqual(
|
150
|
-
nvvm.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1]
|
151
|
-
)
|
152
|
-
|
153
|
-
|
154
138
|
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
|
155
139
|
class TestLibDevice(unittest.TestCase):
|
156
140
|
def test_libdevice_load(self):
|
@@ -1,9 +1,6 @@
|
|
1
1
|
import multiprocessing
|
2
2
|
import os
|
3
|
-
from numba.
|
4
|
-
from numba.cuda.cudadrv.runtime import runtime
|
5
|
-
from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim
|
6
|
-
from unittest.mock import patch
|
3
|
+
from numba.cuda.testing import unittest, SerialMixin
|
7
4
|
|
8
5
|
|
9
6
|
def set_visible_devices_and_check(q):
|
@@ -18,39 +15,6 @@ def set_visible_devices_and_check(q):
|
|
18
15
|
q.put(-1)
|
19
16
|
|
20
17
|
|
21
|
-
if config.ENABLE_CUDASIM:
|
22
|
-
SUPPORTED_VERSIONS = ((-1, -1),)
|
23
|
-
else:
|
24
|
-
SUPPORTED_VERSIONS = (
|
25
|
-
(11, 0),
|
26
|
-
(11, 1),
|
27
|
-
(11, 2),
|
28
|
-
(11, 3),
|
29
|
-
(11, 4),
|
30
|
-
(11, 5),
|
31
|
-
(11, 6),
|
32
|
-
(11, 7),
|
33
|
-
)
|
34
|
-
|
35
|
-
|
36
|
-
class TestRuntime(unittest.TestCase):
|
37
|
-
def test_is_supported_version_true(self):
|
38
|
-
for v in SUPPORTED_VERSIONS:
|
39
|
-
with patch.object(runtime, "get_version", return_value=v):
|
40
|
-
self.assertTrue(runtime.is_supported_version())
|
41
|
-
|
42
|
-
@skip_on_cudasim("The simulator always simulates a supported runtime")
|
43
|
-
def test_is_supported_version_false(self):
|
44
|
-
# Check with an old unsupported version and some potential future
|
45
|
-
# versions
|
46
|
-
for v in ((10, 2), (11, 8), (12, 0)):
|
47
|
-
with patch.object(runtime, "get_version", return_value=v):
|
48
|
-
self.assertFalse(runtime.is_supported_version())
|
49
|
-
|
50
|
-
def test_supported_versions(self):
|
51
|
-
self.assertEqual(SUPPORTED_VERSIONS, runtime.supported_versions)
|
52
|
-
|
53
|
-
|
54
18
|
class TestVisibleDevices(unittest.TestCase, SerialMixin):
|
55
19
|
def test_visible_devices_set_after_import(self):
|
56
20
|
# See Issue #6149. This test checks that we can set
|
@@ -265,7 +265,7 @@ class TestCompileForCurrentDevice(CUDATestCase):
|
|
265
265
|
# Check we target the current device's compute capability, or the
|
266
266
|
# closest compute capability supported by the current toolkit.
|
267
267
|
device_cc = cuda.get_current_device().compute_capability
|
268
|
-
cc = cuda.cudadrv.
|
268
|
+
cc = cuda.cudadrv.nvrtc.find_closest_arch(device_cc)
|
269
269
|
target = f".target sm_{cc[0]}{cc[1]}"
|
270
270
|
self.assertIn(target, ptx)
|
271
271
|
|
@@ -14,15 +14,6 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
14
14
|
These tests only checks the compiled PTX for debuginfo section
|
15
15
|
"""
|
16
16
|
|
17
|
-
def setUp(self):
|
18
|
-
super().setUp()
|
19
|
-
# If we're using LTO then we can't check the PTX in these tests,
|
20
|
-
# because we produce LTO-IR, which is opaque to the user.
|
21
|
-
# Additionally, LTO optimizes away the exception status due to an
|
22
|
-
# oversight in the way we generate it (it is not added to the used
|
23
|
-
# list).
|
24
|
-
self.skip_if_lto("Exceptions not supported with LTO")
|
25
|
-
|
26
17
|
def _getasm(self, fn, sig):
|
27
18
|
fn.compile(sig)
|
28
19
|
return fn.inspect_asm(sig)
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from numba import cuda
|
2
2
|
from numba.core.errors import TypingError
|
3
3
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
4
|
+
from numba import config
|
4
5
|
|
5
6
|
|
6
7
|
def noop(x):
|
@@ -89,6 +90,19 @@ class TestJitErrors(CUDATestCase):
|
|
89
90
|
self.assertIn("resolving callee type: type(CUDADispatcher", excstr)
|
90
91
|
self.assertIn("NameError: name 'floor' is not defined", excstr)
|
91
92
|
|
93
|
+
@skip_on_cudasim("Simulator does not use pynvjitlink")
|
94
|
+
@unittest.skipIf(
|
95
|
+
config.CUDA_USE_NVIDIA_BINDING, "NVIDIA cuda bindings enabled"
|
96
|
+
)
|
97
|
+
def test_lto_without_nvjitlink_error(self):
|
98
|
+
with self.assertRaisesRegex(RuntimeError, "LTO requires nvjitlink"):
|
99
|
+
|
100
|
+
@cuda.jit(lto=True)
|
101
|
+
def f():
|
102
|
+
pass
|
103
|
+
|
104
|
+
f[1, 1]()
|
105
|
+
|
92
106
|
|
93
107
|
if __name__ == "__main__":
|
94
108
|
unittest.main()
|
@@ -6,12 +6,6 @@ from numba.core import config
|
|
6
6
|
|
7
7
|
|
8
8
|
class TestException(CUDATestCase):
|
9
|
-
def setUp(self):
|
10
|
-
super().setUp()
|
11
|
-
# LTO optimizes away the exception status due to an oversight
|
12
|
-
# in the way we generate it (it is not added to the used list).
|
13
|
-
self.skip_if_lto("Exceptions not supported with LTO")
|
14
|
-
|
15
9
|
def test_exception(self):
|
16
10
|
def foo(ary):
|
17
11
|
x = cuda.threadIdx.x
|
@@ -1,4 +1,5 @@
|
|
1
1
|
from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
|
2
|
+
from numba.cuda.cudadrv.driver import _have_nvjitlink
|
2
3
|
from llvmlite import ir
|
3
4
|
|
4
5
|
import numpy as np
|
@@ -210,7 +211,7 @@ class TestExtendingLinkage(CUDATestCase):
|
|
210
211
|
(test_device_functions_ltoir, cuda.LTOIR),
|
211
212
|
)
|
212
213
|
|
213
|
-
lto =
|
214
|
+
lto = _have_nvjitlink()
|
214
215
|
|
215
216
|
for path, ctor in files:
|
216
217
|
if ctor == cuda.LTOIR and not lto:
|
@@ -188,10 +188,6 @@ class TestFastMathOption(CUDATestCase):
|
|
188
188
|
)
|
189
189
|
|
190
190
|
def test_divf_exception(self):
|
191
|
-
# LTO optimizes away the exception status due to an oversight
|
192
|
-
# in the way we generate it (it is not added to the used list).
|
193
|
-
self.skip_if_lto("Exceptions not supported with LTO")
|
194
|
-
|
195
191
|
def f10(r, x, y):
|
196
192
|
r[0] = x / y
|
197
193
|
|
@@ -198,6 +198,24 @@ class TestCudaLineInfo(CUDATestCase):
|
|
198
198
|
"debug and lineinfo are mutually exclusive", str(w[0].message)
|
199
199
|
)
|
200
200
|
|
201
|
+
def test_lineinfo_with_compile_internal(self):
|
202
|
+
# Calling a function implemented using compile_internal should not
|
203
|
+
# enable full debug info generation. See Numba-CUDA Issue #271,
|
204
|
+
# https://github.com/NVIDIA/numba-cuda/issues/271
|
205
|
+
|
206
|
+
@cuda.jit("void(complex128[::1], complex128[::1])", lineinfo=True)
|
207
|
+
def complex_abs_use(r, x):
|
208
|
+
r[0] = abs(x[0])
|
209
|
+
|
210
|
+
cc = cuda.get_current_device().compute_capability
|
211
|
+
ov = complex_abs_use.overloads[complex_abs_use.signatures[0]]
|
212
|
+
ptx = ov.inspect_asm(cc)
|
213
|
+
|
214
|
+
target = ".target sm_%s%s" % cc
|
215
|
+
target_debug = f"{target}, debug"
|
216
|
+
self.assertIn(target, ptx)
|
217
|
+
self.assertNotIn(target_debug, ptx)
|
218
|
+
|
201
219
|
|
202
220
|
if __name__ == "__main__":
|
203
221
|
unittest.main()
|
@@ -13,13 +13,6 @@ regex_pattern = (
|
|
13
13
|
|
14
14
|
|
15
15
|
class TestUserExc(CUDATestCase):
|
16
|
-
def setUp(self):
|
17
|
-
super().setUp()
|
18
|
-
# LTO optimizes away the exception status due to an oversight
|
19
|
-
# in the way we generate it (it is not added to the used list).
|
20
|
-
# See https://github.com/numba/numba/issues/9526.
|
21
|
-
self.skip_if_lto("Exceptions not supported with LTO")
|
22
|
-
|
23
16
|
def test_user_exception(self):
|
24
17
|
@cuda.jit("void(int32)", debug=True, opt=False)
|
25
18
|
def test_exc(x):
|
@@ -169,7 +169,16 @@ class TestNrtLinking(CUDATestCase):
|
|
169
169
|
cc = get_current_device().compute_capability
|
170
170
|
ptx, _ = compile(src, "external_nrt.cu", cc)
|
171
171
|
|
172
|
-
@cuda.jit(
|
172
|
+
@cuda.jit(
|
173
|
+
link=[
|
174
|
+
PTXSource(
|
175
|
+
ptx.code
|
176
|
+
if config.CUDA_USE_NVIDIA_BINDING
|
177
|
+
else ptx.encode(),
|
178
|
+
nrt=True,
|
179
|
+
)
|
180
|
+
]
|
181
|
+
)
|
173
182
|
def kernel():
|
174
183
|
allocate_deallocate_handle()
|
175
184
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: numba-cuda
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.16.0
|
4
4
|
Summary: CUDA target for Numba
|
5
5
|
Author: Anaconda Inc., NVIDIA Corporation
|
6
6
|
License: BSD 2-clause
|
@@ -14,16 +14,19 @@ License-File: LICENSE
|
|
14
14
|
Requires-Dist: numba>=0.59.1
|
15
15
|
Provides-Extra: cu11
|
16
16
|
Requires-Dist: cuda-bindings==11.8.*; extra == "cu11"
|
17
|
+
Requires-Dist: cuda-core==0.3.*; extra == "cu11"
|
17
18
|
Requires-Dist: cuda-python==11.8.*; extra == "cu11"
|
18
19
|
Requires-Dist: nvidia-cuda-nvcc-cu11; extra == "cu11"
|
19
20
|
Requires-Dist: nvidia-cuda-runtime-cu11; extra == "cu11"
|
20
21
|
Requires-Dist: nvidia-cuda-nvrtc-cu11; extra == "cu11"
|
21
22
|
Provides-Extra: cu12
|
22
23
|
Requires-Dist: cuda-bindings==12.9.*; extra == "cu12"
|
24
|
+
Requires-Dist: cuda-core==0.3.*; extra == "cu12"
|
23
25
|
Requires-Dist: cuda-python==12.9.*; extra == "cu12"
|
24
26
|
Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
|
25
27
|
Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
|
26
28
|
Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "cu12"
|
29
|
+
Requires-Dist: nvidia-nvjitlink-cu12; extra == "cu12"
|
27
30
|
Provides-Extra: test
|
28
31
|
Requires-Dist: psutil; extra == "test"
|
29
32
|
Requires-Dist: cffi; extra == "test"
|
@@ -51,17 +54,10 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
|
|
51
54
|
To raise questions or initiate discussions, please use the [Numba Discourse
|
52
55
|
forum](https://numba.discourse.group).
|
53
56
|
|
54
|
-
## Installation with pip
|
57
|
+
## Installation with pip or conda
|
55
58
|
|
56
|
-
|
57
|
-
pip install numba-cuda
|
58
|
-
```
|
59
|
-
|
60
|
-
## Installation with Conda
|
59
|
+
Please refer to the [Installation documentation](https://nvidia.github.io/numba-cuda/user/installation.html#installation-with-a-python-package-manager).
|
61
60
|
|
62
|
-
```shell
|
63
|
-
conda install -c conda-forge numba-cuda
|
64
|
-
```
|
65
61
|
|
66
62
|
## Installation from source
|
67
63
|
|
@@ -71,6 +67,8 @@ Install as an editable install:
|
|
71
67
|
pip install -e .
|
72
68
|
```
|
73
69
|
|
70
|
+
If you want to manage all run-time dependencies yourself, also pass the `--no-deps` flag.
|
71
|
+
|
74
72
|
## Running tests
|
75
73
|
|
76
74
|
```
|