numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +51 -16
  3. numba_cuda/numba/cuda/codegen.py +11 -9
  4. numba_cuda/numba/cuda/compiler.py +3 -39
  5. numba_cuda/numba/cuda/cuda_paths.py +20 -22
  6. numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
  7. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  8. numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
  9. numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
  10. numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
  11. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
  12. numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
  13. numba_cuda/numba/cuda/decorators.py +18 -0
  14. numba_cuda/numba/cuda/dispatcher.py +1 -0
  15. numba_cuda/numba/cuda/flags.py +36 -0
  16. numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
  17. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
  18. numba_cuda/numba/cuda/target.py +55 -2
  19. numba_cuda/numba/cuda/testing.py +0 -22
  20. numba_cuda/numba/cuda/tests/__init__.py +0 -2
  21. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
  22. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
  23. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
  24. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
  25. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
  26. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
  27. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
  28. numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
  29. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
  30. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
  31. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
  32. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
  33. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  34. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
  35. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
  36. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
  37. numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
  38. numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
  39. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
  40. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
  41. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
  42. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
  43. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
  44. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
@@ -1,22 +1,12 @@
1
1
  from numba.cuda.testing import unittest
2
2
  from numba.cuda.testing import skip_on_cudasim
3
3
  from numba.cuda.testing import CUDATestCase
4
- from numba.cuda.cudadrv.driver import PyNvJitLinker
5
4
  from numba.cuda import get_current_device
5
+ from numba.cuda.cudadrv.driver import _Linker, _have_nvjitlink
6
6
 
7
7
  from numba import cuda
8
8
  from numba import config
9
- from numba.tests.support import run_in_subprocess, override_config
10
9
 
11
- try:
12
- import pynvjitlink # noqa: F401
13
-
14
- PYNVJITLINK_INSTALLED = True
15
- except ImportError:
16
- PYNVJITLINK_INSTALLED = False
17
-
18
-
19
- import itertools
20
10
  import os
21
11
  import io
22
12
  import contextlib
@@ -52,85 +42,13 @@ if TEST_BIN_DIR:
52
42
 
53
43
 
54
44
  @unittest.skipIf(
55
- not config.CUDA_ENABLE_PYNVJITLINK or not TEST_BIN_DIR,
56
- "pynvjitlink not enabled",
45
+ not config.CUDA_USE_NVIDIA_BINDING
46
+ or not TEST_BIN_DIR
47
+ or not _have_nvjitlink(),
48
+ "NVIDIA cuda bindings not enabled or nvJitLink not installed or new enough (>12.3)",
57
49
  )
58
50
  @skip_on_cudasim("Linking unsupported in the simulator")
59
51
  class TestLinker(CUDATestCase):
60
- def test_nvjitlink_create(self):
61
- patched_linker = PyNvJitLinker(cc=(7, 5))
62
- assert "-arch=sm_75" in patched_linker.options
63
-
64
- def test_nvjitlink_create_no_cc_error(self):
65
- # nvJitLink expects at least the architecture to be specified.
66
- with self.assertRaisesRegex(
67
- RuntimeError, "PyNvJitLinker requires CC to be specified"
68
- ):
69
- PyNvJitLinker()
70
-
71
- def test_nvjitlink_invalid_arch_error(self):
72
- from pynvjitlink.api import NvJitLinkError
73
-
74
- # CC 0.0 is not a valid compute capability
75
- with self.assertRaisesRegex(
76
- NvJitLinkError, "NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"
77
- ):
78
- PyNvJitLinker(cc=(0, 0))
79
-
80
- def test_nvjitlink_invalid_cc_type_error(self):
81
- with self.assertRaisesRegex(
82
- TypeError, "`cc` must be a list or tuple of length 2"
83
- ):
84
- PyNvJitLinker(cc=0)
85
-
86
- def test_nvjitlink_ptx_compile_options(self):
87
- max_registers = (None, 32)
88
- lineinfo = (False, True)
89
- lto = (False, True)
90
- additional_flags = (None, ("-g",), ("-g", "-time"))
91
- for (
92
- max_registers_i,
93
- line_info_i,
94
- lto_i,
95
- additional_flags_i,
96
- ) in itertools.product(max_registers, lineinfo, lto, additional_flags):
97
- with self.subTest(
98
- max_registers=max_registers_i,
99
- lineinfo=line_info_i,
100
- lto=lto_i,
101
- additional_flags=additional_flags_i,
102
- ):
103
- patched_linker = PyNvJitLinker(
104
- cc=(7, 5),
105
- max_registers=max_registers_i,
106
- lineinfo=line_info_i,
107
- lto=lto_i,
108
- additional_flags=additional_flags_i,
109
- )
110
- assert "-arch=sm_75" in patched_linker.options
111
-
112
- if max_registers_i:
113
- assert (
114
- f"-maxrregcount={max_registers_i}"
115
- in patched_linker.options
116
- )
117
- else:
118
- assert "-maxrregcount" not in patched_linker.options
119
-
120
- if line_info_i:
121
- assert "-lineinfo" in patched_linker.options
122
- else:
123
- assert "-lineinfo" not in patched_linker.options
124
-
125
- if lto_i:
126
- assert "-lto" in patched_linker.options
127
- else:
128
- assert "-lto" not in patched_linker.options
129
-
130
- if additional_flags_i:
131
- for flag in additional_flags_i:
132
- assert flag in patched_linker.options
133
-
134
52
  def test_nvjitlink_add_file_guess_ext_linkable_code(self):
135
53
  files = (
136
54
  test_device_functions_a,
@@ -142,24 +60,20 @@ class TestLinker(CUDATestCase):
142
60
  )
143
61
  for file in files:
144
62
  with self.subTest(file=file):
145
- patched_linker = PyNvJitLinker(
146
- cc=get_current_device().compute_capability
147
- )
148
- patched_linker.add_file_guess_ext(file)
63
+ linker = _Linker(cc=get_current_device().compute_capability)
64
+ linker.add_file_guess_ext(file)
149
65
 
150
66
  def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
151
67
  with open(test_device_functions_cubin, "rb") as f:
152
68
  content = f.read()
153
69
 
154
- patched_linker = PyNvJitLinker(
155
- cc=get_current_device().compute_capability
156
- )
70
+ linker = _Linker(cc=get_current_device().compute_capability)
157
71
  with self.assertRaisesRegex(
158
72
  TypeError, "Expected path to file or a LinkableCode"
159
73
  ):
160
74
  # Feeding raw data as bytes to add_file_guess_ext should raise,
161
75
  # because there's no way to know what kind of file to treat it as
162
- patched_linker.add_file_guess_ext(content)
76
+ linker.add_file_guess_ext(content)
163
77
 
164
78
  def test_nvjitlink_jit_with_linkable_code(self):
165
79
  files = (
@@ -261,77 +175,5 @@ class TestLinker(CUDATestCase):
261
175
  pass
262
176
 
263
177
 
264
- @unittest.skipIf(
265
- not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
266
- reason="pynvjitlink not enabled",
267
- )
268
- @skip_on_cudasim("Linking unsupported in the simulator")
269
- class TestLinkerUsage(CUDATestCase):
270
- """Test that whether pynvjitlink can be enabled by both environment variable
271
- and modification of config at runtime.
272
- """
273
-
274
- src = """if 1:
275
- import os
276
- from numba import cuda, config
277
-
278
- {config}
279
-
280
- TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
281
- if TEST_BIN_DIR:
282
- test_device_functions_cubin = os.path.join(
283
- TEST_BIN_DIR, "test_device_functions.cubin"
284
- )
285
-
286
- sig = "uint32(uint32, uint32)"
287
- add_from_numba = cuda.declare_device("add_from_numba", sig)
288
-
289
- @cuda.jit(link=[test_device_functions_cubin], lto=True)
290
- def kernel(result):
291
- result[0] = add_from_numba(1, 2)
292
-
293
- result = cuda.device_array(1)
294
- kernel[1, 1](result)
295
- assert result[0] == 3
296
- """
297
-
298
- def test_linker_enabled_envvar(self):
299
- env = os.environ.copy()
300
- env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
301
- run_in_subprocess(self.src.format(config=""), env=env)
302
-
303
- def test_linker_disabled_envvar(self):
304
- env = os.environ.copy()
305
- env["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "0"
306
- with self.assertRaisesRegex(
307
- AssertionError, "LTO and additional flags require PyNvJitLinker"
308
- ):
309
- # Actual error raised is `ValueError`, but `run_in_subprocess`
310
- # reraises as AssertionError.
311
- run_in_subprocess(self.src.format(config=""), env=env)
312
-
313
- def test_linker_enabled_config(self):
314
- env = os.environ.copy()
315
- env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
316
- run_in_subprocess(
317
- self.src.format(config="config.CUDA_ENABLE_PYNVJITLINK = True"),
318
- env=env,
319
- )
320
-
321
- def test_linker_disabled_config(self):
322
- env = os.environ.copy()
323
- env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
324
- with override_config("CUDA_ENABLE_PYNVJITLINK", False):
325
- with self.assertRaisesRegex(
326
- AssertionError, "LTO and additional flags require PyNvJitLinker"
327
- ):
328
- run_in_subprocess(
329
- self.src.format(
330
- config="config.CUDA_ENABLE_PYNVJITLINK = False"
331
- ),
332
- env=env,
333
- )
334
-
335
-
336
178
  if __name__ == "__main__":
337
179
  unittest.main()
@@ -0,0 +1,27 @@
1
+ from numba.cuda.cudadrv import nvrtc
2
+ from numba.cuda.testing import skip_on_cudasim
3
+
4
+ import unittest
5
+
6
+
7
+ @skip_on_cudasim("NVVM Driver unsupported in the simulator")
8
+ class TestArchOption(unittest.TestCase):
9
+ def test_get_arch_option(self):
10
+ # Test returning the nearest lowest arch.
11
+ self.assertEqual(nvrtc.get_arch_option(7, 5), "compute_75")
12
+ self.assertEqual(nvrtc.get_arch_option(7, 7), "compute_75")
13
+ self.assertEqual(nvrtc.get_arch_option(8, 5), "compute_80")
14
+ self.assertEqual(nvrtc.get_arch_option(9, 1), "compute_90")
15
+ # Test known arch.
16
+ supported_cc = nvrtc.NVRTC().get_supported_archs()
17
+ for arch in supported_cc:
18
+ self.assertEqual(
19
+ nvrtc.get_arch_option(*arch), "compute_%d%d" % arch
20
+ )
21
+ self.assertEqual(
22
+ nvrtc.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1]
23
+ )
24
+
25
+
26
+ if __name__ == "__main__":
27
+ unittest.main()
@@ -1,7 +1,7 @@
1
1
  import warnings
2
2
 
3
3
  from llvmlite import ir
4
- from numba.cuda.cudadrv import nvvm, runtime
4
+ from numba.cuda.cudadrv import nvrtc, nvvm, runtime
5
5
  from numba.cuda.testing import unittest
6
6
  from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM
7
7
  from numba.cuda.testing import skip_on_cudasim
@@ -30,7 +30,7 @@ class TestNvvmDriver(unittest.TestCase):
30
30
  self.skipTest("-gen-lto unavailable in this toolkit version")
31
31
 
32
32
  nvvmir = self.get_nvvmir()
33
- arch = "compute_%d%d" % nvvm.LOWEST_CURRENT_CC
33
+ arch = "compute_%d%d" % nvrtc.get_lowest_supported_cc()
34
34
  ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch=arch)
35
35
 
36
36
  # Verify we correctly passed the option by checking if we got LTOIR
@@ -110,7 +110,7 @@ class TestNvvmDriver(unittest.TestCase):
110
110
 
111
111
  def test_nvvm_support(self):
112
112
  """Test supported CC by NVVM"""
113
- for arch in nvvm.get_supported_ccs():
113
+ for arch in nvrtc.get_supported_ccs():
114
114
  self._test_nvvm_support(arch=arch)
115
115
 
116
116
  def test_nvvm_warning(self):
@@ -135,22 +135,6 @@ class TestNvvmDriver(unittest.TestCase):
135
135
  self.assertIn("overriding noinline attribute", str(w[0]))
136
136
 
137
137
 
138
- @skip_on_cudasim("NVVM Driver unsupported in the simulator")
139
- class TestArchOption(unittest.TestCase):
140
- def test_get_arch_option(self):
141
- # Test returning the nearest lowest arch.
142
- self.assertEqual(nvvm.get_arch_option(7, 5), "compute_75")
143
- self.assertEqual(nvvm.get_arch_option(7, 7), "compute_75")
144
- self.assertEqual(nvvm.get_arch_option(8, 8), "compute_87")
145
- # Test known arch.
146
- supported_cc = nvvm.get_supported_ccs()
147
- for arch in supported_cc:
148
- self.assertEqual(nvvm.get_arch_option(*arch), "compute_%d%d" % arch)
149
- self.assertEqual(
150
- nvvm.get_arch_option(1000, 0), "compute_%d%d" % supported_cc[-1]
151
- )
152
-
153
-
154
138
  @skip_on_cudasim("NVVM Driver unsupported in the simulator")
155
139
  class TestLibDevice(unittest.TestCase):
156
140
  def test_libdevice_load(self):
@@ -1,9 +1,6 @@
1
1
  import multiprocessing
2
2
  import os
3
- from numba.core import config
4
- from numba.cuda.cudadrv.runtime import runtime
5
- from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim
6
- from unittest.mock import patch
3
+ from numba.cuda.testing import unittest, SerialMixin
7
4
 
8
5
 
9
6
  def set_visible_devices_and_check(q):
@@ -18,39 +15,6 @@ def set_visible_devices_and_check(q):
18
15
  q.put(-1)
19
16
 
20
17
 
21
- if config.ENABLE_CUDASIM:
22
- SUPPORTED_VERSIONS = ((-1, -1),)
23
- else:
24
- SUPPORTED_VERSIONS = (
25
- (11, 0),
26
- (11, 1),
27
- (11, 2),
28
- (11, 3),
29
- (11, 4),
30
- (11, 5),
31
- (11, 6),
32
- (11, 7),
33
- )
34
-
35
-
36
- class TestRuntime(unittest.TestCase):
37
- def test_is_supported_version_true(self):
38
- for v in SUPPORTED_VERSIONS:
39
- with patch.object(runtime, "get_version", return_value=v):
40
- self.assertTrue(runtime.is_supported_version())
41
-
42
- @skip_on_cudasim("The simulator always simulates a supported runtime")
43
- def test_is_supported_version_false(self):
44
- # Check with an old unsupported version and some potential future
45
- # versions
46
- for v in ((10, 2), (11, 8), (12, 0)):
47
- with patch.object(runtime, "get_version", return_value=v):
48
- self.assertFalse(runtime.is_supported_version())
49
-
50
- def test_supported_versions(self):
51
- self.assertEqual(SUPPORTED_VERSIONS, runtime.supported_versions)
52
-
53
-
54
18
  class TestVisibleDevices(unittest.TestCase, SerialMixin):
55
19
  def test_visible_devices_set_after_import(self):
56
20
  # See Issue #6149. This test checks that we can set
@@ -1,8 +1,6 @@
1
- from numba.cuda.testing import ensure_supported_ccs_initialized
2
1
  from numba.cuda.tests import load_testsuite
3
2
  import os
4
3
 
5
4
 
6
5
  def load_tests(loader, tests, pattern):
7
- ensure_supported_ccs_initialized()
8
6
  return load_testsuite(loader, os.path.dirname(__file__))
@@ -265,7 +265,7 @@ class TestCompileForCurrentDevice(CUDATestCase):
265
265
  # Check we target the current device's compute capability, or the
266
266
  # closest compute capability supported by the current toolkit.
267
267
  device_cc = cuda.get_current_device().compute_capability
268
- cc = cuda.cudadrv.nvvm.find_closest_arch(device_cc)
268
+ cc = cuda.cudadrv.nvrtc.find_closest_arch(device_cc)
269
269
  target = f".target sm_{cc[0]}{cc[1]}"
270
270
  self.assertIn(target, ptx)
271
271
 
@@ -14,15 +14,6 @@ class TestCudaDebugInfo(CUDATestCase):
14
14
  These tests only checks the compiled PTX for debuginfo section
15
15
  """
16
16
 
17
- def setUp(self):
18
- super().setUp()
19
- # If we're using LTO then we can't check the PTX in these tests,
20
- # because we produce LTO-IR, which is opaque to the user.
21
- # Additionally, LTO optimizes away the exception status due to an
22
- # oversight in the way we generate it (it is not added to the used
23
- # list).
24
- self.skip_if_lto("Exceptions not supported with LTO")
25
-
26
17
  def _getasm(self, fn, sig):
27
18
  fn.compile(sig)
28
19
  return fn.inspect_asm(sig)
@@ -1,6 +1,7 @@
1
1
  from numba import cuda
2
2
  from numba.core.errors import TypingError
3
3
  from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
4
+ from numba import config
4
5
 
5
6
 
6
7
  def noop(x):
@@ -89,6 +90,19 @@ class TestJitErrors(CUDATestCase):
89
90
  self.assertIn("resolving callee type: type(CUDADispatcher", excstr)
90
91
  self.assertIn("NameError: name 'floor' is not defined", excstr)
91
92
 
93
+ @skip_on_cudasim("Simulator does not use pynvjitlink")
94
+ @unittest.skipIf(
95
+ config.CUDA_USE_NVIDIA_BINDING, "NVIDIA cuda bindings enabled"
96
+ )
97
+ def test_lto_without_nvjitlink_error(self):
98
+ with self.assertRaisesRegex(RuntimeError, "LTO requires nvjitlink"):
99
+
100
+ @cuda.jit(lto=True)
101
+ def f():
102
+ pass
103
+
104
+ f[1, 1]()
105
+
92
106
 
93
107
  if __name__ == "__main__":
94
108
  unittest.main()
@@ -6,12 +6,6 @@ from numba.core import config
6
6
 
7
7
 
8
8
  class TestException(CUDATestCase):
9
- def setUp(self):
10
- super().setUp()
11
- # LTO optimizes away the exception status due to an oversight
12
- # in the way we generate it (it is not added to the used list).
13
- self.skip_if_lto("Exceptions not supported with LTO")
14
-
15
9
  def test_exception(self):
16
10
  def foo(ary):
17
11
  x = cuda.threadIdx.x
@@ -1,4 +1,5 @@
1
1
  from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
2
+ from numba.cuda.cudadrv.driver import _have_nvjitlink
2
3
  from llvmlite import ir
3
4
 
4
5
  import numpy as np
@@ -210,7 +211,7 @@ class TestExtendingLinkage(CUDATestCase):
210
211
  (test_device_functions_ltoir, cuda.LTOIR),
211
212
  )
212
213
 
213
- lto = config.CUDA_ENABLE_PYNVJITLINK
214
+ lto = _have_nvjitlink()
214
215
 
215
216
  for path, ctor in files:
216
217
  if ctor == cuda.LTOIR and not lto:
@@ -188,10 +188,6 @@ class TestFastMathOption(CUDATestCase):
188
188
  )
189
189
 
190
190
  def test_divf_exception(self):
191
- # LTO optimizes away the exception status due to an oversight
192
- # in the way we generate it (it is not added to the used list).
193
- self.skip_if_lto("Exceptions not supported with LTO")
194
-
195
191
  def f10(r, x, y):
196
192
  r[0] = x / y
197
193
 
@@ -198,6 +198,24 @@ class TestCudaLineInfo(CUDATestCase):
198
198
  "debug and lineinfo are mutually exclusive", str(w[0].message)
199
199
  )
200
200
 
201
+ def test_lineinfo_with_compile_internal(self):
202
+ # Calling a function implemented using compile_internal should not
203
+ # enable full debug info generation. See Numba-CUDA Issue #271,
204
+ # https://github.com/NVIDIA/numba-cuda/issues/271
205
+
206
+ @cuda.jit("void(complex128[::1], complex128[::1])", lineinfo=True)
207
+ def complex_abs_use(r, x):
208
+ r[0] = abs(x[0])
209
+
210
+ cc = cuda.get_current_device().compute_capability
211
+ ov = complex_abs_use.overloads[complex_abs_use.signatures[0]]
212
+ ptx = ov.inspect_asm(cc)
213
+
214
+ target = ".target sm_%s%s" % cc
215
+ target_debug = f"{target}, debug"
216
+ self.assertIn(target, ptx)
217
+ self.assertNotIn(target_debug, ptx)
218
+
201
219
 
202
220
  if __name__ == "__main__":
203
221
  unittest.main()
@@ -13,13 +13,6 @@ regex_pattern = (
13
13
 
14
14
 
15
15
  class TestUserExc(CUDATestCase):
16
- def setUp(self):
17
- super().setUp()
18
- # LTO optimizes away the exception status due to an oversight
19
- # in the way we generate it (it is not added to the used list).
20
- # See https://github.com/numba/numba/issues/9526.
21
- self.skip_if_lto("Exceptions not supported with LTO")
22
-
23
16
  def test_user_exception(self):
24
17
  @cuda.jit("void(int32)", debug=True, opt=False)
25
18
  def test_exc(x):
@@ -1,8 +1,6 @@
1
- from numba.cuda.testing import ensure_supported_ccs_initialized
2
1
  from numba.cuda.tests import load_testsuite
3
2
  import os
4
3
 
5
4
 
6
5
  def load_tests(loader, tests, pattern):
7
- ensure_supported_ccs_initialized()
8
6
  return load_testsuite(loader, os.path.dirname(__file__))
@@ -1,8 +1,6 @@
1
- from numba.cuda.testing import ensure_supported_ccs_initialized
2
1
  from numba.cuda.tests import load_testsuite
3
2
  import os
4
3
 
5
4
 
6
5
  def load_tests(loader, tests, pattern):
7
- ensure_supported_ccs_initialized()
8
6
  return load_testsuite(loader, os.path.dirname(__file__))
@@ -169,7 +169,16 @@ class TestNrtLinking(CUDATestCase):
169
169
  cc = get_current_device().compute_capability
170
170
  ptx, _ = compile(src, "external_nrt.cu", cc)
171
171
 
172
- @cuda.jit(link=[PTXSource(ptx.encode(), nrt=True)])
172
+ @cuda.jit(
173
+ link=[
174
+ PTXSource(
175
+ ptx.code
176
+ if config.CUDA_USE_NVIDIA_BINDING
177
+ else ptx.encode(),
178
+ nrt=True,
179
+ )
180
+ ]
181
+ )
173
182
  def kernel():
174
183
  allocate_deallocate_handle()
175
184
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numba-cuda
3
- Version: 0.15.1
3
+ Version: 0.16.0
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -14,16 +14,19 @@ License-File: LICENSE
14
14
  Requires-Dist: numba>=0.59.1
15
15
  Provides-Extra: cu11
16
16
  Requires-Dist: cuda-bindings==11.8.*; extra == "cu11"
17
+ Requires-Dist: cuda-core==0.3.*; extra == "cu11"
17
18
  Requires-Dist: cuda-python==11.8.*; extra == "cu11"
18
19
  Requires-Dist: nvidia-cuda-nvcc-cu11; extra == "cu11"
19
20
  Requires-Dist: nvidia-cuda-runtime-cu11; extra == "cu11"
20
21
  Requires-Dist: nvidia-cuda-nvrtc-cu11; extra == "cu11"
21
22
  Provides-Extra: cu12
22
23
  Requires-Dist: cuda-bindings==12.9.*; extra == "cu12"
24
+ Requires-Dist: cuda-core==0.3.*; extra == "cu12"
23
25
  Requires-Dist: cuda-python==12.9.*; extra == "cu12"
24
26
  Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
25
27
  Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
26
28
  Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "cu12"
29
+ Requires-Dist: nvidia-nvjitlink-cu12; extra == "cu12"
27
30
  Provides-Extra: test
28
31
  Requires-Dist: psutil; extra == "test"
29
32
  Requires-Dist: cffi; extra == "test"
@@ -51,17 +54,10 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
51
54
  To raise questions or initiate discussions, please use the [Numba Discourse
52
55
  forum](https://numba.discourse.group).
53
56
 
54
- ## Installation with pip
57
+ ## Installation with pip or conda
55
58
 
56
- ```shell
57
- pip install numba-cuda
58
- ```
59
-
60
- ## Installation with Conda
59
+ Please refer to the [Installation documentation](https://nvidia.github.io/numba-cuda/user/installation.html#installation-with-a-python-package-manager).
61
60
 
62
- ```shell
63
- conda install -c conda-forge numba-cuda
64
- ```
65
61
 
66
62
  ## Installation from source
67
63
 
@@ -71,6 +67,8 @@ Install as an editable install:
71
67
  pip install -e .
72
68
  ```
73
69
 
70
+ If you want to manage all run-time dependencies yourself, also pass the `--no-deps` flag.
71
+
74
72
  ## Running tests
75
73
 
76
74
  ```