numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +51 -16
  3. numba_cuda/numba/cuda/codegen.py +11 -9
  4. numba_cuda/numba/cuda/compiler.py +3 -39
  5. numba_cuda/numba/cuda/cuda_paths.py +20 -22
  6. numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
  7. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  8. numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
  9. numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
  10. numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
  11. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
  12. numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
  13. numba_cuda/numba/cuda/decorators.py +18 -0
  14. numba_cuda/numba/cuda/dispatcher.py +1 -0
  15. numba_cuda/numba/cuda/flags.py +36 -0
  16. numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
  17. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
  18. numba_cuda/numba/cuda/target.py +55 -2
  19. numba_cuda/numba/cuda/testing.py +0 -22
  20. numba_cuda/numba/cuda/tests/__init__.py +0 -2
  21. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
  22. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
  23. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
  24. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
  25. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
  26. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
  27. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
  28. numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
  29. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
  30. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
  31. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
  32. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
  33. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  34. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
  35. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
  36. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
  37. numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
  38. numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
  39. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
  40. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
  41. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
  42. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
  43. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
  44. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
@@ -1,147 +1,16 @@
 """
-CUDA Runtime wrapper.
+Former CUDA Runtime wrapper.
 
-This provides a very minimal set of bindings, since the Runtime API is not
-really used in Numba except for querying the Runtime version.
+The toolkit version can now be obtained from NVRTC, so we don't use a binding
+to the runtime anymore. This file is provided to maintain the existing API.
 """
 
-import ctypes
-import functools
-import sys
-
-from numba.core import config
-from numba.cuda.cudadrv.driver import ERROR_MAP, make_logger
-from numba.cuda.cudadrv.error import CudaSupportError, CudaRuntimeError
-from numba.cuda.cudadrv.libs import open_cudalib
-from numba.cuda.cudadrv.rtapi import API_PROTOTYPES
-from numba.cuda.cudadrv import enums
-
-
-class CudaRuntimeAPIError(CudaRuntimeError):
-    """
-    Raised when there is an error accessing a C API from the CUDA Runtime.
-    """
-
-    def __init__(self, code, msg):
-        self.code = code
-        self.msg = msg
-        super().__init__(code, msg)
-
-    def __str__(self):
-        return "[%s] %s" % (self.code, self.msg)
+from numba.cuda.cudadrv.nvrtc import NVRTC
 
 
 class Runtime:
-    """
-    Runtime object that lazily binds runtime API functions.
-    """
-
-    def __init__(self):
-        self.is_initialized = False
-
-    def _initialize(self):
-        # lazily initialize logger
-        global _logger
-        _logger = make_logger()
-
-        if config.DISABLE_CUDA:
-            msg = (
-                "CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1 "
-                "in the environment, or because CUDA is unsupported on "
-                "32-bit systems."
-            )
-            raise CudaSupportError(msg)
-        self.lib = open_cudalib("cudart")
-
-        self.is_initialized = True
-
-    def __getattr__(self, fname):
-        # First request of a runtime API function
-        try:
-            proto = API_PROTOTYPES[fname]
-        except KeyError:
-            raise AttributeError(fname)
-        restype = proto[0]
-        argtypes = proto[1:]
-
-        if not self.is_initialized:
-            self._initialize()
-
-        # Find function in runtime library
-        libfn = self._find_api(fname)
-        libfn.restype = restype
-        libfn.argtypes = argtypes
-
-        safe_call = self._wrap_api_call(fname, libfn)
-        setattr(self, fname, safe_call)
-        return safe_call
-
-    def _wrap_api_call(self, fname, libfn):
-        @functools.wraps(libfn)
-        def safe_cuda_api_call(*args):
-            _logger.debug("call runtime api: %s", libfn.__name__)
-            retcode = libfn(*args)
-            self._check_error(fname, retcode)
-
-        return safe_cuda_api_call
-
-    def _check_error(self, fname, retcode):
-        if retcode != enums.CUDA_SUCCESS:
-            errname = ERROR_MAP.get(retcode, "cudaErrorUnknown")
-            msg = "Call to %s results in %s" % (fname, errname)
-            _logger.error(msg)
-            raise CudaRuntimeAPIError(retcode, msg)
-
-    def _find_api(self, fname):
-        try:
-            return getattr(self.lib, fname)
-        except AttributeError:
-            pass
-
-        # Not found.
-        # Delay missing function error to use
-        def absent_function(*args, **kws):
-            msg = "runtime missing function: %s."
-            raise CudaRuntimeError(msg % fname)
-
-        setattr(self, fname, absent_function)
-        return absent_function
-
     def get_version(self):
-        """
-        Returns the CUDA Runtime version as a tuple (major, minor).
-        """
-        rtver = ctypes.c_int()
-        self.cudaRuntimeGetVersion(ctypes.byref(rtver))
-        # The version is encoded as (1000 * major) + (10 * minor)
-        major = rtver.value // 1000
-        minor = (rtver.value - (major * 1000)) // 10
-        return (major, minor)
-
-    def is_supported_version(self):
-        """
-        Returns True if the CUDA Runtime is a supported version.
-        """
-
-        return self.get_version() in self.supported_versions
-
-    @property
-    def supported_versions(self):
-        """A tuple of all supported CUDA toolkit versions. Versions are given in
-        the form ``(major_version, minor_version)``."""
-        if sys.platform not in ("linux", "win32") or config.MACHINE_BITS != 64:
-            # Only 64-bit Linux and Windows are supported
-            return ()
-        return (
-            (11, 0),
-            (11, 1),
-            (11, 2),
-            (11, 3),
-            (11, 4),
-            (11, 5),
-            (11, 6),
-            (11, 7),
-        )
+        return NVRTC().get_version()
 
 
 runtime = Runtime()
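The wrapper object itself is kept so existing callers keep working; a minimal usage sketch (illustrative only, assuming the returned value is still a (major, minor) tuple as before):

    from numba.cuda.cudadrv.runtime import runtime

    # The version now comes from NVRTC rather than from libcudart.
    major, minor = runtime.get_version()
    print(f"CUDA toolkit version reported by NVRTC: {major}.{minor}")
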
@@ -4,6 +4,7 @@ from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
 from numba.cuda.compiler import declare_device_function
 from numba.cuda.dispatcher import CUDADispatcher
 from numba.cuda.simulator.kernel import FakeCUDAKernel
+from numba.cuda.cudadrv.driver import _have_nvjitlink
 
 
 _msg_deprecated_signature_arg = (
@@ -24,6 +25,7 @@ def jit(
     lineinfo=False,
     cache=False,
     launch_bounds=None,
+    lto=None,
     **kws,
 ):
     """
@@ -83,6 +85,10 @@ def jit(
                           If a scalar is provided, it is used as the maximum
                           number of threads per block.
     :type launch_bounds: int | tuple[int]
+    :param lto: Whether to enable LTO. If unspecified, LTO is enabled by
+                default when pynvjitlink is available, except for kernels where
+                ``debug=True``.
+    :type lto: bool
     """
 
     if link and config.ENABLE_CUDASIM:
@@ -136,6 +142,16 @@ def jit(
     if device and kws.get("link"):
         raise ValueError("link keyword invalid for device function")
 
+    if lto is None:
+        # Default to using LTO if pynvjitlink is available and we're not debugging
+        lto = _have_nvjitlink() and not debug
+    else:
+        if lto and not _have_nvjitlink():
+            raise RuntimeError(
+                "LTO requires nvjitlink, which is not available"
+                "or not sufficiently recent (>=12.3)"
+            )
+
     if sigutils.is_signature(func_or_sig):
         signatures = [func_or_sig]
         specialized = True
@@ -165,6 +181,7 @@ def jit(
         targetoptions["forceinline"] = forceinline
         targetoptions["extensions"] = extensions
         targetoptions["launch_bounds"] = launch_bounds
+        targetoptions["lto"] = lto
 
         disp = CUDADispatcher(func, targetoptions=targetoptions)
 
@@ -235,6 +252,7 @@ def jit(
         targetoptions["forceinline"] = forceinline
         targetoptions["extensions"] = extensions
        targetoptions["launch_bounds"] = launch_bounds
+        targetoptions["lto"] = lto
         disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
 
         if cache:
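A short caller-side sketch of the new keyword (illustrative only; forcing lto=True assumes pynvjitlink >= 12.3 is installed):

    from numba import cuda

    # Opt in to LTO explicitly; omitting lto= lets the default enable it
    # whenever pynvjitlink is available and debug is not set.
    @cuda.jit(lto=True)
    def axpy(r, a, x, y):
        i = cuda.grid(1)
        if i < r.size:
            r[i] = a * x[i] + y[i]
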
@@ -129,6 +129,7 @@ class _Kernel(serialize.ReduceMixin):
             nvvm_options["g"] = None
 
         cc = get_current_device().compute_capability
+
         cres = compile_cuda(
             self.py_func,
             types.void,
@@ -0,0 +1,36 @@
+from numba.core.compiler import Flags, Option
+
+
+def _nvvm_options_type(x):
+    if x is None:
+        return None
+
+    else:
+        assert isinstance(x, dict)
+        return x
+
+
+def _optional_int_type(x):
+    if x is None:
+        return None
+
+    else:
+        assert isinstance(x, int)
+        return x
+
+
+class CUDAFlags(Flags):
+    nvvm_options = Option(
+        type=_nvvm_options_type,
+        default=None,
+        doc="NVVM options",
+    )
+    compute_capability = Option(
+        type=tuple,
+        default=None,
+        doc="Compute Capability",
+    )
+    max_registers = Option(
+        type=_optional_int_type, default=None, doc="Max registers"
+    )
+    lto = Option(type=bool, default=False, doc="Enable Link-time Optimization")
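An illustrative sketch of how these options validate their values; the concrete settings below are made up:

    from numba.cuda.flags import CUDAFlags

    flags = CUDAFlags()
    flags.nvvm_options = {"opt": 3}    # must be a dict (or None)
    flags.compute_capability = (7, 5)  # must be a tuple
    flags.max_registers = 64           # must be an int (or None)
    flags.lto = True                   # plain bool, defaults to False
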
@@ -6,7 +6,7 @@ import numpy as np
 from numba import cuda, config
 from numba.core.runtime.nrt import _nrt_mstats
 from numba.cuda.cudadrv.driver import (
-    Linker,
+    _Linker,
     driver,
     launch_kernel,
     USE_NV_BINDING,
@@ -80,7 +80,7 @@ class _Runtime:
         cc = get_current_device().compute_capability
 
         # Create a new linker instance and add the cu file
-        linker = Linker.new(cc=cc)
+        linker = _Linker.new(cc=cc)
         linker.add_cu_file(memsys_mod)
 
         # Complete the linker and create a module from it
@@ -34,10 +34,10 @@ class FakeDriver(object):
 driver = FakeDriver()
 
 
-class Linker:
+class _Linker:
     @classmethod
     def new(cls, max_registers=0, lineinfo=False, cc=None):
-        return Linker()
+        return _Linker()
 
     @property
     def lto(self):
@@ -67,3 +67,7 @@ PyNvJitLinker = None
 
 if config.ENABLE_CUDASIM:
     config.CUDA_ENABLE_PYNVJITLINK = False
+
+
+def _have_nvjitlink():
+    return False
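The stub above always reports False under the simulator, while the real driver module exposes a helper of the same name that the jit decorator's lto default consults. A rough sketch of that decision, with the debug value made up for illustration:

    from numba.cuda.cudadrv.driver import _have_nvjitlink

    debug = False  # illustrative
    lto_default = _have_nvjitlink() and not debug
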
@@ -2,9 +2,20 @@ import re
 from functools import cached_property
 import llvmlite.binding as ll
 from llvmlite import ir
-
-from numba.core import cgutils, config, itanium_mangler, types, typing
+import warnings
+
+from numba.core import (
+    cgutils,
+    compiler,
+    config,
+    itanium_mangler,
+    targetconfig,
+    types,
+    typing,
+)
+from numba.core.compiler_lock import global_compiler_lock
 from numba.core.dispatcher import Dispatcher
+from numba.core.errors import NumbaWarning
 from numba.core.base import BaseContext
 from numba.core.callconv import BaseCallConv, MinimalCallConv
 from numba.core.typing import cmathdecl
@@ -13,6 +24,7 @@ from numba.core import datamodel
 from .cudadrv import nvvm
 from numba.cuda import codegen, ufuncs
 from numba.cuda.debuginfo import CUDADIBuilder
+from numba.cuda.flags import CUDAFlags
 from numba.cuda.models import cuda_data_manager
 
 # -----------------------------------------------------------------------------
@@ -288,6 +300,47 @@ class CUDATargetContext(BaseContext):
     def get_ufunc_info(self, ufunc_key):
         return ufuncs.get_ufunc_info(ufunc_key)
 
+    def _compile_subroutine_no_cache(
+        self, builder, impl, sig, locals=None, flags=None
+    ):
+        # Overrides numba.core.base.BaseContext._compile_subroutine_no_cache().
+        # Modified to use flags from the context stack if they are not provided
+        # (pending a fix in Numba upstream).
+
+        if locals is None:
+            locals = {}
+
+        with global_compiler_lock:
+            codegen = self.codegen()
+            library = codegen.create_library(impl.__name__)
+            if flags is None:
+                cstk = targetconfig.ConfigStack()
+                if cstk:
+                    flags = cstk.top().copy()
+                else:
+                    msg = "There should always be a context stack; none found."
+                    warnings.warn(msg, NumbaWarning)
+                    flags = CUDAFlags()
+
+            flags.no_compile = True
+            flags.no_cpython_wrapper = True
+            flags.no_cfunc_wrapper = True
+
+            cres = compiler.compile_internal(
+                self.typing_context,
+                self,
+                library,
+                impl,
+                sig.args,
+                sig.return_type,
+                flags,
+                locals=locals,
+            )
+
+            # Allow inlining the function inside callers
+            self.active_code_library.add_linking_library(cres.library)
+            return cres
+
 
 class CUDACallConv(MinimalCallConv):
     def decorate_function(self, fn, args, fe_argtypes, noalias=False):
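For context, a condensed sketch of the flag recovery the override performs when no flags are passed (names taken from the diff above; this is not a public API):

    from numba.core import targetconfig
    from numba.cuda.flags import CUDAFlags

    cstk = targetconfig.ConfigStack()
    flags = cstk.top().copy() if cstk else CUDAFlags()
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.no_cfunc_wrapper = True
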
@@ -35,14 +35,6 @@ class CUDATestCase(SerialMixin, TestCase):
         config.CUDA_LOW_OCCUPANCY_WARNINGS = self._low_occupancy_warnings
         config.CUDA_WARN_ON_IMPLICIT_COPY = self._warn_on_implicit_copy
 
-    def skip_if_lto(self, reason):
-        # Some linkers need the compute capability to be specified, so we
-        # always specify it here.
-        cc = devices.get_context().device.compute_capability
-        linker = driver.Linker.new(cc=cc)
-        if linker.lto:
-            self.skipTest(reason)
-
 
 class ContextResettingTestCase(CUDATestCase):
     """
@@ -59,20 +51,6 @@ class ContextResettingTestCase(CUDATestCase):
         reset()
 
 
-def ensure_supported_ccs_initialized():
-    from numba.cuda import is_available as cuda_is_available
-    from numba.cuda.cudadrv import nvvm
-
-    if cuda_is_available():
-        # Ensure that cudart.so is loaded and the list of supported compute
-        # capabilities in the nvvm module is populated before a fork. This is
-        # needed because some compilation tests don't require a CUDA context,
-        # but do use NVVM, and it is required that libcudart.so should be
-        # loaded before a fork (note that the requirement is not explicitly
-        # documented).
-        nvvm.get_supported_ccs()
-
-
 def skip_on_cudasim(reason):
     """Skip this test if running on the CUDA simulator"""
     return unittest.skipIf(config.ENABLE_CUDASIM, reason)
@@ -1,5 +1,4 @@
 from fnmatch import fnmatch
-from numba.cuda.testing import ensure_supported_ccs_initialized
 from numba.testing import unittest
 from numba import cuda
 from os.path import dirname, isfile, join, normpath, relpath, splitext
@@ -42,7 +41,6 @@ def load_testsuite(loader, dir):
 def load_tests(loader, tests, pattern):
     suite = unittest.TestSuite()
     this_dir = dirname(__file__)
-    ensure_supported_ccs_initialized()
     suite.addTests(load_testsuite(loader, join(this_dir, "nocuda")))
     if cuda.is_available():
         suite.addTests(load_testsuite(loader, join(this_dir, "cudasim")))
@@ -1,8 +1,6 @@
-from numba.cuda.testing import ensure_supported_ccs_initialized
 from numba.cuda.tests import load_testsuite
 import os
 
 
 def load_tests(loader, tests, pattern):
-    ensure_supported_ccs_initialized()
     return load_testsuite(loader, os.path.dirname(__file__))
@@ -109,7 +109,21 @@ class Test3rdPartyContext(CUDATestCase):
         if driver.USE_NV_BINDING:
             flags = 0
             dev = driver.binding.CUdevice(0)
-            hctx = the_driver.cuCtxCreate(flags, dev)
+
+            result, version = driver.binding.cuDriverGetVersion()
+            self.assertEqual(
+                result,
+                driver.binding.CUresult.CUDA_SUCCESS,
+                "Error getting CUDA driver version",
+            )
+
+            # CUDA 13's cuCtxCreate has an optional parameter prepended
+            if version >= 13000:
+                args = (None, flags, dev)
+            else:
+                args = (flags, dev)
+
+            hctx = the_driver.cuCtxCreate(*args)
         else:
             hctx = driver.drvapi.cu_context()
             the_driver.cuCtxCreate(byref(hctx), 0, 0)
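A hedged sketch of the same version check outside the test harness, assuming the cuda.bindings.driver module from cuda-python is the binding in use:

    from cuda.bindings import driver as cu

    err, version = cu.cuDriverGetVersion()
    assert err == cu.CUresult.CUDA_SUCCESS

    # CUDA 13 prepends an optional parameter to cuCtxCreate, so build the
    # argument tuple based on the reported driver version.
    dev = cu.CUdevice(0)
    args = (None, 0, dev) if version >= 13000 else (0, dev)
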
@@ -1,14 +1,14 @@
 import numpy as np
 import warnings
+from numba import config
 from numba.cuda.testing import unittest
 from numba.cuda.testing import skip_on_cudasim, skip_if_cuda_includes_missing
 from numba.cuda.testing import CUDATestCase, test_data_dir
-from numba.cuda.cudadrv.driver import CudaAPIError, Linker, LinkerError
-from numba.cuda.cudadrv.error import NvrtcError
+from numba.cuda.cudadrv.driver import CudaAPIError, _Linker, LinkerError
 from numba.cuda import require_context
 from numba.tests.support import ignore_internal_warnings
 from numba import cuda, void, float64, int64, int32, typeof, float32
-
+from numba.cuda.cudadrv.error import NvrtcError
 
 CONST1D = np.arange(10, dtype=np.float64)
 
@@ -107,7 +107,7 @@ class TestLinker(CUDATestCase):
     @require_context
     def test_linker_basic(self):
         """Simply go through the constructor and destructor"""
-        linker = Linker.new(cc=(5, 3))
+        linker = _Linker.new(cc=(7, 5))
         del linker
 
     def _test_linking(self, eager):
@@ -183,7 +183,13 @@ class TestLinker(CUDATestCase):
 
         link = str(test_data_dir / "error.cu")
 
-        with self.assertRaises(NvrtcError) as e:
+        if config.CUDA_USE_NVIDIA_BINDING:
+            from cuda.core.experimental._utils.cuda_utils import NVRTCError
+
+            errty = NVRTCError
+        else:
+            errty = NvrtcError
+        with self.assertRaises(errty) as e:
 
            @cuda.jit("void(int32)", link=[link])
            def kernel(x):
@@ -191,7 +197,12 @@ class TestLinker(CUDATestCase):
 
         msg = e.exception.args[0]
         # Check the error message refers to the NVRTC compile
-        self.assertIn("NVRTC Compilation failure", msg)
+        nvrtc_err_str = (
+            "NVRTC_ERROR_COMPILATION"
+            if config.CUDA_USE_NVIDIA_BINDING
+            else "NVRTC Compilation failure"
+        )
+        self.assertIn(nvrtc_err_str, msg)
         # Check the expected error in the CUDA source is reported
         self.assertIn('identifier "SYNTAX" is undefined', msg)
         # Check the filename is reported correctly