numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +51 -16
  3. numba_cuda/numba/cuda/codegen.py +11 -9
  4. numba_cuda/numba/cuda/compiler.py +3 -39
  5. numba_cuda/numba/cuda/cuda_paths.py +20 -22
  6. numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
  7. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  8. numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
  9. numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
  10. numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
  11. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
  12. numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
  13. numba_cuda/numba/cuda/decorators.py +18 -0
  14. numba_cuda/numba/cuda/dispatcher.py +1 -0
  15. numba_cuda/numba/cuda/flags.py +36 -0
  16. numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
  17. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
  18. numba_cuda/numba/cuda/target.py +55 -2
  19. numba_cuda/numba/cuda/testing.py +0 -22
  20. numba_cuda/numba/cuda/tests/__init__.py +0 -2
  21. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
  22. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
  23. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
  24. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
  25. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
  26. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
  27. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
  28. numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
  29. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
  30. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
  31. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
  32. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
  33. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  34. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
  35. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
  36. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
  37. numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
  38. numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
  39. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
  40. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
  41. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
  42. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
  43. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
  44. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
@@ -1,147 +1,16 @@
 """
-CUDA Runtime wrapper.
+Former CUDA Runtime wrapper.
 
-This provides a very minimal set of bindings, since the Runtime API is not
-really used in Numba except for querying the Runtime version.
+The toolkit version can now be obtained from NVRTC, so we don't use a binding
+to the runtime anymore. This file is provided to maintain the existing API.
 """
 
-import ctypes
-import functools
-import sys
-
-from numba.core import config
-from numba.cuda.cudadrv.driver import ERROR_MAP, make_logger
-from numba.cuda.cudadrv.error import CudaSupportError, CudaRuntimeError
-from numba.cuda.cudadrv.libs import open_cudalib
-from numba.cuda.cudadrv.rtapi import API_PROTOTYPES
-from numba.cuda.cudadrv import enums
-
-
-class CudaRuntimeAPIError(CudaRuntimeError):
-    """
-    Raised when there is an error accessing a C API from the CUDA Runtime.
-    """
-
-    def __init__(self, code, msg):
-        self.code = code
-        self.msg = msg
-        super().__init__(code, msg)
-
-    def __str__(self):
-        return "[%s] %s" % (self.code, self.msg)
+from numba.cuda.cudadrv.nvrtc import NVRTC
 
 
 class Runtime:
-    """
-    Runtime object that lazily binds runtime API functions.
-    """
-
-    def __init__(self):
-        self.is_initialized = False
-
-    def _initialize(self):
-        # lazily initialize logger
-        global _logger
-        _logger = make_logger()
-
-        if config.DISABLE_CUDA:
-            msg = (
-                "CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1 "
-                "in the environment, or because CUDA is unsupported on "
-                "32-bit systems."
-            )
-            raise CudaSupportError(msg)
-        self.lib = open_cudalib("cudart")
-
-        self.is_initialized = True
-
-    def __getattr__(self, fname):
-        # First request of a runtime API function
-        try:
-            proto = API_PROTOTYPES[fname]
-        except KeyError:
-            raise AttributeError(fname)
-        restype = proto[0]
-        argtypes = proto[1:]
-
-        if not self.is_initialized:
-            self._initialize()
-
-        # Find function in runtime library
-        libfn = self._find_api(fname)
-        libfn.restype = restype
-        libfn.argtypes = argtypes
-
-        safe_call = self._wrap_api_call(fname, libfn)
-        setattr(self, fname, safe_call)
-        return safe_call
-
-    def _wrap_api_call(self, fname, libfn):
-        @functools.wraps(libfn)
-        def safe_cuda_api_call(*args):
-            _logger.debug("call runtime api: %s", libfn.__name__)
-            retcode = libfn(*args)
-            self._check_error(fname, retcode)
-
-        return safe_cuda_api_call
-
-    def _check_error(self, fname, retcode):
-        if retcode != enums.CUDA_SUCCESS:
-            errname = ERROR_MAP.get(retcode, "cudaErrorUnknown")
-            msg = "Call to %s results in %s" % (fname, errname)
-            _logger.error(msg)
-            raise CudaRuntimeAPIError(retcode, msg)
-
-    def _find_api(self, fname):
-        try:
-            return getattr(self.lib, fname)
-        except AttributeError:
-            pass
-
-        # Not found.
-        # Delay missing function error to use
-        def absent_function(*args, **kws):
-            msg = "runtime missing function: %s."
-            raise CudaRuntimeError(msg % fname)
-
-        setattr(self, fname, absent_function)
-        return absent_function
-
     def get_version(self):
-        """
-        Returns the CUDA Runtime version as a tuple (major, minor).
-        """
-        rtver = ctypes.c_int()
-        self.cudaRuntimeGetVersion(ctypes.byref(rtver))
-        # The version is encoded as (1000 * major) + (10 * minor)
-        major = rtver.value // 1000
-        minor = (rtver.value - (major * 1000)) // 10
-        return (major, minor)
-
-    def is_supported_version(self):
-        """
-        Returns True if the CUDA Runtime is a supported version.
-        """
-
-        return self.get_version() in self.supported_versions
-
-    @property
-    def supported_versions(self):
-        """A tuple of all supported CUDA toolkit versions. Versions are given in
-        the form ``(major_version, minor_version)``."""
-        if sys.platform not in ("linux", "win32") or config.MACHINE_BITS != 64:
-            # Only 64-bit Linux and Windows are supported
-            return ()
-        return (
-            (11, 0),
-            (11, 1),
-            (11, 2),
-            (11, 3),
-            (11, 4),
-            (11, 5),
-            (11, 6),
-            (11, 7),
-        )
+        return NVRTC().get_version()
 
 
 runtime = Runtime()
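The wrapper object itself is kept so existing callers keep working; a minimal usage sketch (illustrative only, assuming the returned value is still a (major, minor) tuple as before):

    from numba.cuda.cudadrv.runtime import runtime

    # The version now comes from NVRTC rather than from libcudart.
    major, minor = runtime.get_version()
    print(f"CUDA toolkit version reported by NVRTC: {major}.{minor}")
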
@@ -4,6 +4,7 @@ from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
 from numba.cuda.compiler import declare_device_function
 from numba.cuda.dispatcher import CUDADispatcher
 from numba.cuda.simulator.kernel import FakeCUDAKernel
+from numba.cuda.cudadrv.driver import _have_nvjitlink
 
 
 _msg_deprecated_signature_arg = (
@@ -24,6 +25,7 @@ def jit(
     lineinfo=False,
     cache=False,
     launch_bounds=None,
+    lto=None,
     **kws,
 ):
     """
@@ -83,6 +85,10 @@ def jit(
                           If a scalar is provided, it is used as the maximum
                           number of threads per block.
     :type launch_bounds: int | tuple[int]
+    :param lto: Whether to enable LTO. If unspecified, LTO is enabled by
+                default when pynvjitlink is available, except for kernels where
+                ``debug=True``.
+    :type lto: bool
     """
 
     if link and config.ENABLE_CUDASIM:
@@ -136,6 +142,16 @@ def jit(
     if device and kws.get("link"):
         raise ValueError("link keyword invalid for device function")
 
+    if lto is None:
+        # Default to using LTO if pynvjitlink is available and we're not debugging
+        lto = _have_nvjitlink() and not debug
+    else:
+        if lto and not _have_nvjitlink():
+            raise RuntimeError(
+                "LTO requires nvjitlink, which is not available"
+                "or not sufficiently recent (>=12.3)"
+            )
+
     if sigutils.is_signature(func_or_sig):
         signatures = [func_or_sig]
         specialized = True
@@ -165,6 +181,7 @@ def jit(
         targetoptions["forceinline"] = forceinline
         targetoptions["extensions"] = extensions
         targetoptions["launch_bounds"] = launch_bounds
+        targetoptions["lto"] = lto
 
         disp = CUDADispatcher(func, targetoptions=targetoptions)
 
@@ -235,6 +252,7 @@ def jit(
         targetoptions["forceinline"] = forceinline
         targetoptions["extensions"] = extensions
        targetoptions["launch_bounds"] = launch_bounds
+        targetoptions["lto"] = lto
         disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
 
         if cache:
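A short caller-side sketch of the new keyword (illustrative only; forcing lto=True assumes pynvjitlink >= 12.3 is installed):

    from numba import cuda

    # Opt in to LTO explicitly; omitting lto= lets the default enable it
    # whenever pynvjitlink is available and debug is not set.
    @cuda.jit(lto=True)
    def axpy(r, a, x, y):
        i = cuda.grid(1)
        if i < r.size:
            r[i] = a * x[i] + y[i]
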
@@ -129,6 +129,7 @@ class _Kernel(serialize.ReduceMixin):
             nvvm_options["g"] = None
 
         cc = get_current_device().compute_capability
+
         cres = compile_cuda(
             self.py_func,
             types.void,
@@ -0,0 +1,36 @@
+from numba.core.compiler import Flags, Option
+
+
+def _nvvm_options_type(x):
+    if x is None:
+        return None
+
+    else:
+        assert isinstance(x, dict)
+        return x
+
+
+def _optional_int_type(x):
+    if x is None:
+        return None
+
+    else:
+        assert isinstance(x, int)
+        return x
+
+
+class CUDAFlags(Flags):
+    nvvm_options = Option(
+        type=_nvvm_options_type,
+        default=None,
+        doc="NVVM options",
+    )
+    compute_capability = Option(
+        type=tuple,
+        default=None,
+        doc="Compute Capability",
+    )
+    max_registers = Option(
+        type=_optional_int_type, default=None, doc="Max registers"
+    )
+    lto = Option(type=bool, default=False, doc="Enable Link-time Optimization")
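An illustrative sketch of how these options validate their values; the concrete settings below are made up:

    from numba.cuda.flags import CUDAFlags

    flags = CUDAFlags()
    flags.nvvm_options = {"opt": 3}    # must be a dict (or None)
    flags.compute_capability = (7, 5)  # must be a tuple
    flags.max_registers = 64           # must be an int (or None)
    flags.lto = True                   # plain bool, defaults to False
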
@@ -6,7 +6,7 @@ import numpy as np
 from numba import cuda, config
 from numba.core.runtime.nrt import _nrt_mstats
 from numba.cuda.cudadrv.driver import (
-    Linker,
+    _Linker,
     driver,
     launch_kernel,
     USE_NV_BINDING,
@@ -80,7 +80,7 @@ class _Runtime:
         cc = get_current_device().compute_capability
 
         # Create a new linker instance and add the cu file
-        linker = Linker.new(cc=cc)
+        linker = _Linker.new(cc=cc)
         linker.add_cu_file(memsys_mod)
 
         # Complete the linker and create a module from it
@@ -34,10 +34,10 @@ class FakeDriver(object):
 driver = FakeDriver()
 
 
-class Linker:
+class _Linker:
     @classmethod
     def new(cls, max_registers=0, lineinfo=False, cc=None):
-        return Linker()
+        return _Linker()
 
     @property
     def lto(self):
@@ -67,3 +67,7 @@ PyNvJitLinker = None
 
 if config.ENABLE_CUDASIM:
     config.CUDA_ENABLE_PYNVJITLINK = False
+
+
+def _have_nvjitlink():
+    return False
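The stub above always reports False under the simulator, while the real driver module exposes a helper of the same name that the jit decorator's lto default consults. A rough sketch of that decision, with the debug value made up for illustration:

    from numba.cuda.cudadrv.driver import _have_nvjitlink

    debug = False  # illustrative
    lto_default = _have_nvjitlink() and not debug
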
@@ -2,9 +2,20 @@ import re
 from functools import cached_property
 import llvmlite.binding as ll
 from llvmlite import ir
-
-from numba.core import cgutils, config, itanium_mangler, types, typing
+import warnings
+
+from numba.core import (
+    cgutils,
+    compiler,
+    config,
+    itanium_mangler,
+    targetconfig,
+    types,
+    typing,
+)
+from numba.core.compiler_lock import global_compiler_lock
 from numba.core.dispatcher import Dispatcher
+from numba.core.errors import NumbaWarning
 from numba.core.base import BaseContext
 from numba.core.callconv import BaseCallConv, MinimalCallConv
 from numba.core.typing import cmathdecl
@@ -13,6 +24,7 @@ from numba.core import datamodel
 from .cudadrv import nvvm
 from numba.cuda import codegen, ufuncs
 from numba.cuda.debuginfo import CUDADIBuilder
+from numba.cuda.flags import CUDAFlags
 from numba.cuda.models import cuda_data_manager
 
 # -----------------------------------------------------------------------------
@@ -288,6 +300,47 @@ class CUDATargetContext(BaseContext):
     def get_ufunc_info(self, ufunc_key):
         return ufuncs.get_ufunc_info(ufunc_key)
 
+    def _compile_subroutine_no_cache(
+        self, builder, impl, sig, locals=None, flags=None
+    ):
+        # Overrides numba.core.base.BaseContext._compile_subroutine_no_cache().
+        # Modified to use flags from the context stack if they are not provided
+        # (pending a fix in Numba upstream).
+
+        if locals is None:
+            locals = {}
+
+        with global_compiler_lock:
+            codegen = self.codegen()
+            library = codegen.create_library(impl.__name__)
+            if flags is None:
+                cstk = targetconfig.ConfigStack()
+                if cstk:
+                    flags = cstk.top().copy()
+                else:
+                    msg = "There should always be a context stack; none found."
+                    warnings.warn(msg, NumbaWarning)
+                    flags = CUDAFlags()
+
+            flags.no_compile = True
+            flags.no_cpython_wrapper = True
+            flags.no_cfunc_wrapper = True
+
+            cres = compiler.compile_internal(
+                self.typing_context,
+                self,
+                library,
+                impl,
+                sig.args,
+                sig.return_type,
+                flags,
+                locals=locals,
+            )
+
+            # Allow inlining the function inside callers
+            self.active_code_library.add_linking_library(cres.library)
+            return cres
+
 
 class CUDACallConv(MinimalCallConv):
     def decorate_function(self, fn, args, fe_argtypes, noalias=False):
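For context, a condensed sketch of the flag recovery the override performs when no flags are passed (names taken from the diff above; this is not a public API):

    from numba.core import targetconfig
    from numba.cuda.flags import CUDAFlags

    cstk = targetconfig.ConfigStack()
    flags = cstk.top().copy() if cstk else CUDAFlags()
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.no_cfunc_wrapper = True
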
@@ -35,14 +35,6 @@ class CUDATestCase(SerialMixin, TestCase):
         config.CUDA_LOW_OCCUPANCY_WARNINGS = self._low_occupancy_warnings
         config.CUDA_WARN_ON_IMPLICIT_COPY = self._warn_on_implicit_copy
 
-    def skip_if_lto(self, reason):
-        # Some linkers need the compute capability to be specified, so we
-        # always specify it here.
-        cc = devices.get_context().device.compute_capability
-        linker = driver.Linker.new(cc=cc)
-        if linker.lto:
-            self.skipTest(reason)
-
 
 class ContextResettingTestCase(CUDATestCase):
     """
@@ -59,20 +51,6 @@ class ContextResettingTestCase(CUDATestCase):
         reset()
 
 
-def ensure_supported_ccs_initialized():
-    from numba.cuda import is_available as cuda_is_available
-    from numba.cuda.cudadrv import nvvm
-
-    if cuda_is_available():
-        # Ensure that cudart.so is loaded and the list of supported compute
-        # capabilities in the nvvm module is populated before a fork. This is
-        # needed because some compilation tests don't require a CUDA context,
-        # but do use NVVM, and it is required that libcudart.so should be
-        # loaded before a fork (note that the requirement is not explicitly
-        # documented).
-        nvvm.get_supported_ccs()
-
-
 def skip_on_cudasim(reason):
     """Skip this test if running on the CUDA simulator"""
     return unittest.skipIf(config.ENABLE_CUDASIM, reason)
@@ -1,5 +1,4 @@
 from fnmatch import fnmatch
-from numba.cuda.testing import ensure_supported_ccs_initialized
 from numba.testing import unittest
 from numba import cuda
 from os.path import dirname, isfile, join, normpath, relpath, splitext
@@ -42,7 +41,6 @@ def load_testsuite(loader, dir):
 def load_tests(loader, tests, pattern):
     suite = unittest.TestSuite()
     this_dir = dirname(__file__)
-    ensure_supported_ccs_initialized()
     suite.addTests(load_testsuite(loader, join(this_dir, "nocuda")))
     if cuda.is_available():
         suite.addTests(load_testsuite(loader, join(this_dir, "cudasim")))
@@ -1,8 +1,6 @@
-from numba.cuda.testing import ensure_supported_ccs_initialized
 from numba.cuda.tests import load_testsuite
 import os
 
 
 def load_tests(loader, tests, pattern):
-    ensure_supported_ccs_initialized()
     return load_testsuite(loader, os.path.dirname(__file__))
@@ -109,7 +109,21 @@ class Test3rdPartyContext(CUDATestCase):
         if driver.USE_NV_BINDING:
             flags = 0
             dev = driver.binding.CUdevice(0)
-            hctx = the_driver.cuCtxCreate(flags, dev)
+
+            result, version = driver.binding.cuDriverGetVersion()
+            self.assertEqual(
+                result,
+                driver.binding.CUresult.CUDA_SUCCESS,
+                "Error getting CUDA driver version",
+            )
+
+            # CUDA 13's cuCtxCreate has an optional parameter prepended
+            if version >= 13000:
+                args = (None, flags, dev)
+            else:
+                args = (flags, dev)
+
+            hctx = the_driver.cuCtxCreate(*args)
         else:
             hctx = driver.drvapi.cu_context()
             the_driver.cuCtxCreate(byref(hctx), 0, 0)
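A hedged sketch of the same version check outside the test harness, assuming the cuda.bindings.driver module from cuda-python is the binding in use:

    from cuda.bindings import driver as cu

    err, version = cu.cuDriverGetVersion()
    assert err == cu.CUresult.CUDA_SUCCESS

    # CUDA 13 prepends an optional parameter to cuCtxCreate, so build the
    # argument tuple based on the reported driver version.
    dev = cu.CUdevice(0)
    args = (None, 0, dev) if version >= 13000 else (0, dev)
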
@@ -1,14 +1,14 @@
 import numpy as np
 import warnings
+from numba import config
 from numba.cuda.testing import unittest
 from numba.cuda.testing import skip_on_cudasim, skip_if_cuda_includes_missing
 from numba.cuda.testing import CUDATestCase, test_data_dir
-from numba.cuda.cudadrv.driver import CudaAPIError, Linker, LinkerError
-from numba.cuda.cudadrv.error import NvrtcError
+from numba.cuda.cudadrv.driver import CudaAPIError, _Linker, LinkerError
 from numba.cuda import require_context
 from numba.tests.support import ignore_internal_warnings
 from numba import cuda, void, float64, int64, int32, typeof, float32
-
+from numba.cuda.cudadrv.error import NvrtcError
 
 CONST1D = np.arange(10, dtype=np.float64)
 
@@ -107,7 +107,7 @@ class TestLinker(CUDATestCase):
     @require_context
     def test_linker_basic(self):
         """Simply go through the constructor and destructor"""
-        linker = Linker.new(cc=(5, 3))
+        linker = _Linker.new(cc=(7, 5))
         del linker
 
     def _test_linking(self, eager):
@@ -183,7 +183,13 @@ class TestLinker(CUDATestCase):
 
         link = str(test_data_dir / "error.cu")
 
-        with self.assertRaises(NvrtcError) as e:
+        if config.CUDA_USE_NVIDIA_BINDING:
+            from cuda.core.experimental._utils.cuda_utils import NVRTCError
+
+            errty = NVRTCError
+        else:
+            errty = NvrtcError
+        with self.assertRaises(errty) as e:
 
            @cuda.jit("void(int32)", link=[link])
            def kernel(x):
@@ -191,7 +197,12 @@ class TestLinker(CUDATestCase):
 
         msg = e.exception.args[0]
         # Check the error message refers to the NVRTC compile
-        self.assertIn("NVRTC Compilation failure", msg)
+        nvrtc_err_str = (
+            "NVRTC_ERROR_COMPILATION"
+            if config.CUDA_USE_NVIDIA_BINDING
+            else "NVRTC Compilation failure"
+        )
+        self.assertIn(nvrtc_err_str, msg)
         # Check the expected error in the CUDA source is reported
         self.assertIn('identifier "SYNTAX" is undefined', msg)
         # Check the filename is reported correctly