numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (44)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +51 -16
  3. numba_cuda/numba/cuda/codegen.py +11 -9
  4. numba_cuda/numba/cuda/compiler.py +3 -39
  5. numba_cuda/numba/cuda/cuda_paths.py +20 -22
  6. numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
  7. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  8. numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
  9. numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
  10. numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
  11. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
  12. numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
  13. numba_cuda/numba/cuda/decorators.py +18 -0
  14. numba_cuda/numba/cuda/dispatcher.py +1 -0
  15. numba_cuda/numba/cuda/flags.py +36 -0
  16. numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
  17. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
  18. numba_cuda/numba/cuda/target.py +55 -2
  19. numba_cuda/numba/cuda/testing.py +0 -22
  20. numba_cuda/numba/cuda/tests/__init__.py +0 -2
  21. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
  22. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
  23. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
  24. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
  25. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
  26. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
  27. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
  28. numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
  29. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
  30. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
  31. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
  32. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
  33. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  34. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
  35. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
  36. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
  37. numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
  38. numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
  39. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
  40. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
  41. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
  42. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
  43. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
  44. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
- 0.15.1
+ 0.16.0
numba_cuda/numba/cuda/__init__.py CHANGED
@@ -2,24 +2,28 @@ import importlib
  from numba import runtests
  from numba.core import config
  from .utils import _readenv
+ import warnings
 
- # Enable pynvjitlink if the environment variables NUMBA_CUDA_ENABLE_PYNVJITLINK
- # or CUDA_ENABLE_PYNVJITLINK are set, or if the pynvjitlink module is found. If
- # explicitly disabled, do not use pynvjitlink, even if present in the env.
- _pynvjitlink_enabled_in_env = _readenv(
-     "NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
- )
- _pynvjitlink_enabled_in_cfg = getattr(config, "CUDA_ENABLE_PYNVJITLINK", None)
 
- if _pynvjitlink_enabled_in_env is not None:
-     ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
- elif _pynvjitlink_enabled_in_cfg is not None:
-     ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_cfg
- else:
-     ENABLE_PYNVJITLINK = importlib.util.find_spec("pynvjitlink") is not None
+ # Enable pynvjitlink based on the following precedence:
+ # 1. Config setting "CUDA_ENABLE_PYNVJITLINK" (highest priority)
+ # 2. Environment variable "NUMBA_CUDA_ENABLE_PYNVJITLINK"
+ # 3. Auto-detection of pynvjitlink module (lowest priority)
+
+ pynvjitlink_auto_enabled = False
 
- if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
-     config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
+ if getattr(config, "CUDA_ENABLE_PYNVJITLINK", None) is None:
+     if (
+         _pynvjitlink_enabled_in_env := _readenv(
+             "NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
+         )
+     ) is not None:
+         config.CUDA_ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
+     else:
+         pynvjitlink_auto_enabled = (
+             importlib.util.find_spec("pynvjitlink") is not None
+         )
+         config.CUDA_ENABLE_PYNVJITLINK = pynvjitlink_auto_enabled
 
  # Upstream numba sets CUDA_USE_NVIDIA_BINDING to 0 by default, so it always
  # exists. Override, but not if explicitly set to 0 in the envioronment.
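
For orientation, here is a minimal standalone sketch of the new precedence (an explicit config setting, then the environment variable, then auto-detection). The helper name resolve_pynvjitlink is hypothetical and only illustrates the decision order; it is not part of the package.

    # Hypothetical helper illustrating the precedence; not numba-cuda code.
    import importlib.util
    import os

    def resolve_pynvjitlink(config_setting=None):
        if config_setting is not None:      # 1. explicit config setting wins
            return bool(config_setting)
        env = os.environ.get("NUMBA_CUDA_ENABLE_PYNVJITLINK")
        if env is not None:                 # 2. then the environment variable
            return env not in ("0", "false", "False")
        # 3. finally, auto-enable only if the module can be imported
        return importlib.util.find_spec("pynvjitlink") is not None
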
@@ -32,7 +36,10 @@ else:
      USE_NV_BINDING = True
  config.CUDA_USE_NVIDIA_BINDING = USE_NV_BINDING
  if config.CUDA_USE_NVIDIA_BINDING:
-     if not importlib.util.find_spec("cuda.bindings"):
+     if not (
+         importlib.util.find_spec("cuda")
+         and importlib.util.find_spec("cuda.bindings")
+     ):
          raise ImportError(
              "CUDA bindings not found. Please pip install the "
              "cuda-bindings package. Alternatively, install "
@@ -43,6 +50,21 @@ if config.CUDA_USE_NVIDIA_BINDING:
              "bindings."
          )
 
+ if config.CUDA_ENABLE_PYNVJITLINK:
+     if USE_NV_BINDING:
+         warnings.warn(
+             "Explicitly enabling pynvjitlink is no longer necessary. "
+             "NVIDIA bindings are enabled. cuda.core will be used "
+             "in place of pynvjitlink."
+         )
+     elif pynvjitlink_auto_enabled:
+         # Ignore the fact that pynvjitlink is enabled, because that was an
+         # automatic decision based on discovering pynvjitlink was present; the
+         # user didn't ask for it
+         pass
+     else:
+         raise RuntimeError("nvJitLink requires the NVIDIA CUDA bindings. ")
+
  if config.ENABLE_CUDASIM:
      from .simulator_init import *
  else:
@@ -61,6 +83,19 @@ from numba.cuda.compiler import (
  implementation = "NVIDIA"
 
 
+ # The default compute capability as set by the upstream Numba implementation.
+ config_default_cc = config.CUDA_DEFAULT_PTX_CC
+
+ # The default compute capability for Numba-CUDA. This will usually override the
+ # upstream Numba built-in default of 5.0, unless the user has set it even
+ # higher, in which case we should use the user-specified value. This default is
+ # aligned with recent toolkit versions.
+ numba_cuda_default_ptx_cc = (7, 5)
+
+ if numba_cuda_default_ptx_cc > config_default_cc:
+     config.CUDA_DEFAULT_PTX_CC = numba_cuda_default_ptx_cc
+
+
  def test(*args, **kwargs):
      if not is_available():
          raise cuda_error()
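
Compute capabilities are plain tuples, so ordinary tuple comparison decides whether the new (7, 5) default replaces the configured value. A small illustration with made-up values (effective_default_cc is hypothetical, not package code):

    def effective_default_cc(configured_cc, numba_cuda_default=(7, 5)):
        # Equivalent to the compare-and-assign in the hunk above.
        return max(configured_cc, numba_cuda_default)

    assert effective_default_cc((5, 0)) == (7, 5)   # upstream default is raised
    assert effective_default_cc((8, 6)) == (8, 6)   # a higher user setting is kept
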
numba_cuda/numba/cuda/codegen.py CHANGED
@@ -2,7 +2,7 @@ from llvmlite import ir
 
  from numba.core import config, serialize
  from numba.core.codegen import Codegen, CodeLibrary
- from .cudadrv import devices, driver, nvvm, runtime
+ from .cudadrv import devices, driver, nvrtc, nvvm, runtime
  from numba.cuda.cudadrv.libs import get_cudalib
  from numba.cuda.cudadrv.linkable_code import LinkableCode
  from numba.cuda.memory_management.nrt import NRT_LIBRARY
@@ -22,7 +22,10 @@ def run_nvdisasm(cubin, flags):
      try:
          fd, fname = tempfile.mkstemp()
          with open(fname, "wb") as f:
-             f.write(cubin)
+             if config.CUDA_USE_NVIDIA_BINDING:
+                 f.write(cubin.code)
+             else:
+                 f.write(cubin)
 
          try:
              cp = subprocess.run(
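
With the NVIDIA bindings enabled, the linked cubin arrives as an object whose raw bytes are exposed on a .code attribute (as the branch above shows), rather than as a plain bytes value. A hedged sketch of the same dispatch in isolation (write_cubin is hypothetical):

    def write_cubin(f, cubin, use_nvidia_binding):
        # Unwrap the object form under the NVIDIA bindings; otherwise the
        # value is already raw bytes.
        payload = cubin.code if use_nvidia_binding else cubin
        f.write(payload)
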
@@ -208,7 +211,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          if ptxes:
              return ptxes
 
-         arch = nvvm.get_arch_option(*cc)
+         arch = nvrtc.get_arch_option(*cc)
          options = self._nvvm_options.copy()
          options["arch"] = arch
 
@@ -237,7 +240,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          if ltoir is not None:
              return ltoir
 
-         arch = nvvm.get_arch_option(*cc)
+         arch = nvrtc.get_arch_option(*cc)
          options = self._nvvm_options.copy()
          options["arch"] = arch
          options["gen-lto"] = None
@@ -271,7 +274,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
              return cubin
 
          if self._lto and config.DUMP_ASSEMBLY:
-             linker = driver.Linker.new(
+             linker = driver._Linker.new(
                  max_registers=self._max_registers,
                  cc=cc,
                  additional_flags=["-ptx"],
@@ -280,14 +283,14 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
              # `-ptx` flag is meant to view the optimized PTX for LTO objects.
              # Non-LTO objects are not passed to linker.
              self._link_all(linker, cc, ignore_nonlto=True)
-
-             ptx = linker.get_linked_ptx().decode("utf-8")
+             ptx = linker.get_linked_ptx()
+             ptx = ptx.decode("utf-8")
 
              print(("ASSEMBLY (AFTER LTO) %s" % self._name).center(80, "-"))
              print(ptx)
              print("=" * 80)
 
-         linker = driver.Linker.new(
+         linker = driver._Linker.new(
              max_registers=self._max_registers, cc=cc, lto=self._lto
          )
          self._link_all(linker, cc, ignore_nonlto=False)
@@ -312,7 +315,6 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          cufunc = self._cufunc_cache.get(device.id, None)
          if cufunc:
              return cufunc
-
          cubin = self.get_cubin(cc=device.compute_capability)
          module = ctx.create_module_image(
              cubin, self._setup_functions, self._teardown_functions
numba_cuda/numba/cuda/compiler.py CHANGED
@@ -14,8 +14,6 @@ from numba.core.compiler import (
      sanitize_compile_result_entries,
      CompilerBase,
      DefaultPassBuilder,
-     Flags,
-     Option,
      CompileResult,
  )
  from numba.core.compiler_lock import global_compiler_lock
@@ -37,47 +35,13 @@ from warnings import warn
  from numba.cuda import nvvmutils
  from numba.cuda.api import get_current_device
  from numba.cuda.codegen import ExternalCodeLibrary
- from numba.cuda.cudadrv import nvvm
+ from numba.cuda.cudadrv import nvvm, nvrtc
  from numba.cuda.descriptor import cuda_target
+ from numba.cuda.flags import CUDAFlags
  from numba.cuda.target import CUDACABICallConv
  from numba.cuda import lowering
 
 
- def _nvvm_options_type(x):
-     if x is None:
-         return None
-
-     else:
-         assert isinstance(x, dict)
-         return x
-
-
- def _optional_int_type(x):
-     if x is None:
-         return None
-
-     else:
-         assert isinstance(x, int)
-         return x
-
-
- class CUDAFlags(Flags):
-     nvvm_options = Option(
-         type=_nvvm_options_type,
-         default=None,
-         doc="NVVM options",
-     )
-     compute_capability = Option(
-         type=tuple,
-         default=None,
-         doc="Compute Capability",
-     )
-     max_registers = Option(
-         type=_optional_int_type, default=None, doc="Max registers"
-     )
-     lto = Option(type=bool, default=False, doc="Enable Link-time Optimization")
-
-
  # The CUDACompileResult (CCR) has a specially-defined entry point equal to its
  # id. This is because the entry point is used as a key into a dict of
  # overloads by the base dispatcher. The id of the CCR is the only small and
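
The CUDAFlags class removed here is not gone: per the file list, it now lives in the new numba_cuda/numba/cuda/flags.py module and is imported above. A hedged usage sketch under that assumption, with field names taken from the removed definition:

    from numba.cuda.flags import CUDAFlags

    flags = CUDAFlags()
    flags.nvvm_options = {"opt": 3}     # dict, per the removed options type check
    flags.compute_capability = (7, 5)   # compute capability as a tuple
    flags.lto = False                   # link-time optimization off by default
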
@@ -676,7 +640,7 @@ def compile(
      # If the user has used the config variable to specify a non-default that is
      # greater than the lowest non-deprecated one, then we should default to
      # their specified CC instead of the lowest non-deprecated one.
-     MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvvm.LOWEST_CURRENT_CC)
+     MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvrtc.get_lowest_supported_cc())
      cc = cc or MIN_CC
 
      cres = compile_cuda(
numba_cuda/numba/cuda/cuda_paths.py CHANGED
@@ -132,16 +132,9 @@ def _get_nvvm_wheel():
      return None
 
 
- def get_major_cuda_version():
-     # TODO: remove once cuda-python is
-     # a hard dependency
-     from numba.cuda.cudadrv.runtime import get_version
-
-     return get_version()[0]
-
-
  def get_nvrtc_dso_path():
      site_paths = [site.getusersitepackages()] + site.getsitepackages()
+
      for sp in site_paths:
          lib_dir = os.path.join(
              sp,
@@ -150,23 +143,28 @@ def get_nvrtc_dso_path():
              ("bin" if IS_WIN32 else "lib") if sp else None,
          )
          if lib_dir and os.path.exists(lib_dir):
-             try:
-                 major = get_major_cuda_version()
-                 if major == 11:
-                     cu_ver = "112" if IS_WIN32 else "11.2"
-                 elif major == 12:
-                     cu_ver = "120" if IS_WIN32 else "12"
-                 else:
-                     raise NotImplementedError(f"CUDA {major} is not supported")
-
-                 return os.path.join(
+             chosen_path = None
+
+             # Check for each version of the NVRTC DLL, preferring the most
+             # recent.
+             versions = (
+                 "112" if IS_WIN32 else "11.2",
+                 "120" if IS_WIN32 else "12",
+                 "130" if IS_WIN32 else "13",
+             )
+
+             for version in versions:
+                 dso_path = os.path.join(
                      lib_dir,
-                     f"nvrtc64_{cu_ver}_0.dll"
+                     f"nvrtc64_{version}_0.dll"
                      if IS_WIN32
-                     else f"libnvrtc.so.{cu_ver}",
+                     else f"libnvrtc.so.{version}",
                  )
-             except RuntimeError:
-                 continue
+
+                 if os.path.exists(dso_path) and os.path.isfile(dso_path):
+                     chosen_path = dso_path
+
+             return chosen_path
 
 
  def _get_nvrtc_wheel():
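
In the new search, the versions tuple is ordered oldest to newest and chosen_path is overwritten by every library found on disk, so the newest installed NVRTC wins. A standalone sketch of that selection rule (pick_newest_existing and the example paths are hypothetical):

    import os

    def pick_newest_existing(candidates):
        # Later entries are newer; the last one that exists is returned.
        chosen = None
        for path in candidates:
            if os.path.isfile(path):
                chosen = path
        return chosen

    # e.g. pick_newest_existing(["libnvrtc.so.11.2", "libnvrtc.so.12", "libnvrtc.so.13"])
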