numba-cuda 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
- 0.0.16
+ 0.0.18
numba_cuda/numba/cuda/codegen.py CHANGED
@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
      get_cufunc), which may be of different compute capabilities.
      """

-     def __init__(self, codegen, name, entry_name=None, max_registers=None,
-                  nvvm_options=None):
+     def __init__(
+         self,
+         codegen,
+         name,
+         entry_name=None,
+         max_registers=None,
+         lto=False,
+         nvvm_options=None
+     ):
          """
          codegen:
              Codegen object.
@@ -71,6 +78,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
              kernel and not a device function.
          max_registers:
              The maximum register usage to aim for when linking.
+         lto:
+             Whether to enable link-time optimization.
          nvvm_options:
              Dict of options to pass to NVVM.
          """
@@ -103,6 +112,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          self._cufunc_cache = {}

          self._max_registers = max_registers
+         self._lto = lto
          if nvvm_options is None:
              nvvm_options = {}
          self._nvvm_options = nvvm_options
@@ -178,7 +188,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          if cubin:
              return cubin

-         linker = driver.Linker.new(max_registers=self._max_registers, cc=cc)
+         linker = driver.Linker.new(
+             max_registers=self._max_registers, cc=cc, lto=self._lto
+         )

          if linker.lto:
              ltoir = self.get_ltoir(cc=cc)
numba_cuda/numba/cuda/cudadrv/driver.py CHANGED
@@ -10,7 +10,6 @@ subsequent deallocation could further corrupt the CUDA context and causes the
  system to freeze in some cases.

  """
-
  import sys
  import os
  import ctypes
@@ -19,6 +18,7 @@ import functools
  import warnings
  import logging
  import threading
+ import traceback
  import asyncio
  import pathlib
  from itertools import product
@@ -35,6 +35,8 @@ from numba.core import utils, serialize, config
  from .error import CudaSupportError, CudaDriverError
  from .drvapi import API_PROTOTYPES
  from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
+ from .mappings import FILE_EXTENSION_MAP
+ from .linkable_code import LinkableCode
  from numba.cuda.cudadrv import enums, drvapi, nvrtc

  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
@@ -56,6 +58,52 @@ _py_decref.argtypes = [ctypes.py_object]
  _py_incref.argtypes = [ctypes.py_object]


+ def _readenv(name, ctor, default):
+     value = os.environ.get(name)
+     if value is None:
+         return default() if callable(default) else default
+     try:
+         if ctor is bool:
+             return value.lower() in {'1', "true"}
+         return ctor(value)
+     except Exception:
+         warnings.warn(
+             f"Environment variable '{name}' is defined but its associated "
+             f"value '{value}' could not be parsed.\n"
+             "The parse failed with exception:\n"
+             f"{traceback.format_exc()}",
+             RuntimeWarning
+         )
+         return default
+
+
+ _MVC_ERROR_MESSAGE = (
+     "Minor version compatibility requires ptxcompiler and cubinlinker packages "
+     "to be available"
+ )
+
+ ENABLE_PYNVJITLINK = (
+     _readenv("NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False)
+     or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
+ )
+ if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
+     config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
+
+ if ENABLE_PYNVJITLINK:
+     try:
+         from pynvjitlink.api import NvJitLinker, NvJitLinkError
+     except ImportError:
+         raise ImportError(
+             "Using pynvjitlink requires the pynvjitlink package to be available"
+         )
+
+     if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
+         raise ValueError(
+             "Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
+             "CUDA_ENABLE_PYNVJITLINK at the same time"
+         )
+
+
  def make_logger():
      logger = logging.getLogger(__name__)
      # is logging configured?
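Note (not part of the diff): a minimal sketch of opting in to the new pynvjitlink-backed linker. It assumes the pynvjitlink package is installed and a CUDA 12 driver is present; the environment variable is read once, at numba.cuda import time, by the code above.

    import os

    # Must be set before numba.cuda is imported; "1" or "true" enables it.
    os.environ["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "1"

    from numba import cuda  # driver.py picks the setting up on import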
@@ -432,7 +480,7 @@ class Driver(object):

      def get_version(self):
          """
-         Returns the CUDA Runtime version as a tuple (major, minor).
+         Returns the CUDA Driver version as a tuple (major, minor).
          """
          if USE_NV_BINDING:
              version = driver.cuDriverGetVersion()
@@ -2546,38 +2594,47 @@ def launch_kernel(cufunc_handle,
                            extra)


- if USE_NV_BINDING:
-     jitty = binding.CUjitInputType
-     FILE_EXTENSION_MAP = {
-         'o': jitty.CU_JIT_INPUT_OBJECT,
-         'ptx': jitty.CU_JIT_INPUT_PTX,
-         'a': jitty.CU_JIT_INPUT_LIBRARY,
-         'lib': jitty.CU_JIT_INPUT_LIBRARY,
-         'cubin': jitty.CU_JIT_INPUT_CUBIN,
-         'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
-     }
- else:
-     FILE_EXTENSION_MAP = {
-         'o': enums.CU_JIT_INPUT_OBJECT,
-         'ptx': enums.CU_JIT_INPUT_PTX,
-         'a': enums.CU_JIT_INPUT_LIBRARY,
-         'lib': enums.CU_JIT_INPUT_LIBRARY,
-         'cubin': enums.CU_JIT_INPUT_CUBIN,
-         'fatbin': enums.CU_JIT_INPUT_FATBINARY,
-     }
-
-
  class Linker(metaclass=ABCMeta):
      """Abstract base class for linkers"""

      @classmethod
-     def new(cls, max_registers=0, lineinfo=False, cc=None):
-         if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
-             return MVCLinker(max_registers, lineinfo, cc)
-         elif USE_NV_BINDING:
-             return CudaPythonLinker(max_registers, lineinfo, cc)
+     def new(cls,
+             max_registers=0,
+             lineinfo=False,
+             cc=None,
+             lto=None,
+             additional_flags=None
+             ):
+
+         driver_ver = driver.get_version()
+         if (
+             config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
+             and driver_ver >= (12, 0)
+         ):
+             raise ValueError(
+                 "Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC"
+             )
+         if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
+             raise ValueError(
+                 "Enabling pynvjitlink requires CUDA 12."
+             )
+         if config.CUDA_ENABLE_PYNVJITLINK:
+             linker = PyNvJitLinker
+
+         elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
+             linker = MVCLinker
+         else:
+             if USE_NV_BINDING:
+                 linker = CudaPythonLinker
+             else:
+                 linker = CtypesLinker
+
+         if linker is PyNvJitLinker:
+             return linker(max_registers, lineinfo, cc, lto, additional_flags)
+         elif additional_flags or lto:
+             raise ValueError("LTO and additional flags require PyNvJitLinker")
          else:
-             return CtypesLinker(max_registers, lineinfo, cc)
+             return linker(max_registers, lineinfo, cc)

      @abstractmethod
      def __init__(self, max_registers, lineinfo, cc):
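Note (not part of the diff): a sketch of how the new selection logic behaves, assuming a CUDA 12 driver and CUDA_ENABLE_PYNVJITLINK set as above; the values are illustrative.

    from numba.cuda.cudadrv import driver

    # With pynvjitlink enabled this returns a PyNvJitLinker; without it,
    # passing lto or additional_flags raises
    # ValueError("LTO and additional flags require PyNvJitLinker").
    linker = driver.Linker.new(cc=(7, 5), lto=True)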
@@ -2626,19 +2683,42 @@ class Linker(metaclass=ABCMeta):
              cu = f.read()
          self.add_cu(cu, os.path.basename(path))

-     def add_file_guess_ext(self, path):
-         """Add a file to the link, guessing its type from its extension."""
-         ext = os.path.splitext(path)[1][1:]
-         if ext == '':
-             raise RuntimeError("Don't know how to link file with no extension")
-         elif ext == 'cu':
-             self.add_cu_file(path)
+     def add_file_guess_ext(self, path_or_code):
+         """
+         Add a file or LinkableCode object to the link. If a file is
+         passed, the type will be inferred from the extension. A LinkableCode
+         object represents a file already in memory.
+         """
+         if isinstance(path_or_code, str):
+             ext = pathlib.Path(path_or_code).suffix
+             if ext == '':
+                 raise RuntimeError(
+                     "Don't know how to link file with no extension"
+                 )
+             elif ext == '.cu':
+                 self.add_cu_file(path_or_code)
+             else:
+                 kind = FILE_EXTENSION_MAP.get(ext.lstrip('.'), None)
+                 if kind is None:
+                     raise RuntimeError(
+                         "Don't know how to link file with extension "
+                         f"{ext}"
+                     )
+                 self.add_file(path_or_code, kind)
+             return
          else:
-             kind = FILE_EXTENSION_MAP.get(ext, None)
-             if kind is None:
-                 raise RuntimeError("Don't know how to link file with extension "
-                                    f".{ext}")
-             self.add_file(path, kind)
+             # Otherwise, we should have been given a LinkableCode object
+             if not isinstance(path_or_code, LinkableCode):
+                 raise TypeError(
+                     "Expected path to file or a LinkableCode object"
+                 )
+
+             if path_or_code.kind == "cu":
+                 self.add_cu(path_or_code.data, path_or_code.name)
+             else:
+                 self.add_data(
+                     path_or_code.data, path_or_code.kind, path_or_code.name
+                 )

      @abstractmethod
      def complete(self):
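Note (not part of the diff): add_file_guess_ext() now accepts either a path, dispatching on the file extension, or an in-memory LinkableCode object, dispatching on its kind. A rough sketch, with `linker` and `ptx_bytes` as placeholder names:

    from numba.cuda.cudadrv.linkable_code import PTXSource

    linker.add_file_guess_ext("my_functions.cu")      # inferred from ".cu"
    linker.add_file_guess_ext(PTXSource(ptx_bytes))   # inferred from .kind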
@@ -2649,12 +2729,6 @@ class Linker(metaclass=ABCMeta):
          """


- _MVC_ERROR_MESSAGE = (
-     "Minor version compatibility requires ptxcompiler and cubinlinker packages "
-     "to be available"
- )
-
-
  class MVCLinker(Linker):
      """
      Linker supporting Minor Version Compatibility, backed by the cubinlinker
@@ -2930,6 +3004,94 @@ class CudaPythonLinker(Linker):
          return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))


+ class PyNvJitLinker(Linker):
+     def __init__(
+         self,
+         max_registers=None,
+         lineinfo=False,
+         cc=None,
+         lto=False,
+         additional_flags=None,
+     ):
+
+         if cc is None:
+             raise RuntimeError("PyNvJitLinker requires CC to be specified")
+         if not any(isinstance(cc, t) for t in [list, tuple]):
+             raise TypeError("`cc` must be a list or tuple of length 2")
+
+         sm_ver = f"{cc[0] * 10 + cc[1]}"
+         arch = f"-arch=sm_{sm_ver}"
+         options = [arch]
+         if max_registers:
+             options.append(f"-maxrregcount={max_registers}")
+         if lineinfo:
+             options.append("-lineinfo")
+         if lto:
+             options.append("-lto")
+         if additional_flags is not None:
+             options.extend(additional_flags)
+
+         self._linker = NvJitLinker(*options)
+         self.lto = lto
+         self.options = options
+
+     @property
+     def info_log(self):
+         return self._linker.info_log
+
+     @property
+     def error_log(self):
+         return self._linker.error_log
+
+     def add_ptx(self, ptx, name="<cudapy-ptx>"):
+         self._linker.add_ptx(ptx, name)
+
+     def add_fatbin(self, fatbin, name="<external-fatbin>"):
+         self._linker.add_fatbin(fatbin, name)
+
+     def add_ltoir(self, ltoir, name="<external-ltoir>"):
+         self._linker.add_ltoir(ltoir, name)
+
+     def add_object(self, obj, name="<external-object>"):
+         self._linker.add_object(obj, name)
+
+     def add_file(self, path, kind):
+         try:
+             with open(path, "rb") as f:
+                 data = f.read()
+         except FileNotFoundError:
+             raise LinkerError(f"{path} not found")
+
+         name = pathlib.Path(path).name
+         self.add_data(data, kind, name)
+
+     def add_data(self, data, kind, name):
+         if kind == FILE_EXTENSION_MAP["cubin"]:
+             fn = self._linker.add_cubin
+         elif kind == FILE_EXTENSION_MAP["fatbin"]:
+             fn = self._linker.add_fatbin
+         elif kind == FILE_EXTENSION_MAP["a"]:
+             fn = self._linker.add_library
+         elif kind == FILE_EXTENSION_MAP["ptx"]:
+             return self.add_ptx(data, name)
+         elif kind == FILE_EXTENSION_MAP["o"]:
+             fn = self._linker.add_object
+         elif kind == FILE_EXTENSION_MAP["ltoir"]:
+             fn = self._linker.add_ltoir
+         else:
+             raise LinkerError(f"Don't know how to link {kind}")
+
+         try:
+             fn(data, name)
+         except NvJitLinkError as e:
+             raise LinkerError from e
+
+     def complete(self):
+         try:
+             return self._linker.get_linked_cubin()
+         except NvJitLinkError as e:
+             raise LinkerError from e
+
  # -----------------------------------------------------------------------------
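Note (not part of the diff): a minimal, hypothetical sketch of driving PyNvJitLinker directly; it assumes pynvjitlink is enabled and that "my_functions.cubin" exists on disk.

    from numba.cuda.cudadrv.driver import PyNvJitLinker

    linker = PyNvJitLinker(cc=(7, 5))                 # -arch=sm_75
    linker.add_file_guess_ext("my_functions.cubin")   # routed to add_data via the cubin kind
    cubin = linker.complete()                         # bytes of the linked cubin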


numba_cuda/numba/cuda/cudadrv/enums.py CHANGED
@@ -309,6 +309,9 @@ CU_JIT_INPUT_OBJECT = 3
  # Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
  CU_JIT_INPUT_LIBRARY = 4

+ # LTO IR
+ CU_JIT_INPUT_NVVM = 5
+
  CU_JIT_NUM_INPUT_TYPES = 6

numba_cuda/numba/cuda/cudadrv/linkable_code.py ADDED
@@ -0,0 +1,63 @@
+ from .mappings import FILE_EXTENSION_MAP
+
+
+ class LinkableCode:
+     """An object that can be passed in the `link` list argument to `@cuda.jit`
+     kernels to supply code to be linked from memory."""
+
+     def __init__(self, data, name=None):
+         self.data = data
+         self._name = name
+
+     @property
+     def name(self):
+         return self._name or self.default_name
+
+
+ class PTXSource(LinkableCode):
+     """PTX Source code in memory"""
+
+     kind = FILE_EXTENSION_MAP["ptx"]
+     default_name = "<unnamed-ptx>"
+
+
+ class CUSource(LinkableCode):
+     """CUDA C/C++ Source code in memory"""
+
+     kind = "cu"
+     default_name = "<unnamed-cu>"
+
+
+ class Fatbin(LinkableCode):
+     """A fatbin ELF in memory"""
+
+     kind = FILE_EXTENSION_MAP["fatbin"]
+     default_name = "<unnamed-fatbin>"
+
+
+ class Cubin(LinkableCode):
+     """A cubin ELF in memory"""
+
+     kind = FILE_EXTENSION_MAP["cubin"]
+     default_name = "<unnamed-cubin>"
+
+
+ class Archive(LinkableCode):
+     """An archive of objects in memory"""
+
+     kind = FILE_EXTENSION_MAP["a"]
+     default_name = "<unnamed-archive>"
+
+
+ class Object(LinkableCode):
+     """An object file in memory"""
+
+     kind = FILE_EXTENSION_MAP["o"]
+     default_name = "<unnamed-object>"
+
+
+ class LTOIR(LinkableCode):
+     """An LTOIR file in memory"""
+
+     kind = "ltoir"
+     default_name = "<unnamed-ltoir>"
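Note (not part of the diff): a minimal sketch of linking in-memory CUDA C++ with the new CUSource class. The source string and function names are illustrative only, and follow the usual declare_device convention (an int status return, with the result written through the first pointer argument).

    from numba import cuda
    from numba.cuda.cudadrv.linkable_code import CUSource

    saxpy_cu = CUSource('''
    extern "C" __device__ int saxpy(float *out, float a, float x, float y) {
        *out = a * x + y;
        return 0;
    }
    ''')

    saxpy = cuda.declare_device("saxpy", "float32(float32, float32, float32)")

    @cuda.jit(link=[saxpy_cu])
    def kernel(out, a, x, y):
        out[0] = saxpy(a, x, y)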
numba_cuda/numba/cuda/cudadrv/mappings.py ADDED
@@ -0,0 +1,24 @@
+ from numba import config
+ from . import enums
+ if config.CUDA_USE_NVIDIA_BINDING:
+     from cuda import cuda
+     jitty = cuda.CUjitInputType
+     FILE_EXTENSION_MAP = {
+         'o': jitty.CU_JIT_INPUT_OBJECT,
+         'ptx': jitty.CU_JIT_INPUT_PTX,
+         'a': jitty.CU_JIT_INPUT_LIBRARY,
+         'lib': jitty.CU_JIT_INPUT_LIBRARY,
+         'cubin': jitty.CU_JIT_INPUT_CUBIN,
+         'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
+         'ltoir': jitty.CU_JIT_INPUT_NVVM,
+     }
+ else:
+     FILE_EXTENSION_MAP = {
+         'o': enums.CU_JIT_INPUT_OBJECT,
+         'ptx': enums.CU_JIT_INPUT_PTX,
+         'a': enums.CU_JIT_INPUT_LIBRARY,
+         'lib': enums.CU_JIT_INPUT_LIBRARY,
+         'cubin': enums.CU_JIT_INPUT_CUBIN,
+         'fatbin': enums.CU_JIT_INPUT_FATBINARY,
+         'ltoir': enums.CU_JIT_INPUT_NVVM,
+     }
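Note (not part of the diff): FILE_EXTENSION_MAP resolves a file extension to the jit input type expected by the linker's add_file()/add_data() methods, under either binding. A trivial sketch:

    from numba.cuda.cudadrv.mappings import FILE_EXTENSION_MAP

    kind = FILE_EXTENSION_MAP["ltoir"]   # CU_JIT_INPUT_NVVM, the new LTO-IR kind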
numba_cuda/numba/cuda/device_init.py CHANGED
@@ -31,6 +31,9 @@ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
                                  shfl_xor_sync)

  from .kernels import reduction
+ from numba.cuda.cudadrv.linkable_code import (
+     Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
+ )

  reduce = Reduce = reduction.Reduce

numba_cuda/numba/cuda/dispatcher.py CHANGED
@@ -46,7 +46,7 @@ class _Kernel(serialize.ReduceMixin):
      @global_compiler_lock
      def __init__(self, py_func, argtypes, link=None, debug=False,
                   lineinfo=False, inline=False, fastmath=False, extensions=None,
-                  max_registers=None, opt=True, device=False):
+                  max_registers=None, lto=False, opt=True, device=False):

          if device:
              raise RuntimeError('Cannot compile a device function as a kernel')
@@ -94,7 +94,7 @@ class _Kernel(serialize.ReduceMixin):
          lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
                                                    debug, lineinfo, nvvm_options,
                                                    filename, linenum,
-                                                   max_registers)
+                                                   max_registers, lto)

          if not link:
              link = []
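Note (not part of the diff): the new lto flag as exercised by the test_nvjitlink tests further down; it requires the pynvjitlink-backed linker, and "functions.ltoir" is a placeholder for an LTO-IR input built ahead of time.

    from numba import cuda

    add = cuda.declare_device("add_from_numba", "uint32(uint32, uint32)")

    @cuda.jit(link=["functions.ltoir"], lto=True)
    def kernel(result):
        result[0] = add(1, 2)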
numba_cuda/numba/cuda/intrinsics.py CHANGED
@@ -4,7 +4,7 @@ from numba import cuda, types
  from numba.core import cgutils
  from numba.core.errors import RequireLiteralValue
  from numba.core.typing import signature
- from numba.core.extending import overload_attribute
+ from numba.core.extending import overload_attribute, overload_method
  from numba.cuda import nvvmutils
  from numba.cuda.extending import intrinsic

@@ -196,3 +196,8 @@ def syncthreads_or(typingctx, predicate):
      '''
      fname = 'llvm.nvvm.barrier0.or'
      return _syncthreads_predicate(typingctx, predicate, fname)
+
+
+ @overload_method(types.Integer, 'bit_count', target='cuda')
+ def integer_bit_count(i):
+     return lambda i: cuda.popc(i)
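Note (not part of the diff): a minimal sketch of the new int.bit_count() support in device code, which lowers to cuda.popc().

    import numpy as np
    from numba import cuda

    @cuda.jit
    def count_bits(out, value):
        out[0] = value.bit_count()

    out = np.zeros(1, dtype=np.int32)
    count_bits[1, 1](out, np.uint32(0b1011))   # out[0] == 3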
numba_cuda/numba/cuda/printimpl.py CHANGED
@@ -63,6 +63,17 @@ def dim3_print_impl(ty, context, builder, val):
      return rawfmt, [x, y, z]


+ @print_item.register(types.Boolean)
+ def bool_print_impl(ty, context, builder, val):
+     true_string = context.insert_string_const_addrspace(builder, "True")
+     false_string = context.insert_string_const_addrspace(builder, "False")
+     res_ptr = cgutils.alloca_once_value(builder, false_string)
+     with builder.if_then(val):
+         builder.store(true_string, res_ptr)
+     rawfmt = "%s"
+     return rawfmt, [builder.load(res_ptr)]
+
+
  @lower(print, types.VarArg(types.Any))
  def print_varargs(context, builder, sig, args):
      """This function is a generic 'print' wrapper for arbitrary types.
numba_cuda/numba/cuda/target.py CHANGED
@@ -148,7 +148,7 @@ class CUDATargetContext(BaseContext):

      def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
                              nvvm_options, filename, linenum,
-                             max_registers=None):
+                             max_registers=None, lto=False):
          """
          Adapt a code library ``codelib`` with the numba compiled CUDA kernel
          with name ``fname`` and arguments ``argtypes`` for NVVM.
@@ -175,7 +175,9 @@ class CUDATargetContext(BaseContext):
          library = self.codegen().create_library(f'{codelib.name}_kernel_',
                                                  entry_name=kernel_name,
                                                  nvvm_options=nvvm_options,
-                                                 max_registers=max_registers)
+                                                 max_registers=max_registers,
+                                                 lto=lto
+                                                 )
          library.add_linking_library(codelib)
          wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
                                                 debug, lineinfo, filename,
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py ADDED
@@ -0,0 +1,199 @@
+ from numba.cuda.testing import unittest
+ from numba.cuda.testing import skip_on_cudasim
+ from numba.cuda.testing import CUDATestCase
+ from numba.cuda.cudadrv.driver import PyNvJitLinker
+
+ import itertools
+ import os
+ from numba.cuda import get_current_device
+ from numba import cuda
+ from numba import config
+
+ TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
+ if TEST_BIN_DIR:
+     test_device_functions_a = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.a"
+     )
+     test_device_functions_cubin = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.cubin"
+     )
+     test_device_functions_cu = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.cu"
+     )
+     test_device_functions_fatbin = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.fatbin"
+     )
+     test_device_functions_o = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.o"
+     )
+     test_device_functions_ptx = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.ptx"
+     )
+     test_device_functions_ltoir = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.ltoir"
+     )
+
+
+ @unittest.skipIf(
+     not config.CUDA_ENABLE_PYNVJITLINK or not TEST_BIN_DIR,
+     "pynvjitlink not enabled"
+ )
+ @skip_on_cudasim("Linking unsupported in the simulator")
+ class TestLinker(CUDATestCase):
+     _NUMBA_NVIDIA_BINDING_0_ENV = {"NUMBA_CUDA_USE_NVIDIA_BINDING": "0"}
+
+     def test_nvjitlink_create(self):
+         patched_linker = PyNvJitLinker(cc=(7, 5))
+         assert "-arch=sm_75" in patched_linker.options
+
+     def test_nvjitlink_create_no_cc_error(self):
+         # nvJitLink expects at least the architecture to be specified.
+         with self.assertRaisesRegex(
+             RuntimeError, "PyNvJitLinker requires CC to be specified"
+         ):
+             PyNvJitLinker()
+
+     def test_nvjitlink_invalid_arch_error(self):
+         from pynvjitlink.api import NvJitLinkError
+
+         # CC 0.0 is not a valid compute capability
+         with self.assertRaisesRegex(
+             NvJitLinkError, "NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"
+         ):
+             PyNvJitLinker(cc=(0, 0))
+
+     def test_nvjitlink_invalid_cc_type_error(self):
+         with self.assertRaisesRegex(
+             TypeError, "`cc` must be a list or tuple of length 2"
+         ):
+             PyNvJitLinker(cc=0)
+
+     def test_nvjitlink_ptx_compile_options(self):
+
+         max_registers = (None, 32)
+         lineinfo = (False, True)
+         lto = (False, True)
+         additional_flags = (None, ("-g",), ("-g", "-time"))
+         for (
+             max_registers_i,
+             line_info_i,
+             lto_i,
+             additional_flags_i,
+         ) in itertools.product(max_registers, lineinfo, lto, additional_flags):
+             with self.subTest(
+                 max_registers=max_registers_i,
+                 lineinfo=line_info_i,
+                 lto=lto_i,
+                 additional_flags=additional_flags_i,
+             ):
+                 patched_linker = PyNvJitLinker(
+                     cc=(7, 5),
+                     max_registers=max_registers_i,
+                     lineinfo=line_info_i,
+                     lto=lto_i,
+                     additional_flags=additional_flags_i,
+                 )
+                 assert "-arch=sm_75" in patched_linker.options
+
+                 if max_registers_i:
+                     assert (
+                         f"-maxrregcount={max_registers_i}"
+                         in patched_linker.options
+                     )
+                 else:
+                     assert "-maxrregcount" not in patched_linker.options
+
+                 if line_info_i:
+                     assert "-lineinfo" in patched_linker.options
+                 else:
+                     assert "-lineinfo" not in patched_linker.options
+
+                 if lto_i:
+                     assert "-lto" in patched_linker.options
+                 else:
+                     assert "-lto" not in patched_linker.options
+
+                 if additional_flags_i:
+                     for flag in additional_flags_i:
+                         assert flag in patched_linker.options
+
+     def test_nvjitlink_add_file_guess_ext_linkable_code(self):
+         files = (
+             test_device_functions_a,
+             test_device_functions_cubin,
+             test_device_functions_cu,
+             test_device_functions_fatbin,
+             test_device_functions_o,
+             test_device_functions_ptx,
+         )
+         for file in files:
+             with self.subTest(file=file):
+                 patched_linker = PyNvJitLinker(
+                     cc=get_current_device().compute_capability
+                 )
+                 patched_linker.add_file_guess_ext(file)
+
+     def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
+         with open(test_device_functions_cubin, "rb") as f:
+             content = f.read()
+
+         patched_linker = PyNvJitLinker(
+             cc=get_current_device().compute_capability
+         )
+         with self.assertRaisesRegex(
+             TypeError, "Expected path to file or a LinkableCode"
+         ):
+             # Feeding raw data as bytes to add_file_guess_ext should raise,
+             # because there's no way to know what kind of file to treat it as
+             patched_linker.add_file_guess_ext(content)
+
+     def test_nvjitlink_jit_with_linkable_code(self):
+         files = (
+             test_device_functions_a,
+             test_device_functions_cubin,
+             test_device_functions_cu,
+             test_device_functions_fatbin,
+             test_device_functions_o,
+             test_device_functions_ptx,
+         )
+         for file in files:
+             with self.subTest(file=file):
+                 sig = "uint32(uint32, uint32)"
+                 add_from_numba = cuda.declare_device("add_from_numba", sig)
+
+                 @cuda.jit(link=[file])
+                 def kernel(result):
+                     result[0] = add_from_numba(1, 2)
+
+                 result = cuda.device_array(1)
+                 kernel[1, 1](result)
+                 assert result[0] == 3
+
+     def test_nvjitlink_jit_with_linkable_code_lto(self):
+         file = test_device_functions_ltoir
+
+         sig = "uint32(uint32, uint32)"
+         add_from_numba = cuda.declare_device("add_from_numba", sig)
+
+         @cuda.jit(link=[file], lto=True)
+         def kernel(result):
+             result[0] = add_from_numba(1, 2)
+
+         result = cuda.device_array(1)
+         kernel[1, 1](result)
+         assert result[0] == 3
+
+     def test_nvjitlink_jit_with_invalid_linkable_code(self):
+         with open(test_device_functions_cubin, "rb") as f:
+             content = f.read()
+         with self.assertRaisesRegex(
+             TypeError, "Expected path to file or a LinkableCode"
+         ):
+
+             @cuda.jit("void()", link=[content])
+             def kernel():
+                 pass
+
+
+ if __name__ == "__main__":
+     unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py CHANGED
@@ -68,6 +68,10 @@ def simple_popc(ary, c):
      ary[0] = cuda.popc(c)


+ def simple_bit_count(ary, c):
+     ary[0] = c.bit_count()
+
+
  def simple_fma(ary, a, b, c):
      ary[0] = cuda.fma(a, b, c)

@@ -550,17 +554,53 @@ class TestCudaIntrinsic(CUDATestCase):

          self.assertTrue(np.all(arr))

+     def test_popc_u1(self):
+         compiled = cuda.jit("void(int32[:], uint8)")(simple_popc)
+         ary = np.zeros(1, dtype=np.int8)
+         compiled[1, 1](ary, np.uint8(0xFF))
+         self.assertEqual(ary[0], 8)
+
+     def test_popc_u2(self):
+         compiled = cuda.jit("void(int32[:], uint16)")(simple_popc)
+         ary = np.zeros(1, dtype=np.int16)
+         compiled[1, 1](ary, np.uint16(0xFFFF))
+         self.assertEqual(ary[0], 16)
+
      def test_popc_u4(self):
          compiled = cuda.jit("void(int32[:], uint32)")(simple_popc)
          ary = np.zeros(1, dtype=np.int32)
-         compiled[1, 1](ary, 0xF0)
-         self.assertEqual(ary[0], 4)
+         compiled[1, 1](ary, np.uint32(0xFFFFFFFF))
+         self.assertEqual(ary[0], 32)

      def test_popc_u8(self):
          compiled = cuda.jit("void(int32[:], uint64)")(simple_popc)
          ary = np.zeros(1, dtype=np.int32)
-         compiled[1, 1](ary, 0xF00000000000)
-         self.assertEqual(ary[0], 4)
+         compiled[1, 1](ary, np.uint64(0xFFFFFFFFFFFFFFFF))
+         self.assertEqual(ary[0], 64)
+
+     def test_bit_count_u1(self):
+         compiled = cuda.jit("void(int32[:], uint8)")(simple_bit_count)
+         ary = np.zeros(1, dtype=np.int8)
+         compiled[1, 1](ary, np.uint8(0xFF))
+         self.assertEqual(ary[0], 8)
+
+     def test_bit_count_u2(self):
+         compiled = cuda.jit("void(int32[:], uint16)")(simple_bit_count)
+         ary = np.zeros(1, dtype=np.int16)
+         compiled[1, 1](ary, np.uint16(0xFFFF))
+         self.assertEqual(ary[0], 16)
+
+     def test_bit_count_u4(self):
+         compiled = cuda.jit("void(int32[:], uint32)")(simple_bit_count)
+         ary = np.zeros(1, dtype=np.int32)
+         compiled[1, 1](ary, np.uint32(0xFFFFFFFF))
+         self.assertEqual(ary[0], 32)
+
+     def test_bit_count_u8(self):
+         compiled = cuda.jit("void(int32[:], uint64)")(simple_bit_count)
+         ary = np.zeros(1, dtype=np.int32)
+         compiled[1, 1](ary, np.uint64(0xFFFFFFFFFFFFFFFF))
+         self.assertEqual(ary[0], 64)

      def test_fma_f4(self):
          compiled = cuda.jit("void(f4[:], f4, f4, f4)")(simple_fma)
numba_cuda/numba/cuda/tests/cudapy/test_print.py CHANGED
@@ -32,6 +32,21 @@ cuda.synchronize()
  """


+ printbool_usecase = """\
+ from numba import cuda
+
+ @cuda.jit
+ def printbool(x):
+     print(True)
+     print(False)
+     print(x == 0)
+
+ printbool[1, 1](0)
+ printbool[1, 1](1)
+ cuda.synchronize()
+ """
+
+
  printstring_usecase = """\
  from numba import cuda

@@ -109,6 +124,11 @@ class TestPrint(CUDATestCase):
          expected_cases = ["0 23 34.750000 321", "0 23 34.75 321"]
          self.assertIn(output.strip(), expected_cases)

+     def test_bool(self):
+         output, _ = self.run_code(printbool_usecase)
+         expected = "True\nFalse\nTrue\nTrue\nFalse\nFalse"
+         self.assertEqual(output.strip(), expected)
+
      def test_printempty(self):
          output, _ = self.run_code(printempty_usecase)
          self.assertEqual(output.strip(), "")
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile ADDED
@@ -0,0 +1,51 @@
+ # Generates the input files used by the pynvjitlink binding test suite
+
+ # Test binaries are built taking into account the CC of the GPU in the test machine
+ GPU_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv | grep -v compute_cap | head -n 1 | sed 's/\.//')
+ GPU_CC ?= 75
+
+ # Use CC 7.0 as an alternative in fatbin testing, unless CC is 7.x
+ ifeq ($(shell echo "$(GPU_CC)" | cut -c1),7)
+ ALT_CC := 80
+ else
+ ALT_CC := 70
+ endif
+
+ # Gencode flags suitable for most tests
+ GENCODE := -gencode arch=compute_$(GPU_CC),code=sm_$(GPU_CC)
+
+ # Fatbin tests need to generate code for an additional compute capability
+ FATBIN_GENCODE := $(GENCODE) -gencode arch=compute_$(ALT_CC),code=sm_$(ALT_CC)
+
+ # LTO-IR tests need to generate for the LTO "architecture" instead
+ LTOIR_GENCODE := -gencode arch=lto_$(GPU_CC),code=lto_$(GPU_CC)
+
+ # Compile with optimization; use relocatable device code to preserve device
+ # functions in the final output
+ NVCC_FLAGS := -O3 -rdc true
+
+ # Flags specific to output type
+ CUBIN_FLAGS := $(GENCODE) --cubin
+ PTX_FLAGS := $(GENCODE) -ptx
+ OBJECT_FLAGS := $(GENCODE) -dc
+ LIBRARY_FLAGS := $(GENCODE) -lib
+ FATBIN_FLAGS := $(FATBIN_GENCODE) --fatbin
+ LTOIR_FLAGS := $(LTOIR_GENCODE) -dc
+
+ OUTPUT_DIR := ./
+
+ all:
+ 	@echo "GPU CC: $(GPU_CC)"
+ 	@echo "Alternative CC: $(ALT_CC)"
+ 	# Compile all test objects
+ 	nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/undefined_extern.cubin undefined_extern.cu
+ 	nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.cubin test_device_functions.cu
+ 	nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.fatbin test_device_functions.cu
+ 	nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ptx test_device_functions.cu
+ 	nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.o test_device_functions.cu
+ 	nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.a test_device_functions.cu
+
+ 	# Generate LTO-IR wrapped in a fatbin
+ 	nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ltoir.o test_device_functions.cu
+ 	# Generate LTO-IR in a "raw" LTO-IR container
+ 	python generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $(OUTPUT_DIR)/test_device_functions.ltoir test_device_functions.cu
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py ADDED
@@ -0,0 +1,163 @@
+ # Copyright (c) 2024, NVIDIA CORPORATION.
+
+ import argparse
+ import pathlib
+ import subprocess
+ import sys
+
+ from cuda import nvrtc
+
+ # Magic number found at the start of an LTO-IR file
+ LTOIR_MAGIC = 0x7F4E43ED
+
+
+ def check(args):
+     """
+     Abort and print an error message in the presence of an error result.
+
+     Otherwise:
+     - Return None if there were no more arguments,
+     - Return the singular argument if there was only one further argument,
+     - Return the tuple of arguments if multiple followed.
+     """
+
+     result, *args = args
+     value = result.value
+
+     if value:
+         error_string = check(nvrtc.nvrtcGetErrorString(result)).decode()
+         msg = f"NVRTC error, code {value}: {error_string}"
+         print(msg, file=sys.stderr)
+         sys.exit(1)
+
+     if len(args) == 0:
+         return None
+     elif len(args) == 1:
+         return args[0]
+     else:
+         return args
+
+
+ def determine_include_flags():
+     # Inspired by the logic in FindCUDAToolkit.cmake. We need the CUDA include
+     # paths because NVRTC doesn't add them by default, and we can compile a
+     # much broader set of test files if the CUDA includes are available.
+
+     # We invoke NVCC in verbose mode ("-v") and give a dummy filename, without
+     # which it won't produce output.
+
+     cmd = ["nvcc", "-v", "__dummy"]
+     cp = subprocess.run(cmd, capture_output=True)
+
+     # Since the dummy file doesn't actually exist, NVCC is expected to exit
+     # with an error code of 1.
+     rc = cp.returncode
+     if rc != 1:
+         print(f"Unexpected return code ({rc}) from `nvcc -v`. Expected 1.")
+         return None
+
+     output = cp.stderr.decode()
+     lines = output.splitlines()
+
+     includes_lines = [line for line in lines if line.startswith("#$ INCLUDES=")]
+     if len(includes_lines) != 1:
+         print(f"Expected exactly one INCLUDES line. Got {len(includes_lines)}.")
+         return None
+
+     # Parse out the arguments following "INCLUDES=" - these are a space
+     # separated list of strings that are potentially quoted.
+
+     quoted_flags = includes_lines[0].split("INCLUDES=")[1].strip().split()
+     include_flags = [flag.strip('"') for flag in quoted_flags]
+     print(f"Using CUDA include flags: {include_flags}")
+
+     return include_flags
+
+
+ def get_ltoir(source, name, arch):
+     """Given a CUDA C/C++ source, compile it and return the LTO-IR."""
+
+     program = check(
+         nvrtc.nvrtcCreateProgram(source.encode(), name.encode(), 0, [], [])
+     )
+
+     cuda_include_flags = determine_include_flags()
+     if cuda_include_flags is None:
+         print("Error determining CUDA include flags. Exiting.", file=sys.stderr)
+         sys.exit(1)
+
+     options = [
+         f"--gpu-architecture={arch}",
+         "-dlto",
+         "-rdc",
+         "true",
+         *cuda_include_flags,
+     ]
+     options = [o.encode() for o in options]
+
+     result = nvrtc.nvrtcCompileProgram(program, len(options), options)
+
+     # Report compilation errors back to the user
+     if result[0] == nvrtc.nvrtcResult.NVRTC_ERROR_COMPILATION:
+         log_size = check(nvrtc.nvrtcGetProgramLogSize(program))
+         log = b" " * log_size
+         check(nvrtc.nvrtcGetProgramLog(program, log))
+         print("NVRTC compilation error:\n", file=sys.stderr)
+         print(log.decode(), file=sys.stderr)
+         sys.exit(1)
+
+     # Handle other errors in the standard way
+     check(result)
+
+     ltoir_size = check(nvrtc.nvrtcGetLTOIRSize(program))
+     ltoir = b" " * ltoir_size
+     check(nvrtc.nvrtcGetLTOIR(program, ltoir))
+
+     # Check that the output looks like an LTO-IR container
+     header = int.from_bytes(ltoir[:4], byteorder="little")
+     if header != LTOIR_MAGIC:
+         print(
+             f"Unexpected header value 0x{header:X}.\n"
+             f"Expected LTO-IR magic number 0x{LTOIR_MAGIC:X}."
+             "\nExiting.",
+             file=sys.stderr,
+         )
+         sys.exit(1)
+
+     return ltoir
+
+
+ def main(sourcepath, outputpath, arch):
+     with open(sourcepath) as f:
+         source = f.read()
+
+     name = pathlib.Path(sourcepath).name
+     ltoir = get_ltoir(source, name, arch)
+
+     print(f"Writing {outputpath}...")
+
+     with open(outputpath, "wb") as f:
+         f.write(ltoir)
+
+
+ if __name__ == "__main__":
+     description = "Compiles CUDA C/C++ to LTO-IR using NVRTC."
+     parser = argparse.ArgumentParser(description=description)
+     parser.add_argument("sourcepath", help="path to source file")
+     parser.add_argument(
+         "-o", "--output", help="path to output file", default=None
+     )
+     parser.add_argument(
+         "-a",
+         "--arch",
+         help="compute arch to target (e.g. sm_87). " "Defaults to sm_50.",
+         default="sm_50",
+     )
+
+     args = parser.parse_args()
+     outputpath = args.output
+
+     if outputpath is None:
+         outputpath = pathlib.Path(args.sourcepath).with_suffix(".ltoir")
+
+     main(args.sourcepath, outputpath, args.arch)
numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu ADDED
@@ -0,0 +1,19 @@
+ #include <cuda_fp16.h>
+
+ extern __device__ bool __heq(__half arg1, __half arg2);
+
+ __device__ __half test_add_fp16(__half arg1, __half arg2) {
+   return __hadd(arg1, arg2);
+ }
+
+ __device__ bool test_cmp_fp16(__half arg1, __half arg2) {
+   return __heq(arg1, arg2);
+ }
+
+ typedef unsigned int uint32_t;
+
+ extern "C" __device__ int add_from_numba(uint32_t *result, uint32_t a,
+                                          uint32_t b) {
+   *result = a + b;
+   return 0;
+ }
numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu ADDED
@@ -0,0 +1,3 @@
+ extern __device__ float undef(float a, float b);
+
+ __global__ void f(float *r, float *a, float *b) { r[0] = undef(a[0], b[0]); }
numba_cuda-0.0.16.dist-info/METADATA → numba_cuda-0.0.18.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: numba-cuda
- Version: 0.0.16
+ Version: 0.0.18
  Summary: CUDA target for Numba
  Author: Anaconda Inc., NVIDIA Corporation
  License: BSD 2-clause
numba_cuda-0.0.16.dist-info/RECORD → numba_cuda-0.0.18.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
  _numba_cuda_redirector.py,sha256=rc56rnb40w3AtrqnhS66JSgYTSTsi3iTn8yP3NuoQV8,2401
- numba_cuda/VERSION,sha256=MrKpp1z4ZK4wXVG-XDLWh_uokdSUmX_-o7BTj-ugar4,7
+ numba_cuda/VERSION,sha256=9p4BNLUELS6P4gQF_geoXDc4ldjt9TTmnJlhGbwWsO0,7
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
  numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
  numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -8,7 +8,7 @@ numba_cuda/numba/cuda/api.py,sha256=shLu7NEZHRMcaZAMEXSoyA5Gi5m0tm6ZRymxKLEKCSg,
  numba_cuda/numba/cuda/api_util.py,sha256=aQfUV2-4RM_oGVvckMjbMr5e3effOQNX04v1T0O2EfQ,861
  numba_cuda/numba/cuda/args.py,sha256=HloHkw_PQal2DT-I70Xf_XbnGObS1jiUgcRrQ85Gq28,1978
  numba_cuda/numba/cuda/cg.py,sha256=9V1uZqyGOJX1aFd9c6GAPbLSqq83lE8LoP-vxxrKENY,1490
- numba_cuda/numba/cuda/codegen.py,sha256=raBoCDNt_qkDgB12yU0tbJQlA5_eTlUMemgcRHen1Vk,12174
+ numba_cuda/numba/cuda/codegen.py,sha256=9LnTlei-4JK7iq3Rg-H2Y19Oh_u5ZXMC_CPfattANjw,12358
  numba_cuda/numba/cuda/compiler.py,sha256=47SjuI5p4yWCujAglIq0Cb0ARO8QxRp4fOZropkNMtQ,16001
  numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
  numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
@@ -19,14 +19,14 @@ numba_cuda/numba/cuda/cudaimpl.py,sha256=3YMxQSCv2KClBrpuXGchrTNICV1F6NIjjL2rie5
  numba_cuda/numba/cuda/cudamath.py,sha256=EFNtdzEytAZuwijdRoFGzVKCeal76UzzaNy7wUFQx8I,3978
  numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZpwJocM,7823
  numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
- numba_cuda/numba/cuda/device_init.py,sha256=orQK7anhnmEkYPRjHEs5I9uhdBwaHeXbaSD4ViX2_14,3460
+ numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
  numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
- numba_cuda/numba/cuda/dispatcher.py,sha256=glLglJw4D03ZAK1B0N1K93M93yHfn7ZZZm7gLeue6Jk,40190
+ numba_cuda/numba/cuda/dispatcher.py,sha256=CwFksBBcjNg9dLSTgC4GgqOy2sLeZYX8mvZvdzscGBw,40206
  numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
  numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
  numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
  numba_cuda/numba/cuda/intrinsic_wrapper.py,sha256=zbcUbegbfF3GdnC2Rl-z26-gozE8xBtaMxpS8LpOhfo,2239
- numba_cuda/numba/cuda/intrinsics.py,sha256=PazoJEYpomsMRZsnXGJWDbCwUM9eJKV16if_AEAz-HY,5961
+ numba_cuda/numba/cuda/intrinsics.py,sha256=k0mQYAt0FTlJeghE5V8lSBtO4fgKH1jSRRLwHHcH4M0,6100
  numba_cuda/numba/cuda/libdevice.py,sha256=476LeIEaAth409m-0OO1SMMmY5AHzN2AotXI__k_yYE,60065
  numba_cuda/numba/cuda/libdevicedecl.py,sha256=xdZbb_rCaftMf8Pbw63g_Lr230N-1QoaYzBxq8udKTg,532
  numba_cuda/numba/cuda/libdevicefuncs.py,sha256=c80lGpGoFIYkAdgr4fzbxzdNCyJYrLdss64bwa0Mc6w,37471
@@ -34,11 +34,11 @@ numba_cuda/numba/cuda/libdeviceimpl.py,sha256=a9BmJ5kRtZ_mB7KjbDWW-PEpRuNiO_SMOx
  numba_cuda/numba/cuda/mathimpl.py,sha256=d_gCoQ4hJzNBFNc2hvRON5h1F052epgQ8zh_RKTlLlI,14416
  numba_cuda/numba/cuda/models.py,sha256=2c_seT-cWX-VyWYmcapaqOEl1M4FX6_kdIOusj4s5aE,1328
  numba_cuda/numba/cuda/nvvmutils.py,sha256=W1zr1TpnmFjTkHF0qeu5wnBHub6gzrnpzsvgmu2OLcU,8295
- numba_cuda/numba/cuda/printimpl.py,sha256=gyXZ3q0O4yECY3zmv83wIJBSCwVlXBUmRAMRDp7wqlI,3071
+ numba_cuda/numba/cuda/printimpl.py,sha256=Y1BCQ7EgO2wQ7O6LibNVYBG3tmjVTvmURATW403rLao,3504
  numba_cuda/numba/cuda/random.py,sha256=khX8iDdde_RTUPWhAqrxZacHRQAorFr7BokPuxRWzrg,10456
  numba_cuda/numba/cuda/simulator_init.py,sha256=W_bPRtmPGOQVuiprbgt7ENnnnELv_LPCeLDIsfsvFZ8,460
  numba_cuda/numba/cuda/stubs.py,sha256=W3tozv4ganMnfbdFqyPjgQXYeX8GQhwx_xXgv8jk6iM,22270
- numba_cuda/numba/cuda/target.py,sha256=EI6XuKQeqvng0uSx_V9jDoxbgFivqSz-4jczFzAbs5o,16837
+ numba_cuda/numba/cuda/target.py,sha256=LUOJRvGrX7Ch3-vLbZcjti21RAwUctdodVVcl82wYJ0,16954
  numba_cuda/numba/cuda/testing.py,sha256=E0wP2vfno1yWsl0v1zg31kpbU8FrKxTF-5y9Iv4WjA4,6412
  numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
  numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
@@ -47,12 +47,14 @@ numba_cuda/numba/cuda/vectorizers.py,sha256=u_0EzaD5tqVH8uOz4Gmqn3FgPC1rckwDAQuR
  numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=0TL4MZcJXUoo9qA7uu0vLv7eHrXRerVmyfi7O149ITw,199
  numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=B3ItYQywTnwTWjltxVRx6oaKRq7rxTtvOaiqTWsMQ2w,31123
  numba_cuda/numba/cuda/cudadrv/devices.py,sha256=6SneNmoq83gue0txFWWx4A65vViAa8xA06FzkApoqAk,7992
- numba_cuda/numba/cuda/cudadrv/driver.py,sha256=MfNwvOpCzjW1ctL_VZZZgBDIQhH8h0PfN3Vx54JrlJ8,105700
+ numba_cuda/numba/cuda/cudadrv/driver.py,sha256=uPjKugdtSJfIwVSAo3KgkvQhctbABkQphHAfcq6Q7ec,110892
  numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=52ms3X6hfPaQB8E1jb6g7QKqRvHzBMlDQ-V2DM1rXxQ,17178
  numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWxqEDJedpwDPEZ44,14209
- numba_cuda/numba/cuda/cudadrv/enums.py,sha256=E0lnh17jO4EvZ_hSIq3ZtfsE5bObmINtKb_lbK7rmMg,23708
+ numba_cuda/numba/cuda/cudadrv/enums.py,sha256=37zZmyrLvT-7R8wWtwKJkQhN8siLMxsDGiA3_NQ-yx8,23740
  numba_cuda/numba/cuda/cudadrv/error.py,sha256=zEIryW6aIy8GG4ypmTliB6RgY4Gy2n8ckz7I6W99LUM,524
  numba_cuda/numba/cuda/cudadrv/libs.py,sha256=PRyxal4bz9jVZmuLpKiYw-VaR59LekfwJgWKo7R5uRY,6005
+ numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
+ numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
  numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=CLpuD9VzPcYoXj8dZ2meSoqbWXHOOC5V5D6dFNdXqmg,9693
  numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
@@ -100,6 +102,7 @@ numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py,sha256=0KPe4E9wOZsSV_0QI0Lmj
  numba_cuda/numba/cuda/tests/cudadrv/test_linker.py,sha256=_l2_EQEko2Jet5ooj4XMT0L4BjOuqLjbONGj1_MVI50,10161
  numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py,sha256=kYXYMkx_3GPAITKp4reLeM8KSzKkpxiC8nxnBvXpaTA,4979
  numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py,sha256=984jATSa01SRoSrVqxPeO6ujJ7w2jsnZa39ABInFLVI,1529
+ numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=m5zv6K6PHLnm-AqHKo5x9f_ZBrn3rmvPX_ZGjjrkPfI,6807
  numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=DF7KV5uh-yMztks0f47NhpalV64dvsNy-f8HY6GhAhE,7373
  numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py,sha256=u_TthSS2N-2J4eBIuF4PGg33AjD-wxly7MKpz0vRAKc,944
  numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py,sha256=MQWZx1j3lbEpWmIpQ1bV9szrGOV3VHN0QrEnJRjAhW4,508
@@ -151,7 +154,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py,sha256=Uhe8Q0u42jySrpwA
  numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py,sha256=luDtBxFS_5ZbVemXe1Z7gfqMliaU_EAOR4SuLsU5rhw,2677
  numba_cuda/numba/cuda/tests/cudapy/test_idiv.py,sha256=HLJ_f2lX8m_NNJjUbl_8zZ0-8GsBlRdBP2CUo_yWb0Y,1056
  numba_cuda/numba/cuda/tests/cudapy/test_inspect.py,sha256=lP9-8SbWFn2Xc-qmF6UNhcY6LreKTnveaK5CGW2pu8E,5196
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=e6lABWy8YBgYheYYGfD75_y8vMbPP71GHb95A4hlLmA,34931
+ numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=M6-pad8nVM0fuL18uFxvE6tmHw0spLNhnMBLVlO0FKU,36400
  numba_cuda/numba/cuda/tests/cudapy/test_ipc.py,sha256=fggyy-kmsOkCb906_q3kXPGRziccWu7Co7ir83zBMwM,10536
  numba_cuda/numba/cuda/tests/cudapy/test_iterators.py,sha256=daQW3kSkp7icCmlTn9pCvnaauz60k_eBf4x1UQF-XVY,2344
  numba_cuda/numba/cuda/tests/cudapy/test_lang.py,sha256=U1BCVZMjU1AZ4wDSmjsRIPPcAReiq4dB77Cz7GmrdmA,1691
@@ -172,7 +175,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=0nJej4D898_JU-jhlif44
  numba_cuda/numba/cuda/tests/cudapy/test_optimization.py,sha256=SvqRsSFgcGxkFDZS-kul5B-mi8GxINTS98uUzAy4dhw,2647
  numba_cuda/numba/cuda/tests/cudapy/test_overload.py,sha256=u4yUDVFcV9E3NWMlNjM81e3IW4KaIkcDtXig8JYevsw,8538
  numba_cuda/numba/cuda/tests/cudapy/test_powi.py,sha256=TI82rYRnkSnwv9VN6PMpBnr9JqMJ_F3HhH4cKY6O8tw,3276
- numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256=-hYmtwvVUjk6raNGHSP_qHAqVK7xbip8eCbYo1AwQU0,4070
+ numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256=QXhhhnEz1d5BlldLINQVnmuHeM_dT3aLvfGS7jm24nE,4451
  numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py,sha256=R88Vfgg3mSAZ0Jy6WT6dJNmkFTsxnVnEmO7XqpqyxuU,986
  numba_cuda/numba/cuda/tests/cudapy/test_random.py,sha256=rLw7_8a7BBhD_8GNqMal0l_AbWXzLs_Q0hC6_X8gdjA,3467
  numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py,sha256=grR64kdRlsLcR0K3IxSfI2VKsTrrqxsXuROOpvj-6nw,18769
@@ -224,8 +227,12 @@ numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py,sha256=o4DYocyHK7
  numba_cuda/numba/cuda/tests/nocuda/test_import.py,sha256=teiL8rpFGQOh41kyBSSNHHFYAJYgpdStXkTcpK4_fxo,1641
  numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA9Ym-iT_B972bgFRu3UkRtwIgWtuI,7948
  numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
- numba_cuda-0.0.16.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
- numba_cuda-0.0.16.dist-info/METADATA,sha256=n01sYKdJ2lX4fsQ8MDAixZnIl6D69fQFkUboKBvC5OY,1393
- numba_cuda-0.0.16.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- numba_cuda-0.0.16.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
- numba_cuda-0.0.16.dist-info/RECORD,,
+ numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=OFC_6irwscCNGAyJJKq7fTchzWosCUuiVWU02m0bcUQ,2248
+ numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=OqqmFhDk3c0Edt4AvAGm0MQRCXb9jLSO2wpQ72oiXXI,4838
+ numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
+ numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
+ numba_cuda-0.0.18.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+ numba_cuda-0.0.18.dist-info/METADATA,sha256=kJletXn1FHyLocorf4n5QLO1TH0v6G_8uNkbqBAwiWY,1393
+ numba_cuda-0.0.18.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+ numba_cuda-0.0.18.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+ numba_cuda-0.0.18.dist-info/RECORD,,
numba_cuda-0.0.16.dist-info/WHEEL → numba_cuda-0.0.18.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.1.0)
+ Generator: setuptools (75.3.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
