numba-cuda 0.13.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/compiler.py +24 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +15 -3
- numba_cuda/numba/cuda/cudadrv/nvvm.py +126 -25
- numba_cuda/numba/cuda/debuginfo.py +52 -1
- numba_cuda/numba/cuda/decorators.py +14 -0
- numba_cuda/numba/cuda/dispatcher.py +8 -1
- numba_cuda/numba/cuda/lowering.py +83 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +62 -2
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +43 -4
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +106 -2
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +8 -21
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +7 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +64 -0
- {numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/METADATA +22 -1
- {numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/RECORD +22 -21
- {numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
-0.13.0
+0.14.0
numba_cuda/numba/cuda/compiler.py
CHANGED

@@ -575,6 +575,7 @@ def compile(
     abi_info=None,
     output="ptx",
     forceinline=False,
+    launch_bounds=None,
 ):
     """Compile a Python function to PTX or LTO-IR for a given set of argument
     types.

@@ -620,6 +621,16 @@ def compile(
                      ``alwaysinline`` function attribute to the function
                      definition. This is only valid when the output is
                      ``"ltoir"``.
+    :param launch_bounds: Kernel launch bounds, specified as a scalar or a tuple
+                          of between one and three items. Tuple items provide:
+
+                          - The maximum number of threads per block,
+                          - The minimum number of blocks per SM,
+                          - The maximum number of blocks per cluster.
+
+                          If a scalar is provided, it is used as the maximum
+                          number of threads per block.
+    :type launch_bounds: int | tuple[int]
    :return: (code, resty): The compiled code and inferred return type
    :rtype: tuple
    """

@@ -662,7 +673,12 @@ def compile(

     args, return_type = sigutils.normalize_signature(sig)

-
+    # If the user has used the config variable to specify a non-default that is
+    # greater than the lowest non-deprecated one, then we should default to
+    # their specified CC instead of the lowest non-deprecated one.
+    MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvvm.LOWEST_CURRENT_CC)
+    cc = cc or MIN_CC
+
     cres = compile_cuda(
         pyfunc,
         return_type,

@@ -693,6 +709,7 @@ def compile(
     kernel = lib.get_function(cres.fndesc.llvm_func_name)
     lib._entry_name = cres.fndesc.llvm_func_name
     kernel_fixup(kernel, debug)
+    nvvm.set_launch_bounds(kernel, launch_bounds)

     if lto:
         code = lib.get_ltoir(cc=cc)

@@ -713,6 +730,7 @@ def compile_for_current_device(
     abi_info=None,
     output="ptx",
     forceinline=False,
+    launch_bounds=None,
 ):
     """Compile a Python function to PTX or LTO-IR for a given signature for the
     current device's compute capabilility. This calls :func:`compile` with an

@@ -731,6 +749,7 @@ def compile_for_current_device(
         abi_info=abi_info,
         output=output,
         forceinline=forceinline,
+        launch_bounds=launch_bounds,
     )

@@ -746,6 +765,7 @@ def compile_ptx(
     abi="numba",
     abi_info=None,
     forceinline=False,
+    launch_bounds=None,
 ):
     """Compile a Python function to PTX for a given signature. See
     :func:`compile`. The defaults for this function are to compile a kernel

@@ -764,6 +784,7 @@ def compile_ptx(
         abi_info=abi_info,
         output="ptx",
         forceinline=forceinline,
+        launch_bounds=launch_bounds,
     )

@@ -778,6 +799,7 @@ def compile_ptx_for_current_device(
     abi="numba",
     abi_info=None,
     forceinline=False,
+    launch_bounds=None,
 ):
     """Compile a Python function to PTX for a given signature for the current
     device's compute capabilility. See :func:`compile_ptx`."""

@@ -794,6 +816,7 @@ def compile_ptx_for_current_device(
         abi=abi,
         abi_info=abi_info,
         forceinline=forceinline,
+        launch_bounds=launch_bounds,
     )
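As a rough illustration of the new launch_bounds option documented in the compile() docstring above, the following hypothetical snippet (not part of the diff) compiles a kernel to PTX with bounds applied; the kernel body and the bound values are arbitrary choices, and the PTX directive checks mirror the tests added later in this diff.

from numba import cuda, float32

def scale(x, factor):
    i = cuda.grid(1)
    if i < x.size:
        x[i] *= factor

# At most 128 threads per block and at least 2 resident blocks per SM.
# A bare scalar (launch_bounds=128) would set only the thread bound.
ptx, resty = cuda.compile_ptx(
    scale, (float32[::1], float32), launch_bounds=(128, 2)
)

# The bounds surface in the PTX as performance-tuning directives.
assert ".maxntid" in ptx
assert ".minnctapersm" in ptx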
numba_cuda/numba/cuda/cudadrv/driver.py
CHANGED

@@ -82,9 +82,21 @@ _MVC_ERROR_MESSAGE = (
     "to be available"
 )
 
-
-
-
+# Enable pynvjitlink if the environment variables NUMBA_CUDA_ENABLE_PYNVJITLINK
+# or CUDA_ENABLE_PYNVJITLINK are set, or if the pynvjitlink module is found. If
+# explicitly disabled, do not use pynvjitlink, even if present in the env.
+_pynvjitlink_enabled_in_env = _readenv(
+    "NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
+)
+_pynvjitlink_enabled_in_cfg = getattr(config, "CUDA_ENABLE_PYNVJITLINK", None)
+
+if _pynvjitlink_enabled_in_env is not None:
+    ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
+elif _pynvjitlink_enabled_in_cfg is not None:
+    ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_cfg
+else:
+    ENABLE_PYNVJITLINK = importlib.util.find_spec("pynvjitlink") is not None
+
 if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
     config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
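The precedence described in the comment above (environment variable first, then the config attribute, then autodetection of the pynvjitlink module) can be exercised from user code. A hedged sketch, assuming the flag is read when numba.cuda is first imported, as the module-level code above suggests:

import os

# Explicitly disable pynvjitlink even if the module is installed; "1" would
# force-enable it instead. This must happen before the CUDA target is
# imported, because the flag is evaluated at import time of driver.py.
os.environ["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "0"

from numba import cuda  # noqa: E402

print(cuda.is_available())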
numba_cuda/numba/cuda/cudadrv/nvvm.py
CHANGED

@@ -369,48 +369,101 @@ COMPUTE_CAPABILITIES = (
     (9, 0),
     (10, 0),
     (10, 1),
+    (10, 3),
     (12, 0),
+    (12, 1),
 )
 
 
-# Maps CTK version -> (min supported cc, max supported cc) inclusive
+# Maps CTK version -> (min supported cc, max supported cc) ranges, bounds inclusive
 _CUDA_CC_MIN_MAX_SUPPORT = {
-    (11,
-    (11,
-    (11,
-    (
-    (
-    (
+    (11, 2): [
+        ((3, 5), (8, 6)),
+    ],
+    (11, 3): [
+        ((3, 5), (8, 6)),
+    ],
+    (11, 4): [
+        ((3, 5), (8, 7)),
+    ],
+    (11, 5): [
+        ((3, 5), (8, 7)),
+    ],
+    (11, 6): [
+        ((3, 5), (8, 7)),
+    ],
+    (11, 7): [
+        ((3, 5), (8, 7)),
+    ],
+    (11, 8): [
+        ((3, 5), (9, 0)),
+    ],
+    (12, 0): [
+        ((5, 0), (9, 0)),
+    ],
+    (12, 1): [
+        ((5, 0), (9, 0)),
+    ],
+    (12, 2): [
+        ((5, 0), (9, 0)),
+    ],
+    (12, 3): [
+        ((5, 0), (9, 0)),
+    ],
+    (12, 4): [
+        ((5, 0), (9, 0)),
+    ],
+    (12, 5): [
+        ((5, 0), (9, 0)),
+    ],
+    (12, 6): [
+        ((5, 0), (9, 0)),
+    ],
+    (12, 8): [
+        ((5, 0), (10, 1)),
+        ((12, 0), (12, 0)),
+    ],
+    (12, 9): [
+        ((5, 0), (12, 1)),
+    ],
 }
 
+# From CUDA 12.9 Release notes, Section 1.5.4, "Deprecated Architectures"
+# https://docs.nvidia.com/cuda/archive/12.9.0/cuda-toolkit-release-notes/index.html#deprecated-architectures
+#
+# "Maxwell, Pascal, and Volta architectures are now feature-complete with no
+# further enhancements planned. While CUDA Toolkit 12.x series will continue
+# to support building applications for these architectures, offline
+# compilation and library support will be removed in the next major CUDA
+# Toolkit version release. Users should plan migration to newer
+# architectures, as future toolkits will be unable to target Maxwell, Pascal,
+# and Volta GPUs."
+#
+# In order to maintain compatibility with future toolkits, we use Turing (7.5)
+# as the default CC if it is not otherwise specified.
+LOWEST_CURRENT_CC = (7, 5)
+
 
 def ccs_supported_by_ctk(ctk_version):
     try:
         # For supported versions, we look up the range of supported CCs
-        min_cc, max_cc = _CUDA_CC_MIN_MAX_SUPPORT[ctk_version]
-        return tuple(
-            [cc for cc in COMPUTE_CAPABILITIES if min_cc <= cc <= max_cc]
-        )
-    except KeyError:
-        # For unsupported CUDA toolkit versions, all we can do is assume all
-        # non-deprecated versions we are aware of are supported.
         return tuple(
             [
                 cc
+                for min_cc, max_cc in _CUDA_CC_MIN_MAX_SUPPORT[ctk_version]
                 for cc in COMPUTE_CAPABILITIES
-                if cc
+                if min_cc <= cc <= max_cc
             ]
         )
+    except KeyError:
+        # For unsupported CUDA toolkit versions, all we can do is assume all
+        # non-deprecated versions we are aware of are supported.
+        #
+        # If the user has specified a non-default CC that is greater than the
+        # lowest non-deprecated one, then we should assume that instead.
+        MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, LOWEST_CURRENT_CC)
+
+        return tuple([cc for cc in COMPUTE_CAPABILITIES if cc >= MIN_CC])
 
 
 def get_supported_ccs():

@@ -857,6 +910,54 @@ def set_cuda_kernel(function):
     function.attributes.discard("noinline")
 
 
+def set_launch_bounds(kernel, launch_bounds):
+    # Based on: CUDA C / C++ Programming Guide 12.9, Section 8.38:
+    # https://docs.nvidia.com/cuda/archive/12.9.0/cuda-c-programming-guide/index.html#launch-bounds
+    # PTX ISA Specification Version 8.7, Section 11.4:
+    # https://docs.nvidia.com/cuda/archive/12.8.1/parallel-thread-execution/index.html#performance-tuning-directives
+    # NVVM IR Specification 12.9, Section 13:
+    # https://docs.nvidia.com/cuda/archive/12.9.0/nvvm-ir-spec/index.html#global-property-annotation
+
+    if launch_bounds is None:
+        return
+
+    if isinstance(launch_bounds, int):
+        launch_bounds = (launch_bounds,)
+
+    if (n := len(launch_bounds)) > 3:
+        raise ValueError(
+            f"Got {n} launch bounds: {launch_bounds}. A maximum of three are supported: "
+            "(max_threads_per_block, min_blocks_per_sm, max_blocks_per_cluster)"
+        )
+
+    module = kernel.module
+    nvvm_annotations = cgutils.get_or_insert_named_metadata(
+        module, "nvvm.annotations"
+    )
+
+    # Note that only maxntidx is used even though NVVM IR and PTX allow
+    # maxntidy and maxntidz. This is because the thread block size limit
+    # pertains only to the total number of threads, and therefore bounds on
+    # individual dimensions may be exceeded anyway. To prevent an unsurprising
+    # interface, it is cleaner to only allow setting total size via maxntidx
+    # and assuming y and z to be 1 (as is the case in CUDA C/C++).
+
+    properties = (
+        # Max threads per block
+        "maxntidx",
+        # Min blocks per multiprocessor
+        "minctasm",
+        # Max blocks per cluster
+        "cluster_max_blocks",
+    )
+
+    for prop, bound in zip(properties, launch_bounds):
+        mdstr = ir.MetaDataString(module, prop)
+        mdvalue = ir.Constant(ir.IntType(32), bound)
+        md = module.add_metadata((kernel, mdstr, mdvalue))
+        nvvm_annotations.add(md)
+
+
 def add_ir_version(mod):
     """Add NVVM IR version to module"""
     # We specify the IR version to match the current NVVM's IR version
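To make the new multi-range lookup and its fallback concrete, here is a small standalone sketch, not the library code itself, that mimics ccs_supported_by_ctk over an abbreviated table; DEFAULT_PTX_CC is a placeholder standing in for config.CUDA_DEFAULT_PTX_CC.

# Abbreviated stand-ins for the tables defined in nvvm.py above.
COMPUTE_CAPABILITIES = ((7, 5), (8, 0), (9, 0), (10, 0), (10, 1), (12, 0))
CC_RANGES = {
    (12, 8): [((5, 0), (10, 1)), ((12, 0), (12, 0))],
}
LOWEST_CURRENT_CC = (7, 5)
DEFAULT_PTX_CC = (5, 2)  # placeholder for config.CUDA_DEFAULT_PTX_CC


def supported_ccs(ctk_version):
    try:
        # Known toolkit: keep any CC that falls inside one of its ranges.
        return tuple(
            cc
            for lo, hi in CC_RANGES[ctk_version]
            for cc in COMPUTE_CAPABILITIES
            if lo <= cc <= hi
        )
    except KeyError:
        # Unknown toolkit: assume everything at or above the lowest
        # non-deprecated CC (or the user's configured default) is supported.
        min_cc = max(DEFAULT_PTX_CC, LOWEST_CURRENT_CC)
        return tuple(cc for cc in COMPUTE_CAPABILITIES if cc >= min_cc)


print(supported_ccs((12, 8)))  # ((7, 5), (8, 0), (9, 0), (10, 0), (10, 1), (12, 0))
print(supported_ccs((13, 0)))  # fallback: every CC at or above (7, 5)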
numba_cuda/numba/cuda/debuginfo.py
CHANGED

@@ -2,6 +2,7 @@ from llvmlite import ir
 from numba.core import types, cgutils
 from numba.core.debuginfo import DIBuilder
 from numba.cuda.types import GridGroup
+from numba.core.datamodel.models import UnionModel
 
 _BYTE_SIZE = 8
 

@@ -16,6 +17,7 @@ class CUDADIBuilder(DIBuilder):
         is_bool = False
         is_int_literal = False
         is_grid_group = False
+        m = self.module
 
         if isinstance(lltype, ir.IntType):
             if datamodel is None:

@@ -36,7 +38,6 @@ class CUDADIBuilder(DIBuilder):
             is_grid_group = True
 
         if is_bool or is_int_literal or is_grid_group:
-            m = self.module
             bitsize = _BYTE_SIZE * size
             # Boolean type workaround until upstream Numba is fixed
             if is_bool:

@@ -56,6 +57,56 @@ class CUDADIBuilder(DIBuilder):
                 },
             )
 
+        if isinstance(datamodel, UnionModel):
+            # UnionModel is handled here to represent polymorphic types
+            meta = []
+            maxwidth = 0
+            for field, model in zip(
+                datamodel._fields, datamodel.inner_models()
+            ):
+                # Ignore the "tag" field, focus on the "payload" field which
+                # contains the data types in memory
+                if field == "payload":
+                    for mod in model.inner_models():
+                        dtype = mod.get_value_type()
+                        membersize = self.cgctx.get_abi_sizeof(dtype)
+                        basetype = self._var_type(
+                            dtype, membersize, datamodel=mod
+                        )
+                        if isinstance(mod.fe_type, types.Literal):
+                            typename = str(mod.fe_type.literal_type)
+                        else:
+                            typename = str(mod.fe_type)
+                        # Use a prefix "_" on type names as field names
+                        membername = "_" + typename
+                        memberwidth = _BYTE_SIZE * membersize
+                        derived_type = m.add_debug_info(
+                            "DIDerivedType",
+                            {
+                                "tag": ir.DIToken("DW_TAG_member"),
+                                "name": membername,
+                                "baseType": basetype,
+                                # DW_TAG_member size is in bits
+                                "size": memberwidth,
+                            },
+                        )
+                        meta.append(derived_type)
+                        if memberwidth > maxwidth:
+                            maxwidth = memberwidth
+
+            fake_union_name = "dbg_poly_union"
+            return m.add_debug_info(
+                "DICompositeType",
+                {
+                    "file": self.difile,
+                    "tag": ir.DIToken("DW_TAG_union_type"),
+                    "name": fake_union_name,
+                    "identifier": str(lltype),
+                    "elements": m.add_metadata(meta),
+                    "size": maxwidth,
+                },
+                is_distinct=True,
+            )
         # For other cases, use upstream Numba implementation
         return super()._var_type(lltype, size, datamodel=datamodel)
numba_cuda/numba/cuda/decorators.py
CHANGED

@@ -23,6 +23,7 @@ def jit(
     opt=None,
     lineinfo=False,
     cache=False,
+    launch_bounds=None,
     **kws,
 ):
     """

@@ -72,6 +73,16 @@ def jit(
     :type lineinfo: bool
     :param cache: If True, enables the file-based cache for this function.
     :type cache: bool
+    :param launch_bounds: Kernel launch bounds, specified as a scalar or a tuple
+                          of between one and three items. Tuple items provide:
+
+                          - The maximum number of threads per block,
+                          - The minimum number of blocks per SM,
+                          - The maximum number of blocks per cluster.
+
+                          If a scalar is provided, it is used as the maximum
+                          number of threads per block.
+    :type launch_bounds: int | tuple[int]
     """
 
     if link and config.ENABLE_CUDASIM:

@@ -153,6 +164,7 @@ def jit(
             targetoptions["inline"] = inline
             targetoptions["forceinline"] = forceinline
             targetoptions["extensions"] = extensions
+            targetoptions["launch_bounds"] = launch_bounds
 
             disp = CUDADispatcher(func, targetoptions=targetoptions)

@@ -200,6 +212,7 @@ def jit(
                 lineinfo=lineinfo,
                 link=link,
                 cache=cache,
+                launch_bounds=launch_bounds,
                 **kws,
             )

@@ -221,6 +234,7 @@ def jit(
         targetoptions["inline"] = inline
         targetoptions["forceinline"] = forceinline
         targetoptions["extensions"] = extensions
+        targetoptions["launch_bounds"] = launch_bounds
         disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
 
         if cache:
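A minimal, hypothetical use of the launch_bounds keyword added to @cuda.jit above; launching with a block larger than the bound is then rejected by the driver, as the dispatcher tests later in this diff exercise.

import numpy as np
from numba import cuda


# At most 128 threads per block, at least 2 resident blocks per SM.
@cuda.jit(launch_bounds=(128, 2))
def add_one(x):
    i = cuda.grid(1)
    if i < x.size:
        x[i] += 1


x = cuda.to_device(np.zeros(1024, dtype=np.float32))
add_one[8, 128](x)    # OK: block size is within the bound
# add_one[4, 256](x)  # would raise CudaAPIError (CUDA_ERROR_INVALID_VALUE)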
numba_cuda/numba/cuda/dispatcher.py
CHANGED

@@ -18,7 +18,7 @@ from numba.cuda.compiler import (
     kernel_fixup,
 )
 import re
-from numba.cuda.cudadrv import driver
+from numba.cuda.cudadrv import driver, nvvm
 from numba.cuda.cudadrv.linkable_code import LinkableCode
 from numba.cuda.cudadrv.devices import get_context
 from numba.cuda.descriptor import cuda_target

@@ -94,6 +94,7 @@ class _Kernel(serialize.ReduceMixin):
         lto=False,
         opt=True,
         device=False,
+        launch_bounds=None,
     ):
         if device:
             raise RuntimeError("Cannot compile a device function as a kernel")

@@ -120,6 +121,7 @@ class _Kernel(serialize.ReduceMixin):
         self.debug = debug
         self.lineinfo = lineinfo
         self.extensions = extensions or []
+        self.launch_bounds = launch_bounds
 
         nvvm_options = {"fastmath": fastmath, "opt": 3 if opt else 0}

@@ -145,6 +147,7 @@ class _Kernel(serialize.ReduceMixin):
         kernel = lib.get_function(cres.fndesc.llvm_func_name)
         lib._entry_name = cres.fndesc.llvm_func_name
         kernel_fixup(kernel, self.debug)
+        nvvm.set_launch_bounds(kernel, launch_bounds)
 
         if not link:
             link = []

@@ -547,6 +550,10 @@ class _Kernel(serialize.ReduceMixin):
             for ax in range(devary.ndim):
                 kernelargs.append(c_intp(devary.strides[ax]))
 
+        elif isinstance(ty, types.CPointer):
+            # Pointer arguments should be a pointer-sized integer
+            kernelargs.append(ctypes.c_uint64(val))
+
         elif isinstance(ty, types.Integer):
             cval = getattr(ctypes, "c_%s" % ty)(val)
             kernelargs.append(cval)
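With the CPointer branch above, a kernel compiled against a types.CPointer signature can be launched with a raw device pointer taken from the CUDA Array Interface. A short sketch mirroring the doc example added later in this diff:

import numpy as np
from numba import cuda, types

sig = types.void(types.CPointer(types.int32), types.uint32)


@cuda.jit(sig)
def double_values(ptr, n):
    i = cuda.grid(1)
    if i < n:
        ptr[i] *= 2


arr = cuda.to_device(np.arange(8, dtype=np.int32))
raw_ptr = arr.__cuda_array_interface__["data"][0]  # plain integer address
double_values[1, 32](raw_ptr, len(arr))
print(arr.copy_to_host())  # [ 0  2  4  6  8 10 12 14]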
numba_cuda/numba/cuda/lowering.py
CHANGED

@@ -1,5 +1,7 @@
 from numba.core.lowering import Lower
 from llvmlite import ir
+from numba.core import ir as numba_ir
+from numba.core import types
 
 
 class CUDALower(Lower):

@@ -14,10 +16,7 @@ class CUDALower(Lower):
         if (
             self.context.enable_debuginfo
             # Conditions used to elide stores in parent method
-            and (
-                name not in self._singly_assigned_vars
-                or self._disable_sroa_like_opt
-            )
+            and self.store_var_needed(name)
             # No emission of debuginfo for internal names
             and not name.startswith("$")
         ):

@@ -27,6 +26,11 @@ class CUDALower(Lower):
             int_type = (ir.IntType,)
             real_type = ir.FloatType, ir.DoubleType
             if isinstance(lltype, int_type + real_type):
+                index = name.find(".")
+                src_name = name[:index] if index > 0 else name
+                if src_name in self.poly_var_typ_map:
+                    # Do not emit debug value on polymorphic type var
+                    return
                 # Emit debug value for scalar variable
                 sizeof = self.context.get_abi_sizeof(lltype)
                 datamodel = self.context.data_model_manager[fetype]

@@ -41,3 +45,78 @@ class CUDALower(Lower):
                 datamodel,
                 argidx,
             )
+
+    def pre_lower(self):
+        """
+        Called before lowering all blocks.
+        """
+        super().pre_lower()
+
+        self.poly_var_typ_map = {}
+        self.poly_var_loc_map = {}
+
+        # When debug info is enabled, walk through function body and mark
+        # variables with polymorphic types.
+        if self.context.enable_debuginfo and self._disable_sroa_like_opt:
+            poly_map = {}
+            # pre-scan all blocks
+            for block in self.blocks.values():
+                for x in block.find_insts(numba_ir.Assign):
+                    if x.target.name.startswith("$"):
+                        continue
+                    ssa_name = x.target.name
+                    index = ssa_name.find(".")
+                    src_name = ssa_name[:index] if index > 0 else ssa_name
+                    # Check all the multi-versioned targets
+                    if len(x.target.versioned_names) > 0:
+                        fetype = self.typeof(ssa_name)
+                        if src_name not in poly_map:
+                            poly_map[src_name] = set()
+                        # deduplicate polymorphic types
+                        if isinstance(fetype, types.Literal):
+                            fetype = fetype.literal_type
+                        poly_map[src_name].add(fetype)
+            # Filter out multi-versioned but single typed variables
+            self.poly_var_typ_map = {
+                k: v for k, v in poly_map.items() if len(v) > 1
+            }
+
+    def _alloca_var(self, name, fetype):
+        """
+        Ensure the given variable has an allocated stack slot (if needed).
+        """
+        # If the name is not handled yet and a store is needed
+        if name not in self.varmap and self.store_var_needed(name):
+            index = name.find(".")
+            src_name = name[:index] if index > 0 else name
+            if src_name in self.poly_var_typ_map:
+                dtype = types.UnionType(self.poly_var_typ_map[src_name])
+                datamodel = self.context.data_model_manager[dtype]
+                if src_name not in self.poly_var_loc_map:
+                    # UnionType has sorted set of types, max at last index
+                    maxsizetype = dtype.types[-1]
+                    # Create a single element aggregate type
+                    aggr_type = types.UniTuple(maxsizetype, 1)
+                    lltype = self.context.get_value_type(aggr_type)
+                    ptr = self.alloca_lltype(src_name, lltype, datamodel)
+                    # save the location of the union type for polymorphic var
+                    self.poly_var_loc_map[src_name] = ptr
+                # Any member of this union type shoud type cast ptr to fetype
+                lltype = self.context.get_value_type(fetype)
+                castptr = self.builder.bitcast(
+                    self.poly_var_loc_map[src_name], ir.PointerType(lltype)
+                )
+                # Remember the pointer
+                self.varmap[name] = castptr
+
+        super()._alloca_var(name, fetype)
+
+    def store_var_needed(self, name):
+        # Check the conditions used to elide stores in parent class,
+        # e.g. in method storevar() and _alloca_var()
+        return (
+            # used in multiple blocks
+            name not in self._singly_assigned_vars
+            # lowering with debuginfo
+            or self._disable_sroa_like_opt
+        )
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
CHANGED

@@ -299,12 +299,12 @@ class TestLinkerUsage(CUDATestCase):
 
     def test_linker_enabled_envvar(self):
         env = os.environ.copy()
-        env
+        env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
         run_in_subprocess(self.src.format(config=""), env=env)
 
     def test_linker_disabled_envvar(self):
         env = os.environ.copy()
-        env
+        env["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "0"
         with self.assertRaisesRegex(
             AssertionError, "LTO and additional flags require PyNvJitLinker"
         ):
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py
CHANGED

@@ -30,7 +30,8 @@ class TestNvvmDriver(unittest.TestCase):
             self.skipTest("-gen-lto unavailable in this toolkit version")
 
         nvvmir = self.get_nvvmir()
-
+        arch = "compute_%d%d" % nvvm.LOWEST_CURRENT_CC
+        ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch=arch)
 
         # Verify we correctly passed the option by checking if we got LTOIR
         # from NVVM (by looking for the expected magic number for LTOIR)

@@ -138,9 +139,9 @@ class TestNvvmDriver(unittest.TestCase):
 class TestArchOption(unittest.TestCase):
     def test_get_arch_option(self):
         # Test returning the nearest lowest arch.
-        self.assertEqual(nvvm.get_arch_option(5, 3), "compute_53")
         self.assertEqual(nvvm.get_arch_option(7, 5), "compute_75")
         self.assertEqual(nvvm.get_arch_option(7, 7), "compute_75")
+        self.assertEqual(nvvm.get_arch_option(8, 8), "compute_87")
         # Test known arch.
         supported_cc = nvvm.get_supported_ccs()
         for arch in supported_cc:
numba_cuda/numba/cuda/tests/cudapy/test_compiler.py
CHANGED

@@ -1,5 +1,5 @@
 from math import sqrt
-from numba import cuda, float32, int16, int32, int64, uint32, void
+from numba import cuda, float32, int16, int32, int64, types, uint32, void
 from numba.cuda import (
     compile,
     compile_for_current_device,

@@ -288,7 +288,7 @@ class TestCompileOnlyTests(unittest.TestCase):
             # Sleep for a variable time
             cuda.nanosleep(x)
 
-        ptx, resty = compile_ptx(use_nanosleep, (uint32,)
+        ptx, resty = compile_ptx(use_nanosleep, (uint32,))
 
         nanosleep_count = 0
         for line in ptx.split("\n"):

@@ -306,5 +306,65 @@ class TestCompileOnlyTests(unittest.TestCase):
         )
 
 
+@skip_on_cudasim("Compilation unsupported in the simulator")
+class TestCompileWithLaunchBounds(unittest.TestCase):
+    def _test_launch_bounds_common(self, launch_bounds):
+        def f():
+            pass
+
+        sig = "void()"
+        ptx, resty = cuda.compile_ptx(f, sig, launch_bounds=launch_bounds)
+        self.assertIsInstance(resty, types.NoneType)
+        self.assertRegex(ptx, r".maxntid\s+128,\s+1,\s+1")
+        return ptx
+
+    def test_launch_bounds_scalar(self):
+        launch_bounds = 128
+        ptx = self._test_launch_bounds_common(launch_bounds)
+
+        self.assertNotIn(".minnctapersm", ptx)
+        self.assertNotIn(".maxclusterrank", ptx)
+
+    def test_launch_bounds_tuple(self):
+        launch_bounds = (128,)
+        ptx = self._test_launch_bounds_common(launch_bounds)
+
+        self.assertNotIn(".minnctapersm", ptx)
+        self.assertNotIn(".maxclusterrank", ptx)
+
+    def test_launch_bounds_with_min_cta(self):
+        launch_bounds = (128, 2)
+        ptx = self._test_launch_bounds_common(launch_bounds)
+
+        self.assertRegex(ptx, r".minnctapersm\s+2")
+        self.assertNotIn(".maxclusterrank", ptx)
+
+    def test_launch_bounds_with_max_cluster_rank(self):
+        def f():
+            pass
+
+        launch_bounds = (128, 2, 4)
+        cc = (9, 0)
+        sig = "void()"
+        ptx, resty = cuda.compile_ptx(
+            f, sig, launch_bounds=launch_bounds, cc=cc
+        )
+        self.assertIsInstance(resty, types.NoneType)
+        self.assertRegex(ptx, r".maxntid\s+128,\s+1,\s+1")
+
+        self.assertRegex(ptx, r".minnctapersm\s+2")
+        self.assertRegex(ptx, r".maxclusterrank\s+4")
+
+    def test_too_many_launch_bounds(self):
+        def f():
+            pass
+
+        sig = "void()"
+        launch_bounds = (128, 2, 4, 8)
+
+        with self.assertRaisesRegex(ValueError, "Got 4 launch bounds:"):
+            cuda.compile_ptx(f, sig, launch_bounds=launch_bounds)
+
+
 if __name__ == "__main__":
     unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
CHANGED

@@ -332,10 +332,10 @@ class TestCudaDebugInfo(CUDATestCase):
 
         @cuda.jit("void(int32, int32)", debug=True, opt=False)
         def f(x, y):
-
-
-
-
+            z1 = x  # noqa: F841
+            z2 = 100  # noqa: F841
+            z3 = y  # noqa: F841
+            z4 = True  # noqa: F841
 
         llvm_ir = f.inspect_llvm(sig)
         # Verify the call to llvm.dbg.declare is replaced by llvm.dbg.value

@@ -373,6 +373,45 @@ class TestCudaDebugInfo(CUDATestCase):
         match = re.compile(pat).search(llvm_ir)
         self.assertIsNone(match, msg=llvm_ir)
 
+    def test_union_poly_types(self):
+        sig = (types.int32, types.int32)
+
+        @cuda.jit("void(int32, int32)", debug=True, opt=False)
+        def f(x, y):
+            foo = 100  # noqa: F841
+            foo = 2.34  # noqa: F841
+            foo = True  # noqa: F841
+            foo = 200  # noqa: F841
+
+        llvm_ir = f.inspect_llvm(sig)
+        # Extract the type node id
+        pat1 = r'!DILocalVariable\(.*name: "foo".*type: !(\d+)\)'
+        match = re.compile(pat1).search(llvm_ir)
+        self.assertIsNotNone(match, msg=llvm_ir)
+        mdnode_id = match.group(1)
+        # Verify the union type and extract the elements node id
+        pat2 = rf"!{mdnode_id} = distinct !DICompositeType\(elements: !(\d+),.*size: 64, tag: DW_TAG_union_type\)"  # noqa: E501
+        match = re.compile(pat2).search(llvm_ir)
+        self.assertIsNotNone(match, msg=llvm_ir)
+        mdnode_id = match.group(1)
+        # Extract the member node ids
+        pat3 = r"!{ !(\d+), !(\d+), !(\d+) }"
+        match = re.compile(pat3).search(llvm_ir)
+        self.assertIsNotNone(match, msg=llvm_ir)
+        mdnode_id1 = match.group(1)
+        mdnode_id2 = match.group(2)
+        mdnode_id3 = match.group(3)
+        # Verify the member nodes
+        pat4 = rf'!{mdnode_id1} = !DIDerivedType(.*name: "_bool", size: 8, tag: DW_TAG_member)'  # noqa: E501
+        match = re.compile(pat4).search(llvm_ir)
+        self.assertIsNotNone(match, msg=llvm_ir)
+        pat5 = rf'!{mdnode_id2} = !DIDerivedType(.*name: "_float64", size: 64, tag: DW_TAG_member)'  # noqa: E501
+        match = re.compile(pat5).search(llvm_ir)
+        self.assertIsNotNone(match, msg=llvm_ir)
+        pat6 = rf'!{mdnode_id3} = !DIDerivedType(.*name: "_int64", size: 64, tag: DW_TAG_member)'  # noqa: E501
+        match = re.compile(pat6).search(llvm_ir)
+        self.assertIsNotNone(match, msg=llvm_ir)
+
 
 if __name__ == "__main__":
     unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
CHANGED

@@ -1,9 +1,26 @@
+from numba.cuda.cudadrv.driver import CudaAPIError
 import numpy as np
 import threading
 
-from numba import
+from numba import (
+    boolean,
+    config,
+    cuda,
+    float32,
+    float64,
+    int32,
+    int64,
+    types,
+    uint32,
+    void,
+)
 from numba.core.errors import TypingError
-from numba.cuda.testing import
+from numba.cuda.testing import (
+    cc_X_or_above,
+    skip_on_cudasim,
+    unittest,
+    CUDATestCase,
+)
 import math
 

@@ -466,6 +483,35 @@ class TestDispatcher(CUDATestCase):
         self.assertEqual("Add two integers, kernel version", add_kernel.__doc__)
         self.assertEqual("Add two integers, device version", add_device.__doc__)
 
+    @skip_on_cudasim("Cudasim does not have device pointers")
+    def test_dispatcher_cpointer_arguments(self):
+        ptr = types.CPointer(types.int32)
+        sig = void(ptr, int32, ptr, ptr, uint32)
+
+        @cuda.jit(sig)
+        def axpy(r, a, x, y, n):
+            i = cuda.grid(1)
+            if i < n:
+                r[i] = a * x[i] + y[i]
+
+        N = 16
+        a = 5
+        hx = np.arange(10, dtype=np.int32)
+        hy = np.arange(10, dtype=np.int32) * 2
+        dx = cuda.to_device(hx)
+        dy = cuda.to_device(hy)
+        dr = cuda.device_array_like(dx)
+
+        r_ptr = dr.__cuda_array_interface__["data"][0]
+        x_ptr = dx.__cuda_array_interface__["data"][0]
+        y_ptr = dy.__cuda_array_interface__["data"][0]
+
+        axpy[1, 32](r_ptr, a, x_ptr, y_ptr, N)
+
+        expected = a * hx + hy
+        actual = dr.copy_to_host()
+        np.testing.assert_equal(expected, actual)
+
 
 @skip_on_cudasim("CUDA simulator doesn't implement kernel properties")
 class TestDispatcherKernelProperties(CUDATestCase):

@@ -708,5 +754,63 @@ class TestDispatcherKernelProperties(CUDATestCase):
         self.assertGreaterEqual(local_mem_per_thread, N * 4)
 
 
+@skip_on_cudasim("Simulator does not support launch bounds")
+class TestLaunchBounds(CUDATestCase):
+    def _test_launch_bounds_common(self, launch_bounds):
+        @cuda.jit(launch_bounds=launch_bounds)
+        def f():
+            pass
+
+        # Test successful launch
+        f[1, 128]()
+
+        # Test launch bound exceeded
+        msg = "Call to cuLaunchKernel results in CUDA_ERROR_INVALID_VALUE"
+        with self.assertRaisesRegex(CudaAPIError, msg):
+            f[1, 256]()
+
+        sig = f.signatures[0]
+        ptx = f.inspect_asm(sig)
+        self.assertRegex(ptx, r".maxntid\s+128,\s+1,\s+1")
+
+        return ptx
+
+    def test_launch_bounds_scalar(self):
+        launch_bounds = 128
+        ptx = self._test_launch_bounds_common(launch_bounds)
+
+        self.assertNotIn(".minnctapersm", ptx)
+        self.assertNotIn(".maxclusterrank", ptx)
+
+    def test_launch_bounds_tuple(self):
+        launch_bounds = (128,)
+        ptx = self._test_launch_bounds_common(launch_bounds)
+
+        self.assertNotIn(".minnctapersm", ptx)
+        self.assertNotIn(".maxclusterrank", ptx)
+
+    def test_launch_bounds_with_min_cta(self):
+        launch_bounds = (128, 2)
+        ptx = self._test_launch_bounds_common(launch_bounds)
+
+        self.assertRegex(ptx, r".minnctapersm\s+2")
+        self.assertNotIn(".maxclusterrank", ptx)
+
+    @unittest.skipUnless(
+        cc_X_or_above(9, 0), "CC 9.0 needed for max cluster rank"
+    )
+    def test_launch_bounds_with_max_cluster_rank(self):
+        launch_bounds = (128, 2, 4)
+        ptx = self._test_launch_bounds_common(launch_bounds)
+
+        self.assertRegex(ptx, r".minnctapersm\s+2")
+        self.assertRegex(ptx, r".maxclusterrank\s+4")
+
+    def test_too_many_launch_bounds(self):
+        launch_bounds = (128, 2, 4, 8)
+        with self.assertRaisesRegex(ValueError, "Got 4 launch bounds:"):
+            cuda.jit("void()", launch_bounds=launch_bounds)(lambda: None)
+
+
 if __name__ == "__main__":
     unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py
CHANGED

@@ -118,31 +118,18 @@ class TestFastMathOption(CUDATestCase):
         def tanh_kernel(r, x):
            r[0] = tanh(x)
 
-
-
-            tanh_kernel, (float32[::1], float32), fastmath=True, cc=cc
-        )
-        precptx, _ = compile_ptx(
-            tanh_kernel, (float32[::1], float32), cc=cc
-        )
-        criterion.check(self, fastptx, precptx)
-
-        tanh_common_test(
-            cc=(7, 5),
-            criterion=FastMathCriterion(
-                fast_expected=["tanh.approx.f32 "],
-                prec_unexpected=["tanh.approx.f32 "],
-            ),
+        fastptx, _ = compile_ptx(
+            tanh_kernel, (float32[::1], float32), fastmath=True
         )
+        precptx, _ = compile_ptx(tanh_kernel, (float32[::1], float32))
 
-
-
-
-                fast_expected=["ex2.approx.ftz.f32 ", "rcp.approx.ftz.f32 "],
-                prec_unexpected=["tanh.approx.f32 "],
-            ),
+        criterion = FastMathCriterion(
+            fast_expected=["tanh.approx.f32 "],
+            prec_unexpected=["tanh.approx.f32 "],
         )
 
+        criterion.check(self, fastptx, precptx)
+
     def test_expf(self):
         self._test_fast_math_unary(
             exp,
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
CHANGED

@@ -641,7 +641,7 @@ class TestCudaIntrinsic(CUDATestCase):
     @skip_on_cudasim("Compilation unsupported in the simulator")
     def test_hadd_ptx(self):
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hadd_scalar, args
+        ptx, _ = compile_ptx(simple_hadd_scalar, args)
         self.assertIn("add.f16", ptx)
 
     @skip_unless_cc_53

@@ -668,7 +668,7 @@ class TestCudaIntrinsic(CUDATestCase):
     @skip_on_cudasim("Compilation unsupported in the simulator")
     def test_hfma_ptx(self):
         args = (f2[:], f2, f2, f2)
-        ptx, _ = compile_ptx(simple_hfma_scalar, args
+        ptx, _ = compile_ptx(simple_hfma_scalar, args)
         self.assertIn("fma.rn.f16", ptx)
 
     @skip_unless_cc_53

@@ -693,7 +693,7 @@ class TestCudaIntrinsic(CUDATestCase):
     @skip_on_cudasim("Compilation unsupported in the simulator")
     def test_hsub_ptx(self):
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hsub_scalar, args
+        ptx, _ = compile_ptx(simple_hsub_scalar, args)
         self.assertIn("sub.f16", ptx)
 
     @skip_unless_cc_53

@@ -718,7 +718,7 @@ class TestCudaIntrinsic(CUDATestCase):
     @skip_on_cudasim("Compilation unsupported in the simulator")
     def test_hmul_ptx(self):
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hmul_scalar, args
+        ptx, _ = compile_ptx(simple_hmul_scalar, args)
         self.assertIn("mul.f16", ptx)
 
     @skip_unless_cc_53

@@ -763,7 +763,7 @@ class TestCudaIntrinsic(CUDATestCase):
     @skip_on_cudasim("Compilation unsupported in the simulator")
     def test_hneg_ptx(self):
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_hneg_scalar, args
+        ptx, _ = compile_ptx(simple_hneg_scalar, args)
         self.assertIn("neg.f16", ptx)
 
     @skip_unless_cc_53

@@ -786,7 +786,7 @@ class TestCudaIntrinsic(CUDATestCase):
     @skip_on_cudasim("Compilation unsupported in the simulator")
     def test_habs_ptx(self):
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_habs_scalar, args
+        ptx, _ = compile_ptx(simple_habs_scalar, args)
         self.assertIn("abs.f16", ptx)
 
     @skip_unless_cc_53
numba_cuda/numba/cuda/tests/cudapy/test_operator.py
CHANGED

@@ -178,7 +178,7 @@ class TestOperatorModule(CUDATestCase):
         args = (f2[:], f2, f2)
         for fn, instr in zip(functions, instrs):
             with self.subTest(instr=instr):
-                ptx, _ = compile_ptx(fn, args
+                ptx, _ = compile_ptx(fn, args)
                 self.assertIn(instr, ptx)
 
     @skip_unless_cc_53

@@ -212,7 +212,7 @@ class TestOperatorModule(CUDATestCase):
 
         for fn, instr in zip(functions, instrs):
             with self.subTest(instr=instr):
-                ptx, _ = compile_ptx(fn, args
+                ptx, _ = compile_ptx(fn, args)
                 self.assertIn(instr, ptx)
 
     @skip_unless_cc_53

@@ -255,13 +255,13 @@ class TestOperatorModule(CUDATestCase):
     @skip_on_cudasim("Compilation unsupported in the simulator")
     def test_fp16_neg_ptx(self):
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_fp16neg, args
+        ptx, _ = compile_ptx(simple_fp16neg, args)
         self.assertIn("neg.f16", ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
     def test_fp16_abs_ptx(self):
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_fp16abs, args
+        ptx, _ = compile_ptx(simple_fp16abs, args)
 
         self.assertIn("abs.f16", ptx)
 

@@ -396,7 +396,7 @@ class TestOperatorModule(CUDATestCase):
 
         for fn, op, s in zip(functions, ops, opstring):
             with self.subTest(op=op):
-                ptx, _ = compile_ptx(fn, args
+                ptx, _ = compile_ptx(fn, args)
                 self.assertIn(s, ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")

@@ -431,7 +431,7 @@ class TestOperatorModule(CUDATestCase):
         for fn, op in zip(functions, ops):
             with self.subTest(op=op):
                 args = (b1[:], f2, from_dtype(np.int8))
-                ptx, _ = compile_ptx(fn, args
+                ptx, _ = compile_ptx(fn, args)
                 self.assertIn(opstring[op], ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")

@@ -475,7 +475,7 @@ class TestOperatorModule(CUDATestCase):
             with self.subTest(op=op, ty=ty):
                 arg2_ty = np.result_type(np.float16, ty)
                 args = (b1[:], f2, from_dtype(arg2_ty))
-                ptx, _ = compile_ptx(fn, args
+                ptx, _ = compile_ptx(fn, args)
 
                 ops = opstring[op] + opsuffix[arg2_ty]
                 self.assertIn(ops, ptx)
numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py
ADDED

@@ -0,0 +1,64 @@
+import unittest
+
+from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.tests.support import captured_stdout
+
+
+@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
+class TestCPointer(CUDATestCase):
+    """
+    Test simple vector addition
+    """
+
+    def setUp(self):
+        # Prevent output from this test showing
+        # up when running the test suite
+        self._captured_stdout = captured_stdout()
+        self._captured_stdout.__enter__()
+        super().setUp()
+
+    def tearDown(self):
+        # No exception type, value, or traceback
+        self._captured_stdout.__exit__(None, None, None)
+        super().tearDown()
+
+    def test_ex_cpointer(self):
+        # ex_cpointer.sig.begin
+        import numpy as np
+        from numba import cuda, types
+
+        # The first kernel argument is a pointer to a uint8 array.
+        # The second argument holds the length as a uint32.
+        # The return type of a kernel is always void.
+        sig = types.void(types.CPointer(types.uint8), types.uint32)
+        # ex_cpointer.sig.end
+
+        # ex_cpointer.kernel.begin
+        @cuda.jit(sig)
+        def add_one(x, n):
+            i = cuda.grid(1)
+            if i < n:
+                x[i] += 1
+
+        # ex_cpointer.kernel.end
+
+        # ex_cpointer.launch.begin
+        x = cuda.to_device(np.arange(10, dtype=np.uint8))
+
+        # Print initial values of x
+        print(x.copy_to_host())  # [0 1 2 3 4 5 6 7 8 9]
+
+        # Obtain a pointer to the data from from the CUDA Array Interface
+        x_ptr = x.__cuda_array_interface__["data"][0]
+        x_len = len(x)
+
+        # Launch the kernel with the pointer and length
+        add_one[1, 32](x_ptr, x_len)
+
+        # Demonstrate that the data was updated by the kernel
+        print(x.copy_to_host())  # [ 1  2  3  4  5  6  7  8  9 10]
+        # ex_cpointer.launch.end
+
+
+if __name__ == "__main__":
+    unittest.main()
{numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: numba-cuda
-Version: 0.13.0
+Version: 0.14.0
 Summary: CUDA target for Numba
 Author: Anaconda Inc., NVIDIA Corporation
 License: BSD 2-clause

@@ -12,6 +12,27 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numba>=0.59.1
+Provides-Extra: cu11
+Requires-Dist: cuda-python==11.8.*; extra == "cu11"
+Requires-Dist: nvidia-cuda-nvcc-cu11; extra == "cu11"
+Requires-Dist: nvidia-cuda-runtime-cu11; extra == "cu11"
+Requires-Dist: nvidia-cuda-nvrtc-cu11; extra == "cu11"
+Provides-Extra: cu12
+Requires-Dist: cuda-python==12.9.*; extra == "cu12"
+Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
+Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
+Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "cu12"
+Provides-Extra: test
+Requires-Dist: psutil; extra == "test"
+Requires-Dist: cffi; extra == "test"
+Requires-Dist: pytest; extra == "test"
+Provides-Extra: test-cu11
+Requires-Dist: numba-cuda[test]; extra == "test-cu11"
+Requires-Dist: nvidia-curand-cu11; extra == "test-cu11"
+Provides-Extra: test-cu12
+Requires-Dist: numba-cuda[test]; extra == "test-cu12"
+Requires-Dist: nvidia-curand-cu12; extra == "test-cu12"
+Requires-Dist: pynvjitlink-cu12; extra == "test-cu12"
 Dynamic: license-file
 
 <div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
{numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
 _numba_cuda_redirector.py,sha256=n_r8MYbu5-vcXMnLJW147k8DnFXXvgb7nPIXnlXwTyQ,2659
-numba_cuda/VERSION,sha256=
+numba_cuda/VERSION,sha256=BlWCZVqs1vyD_3QqVxXAS7Slc5W_PuRVl5j6QsLORYk,7
 numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
 numba_cuda/_version.py,sha256=nzrrJXi85d18m6SPdsPsetJNClDETkmF1MrEhGLYDBs,734
 numba_cuda/numba/cuda/__init__.py,sha256=3siqMXEKqa9ezQ8RxPC3KMdebUjgJt-EKxxV4CX9818,607

@@ -10,18 +10,18 @@ numba_cuda/numba/cuda/args.py,sha256=UlTHTJpwPeCtnW0Bb-Wetm5UO9TPR-PCgIt5ys8b8tQ
 numba_cuda/numba/cuda/bf16.py,sha256=PXuitxHhPMjnti3g9IOSoL90ofGgVRcDfqFg7AqCXpU,1778
 numba_cuda/numba/cuda/cg.py,sha256=n-sBj05ut6U_GgFIq-PTCjPad4nXWAc0GVg_J9xD_Pc,1602
 numba_cuda/numba/cuda/codegen.py,sha256=u2J0mRRDBiPceB1G5WR4KQ0KUFGGawaDaaoUf9zLQzE,16719
-numba_cuda/numba/cuda/compiler.py,sha256=
+numba_cuda/numba/cuda/compiler.py,sha256=JeF0PXoIOlL4wCHPkcQN48KTl_Ll90TQ3ZO150Isaa0,26681
 numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=8lUPmU6FURxphzEqkPLZRPYBCEK_wmDtHq2voPkckfs,950
 numba_cuda/numba/cuda/cuda_paths.py,sha256=kMIJ_1yV2qtcKEM5rCgSDJ3Gz7bgxbfAWh54E5cDndg,15872
 numba_cuda/numba/cuda/cudadecl.py,sha256=_TXMu8SIT2hIhsPI0n05wuShtzp8NcPX88NH5y7xauU,22909
 numba_cuda/numba/cuda/cudaimpl.py,sha256=q6CPqD8ZtJvY8JlpMEN--d6003_FIHoHLBqNP2McNyM,39274
 numba_cuda/numba/cuda/cudamath.py,sha256=wbGjlyGVwcUAoQjgXIaAaasLdVuDSKHkf6KyID5IYBw,3979
-numba_cuda/numba/cuda/debuginfo.py,sha256=
-numba_cuda/numba/cuda/decorators.py,sha256=
+numba_cuda/numba/cuda/debuginfo.py,sha256=br4Ce9Q8AA7FlX8sjpXj0-mUWgs5ttQCP0ma-qayWUE,7812
+numba_cuda/numba/cuda/decorators.py,sha256=NeSHxaiUZyAVJf79UFTctU-7AKLm8dDPERIHbERZPI0,10347
 numba_cuda/numba/cuda/descriptor.py,sha256=t1rSVJSCAlVACC5_Un3FQ7iubdTTBe-euqz88cvs2tI,985
 numba_cuda/numba/cuda/device_init.py,sha256=Rtwd6hQMHMLMkj6MXtndbWYFJfkIaRe0MwOIJF2nzhU,3449
 numba_cuda/numba/cuda/deviceufunc.py,sha256=zj9BbLiZD-dPttHew4olw8ANgR2nXnXEE9qjCeGLrQI,30731
-numba_cuda/numba/cuda/dispatcher.py,sha256=
+numba_cuda/numba/cuda/dispatcher.py,sha256=_uaS7jxpquTiG4En2u5eNbOBXYvOIrJebVS-vk9voVU,43467
 numba_cuda/numba/cuda/errors.py,sha256=WRso1Q_jCoWP5yrDBMhihRhhVtVo1-7KdN8QVE9j46o,1712
 numba_cuda/numba/cuda/extending.py,sha256=VwuU5F0AQFlJsqaiwoWk-6Itihew1FsjVT_BVjhY8Us,2278
 numba_cuda/numba/cuda/initialize.py,sha256=0SnpjccQEYiWITIyfAJx833H1yhYFFDY42EpnwYyMn8,487

@@ -32,7 +32,7 @@ numba_cuda/numba/cuda/libdevicedecl.py,sha256=xdZbb_rCaftMf8Pbw63g_Lr230N-1QoaYz
 numba_cuda/numba/cuda/libdevicefuncs.py,sha256=c80lGpGoFIYkAdgr4fzbxzdNCyJYrLdss64bwa0Mc6w,37471
 numba_cuda/numba/cuda/libdeviceimpl.py,sha256=m4Fog_OPPEg2RkOk7LEeqF26MK4aEFlKxITlSCZKMAo,2798
 numba_cuda/numba/cuda/locks.py,sha256=yF6WcwMyzauJ9H7JuCRq2Ynx7kFVAnlkkvmWp7UdZ5w,388
-numba_cuda/numba/cuda/lowering.py,sha256=
+numba_cuda/numba/cuda/lowering.py,sha256=DSco9CZiYcKyL2U22yzg9Z7eW7VA7YA-TZ55ZyZ5wIo,5240
 numba_cuda/numba/cuda/mathimpl.py,sha256=-8IOkhorbMg8iPBMIdgjk3qJZSyRWYJDwPAWrTMkODI,14356
 numba_cuda/numba/cuda/models.py,sha256=jbvmbL51mt0Z1nZTSiniBJTFhnOfPzzcVD6xCEpXDMA,1282
 numba_cuda/numba/cuda/nvvmutils.py,sha256=x-0nCqwkoB8DzX7bSrvTH0h-aKSDx0rVWKR7Eqx4ldA,7993

@@ -52,7 +52,7 @@ numba_cuda/numba/cuda/_internal/cuda_bf16.py,sha256=QYck6s_D85HBEsc__SAl_UZxf7Sp
 numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=inat2K8K1OVrgDe64FK7CyRmyFyNKcNO4p2_L79yRZ0,201
 numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=6tF2TYnmjMbKk2fho1ONoD_QsRD9QVTT2kHP7x1u1J0,31556
 numba_cuda/numba/cuda/cudadrv/devices.py,sha256=k87EDIRhj1ncM9PxJCjZGPFfEks99vzmHlTc55GK5X0,8062
-numba_cuda/numba/cuda/cudadrv/driver.py,sha256=
+numba_cuda/numba/cuda/cudadrv/driver.py,sha256=ypF1plUmtHo7pFVI_JsIAJkOAYerj_1eW3rsXmawXJM,119641
 numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=OnjYWnmy8ZlSfYouhzyYIpW-AJ3x1YHj32YcBY2xet4,16790
 numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=2jycZhniMy3ncoVWQG9D8dBehTEeocBZTW43gKHL5Tc,14291
 numba_cuda/numba/cuda/cudadrv/enums.py,sha256=raWKryxamWQZ5A8ivMpyYVhhwbSpaD9lu7l1_wl2W9M,23742

@@ -62,7 +62,7 @@ numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=IZ13laEG_altDQyi9HkdMcwW-Y
 numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=9uEs1KepeVGRbEpVhLjtxSsvZpZsbrHnPywmx--y88A,804
 numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
 numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=UD8kASyGUU896tNWAtVxmbzDTP5jDbiOAZjCsELOg6U,14986
-numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=
+numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=2vq00bifcNvQQGbp0IUaStlFLM5faU9weQ2poWSB0a4,29637
 numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=J6PRGGK07XSLRzgCw5xs8VU5xVoqavvhojk1mxiQsi4,226
 numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=CFumwg4iblWap_E7l7GM_hMYz1PsbH81-N0tZwFFooA,4372
 numba_cuda/numba/cuda/include/11/cuda_bf16.h,sha256=Z7HGJEOhMjQzD0Gs0eq0qdzD-Wr8Zbty-FeeLtahN-s,138713

@@ -130,8 +130,8 @@ numba_cuda/numba/cuda/tests/cudadrv/test_linker.py,sha256=ymv2ujRLLIIURikNEdC0Ss
 numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py,sha256=2tkf766GjIta_wL5NGlMIqmrDMFN2rZmnP_c9A8cWA8,5084
 numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py,sha256=176Ma2ZVLnc4w4bfYwbF1eeRq3x3rbOvDieRJLSuNpI,8413
 numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py,sha256=9MLFEXn7DnLkuuXK_qjilA1jxQwC-AeSBOcRYzZogRY,1513
-numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=
-numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=
+numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=2BpJ-m3Ue9ZN-NNVkVgPyPyWsffADj_eCtYdiLVJ528,11551
+numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=71-Hlng6-HyhfK3i3ITUzHQIHyL3hCv1ubkkJOGt0R4,7400
 numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py,sha256=PGuv4bt9qiIGlkLhyQCOXFIf1SK5Nj-RjcpWqeO1TMM,943
 numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py,sha256=xbSFmvqOIcWY-TI9p1MDcGwE-24iaK4j-_UenMvTnR4,508
 numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py,sha256=bpM9AvL39hUM2kv01lUy3UdlnCmv1BGyzh4rByaUMns,4978

@@ -159,7 +159,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_boolean.py,sha256=j4mIOv4rJTLjJzpKk1O9UF
 numba_cuda/numba/cuda/tests/cudapy/test_caching.py,sha256=obUSTJSP2Lh-YNElq8PZpVnRJOeq-uqV_VyLHtsXwAw,18427
 numba_cuda/numba/cuda/tests/cudapy/test_casting.py,sha256=3LaN3ZsSuOZXAZXCV85wYyhh0ih7JqABnjGTa7Y2YBE,8748
 numba_cuda/numba/cuda/tests/cudapy/test_cffi.py,sha256=tC7ZCA4dkzehS33iz2l35rX6OxE3BTQd9ivV4r74YXs,926
-numba_cuda/numba/cuda/tests/cudapy/test_compiler.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_compiler.py,sha256=4BB1pEC_2XQ9EWixiLXeLTDcP-5H2sAZCPt2_p-njQ4,12908
 numba_cuda/numba/cuda/tests/cudapy/test_complex.py,sha256=hmAcyZim46yueXZDqDSJYqxXuBGm7wRiZo_q9-SbMlg,10129
 numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py,sha256=KIuXQ0ihgQQXM-eH7s3xAxhKe35YL1qDTHCVTWA4ut8,497
 numba_cuda/numba/cuda/tests/cudapy/test_const_string.py,sha256=li1UsV5vc2M01cJ7k6_526VPtuAOAKr8e7kb1CDUXi4,4323

@@ -169,14 +169,14 @@ numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=RXCNHAZM3
 numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=8prL2FTiaajW-UHSL9al-nBniygOfpdAOT_Dkej4PWI,2138
 numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=MnOeDWMz-rL3-07FsswM06Laxmm0KjTmTwhrP3rmchQ,3526
 numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=1P369s02AvGu7fSIEe_YxSgh3c6S72Aw1gRgmepDbQY,3383
-numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=5TVEbo5DAfF5Z-kDLU6cShgNy18-A1fp0vssE8Gs7D8,15038
 numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=LNGBZfqFGUtVVQeC6FcHo8T3DbG-j6AjeBwJmwp9HH4,13157
-numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=mu35VClyXQK8tqF9IBc5909FVgtqfHmPUSwQNufJD6A,29609
 numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=VQGPLcTbT1nhS1BE4VALK-TaQEsPec5zu-XVlWV0sHA,4593
 numba_cuda/numba/cuda/tests/cudapy/test_errors.py,sha256=w6ipW9UIvUD_ZIt_6fQ-uJsHyKLyHVqv2bym-9vyGyY,2757
 numba_cuda/numba/cuda/tests/cudapy/test_exception.py,sha256=W5NF022DOOTaEjFmhfr8BnfhRXvYyXHiGwznQrm_9T4,5507
 numba_cuda/numba/cuda/tests/cudapy/test_extending.py,sha256=G6KcFAiJnDEfa5f7HW72Ocqxrv6xRvGMRTbwttTsuec,8678
-numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py,sha256=2May_6jJVWlYMvkAjns6UROv6GbK9wu8z2AJC2clJiE,8122
 numba_cuda/numba/cuda/tests/cudapy/test_forall.py,sha256=Ory5s-_9MauSCP2RuWUEmcGFvP0kS7ytV-3iYPFYR6o,1470
 numba_cuda/numba/cuda/tests/cudapy/test_freevar.py,sha256=JvWn7Lw137HI61mouKnPvDxZIqLppiCF_351osxQQYE,753
 numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py,sha256=nm3dK4SEIj_Wmg5iIxgFkFBHc-hLwcFtqu-8rcV7w68,2024

@@ -187,7 +187,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py,sha256=1USofSlavYFa
 numba_cuda/numba/cuda/tests/cudapy/test_idiv.py,sha256=tTy7hN2LJ4897UzO3EUxjuUzbBcs9QITHJu3s_eknq0,1054
 numba_cuda/numba/cuda/tests/cudapy/test_inline.py,sha256=T7DHquV_4HuX5fFQQS3kcZzgifTzwYbMFiY7SgQzoLA,4584
 numba_cuda/numba/cuda/tests/cudapy/test_inspect.py,sha256=L9-62nPmiWC90PST5EZrnGdAcrsbhMS_mbEkwdDkFQ0,4901
-numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256
+numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=-RGl-0vVFbCMOJFXIc_f2kvtoO6al3wRmh8f24roBpU,36660
 numba_cuda/numba/cuda/tests/cudapy/test_ipc.py,sha256=bNT6UZgsgeVWyzBrlKXucQW6IKcD6NEmbwV5cFhf-7I,10553
 numba_cuda/numba/cuda/tests/cudapy/test_iterators.py,sha256=WCRkQfkEnB0d9aj55dVvyQzD4QxrOLubnlKO0xTiNto,2343
 numba_cuda/numba/cuda/tests/cudapy/test_lang.py,sha256=TP1spLeJfmBKKrU7G3bvkhNPvVm-oQX134taQsZeNbE,1693

@@ -204,7 +204,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py,sha256=rZNVEwf7FqFwFd_O433D9
 numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py,sha256=9jkdHiaHAFbs7DzrOIDKYsbByB-8B6ucLQUvV9dWJcE,1225
 numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py,sha256=B6g46b9Ky8G0PlJhoGUf44D_Ayvs1otQ0DoCFPwhBWw,2843
 numba_cuda/numba/cuda/tests/cudapy/test_nondet.py,sha256=E5hu6MD7FV9JJOK1t9ggVP37EQzpDaCdVd5TjNcmOqU,1378
-numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=HKbXyFAGRgkWmtCQRCo0vSnO2TcM4BYDUmxs4jSC7Gs,13736
 numba_cuda/numba/cuda/tests/cudapy/test_optimization.py,sha256=-sY0U9aQUYTVFQFd8hXuypv2oH6dRY3N8cNSixCMykE,2924
 numba_cuda/numba/cuda/tests/cudapy/test_overload.py,sha256=BtBI4DxVKbg5i6ftQEmWjtITU25OTbn35WA2pyLWoI8,9107
 numba_cuda/numba/cuda/tests/cudapy/test_powi.py,sha256=ydwUtozuZlaLqSl440BkYbrUP3p_x6U1boXXcaDbU8c,3264

@@ -245,6 +245,7 @@ numba_cuda/numba/cuda/tests/data/warn.cu,sha256=6L-qsXJIxAr_n3hVMAz_EZ5j0skcJAfg
 numba_cuda/numba/cuda/tests/data/include/add.cuh,sha256=yv61Ilqge_kjj-_BPO5YWAx3sqJD73gEh66gxYwE8wc,107
 numba_cuda/numba/cuda/tests/doc_examples/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0yg4-2gNar_0Rz0buUpM,157
 numba_cuda/numba/cuda/tests/doc_examples/test_cg.py,sha256=VLWd5_v744Z5QKa4i3JVDLUwA1sxJFQzV5cRG6EkyOI,2888
+numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py,sha256=eMWfbi-dj1uyE6lXfTeSmFYDsZkgQeAEu4vmDg_4AOU,1921
 numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py,sha256=I4hWDF4DzTTtt3-XmQsP5RzPAO_pWUGsKjVO0hhPOCM,2251
 numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=AtjAzFgZWm1nwOokQyO7D8NVMYGd1QDD3EaUT_RQruQ,4403
 numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py,sha256=4C_drWYNZq_qGIt-N0fJ9r8DZBaJdO_5h7mxRZ6RcO8,5133

@@ -273,8 +274,8 @@ numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=
 numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu,sha256=T9ubst3fFUK7EXyXXMi73wAban3VFFQ986cY5OcKfvI,157
 numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=IB5t-dVhrKVoue3AbUx3yVMxPG0hBF_yZbzb4642sf0,538
 numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
+numba_cuda-0.14.0.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+numba_cuda-0.14.0.dist-info/METADATA,sha256=eq4qxmqY97oT9f9_0tBT4EFxrMBsD1Bvj5Ix3he40HM,2799
+numba_cuda-0.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+numba_cuda-0.14.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+numba_cuda-0.14.0.dist-info/RECORD,,
{numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/WHEEL: file without changes
{numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/licenses/LICENSE: file without changes
{numba_cuda-0.13.0.dist-info → numba_cuda-0.14.0.dist-info}/top_level.txt: file without changes