PyPI - numba-cuda - Versions diffs - 0.21.1__cp313-cp313-win_amd64.whl → 0.23.0__cp313-cp313-win_amd64.whl - Mend

numba-cuda 0.21.1__cp313-cp313-win_amd64.whl → 0.23.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

numba_cuda/VERSION +1 -1
numba_cuda/numba/cuda/api.py +4 -1
numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
numba_cuda/numba/cuda/cext/_dispatcher.cpp +0 -38
numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
numba_cuda/numba/cuda/cext/_typeof.cpp +0 -111
numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
numba_cuda/numba/cuda/codegen.py +42 -10
numba_cuda/numba/cuda/compiler.py +10 -4
numba_cuda/numba/cuda/core/analysis.py +29 -21
numba_cuda/numba/cuda/core/annotations/type_annotations.py +4 -4
numba_cuda/numba/cuda/core/base.py +6 -1
numba_cuda/numba/cuda/core/consts.py +1 -1
numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
numba_cuda/numba/cuda/core/errors.py +4 -912
numba_cuda/numba/cuda/core/inline_closurecall.py +71 -57
numba_cuda/numba/cuda/core/interpreter.py +79 -64
numba_cuda/numba/cuda/core/ir.py +191 -119
numba_cuda/numba/cuda/core/ir_utils.py +142 -112
numba_cuda/numba/cuda/core/postproc.py +8 -8
numba_cuda/numba/cuda/core/rewrites/ir_print.py +6 -3
numba_cuda/numba/cuda/core/rewrites/static_getitem.py +5 -5
numba_cuda/numba/cuda/core/rewrites/static_raise.py +3 -3
numba_cuda/numba/cuda/core/ssa.py +3 -3
numba_cuda/numba/cuda/core/transforms.py +25 -10
numba_cuda/numba/cuda/core/typed_passes.py +9 -9
numba_cuda/numba/cuda/core/typeinfer.py +39 -24
numba_cuda/numba/cuda/core/untyped_passes.py +71 -55
numba_cuda/numba/cuda/cudadecl.py +0 -13
numba_cuda/numba/cuda/cudadrv/devicearray.py +6 -5
numba_cuda/numba/cuda/cudadrv/driver.py +132 -511
numba_cuda/numba/cuda/cudadrv/dummyarray.py +4 -0
numba_cuda/numba/cuda/cudadrv/nvrtc.py +16 -0
numba_cuda/numba/cuda/cudaimpl.py +0 -12
numba_cuda/numba/cuda/debuginfo.py +104 -10
numba_cuda/numba/cuda/descriptor.py +1 -1
numba_cuda/numba/cuda/device_init.py +4 -7
numba_cuda/numba/cuda/dispatcher.py +36 -32
numba_cuda/numba/cuda/intrinsics.py +150 -1
numba_cuda/numba/cuda/lowering.py +64 -29
numba_cuda/numba/cuda/memory_management/nrt.py +10 -14
numba_cuda/numba/cuda/np/arrayobj.py +54 -0
numba_cuda/numba/cuda/np/numpy_support.py +26 -0
numba_cuda/numba/cuda/printimpl.py +20 -0
numba_cuda/numba/cuda/serialize.py +10 -0
numba_cuda/numba/cuda/stubs.py +0 -11
numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +21 -4
numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +1 -2
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +130 -48
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +6 -2
numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +3 -1
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +5 -6
numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +11 -12
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +27 -19
numba_cuda/numba/cuda/tests/cudapy/test_caching.py +47 -0
numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +10 -0
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +89 -0
numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py +243 -0
numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -3
numba_cuda/numba/cuda/tests/cudapy/test_numba_interop.py +35 -0
numba_cuda/numba/cuda/tests/cudapy/test_print.py +51 -0
numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +116 -1
numba_cuda/numba/cuda/tests/doc_examples/test_globals.py +111 -0
numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +61 -0
numba_cuda/numba/cuda/tests/nrt/test_nrt.py +31 -0
numba_cuda/numba/cuda/typing/context.py +3 -1
numba_cuda/numba/cuda/typing/typeof.py +56 -0
{numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/METADATA +1 -1
{numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/RECORD +74 -74
numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
numba_cuda/numba/cuda/cext/_devicearray.cpp +0 -159
numba_cuda/numba/cuda/cext/_devicearray.h +0 -29
numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -41
{numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/WHEEL +0 -0
{numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/licenses/LICENSE +0 -0
{numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/licenses/LICENSE.numba +0 -0
{numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/top_level.txt +0 -0

numba_cuda/numba/cuda/lowering.py CHANGED Viewed

@@ -11,6 +11,7 @@ from llvmlite import ir as llvm_ir
 from numba.cuda import HAS_NUMBA
 from numba.cuda.core import ir
 from numba.cuda import debuginfo, cgutils, utils, typing, types
+from numba import cuda
 from numba.cuda.core import (
     ir_utils,
     targetconfig,
@@ -441,7 +442,9 @@ class Lower(BaseLower):
                         # Ensure that the variable is not defined multiple times
                         # in the block
                         [defblk] = var_assign_map[var]
-                        assign_stmts = self.blocks[defblk].find_insts(ir.Assign)
+                        assign_stmts = self.blocks[defblk].find_insts(
+                            ir.assign_types
+                        )
                         assigns = [
                             stmt
                             for stmt in assign_stmts
@@ -468,7 +471,7 @@ class Lower(BaseLower):
             self.builder.position_at_end(bb)
             all_names = set()
             for block in self.blocks.values():
-                for x in block.find_insts(ir.Del):
+                for x in block.find_insts(ir.del_types):
                     if x.value not in all_names:
                         all_names.add(x.value)
             for name in all_names:
@@ -483,9 +486,9 @@ class Lower(BaseLower):
                 self.func_ir,
                 call.func,
             )
-            if defn is not None and isinstance(defn, ir.Global):
+            if defn is not None and isinstance(defn, ir.global_types):
                 if defn.value is eh.exception_check:
-                    if isinstance(block.terminator, ir.Branch):
+                    if isinstance(block.terminator, ir.branch_types):
                         targetblk = self.blkmap[block.terminator.truebr]
                         # NOTE: This hacks in an attribute for call_conv to
                         #       pick up. This hack is no longer needed when
@@ -505,19 +508,19 @@ class Lower(BaseLower):
         self.debuginfo.mark_location(self.builder, self.loc.line)
         self.notify_loc(self.loc)
         self.debug_print(str(inst))
-        if isinstance(inst, ir.Assign):
+        if isinstance(inst, ir.assign_types):
             ty = self.typeof(inst.target.name)
             val = self.lower_assign(ty, inst)
             argidx = None
             # If this is a store from an arg, like x = arg.x then tell debuginfo
             # that this is the arg
-            if isinstance(inst.value, ir.Arg):
+            if isinstance(inst.value, ir.arg_types):
                 # NOTE: debug location is the `def <func>` line
                 self.debuginfo.mark_location(self.builder, self.defn_loc.line)
                 argidx = inst.value.index + 1  # args start at 1
             self.storevar(val, inst.target.name, argidx=argidx)
-        elif isinstance(inst, ir.Branch):
+        elif isinstance(inst, ir.branch_types):
             cond = self.loadvar(inst.cond.name)
             tr = self.blkmap[inst.truebr]
             fl = self.blkmap[inst.falsebr]
@@ -529,11 +532,11 @@ class Lower(BaseLower):
             )
             self.builder.cbranch(pred, tr, fl)
-        elif isinstance(inst, ir.Jump):
+        elif isinstance(inst, ir.jump_types):
             target = self.blkmap[inst.target]
             self.builder.branch(target)
-        elif isinstance(inst, ir.Return):
+        elif isinstance(inst, ir.return_types):
             if self.generator_info:
                 # StopIteration
                 self.genlower.return_from_generator(self)
@@ -551,10 +554,10 @@ class Lower(BaseLower):
             retval = self.context.get_return_value(self.builder, ty, val)
             self.call_conv.return_value(self.builder, retval)
-        elif isinstance(inst, ir.PopBlock):
+        elif isinstance(inst, ir.popblock_types):
             pass  # this is just a marker
-        elif isinstance(inst, ir.StaticSetItem):
+        elif isinstance(inst, ir.staticsetitem_types):
             signature = self.fndesc.calltypes[inst]
             assert signature is not None
             try:
@@ -572,22 +575,22 @@ class Lower(BaseLower):
                 )
                 return impl(self.builder, (target, inst.index, value))
-        elif isinstance(inst, ir.Print):
+        elif isinstance(inst, ir.print_types):
             self.lower_print(inst)
-        elif isinstance(inst, ir.SetItem):
+        elif isinstance(inst, ir.setitem_types):
             signature = self.fndesc.calltypes[inst]
             assert signature is not None
             return self.lower_setitem(
                 inst.target, inst.index, inst.value, signature
             )
-        elif isinstance(inst, ir.StoreMap):
+        elif isinstance(inst, ir.storemap_types):
             signature = self.fndesc.calltypes[inst]
             assert signature is not None
             return self.lower_setitem(inst.dct, inst.key, inst.value, signature)
-        elif isinstance(inst, ir.DelItem):
+        elif isinstance(inst, ir.delitem_types):
             target = self.loadvar(inst.target.name)
             index = self.loadvar(inst.index.name)
@@ -613,10 +616,10 @@ class Lower(BaseLower):
             return impl(self.builder, (target, index))
-        elif isinstance(inst, ir.Del):
+        elif isinstance(inst, ir.del_types):
             self.delvar(inst.value)
-        elif isinstance(inst, ir.SetAttr):
+        elif isinstance(inst, ir.setattr_types):
             target = self.loadvar(inst.target.name)
             value = self.loadvar(inst.value.name)
             signature = self.fndesc.calltypes[inst]
@@ -634,16 +637,16 @@ class Lower(BaseLower):
             return impl(self.builder, (target, value))
-        elif isinstance(inst, ir.DynamicRaise):
+        elif isinstance(inst, ir.dynamicraise_types):
             self.lower_dynamic_raise(inst)
-        elif isinstance(inst, ir.DynamicTryRaise):
+        elif isinstance(inst, ir.dynamictryraise_types):
             self.lower_try_dynamic_raise(inst)
-        elif isinstance(inst, ir.StaticRaise):
+        elif isinstance(inst, ir.staticraise_types):
             self.lower_static_raise(inst)
-        elif isinstance(inst, ir.StaticTryRaise):
+        elif isinstance(inst, ir.statictryraise_types):
             self.lower_static_try_raise(inst)
         else:
@@ -695,7 +698,7 @@ class Lower(BaseLower):
         args = []
         nb_types = []
         for exc_arg in exc_args:
-            if isinstance(exc_arg, ir.Var):
+            if isinstance(exc_arg, ir.var_types):
                 # dynamic values
                 typ = self.typeof(exc_arg.name)
                 val = self.loadvar(exc_arg.name)
@@ -727,24 +730,28 @@ class Lower(BaseLower):
     def lower_assign(self, ty, inst):
         value = inst.value
         # In nopython mode, closure vars are frozen like globals
-        if isinstance(value, (ir.Const, ir.Global, ir.FreeVar)):
+        if (
+            isinstance(value, ir.const_types)
+            or isinstance(value, ir.global_types)
+            or isinstance(value, ir.freevar_types)
+        ):
             res = self.context.get_constant_generic(
                 self.builder, ty, value.value
             )
             self.incref(ty, res)
             return res
-        elif isinstance(value, ir.Expr):
+        elif isinstance(value, ir.expr_types):
             return self.lower_expr(ty, value)
-        elif isinstance(value, ir.Var):
+        elif isinstance(value, ir.var_types):
             val = self.loadvar(value.name)
             oty = self.typeof(value.name)
             res = self.context.cast(self.builder, val, oty, ty)
             self.incref(ty, res)
             return res
-        elif isinstance(value, ir.Arg):
+        elif isinstance(value, ir.arg_types):
             # Suspend debug info else all the arg repacking ends up being
             # associated with some line or other and it's actually just a detail
             # of Numba's CC.
@@ -770,7 +777,7 @@ class Lower(BaseLower):
                 self.incref(ty, res)
                 return res
-        elif isinstance(value, ir.Yield):
+        elif isinstance(value, ir.yield_types):
             res = self.lower_yield(ty, value)
             self.incref(ty, res)
             return res
@@ -1677,10 +1684,31 @@ class Lower(BaseLower):
 class CUDALower(Lower):
+    def _is_shared_array_call(self, fnty):
+        # Check if function type is a cuda.shared.array call
+        if not hasattr(fnty, "typing_key"):
+            return False
+        return fnty.typing_key is cuda.shared.array
+    def _lower_call_normal(self, fnty, expr, signature):
+        # Set flag for subsequent store to track shared address space
+        if self.context.enable_debuginfo and self._is_shared_array_call(fnty):
+            self._pending_shared_store = True
+        return super()._lower_call_normal(fnty, expr, signature)
     def storevar(self, value, name, argidx=None):
         """
         Store the value into the given variable.
         """
+        # Track address space for debug info
+        if self.context.enable_debuginfo and self._pending_shared_store:
+            from numba.cuda.cudadrv import nvvm
+            self._addrspace_map[name] = nvvm.ADDRSPACE_SHARED
+            if not name.startswith("$") and not name.startswith("."):
+                self._pending_shared_store = False
         # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
         if config.CUDA_DEBUG_POLY:
             src_name = name.split(".")[0]
@@ -1792,7 +1820,7 @@ class CUDALower(Lower):
         self.dbg_val_names = set()
         if self.context.enable_debuginfo and self._disable_sroa_like_opt:
-            for x in block.find_insts(ir.Assign):
+            for x in block.find_insts(ir.assign_types):
                 if x.target.name.startswith("$"):
                     continue
                 ssa_name = x.target.name
@@ -1806,6 +1834,13 @@ class CUDALower(Lower):
         """
         super().pre_lower()
+        # Track address space for debug info
+        self._addrspace_map = {}
+        self._pending_shared_store = False
+        if self.context.enable_debuginfo:
+            self.debuginfo._set_addrspace_map(self._addrspace_map)
+        # Track polymorphic variables for debug info
         self.poly_var_typ_map = {}
         self.poly_var_loc_map = {}
         self.poly_var_set = set()
@@ -1818,7 +1853,7 @@ class CUDALower(Lower):
             poly_map = {}
             # pre-scan all blocks
             for block in self.blocks.values():
-                for x in block.find_insts(ir.Assign):
+                for x in block.find_insts(ir.assign_types):
                     if x.target.name.startswith("$"):
                         continue
                     ssa_name = x.target.name

numba_cuda/numba/cuda/memory_management/nrt.py CHANGED Viewed

@@ -13,9 +13,10 @@ from numba.cuda import config, types
 from numba.cuda.cudadrv.driver import (
     _Linker,
     driver,
-    launch_kernel,
+    _to_core_stream,
     _have_nvjitlink,
 )
+from cuda.core.experimental import LaunchConfig, launch
 from numba.cuda.cudadrv import devices
 from numba.cuda.api import get_current_device
 from numba.cuda.utils import _readenv, cached_file_read
@@ -126,7 +127,7 @@ class _Runtime:
         cc = get_current_device().compute_capability
         # Create a new linker instance and add the cu file
-        linker = _Linker.new(cc=cc, lto=_have_nvjitlink())
+        linker = _Linker(max_registers=0, cc=cc, lto=_have_nvjitlink())
         linker.add_cu_file(memsys_mod)
         # Complete the linker and create a module from it
@@ -179,20 +180,15 @@ class _Runtime:
             stream = cuda.default_stream()
         func = module.get_function(name)
-        launch_kernel(
-            func.handle,
-            1,
-            1,
-            1,
-            1,
-            1,
-            1,
-            0,
-            stream.handle.value,
-            params,
-            cooperative=False,
+        config = LaunchConfig(
+            grid=(1, 1, 1),
+            block=(1, 1, 1),
+            shmem_size=0,
+            cooperative_launch=False,
         )
+        launch(_to_core_stream(stream), config, func.kernel, *params)
     def ensure_initialized(self, stream=None):
         """
         If memsys is not initialized, initialize memsys

numba_cuda/numba/cuda/np/arrayobj.py CHANGED Viewed

@@ -31,6 +31,7 @@ from numba.cuda.np.numpy_support import (
     type_is_scalar,
     lt_complex,
     lt_floats,
+    strides_from_shape,
 )
 from numba.cuda.np.numpy_support import (
     type_can_asarray,
@@ -3642,10 +3643,63 @@ def record_static_setitem_int(context, builder, sig, args):
 def constant_array(context, builder, ty, pyval):
     """
     Create a constant array (mechanism is target-dependent).
+    For objects implementing __cuda_array_interface__,
+    the device pointer is embedded directly as a constant. For other arrays,
+    the target-specific mechanism is used.
     """
+    # Check if this is a device array (implements __cuda_array_interface__)
+    if getattr(pyval, "__cuda_array_interface__", None) is not None:
+        return _lower_constant_device_array(context, builder, ty, pyval)
     return context.make_constant_array(builder, ty, pyval)
+def _lower_constant_device_array(context, builder, ty, pyval):
+    """
+    Lower objects with __cuda_array_interface__ by embedding the device
+    pointer as a constant.
+    This allows device arrays captured from globals to be used in CUDA
+    kernels and device functions.
+    """
+    interface = pyval.__cuda_array_interface__
+    # Hold on to the device array to prevent garbage collection.
+    context.active_code_library.referenced_objects[id(pyval)] = pyval
+    shape = interface["shape"]
+    strides = interface.get("strides")
+    data_ptr = interface["data"][0]
+    typestr = interface["typestr"]
+    itemsize = np.dtype(typestr).itemsize
+    # Calculate strides if not provided (C-contiguous)
+    if strides is None:
+        strides = strides_from_shape(shape, itemsize, order="C")
+    # Embed device pointer as constant
+    llvoidptr = context.get_value_type(types.voidptr)
+    data = context.get_constant(types.uintp, data_ptr).inttoptr(llvoidptr)
+    # Build array structure
+    ary = context.make_array(ty)(context, builder)
+    kshape = [context.get_constant(types.intp, s) for s in shape]
+    kstrides = [context.get_constant(types.intp, s) for s in strides]
+    context.populate_array(
+        ary,
+        data=builder.bitcast(data, ary.data.type),
+        shape=kshape,
+        strides=kstrides,
+        itemsize=context.get_constant(types.intp, itemsize),
+        parent=None,
+        meminfo=None,
+    )
+    return ary._getvalue()
 @lower_constant(types.Record)
 def constant_record(context, builder, ty, pyval):
     """

numba_cuda/numba/cuda/np/numpy_support.py CHANGED Viewed

@@ -3,7 +3,10 @@
 import collections
 import ctypes
+import itertools
+import operator
 import re
 import numpy as np
 from numba.cuda import types
@@ -17,6 +20,29 @@ from numba.cuda.cgutils import is_nonelike  # noqa: F401
 numpy_version = tuple(map(int, np.__version__.split(".")[:2]))
+def strides_from_shape(
+    shape: tuple[int, ...], itemsize: int, *, order: str
+) -> tuple[int, ...]:
+    """Compute strides for a contiguous array with given shape and order."""
+    if len(shape) == 0:
+        # 0-D arrays have empty strides
+        return ()
+    limits = slice(1, None) if order == "C" else slice(None, -1)
+    transform = reversed if order == "C" else lambda x: x
+    strides = tuple(
+        map(
+            itemsize.__mul__,
+            itertools.accumulate(
+                transform(shape[limits]), operator.mul, initial=1
+            ),
+        )
+    )
+    if order == "F":
+        return strides
+    return strides[::-1]
 FROM_DTYPE = {
     np.dtype("bool"): types.boolean,
     np.dtype("int8"): types.int8,

numba_cuda/numba/cuda/printimpl.py CHANGED Viewed

@@ -32,6 +32,26 @@ def print_item(ty, context, builder, val):
     )
+@print_item.register(types.Tuple)
+@print_item.register(types.UniTuple)
+def tuple_print_impl(ty, context, builder, val):
+    formats = []
+    values = []
+    for i, argtyp in enumerate(ty.types):
+        argval = builder.extract_value(val, i)
+        argfmt, argvals = print_item(argtyp, context, builder, argval)
+        formats.append(argfmt)
+        values.extend(argvals)
+    if len(formats) == 1:
+        base = "({},)"
+    else:
+        base = "({})"
+    rawfmt = base.format(", ".join(formats))
+    return rawfmt, values
 @print_item.register(types.Integer)
 @print_item.register(types.IntegerLiteral)
 def int_print_impl(ty, context, builder, val):

numba_cuda/numba/cuda/serialize.py CHANGED Viewed

@@ -197,6 +197,16 @@ class NumbaPickler(cloudpickle.CloudPickler):
         # Overridden to disable pickling of certain types
         if type(obj) in self.disabled_types:
             _no_pickle(obj)  # noreturn
+        # Prevent pickling of objects implementing __cuda_array_interface__
+        # These contain device pointers that would become stale after unpickling
+        if getattr(obj, "__cuda_array_interface__", None) is not None:
+            raise pickle.PicklingError(
+                "Cannot serialize kernels or device functions referencing "
+                "global device arrays. Pass the array(s) as arguments "
+                "to the kernel instead."
+            )
         return super().reducer_override(obj)

numba_cuda/numba/cuda/stubs.py CHANGED Viewed

@@ -200,17 +200,6 @@ class syncwarp(Stub):
     _description_ = "<warp_sync()>"
-class vote_sync_intrinsic(Stub):
-    """
-    vote_sync_intrinsic(mask, mode, predictate)
-    Nvvm intrinsic for performing a reduce and broadcast across a warp
-    docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-vote
-    """
-    _description_ = "<vote_sync()>"
 class match_any_sync(Stub):
     """
     match_any_sync(mask, value)

numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py CHANGED Viewed

@@ -36,8 +36,13 @@ pytestmark = pytest.mark.skipif(
         ),
     ],
 )
-def test_one_arg(benchmark, array_func):
-    @cuda.jit("void(float32[:])")
+@pytest.mark.parametrize(
+    "jit",
+    [cuda.jit, cuda.jit("void(float32[::1])")],
+    ids=["dispatch", "signature"],
+)
+def test_one_arg(benchmark, array_func, jit):
+    @jit
     def one_arg(arr1):
         return
@@ -78,10 +83,22 @@ def test_one_arg(benchmark, array_func):
         ),
     ],
 )
-def test_many_args(benchmark, array_func):
+@pytest.mark.parametrize(
+    "jit",
+    [
+        cuda.jit,
+        cuda.jit(
+            "void({})".format(
+                ", ".join(["float32[::1]"] * len(string.ascii_lowercase))
+            )
+        ),
+    ],
+    ids=["dispatch", "signature"],
+)
+def test_many_args(benchmark, array_func, jit):
     many_arrs = array_func()
-    @cuda.jit("void({})".format(", ".join(["float32[:]"] * len(many_arrs))))
+    @jit
     def many_args(
         a,
         b,

numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py CHANGED Viewed

@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: BSD-2-Clause
 import numbers
-import weakref
 from numba import cuda
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
@@ -90,8 +89,8 @@ class Test3rdPartyContext(CUDATestCase):
         dev = driver.binding.CUdevice(0)
         binding_hctx = the_driver.cuDevicePrimaryCtxRetain(dev)
         hctx = driver.drvapi.cu_context(int(binding_hctx))
+        ctx = driver.Context(dev, hctx)
         try:
-            ctx = driver.Context(weakref.proxy(self), hctx)
             ctx.push()
             # Check that the context from numba matches the created primary
             # context.