numba-cuda 0.23.0__cp313-cp313-win_amd64.whl → 0.24.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_compat.py +47 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -2
- numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +56 -8
- numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
- numba_cuda/numba/cuda/codegen.py +4 -2
- numba_cuda/numba/cuda/compiler.py +5 -5
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
- numba_cuda/numba/cuda/core/base.py +6 -10
- numba_cuda/numba/cuda/core/bytecode.py +21 -13
- numba_cuda/numba/cuda/core/byteflow.py +336 -90
- numba_cuda/numba/cuda/core/compiler.py +3 -4
- numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
- numba_cuda/numba/cuda/core/config.py +5 -7
- numba_cuda/numba/cuda/core/controlflow.py +17 -9
- numba_cuda/numba/cuda/core/inline_closurecall.py +11 -10
- numba_cuda/numba/cuda/core/interpreter.py +255 -96
- numba_cuda/numba/cuda/core/ir_utils.py +8 -17
- numba_cuda/numba/cuda/core/pythonapi.py +3 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
- numba_cuda/numba/cuda/core/ssa.py +2 -2
- numba_cuda/numba/cuda/core/transforms.py +4 -6
- numba_cuda/numba/cuda/core/typed_passes.py +1 -1
- numba_cuda/numba/cuda/core/typeinfer.py +3 -3
- numba_cuda/numba/cuda/core/untyped_passes.py +11 -10
- numba_cuda/numba/cuda/cpython/unicode.py +2 -2
- numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
- numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -4
- numba_cuda/numba/cuda/cudadrv/driver.py +13 -11
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +71 -32
- numba_cuda/numba/cuda/debuginfo.py +10 -79
- numba_cuda/numba/cuda/deviceufunc.py +3 -6
- numba_cuda/numba/cuda/dispatcher.py +5 -19
- numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
- numba_cuda/numba/cuda/lowering.py +0 -28
- numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
- numba_cuda/numba/cuda/np/arrayobj.py +7 -9
- numba_cuda/numba/cuda/np/numpy_support.py +7 -10
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
- numba_cuda/numba/cuda/testing.py +4 -8
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +66 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +26 -4
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +12 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +12 -7
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +8 -7
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
- numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
- numba_cuda/numba/cuda/typing/typeof.py +9 -16
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +74 -73
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,6 @@ from llvmlite import ir as llvm_ir
 from numba.cuda import HAS_NUMBA
 from numba.cuda.core import ir
 from numba.cuda import debuginfo, cgutils, utils, typing, types
-from numba import cuda
 from numba.cuda.core import (
     ir_utils,
     targetconfig,
@@ -1684,31 +1683,10 @@ class Lower(BaseLower):
 
 
 class CUDALower(Lower):
-    def _is_shared_array_call(self, fnty):
-        # Check if function type is a cuda.shared.array call
-        if not hasattr(fnty, "typing_key"):
-            return False
-        return fnty.typing_key is cuda.shared.array
-
-    def _lower_call_normal(self, fnty, expr, signature):
-        # Set flag for subsequent store to track shared address space
-        if self.context.enable_debuginfo and self._is_shared_array_call(fnty):
-            self._pending_shared_store = True
-
-        return super()._lower_call_normal(fnty, expr, signature)
-
     def storevar(self, value, name, argidx=None):
         """
         Store the value into the given variable.
         """
-        # Track address space for debug info
-        if self.context.enable_debuginfo and self._pending_shared_store:
-            from numba.cuda.cudadrv import nvvm
-
-            self._addrspace_map[name] = nvvm.ADDRSPACE_SHARED
-            if not name.startswith("$") and not name.startswith("."):
-                self._pending_shared_store = False
-
         # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
         if config.CUDA_DEBUG_POLY:
             src_name = name.split(".")[0]
@@ -1834,12 +1812,6 @@ class CUDALower(Lower):
         """
         super().pre_lower()
 
-        # Track address space for debug info
-        self._addrspace_map = {}
-        self._pending_shared_store = False
-        if self.context.enable_debuginfo:
-            self.debuginfo._set_addrspace_map(self._addrspace_map)
-
         # Track polymorphic variables for debug info
         self.poly_var_typ_map = {}
         self.poly_var_loc_map = {}
@@ -16,7 +16,7 @@ from numba.cuda.cudadrv.driver import (
     _to_core_stream,
     _have_nvjitlink,
 )
-from cuda.
+from numba.cuda._compat import LaunchConfig, launch
 from numba.cuda.cudadrv import devices
 from numba.cuda.api import get_current_device
 from numba.cuda.utils import _readenv, cached_file_read
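Note: several hunks in this diff replace direct imports from the external cuda.core package (one fully visible deletion below reads "from cuda.core.experimental import Device") with imports from the new numba/cuda/_compat.py module listed above (+47 lines). A minimal sketch of that indirection, assuming the shim simply re-exports the names the call sites use; the module's actual contents are not shown in this diff:

    # _compat.py sketch (assumed shape, not the shipped code): funnel the
    # cuda.core.experimental names through one internal module so a future
    # upstream reorganization only requires touching this file.
    from cuda.core.experimental import (
        Device,
        LaunchConfig,
        ObjectCode,
        Stream,
        launch,
    )

    # CUDAError and NVRTCError are also imported from _compat elsewhere in
    # this diff; their upstream location is not visible here.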
@@ -1798,10 +1798,10 @@ def numpy_broadcast_arrays(*args):
             tup = tuple_setitem(tup, i, shape[i])
 
         # numpy checks if the input arrays have the same shape as `shape`
-
-
-
-
+        return [
+            np.broadcast_to(np.asarray(array), tup)
+            for array in literal_unroll(args)
+        ]
 
     return impl
 
@@ -4822,13 +4822,11 @@ def _parse_shape(context, builder, ty, val):
     ndim = ty.count
     passed_shapes = cgutils.unpack_tuple(builder, val, count=ndim)
 
-    shapes = []
-    for s in passed_shapes:
-        shapes.append(safecast_intp(context, builder, s.type, s))
+    shapes = [safecast_intp(context, builder, s.type, s) for s in passed_shapes]
 
     zero = context.get_constant_generic(builder, types.intp, 0)
-    for
-    is_neg = builder.icmp_signed("<",
+    for shape in shapes:
+        is_neg = builder.icmp_signed("<", shape, zero)
         with cgutils.if_unlikely(builder, is_neg):
             context.call_conv.return_user_exc(
                 builder, ValueError, ("negative dimensions not allowed",)
@@ -4,6 +4,7 @@
 import collections
 import ctypes
 import itertools
+import functools
 import operator
 import re
 
@@ -21,11 +22,12 @@ from numba.cuda.cgutils import is_nonelike # noqa: F401
 numpy_version = tuple(map(int, np.__version__.split(".")[:2]))
 
 
+@functools.lru_cache
 def strides_from_shape(
     shape: tuple[int, ...], itemsize: int, *, order: str
 ) -> tuple[int, ...]:
     """Compute strides for a contiguous array with given shape and order."""
-    if
+    if not shape:
         # 0-D arrays have empty strides
         return ()
     limits = slice(1, None) if order == "C" else slice(None, -1)
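Note: the functools.lru_cache added above is safe for strides_from_shape because both arguments are hashable (a tuple and an int) and the returned strides tuple is immutable. Expected behavior, cross-checked against NumPy (an illustrative check, not part of the diff):

    import numpy as np

    # C order: strides shrink right-to-left; F order: they grow left-to-right.
    # For shape (2, 3) and an 8-byte itemsize:
    assert np.zeros((2, 3), dtype=np.float64, order="C").strides == (24, 8)
    assert np.zeros((2, 3), dtype=np.float64, order="F").strides == (8, 16)
    # strides_from_shape((2, 3), 8, order="C") should agree and return (24, 8).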
@@ -118,16 +120,11 @@ def from_dtype(dtype):
     elif getattr(dtype, "fields", None) is not None:
         return from_struct_dtype(dtype)
 
-
-
-
-        pass
+    result = FROM_DTYPE.get(dtype)
+    if result is not None:
+        return result
 
-
-        char = dtype.char
-    except AttributeError:
-        pass
-    else:
+    if (char := getattr(dtype, "char", None)) is not None:
         if char in "SU":
             return _from_str_dtype(dtype)
         if char in "mM":
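Note: the hunk above swaps try/except lookups (the deleted "except AttributeError:" branch) for a dict.get and a getattr with a walrus assignment, keeping the common path free of exception handling. A self-contained illustration of the new control flow, with a stand-in registry:

    import numpy as np

    FROM_DTYPE = {np.dtype(np.float32): "float32"}  # stand-in registry

    def from_dtype_lookup(dtype):
        result = FROM_DTYPE.get(dtype)  # no KeyError on the miss path
        if result is not None:
            return result
        if (char := getattr(dtype, "char", None)) is not None:
            return f"char={char}"  # string/datetime handling in the real code
        raise TypeError(dtype)

    assert from_dtype_lookup(np.dtype(np.float32)) == "float32"
    assert from_dtype_lookup(np.dtype(np.int8)) == "char=b"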
@@ -122,9 +122,10 @@ def polyutils_as_series(alist, trim=True):
 
     def impl(alist, trim=True):
         if tuple_input:
-            arrays = [
-
-
+            arrays = [
+                np.atleast_1d(np.asarray(item)).astype(res_dtype)
+                for item in literal_unroll(alist)
+            ]
 
         elif list_input:
             arrays = [
numba_cuda/numba/cuda/testing.py (CHANGED)
@@ -276,14 +276,6 @@ def skip_if_curand_kernel_missing(fn):
     return unittest.skipUnless(curand_kernel_h_file, reason)(fn)
 
 
-def skip_if_mvc_enabled(reason):
-    """Skip a test if Minor Version Compatibility is enabled"""
-    assert isinstance(reason, str)
-    return unittest.skipIf(
-        config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY, reason
-    )
-
-
 def cc_X_or_above(major, minor):
     if not config.ENABLE_CUDASIM:
         cc = devices.get_context().device.compute_capability
@@ -308,6 +300,10 @@ def skip_unless_cc_75(fn):
     return unittest.skipUnless(cc_X_or_above(7, 5), "requires cc >= 7.5")(fn)
 
 
+def skip_unless_cc_90(fn):
+    return unittest.skipUnless(cc_X_or_above(9, 0), "requires cc >= 9.0")(fn)
+
+
 def xfail_unless_cudasim(fn):
     if config.ENABLE_CUDASIM:
         return fn
@@ -2,21 +2,25 @@
 # SPDX-License-Identifier: BSD-2-Clause
 
 from ctypes import c_int, sizeof
+import cffi
+import numpy as np
 
 from numba.cuda.cudadrv.driver import host_to_device, device_to_host, driver
-from cuda.
+from numba.cuda._compat import (
     LaunchConfig,
+    Device,
     Stream as ExperimentalStream,
     launch,
 )
 
 from numba import cuda
-from numba.cuda.cudadrv import devices
-from numba.cuda.testing import unittest, CUDATestCase
+from numba.cuda.cudadrv import devices, nvrtc
+from numba.cuda.testing import unittest, CUDATestCase, skip_unless_cc_90
 from numba.cuda.testing import skip_on_cudasim
+from numba.cuda.tests.support import override_config
+from numba.core import types
 import contextlib
 
-from cuda.core.experimental import Device
 
 ptx1 = """
 .version 1.4
@@ -391,5 +395,63 @@ class TestDevice(CUDATestCase):
         self.assertRegex(dev.uuid, uuid_format)
 
 
+@skip_on_cudasim("CUDA asm unsupported in the simulator")
+class TestAcceleratedArchitecture(CUDATestCase):
+    @skip_unless_cc_90
+    def test_device_arch_specific(self):
+        set_desc = cuda.CUSource("""
+        #include <cuda_fp16.h>
+
+        extern "C" __device__
+        int set_descriptor(int *out, int* smem) {
+            unsigned usmem = __cvta_generic_to_shared(smem);
+            asm volatile("tensormap.replace.tile.rank.shared::cta.b1024.b32 [%0], 2;" :: "r"(usmem));
+            return 0;
+        }
+        """)
+
+        set_descriptor = cuda.declare_device(
+            "set_descriptor",
+            types.int32(types.CPointer(types.int32)),
+            link=[set_desc],
+        )
+
+        ffi = cffi.FFI()
+
+        @cuda.jit
+        def kernel(a):
+            sm = cuda.shared.array(1, dtype=np.int32)
+            data_ptr = ffi.from_buffer(sm)
+            set_descriptor(data_ptr)
+
+            # just to prevent optimization:
+            sm[0] = 2
+            cuda.syncthreads()
+            a[0] = sm[0]
+
+        a = np.ones(1, dtype=np.int32)
+
+        kernel[1, 1](a)
+
+        assert a[0] == 2
+
+    def test_get_arch_option_force_cc(self):
+        with override_config("FORCE_CUDA_CC", (8, 0)):
+            arch = nvrtc.get_arch_option(9, 0, "a")
+            self.assertEqual("compute_80", arch)
+
+    def test_get_arch_option_force_cc_arch_specific(self):
+        with override_config("FORCE_CUDA_CC", (9, 0, "a")):
+            arch = nvrtc.get_arch_option(9, 0)
+            self.assertEqual("compute_90a", arch)
+
+    def test_get_arch_option_illegal_arch_specific(self):
+        # Using a fictitious very high compute capability (major 99) for this
+        # test to ensure future toolkits are unlikely to provide an exact match
+        msg = "Can't use arch-specific compute_990a with"
+        with self.assertRaisesRegex(ValueError, msg):
+            nvrtc.get_arch_option(99, 0, "a")
+
+
 if __name__ == "__main__":
     unittest.main()
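Note: the new tests above cover arch-specific compute capability suffixes ("a" for architecture-specific, "f" for family-specific) in nvrtc.get_arch_option, including the interaction with FORCE_CUDA_CC. A sketch of just the string formatting the tests assert on; the helper name below is hypothetical, and the real function additionally validates the suffix against the toolkit's supported architectures:

    def format_compute_arch(major: int, minor: int, suffix: str = "") -> str:
        # "compute_80" for (8, 0); "compute_90a" for (9, 0, "a").
        return f"compute_{major}{minor}{suffix}"

    assert format_compute_arch(9, 0, "a") == "compute_90a"
    assert format_compute_arch(8, 0) == "compute_80"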
@@ -15,7 +15,7 @@ from numba.cuda import require_context
 from numba import cuda
 from numba.cuda import void, float64, int64, int32, float32
 from numba.cuda.typing.typeof import typeof
-from cuda.
+from numba.cuda._compat import CUDAError
 
 CONST1D = np.arange(10, dtype=np.float64)
 
@@ -196,7 +196,7 @@ class TestLinker(CUDATestCase):
 
         link = str(test_data_dir / "error.cu")
 
-        from cuda.
+        from numba.cuda._compat import NVRTCError
 
         errty = NVRTCError
         with self.assertRaises(errty) as e:
@@ -13,7 +13,7 @@ from numba.cuda.testing import (
     CUDATestCase,
     skip_on_cudasim,
 )
-from cuda.
+from numba.cuda._compat import ObjectCode
 
 if not config.ENABLE_CUDASIM:
     from cuda.bindings.driver import cuLibraryGetGlobal, cuMemcpyHtoD
@@ -43,6 +43,12 @@ if TEST_BIN_DIR:
         TEST_BIN_DIR, "test_device_functions.ltoir"
     )
 
+    require_cuobjdump = (
+        test_device_functions_fatbin_multi,
+        test_device_functions_fatbin,
+        test_device_functions_o,
+    )
+
 
 @unittest.skipIf(
     not TEST_BIN_DIR or not _have_nvjitlink(),
@@ -127,14 +133,22 @@ class TestLinkerDumpAssembly(CUDATestCase):
         super().tearDown()
 
     def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self):
-        files =
+        files = (
             test_device_functions_cu,
             test_device_functions_ltoir,
             test_device_functions_fatbin_multi,
-
+        )
 
         for file in files:
             with self.subTest(file=file):
+                if (
+                    file in require_cuobjdump
+                    and os.getenv("NUMBA_CUDA_TEST_WHEEL_ONLY") is not None
+                ):
+                    self.skipTest(
+                        "wheel-only environments do not have cuobjdump"
+                    )
+
                 f = io.StringIO()
                 with contextlib.redirect_stdout(f):
                     sig = "uint32(uint32, uint32)"
@@ -151,16 +165,24 @@
                 self.assertTrue("ASSEMBLY (AFTER LTO)" in f.getvalue())
 
     def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self):
-        files =
+        files = (
             test_device_functions_a,
             test_device_functions_cubin,
             test_device_functions_fatbin,
             test_device_functions_o,
             test_device_functions_ptx,
-
+        )
 
         for file in files:
             with self.subTest(file=file):
+                if (
+                    file in require_cuobjdump
+                    and os.getenv("NUMBA_CUDA_TEST_WHEEL_ONLY") is not None
+                ):
+                    self.skipTest(
+                        "wheel-only environments do not have cuobjdump"
+                    )
+
                 sig = "uint32(uint32, uint32)"
                 add_from_numba = cuda.declare_device("add_from_numba", sig)
 
@@ -854,13 +854,25 @@ class TestBranchPrunePredicates(TestBranchPruneBase):
                 _CONST2 = "PLACEHOLDER2"
                 return _CONST2 + 4
 
-
+        if PYVERSION in ((3, 14),):
+            # The order of the __code__.co_consts changes with 3.14
+            new = self._literal_const_sample_generator(impl, {0: 0, 2: 20})
+        elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+            new = self._literal_const_sample_generator(impl, {1: 0, 3: 20})
+        else:
+            raise NotImplementedError(PYVERSION)
         iconst = impl.__code__.co_consts
         nconst = new.__code__.co_consts
-
-        iconst, (
-
-
+        if PYVERSION in ((3, 14),):
+            self.assertEqual(iconst, ("PLACEHOLDER1", 3.14159, "PLACEHOLDER2"))
+            self.assertEqual(nconst, (0, 3.14159, 20))
+        elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+            self.assertEqual(
+                iconst, (None, "PLACEHOLDER1", 3.14159, "PLACEHOLDER2", 4)
+            )
+            self.assertEqual(nconst, (None, 0, 3.14159, 20, 4))
+        else:
+            raise NotImplementedError(PYVERSION)
         self.assertEqual(impl(None), 3.14159)
         self.assertEqual(new(None), 24)
 
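Note: the version branches above account for CPython 3.14 reshaping __code__.co_consts. The asserted tuples show that on 3.14 the leading None slot and the small-int literal 4 no longer appear, shifting every remaining constant's index down, hence the {0: ...} replacement maps instead of {1: ...}. A quick way to observe the difference (output varies by interpreter version):

    import sys

    def sample(x):
        s = "PLACEHOLDER1"
        return 3.14159

    # On <= 3.13 this typically prints (None, 'PLACEHOLDER1', 3.14159);
    # on 3.14 the implicit None slot is gone and indices shift by one.
    print(sys.version_info[:2], sample.__code__.co_consts)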
@@ -872,7 +884,17 @@
 
         for c_inp, prune in (self._TRUTHY, False), (self._FALSEY, True):
             for const in c_inp:
-
+                if PYVERSION in ((3, 14),):
+                    # The order of the __code__.co_consts changes with 3.14
+                    func = self._literal_const_sample_generator(
+                        impl, {0: const}
+                    )
+                elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+                    func = self._literal_const_sample_generator(
+                        impl, {1: const}
+                    )
+                else:
+                    raise NotImplementedError(PYVERSION)
                 self.assert_prune(
                     func, (types.NoneType("none"),), [prune], None
                 )
@@ -885,7 +907,17 @@
 
         for c_inp, prune in (self._TRUTHY, False), (self._FALSEY, True):
             for const in c_inp:
-
+                if PYVERSION in ((3, 14),):
+                    # The order of the __code__.co_consts changes with 3.14
+                    func = self._literal_const_sample_generator(
+                        impl, {0: const}
+                    )
+                elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+                    func = self._literal_const_sample_generator(
+                        impl, {1: const}
+                    )
+                else:
+                    raise NotImplementedError(PYVERSION)
                 self.assert_prune(
                     func, (types.NoneType("none"),), [prune], None
                 )
@@ -900,7 +932,17 @@
 
         for c_inp, prune in (self._TRUTHY, False), (self._FALSEY, True):
             for const in c_inp:
-
+                if PYVERSION in ((3, 14),):
+                    # The order of the __code__.co_consts changes with 3.14
+                    func = self._literal_const_sample_generator(
+                        impl, {0: const}
+                    )
+                elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+                    func = self._literal_const_sample_generator(
+                        impl, {1: const}
+                    )
+                else:
+                    raise NotImplementedError(PYVERSION)
                 self.assert_prune(
                     func, (types.NoneType("none"),), [prune], None
                 )
@@ -915,7 +957,17 @@
 
         for c_inp, prune in (self._TRUTHY, False), (self._FALSEY, True):
             for const in c_inp:
-
+                if PYVERSION in ((3, 14),):
+                    # The order of the __code__.co_consts changes with 3.14
+                    func = self._literal_const_sample_generator(
+                        impl, {0: const}
+                    )
+                elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+                    func = self._literal_const_sample_generator(
+                        impl, {1: const}
+                    )
+                else:
+                    raise NotImplementedError(PYVERSION)
                 self.assert_prune(
                     func, (types.NoneType("none"),), [prune], None
                 )
@@ -592,6 +592,12 @@ def atomic_cas_2dim(res, old, ary, fill_val):
     old[gid] = cuda.atomic.cas(res, gid, fill_val, ary[gid])
 
 
+@unittest.skipIf(
+    not config.ENABLE_CUDASIM
+    and cuda.get_current_device().compute_capability >= (12, 0)
+    and cuda.cudadrv.runtime.get_version()[0] == 12,
+    reason="NVVM 12.9 Bugged on CC 10+",
+)
 class TestCudaAtomics(CUDATestCase):
     def setUp(self):
         super().setUp()
@@ -13,6 +13,7 @@ from numba.cuda import (
     compile_all,
     LinkableCode,
 )
+from numba.cuda.cudadrv import nvrtc
 from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
 
 TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
@@ -557,7 +558,7 @@ class TestCompile(unittest.TestCase):
         link_obj = LinkableCode.from_path(link)
         if link_obj.kind == "cu":
             # if link is a cu file, result contains a compiled object code
-            from cuda.
+            from numba.cuda._compat import ObjectCode
 
             assert isinstance(code_list[1], ObjectCode)
         else:
@@ -661,6 +662,16 @@ class TestCompileOnlyTests(unittest.TestCase):
             ),
         )
 
+    def test_compile_ptx_arch_specific(self):
+        ptx, resty = cuda.compile_ptx(lambda: None, tuple(), cc=(9, 0, "a"))
+        self.assertIn(".target sm_90a", ptx)
+
+        if nvrtc._get_nvrtc_version() >= (12, 9):
+            ptx, resty = cuda.compile_ptx(
+                lambda: None, tuple(), cc=(10, 0, "f")
+            )
+            self.assertIn(".target sm_100f", ptx)
+
 
 @skip_on_cudasim("Compilation unsupported in the simulator")
 class TestCompileWithLaunchBounds(unittest.TestCase):
@@ -3,12 +3,15 @@
 
 import math
 import itertools
+import sys
 
 import numpy as np
+import pytest
 
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda import types
 from numba import cuda
+from numba.cuda import config
 from numba.cuda.tests.cudapy.complex_usecases import (
     real_usecase,
     imag_usecase,
@@ -275,6 +278,10 @@ class TestCMath(BaseComplexTest):
     def test_log(self):
         self.check_unary_func(log_usecase)
 
+    @pytest.mark.xfail(
+        sys.version_info[:2] >= (3, 14),
+        reason="python 3.14 cmath.log behavior is different than previous versions",
+    )
     def test_log_base(self):
         values = list(itertools.product(self.more_values(), self.more_values()))
         value_types = [
@@ -333,6 +340,12 @@
         self.check_unary_func(tanh_usecase, ulps=2, ignore_sign_on_zero=True)
 
 
+@unittest.skipIf(
+    not config.ENABLE_CUDASIM
+    and cuda.get_current_device().compute_capability >= (12, 0)
+    and cuda.cudadrv.runtime.get_version()[0] == 12,
+    reason="NVVM 12.9 Bugged on CC 10+",
+)
 class TestAtomicOnComplexComponents(CUDATestCase):
     # Based on the reproducer from Issue #8309. array.real and array.imag could
     # not be used because they required returning an array from a generated
@@ -48,7 +48,7 @@ def _in_list_var(list_var, var):
 
 
 def _find_assign(func_ir, var):
-    for
+    for block in func_ir.blocks.values():
         for i, inst in enumerate(block.body):
             if isinstance(inst, ir.Assign) and inst.target.name != var:
                 all_var = inst.list_vars()
@@ -54,7 +54,7 @@ class TestDebugOutput(CUDATestCase):
         self.assertRaises(AssertionError, check_meth, out)
 
     def _check_dump_bytecode(self, out):
-        if PYVERSION
+        if PYVERSION in ((3, 11), (3, 12), (3, 13), (3, 14)):
             # binop with arg=0 is binary add, see CPython dis.py and opcode.py
             self.assertIn("BINARY_OP(arg=0", out)
         else:
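Note: the comment in the hunk above ("binop with arg=0 is binary add") refers to CPython 3.11+ folding the individual binary opcodes into one parametrized BINARY_OP instruction, where argument 0 selects addition. A quick check:

    import dis

    def add(a, b):
        return a + b

    # On CPython 3.11+ this shows "BINARY_OP  0 (+)"; numba-cuda's own
    # bytecode dump renders the same instruction as "BINARY_OP(arg=0, ...)",
    # which is the text the test asserts on.
    dis.dis(add)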
@@ -885,29 +885,34 @@ class TestCudaDebugInfo(CUDATestCase):
         """,
     )
 
-    # shared_arr -> composite -> elements[4] (data field at index 4) -> pointer
-    # local_arr -> composite -> elements[4] (data field at index 4) -> pointer without dwarfAddressSpace
+    # shared_arr -> composite -> elements[4] (data field at index 4) -> pointer without dwarfAddressSpace
+    # local_arr -> composite -> elements[4] (data field at index 4) -> pointer without dwarfAddressSpace
+    # Note: Shared memory pointers don't have dwarfAddressSpace because they are
+    # cast to generic address space via addrspacecast in cudaimpl.py
     address_class_filechecks = r"""
     CHECK-DAG: [[SHARED_VAR:![0-9]+]] = !DILocalVariable({{.*}}name: "shared_arr"{{.*}}type: [[SHARED_COMPOSITE:![0-9]+]]
     CHECK-DAG: [[SHARED_COMPOSITE]] = {{.*}}!DICompositeType(elements: [[SHARED_ELEMENTS:![0-9]+]]
    CHECK-DAG: [[SHARED_ELEMENTS]] = !{{{.*}}, {{.*}}, {{.*}}, {{.*}}, [[SHARED_DATA:![0-9]+]], {{.*}}, {{.*}}}
     CHECK-DAG: [[SHARED_DATA]] = !DIDerivedType(baseType: [[SHARED_PTR:![0-9]+]], name: "data"
-    CHECK-DAG: [[SHARED_PTR]] = !DIDerivedType({{.*}}
+    CHECK-DAG: [[SHARED_PTR]] = !DIDerivedType({{.*}}tag: DW_TAG_pointer_type
+    CHECK-NOT: [[SHARED_PTR]]{{.*}}dwarfAddressSpace
 
     CHECK-DAG: [[LOCAL_VAR:![0-9]+]] = !DILocalVariable({{.*}}name: "local_arr"{{.*}}type: [[LOCAL_COMPOSITE:![0-9]+]]
     CHECK-DAG: [[LOCAL_COMPOSITE]] = {{.*}}!DICompositeType(elements: [[LOCAL_ELEMENTS:![0-9]+]]
     CHECK-DAG: [[LOCAL_ELEMENTS]] = !{{{.*}}, {{.*}}, {{.*}}, {{.*}}, [[LOCAL_DATA:![0-9]+]], {{.*}}, {{.*}}}
     CHECK-DAG: [[LOCAL_DATA]] = !DIDerivedType(baseType: [[LOCAL_PTR:![0-9]+]], name: "data"
     CHECK-DAG: [[LOCAL_PTR]] = !DIDerivedType(baseType: {{.*}}tag: DW_TAG_pointer_type
-    CHECK-NOT: [[LOCAL_PTR]]{{.*}}dwarfAddressSpace
+    CHECK-NOT: [[LOCAL_PTR]]{{.*}}dwarfAddressSpace
     """
 
     def _test_shared_memory_address_class(self, dtype):
         """Test that shared memory arrays have correct DWARF address class.
 
-        Shared memory pointers should have
-
-
+        Shared memory pointers should NOT have dwarfAddressSpace attribute
+        because they are cast to generic address space via addrspacecast.
+        The runtime pointer type is generic, not shared, so cuda-gdb can
+        correctly dereference them. Local arrays also should not have this
+        attribute.
         """
         sig = (numpy_support.from_dtype(dtype),)
 
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 
-from cuda.
+from numba.cuda._compat import CUDAError
 import numpy as np
 import threading
 
@@ -860,7 +860,7 @@ class TestIntrinsic(TestCase):
             "TestIntrinsic.test_docstring.<locals>.void_func",
             void_func.__qualname__,
         )
-        self.assertDictEqual({"a": int}, void_func
+        self.assertDictEqual({"a": int}, inspect.get_annotations(void_func))
         self.assertEqual("void_func docstring", void_func.__doc__)
 
 
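Note: the final hunk switches the annotations assertion from an attribute access on void_func (truncated in this view) to inspect.get_annotations, the accessor that also evaluates the deferred annotations introduced by PEP 649 in Python 3.14. For example:

    import inspect

    def void_func(a: int):
        "void_func docstring"

    # inspect.get_annotations is available since Python 3.10 and returns
    # the evaluated annotations dict on 3.14's lazily annotated functions.
    assert inspect.get_annotations(void_func) == {"a": int}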