numba_cuda-0.21.1-cp313-cp313-win_amd64.whl → numba_cuda-0.24.0-cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_compat.py +47 -0
- numba_cuda/numba/cuda/api.py +4 -1
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -40
- numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +56 -119
- numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
- numba_cuda/numba/cuda/codegen.py +46 -12
- numba_cuda/numba/cuda/compiler.py +15 -9
- numba_cuda/numba/cuda/core/analysis.py +29 -21
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +4 -4
- numba_cuda/numba/cuda/core/base.py +12 -11
- numba_cuda/numba/cuda/core/bytecode.py +21 -13
- numba_cuda/numba/cuda/core/byteflow.py +336 -90
- numba_cuda/numba/cuda/core/compiler.py +3 -4
- numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
- numba_cuda/numba/cuda/core/config.py +5 -7
- numba_cuda/numba/cuda/core/consts.py +1 -1
- numba_cuda/numba/cuda/core/controlflow.py +17 -9
- numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
- numba_cuda/numba/cuda/core/errors.py +4 -912
- numba_cuda/numba/cuda/core/inline_closurecall.py +82 -67
- numba_cuda/numba/cuda/core/interpreter.py +334 -160
- numba_cuda/numba/cuda/core/ir.py +191 -119
- numba_cuda/numba/cuda/core/ir_utils.py +149 -128
- numba_cuda/numba/cuda/core/postproc.py +8 -8
- numba_cuda/numba/cuda/core/pythonapi.py +3 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +6 -3
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +5 -5
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +3 -3
- numba_cuda/numba/cuda/core/ssa.py +5 -5
- numba_cuda/numba/cuda/core/transforms.py +29 -16
- numba_cuda/numba/cuda/core/typed_passes.py +10 -10
- numba_cuda/numba/cuda/core/typeinfer.py +42 -27
- numba_cuda/numba/cuda/core/untyped_passes.py +82 -65
- numba_cuda/numba/cuda/cpython/unicode.py +2 -2
- numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
- numba_cuda/numba/cuda/cudadecl.py +0 -13
- numba_cuda/numba/cuda/cudadrv/devicearray.py +10 -9
- numba_cuda/numba/cuda/cudadrv/driver.py +142 -519
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +87 -32
- numba_cuda/numba/cuda/cudaimpl.py +0 -12
- numba_cuda/numba/cuda/debuginfo.py +25 -0
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +4 -7
- numba_cuda/numba/cuda/deviceufunc.py +3 -6
- numba_cuda/numba/cuda/dispatcher.py +39 -49
- numba_cuda/numba/cuda/intrinsics.py +150 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
- numba_cuda/numba/cuda/lowering.py +36 -29
- numba_cuda/numba/cuda/memory_management/nrt.py +10 -14
- numba_cuda/numba/cuda/np/arrayobj.py +61 -9
- numba_cuda/numba/cuda/np/numpy_support.py +32 -9
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
- numba_cuda/numba/cuda/printimpl.py +20 -0
- numba_cuda/numba/cuda/serialize.py +10 -0
- numba_cuda/numba/cuda/stubs.py +0 -11
- numba_cuda/numba/cuda/testing.py +4 -8
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +21 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +195 -51
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +6 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +3 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +6 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +11 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +53 -23
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +22 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py +243 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_numba_interop.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +51 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +117 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_globals.py +111 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +61 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +31 -0
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
- numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
- numba_cuda/numba/cuda/typing/context.py +3 -1
- numba_cuda/numba/cuda/typing/typeof.py +51 -2
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +106 -105
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +0 -159
- numba_cuda/numba/cuda/cext/_devicearray.h +0 -29
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -41
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py
@@ -187,41 +187,43 @@ def make_fancy_creation_kernel(vtype):
 
     f4_34 = v4(f4_1)  # 1 2 3 4
 
-    for v in (
-        f4_1,
-        f4_2,
-        f4_3,
-        f4_4,
-        f4_5,
-        f4_6,
-        f4_7,
-        f4_8,
-        f4_9,
-        f4_10,
-        f4_11,
-        f4_12,
-        f4_13,
-        f4_14,
-        f4_15,
-        f4_16,
-        f4_17,
-        f4_18,
-        f4_19,
-        f4_20,
-        f4_21,
-        f4_22,
-        f4_23,
-        f4_24,
-        f4_25,
-        f4_26,
-        f4_27,
-        f4_28,
-        f4_29,
-        f4_30,
-        f4_31,
-        f4_32,
-        f4_33,
-        f4_34,
+    for v in tuple(
+        (
+            f4_1,
+            f4_2,
+            f4_3,
+            f4_4,
+            f4_5,
+            f4_6,
+            f4_7,
+            f4_8,
+            f4_9,
+            f4_10,
+            f4_11,
+            f4_12,
+            f4_13,
+            f4_14,
+            f4_15,
+            f4_16,
+            f4_17,
+            f4_18,
+            f4_19,
+            f4_20,
+            f4_21,
+            f4_22,
+            f4_23,
+            f4_24,
+            f4_25,
+            f4_26,
+            f4_27,
+            f4_28,
+            f4_29,
+            f4_30,
+            f4_31,
+            f4_32,
+            f4_33,
+            f4_34,
+        )
     ):
         res[j] = v.x
         res[j + 1] = v.y
numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py
@@ -4,7 +4,7 @@
 import re
 
 import numpy as np
-from numba import cuda
+from numba import cuda, errors
 from numba.cuda import int32, int64, float32, float64
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
 from numba.cuda.compiler import compile_ptx
@@ -208,6 +208,122 @@ class TestCudaWarpOperations(CUDATestCase):
         compiled[1, nelem](ary, val)
         self.assertTrue(np.all(ary == val))
 
+    def test_vote_sync_const_mode_val(self):
+        nelem = 32
+        ary1 = np.ones(nelem, dtype=np.int32)
+        ary2 = np.empty(nelem, dtype=np.int32)
+
+        subtest = [
+            (use_vote_sync_all, "void(int32[:], int32[:])", (ary1, ary2)),
+            (use_vote_sync_any, "void(int32[:], int32[:])", (ary1, ary2)),
+            (use_vote_sync_eq, "void(int32[:], int32[:])", (ary1, ary2)),
+            (use_vote_sync_ballot, "void(uint32[:])", (ary2,)),
+        ]
+
+        args_re = r"\((.*)\)"
+        m = re.compile(args_re)
+
+        for func, sig, input in subtest:
+            with self.subTest(func=func.__name__):
+                compiled = cuda.jit(sig)(func)
+                compiled[1, nelem](*input)
+                irs = next(iter(compiled.inspect_llvm().values()))
+
+                for ir in irs.split("\n"):
+                    if "call" in ir and "llvm.nvvm.vote.sync" in ir:
+                        args = m.search(ir).group(0)
+                        arglist = args.split(",")
+                        mode_arg = arglist[1]
+                        self.assertNotIn("%", mode_arg)
+
+    def test_vote_sync_const_mode_val_sm100(self):
+        subtest = [
+            (use_vote_sync_all, "void(int32[:], int32[:])"),
+            (use_vote_sync_any, "void(int32[:], int32[:])"),
+            (use_vote_sync_eq, "void(int32[:], int32[:])"),
+            (use_vote_sync_ballot, "void(uint32[:])"),
+        ]
+
+        for func, sig in subtest:
+            with self.subTest(func=func.__name__):
+                compile_ptx(func, sig, cc=(10, 0))
+
+    def test_vote_sync_type_validation(self):
+        nelem = 32
+
+        def use_vote_sync_all_with_mask(mask, predicate, result):
+            i = cuda.grid(1)
+            if i < result.shape[0]:
+                result[i] = cuda.all_sync(mask[i], predicate[i])
+
+        invalid_cases = [
+            (
+                "void(float32[:], int32[:], int32[:])",
+                "Mask type must be an integer",
+            ),
+            (
+                "void(boolean[:], int32[:], int32[:])",
+                "Mask type must be an integer",
+            ),
+            (
+                "void(float64[:], int32[:], int32[:])",
+                "Mask type must be an integer",
+            ),
+            (
+                "void(int32[:], float32[:], int32[:])",
+                "Predicate must be an integer or boolean",
+            ),
+            (
+                "void(int32[:], float64[:], int32[:])",
+                "Predicate must be an integer or boolean",
+            ),
+        ]
+
+        for sig, expected_msg in invalid_cases:
+            with self.subTest(sig=sig):
+                with self.assertRaisesRegex(errors.TypingError, expected_msg):
+                    cuda.jit(sig)(use_vote_sync_all_with_mask)
+
+        valid_cases = [
+            # mask: unsigned/signed integer
+            # predicate: unsigned/signed integer, boolean
+            ("void(uint32[:], uint32[:], int32[:])", np.uint32, np.uint32),
+            ("void(int64[:], int64[:], int32[:])", np.int64, np.int64),
+            ("void(uint64[:], uint64[:], int32[:])", np.uint64, np.uint64),
+            ("void(int32[:], int32[:], int32[:])", np.int32, np.int32),
+            ("void(uint32[:], boolean[:], int32[:])", np.uint32, np.bool_),
+            ("void(uint64[:], boolean[:], int32[:])", np.uint64, np.bool_),
+        ]
+
+        for sig, mask_dtype, pred_dtype in valid_cases:
+            with self.subTest(sig=sig):
+                mask_val = (~np.array(0, dtype=mask_dtype)).item()
+                compiled = cuda.jit(sig)(use_vote_sync_all_with_mask)
+                ary_mask = np.full(nelem, mask_val, dtype=mask_dtype)
+                ary_pred = np.ones(nelem, dtype=pred_dtype)
+                ary_result = np.empty(nelem, dtype=np.int32)
+                compiled[1, nelem](ary_mask, ary_pred, ary_result)
+
+        # literals
+        @cuda.jit
+        def use_vote_sync_all_with_literal(result):
+            i = cuda.grid(1)
+            if i < result.shape[0]:
+                result[i] = cuda.all_sync(0xFFFFFFFF, 1)
+
+        ary_result = np.empty(nelem, dtype=np.int32)
+        use_vote_sync_all_with_literal[1, nelem](ary_result)
+
+        @cuda.jit
+        def use_vote_sync_all_with_predicate_literal(mask, result):
+            i = cuda.grid(1)
+            if i < mask.shape[0]:
+                result[i] = cuda.all_sync(mask[i], 1)
+
+        ary_mask = np.full(nelem, 0xFFFFFFFF, dtype=np.uint32)
+        ary_result = np.empty(nelem, dtype=np.int32)
+        use_vote_sync_all_with_predicate_literal[1, nelem](ary_mask, ary_result)
+
     def test_vote_sync_all(self):
         compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_all)
         nelem = 32
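For context, the warp-vote intrinsics exercised above all follow the same pattern: each lane supplies a mask of participating threads and a predicate, and the vote result is broadcast back to the participating lanes. A minimal sketch, not taken from the diff, using only the public `cuda.all_sync` and `cuda.ballot_sync` API (kernel and variable names are illustrative):

```python
# Hedged sketch of the warp-vote API the new tests cover: all_sync is
# nonzero only if every lane in the mask passes a true predicate, and
# ballot_sync packs each lane's predicate bit into a 32-bit mask.
import numpy as np
from numba import cuda

FULL_MASK = 0xFFFFFFFF  # all 32 lanes of the warp participate


@cuda.jit
def warp_votes(values, all_out, ballot_out):
    i = cuda.grid(1)
    if i < values.size:
        all_out[i] = cuda.all_sync(FULL_MASK, values[i] > 0)
        ballot_out[i] = cuda.ballot_sync(FULL_MASK, values[i] > 0)


values = cuda.to_device(np.arange(32, dtype=np.int32))
all_out = cuda.device_array(32, dtype=np.int32)
ballot_out = cuda.device_array(32, dtype=np.uint32)
warp_votes[1, 32](values, all_out, ballot_out)
# Lane 0 holds value 0, so all_sync yields 0 and bit 0 of the ballot is clear.
```

The constant-mode tests above then inspect the generated LLVM IR to check that the vote mode operand of `llvm.nvvm.vote.sync` is an immediate rather than a register (`%`-prefixed) value.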
numba_cuda/numba/cuda/tests/doc_examples/test_globals.py (new file)
@@ -0,0 +1,111 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+
+import unittest
+
+from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.tests.support import captured_stdout
+
+
+@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
+class TestGlobals(CUDATestCase):
+    """
+    Tests demonstrating how global variables are captured in CUDA kernels.
+    """
+
+    def setUp(self):
+        # Prevent output from this test showing
+        # up when running the test suite
+        self._captured_stdout = captured_stdout()
+        self._captured_stdout.__enter__()
+        super().setUp()
+
+    def tearDown(self):
+        # No exception type, value, or traceback
+        self._captured_stdout.__exit__(None, None, None)
+        super().tearDown()
+
+    def test_ex_globals_constant_capture(self):
+        """
+        Test demonstrating how global variables are captured as constants.
+        """
+        # magictoken.ex_globals_constant_capture.begin
+        import numpy as np
+        from numba import cuda
+
+        TAX_RATE = 0.08
+        PRICES = np.array([10.0, 25.0, 5.0, 15.0, 30.0], dtype=np.float64)
+
+        @cuda.jit
+        def compute_totals(quantities, totals):
+            i = cuda.grid(1)
+            if i < totals.size:
+                totals[i] = quantities[i] * PRICES[i] * (1 + TAX_RATE)
+
+        d_quantities = cuda.to_device(
+            np.array([1, 2, 3, 4, 5], dtype=np.float64)
+        )
+        d_totals = cuda.device_array(5, dtype=np.float64)
+
+        # First kernel call - compiles and captures values
+        compute_totals[1, 32](d_quantities, d_totals)
+        print("Value of d_totals:", d_totals.copy_to_host())
+
+        # These modifications have no effect on subsequent kernel calls
+        TAX_RATE = 0.10  # noqa: F841
+        PRICES[:] = [20.0, 50.0, 10.0, 30.0, 60.0]
+
+        # Second kernel call still uses the original values
+        compute_totals[1, 32](d_quantities, d_totals)
+        print("Value of d_totals:", d_totals.copy_to_host())
+        # magictoken.ex_globals_constant_capture.end
+
+        # Verify the values are the same (original values were captured)
+        expected = np.array([10.8, 54.0, 16.2, 64.8, 162.0])
+        np.testing.assert_allclose(d_totals.copy_to_host(), expected)
+
+    def test_ex_globals_device_array_capture(self):
+        """
+        Test demonstrating how global device arrays are captured by pointer.
+        """
+        # magictoken.ex_globals_device_array_capture.begin
+        import numpy as np
+        from numba import cuda
+
+        # Global device array - pointer is captured, not data
+        PRICES = cuda.to_device(
+            np.array([10.0, 25.0, 5.0, 15.0, 30.0], dtype=np.float32)
+        )
+
+        @cuda.jit
+        def compute_totals(quantities, totals):
+            i = cuda.grid(1)
+            if i < totals.size:
+                totals[i] = quantities[i] * PRICES[i]
+
+        d_quantities = cuda.to_device(
+            np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32)
+        )
+        d_totals = cuda.device_array(5, dtype=np.float32)
+
+        # First kernel call
+        compute_totals[1, 32](d_quantities, d_totals)
+        print(d_totals.copy_to_host())  # [10. 25. 5. 15. 30.]
+
+        # Mutate the device array in-place
+        PRICES.copy_to_device(
+            np.array([20.0, 50.0, 10.0, 30.0, 60.0], dtype=np.float32)
+        )
+
+        # Second kernel call sees the updated values
+        compute_totals[1, 32](d_quantities, d_totals)
+        print(d_totals.copy_to_host())  # [20. 50. 10. 30. 60.]
+        # magictoken.ex_globals_device_array_capture.end
+
+        # Verify the second call sees updated values
+        expected = np.array([20.0, 50.0, 10.0, 30.0, 60.0], dtype=np.float32)
+        np.testing.assert_allclose(d_totals.copy_to_host(), expected)
+
+
+if __name__ == "__main__":
+    unittest.main()
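A hedged sketch, not part of the new test file: because module-level scalars and NumPy arrays are frozen into the kernel at compile time (as the first test demonstrates), passing them as kernel arguments is the usual way to make them updatable between launches.

```python
# Values passed as arguments are read at launch time, not baked in at
# compile time, so they can change between calls without recompilation.
import numpy as np
from numba import cuda


@cuda.jit
def compute_totals(quantities, prices, tax_rate, totals):
    i = cuda.grid(1)
    if i < totals.size:
        totals[i] = quantities[i] * prices[i] * (1 + tax_rate)


quantities = cuda.to_device(np.ones(5, dtype=np.float64))
prices = cuda.to_device(np.array([10.0, 25.0, 5.0, 15.0, 30.0]))
totals = cuda.device_array(5, dtype=np.float64)
compute_totals[1, 32](quantities, prices, 0.08, totals)  # 8% tax rate
compute_totals[1, 32](quantities, prices, 0.10, totals)  # now 10%, no rebuild
```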
numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py
@@ -387,5 +387,66 @@ class TestIterate(unittest.TestCase):
             x = val  # noqa: F841
 
 
+@skip_on_cudasim("Tests internals of the CUDA driver device array")
+class TestEmptyArrays(unittest.TestCase):
+    def test_empty_array_flags(self):
+        test_shapes = [
+            (0,),
+            (10, 0),
+            (0, 10),
+            (0, 0),
+            (5, 0, 3),
+            (0, 5, 3),
+            (5, 3, 0),
+            (0, 0, 0),
+        ]
+        for shape in test_shapes:
+            with self.subTest(shape=shape):
+                nparr = np.empty(shape)
+                arr = Array.from_desc(
+                    0, nparr.shape, nparr.strides, nparr.dtype.itemsize
+                )
+                # Empty arrays should be both C and F contiguous
+                self.assertEqual(
+                    arr.flags["C_CONTIGUOUS"],
+                    nparr.flags["C_CONTIGUOUS"],
+                    f"C_CONTIGUOUS mismatch for shape {shape}",
+                )
+                self.assertEqual(
+                    arr.flags["F_CONTIGUOUS"],
+                    nparr.flags["F_CONTIGUOUS"],
+                    f"F_CONTIGUOUS mismatch for shape {shape}",
+                )
+                self.assertTrue(arr.flags["C_CONTIGUOUS"])
+                self.assertTrue(arr.flags["F_CONTIGUOUS"])
+
+
+@skip_on_cudasim("Tests CUDA device array type inference")
+class TestEmptyArrayTypeInference(unittest.TestCase):
+    def test_empty_array_typeof(self):
+        from numba import cuda, typeof
+
+        test_cases = [
+            ((0,), np.int64),
+            ((10, 0), np.int64),
+            ((0, 10), np.int64),
+            ((0, 0), np.float32),
+            ((5, 0, 3), np.float32),
+            ((0, 5, 3), np.int32),
+            ((5, 3, 0), np.float64),
+        ]
+
+        for shape, dtype in test_cases:
+            with self.subTest(shape=shape, dtype=dtype):
+                h_values = np.empty(shape, dtype=dtype)
+                d_values = cuda.to_device(h_values)
+                self.assertEqual(
+                    typeof(h_values),
+                    typeof(d_values),
+                    f"Type mismatch for shape {shape}, dtype {dtype}: "
+                    f"host={typeof(h_values)}, device={typeof(d_values)}",
+                )
+
+
 if __name__ == "__main__":
     unittest.main()
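The invariant these tests encode mirrors NumPy's own convention, which can be checked directly:

```python
# NumPy reports zero-size arrays as both C- and F-contiguous; the new
# tests match the dummyarray Array.from_desc flags against this behavior.
import numpy as np

for shape in [(0,), (10, 0), (0, 10), (5, 0, 3)]:
    a = np.empty(shape)
    assert a.flags["C_CONTIGUOUS"] and a.flags["F_CONTIGUOUS"]
```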
numba_cuda/numba/cuda/tests/nrt/test_nrt.py
@@ -382,6 +382,37 @@ class TestNrtStatistics(CUDATestCase):
         self.assertEqual(stats.free, stats_free)
         self.assertEqual(stats.mi_free, stats_mi_free)
 
+    def test_nrt_toggle_enabled(self):
+        def array_reshape1d(arr, newshape, got):
+            y = arr.reshape(newshape)
+            for i in range(y.shape[0]):
+                got[i] = y[i]
+
+        def array_reshape(arr, newshape):
+            return arr.reshape(newshape)
+
+        with override_config("CUDA_ENABLE_NRT", True):
+            # compile a kernel that caches an NRT enabled reshape primitive
+            @cuda.jit
+            def kernel(out):
+                out = out.reshape(out.shape)
+                out[0] = 1
+
+            out = cuda.to_device(np.zeros(1, dtype=np.float64))
+            kernel[1, 1](out)
+
+        with override_config("CUDA_ENABLE_NRT", False):
+            # compile and launch a new kernel that gets a cache hit on the
+            # NRT enabled reshape, but tries to launch with NRT disabled
+            # globally
+            new_kernel = cuda.jit(array_reshape1d)
+            arr = np.arange(24)
+            expected = array_reshape(arr, (24,))
+            got = np.zeros(expected.shape, dtype=arr.dtype)
+            new_kernel[1, 1](arr, (24,), got)
+
+            self.assertTrue(np.array_equal(expected, got))
+
 
 if __name__ == "__main__":
     unittest.main()
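As background, NRT is the runtime that gives kernels device-side allocation, and `arr.reshape(...)` may depend on it when the result cannot be a simple view. A minimal sketch, under that assumption, of a kernel whose compilation is sensitive to the `CUDA_ENABLE_NRT` setting toggled above (names are illustrative):

```python
import numpy as np
from numba import cuda


@cuda.jit
def flatten(arr, out):
    # reshape may need to allocate a new device array, which requires NRT
    flat = arr.reshape(arr.size)
    for i in range(flat.shape[0]):
        out[i] = flat[i]


arr = np.arange(24).reshape(4, 6)
out = np.zeros(24, dtype=arr.dtype)
flatten[1, 1](arr, out)  # needs CUDA_ENABLE_NRT if a copy is required
```

The regression the new test guards against is the cached, NRT-enabled compilation of `reshape` being reused after NRT has been disabled globally.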
numba_cuda/numba/cuda/tests/support.py
@@ -38,6 +38,7 @@ from numba.cuda.datamodel.models import OpaqueModel
 from numba.cuda.np import numpy_support
 
 from numba.cuda import HAS_NUMBA
+from numba.cuda.utils import PYVERSION
 
 if HAS_NUMBA:
     from numba.core.extending import (
@@ -56,6 +57,16 @@ class EnableNRTStatsMixin(object):
         rtsys.memsys_disable_stats()
 
 
+skip_if_py314 = unittest.skipIf(PYVERSION == (3, 14), "Test unstable on 3.14")
+
+
+def expected_failure_py314(fn):
+    if PYVERSION == (3, 14):
+        return unittest.expectedFailure(fn)
+    else:
+        return fn
+
+
 skip_unless_cffi = unittest.skipUnless(cffi_utils.SUPPORTED, "requires cffi")
 
 _lnx_reason = "linux only test"
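A hedged usage sketch for the new Python 3.14 helpers: both are ordinary `unittest` decorators applied per test method (the class and test names here are hypothetical).

```python
import unittest

from numba.cuda.tests.support import expected_failure_py314, skip_if_py314


class TestExample(unittest.TestCase):
    @skip_if_py314
    def test_unstable_on_314(self):
        ...  # skipped entirely on 3.14

    @expected_failure_py314
    def test_known_bad_on_314(self):
        ...  # counted as an expected failure on 3.14, runs normally elsewhere
```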
numba_cuda/numba/cuda/types/cuda_functions.py
@@ -334,7 +334,7 @@ class BaseFunction(Callable):
                     k: _unlit_non_poison(v) for k, v in kws.items()
                 }
                 sig = temp.apply(nolitargs, nolitkws)
-            except Exception as e:
+            except Exception as e:  # noqa: PERF203
                 if not isinstance(e, errors.NumbaError):
                     raise e
                 sig = None
numba_cuda/numba/cuda/typing/asnumbatype.py
@@ -7,6 +7,7 @@ import typing as py_typing
 from numba.cuda.typing.typeof import typeof
 from numba.cuda.core import errors
 from numba.cuda import types
+from numba.cuda.utils import PYVERSION
 
 
 class AsNumbaTypeRegistry:
@@ -40,8 +41,42 @@ class AsNumbaTypeRegistry:
         return py_type
 
     def _builtin_infer(self, py_type):
-        if not isinstance(py_type, py_typing._GenericAlias):
-            return
+        if PYVERSION in ((3, 14),):
+            # As of 3.14 the typing module has been updated to return a
+            # different type when calling: `typing.Optional[X]`.
+            #
+            # On 3.14:
+            #
+            # >>> type(typing.Optional[float])
+            # <class 'typing.Union'>
+            #
+            #
+            # On 3.13 (and presumably below):
+            #
+            # >>> type(typing.Optional[float])
+            # <class 'typing._UnionGenericAlias'>
+            #
+            #
+            # The previous implementation of this predicate used
+            # `_GenericAlias`, which was possible because `_UnionGenericAlias`
+            # is a subclass of `_GenericAlias`...
+            #
+            # >>> issubclass(typing._UnionGenericAlias, typing._GenericAlias)
+            # True
+            #
+            # However, other types, such as `typing.List[float]`, remain
+            # `typing._GenericAlias`, so that check must be kept.
+            #
+            if not isinstance(
+                py_type, (py_typing.Union, py_typing._GenericAlias)
+            ):
+                return
+        elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+            # Use of underscore type `_GenericAlias`.
+            if not isinstance(py_type, py_typing._GenericAlias):
+                return
+        else:
+            raise NotImplementedError(PYVERSION)
 
         if getattr(py_type, "__origin__", None) is py_typing.Union:
             if len(py_type.__args__) != 2:
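A hedged illustration of the CPython change this branch handles; the snippet only prints the observed types and is safe on any supported version:

```python
import typing

t = typing.Optional[float]
# Python <= 3.13: <class 'typing._UnionGenericAlias'> (a _GenericAlias)
# Python 3.14:    <class 'typing.Union'>
print(type(t))

# typing.List[float] stays a _GenericAlias on both, which is why the old
# _GenericAlias check is kept alongside the new typing.Union case.
print(type(typing.List[float]))
```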
numba_cuda/numba/cuda/typing/context.py
@@ -460,7 +460,9 @@ class BaseContext(object):
         def is_external(obj):
             """Check if obj is from outside numba.* namespace."""
             try:
-                return not obj.__module__.startswith("numba.")
+                is_numba_module = obj.__module__.startswith("numba.")
+                is_test_module = obj.__module__.startswith("numba.cuda.tests.")
+                return not is_numba_module or is_test_module
             except AttributeError:
                 return True
 
numba_cuda/numba/cuda/typing/typeof.py
@@ -5,6 +5,7 @@ from collections import namedtuple
 from functools import singledispatch
 import ctypes
 import enum
+import operator
 
 import numpy as np
 from numpy.random.bit_generator import BitGenerator
@@ -47,11 +48,20 @@ def typeof_impl(val, c):
     """
     Generic typeof() implementation.
     """
-    tp = _typeof_buffer(val, c)
+    tp = getattr(val, "_numba_type_", None)
     if tp is not None:
         return tp
 
-    tp = getattr(val, "_numba_type_", None)
+    # Check for __cuda_array_interface__ objects (third-party device arrays)
+
+    # Numba's own DeviceNDArray is handled above via _numba_type_.
+    cai = getattr(val, "__cuda_array_interface__", None)
+    if cai is not None:
+        tp = _typeof_cuda_array_interface(cai, c)
+        if tp is not None:
+            return tp
+
+    tp = _typeof_buffer(val, c)
     if tp is not None:
         return tp
 
@@ -299,3 +309,42 @@ def typeof_numpy_polynomial(val, c):
     domain = typeof(val.domain)
     window = typeof(val.window)
     return types.PolynomialType(coef, domain, window)
+
+
+def _typeof_cuda_array_interface(val, c):
+    """
+    Determine the type of a __cuda_array_interface__ object.
+
+    This handles third-party device arrays that implement the CUDA
+    Array Interface. These are typed as regular Array types, with lowering
+    handled in numba.cuda.np.arrayobj.
+    """
+    dtype = numpy_support.from_dtype(np.dtype(val["typestr"]))
+    shape = val["shape"]
+    ndim = len(shape)
+    strides = val.get("strides")
+
+    # Determine layout
+    if not ndim:
+        layout = "C"
+    elif strides is None:
+        layout = "C"
+    else:
+        itemsize = np.dtype(val["typestr"]).itemsize
+        # Quick rejection: C-contiguous has strides[-1] == itemsize,
+        # F-contiguous has strides[0] == itemsize. If neither, it's "A".
+        if strides[-1] == itemsize:
+            c_strides = numpy_support.strides_from_shape(
+                shape, itemsize, order="C"
+            )
+            layout = "C" if all(map(operator.eq, strides, c_strides)) else "A"
+        elif strides[0] == itemsize:
+            f_strides = numpy_support.strides_from_shape(
+                shape, itemsize, order="F"
+            )
+            layout = "F" if all(map(operator.eq, strides, f_strides)) else "A"
+        else:
+            layout = "A"
+
+    _, readonly = val["data"]
+    return types.Array(dtype, ndim, layout, readonly=readonly)
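A hedged walk-through of the layout classification above, using a hand-built interface dict for a 4x3 C-contiguous float32 array (the pointer value is a dummy, and the dict itself is illustrative, not taken from the diff):

```python
# itemsize is 4, strides are (12, 4): strides[-1] == itemsize passes the
# quick check, and comparing against the computed C strides yields "C".
import numpy as np

cai = {
    "shape": (4, 3),
    "typestr": "<f4",
    "data": (0x7F000000, False),  # (device pointer, readonly) - dummy ptr
    "strides": (12, 4),
    "version": 3,
}
itemsize = np.dtype(cai["typestr"]).itemsize
assert itemsize == 4 and cai["strides"][-1] == itemsize

# A transposed view of the same buffer would advertise strides (4, 12):
# there strides[0] == itemsize, so it is classified as "F" instead, and
# anything matching neither pattern falls through to layout "A".
```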
{numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: numba-cuda
-Version: 0.21.1
+Version: 0.24.0
 Summary: CUDA target for Numba
 Author: Anaconda Inc., NVIDIA Corporation
 License-Expression: BSD-2-Clause
@@ -16,24 +16,15 @@ License-File: LICENSE.numba
 Requires-Dist: numba>=0.60.0
 Requires-Dist: cuda-bindings<14.0.0,>=12.9.1
 Requires-Dist: cuda-core<1.0.0,>=0.3.2
+Requires-Dist: packaging
 Provides-Extra: cu12
 Requires-Dist: cuda-bindings<13.0.0,>=12.9.1; extra == "cu12"
 Requires-Dist: cuda-core<1.0.0,>=0.3.0; extra == "cu12"
-Requires-Dist: cuda-
-Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
-Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
-Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "cu12"
-Requires-Dist: nvidia-nvjitlink-cu12; extra == "cu12"
-Requires-Dist: nvidia-cuda-cccl-cu12; extra == "cu12"
+Requires-Dist: cuda-toolkit[cccl,cudart,nvcc,nvjitlink,nvrtc]==12.*; extra == "cu12"
 Provides-Extra: cu13
 Requires-Dist: cuda-bindings==13.*; extra == "cu13"
 Requires-Dist: cuda-core<1.0.0,>=0.3.2; extra == "cu13"
-Requires-Dist: cuda-
-Requires-Dist: nvidia-nvvm==13.*; extra == "cu13"
-Requires-Dist: nvidia-cuda-runtime==13.*; extra == "cu13"
-Requires-Dist: nvidia-cuda-nvrtc==13.*; extra == "cu13"
-Requires-Dist: nvidia-nvjitlink==13.*; extra == "cu13"
-Requires-Dist: nvidia-cuda-cccl==13.*; extra == "cu13"
+Requires-Dist: cuda-toolkit[cccl,cudart,nvjitlink,nvrtc,nvvm]==13.*; extra == "cu13"
 Dynamic: license-file
 
 <div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>