PyPI - numba-cuda - Versions diffs - 0.10.1__py3-none-any.whl → 0.12.1__py3-none-any.whl - Mend

numba-cuda 0.10.1__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

numba_cuda/VERSION +1 -1
numba_cuda/numba/cuda/{cuda_bf16.py → _internal/cuda_bf16.py} +1 -1
numba_cuda/numba/cuda/api.py +13 -0
numba_cuda/numba/cuda/bf16.py +112 -0
numba_cuda/numba/cuda/cg.py +2 -0
numba_cuda/numba/cuda/codegen.py +77 -2
numba_cuda/numba/cuda/compiler.py +22 -16
numba_cuda/numba/cuda/cudadecl.py +21 -6
numba_cuda/numba/cuda/cudadrv/driver.py +107 -20
numba_cuda/numba/cuda/cudadrv/linkable_code.py +10 -2
numba_cuda/numba/cuda/cudadrv/nvrtc.py +23 -1
numba_cuda/numba/cuda/cudaimpl.py +103 -11
numba_cuda/numba/cuda/debuginfo.py +27 -0
numba_cuda/numba/cuda/decorators.py +7 -2
numba_cuda/numba/cuda/dispatcher.py +25 -65
numba_cuda/numba/cuda/runtime/nrt.cu +2 -17
numba_cuda/numba/cuda/runtime/nrt.cuh +41 -0
numba_cuda/numba/cuda/runtime/nrt.py +13 -1
numba_cuda/numba/cuda/stubs.py +23 -11
numba_cuda/numba/cuda/target.py +10 -1
numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -12
numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +33 -0
numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +236 -0
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +55 -0
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +49 -23
numba_cuda/numba/cuda/tests/cudapy/test_caching.py +34 -51
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +34 -0
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +17 -0
numba_cuda/numba/cuda/tests/cudapy/test_extending.py +140 -0
numba_cuda/numba/cuda/tests/data/cta_barrier.cu +23 -0
numba_cuda/numba/cuda/tests/data/include/add.cuh +3 -0
numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +3 -0
numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +9 -0
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +48 -1
numba_cuda/numba/cuda/tests/nrt/test_nrt.py +122 -3
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +11 -0
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +5 -2
numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +7 -0
numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +4 -0
numba_cuda/numba/cuda/utils.py +7 -0
{numba_cuda-0.10.1.dist-info → numba_cuda-0.12.1.dist-info}/METADATA +1 -1
{numba_cuda-0.10.1.dist-info → numba_cuda-0.12.1.dist-info}/RECORD +45 -35
{numba_cuda-0.10.1.dist-info → numba_cuda-0.12.1.dist-info}/WHEEL +1 -1
{numba_cuda-0.10.1.dist-info → numba_cuda-0.12.1.dist-info}/licenses/LICENSE +0 -0
{numba_cuda-0.10.1.dist-info → numba_cuda-0.12.1.dist-info}/top_level.txt +0 -0

numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py ADDED Viewed

@@ -0,0 +1,33 @@
+from numba import cuda
+from numba.cuda.testing import CUDATestCase
+import sys
+from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase
+# Usecase with cooperative groups
+@cuda.jit(cache=True)
+def cg_usecase_kernel(r, x):
+    grid = cuda.cg.this_grid()
+    grid.sync()
+cg_usecase = CUDAUseCase(cg_usecase_kernel)
+class _TestModule(CUDATestCase):
+    """
+    Tests for functionality of this module's functions.
+    Note this does not define any "test_*" method, instead check_module()
+    should be called by hand.
+    """
+    def check_module(self, mod):
+        mod.cg_usecase(0)
+def self_test():
+    mod = sys.modules[__name__]
+    _TestModule().check_module(mod)

numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py ADDED Viewed

@@ -0,0 +1,236 @@
+import re
+import itertools
+import numpy as np
+from numba import cuda
+from numba.core.errors import TypingError
+from numba.cuda.testing import CUDATestCase
+import unittest
+# Set to true if you want to see dots printed for each subtest.
+NOISY = False
+# In order to verify the alignment of the local and shared memory arrays, we
+# inspect the LLVM IR of the generated kernel using the following regexes.
+# Shared memory example:
+# @"_cudapy_smem_38" = addrspace(3) global [1 x i8] undef, align 16
+SMEM_PATTERN = re.compile(
+    r'^@"_cudapy_smem_\d+".*?align (\d+)',
+    re.MULTILINE,
+)
+# Local memory example:
+# %"_cudapy_lmem" = alloca [1 x i8], align 64
+LMEM_PATTERN = re.compile(
+    r'^\s*%"_cudapy_lmem".*?align (\d+)',
+    re.MULTILINE,
+)
+DTYPES = [np.uint8, np.uint32, np.uint64]
+# Add in some record dtypes with and without alignment.
+for align in (True, False):
+    DTYPES += [
+        np.dtype(
+            [
+                ("a", np.uint8),
+                ("b", np.int32),
+                ("c", np.float64),
+            ],
+            align=align,
+        ),
+        np.dtype(
+            [
+                ("a", np.uint32),
+                ("b", np.uint8),
+            ],
+            align=align,
+        ),
+        np.dtype(
+            [
+                ("a", np.uint8),
+                ("b", np.int32),
+                ("c", np.float64),
+                ("d", np.complex64),
+                ("e", (np.uint8, 5)),
+            ],
+            align=align,
+        ),
+    ]
+# N.B. We name the test class TestArrayAddressAlignment to avoid name conflict
+#      with the test_alignment.TestArrayAlignment class.
+class TestArrayAddressAlignment(CUDATestCase):
+    """
+    Test cuda.local.array and cuda.shared.array support for an alignment
+    keyword argument.
+    """
+    def test_array_alignment_1d(self):
+        shapes = (1, 8, 50)
+        alignments = (None, 16, 64, 256)
+        array_types = [(0, "local"), (1, "shared")]
+        self._do_test(array_types, shapes, DTYPES, alignments)
+    def test_array_alignment_2d(self):
+        shapes = ((2, 3),)
+        alignments = (None, 16, 64, 256)
+        array_types = [(0, "local"), (1, "shared")]
+        self._do_test(array_types, shapes, DTYPES, alignments)
+    def test_array_alignment_3d(self):
+        shapes = ((2, 3, 4), (1, 4, 5))
+        alignments = (None, 16, 64, 256)
+        array_types = [(0, "local"), (1, "shared")]
+        self._do_test(array_types, shapes, DTYPES, alignments)
+    def _do_test(self, array_types, shapes, dtypes, alignments):
+        items = itertools.product(array_types, shapes, dtypes, alignments)
+        for (which, array_type), shape, dtype, alignment in items:
+            with self.subTest(
+                array_type=array_type,
+                shape=shape,
+                dtype=dtype,
+                alignment=alignment,
+            ):
+                @cuda.jit
+                def f(loc, shrd, which):
+                    i = cuda.grid(1)
+                    if which == 0:
+                        local_array = cuda.local.array(
+                            shape=shape,
+                            dtype=dtype,
+                            alignment=alignment,
+                        )
+                        if i == 0:
+                            loc[0] = local_array.ctypes.data
+                    else:
+                        shared_array = cuda.shared.array(
+                            shape=shape,
+                            dtype=dtype,
+                            alignment=alignment,
+                        )
+                        if i == 0:
+                            shrd[0] = shared_array.ctypes.data
+                loc = np.zeros(1, dtype=np.uint64)
+                shrd = np.zeros(1, dtype=np.uint64)
+                f[1, 1](loc, shrd, which)
+                kernel = f.overloads[f.signatures[0]]
+                llvm_ir = kernel.inspect_llvm()
+                if alignment is None:
+                    if which == 0:
+                        # Local memory shouldn't have any alignment information
+                        # when no alignment is specified.
+                        match = LMEM_PATTERN.findall(llvm_ir)
+                        self.assertEqual(len(match), 0)
+                    else:
+                        # Shared memory should at least have a power-of-two
+                        # alignment when no alignment is specified.
+                        match = SMEM_PATTERN.findall(llvm_ir)
+                        self.assertEqual(len(match), 1)
+                        alignment = int(match[0])
+                        # Verify alignment is a power of two.
+                        self.assertTrue(alignment & (alignment - 1) == 0)
+                else:
+                    # Verify alignment is in the LLVM IR.
+                    if which == 0:
+                        match = LMEM_PATTERN.findall(llvm_ir)
+                        self.assertEqual(len(match), 1)
+                        actual_alignment = int(match[0])
+                        self.assertEqual(alignment, actual_alignment)
+                    else:
+                        match = SMEM_PATTERN.findall(llvm_ir)
+                        self.assertEqual(len(match), 1)
+                        actual_alignment = int(match[0])
+                        self.assertEqual(alignment, actual_alignment)
+                    # Also verify that the address of the array is aligned.
+                    # If this fails, there problem is likely with NVVM.
+                    address = loc[0] if which == 0 else shrd[0]
+                    alignment_mod = int(address % alignment)
+                    self.assertEqual(alignment_mod, 0)
+                if NOISY:
+                    print(".", end="", flush=True)
+    def test_invalid_aligments(self):
+        shapes = (1, 50)
+        dtypes = (np.uint8, np.uint64)
+        invalid_alignment_values = (-1, 0, 3, 17, 33)
+        invalid_alignment_types = ("1.0", "1", "foo", 1.0, 1.5, 3.2)
+        alignments = invalid_alignment_values + invalid_alignment_types
+        array_types = [(0, "local"), (1, "shared")]
+        # Use regex pattern to match error message, handling potential ANSI
+        # color codes which appear on CI.
+        expected_invalid_type_error_regex = (
+            r"RequireLiteralValue:.*alignment must be a constant integer"
+        )
+        items = itertools.product(array_types, shapes, dtypes, alignments)
+        for (which, array_type), shape, dtype, alignment in items:
+            with self.subTest(
+                array_type=array_type,
+                shape=shape,
+                dtype=dtype,
+                alignment=alignment,
+            ):
+                if which == 0:
+                    @cuda.jit
+                    def f(dest_array):
+                        i = cuda.grid(1)
+                        local_array = cuda.local.array(
+                            shape=shape,
+                            dtype=dtype,
+                            alignment=alignment,
+                        )
+                        if i == 0:
+                            dest_array[0] = local_array.ctypes.data
+                else:
+                    @cuda.jit
+                    def f(dest_array):
+                        i = cuda.grid(1)
+                        shared_array = cuda.shared.array(
+                            shape=shape,
+                            dtype=dtype,
+                            alignment=alignment,
+                        )
+                        if i == 0:
+                            dest_array[0] = shared_array.ctypes.data
+                array = np.zeros(1, dtype=np.uint64)
+                # The type of error we expect differs between an invalid value
+                # that is still an int, and an invalid type.
+                if isinstance(alignment, int):
+                    self.assertRaisesRegex(
+                        ValueError, r"Alignment must be.*", f[1, 1], array
+                    )
+                else:
+                    self.assertRaisesRegex(
+                        TypingError,
+                        expected_invalid_type_error_regex,
+                        f[1, 1],
+                        array,
+                    )
+                if NOISY:
+                    print(".", end="", flush=True)
+if __name__ == "__main__":
+    unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py ADDED Viewed

@@ -0,0 +1,55 @@
+from numba import cuda, float32
+from numba.cuda.bf16 import bfloat16
+from numba.cuda.testing import CUDATestCase
+import math
+class TestBfloat16HighLevelBindings(CUDATestCase):
+    def skip_unsupported(self):
+        if not cuda.is_bfloat16_supported():
+            self.skipTest(
+                "bfloat16 requires compute capability 8.0+ and CUDA version>= 12.0"
+            )
+    def test_use_type_in_kernel(self):
+        self.skip_unsupported()
+        @cuda.jit
+        def kernel():
+            bfloat16(3.14)
+        kernel[1, 1]()
+    def test_math_bindings(self):
+        self.skip_unsupported()
+        functions = [
+            math.trunc,
+            math.ceil,
+            math.floor,
+            math.sqrt,
+            math.log,
+            math.log10,
+            math.cos,
+            math.sin,
+            math.tanh,
+            math.exp,
+            math.exp2,
+        ]
+        for f in functions:
+            with self.subTest(func=f):
+                @cuda.jit
+                def kernel(arr):
+                    x = bfloat16(3.14)
+                    y = f(x)
+                    arr[0] = float32(y)
+                arr = cuda.device_array((1,), dtype="float32")
+                kernel[1, 1](arr)
+                if f in (math.exp, math.exp2):
+                    self.assertAlmostEqual(arr[0], f(3.14), delta=1e-1)
+                else:
+                    self.assertAlmostEqual(arr[0], f(3.14), delta=1e-2)

numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py CHANGED Viewed

@@ -5,7 +5,7 @@ import numpy as np
 from numba import int16, int32, int64, uint16, uint32, uint64, float32, float64
 from numba.types import float16
-from numba.cuda.cuda_bf16 import (
+from numba.cuda._internal.cuda_bf16 import (
     nv_bfloat16,
     htrunc,
     hceil,
@@ -22,21 +22,23 @@ from numba.cuda.cuda_bf16 import (
     hexp,
     hexp2,
     hexp10,
+    htanh,
+    htanh_approx,
 )
-from numba.cuda.cudadrv.runtime import get_version
-cuda_version = get_version()
 dtypes = [int16, int32, int64, uint16, uint32, uint64, float32]
-@unittest.skipIf(
-    (cuda.get_current_device().compute_capability < (8, 0)),
-    "bfloat16 requires compute capability 8.0+",
-)
 class Bfloat16Test(CUDATestCase):
+    def skip_unsupported(self):
+        if not cuda.is_bfloat16_supported():
+            self.skipTest(
+                "bfloat16 requires compute capability 8.0+ and CUDA version>= 12.0"
+            )
     def test_ctor(self):
+        self.skip_unsupported()
         @cuda.jit
         def simple_kernel():
             a = nv_bfloat16(float64(1.0))  # noqa: F841
@@ -47,18 +49,13 @@ class Bfloat16Test(CUDATestCase):
             f = nv_bfloat16(uint16(6))  # noqa: F841
             g = nv_bfloat16(uint32(7))  # noqa: F841
             h = nv_bfloat16(uint64(8))  # noqa: F841
+            i = nv_bfloat16(float16(9))  # noqa: F841
         simple_kernel[1, 1]()
-        if cuda_version >= (12, 0):
-            @cuda.jit
-            def simple_kernel_fp16():
-                i = nv_bfloat16(float16(9))  # noqa: F841
-            simple_kernel_fp16[1, 1]()
     def test_casts(self):
+        self.skip_unsupported()
         @cuda.jit
         def simple_kernel(b, c, d, e, f, g, h):
             a = nv_bfloat16(3.14)
@@ -90,6 +87,7 @@ class Bfloat16Test(CUDATestCase):
         assert h[0] == 3
     def test_ctor_cast_loop(self):
+        self.skip_unsupported()
         for dtype in dtypes:
             with self.subTest(dtype=dtype):
@@ -106,6 +104,8 @@ class Bfloat16Test(CUDATestCase):
                     assert a[0] == 3
     def test_arithmetic(self):
+        self.skip_unsupported()
         @cuda.jit
         def simple_kernel(arith, logic):
             # Binary Arithmetic Operators
@@ -175,6 +175,8 @@ class Bfloat16Test(CUDATestCase):
         )
     def test_math_func(self):
+        self.skip_unsupported()
         @cuda.jit
         def simple_kernel(a):
             x = nv_bfloat16(3.14)
@@ -191,16 +193,18 @@ class Bfloat16Test(CUDATestCase):
             a[9] = float32(hlog10(x))
             a[10] = float32(hcos(x))
             a[11] = float32(hsin(x))
-            a[12] = float32(hexp(x))
-            a[13] = float32(hexp2(x))
-            a[14] = float32(hexp10(x))
+            a[12] = float32(htanh(x))
+            a[13] = float32(htanh_approx(x))
+            a[14] = float32(hexp(x))
+            a[15] = float32(hexp2(x))
+            a[16] = float32(hexp10(x))
-        a = np.zeros(15, dtype=np.float32)
+        a = np.zeros(17, dtype=np.float32)
         simple_kernel[1, 1](a)
         x = 3.14
         np.testing.assert_allclose(
-            a[:12],
+            a[:14],
             [
                 np.trunc(x),
                 np.ceil(x),
@@ -214,15 +218,19 @@ class Bfloat16Test(CUDATestCase):
                 np.log10(x),
                 np.cos(x),
                 np.sin(x),
+                np.tanh(x),
+                np.tanh(x),
             ],
             atol=1e-2,
         )
         np.testing.assert_allclose(
-            a[12:], [np.exp(x), np.exp2(x), np.power(10, x)], atol=1e2
+            a[14:], [np.exp(x), np.exp2(x), np.power(10, x)], atol=1e2
         )
     def test_check_bfloat16_type(self):
+        self.skip_unsupported()
         @cuda.jit
         def kernel(arr):
             x = nv_bfloat16(3.14)
@@ -237,6 +245,8 @@ class Bfloat16Test(CUDATestCase):
         np.testing.assert_allclose(arr, [3.14], atol=1e-2)
     def test_use_within_device_func(self):
+        self.skip_unsupported()
         @cuda.jit(device=True)
         def add_bf16(a, b):
             return a + b
@@ -252,6 +262,22 @@ class Bfloat16Test(CUDATestCase):
         np.testing.assert_allclose(arr, [8], atol=1e-2)
+    def test_use_binding_inside_dfunc(self):
+        @cuda.jit(device=True)
+        def f(arr):
+            pi = nv_bfloat16(3.14)
+            three = htrunc(pi)
+            arr[0] = float32(three)
+        @cuda.jit
+        def kernel(arr):
+            f(arr)
+        arr = np.zeros(1, np.float32)
+        kernel[1, 1](arr)
+        np.testing.assert_allclose(arr, [3], atol=1e-2)
 if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_caching.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import multiprocessing
 import os
 import shutil
-import subprocess
-import sys
 import unittest
 import warnings
@@ -163,55 +161,6 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
         f = mod.renamed_function2
         self.assertPreciseEqual(f(2), 8)
-    @skip_unless_cc_60
-    @skip_if_cudadevrt_missing
-    @skip_if_mvc_enabled("CG not supported with MVC")
-    def test_cache_cg(self):
-        # Functions using cooperative groups should be cacheable. See Issue
-        # #8888: https://github.com/numba/numba/issues/8888
-        self.check_pycache(0)
-        mod = self.import_module()
-        self.check_pycache(0)
-        mod.cg_usecase(0)
-        self.check_pycache(2)  # 1 index, 1 data
-        # Check the code runs ok from another process
-        self.run_in_separate_process()
-    @skip_unless_cc_60
-    @skip_if_cudadevrt_missing
-    @skip_if_mvc_enabled("CG not supported with MVC")
-    def test_cache_cg_clean_run(self):
-        # See Issue #9432: https://github.com/numba/numba/issues/9432
-        # If a cached function using CG sync was the first thing to compile,
-        # the compile would fail.
-        self.check_pycache(0)
-        # This logic is modelled on run_in_separate_process(), but executes the
-        # CG usecase directly in the subprocess.
-        code = """if 1:
-            import sys
-            sys.path.insert(0, %(tempdir)r)
-            mod = __import__(%(modname)r)
-            mod.cg_usecase(0)
-            """ % dict(tempdir=self.tempdir, modname=self.modname)
-        popen = subprocess.Popen(
-            [sys.executable, "-c", code],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
-        out, err = popen.communicate(timeout=60)
-        if popen.returncode != 0:
-            raise AssertionError(
-                "process failed with code %s: \n"
-                "stdout follows\n%s\n"
-                "stderr follows\n%s\n"
-                % (popen.returncode, out.decode(), err.decode()),
-            )
     def _test_pycache_fallback(self):
         """
         With a disabled __pycache__, test there is a working fallback
@@ -275,6 +224,40 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
                 pass
+@skip_on_cudasim("Simulator does not implement caching")
+class CUDACooperativeGroupTest(SerialMixin, DispatcherCacheUsecasesTest):
+    # See Issue #9432: https://github.com/numba/numba/issues/9432
+    # If a cached function using CG sync was the first thing to compile,
+    # the compile would fail.
+    here = os.path.dirname(__file__)
+    usecases_file = os.path.join(here, "cg_cache_usecases.py")
+    modname = "cuda_cooperative_caching_test_fodder"
+    def setUp(self):
+        DispatcherCacheUsecasesTest.setUp(self)
+        CUDATestCase.setUp(self)
+    def tearDown(self):
+        CUDATestCase.tearDown(self)
+        DispatcherCacheUsecasesTest.tearDown(self)
+    @skip_unless_cc_60
+    @skip_if_cudadevrt_missing
+    @skip_if_mvc_enabled("CG not supported with MVC")
+    def test_cache_cg(self):
+        # Functions using cooperative groups should be cacheable. See Issue
+        # #8888: https://github.com/numba/numba/issues/8888
+        self.check_pycache(0)
+        mod = self.import_module()
+        self.check_pycache(0)
+        mod.cg_usecase(0)
+        self.check_pycache(2)  # 1 index, 1 data
+        # Check the code runs ok from another process
+        self.run_in_separate_process()
 @skip_on_cudasim("Simulator does not implement caching")
 class CUDAAndCPUCachingTest(SerialMixin, DispatcherCacheUsecasesTest):
     here = os.path.dirname(__file__)

numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py CHANGED Viewed

@@ -1,8 +1,13 @@
 from __future__ import print_function
+import os
+import cffi
 import numpy as np
 from numba import config, cuda, int32
+from numba.types import CPointer
 from numba.cuda.testing import (
     unittest,
     CUDATestCase,
@@ -11,6 +16,9 @@ from numba.cuda.testing import (
     skip_if_cudadevrt_missing,
     skip_if_mvc_enabled,
 )
+from numba.core.typing import signature
+ffi = cffi.FFI()
 @cuda.jit
@@ -149,6 +157,32 @@ class TestCudaCooperativeGroups(CUDATestCase):
         self.assertEqual(blocks1d, blocks2d)
         self.assertEqual(blocks1d, blocks3d)
+    @skip_unless_cc_60
+    def test_external_cooperative_func(self):
+        cudapy_test_path = os.path.dirname(__file__)
+        tests_path = os.path.dirname(cudapy_test_path)
+        data_path = os.path.join(tests_path, "data")
+        src = os.path.join(data_path, "cta_barrier.cu")
+        sig = signature(
+            CPointer(int32),
+        )
+        cta_barrier = cuda.declare_device(
+            "cta_barrier", sig=sig, link=[src], use_cooperative=True
+        )
+        @cuda.jit
+        def kernel():
+            cta_barrier()
+        block_size = 32
+        grid_size = 1024
+        kernel[grid_size, block_size]()
+        overload = kernel.overloads[()]
+        self.assertTrue(overload.cooperative)
 if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py CHANGED Viewed

@@ -310,6 +310,23 @@ class TestCudaDebugInfo(CUDATestCase):
             with captured_stdout():
                 self._test_kernel_args_types()
+    def test_kernel_args_names(self):
+        sig = (types.int32,)
+        @cuda.jit("void(int32)", debug=True, opt=False)
+        def f(x):
+            z = x  # noqa: F841
+        llvm_ir = f.inspect_llvm(sig)
+        # Verify argument name is not prefixed with "arg."
+        pat = r"define void @.*\(i32 %\"x\"\)"
+        match = re.compile(pat).search(llvm_ir)
+        self.assertIsNotNone(match, msg=llvm_ir)
+        pat = r"define void @.*\(i32 %\"arg\.x\"\)"
+        match = re.compile(pat).search(llvm_ir)
+        self.assertIsNone(match, msg=llvm_ir)
     def test_llvm_dbg_value(self):
         sig = (types.int32, types.int32)

numba-cuda 0.10.1__py3-none-any.whl → 0.12.1__py3-none-any.whl

numba-cuda 0.10.1__py3-none-any.whl → 0.12.1__py3-none-any.whl