numba-cuda 0.11.0__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in the public registry, and is provided for informational purposes only.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/{cuda_bf16.py → _internal/cuda_bf16.py} +1 -1
- numba_cuda/numba/cuda/api.py +13 -0
- numba_cuda/numba/cuda/bf16.py +112 -0
- numba_cuda/numba/cuda/cg.py +2 -0
- numba_cuda/numba/cuda/codegen.py +9 -1
- numba_cuda/numba/cuda/compiler.py +2 -1
- numba_cuda/numba/cuda/cudadecl.py +6 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +4 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +24 -2
- numba_cuda/numba/cuda/debuginfo.py +27 -0
- numba_cuda/numba/cuda/decorators.py +5 -2
- numba_cuda/numba/cuda/dispatcher.py +3 -3
- numba_cuda/numba/cuda/memory_management/__init__.py +1 -0
- numba_cuda/numba/cuda/simulator/__init__.py +10 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +1 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +0 -0
- numba_cuda/numba/cuda/simulator/api.py +17 -0
- numba_cuda/numba/cuda/simulator/bf16.py +1 -0
- numba_cuda/numba/cuda/simulator/compiler.py +1 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +57 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +8 -0
- numba_cuda/numba/cuda/simulator/kernel.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +8 -2
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +1 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +6 -0
- numba_cuda/numba/cuda/target.py +10 -1
- numba_cuda/numba/cuda/testing.py +10 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +2 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +15 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -12
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -3
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +25 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +80 -41
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +34 -51
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +17 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +23 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +48 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +60 -58
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -2
- numba_cuda/numba/cuda/tests/support.py +1 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +1 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +1 -1
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.13.0.dist-info}/METADATA +1 -1
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.13.0.dist-info}/RECORD +64 -50
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.13.0.dist-info}/WHEEL +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +0 -1
- /numba_cuda/numba/cuda/{runtime → memory_management}/memsys.cu +0 -0
- /numba_cuda/numba/cuda/{runtime → memory_management}/memsys.cuh +0 -0
- /numba_cuda/numba/cuda/{runtime → memory_management}/nrt.cu +0 -0
- /numba_cuda/numba/cuda/{runtime → memory_management}/nrt.cuh +0 -0
- /numba_cuda/numba/cuda/{runtime → memory_management}/nrt.py +0 -0
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.13.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.13.0.dist-info}/top_level.txt +0 -0
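The most visible additions in this release are the bfloat16 support files (numba/cuda/bf16.py and the bindings moved to numba/cuda/_internal/cuda_bf16.py) together with a cuda.is_bfloat16_supported() helper added in api.py, all exercised by the new tests further down. As a rough usage sketch drawn from those tests (assumes a GPU with compute capability 8.0+ and CUDA 12 or newer; not an official example):

# Sketch based on the new tests in this diff.
from numba import cuda, float32
from numba.cuda.bf16 import bfloat16  # new high-level bfloat16 type

if cuda.is_bfloat16_supported():  # helper added in numba/cuda/api.py

    @cuda.jit
    def kernel(out):
        x = bfloat16(3.14)        # construct a bfloat16 value on the device
        out[0] = float32(x)       # widen back to float32 for storage

    out = cuda.device_array((1,), dtype="float32")
    kernel[1, 1](out)
    print(out.copy_to_host()[0])  # ~3.14 at bfloat16 precision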
numba_cuda/numba/cuda/testing.py
CHANGED
@@ -116,20 +116,26 @@ def skip_on_arm(reason):
 def skip_if_cuda_includes_missing(fn):
     # Skip when cuda.h is not available - generally this should indicate
     # whether the CUDA includes are available or not
-    cuda_include_path = libs.get_cuda_include_dir()
+    reason = "CUDA include dir not available on this system"
+    try:
+        cuda_include_path = libs.get_cuda_include_dir()
+    except FileNotFoundError:
+        return unittest.skip(reason)(fn)
     cuda_h = os.path.join(cuda_include_path, "cuda.h")
     cuda_h_file = os.path.exists(cuda_h) and os.path.isfile(cuda_h)
-    reason = "CUDA include dir not available on this system"
     return unittest.skipUnless(cuda_h_file, reason)(fn)


 def skip_if_curand_kernel_missing(fn):
-    cuda_include_path = libs.get_cuda_include_dir()
+    reason = "curand_kernel.h not available on this system"
+    try:
+        cuda_include_path = libs.get_cuda_include_dir()
+    except FileNotFoundError:
+        return unittest.skip(reason)(fn)
     curand_kernel_h = os.path.join(cuda_include_path, "curand_kernel.h")
     curand_kernel_h_file = os.path.exists(curand_kernel_h) and os.path.isfile(
         curand_kernel_h
     )
-    reason = "curand_kernel.h not available on this system"
     return unittest.skipUnless(curand_kernel_h_file, reason)(fn)

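The rewritten decorators above tolerate a missing CUDA include directory: libs.get_cuda_include_dir() may now raise FileNotFoundError, in which case the test is skipped unconditionally instead of the decorator failing while the test module is being decorated. The same pattern in isolation, as a minimal sketch with hypothetical names (skip_if_header_missing is not part of numba-cuda):

import os
import unittest


def skip_if_header_missing(header, get_include_dir):
    # Decorator factory: skip the test when `header` cannot be located.
    # `get_include_dir` may raise FileNotFoundError, mirroring
    # libs.get_cuda_include_dir() after this change.
    reason = f"{header} not available on this system"

    def decorator(fn):
        try:
            include_dir = get_include_dir()
        except FileNotFoundError:
            return unittest.skip(reason)(fn)
        present = os.path.isfile(os.path.join(include_dir, header))
        return unittest.skipUnless(present, reason)(fn)

    return decorator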
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
CHANGED
@@ -476,12 +476,14 @@ class TestArrayMethod(CUDATestCase):
             host_array, dev_array.copy_to_host().astype(dtype)
         )

+    @skip_on_cudasim("Simulator does not use __array__()")
     @unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
     def test_np_array_copy_false(self):
         dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
         with self.assertRaisesRegex(ValueError, "`copy=False` is not"):
             np.array(dev_array, copy=False)

+    @skip_on_cudasim("Simulator does not use __array__()")
     @unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
     def test_np_array_copy_true(self):
         dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
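The two tests gaining @skip_on_cudasim above pin down the __array__() protocol of device arrays under NumPy 2, which the simulator's fake arrays do not implement. Roughly, assuming a real GPU and NumPy 2:

import numpy as np
from numba import cuda

dev = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))

host = np.array(dev)           # implicit device-to-host copy succeeds
print(host)                    # [1. 2. 3.]

try:
    np.array(dev, copy=False)  # a zero-copy view of device memory is impossible
except ValueError as err:
    print(err)                 # "`copy=False` is not ..." per the test's regex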
numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py
CHANGED
@@ -5,14 +5,19 @@ import numpy as np

 from numba import cuda, config
 from numba.cuda.cudadrv.linkable_code import CUSource
-from numba.cuda.testing import CUDATestCase, ContextResettingTestCase
+from numba.cuda.testing import (
+    CUDATestCase,
+    ContextResettingTestCase,
+    skip_on_cudasim,
+)

-from cuda.bindings.driver import cuModuleGetGlobal, cuMemcpyHtoD
+if not config.ENABLE_CUDASIM:
+    from cuda.bindings.driver import cuModuleGetGlobal, cuMemcpyHtoD

-if config.CUDA_USE_NVIDIA_BINDING:
-    from cuda.cuda import CUmodule as cu_module_type
-else:
-    from numba.cuda.cudadrv.drvapi import cu_module as cu_module_type
+    if config.CUDA_USE_NVIDIA_BINDING:
+        from cuda.cuda import CUmodule as cu_module_type
+    else:
+        from numba.cuda.cudadrv.drvapi import cu_module as cu_module_type


 def wipe_all_modules_in_context():
@@ -32,6 +37,7 @@ def get_hashable_handle_value(handle):
     return handle


+@skip_on_cudasim("Module loading not implemented in the simulator")
 class TestModuleCallbacksBasic(ContextResettingTestCase):
     def test_basic(self):
         counter = 0
@@ -136,6 +142,7 @@ class TestModuleCallbacksBasic(ContextResettingTestCase):
         self.assertEqual(len(teardown_seen), 2)


+@skip_on_cudasim("Module loading not implemented in the simulator")
 class TestModuleCallbacksAPICompleteness(CUDATestCase):
     def test_api(self):
         def setup(handle):
@@ -164,6 +171,7 @@ class TestModuleCallbacksAPICompleteness(CUDATestCase):
         kernel[1, 1]()


+@skip_on_cudasim("Module loading not implemented in the simulator")
 class TestModuleCallbacks(CUDATestCase):
     def setUp(self):
         super().setUp()
@@ -213,6 +221,7 @@ __device__ int get_num(int &retval) {
         self.assertEqual(arr[0], 42)


+@skip_on_cudasim("Module loading not implemented in the simulator")
 class TestMultithreadedCallbacks(CUDATestCase):
     def test_concurrent_initialization(self):
         seen_mods = set()
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
CHANGED
@@ -267,6 +267,7 @@ class TestLinker(CUDATestCase):
     not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
     reason="pynvjitlink not enabled",
 )
+@skip_on_cudasim("Linking unsupported in the simulator")
 class TestLinkerUsage(CUDATestCase):
     """Test that whether pynvjitlink can be enabled by both environment variable
     and modification of config at runtime.
numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py
CHANGED
@@ -203,18 +203,6 @@ def simple_usecase_kernel(r, x):
 simple_usecase_caller = CUDAUseCase(simple_usecase_kernel)


-# Usecase with cooperative groups
-
-
-@cuda.jit(cache=True)
-def cg_usecase_kernel(r, x):
-    grid = cuda.cg.this_grid()
-    grid.sync()
-
-
-cg_usecase = CUDAUseCase(cg_usecase_kernel)
-
-
 class _TestModule(CUDATestCase):
     """
     Tests for functionality of this module's functions.
numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py
ADDED
@@ -0,0 +1,33 @@
+from numba import cuda
+from numba.cuda.testing import CUDATestCase
+import sys
+
+from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase
+
+
+# Usecase with cooperative groups
+
+
+@cuda.jit(cache=True)
+def cg_usecase_kernel(r, x):
+    grid = cuda.cg.this_grid()
+    grid.sync()
+
+
+cg_usecase = CUDAUseCase(cg_usecase_kernel)
+
+
+class _TestModule(CUDATestCase):
+    """
+    Tests for functionality of this module's functions.
+    Note this does not define any "test_*" method, instead check_module()
+    should be called by hand.
+    """
+
+    def check_module(self, mod):
+        mod.cg_usecase(0)
+
+
+def self_test():
+    mod = sys.modules[__name__]
+    _TestModule().check_module(mod)
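The cooperative-groups use case now lives in its own module so that only the dedicated caching test (CUDACooperativeGroupTest, added below in test_caching.py) triggers its compilation. For reference, a hedged sketch of using such a kernel directly, assuming cudadevrt is available and the device supports cooperative launches:

import numpy as np
from numba import cuda


@cuda.jit(cache=True)             # CG kernels are cacheable (numba#8888)
def cg_kernel(r, x):
    grid = cuda.cg.this_grid()    # cooperative group spanning the whole grid
    grid.sync()                   # requires a cooperative launch
    i = cuda.grid(1)
    if i < r.size:
        r[i] = x[i] + 1


x = np.arange(32, dtype=np.float64)
r = np.zeros_like(x)
cg_kernel[1, 32](r, x)            # the dispatcher performs a cooperative launch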
numba_cuda/numba/cuda/tests/cudapy/test_array.py
CHANGED
@@ -310,9 +310,6 @@ class TestCudaArray(CUDATestCase):
         check(array_reshape, array_reshape1d, arr, 0)
         check(array_reshape, array_reshape1d, arr, (0,))
         check(array_reshape, array_reshape3d, arr, (1, 0, 2))
-        check_only_shape(array_reshape2d, arr, (0, -1), (0, 0))
-        check_only_shape(array_reshape2d, arr, (4, -1), (4, 0))
-        check_only_shape(array_reshape3d, arr, (-1, 0, 4), (0, 0, 4))

         # C-contiguous
         arr = np.arange(24)
numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py
CHANGED
@@ -3,7 +3,11 @@ import itertools
 import numpy as np
 from numba import cuda
 from numba.core.errors import TypingError
-from numba.cuda.testing import CUDATestCase
+from numba.cuda.testing import (
+    CUDATestCase,
+    skip_on_cudasim,
+    skip_unless_cudasim,
+)
 import unittest


@@ -65,6 +69,7 @@ for align in (True, False):
 # with the test_alignment.TestArrayAlignment class.


+@skip_on_cudasim("Array alignment not supported on cudasim")
 class TestArrayAddressAlignment(CUDATestCase):
     """
     Test cuda.local.array and cuda.shared.array support for an alignment
@@ -232,5 +237,24 @@ class TestArrayAddressAlignment(CUDATestCase):
                 print(".", end="", flush=True)


+@skip_unless_cudasim("Only check for alignment unsupported in the simulator")
+class TestCudasimUnsupportedAlignment(CUDATestCase):
+    def test_local_unsupported(self):
+        @cuda.jit
+        def f():
+            cuda.local.array(1, dtype=np.uint8, alignment=16)
+
+        with self.assertRaisesRegex(RuntimeError, "not supported in cudasim"):
+            f[1, 1]()
+
+    def test_shared_unsupported(self):
+        @cuda.jit
+        def f():
+            cuda.shared.array(1, dtype=np.uint8, alignment=16)
+
+        with self.assertRaisesRegex(RuntimeError, "not supported in cudasim"):
+            f[1, 1]()
+
+
 if __name__ == "__main__":
     unittest.main()
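The new TestCudasimUnsupportedAlignment class records that the alignment keyword of cuda.local.array and cuda.shared.array is a hardware-only feature: under NUMBA_ENABLE_CUDASIM=1 it raises RuntimeError. A small on-device sketch (alignment is in bytes; assumes a real GPU):

import numpy as np
from numba import cuda


@cuda.jit
def aligned_scratch(out):
    # Request a 16-byte-aligned local scratch buffer; on the simulator this
    # raises RuntimeError ("not supported in cudasim") instead.
    buf = cuda.local.array(16, dtype=np.uint8, alignment=16)
    buf[0] = 42
    out[0] = buf[0]


out = np.zeros(1, dtype=np.uint8)
aligned_scratch[1, 1](out)
print(out[0])  # 42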
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
ADDED
@@ -0,0 +1,62 @@
+from numba import cuda, float32
+from numba.cuda.bf16 import bfloat16
+from numba.cuda.testing import CUDATestCase
+
+import math
+
+
+class TestBfloat16HighLevelBindings(CUDATestCase):
+    def skip_unsupported(self):
+        if not cuda.is_bfloat16_supported():
+            self.skipTest(
+                "bfloat16 requires compute capability 8.0+ and CUDA version>= 12.0"
+            )
+
+    def test_use_type_in_kernel(self):
+        self.skip_unsupported()
+
+        @cuda.jit
+        def kernel():
+            bfloat16(3.14)
+
+        kernel[1, 1]()
+
+    def test_math_bindings(self):
+        self.skip_unsupported()
+
+        exp_functions = [math.exp]
+        try:
+            from math import exp2
+
+            exp_functions += [exp2]
+        except ImportError:
+            pass
+
+        functions = [
+            math.trunc,
+            math.ceil,
+            math.floor,
+            math.sqrt,
+            math.log,
+            math.log10,
+            math.cos,
+            math.sin,
+            math.tanh,
+        ] + exp_functions
+
+        for f in functions:
+            with self.subTest(func=f):
+
+                @cuda.jit
+                def kernel(arr):
+                    x = bfloat16(3.14)
+                    y = f(x)
+                    arr[0] = float32(y)
+
+                arr = cuda.device_array((1,), dtype="float32")
+                kernel[1, 1](arr)
+
+                if f in exp_functions:
+                    self.assertAlmostEqual(arr[0], f(3.14), delta=1e-1)
+                else:
+                    self.assertAlmostEqual(arr[0], f(3.14), delta=1e-2)
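Besides constructing the type, test_math_bindings shows that standard math module functions accept bfloat16 operands inside a kernel. A condensed sketch, assuming cuda.is_bfloat16_supported() returns True:

import math

from numba import cuda, float32
from numba.cuda.bf16 import bfloat16


@cuda.jit
def bf16_math(out):
    x = bfloat16(3.14)
    out[0] = float32(math.sqrt(x))   # math.* calls lower to bfloat16 intrinsics
    out[1] = float32(math.tanh(x))
    out[2] = float32(math.exp(x))


out = cuda.device_array((3,), dtype="float32")
bf16_math[1, 1](out)
print(out.copy_to_host())            # roughly [1.77, 0.996, 23.1]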
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py
CHANGED
@@ -2,41 +2,54 @@ import numba.cuda as cuda
 from numba.cuda.testing import unittest, CUDATestCase
 import numpy as np

-from numba import
-
-
-
-
-
-
-
-
-
-    hrsqrt,
-    hrcp,
-    hlog,
-    hlog2,
-    hlog10,
-    hcos,
-    hsin,
-    hexp,
-    hexp2,
-    hexp10,
+from numba import (
+    config,
+    int16,
+    int32,
+    int64,
+    uint16,
+    uint32,
+    uint64,
+    float32,
+    float64,
 )
+from numba.types import float16

-
-
-
+if not config.ENABLE_CUDASIM:
+    from numba.cuda._internal.cuda_bf16 import (
+        nv_bfloat16,
+        htrunc,
+        hceil,
+        hfloor,
+        hrint,
+        hsqrt,
+        hrsqrt,
+        hrcp,
+        hlog,
+        hlog2,
+        hlog10,
+        hcos,
+        hsin,
+        hexp,
+        hexp2,
+        hexp10,
+        htanh,
+        htanh_approx,
+    )

 dtypes = [int16, int32, int64, uint16, uint32, uint64, float32]


-@unittest.skipIf(
-    (cuda.get_current_device().compute_capability < (8, 0)),
-    "bfloat16 requires compute capability 8.0+",
-)
 class Bfloat16Test(CUDATestCase):
+    def skip_unsupported(self):
+        if not cuda.is_bfloat16_supported():
+            self.skipTest(
+                "bfloat16 requires compute capability 8.0+ and CUDA version>= 12.0"
+            )
+
     def test_ctor(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def simple_kernel():
             a = nv_bfloat16(float64(1.0))  # noqa: F841
@@ -47,18 +60,13 @@ class Bfloat16Test(CUDATestCase):
             f = nv_bfloat16(uint16(6))  # noqa: F841
             g = nv_bfloat16(uint32(7))  # noqa: F841
             h = nv_bfloat16(uint64(8))  # noqa: F841
+            i = nv_bfloat16(float16(9))  # noqa: F841

         simple_kernel[1, 1]()

-        if cuda_version >= (12, 0):
-
-            @cuda.jit
-            def simple_kernel_fp16():
-                i = nv_bfloat16(float16(9))  # noqa: F841
-
-            simple_kernel_fp16[1, 1]()
-
     def test_casts(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def simple_kernel(b, c, d, e, f, g, h):
             a = nv_bfloat16(3.14)
@@ -90,6 +98,7 @@ class Bfloat16Test(CUDATestCase):
         assert h[0] == 3

     def test_ctor_cast_loop(self):
+        self.skip_unsupported()
         for dtype in dtypes:
             with self.subTest(dtype=dtype):

@@ -106,6 +115,8 @@ class Bfloat16Test(CUDATestCase):
                 assert a[0] == 3

     def test_arithmetic(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def simple_kernel(arith, logic):
             # Binary Arithmetic Operators
@@ -175,6 +186,8 @@ class Bfloat16Test(CUDATestCase):
         )

     def test_math_func(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def simple_kernel(a):
             x = nv_bfloat16(3.14)
@@ -191,16 +204,18 @@ class Bfloat16Test(CUDATestCase):
             a[9] = float32(hlog10(x))
             a[10] = float32(hcos(x))
             a[11] = float32(hsin(x))
-            a[12] = float32(hexp(x))
-            a[13] = float32(hexp2(x))
-            a[14] = float32(hexp10(x))
+            a[12] = float32(htanh(x))
+            a[13] = float32(htanh_approx(x))
+            a[14] = float32(hexp(x))
+            a[15] = float32(hexp2(x))
+            a[16] = float32(hexp10(x))

-        a = np.zeros(15, dtype=np.float32)
+        a = np.zeros(17, dtype=np.float32)
         simple_kernel[1, 1](a)

         x = 3.14
         np.testing.assert_allclose(
-            a[:12],
+            a[:14],
             [
                 np.trunc(x),
                 np.ceil(x),
@@ -214,15 +229,19 @@ class Bfloat16Test(CUDATestCase):
                 np.log10(x),
                 np.cos(x),
                 np.sin(x),
+                np.tanh(x),
+                np.tanh(x),
             ],
             atol=1e-2,
         )

         np.testing.assert_allclose(
-            a[12:], [np.exp(x), np.exp2(x), np.power(10, x)], atol=1e2
+            a[14:], [np.exp(x), np.exp2(x), np.power(10, x)], atol=1e2
         )

     def test_check_bfloat16_type(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def kernel(arr):
             x = nv_bfloat16(3.14)
@@ -237,6 +256,8 @@ class Bfloat16Test(CUDATestCase):
         np.testing.assert_allclose(arr, [3.14], atol=1e-2)

     def test_use_within_device_func(self):
+        self.skip_unsupported()
+
         @cuda.jit(device=True)
         def add_bf16(a, b):
             return a + b
@@ -252,6 +273,24 @@ class Bfloat16Test(CUDATestCase):

         np.testing.assert_allclose(arr, [8], atol=1e-2)

+    def test_use_binding_inside_dfunc(self):
+        self.skip_unsupported()
+
+        @cuda.jit(device=True)
+        def f(arr):
+            pi = nv_bfloat16(3.14)
+            three = htrunc(pi)
+            arr[0] = float32(three)
+
+        @cuda.jit
+        def kernel(arr):
+            f(arr)
+
+        arr = np.zeros(1, np.float32)
+        kernel[1, 1](arr)
+
+        np.testing.assert_allclose(arr, [3], atol=1e-2)
+

 if __name__ == "__main__":
     unittest.main()
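These tests cover the lower-level generated bindings, which moved from numba.cuda.cuda_bf16 to numba.cuda._internal.cuda_bf16 (see the file list above). The newly added test_use_binding_inside_dfunc boils down to the following pattern, sketched here with the caveat that the module is internal and its path may change again:

import numpy as np
from numba import cuda, float32
from numba.cuda._internal.cuda_bf16 import nv_bfloat16, htrunc


@cuda.jit(device=True)
def truncate_pi(arr):
    pi = nv_bfloat16(3.14)
    arr[0] = float32(htrunc(pi))  # htrunc wraps the CUDA bfloat16 truncation intrinsic


@cuda.jit
def kernel(arr):
    truncate_pi(arr)


arr = np.zeros(1, np.float32)
kernel[1, 1](arr)
print(arr[0])  # 3.0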
numba_cuda/numba/cuda/tests/cudapy/test_caching.py
CHANGED
@@ -1,8 +1,6 @@
 import multiprocessing
 import os
 import shutil
-import subprocess
-import sys
 import unittest
 import warnings

@@ -163,55 +161,6 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
         f = mod.renamed_function2
         self.assertPreciseEqual(f(2), 8)

-    @skip_unless_cc_60
-    @skip_if_cudadevrt_missing
-    @skip_if_mvc_enabled("CG not supported with MVC")
-    def test_cache_cg(self):
-        # Functions using cooperative groups should be cacheable. See Issue
-        # #8888: https://github.com/numba/numba/issues/8888
-        self.check_pycache(0)
-        mod = self.import_module()
-        self.check_pycache(0)
-
-        mod.cg_usecase(0)
-        self.check_pycache(2)  # 1 index, 1 data
-
-        # Check the code runs ok from another process
-        self.run_in_separate_process()
-
-    @skip_unless_cc_60
-    @skip_if_cudadevrt_missing
-    @skip_if_mvc_enabled("CG not supported with MVC")
-    def test_cache_cg_clean_run(self):
-        # See Issue #9432: https://github.com/numba/numba/issues/9432
-        # If a cached function using CG sync was the first thing to compile,
-        # the compile would fail.
-        self.check_pycache(0)
-
-        # This logic is modelled on run_in_separate_process(), but executes the
-        # CG usecase directly in the subprocess.
-        code = """if 1:
-            import sys
-
-            sys.path.insert(0, %(tempdir)r)
-            mod = __import__(%(modname)r)
-            mod.cg_usecase(0)
-            """ % dict(tempdir=self.tempdir, modname=self.modname)
-
-        popen = subprocess.Popen(
-            [sys.executable, "-c", code],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
-        out, err = popen.communicate(timeout=60)
-        if popen.returncode != 0:
-            raise AssertionError(
-                "process failed with code %s: \n"
-                "stdout follows\n%s\n"
-                "stderr follows\n%s\n"
-                % (popen.returncode, out.decode(), err.decode()),
-            )
-
     def _test_pycache_fallback(self):
         """
         With a disabled __pycache__, test there is a working fallback
@@ -275,6 +224,40 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
             pass


+@skip_on_cudasim("Simulator does not implement caching")
+class CUDACooperativeGroupTest(SerialMixin, DispatcherCacheUsecasesTest):
+    # See Issue #9432: https://github.com/numba/numba/issues/9432
+    # If a cached function using CG sync was the first thing to compile,
+    # the compile would fail.
+    here = os.path.dirname(__file__)
+    usecases_file = os.path.join(here, "cg_cache_usecases.py")
+    modname = "cuda_cooperative_caching_test_fodder"
+
+    def setUp(self):
+        DispatcherCacheUsecasesTest.setUp(self)
+        CUDATestCase.setUp(self)
+
+    def tearDown(self):
+        CUDATestCase.tearDown(self)
+        DispatcherCacheUsecasesTest.tearDown(self)
+
+    @skip_unless_cc_60
+    @skip_if_cudadevrt_missing
+    @skip_if_mvc_enabled("CG not supported with MVC")
+    def test_cache_cg(self):
+        # Functions using cooperative groups should be cacheable. See Issue
+        # #8888: https://github.com/numba/numba/issues/8888
+        self.check_pycache(0)
+        mod = self.import_module()
+        self.check_pycache(0)
+
+        mod.cg_usecase(0)
+        self.check_pycache(2)  # 1 index, 1 data
+
+        # Check the code runs ok from another process
+        self.run_in_separate_process()
+
+
 @skip_on_cudasim("Simulator does not implement caching")
 class CUDAAndCPUCachingTest(SerialMixin, DispatcherCacheUsecasesTest):
     here = os.path.dirname(__file__)
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py
CHANGED
@@ -1,8 +1,13 @@
 from __future__ import print_function

+import os
+
+import cffi
+
 import numpy as np

 from numba import config, cuda, int32
+from numba.types import CPointer
 from numba.cuda.testing import (
     unittest,
     CUDATestCase,
@@ -11,6 +16,9 @@ from numba.cuda.testing import (
     skip_if_cudadevrt_missing,
     skip_if_mvc_enabled,
 )
+from numba.core.typing import signature
+
+ffi = cffi.FFI()


 @cuda.jit
@@ -149,6 +157,34 @@ class TestCudaCooperativeGroups(CUDATestCase):
         self.assertEqual(blocks1d, blocks2d)
         self.assertEqual(blocks1d, blocks3d)

+    @skip_on_cudasim("External code unsupported on cudasim")
+    @skip_unless_cc_60
+    def test_external_cooperative_func(self):
+        cudapy_test_path = os.path.dirname(__file__)
+        tests_path = os.path.dirname(cudapy_test_path)
+        data_path = os.path.join(tests_path, "data")
+        src = os.path.join(data_path, "cta_barrier.cu")
+
+        sig = signature(
+            CPointer(int32),
+        )
+        cta_barrier = cuda.declare_device(
+            "cta_barrier", sig=sig, link=[src], use_cooperative=True
+        )
+
+        @cuda.jit("void()")
+        def kernel():
+            cta_barrier()
+
+        overload = kernel.overloads[()]
+        block_size = 32
+        grid_size = overload.max_cooperative_grid_blocks(block_size)
+
+        kernel[grid_size, block_size]()
+
+        overload = kernel.overloads[()]
+        self.assertTrue(overload.cooperative)
+

 if __name__ == "__main__":
     unittest.main()
|
@@ -310,6 +310,23 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
310
310
|
with captured_stdout():
|
311
311
|
self._test_kernel_args_types()
|
312
312
|
|
313
|
+
def test_kernel_args_names(self):
|
314
|
+
sig = (types.int32,)
|
315
|
+
|
316
|
+
@cuda.jit("void(int32)", debug=True, opt=False)
|
317
|
+
def f(x):
|
318
|
+
z = x # noqa: F841
|
319
|
+
|
320
|
+
llvm_ir = f.inspect_llvm(sig)
|
321
|
+
|
322
|
+
# Verify argument name is not prefixed with "arg."
|
323
|
+
pat = r"define void @.*\(i32 %\"x\"\)"
|
324
|
+
match = re.compile(pat).search(llvm_ir)
|
325
|
+
self.assertIsNotNone(match, msg=llvm_ir)
|
326
|
+
pat = r"define void @.*\(i32 %\"arg\.x\"\)"
|
327
|
+
match = re.compile(pat).search(llvm_ir)
|
328
|
+
self.assertIsNone(match, msg=llvm_ir)
|
329
|
+
|
313
330
|
def test_llvm_dbg_value(self):
|
314
331
|
sig = (types.int32, types.int32)
|
315
332
|
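The new test_kernel_args_names pins down how kernel arguments are named in the LLVM IR when compiling with debug info: the parameter keeps its Python name (x) rather than the mangled arg.x form. Inspecting the IR for an eagerly compiled signature looks like this (a small sketch; the regexes are the ones the test uses):

import re

from numba import cuda, types


@cuda.jit("void(int32)", debug=True, opt=False)
def f(x):
    z = x  # noqa: F841


llvm_ir = f.inspect_llvm((types.int32,))  # IR keyed by the compiled signature

# The argument keeps its Python name ...
assert re.search(r'define void @.*\(i32 %"x"\)', llvm_ir)
# ... and is not renamed to the old "arg.x" form.
assert not re.search(r'define void @.*\(i32 %"arg\.x"\)', llvm_ir)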
|
@@ -116,6 +116,7 @@ class EnumTest(CUDATestCase):
|
|
116
116
|
got = cuda_func(arr)
|
117
117
|
self.assertPreciseEqual(expected, got)
|
118
118
|
|
119
|
+
@skip_on_cudasim("No typing context in CUDA simulator")
|
119
120
|
def test_int_enum_no_conversion(self):
|
120
121
|
# Ported from Numba PR #10047: "Fix IntEnumMember.can_convert_to() when
|
121
122
|
# no conversions found", https://github.com/numba/numba/pull/10047.
|