numba-cuda 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +0 -8
  3. numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
  4. numba_cuda/numba/cuda/api_util.py +6 -0
  5. numba_cuda/numba/cuda/cgutils.py +1291 -0
  6. numba_cuda/numba/cuda/codegen.py +32 -14
  7. numba_cuda/numba/cuda/compiler.py +113 -10
  8. numba_cuda/numba/cuda/core/caching.py +741 -0
  9. numba_cuda/numba/cuda/core/callconv.py +338 -0
  10. numba_cuda/numba/cuda/core/codegen.py +168 -0
  11. numba_cuda/numba/cuda/core/compiler.py +205 -0
  12. numba_cuda/numba/cuda/core/typed_passes.py +139 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +1 -1
  14. numba_cuda/numba/cuda/cudadecl.py +0 -268
  15. numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
  16. numba_cuda/numba/cuda/cudadrv/devices.py +4 -6
  17. numba_cuda/numba/cuda/cudadrv/driver.py +105 -50
  18. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
  19. numba_cuda/numba/cuda/cudaimpl.py +4 -178
  20. numba_cuda/numba/cuda/debuginfo.py +469 -3
  21. numba_cuda/numba/cuda/device_init.py +0 -1
  22. numba_cuda/numba/cuda/dispatcher.py +311 -14
  23. numba_cuda/numba/cuda/extending.py +2 -1
  24. numba_cuda/numba/cuda/fp16.py +348 -0
  25. numba_cuda/numba/cuda/intrinsics.py +1 -1
  26. numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
  27. numba_cuda/numba/cuda/lowering.py +1833 -8
  28. numba_cuda/numba/cuda/mathimpl.py +2 -90
  29. numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
  30. numba_cuda/numba/cuda/nvvmutils.py +2 -1
  31. numba_cuda/numba/cuda/printimpl.py +2 -1
  32. numba_cuda/numba/cuda/serialize.py +264 -0
  33. numba_cuda/numba/cuda/simulator/__init__.py +2 -0
  34. numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
  35. numba_cuda/numba/cuda/stubs.py +0 -308
  36. numba_cuda/numba/cuda/target.py +13 -5
  37. numba_cuda/numba/cuda/testing.py +156 -5
  38. numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
  39. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
  40. numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
  41. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +16 -5
  42. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +5 -1
  43. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
  44. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  45. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
  46. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
  47. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  48. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
  49. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  50. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -5
  51. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
  52. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
  53. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
  54. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
  55. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
  56. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
  57. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +2 -5
  58. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
  59. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
  60. numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
  61. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
  62. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
  63. numba_cuda/numba/cuda/utils.py +785 -0
  64. numba_cuda/numba/cuda/vector_types.py +1 -1
  65. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
  66. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +69 -56
  67. numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
  68. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
  69. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
  70. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
@@ -3,7 +3,6 @@ import numpy as np
 import operator
 import re
 from numba import cuda, int64
-from numba.cuda import compile_ptx
 from numba.core.errors import TypingError
 from numba.core.types import f2
 from numba.cuda.testing import (
@@ -11,6 +10,7 @@ from numba.cuda.testing import (
     CUDATestCase,
     skip_on_cudasim,
     skip_unless_cc_53,
+    skip_if_nvjitlink_missing,
 )
 
 
@@ -174,27 +174,27 @@ def hlt_func_2(x, y):
     return cuda.fp16.hlt(x, y)
 
 
-def test_multiple_hcmp_1(r, a, b, c):
+def multiple_hcmp_1(r, a, b, c):
     # float16 predicates used in two separate functions
     r[0] = hlt_func_1(a, b) and hlt_func_2(b, c)
 
 
-def test_multiple_hcmp_2(r, a, b, c):
+def multiple_hcmp_2(r, a, b, c):
     # The same float16 predicate used in the caller and callee
     r[0] = hlt_func_1(a, b) and cuda.fp16.hlt(b, c)
 
 
-def test_multiple_hcmp_3(r, a, b, c):
+def multiple_hcmp_3(r, a, b, c):
     # Different float16 predicates used in the caller and callee
     r[0] = hlt_func_1(a, b) and cuda.fp16.hge(c, b)
 
 
-def test_multiple_hcmp_4(r, a, b, c):
+def multiple_hcmp_4(r, a, b, c):
     # The same float16 predicates used twice in a function
     r[0] = cuda.fp16.hlt(a, b) and cuda.fp16.hlt(b, c)
 
 
-def test_multiple_hcmp_5(r, a, b, c):
+def multiple_hcmp_5(r, a, b, c):
     # Different float16 predicates used in a function
     r[0] = cuda.fp16.hlt(a, b) and cuda.fp16.hge(c, b)
 
@@ -561,13 +561,13 @@ class TestCudaIntrinsic(CUDATestCase):
 
     def test_popc_u1(self):
         compiled = cuda.jit("void(int32[:], uint8)")(simple_popc)
-        ary = np.zeros(1, dtype=np.int8)
+        ary = np.zeros(1, dtype=np.int32)
         compiled[1, 1](ary, np.uint8(0xFF))
         self.assertEqual(ary[0], 8)
 
     def test_popc_u2(self):
         compiled = cuda.jit("void(int32[:], uint16)")(simple_popc)
-        ary = np.zeros(1, dtype=np.int16)
+        ary = np.zeros(1, dtype=np.int32)
         compiled[1, 1](ary, np.uint16(0xFFFF))
         self.assertEqual(ary[0], 16)
 
@@ -585,13 +585,13 @@ class TestCudaIntrinsic(CUDATestCase):
 
     def test_bit_count_u1(self):
         compiled = cuda.jit("void(int32[:], uint8)")(simple_bit_count)
-        ary = np.zeros(1, dtype=np.int8)
+        ary = np.zeros(1, dtype=np.int32)
         compiled[1, 1](ary, np.uint8(0xFF))
         self.assertEqual(ary[0], 8)
 
     def test_bit_count_u2(self):
         compiled = cuda.jit("void(int32[:], uint16)")(simple_bit_count)
-        ary = np.zeros(1, dtype=np.int16)
+        ary = np.zeros(1, dtype=np.int32)
         compiled[1, 1](ary, np.uint16(0xFFFF))
         self.assertEqual(ary[0], 16)
 
```
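Note: the dtype fixes above align the result array with the `int32[:]` slot in the explicit `cuda.jit` signature; an `int8`/`int16` host array does not match a kernel compiled for `int32[:]`. A minimal sketch of the corrected pattern (the body of `simple_popc` is an assumption; only its name and signature appear in this diff):

```python
import numpy as np
from numba import cuda

@cuda.jit("void(int32[:], uint8)")
def simple_popc(r, x):
    # Assumed body: count the set bits of x with the popc intrinsic.
    r[0] = cuda.popc(x)

ary = np.zeros(1, dtype=np.int32)  # must match the declared int32[:] parameter
simple_popc[1, 1](ary, np.uint8(0xFF))
assert ary[0] == 8  # 0xFF has eight set bits
```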
test_intrinsics.py (continued):

```diff
@@ -639,9 +639,11 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hadd_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(simple_hadd_scalar)
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hadd_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("add.f16", ptx)
 
     @skip_unless_cc_53
@@ -666,9 +668,13 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hfma_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2, f2, f2)", lto=True)(
+            simple_hfma_scalar
+        )
         args = (f2[:], f2, f2, f2)
-        ptx, _ = compile_ptx(simple_hfma_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("fma.rn.f16", ptx)
 
     @skip_unless_cc_53
@@ -691,14 +697,16 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hsub_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(simple_hsub_scalar)
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hsub_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("sub.f16", ptx)
 
     @skip_unless_cc_53
     def test_hmul(self):
-        compiled = cuda.jit()(simple_hmul)
+        compiled = cuda.jit(simple_hmul)
         ary = np.zeros(1, dtype=np.float16)
         arg1 = np.array([3.0], dtype=np.float16)
         arg2 = np.array([4.0], dtype=np.float16)
@@ -716,9 +724,11 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hmul_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(simple_hmul_scalar)
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hmul_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("mul.f16", ptx)
 
     @skip_unless_cc_53
@@ -761,14 +771,16 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hneg_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2)", lto=True)(simple_hneg_scalar)
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_hneg_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("neg.f16", ptx)
 
     @skip_unless_cc_53
     def test_habs(self):
-        compiled = cuda.jit()(simple_habs)
+        compiled = cuda.jit(simple_habs)
         ary = np.zeros(1, dtype=np.float16)
         arg1 = np.array([-3.0], dtype=np.float16)
         compiled[1, 1](ary, arg1)
@@ -784,9 +796,11 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_habs_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2)", lto=True)(simple_habs_scalar)
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_habs_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("abs.f16", ptx)
 
     @skip_unless_cc_53
@@ -908,11 +922,11 @@ class TestCudaIntrinsic(CUDATestCase):
     @skip_unless_cc_53
     def test_multiple_float16_comparisons(self):
         functions = (
-            test_multiple_hcmp_1,
-            test_multiple_hcmp_2,
-            test_multiple_hcmp_3,
-            test_multiple_hcmp_4,
-            test_multiple_hcmp_5,
+            multiple_hcmp_1,
+            multiple_hcmp_2,
+            multiple_hcmp_3,
+            multiple_hcmp_4,
+            multiple_hcmp_5,
         )
         for fn in functions:
             with self.subTest(fn=fn):
```
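Across the `*_ptx` tests above, the removed `compile_ptx` helper is replaced by compiling an LTO-enabled kernel and asking the dispatcher for the PTX produced for a concrete argument tuple; this requires nvJitLink, hence the new `skip_if_nvjitlink_missing` guards. A hedged sketch of the new inspection pattern (the body of `simple_hadd_scalar` is assumed; the `cuda.jit(..., lto=True)` and `inspect_lto_ptx` calls are taken verbatim from this diff):

```python
from numba import cuda
from numba.core.types import f2

def simple_hadd_scalar(r, x, y):
    # Assumed body: float16 addition, lowered to add.f16 under LTO.
    r[0] = x + y

# Compile with link-time optimization enabled, then inspect the LTO PTX
# generated for a concrete argument tuple.
compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(simple_hadd_scalar)
ptx = compiled.inspect_lto_ptx((f2[:], f2, f2))
assert "add.f16" in ptx
```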
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_operator.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_operator.py
@@ -4,10 +4,12 @@ from numba.cuda.testing import (
     CUDATestCase,
     skip_unless_cc_53,
     skip_on_cudasim,
+    skip_if_nvjitlink_missing,
 )
 from numba import cuda
+from numba.core import types
 from numba.core.types import f2, b1
-from numba.cuda import compile_ptx
+from numba.core.typing import signature
 import operator
 import itertools
 from numba.np.numpy_support import from_dtype
@@ -87,27 +89,27 @@ def hlt_func_2(x, y):
     return x < y
 
 
-def test_multiple_hcmp_1(r, a, b, c):
+def multiple_hcmp_1(r, a, b, c):
     # float16 predicates used in two separate functions
     r[0] = hlt_func_1(a, b) and hlt_func_2(b, c)
 
 
-def test_multiple_hcmp_2(r, a, b, c):
+def multiple_hcmp_2(r, a, b, c):
     # The same float16 predicate used in the caller and callee
     r[0] = hlt_func_1(a, b) and b < c
 
 
-def test_multiple_hcmp_3(r, a, b, c):
+def multiple_hcmp_3(r, a, b, c):
     # Different float16 predicates used in the caller and callee
     r[0] = hlt_func_1(a, b) and c >= b
 
 
-def test_multiple_hcmp_4(r, a, b, c):
+def multiple_hcmp_4(r, a, b, c):
     # The same float16 predicates used twice in a function
     r[0] = a < b and b < c
 
 
-def test_multiple_hcmp_5(r, a, b, c):
+def multiple_hcmp_5(r, a, b, c):
     # Different float16 predicates used in a function
     r[0] = a < b and c >= b
 
@@ -172,16 +174,19 @@ class TestOperatorModule(CUDATestCase):
         np.testing.assert_allclose(got, expected)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_binary_ptx(self):
         functions = (simple_fp16add, simple_fp16sub, simple_fp16mul)
         instrs = ("add.f16", "sub.f16", "mul.f16")
         args = (f2[:], f2, f2)
         for fn, instr in zip(functions, instrs):
             with self.subTest(instr=instr):
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
                 self.assertIn(instr, ptx)
 
     @skip_unless_cc_53
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_mixed_fp16_binary_arithmetic(self):
         functions = (
             simple_fp16add,
@@ -193,7 +198,7 @@ class TestOperatorModule(CUDATestCase):
         types = (np.int8, np.int16, np.int32, np.int64, np.float32, np.float64)
         for (fn, op), ty in itertools.product(zip(functions, ops), types):
             with self.subTest(op=op, ty=ty):
-                kernel = cuda.jit(fn)
+                kernel = cuda.jit(fn, lto=True)
 
                 arg1 = np.random.random(1).astype(np.float16)
                 arg2 = (np.random.random(1) * 100).astype(ty)
@@ -205,6 +210,7 @@ class TestOperatorModule(CUDATestCase):
         np.testing.assert_allclose(got, expected)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_inplace_binary_ptx(self):
         functions = (simple_fp16_iadd, simple_fp16_isub, simple_fp16_imul)
         instrs = ("add.f16", "sub.f16", "mul.f16")
@@ -212,7 +218,8 @@ class TestOperatorModule(CUDATestCase):
 
         for fn, instr in zip(functions, instrs):
             with self.subTest(instr=instr):
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit("void(f2[:], f2)", lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
                 self.assertIn(instr, ptx)
 
     @skip_unless_cc_53
@@ -253,16 +260,19 @@ class TestOperatorModule(CUDATestCase):
         np.testing.assert_allclose(got, expected)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_neg_ptx(self):
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_fp16neg, args)
+        compiled = cuda.jit("void(f2[:], f2)", lto=True)(simple_fp16neg)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("neg.f16", ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_abs_ptx(self):
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_fp16abs, args)
-
+        compiled = cuda.jit("void(f2[:], f2)", lto=True)(simple_fp16abs)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("abs.f16", ptx)
 
     @skip_unless_cc_53
@@ -331,11 +341,11 @@ class TestOperatorModule(CUDATestCase):
     @skip_unless_cc_53
     def test_multiple_float16_comparisons(self):
         functions = (
-            test_multiple_hcmp_1,
-            test_multiple_hcmp_2,
-            test_multiple_hcmp_3,
-            test_multiple_hcmp_4,
-            test_multiple_hcmp_5,
+            multiple_hcmp_1,
+            multiple_hcmp_2,
+            multiple_hcmp_3,
+            multiple_hcmp_4,
+            multiple_hcmp_5,
        )
         for fn in functions:
             with self.subTest(fn=fn):
@@ -350,11 +360,11 @@ class TestOperatorModule(CUDATestCase):
     @skip_unless_cc_53
     def test_multiple_float16_comparisons_false(self):
         functions = (
-            test_multiple_hcmp_1,
-            test_multiple_hcmp_2,
-            test_multiple_hcmp_3,
-            test_multiple_hcmp_4,
-            test_multiple_hcmp_5,
+            multiple_hcmp_1,
+            multiple_hcmp_2,
+            multiple_hcmp_3,
+            multiple_hcmp_4,
+            multiple_hcmp_5,
         )
         for fn in functions:
             with self.subTest(fn=fn):
@@ -367,6 +377,7 @@ class TestOperatorModule(CUDATestCase):
             self.assertFalse(ary[0])
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_comparison_ptx(self):
         functions = (
             simple_fp16_gt,
@@ -390,16 +401,18 @@ class TestOperatorModule(CUDATestCase):
             "setp.lt.f16",
             "setp.le.f16",
             "setp.eq.f16",
-            "setp.ne.f16",
+            "setp.neu.f16",
         )
         args = (b1[:], f2, f2)
 
         for fn, op, s in zip(functions, ops, opstring):
             with self.subTest(op=op):
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit("void(b1[:], f2, f2)", lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
                 self.assertIn(s, ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_int8_comparison_ptx(self):
         # Test that int8 can be safely converted to fp16
         # in a comparison
@@ -426,15 +439,17 @@ class TestOperatorModule(CUDATestCase):
             operator.lt: "setp.lt.f16",
             operator.le: "setp.le.f16",
             operator.eq: "setp.eq.f16",
-            operator.ne: "setp.ne.f16",
+            operator.ne: "setp.neu.f16",
         }
         for fn, op in zip(functions, ops):
             with self.subTest(op=op):
                 args = (b1[:], f2, from_dtype(np.int8))
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit(signature(types.void, *args), lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
                 self.assertIn(opstring[op], ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_mixed_fp16_comparison_promotion_ptx(self):
         functions = (
             simple_fp16_gt,
```
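Note the expected opcode for `operator.ne` changing from `setp.ne.f16` to `setp.neu.f16` in the two hunks above: the `u` suffix is PTX's unordered comparison, which is also true when either operand is NaN. That matches Python and NumPy semantics, where `!=` is the one comparison that holds for NaN operands, as a quick host-side check shows:

```python
import numpy as np

a = np.float16("nan")
# IEEE 754: ordered comparisons with NaN are false, but != is true,
# so unordered not-equal (setp.neu) is the faithful lowering.
assert not (a == a)
assert a != a
```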
test_operator.py (continued):

```diff
@@ -475,7 +490,8 @@ class TestOperatorModule(CUDATestCase):
             with self.subTest(op=op, ty=ty):
                 arg2_ty = np.result_type(np.float16, ty)
                 args = (b1[:], f2, from_dtype(arg2_ty))
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit(signature(types.void, *args), lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
 
                 ops = opstring[op] + opsuffix[arg2_ty]
                 self.assertIn(ops, ptx)
```
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py
@@ -4,7 +4,7 @@ from numba.cuda.testing import (
     unittest,
     CUDATestCase,
 )
-from numba import config, cuda
+from numba import cuda
 
 # Basic tests that stream APIs execute on the hardware and in the simulator.
 #
@@ -38,10 +38,7 @@ class TestStreamAPI(CUDATestCase):
         # We don't test synchronization on the stream because it's not a real
         # stream - we used a dummy pointer for testing the API, so we just
         # ensure that the stream handle matches the external stream pointer.
-        if config.CUDA_USE_NVIDIA_BINDING:
-            value = int(s.handle)
-        else:
-            value = s.handle.value
+        value = s.handle.value
         self.assertEqual(ptr, value)
 
     @skip_unless_cudasim("External streams are usable with hardware")
```
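With the removed branch, stream handles expose `.value` uniformly, so callers no longer switch on `config.CUDA_USE_NVIDIA_BINDING`. A hedged sketch of the simplified check (the dummy pointer value is illustrative; `cuda.external_stream` is the public API this test exercises):

```python
from numba import cuda

ptr = 0x12345678  # dummy pointer standing in for a real cudaStream_t
s = cuda.external_stream(ptr)

# 0.18.0: the handle wrapper is uniform across driver bindings, so
# .value is always available; no config-dependent branch is needed.
assert s.handle.value == ptr
```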
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
@@ -17,16 +17,16 @@ def _make_ufunc_usecase(ufunc):
     return fn
 
 
-# This test would also be a CUDATestCase, but to avoid a confusing and
-# potentially dangerous inheritance diamond with setUp methods that modify
-# global state, we implement the necessary parts of CUDATestCase within this
-# class instead. These are:
+# This class provides common functionality for UFunc tests. The UFunc tests
+# are quite long-running in comparison to other tests, so we break the tests up
+# into multiple test classes for distribution across workers.
 #
-# - Disable parallel testing with _numba_parallel_test_.
-# - Disabling CUDA performance warnings for the duration of tests.
-class TestUFuncs(BasicUFuncTest, TestCase):
-    _numba_parallel_test_ = False
-
+# This class would also be a CUDATestCase, but to avoid a confusing and
+# potentially dangerous inheritance diamond with setUp methods that modify
+# global state, we implement the necessary part of CUDATestCase within this
+# class instead. This disables CUDA performance warnings for the duration of
+# tests.
+class CUDAUFuncTestBase(BasicUFuncTest, TestCase):
     def setUp(self):
         BasicUFuncTest.setUp(self)
 
```
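The rewritten comment keeps the original rationale: if `BasicUFuncTest` and `CUDATestCase` both defined `setUp` without cooperative `super()` calls, combining them would silently drop one side's setup. A minimal sketch of that hazard (class names reused for illustration only; these are not the real test classes):

```python
import unittest

class BasicUFuncTest(unittest.TestCase):
    def setUp(self):
        self.fixtures_ready = True  # stands in for ufunc fixtures

class CUDATestCase(unittest.TestCase):
    def setUp(self):
        self.warnings_muted = True  # stands in for global-state changes

class Diamond(BasicUFuncTest, CUDATestCase):
    def test_setup(self):
        # The MRO selects BasicUFuncTest.setUp; CUDATestCase.setUp never
        # runs, so its global-state changes are silently skipped.
        assert self.fixtures_ready
        assert not hasattr(self, "warnings_muted")
```

Splitting the suite into subclasses of a single base, as the hunks below do, avoids the diamond while still sharing setup logic.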
test_ufuncs.py (continued):

```diff
@@ -146,6 +146,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     ############################################################################
     # Trigonometric Functions
 
+
+class TestBasicTrigUFuncs(CUDAUFuncTestBase):
     def test_sin_ufunc(self):
         self.basic_ufunc_test(np.sin, kinds="cf")
 
@@ -167,6 +169,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     def test_arctan2_ufunc(self):
         self.basic_ufunc_test(np.arctan2, kinds="f")
 
+
+class TestHypTrigUFuncs(CUDAUFuncTestBase):
     def test_hypot_ufunc(self):
         self.basic_ufunc_test(np.hypot, kinds="f")
 
@@ -207,6 +211,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
 
         self.basic_ufunc_test(np.arctanh, skip_inputs=to_skip, kinds="cf")
 
+
+class TestConversionUFuncs(CUDAUFuncTestBase):
     def test_deg2rad_ufunc(self):
         self.basic_ufunc_test(np.deg2rad, kinds="f")
 
@@ -221,6 +227,9 @@ class TestUFuncs(BasicUFuncTest, TestCase):
 
     ############################################################################
     # Comparison functions
+
+
+class TestComparisonUFuncs1(CUDAUFuncTestBase):
     def test_greater_ufunc(self):
         self.signed_unsigned_cmp_test(np.greater)
 
@@ -239,6 +248,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     def test_equal_ufunc(self):
         self.signed_unsigned_cmp_test(np.equal)
 
+
+class TestLogicalUFuncs(CUDAUFuncTestBase):
     def test_logical_and_ufunc(self):
         self.basic_ufunc_test(np.logical_and)
 
@@ -251,6 +262,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     def test_logical_not_ufunc(self):
         self.basic_ufunc_test(np.logical_not)
 
+
+class TestMinmaxUFuncs(CUDAUFuncTestBase):
     def test_maximum_ufunc(self):
         self.basic_ufunc_test(np.maximum)
 
@@ -263,6 +276,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     def test_fmin_ufunc(self):
         self.basic_ufunc_test(np.fmin)
 
+
+class TestBitwiseUFuncs(CUDAUFuncTestBase):
     def test_bitwise_and_ufunc(self):
         self.basic_int_ufunc_test(np.bitwise_and)
 
@@ -286,6 +301,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     ############################################################################
     # Mathematical Functions
 
+
+class TestLogUFuncs(CUDAUFuncTestBase):
     def test_log_ufunc(self):
         self.basic_ufunc_test(np.log, kinds="cf")
 
```
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_warning.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_warning.py
@@ -1,7 +1,12 @@
 import numpy as np
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
-from numba.tests.support import linux_only, override_config
+from numba.cuda.cudadrv import driver
+from numba.cuda.testing import (
+    unittest,
+    CUDATestCase,
+    skip_on_cudasim,
+)
+from numba.tests.support import linux_only, override_config, run_in_subprocess
 from numba.core.errors import NumbaPerformanceWarning
 from numba.core import config
 import warnings
@@ -9,6 +14,26 @@ import warnings
 
 @skip_on_cudasim("cudasim does not raise performance warnings")
 class TestWarnings(CUDATestCase):
+    def test_float16_warn_if_lto_missing(self):
+        fp16_kernel_invocation = """
+import math
+from numba import cuda, core
+
+@cuda.jit
+def kernel():
+    x = core.types.float16(1.0)
+    y = math.sin(x)
+
+kernel[1,1]()
+kernel[1,1]()
+"""
+        performance_warning = "float16 relies on LTO for performance"
+        expected_warning_count = 0 if driver._have_nvjitlink() else 1
+        _, err = run_in_subprocess(fp16_kernel_invocation)
+        self.assertEqual(
+            err.decode().count(performance_warning), expected_warning_count
+        )
+
     def test_inefficient_launch_configuration(self):
         @cuda.jit
         def kernel():
```
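The new test runs a float16 kernel in a subprocess and counts the "float16 relies on LTO for performance" warning on stderr: zero occurrences when nvJitLink is available, exactly one otherwise (two launches, one warning, i.e. warn-once behavior). For user code, the remedy the warning points toward is the `lto=True` option used throughout the updated tests; a hedged sketch:

```python
import math
import numpy as np
from numba import cuda

# Compiling with LTO enabled (requires the nvJitLink-backed toolchain)
# is the configuration under which the float16 performance warning is
# not expected to fire.
@cuda.jit(lto=True)
def kernel(x):
    x[0] = math.sin(x[0])

x = np.zeros(1, dtype=np.float16)
kernel[1, 1](x)
```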
```diff
--- /dev/null
+++ b/numba_cuda/numba/cuda/tests/enum_usecases.py
@@ -0,0 +1,56 @@
+from enum import Enum, IntEnum
+
+
+class Color(Enum):
+    red = 1
+    green = 2
+    blue = 3
+
+
+class Shake(Enum):
+    vanilla = 7
+    chocolate = 4
+    cookies = 9
+    # Same as Color.blue
+    mint = 3
+
+
+class Planet(Enum):
+    MERCURY = (3.303e23, 2.4397e6)
+    VENUS = (4.869e24, 6.0518e6)
+    EARTH = (5.976e24, 6.37814e6)
+    MARS = (6.421e23, 3.3972e6)
+    JUPITER = (1.9e27, 7.1492e7)
+    SATURN = (5.688e26, 6.0268e7)
+    URANUS = (8.686e25, 2.5559e7)
+    NEPTUNE = (1.024e26, 2.4746e7)
+
+
+class HeterogeneousEnum(Enum):
+    red = 1.0
+    green = 2.0
+    blue = 3j
+
+
+class Shape(IntEnum):
+    # Same as Color.green
+    circle = 2
+    # Same as RequestError.internal_error
+    square = 500
+
+
+class RequestError(IntEnum):
+    dummy = 2
+    not_found = 404
+    internal_error = 500
+
+
+class IntEnumWithNegatives(IntEnum):
+    # Used for testing of hash, need to make sure -1 -> -2 to comply with CPy
+    one = 1
+    two = 2
+    too = 2
+    three = 3
+    negone = -1
+    negtwo = -2
+    negthree = -3
```
```diff
--- a/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py
+++ b/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py
@@ -9,7 +9,6 @@ from numba.cuda.cudadrv import nvvm
 from numba.cuda.testing import (
     unittest,
     skip_on_cudasim,
-    SerialMixin,
     skip_unless_conda_cudatoolkit,
 )
 from numba.cuda.cuda_paths import (
@@ -24,7 +23,7 @@ has_cuda = nvvm.is_available()
 has_mp_get_context = hasattr(mp, "get_context")
 
 
-class LibraryLookupBase(SerialMixin, unittest.TestCase):
+class LibraryLookupBase(unittest.TestCase):
     def setUp(self):
         ctx = mp.get_context("spawn")
 
```
```diff
--- a/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py
+++ b/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py
@@ -1,6 +1,6 @@
 from numba.cuda.cudadrv import nvvm
 from numba.cuda.testing import skip_on_cudasim
-from numba.core import utils
+from numba.cuda import utils
 
 from llvmlite import ir
 from llvmlite import binding as llvm
```
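This last hunk reflects a broader theme of the release visible in the file list above: numba-cuda now vendors pieces of numba.core (utils.py, cgutils.py, serialize.py, caching.py, callconv.py) so the CUDA target depends less on numba internals. A hypothetical compatibility shim for code that must run against both versions, assuming the vendored module keeps the helpers it needs:

```python
# Prefer the vendored module on newer numba-cuda; fall back to the
# numba core copy on older releases. Hypothetical shim, not from the diff.
try:
    from numba.cuda import utils
except ImportError:
    from numba.core import utils
```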