PyPI - numba-cuda - Versions diffs - 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

numba-cuda 0.8.0 (py3-none-any.whl) → 0.9.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227) hide show

_numba_cuda_redirector.py +17 -13
numba_cuda/VERSION +1 -1
numba_cuda/_version.py +4 -1
numba_cuda/numba/cuda/__init__.py +6 -2
numba_cuda/numba/cuda/api.py +129 -86
numba_cuda/numba/cuda/api_util.py +3 -3
numba_cuda/numba/cuda/args.py +12 -16
numba_cuda/numba/cuda/cg.py +6 -6
numba_cuda/numba/cuda/codegen.py +74 -43
numba_cuda/numba/cuda/compiler.py +232 -113
numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
numba_cuda/numba/cuda/cuda_fp16.h +661 -661
numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
numba_cuda/numba/cuda/cuda_paths.py +291 -99
numba_cuda/numba/cuda/cudadecl.py +125 -69
numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
numba_cuda/numba/cuda/cudadrv/error.py +6 -2
numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
numba_cuda/numba/cuda/cudaimpl.py +317 -233
numba_cuda/numba/cuda/cudamath.py +1 -1
numba_cuda/numba/cuda/debuginfo.py +8 -6
numba_cuda/numba/cuda/decorators.py +75 -45
numba_cuda/numba/cuda/descriptor.py +1 -1
numba_cuda/numba/cuda/device_init.py +69 -18
numba_cuda/numba/cuda/deviceufunc.py +143 -98
numba_cuda/numba/cuda/dispatcher.py +300 -213
numba_cuda/numba/cuda/errors.py +13 -10
numba_cuda/numba/cuda/extending.py +1 -1
numba_cuda/numba/cuda/initialize.py +5 -3
numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
numba_cuda/numba/cuda/intrinsics.py +31 -27
numba_cuda/numba/cuda/kernels/reduction.py +13 -13
numba_cuda/numba/cuda/kernels/transpose.py +3 -6
numba_cuda/numba/cuda/libdevice.py +317 -317
numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
numba_cuda/numba/cuda/locks.py +16 -0
numba_cuda/numba/cuda/mathimpl.py +62 -57
numba_cuda/numba/cuda/models.py +1 -5
numba_cuda/numba/cuda/nvvmutils.py +103 -88
numba_cuda/numba/cuda/printimpl.py +9 -5
numba_cuda/numba/cuda/random.py +46 -36
numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
numba_cuda/numba/cuda/runtime/__init__.py +1 -1
numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
numba_cuda/numba/cuda/runtime/nrt.py +48 -43
numba_cuda/numba/cuda/simulator/__init__.py +22 -12
numba_cuda/numba/cuda/simulator/api.py +38 -22
numba_cuda/numba/cuda/simulator/compiler.py +2 -2
numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
numba_cuda/numba/cuda/simulator/kernel.py +43 -34
numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
numba_cuda/numba/cuda/simulator/reduction.py +1 -0
numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
numba_cuda/numba/cuda/simulator_init.py +2 -4
numba_cuda/numba/cuda/stubs.py +139 -102
numba_cuda/numba/cuda/target.py +64 -47
numba_cuda/numba/cuda/testing.py +24 -19
numba_cuda/numba/cuda/tests/__init__.py +14 -12
numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
numba_cuda/numba/cuda/types.py +5 -2
numba_cuda/numba/cuda/ufuncs.py +382 -362
numba_cuda/numba/cuda/utils.py +2 -2
numba_cuda/numba/cuda/vector_types.py +2 -2
numba_cuda/numba/cuda/vectorizers.py +37 -32
{numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
numba_cuda-0.9.0.dist-info/RECORD +253 -0
{numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
numba_cuda-0.8.0.dist-info/RECORD +0 -251
{numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
{numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0

numba_cuda/numba/cuda/tests/cudapy/test_ipc.py CHANGED Viewed

@@ -7,9 +7,13 @@ import numpy as np
 from numba import cuda
 from numba.cuda.cudadrv import driver
-from numba.cuda.testing import (skip_on_arm, skip_on_cudasim,
-                                skip_under_cuda_memcheck,
-                                ContextResettingTestCase, ForeignArray)
+from numba.cuda.testing import (
+    skip_on_arm,
+    skip_on_cudasim,
+    skip_under_cuda_memcheck,
+    ContextResettingTestCase,
+    ForeignArray,
+)
 from numba.tests.support import linux_only, windows_only
 import unittest
@@ -32,8 +36,9 @@ def core_ipc_handle_test(the_work, result_queue):
 def base_ipc_handle_test(handle, size, result_queue):
     def the_work():
         dtype = np.dtype(np.intp)
-        with cuda.open_ipc_array(handle, shape=size // dtype.itemsize,
-                                 dtype=dtype) as darr:
+        with cuda.open_ipc_array(
+            handle, shape=size // dtype.itemsize, dtype=dtype
+        ) as darr:
             # copy the data to host
             return darr.copy_to_host()
@@ -43,9 +48,11 @@ def base_ipc_handle_test(handle, size, result_queue):
 def serialize_ipc_handle_test(handle, result_queue):
     def the_work():
         dtype = np.dtype(np.intp)
-        darr = handle.open_array(cuda.current_context(),
-                                 shape=handle.size // dtype.itemsize,
-                                 dtype=dtype)
+        darr = handle.open_array(
+            cuda.current_context(),
+            shape=handle.size // dtype.itemsize,
+            dtype=dtype,
+        )
         # copy the data to host
         arr = darr.copy_to_host()
         handle.close()
@@ -63,10 +70,10 @@ def ipc_array_test(ipcarr, result_queue):
                 with ipcarr:
                     pass
             except ValueError as e:
-                if str(e) != 'IpcHandle is already opened':
-                    raise AssertionError('invalid exception message')
+                if str(e) != "IpcHandle is already opened":
+                    raise AssertionError("invalid exception message")
             else:
-                raise AssertionError('did not raise on reopen')
+                raise AssertionError("did not raise on reopen")
     # Catch any exception so we can propagate it
     except:  # noqa: E722
         # FAILED. propagate the exception as a string
@@ -80,11 +87,10 @@ def ipc_array_test(ipcarr, result_queue):
 @linux_only
-@skip_under_cuda_memcheck('Hangs cuda-memcheck')
-@skip_on_cudasim('Ipc not available in CUDASIM')
-@skip_on_arm('CUDA IPC not supported on ARM in Numba')
+@skip_under_cuda_memcheck("Hangs cuda-memcheck")
+@skip_on_cudasim("Ipc not available in CUDASIM")
+@skip_on_arm("CUDA IPC not supported on ARM in Numba")
 class TestIpcMemory(ContextResettingTestCase):
     def test_ipc_handle(self):
         # prepare data for IPC
         arr = np.arange(10, dtype=np.intp)
@@ -102,7 +108,7 @@ class TestIpcMemory(ContextResettingTestCase):
         size = ipch.size
         # spawn new process for testing
-        ctx = mp.get_context('spawn')
+        ctx = mp.get_context("spawn")
         result_queue = ctx.Queue()
         args = (handle_bytes, size, result_queue)
         proc = ctx.Process(target=base_ipc_handle_test, args=args)
@@ -145,11 +151,12 @@ class TestIpcMemory(ContextResettingTestCase):
         if driver.USE_NV_BINDING:
             self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
         else:
-            self.assertEqual(ipch_recon.handle.reserved[:],
-                             ipch.handle.reserved[:])
+            self.assertEqual(
+                ipch_recon.handle.reserved[:], ipch.handle.reserved[:]
+            )
         # spawn new process for testing
-        ctx = mp.get_context('spawn')
+        ctx = mp.get_context("spawn")
         result_queue = ctx.Queue()
         args = (ipch, result_queue)
         proc = ctx.Process(target=serialize_ipc_handle_test, args=args)
@@ -162,7 +169,10 @@ class TestIpcMemory(ContextResettingTestCase):
         proc.join(3)
     def test_ipc_handle_serialization(self):
-        for index, foreign, in self.variants():
+        for (
+            index,
+            foreign,
+        ) in self.variants():
             with self.subTest(index=index, foreign=foreign):
                 self.check_ipc_handle_serialization(index, foreign)
@@ -179,7 +189,7 @@ class TestIpcMemory(ContextResettingTestCase):
         ipch = devarr.get_ipc_handle()
         # spawn new process for testing
-        ctx = mp.get_context('spawn')
+        ctx = mp.get_context("spawn")
         result_queue = ctx.Queue()
         args = (ipch, result_queue)
         proc = ctx.Process(target=ipc_array_test, args=args)
@@ -192,7 +202,10 @@ class TestIpcMemory(ContextResettingTestCase):
         proc.join(3)
     def test_ipc_array(self):
-        for index, foreign, in self.variants():
+        for (
+            index,
+            foreign,
+        ) in self.variants():
             with self.subTest(index=index, foreign=foreign):
                 self.check_ipc_array(index, foreign)
@@ -205,7 +218,9 @@ def staged_ipc_handle_test(handle, device_num, result_queue):
             arrsize = handle.size // np.dtype(np.intp).itemsize
             hostarray = np.zeros(arrsize, dtype=np.intp)
             cuda.driver.device_to_host(
-                hostarray, deviceptr, size=handle.size,
+                hostarray,
+                deviceptr,
+                size=handle.size,
             )
             handle.close()
         return hostarray
@@ -223,10 +238,10 @@ def staged_ipc_array_test(ipcarr, device_num, result_queue):
                     with ipcarr:
                         pass
                 except ValueError as e:
-                    if str(e) != 'IpcHandle is already opened':
-                        raise AssertionError('invalid exception message')
+                    if str(e) != "IpcHandle is already opened":
+                        raise AssertionError("invalid exception message")
                 else:
-                    raise AssertionError('did not raise on reopen')
+                    raise AssertionError("did not raise on reopen")
     # Catch any exception so we can propagate it
     except:  # noqa: E722
         # FAILED. propagate the exception as a string
@@ -240,9 +255,9 @@ def staged_ipc_array_test(ipcarr, device_num, result_queue):
 @linux_only
-@skip_under_cuda_memcheck('Hangs cuda-memcheck')
-@skip_on_cudasim('Ipc not available in CUDASIM')
-@skip_on_arm('CUDA IPC not supported on ARM in Numba')
+@skip_under_cuda_memcheck("Hangs cuda-memcheck")
+@skip_on_cudasim("Ipc not available in CUDASIM")
+@skip_on_arm("CUDA IPC not supported on ARM in Numba")
 class TestIpcStaged(ContextResettingTestCase):
     def test_staged(self):
         # prepare data for IPC
@@ -250,7 +265,7 @@ class TestIpcStaged(ContextResettingTestCase):
         devarr = cuda.to_device(arr)
         # spawn new process for testing
-        mpctx = mp.get_context('spawn')
+        mpctx = mp.get_context("spawn")
         result_queue = mpctx.Queue()
         # create IPC handle
@@ -264,8 +279,7 @@ class TestIpcStaged(ContextResettingTestCase):
             self.assertEqual(ipch_recon.handle.reserved, ipch.handle.reserved)
         else:
             self.assertEqual(
-                ipch_recon.handle.reserved[:],
-                ipch.handle.reserved[:]
+                ipch_recon.handle.reserved[:], ipch.handle.reserved[:]
             )
         self.assertEqual(ipch_recon.size, ipch.size)
@@ -289,7 +303,7 @@ class TestIpcStaged(ContextResettingTestCase):
             ipch = devarr.get_ipc_handle()
             # spawn new process for testing
-            ctx = mp.get_context('spawn')
+            ctx = mp.get_context("spawn")
             result_queue = ctx.Queue()
             args = (ipch, device_num, result_queue)
             proc = ctx.Process(target=staged_ipc_array_test, args=args)
@@ -303,7 +317,7 @@ class TestIpcStaged(ContextResettingTestCase):
 @windows_only
-@skip_on_cudasim('Ipc not available in CUDASIM')
+@skip_on_cudasim("Ipc not available in CUDASIM")
 class TestIpcNotSupported(ContextResettingTestCase):
     def test_unsupported(self):
         arr = np.arange(10, dtype=np.intp)
@@ -311,8 +325,8 @@ class TestIpcNotSupported(ContextResettingTestCase):
         with self.assertRaises(OSError) as raises:
             devarr.get_ipc_handle()
         errmsg = str(raises.exception)
-        self.assertIn('OS does not support CUDA IPC', errmsg)
+        self.assertIn("OS does not support CUDA IPC", errmsg)
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_iterators.py CHANGED Viewed

@@ -5,7 +5,6 @@ import numpy as np
 class TestIterators(CUDATestCase):
     def test_enumerate(self):
         @cuda.jit
         def enumerator(x, error):
@@ -95,5 +94,5 @@ class TestIterators(CUDATestCase):
         self._test_twoarg_function(zipper_enumerator)
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_lang.py CHANGED Viewed

@@ -10,7 +10,7 @@ from numba.cuda.testing import unittest, CUDATestCase
 class TestLang(CUDATestCase):
     def test_enumerate(self):
-        tup = (1., 2.5, 3.)
+        tup = (1.0, 2.5, 3.0)
         @cuda.jit("void(float64[:])")
         def foo(a):
@@ -39,12 +39,12 @@ class TestLang(CUDATestCase):
         self.assertTrue(np.all(a == (b + c).sum()))
     def test_issue_872(self):
-        '''
+        """
         Ensure that typing and lowering of CUDA kernel API primitives works in
         more than one block. Was originally to ensure that macro expansion works
         for more than one block (issue #872), but macro expansion has been
         replaced by a "proper" implementation of all kernel API functions.
-        '''
+        """
         @cuda.jit("void(float64[:,:])")
         def cuda_kernel_api_in_multiple_blocks(ary):
@@ -60,5 +60,5 @@ class TestLang(CUDATestCase):
         cuda_kernel_api_in_multiple_blocks[1, (2, 3)](a)
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_laplace.py CHANGED Viewed

@@ -14,7 +14,6 @@ SM_SIZE = tpb, tpb
 class TestCudaLaplace(CUDATestCase):
     def test_laplace_small(self):
         @cuda.jit(float64(float64, float64), device=True, inline=True)
         def get_max(a, b):
             if a > b:
@@ -38,8 +37,9 @@ class TestCudaLaplace(CUDATestCase):
             err_sm[ty, tx] = 0
             if j >= 1 and j < n - 1 and i >= 1 and i < m - 1:
-                Anew[j, i] = 0.25 * ( A[j, i + 1] + A[j, i - 1]
-                                      + A[j - 1, i] + A[j + 1, i])
+                Anew[j, i] = 0.25 * (
+                    A[j, i + 1] + A[j, i - 1] + A[j - 1, i] + A[j + 1, i]
+                )
                 err_sm[ty, tx] = Anew[j, i] - A[j, i]
             cuda.syncthreads()
@@ -91,8 +91,8 @@ class TestCudaLaplace(CUDATestCase):
         stream = cuda.stream()
-        dA = cuda.to_device(A, stream)          # to device and don't come back
-        dAnew = cuda.to_device(Anew, stream)    # to device and don't come back
+        dA = cuda.to_device(A, stream)  # to device and don't come back
+        dAnew = cuda.to_device(Anew, stream)  # to device and don't come back
         derror_grid = cuda.to_device(error_grid, stream)
         while error > tol and iter < iter_max:
@@ -115,5 +115,5 @@ class TestCudaLaplace(CUDATestCase):
             iter += 1
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py CHANGED Viewed

@@ -31,7 +31,7 @@ def use_sad(r, x, y, z):
         r[i] = libdevice.sad(x[i], y[i], z[i])
-@skip_on_cudasim('Libdevice functions are not supported on cudasim')
+@skip_on_cudasim("Libdevice functions are not supported on cudasim")
 class TestLibdevice(CUDATestCase):
     """
     Some tests of libdevice function wrappers that check the returned values.
@@ -102,14 +102,15 @@ def make_test_call(libname):
     def _test_call_functions(self):
         # Strip off '__nv_' from libdevice name to get Python name
         apiname = libname[5:]
-        apifunc = getattr(libdevice, apiname)
+        apifunc = getattr(libdevice, apiname)  # noqa: F841
         retty, args = functions[libname]
         sig = create_signature(retty, args)
         # Construct arguments to the libdevice function. These are all
         # non-pointer arguments to the underlying bitcode function.
-        funcargs = ", ".join(['a%d' % i for i, arg in enumerate(args) if not
-                              arg.is_ptr])
+        funcargs = ", ".join(
+            ["a%d" % i for i, arg in enumerate(args) if not arg.is_ptr]
+        )
         # Arguments to the Python function (`pyfunc` in the template above) are
         # the arguments to the libdevice function, plus as many extra arguments
@@ -118,35 +119,37 @@ def make_test_call(libname):
         # returns.
         if isinstance(sig.return_type, (types.Tuple, types.UniTuple)):
             # Start with the parameters for the return values
-            pyargs = ", ".join(['r%d' % i for i in
-                                range(len(sig.return_type))])
+            pyargs = ", ".join(["r%d" % i for i in range(len(sig.return_type))])
             # Add the parameters for the argument values
             pyargs += ", " + funcargs
             # Generate the unpacking of the return value from the libdevice
             # function into the Python function return values (`r0`, `r1`,
             # etc.).
-            retvars = ", ".join(['r%d[0]' % i for i in
-                                 range(len(sig.return_type))])
+            retvars = ", ".join(
+                ["r%d[0]" % i for i in range(len(sig.return_type))]
+            )
         else:
             # Scalar return is a more straightforward case
             pyargs = "r0, " + funcargs
             retvars = "r0[0]"
         # Create the string containing the function to compile
-        d = { 'func': apiname,
-              'pyargs': pyargs,
-              'funcargs': funcargs,
-              'retvars': retvars }
+        d = {
+            "func": apiname,
+            "pyargs": pyargs,
+            "funcargs": funcargs,
+            "retvars": retvars,
+        }
         code = function_template % d
         # Convert the string to a Python function
         locals = {}
         exec(code, globals(), locals)
-        pyfunc = locals['pyfunc']
+        pyfunc = locals["pyfunc"]
         # Compute the signature for compilation. This mirrors the creation of
         # arguments to the Python function above.
-        pyargs = [ arg.ty for arg in args if not arg.is_ptr ]
+        pyargs = [arg.ty for arg in args if not arg.is_ptr]
         if isinstance(sig.return_type, (types.Tuple, types.UniTuple)):
             pyreturns = [ret[::1] for ret in sig.return_type]
             pyargs = pyreturns + pyargs
@@ -159,16 +162,16 @@ def make_test_call(libname):
         # If the function body was discarded by optimization (therefore making
         # the test a bit weak), there won't be any loading of parameters -
         # ensure that a load from parameters occurs somewhere in the PTX
-        self.assertIn('ld.param', ptx)
+        self.assertIn("ld.param", ptx)
         # Returning the result (through a passed-in array) should also require
         # a store to global memory, so check for at least one of those too.
-        self.assertIn('st.global', ptx)
+        self.assertIn("st.global", ptx)
     return _test_call_functions
-@skip_on_cudasim('Compilation to PTX is not supported on cudasim')
+@skip_on_cudasim("Compilation to PTX is not supported on cudasim")
 class TestLibdeviceCompilation(unittest.TestCase):
     """
     Class for holding all tests of compiling calls to libdevice functions. We
@@ -179,9 +182,10 @@ class TestLibdeviceCompilation(unittest.TestCase):
 for libname in functions:
-    setattr(TestLibdeviceCompilation, 'test_%s' % libname,
-            make_test_call(libname))
+    setattr(
+        TestLibdeviceCompilation, "test_%s" % libname, make_test_call(libname)
+    )
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py CHANGED Viewed

@@ -7,16 +7,16 @@ import unittest
 import warnings
-@skip_on_cudasim('Simulator does not produce lineinfo')
+@skip_on_cudasim("Simulator does not produce lineinfo")
 class TestCudaLineInfo(CUDATestCase):
     def _loc_directive_regex(self):
         # This is used in several tests
         pat = (
-            r'\.loc'      # .loc directive beginning
-            r'\s+[0-9]+'  # whitespace then file index
-            r'\s+[0-9]+'  # whitespace then line number
-            r'\s+[0-9]+'  # whitespace then column position
+            r"\.loc"  # .loc directive beginning
+            r"\s+[0-9]+"  # whitespace then file index
+            r"\s+[0-9]+"  # whitespace then line number
+            r"\s+[0-9]+"  # whitespace then column position
         )
         return re.compile(pat)
@@ -29,21 +29,21 @@ class TestCudaLineInfo(CUDATestCase):
         # DICompileUnit debug info metadata should all be of the
         # DebugDirectivesOnly kind, and not the FullDebug kind
         pat = (
-            r'!DICompileUnit\(.*'    # Opening of DICompileUnit metadata. Since
-                                     # the order of attributes is not
-                                     # guaranteed, we need to match arbitrarily
-                                     # afterwards.
-            r'emissionKind:\s+'      # The emissionKind attribute followed by
-                                     # whitespace.
-            r'DebugDirectivesOnly'   # The correct emissionKind.
+            r"!DICompileUnit\(.*"  # Opening of DICompileUnit metadata. Since
+            # the order of attributes is not
+            # guaranteed, we need to match arbitrarily
+            # afterwards.
+            r"emissionKind:\s+"  # The emissionKind attribute followed by
+            # whitespace.
+            r"DebugDirectivesOnly"  # The correct emissionKind.
         )
         match = re.compile(pat).search(llvm)
         assertfn(match, msg=ptx)
         pat = (
-            r'!DICompileUnit\(.*'  # Same as the pattern above, but for the
-            r'emissionKind:\s+'    # incorrect FullDebug emissionKind.
-            r'FullDebug'           #
+            r"!DICompileUnit\(.*"  # Same as the pattern above, but for the
+            r"emissionKind:\s+"  # incorrect FullDebug emissionKind.
+            r"FullDebug"  #
         )
         match = re.compile(pat).search(llvm)
         self.assertIsNone(match, msg=ptx)
@@ -51,8 +51,8 @@ class TestCudaLineInfo(CUDATestCase):
         # The name of this file should be present in the line mapping
         # if lineinfo was propagated through correctly.
         pat = (
-            r'\.file'                # .file directive beginning
-            r'\s+[0-9]+\s+'          # file number surrounded by whitespace
+            r"\.file"  # .file directive beginning
+            r"\s+[0-9]+\s+"  # file number surrounded by whitespace
             r'".*test_lineinfo.py"'  # filename in quotes, ignoring full path
         )
         match = re.compile(pat).search(ptx)
@@ -65,8 +65,8 @@ class TestCudaLineInfo(CUDATestCase):
         # Debug info sections should not be present when only lineinfo is
         # generated
         pat = (
-            r'\.section\s+'  # .section directive beginning
-            r'\.debug_info'  # Section named ".debug_info"
+            r"\.section\s+"  # .section directive beginning
+            r"\.debug_info"  # Section named ".debug_info"
         )
         match = re.compile(pat).search(ptx)
         self.assertIsNone(match, msg=ptx)
@@ -98,7 +98,7 @@ class TestCudaLineInfo(CUDATestCase):
         # signal an exception (e.g. divide by zero) has occurred. When the
         # error model is the default NumPy one (as it should be when only
         # lineinfo is enabled) the device function always returns 0.
-        self.assertNotIn('ret i32 1', llvm)
+        self.assertNotIn("ret i32 1", llvm)
     def test_no_lineinfo_in_device_function(self):
         # Ensure that no lineinfo is generated in device functions by default.
@@ -138,7 +138,7 @@ class TestCudaLineInfo(CUDATestCase):
         # Check that there is no device function in the PTX
         # A line beginning with ".weak .func" that identifies a device function
-        devfn_start = re.compile(r'^\.weak\s+\.func')
+        devfn_start = re.compile(r"^\.weak\s+\.func")
         for line in ptxlines:
             if devfn_start.match(line) is not None:
@@ -151,13 +151,14 @@ class TestCudaLineInfo(CUDATestCase):
         for line in ptxlines:
             if loc_directive.search(line) is not None:
-                if 'inlined_at' in line:
+                if "inlined_at" in line:
                     found = True
                     break
         if not found:
-            self.fail(f'No .loc directive with inlined_at info found'
-                      f'in:\n\n{ptx}')
+            self.fail(
+                f"No .loc directive with inlined_at info foundin:\n\n{ptx}"
+            )
         # We also inspect the LLVM to ensure that there's debug info for each
         # subprogram (function). A lightweight way to check this is to ensure
@@ -166,7 +167,7 @@ class TestCudaLineInfo(CUDATestCase):
         llvm = caller.inspect_llvm(sig)
         subprograms = 0
         for line in llvm.splitlines():
-            if 'distinct !DISubprogram' in line:
+            if "distinct !DISubprogram" in line:
                 subprograms += 1
         # One DISubprogram for each of:
@@ -174,9 +175,12 @@ class TestCudaLineInfo(CUDATestCase):
         # - The callee
         expected_subprograms = 2
-        self.assertEqual(subprograms, expected_subprograms,
-                         f'"Expected {expected_subprograms} DISubprograms; '
-                         f'got {subprograms}')
+        self.assertEqual(
+            subprograms,
+            expected_subprograms,
+            f'"Expected {expected_subprograms} DISubprograms; '
+            f"got {subprograms}",
+        )
     def test_debug_and_lineinfo_warning(self):
         with warnings.catch_warnings(record=True) as w:
@@ -190,9 +194,10 @@ class TestCudaLineInfo(CUDATestCase):
         self.assertEqual(len(w), 1)
         self.assertEqual(w[0].category, NumbaInvalidConfigWarning)
-        self.assertIn('debug and lineinfo are mutually exclusive',
-                      str(w[0].message))
+        self.assertIn(
+            "debug and lineinfo are mutually exclusive", str(w[0].message)
+        )
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_localmem.py CHANGED Viewed

@@ -31,32 +31,31 @@ def culocal1tuple(A, B):
         B[i] = C[i]
-@skip_on_cudasim('PTX inspection not available in cudasim')
+@skip_on_cudasim("PTX inspection not available in cudasim")
 class TestCudaLocalMem(CUDATestCase):
     def test_local_array(self):
         sig = (int32[:], int32[:])
         jculocal = cuda.jit(sig)(culocal)
-        self.assertTrue('.local' in jculocal.inspect_asm(sig))
-        A = np.arange(1000, dtype='int32')
+        self.assertTrue(".local" in jculocal.inspect_asm(sig))
+        A = np.arange(1000, dtype="int32")
         B = np.zeros_like(A)
         jculocal[1, 1](A, B)
         self.assertTrue(np.all(A == B))
     def test_local_array_1_tuple(self):
-        """Ensure that local arrays can be constructed with 1-tuple shape
-        """
-        jculocal = cuda.jit('void(int32[:], int32[:])')(culocal1tuple)
+        """Ensure that local arrays can be constructed with 1-tuple shape"""
+        jculocal = cuda.jit("void(int32[:], int32[:])")(culocal1tuple)
         # Don't check if .local is in the ptx because the optimizer
         # may reduce it to registers.
-        A = np.arange(5, dtype='int32')
+        A = np.arange(5, dtype="int32")
         B = np.zeros_like(A)
         jculocal[1, 1](A, B)
         self.assertTrue(np.all(A == B))
     def test_local_array_complex(self):
-        sig = 'void(complex128[:], complex128[:])'
+        sig = "void(complex128[:], complex128[:])"
         jculocalcomplex = cuda.jit(sig)(culocalcomplex)
-        A = (np.arange(100, dtype='complex128') - 1) / 2j
+        A = (np.arange(100, dtype="complex128") - 1) / 2j
         B = np.zeros_like(A)
         jculocalcomplex[1, 1](A, B)
         self.assertTrue(np.all(A == B))
@@ -64,7 +63,7 @@ class TestCudaLocalMem(CUDATestCase):
     def check_dtype(self, f, dtype):
         # Find the typing of the dtype argument to cuda.local.array
         annotation = next(iter(f.overloads.values()))._type_annotation
-        l_dtype = annotation.typemap['l'].dtype
+        l_dtype = annotation.typemap["l"].dtype
         # Ensure that the typing is correct
         self.assertEqual(l_dtype, dtype)
@@ -95,7 +94,7 @@ class TestCudaLocalMem(CUDATestCase):
         # Check that strings can be used to specify the dtype of a local array
         @cuda.jit(void(int32[::1]))
         def f(x):
-            l = cuda.local.array(10, dtype='int32')
+            l = cuda.local.array(10, dtype="int32")
             l[0] = x[0]
             x[0] = l[0]
@@ -106,9 +105,10 @@ class TestCudaLocalMem(CUDATestCase):
         # Check that strings of invalid dtypes cause a typing error
         re = ".*Invalid NumPy dtype specified: 'int33'.*"
         with self.assertRaisesRegex(TypingError, re):
             @cuda.jit(void(int32[::1]))
             def f(x):
-                l = cuda.local.array(10, dtype='int33')
+                l = cuda.local.array(10, dtype="int33")
                 l[0] = x[0]
                 x[0] = l[0]
@@ -160,5 +160,5 @@ class TestCudaLocalMem(CUDATestCase):
         self._check_local_array_size_fp16(2, 2, np.float16)
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_mandel.py CHANGED Viewed

@@ -3,11 +3,10 @@ from numba.cuda.compiler import compile_ptx
 from numba.cuda.testing import skip_on_cudasim, unittest
-@skip_on_cudasim('Compilation unsupported in the simulator')
+@skip_on_cudasim("Compilation unsupported in the simulator")
 class TestCudaMandel(unittest.TestCase):
     def test_mandel(self):
-        """Just make sure we can compile this
-        """
+        """Just make sure we can compile this"""
         def mandel(tid, min_x, max_x, min_y, max_y, width, height, iters):
             pixel_size_x = (max_x - min_x) / width
@@ -28,10 +27,18 @@ class TestCudaMandel(unittest.TestCase):
                     return i
             return iters
-        args = (uint32, float64, float64, float64, float64,
-                uint32, uint32, uint32)
+        args = (
+            uint32,
+            float64,
+            float64,
+            float64,
+            float64,
+            uint32,
+            uint32,
+            uint32,
+        )
         compile_ptx(mandel, args, device=True)
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()

numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl