numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -25,7 +25,7 @@ from numba.core.errors import NumbaPerformanceWarning
 from warnings import warn
 
 try:
-    lru_cache = getattr(functools, 'lru_cache')(None)
+    lru_cache = getattr(functools, "lru_cache")(None)
 except AttributeError:
     # Python 3.1 or lower
     def lru_cache(func):
@@ -34,7 +34,7 @@ except AttributeError:
 
 def is_cuda_ndarray(obj):
     "Check if an object is a CUDA ndarray"
-    return getattr(obj, '__cuda_ndarray__', False)
+    return getattr(obj, "__cuda_ndarray__", False)
 
 
 def verify_cuda_ndarray_interface(obj):
@@ -45,25 +45,25 @@ def verify_cuda_ndarray_interface(obj):
         if not hasattr(obj, attr):
             raise AttributeError(attr)
         if not isinstance(getattr(obj, attr), typ):
-            raise AttributeError('%s must be of type %s' % (attr, typ))
+            raise AttributeError("%s must be of type %s" % (attr, typ))
 
-    requires_attr('shape', tuple)
-    requires_attr('strides', tuple)
-    requires_attr('dtype', np.dtype)
-    requires_attr('size', int)
+    requires_attr("shape", tuple)
+    requires_attr("strides", tuple)
+    requires_attr("dtype", np.dtype)
+    requires_attr("size", int)
 
 
 def require_cuda_ndarray(obj):
     "Raises ValueError is is_cuda_ndarray(obj) evaluates False"
     if not is_cuda_ndarray(obj):
-        raise ValueError('require an cuda ndarray object')
+        raise ValueError("require an cuda ndarray object")
 
 
 class DeviceNDArrayBase(_devicearray.DeviceArray):
-    """A on GPU NDArray representation
-    """
+    """A on GPU NDArray representation"""
+
     __cuda_memory__ = True
-    __cuda_ndarray__ = True # There must be gpu_data attribute
+    __cuda_ndarray__ = True  # There must be gpu_data attribute
 
     def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
         """
@@ -88,9 +88,10 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         dtype = np.dtype(dtype)
         self.ndim = len(shape)
         if len(strides) != self.ndim:
-            raise ValueError('strides not match ndim')
-        self._dummy = dummyarray.Array.from_desc(0, shape, strides,
-                                                 dtype.itemsize)
+            raise ValueError("strides not match ndim")
+        self._dummy = dummyarray.Array.from_desc(
+            0, shape, strides, dtype.itemsize
+        )
         self.shape = tuple(shape)
         self.strides = tuple(strides)
         self.dtype = dtype
@@ -99,7 +100,8 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         if self.size > 0:
             if gpu_data is None:
                 self.alloc_size = _driver.memory_size_from_info(
-                    self.shape, self.strides, self.dtype.itemsize)
+                    self.shape, self.strides, self.dtype.itemsize
+                )
                 gpu_data = devices.get_context().memalloc(self.alloc_size)
             else:
                 self.alloc_size = _driver.device_memory_size(gpu_data)
@@ -109,8 +111,9 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
                 null = _driver.binding.CUdeviceptr(0)
             else:
                 null = c_void_p(0)
-            gpu_data = _driver.MemoryPointer(context=devices.get_context(),
-                                             pointer=null, size=0)
+            gpu_data = _driver.MemoryPointer(
+                context=devices.get_context(), pointer=null, size=0
+            )
             self.alloc_size = 0
 
         self.gpu_data = gpu_data
@@ -130,12 +133,12 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             ptr = 0
 
         return {
-            'shape': tuple(self.shape),
-            'strides': None if is_contiguous(self) else tuple(self.strides),
-            'data': (ptr, False),
-            'typestr': self.dtype.str,
-            'stream': int(self.stream) if self.stream != 0 else None,
-            'version': 3,
+            "shape": tuple(self.shape),
+            "strides": None if is_contiguous(self) else tuple(self.strides),
+            "data": (ptr, False),
+            "typestr": self.dtype.str,
+            "stream": int(self.stream) if self.stream != 0 else None,
+            "version": 3,
         }
 
     def bind(self, stream=0):
@@ -160,6 +163,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             raise ValueError("invalid axes list %r" % (axes,))
         else:
             from numba.cuda.kernels.transpose import transpose
+
             return transpose(self)
 
     def _default_stream(self, stream):
@@ -186,20 +190,19 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         # layouts.
 
         broadcast = 0 in self.strides
-        if self.flags['C_CONTIGUOUS'] and not broadcast:
-            layout = 'C'
-        elif self.flags['F_CONTIGUOUS'] and not broadcast:
-            layout = 'F'
+        if self.flags["C_CONTIGUOUS"] and not broadcast:
+            layout = "C"
+        elif self.flags["F_CONTIGUOUS"] and not broadcast:
+            layout = "F"
         else:
-            layout = 'A'
+            layout = "A"
 
         dtype = numpy_support.from_dtype(self.dtype)
         return types.Array(dtype, self.ndim, layout)
 
     @property
     def device_ctypes_pointer(self):
-        """Returns the ctypes pointer to the GPU data buffer
-        """
+        """Returns the ctypes pointer to the GPU data buffer"""
         if self.gpu_data is None:
             if _driver.USE_NV_BINDING:
                 return _driver.binding.CUdeviceptr(0)
@@ -232,13 +235,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         # (i.e., in order to materialize a writable strided view)
         ary_core = np.array(
             ary_core,
-            order='C' if self_core.flags['C_CONTIGUOUS'] else 'F',
+            order="C" if self_core.flags["C_CONTIGUOUS"] else "F",
             subok=True,
-            copy=(not ary_core.flags['WRITEABLE'])
-            if numpy_version < (2, 0) else None)
+            copy=(not ary_core.flags["WRITEABLE"])
+            if numpy_version < (2, 0)
+            else None,
+        )
         check_array_compatibility(self_core, ary_core)
-        _driver.host_to_device(self, ary_core, self.alloc_size,
-                               stream=stream)
+        _driver.host_to_device(
+            self, ary_core, self.alloc_size, stream=stream
+        )
 
     @devices.require_context
     def copy_to_host(self, ary=None, stream=0):
@@ -264,7 +270,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             result_array = d_arr.copy_to_host()
         """
         if any(s < 0 for s in self.strides):
-            msg = 'D->H copy not implemented for negative strides: {}'
+            msg = "D->H copy not implemented for negative strides: {}"
             raise NotImplementedError(msg.format(self.strides))
         assert self.alloc_size >= 0, "Negative memory size"
         stream = self._default_stream(stream)
@@ -275,16 +281,22 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             hostary = ary
 
         if self.alloc_size != 0:
-            _driver.device_to_host(hostary, self, self.alloc_size,
-                                   stream=stream)
+            _driver.device_to_host(
+                hostary, self, self.alloc_size, stream=stream
+            )
 
         if ary is None:
             if self.size == 0:
-                hostary = np.ndarray(shape=self.shape, dtype=self.dtype,
-                                     buffer=hostary)
+                hostary = np.ndarray(
+                    shape=self.shape, dtype=self.dtype, buffer=hostary
+                )
             else:
-                hostary = np.ndarray(shape=self.shape, dtype=self.dtype,
-                                     strides=self.strides, buffer=hostary)
+                hostary = np.ndarray(
+                    shape=self.shape,
+                    dtype=self.dtype,
+                    strides=self.strides,
+                    buffer=hostary,
+                )
         return hostary
 
     def split(self, section, stream=0):
@@ -305,12 +317,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             end = min(begin + section, self.size)
             shape = (end - begin,)
             gpu_data = self.gpu_data.view(begin * itemsize, end * itemsize)
-            yield DeviceNDArray(shape, strides, dtype=self.dtype, stream=stream,
-                                gpu_data=gpu_data)
+            yield DeviceNDArray(
+                shape,
+                strides,
+                dtype=self.dtype,
+                stream=stream,
+                gpu_data=gpu_data,
+            )
 
     def as_cuda_arg(self):
-        """Returns a device memory object that is used as the argument.
-        """
+        """Returns a device memory object that is used as the argument."""
         return self.gpu_data
 
     def get_ipc_handle(self):
@@ -368,8 +384,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             )
 
             shape[-1], rem = divmod(
-                shape[-1] * self.dtype.itemsize,
-                dtype.itemsize
+                shape[-1] * self.dtype.itemsize, dtype.itemsize
             )
 
             if rem != 0:
@@ -398,14 +413,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
 
 
 class DeviceRecord(DeviceNDArrayBase):
-    '''
+    """
     An on-GPU record type
-    '''
+    """
+
     def __init__(self, dtype, stream=0, gpu_data=None):
         shape = ()
         strides = ()
-        super(DeviceRecord, self).__init__(shape, strides, dtype, stream,
-                                           gpu_data)
+        super(DeviceRecord, self).__init__(
+            shape, strides, dtype, stream, gpu_data
+        )
 
     @property
     def flags(self):
@@ -415,7 +432,7 @@ class DeviceRecord(DeviceNDArrayBase):
         with an existing `numpy.ndarray` (as the C- and F- contiguous flags
         aren't writeable).
         """
-        return dict(self._dummy.flags) # defensive copy
+        return dict(self._dummy.flags)  # defensive copy
 
     @property
     def _numba_type_(self):
@@ -431,8 +448,7 @@ class DeviceRecord(DeviceNDArrayBase):
 
     @devices.require_context
     def getitem(self, item, stream=0):
-        """Do `__getitem__(item)` with CUDA stream
-        """
+        """Do `__getitem__(item)` with CUDA stream"""
         return self._do_getitem(item, stream)
 
     def _do_getitem(self, item, stream=0):
@@ -442,22 +458,24 @@ class DeviceRecord(DeviceNDArrayBase):
 
         if typ.shape == ():
             if typ.names is not None:
-                return DeviceRecord(dtype=typ, stream=stream,
-                                    gpu_data=newdata)
+                return DeviceRecord(dtype=typ, stream=stream, gpu_data=newdata)
             else:
                 hostary = np.empty(1, dtype=typ)
-                _driver.device_to_host(dst=hostary, src=newdata,
-                                       size=typ.itemsize,
-                                       stream=stream)
+                _driver.device_to_host(
+                    dst=hostary, src=newdata, size=typ.itemsize, stream=stream
+                )
             return hostary[0]
         else:
-            shape, strides, dtype = \
-                prepare_shape_strides_dtype(typ.shape,
-                                            None,
-                                            typ.subdtype[0], 'C')
-            return DeviceNDArray(shape=shape, strides=strides,
-                                 dtype=dtype, gpu_data=newdata,
-                                 stream=stream)
+            shape, strides, dtype = prepare_shape_strides_dtype(
+                typ.shape, None, typ.subdtype[0], "C"
+            )
+            return DeviceNDArray(
+                shape=shape,
+                strides=strides,
+                dtype=dtype,
+                gpu_data=newdata,
+                stream=stream,
+            )
 
     @devices.require_context
     def __setitem__(self, key, value):
@@ -465,12 +483,10 @@ class DeviceRecord(DeviceNDArrayBase):
 
     @devices.require_context
     def setitem(self, key, value, stream=0):
-        """Do `__setitem__(key, value)` with CUDA stream
-        """
+        """Do `__setitem__(key, value)` with CUDA stream"""
        return self._do_setitem(key, value, stream=stream)
 
     def _do_setitem(self, key, value, stream=0):
-
         stream = self._default_stream(stream)
 
         # If the record didn't have a default stream, and the user didn't
@@ -515,6 +531,7 @@ def _assign_kernel(ndim):
         @cuda.jit
         def kernel(lhs, rhs):
             lhs[()] = rhs[()]
+
         return kernel
 
     @cuda.jit
@@ -531,9 +548,7 @@
 
         # [0, :] is the to-index (into `lhs`)
         # [1, :] is the from-index (into `rhs`)
-        idx = cuda.local.array(
-            shape=(2, ndim),
-            dtype=types.int64)
+        idx = cuda.local.array(shape=(2, ndim), dtype=types.int64)
 
         for i in range(ndim - 1, -1, -1):
             idx[0, i] = location % lhs.shape[i]
@@ -541,17 +556,19 @@
             location //= lhs.shape[i]
 
         lhs[to_fixed_tuple(idx[0], ndim)] = rhs[to_fixed_tuple(idx[1], ndim)]
+
     return kernel
 
 
 class DeviceNDArray(DeviceNDArrayBase):
-    '''
+    """
     An on-GPU array type
-    '''
+    """
+
     def is_f_contiguous(self):
-        '''
+        """
         Return true if the array is Fortran-contiguous.
-        '''
+        """
         return self._dummy.is_f_contig
 
     @property
@@ -562,12 +579,12 @@ class DeviceNDArray(DeviceNDArrayBase):
         with an existing `numpy.ndarray` (as the C- and F- contiguous flags
         aren't writeable).
         """
-        return dict(self._dummy.flags) # defensive copy
+        return dict(self._dummy.flags)  # defensive copy
 
     def is_c_contiguous(self):
-        '''
+        """
         Return true if the array is C-contiguous.
-        '''
+        """
         return self._dummy.is_c_contig
 
     def __array__(self, dtype=None, copy=None):
@@ -590,7 +607,7 @@ class DeviceNDArray(DeviceNDArrayBase):
         Reshape the array without changing its contents, similarly to
         :meth:`numpy.ndarray.reshape`. Example::
 
-            d_arr = d_arr.reshape(20, 50, order='F')
+            d_arr = d_arr.reshape(20, 50, order="F")
         """
         if len(newshape) == 1 and isinstance(newshape[0], (tuple, list)):
             newshape = newshape[0]
@@ -598,31 +615,43 @@ class DeviceNDArray(DeviceNDArrayBase):
         cls = type(self)
         if newshape == self.shape:
             # nothing to do
-            return cls(shape=self.shape, strides=self.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data)
+            return cls(
+                shape=self.shape,
+                strides=self.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+            )
 
         newarr, extents = self._dummy.reshape(*newshape, **kws)
 
         if extents == [self._dummy.extent]:
-            return cls(shape=newarr.shape, strides=newarr.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data)
+            return cls(
+                shape=newarr.shape,
+                strides=newarr.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+            )
         else:
             raise NotImplementedError("operation requires copying")
 
-    def ravel(self, order='C', stream=0):
-        '''
+    def ravel(self, order="C", stream=0):
+        """
         Flattens a contiguous array without changing its contents, similar to
         :meth:`numpy.ndarray.ravel`. If the array is not contiguous, raises an
         exception.
-        '''
+        """
         stream = self._default_stream(stream)
         cls = type(self)
         newarr, extents = self._dummy.ravel(order=order)
 
         if extents == [self._dummy.extent]:
-            return cls(shape=newarr.shape, strides=newarr.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data,
-                       stream=stream)
+            return cls(
+                shape=newarr.shape,
+                strides=newarr.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+                stream=stream,
+            )
 
         else:
             raise NotImplementedError("operation requires copying")
@@ -633,8 +662,7 @@ class DeviceNDArray(DeviceNDArrayBase):
 
     @devices.require_context
     def getitem(self, item, stream=0):
-        """Do `__getitem__(item)` with CUDA stream
-        """
+        """Do `__getitem__(item)` with CUDA stream"""
         return self._do_getitem(item, stream)
 
     def _do_getitem(self, item, stream=0):
@@ -649,22 +677,36 @@ class DeviceNDArray(DeviceNDArrayBase):
             if not arr.is_array:
                 # Check for structured array type (record)
                 if self.dtype.names is not None:
-                    return DeviceRecord(dtype=self.dtype, stream=stream,
-                                        gpu_data=newdata)
+                    return DeviceRecord(
+                        dtype=self.dtype, stream=stream, gpu_data=newdata
+                    )
                 else:
                     # Element indexing
                     hostary = np.empty(1, dtype=self.dtype)
-                    _driver.device_to_host(dst=hostary, src=newdata,
-                                           size=self._dummy.itemsize,
-                                           stream=stream)
+                    _driver.device_to_host(
+                        dst=hostary,
+                        src=newdata,
+                        size=self._dummy.itemsize,
+                        stream=stream,
+                    )
                 return hostary[0]
             else:
-                return cls(shape=arr.shape, strides=arr.strides,
-                           dtype=self.dtype, gpu_data=newdata, stream=stream)
+                return cls(
+                    shape=arr.shape,
+                    strides=arr.strides,
+                    dtype=self.dtype,
+                    gpu_data=newdata,
+                    stream=stream,
+                )
         else:
             newdata = self.gpu_data.view(*arr.extent)
-            return cls(shape=arr.shape, strides=arr.strides,
-                       dtype=self.dtype, gpu_data=newdata, stream=stream)
+            return cls(
+                shape=arr.shape,
+                strides=arr.strides,
+                dtype=self.dtype,
+                gpu_data=newdata,
+                stream=stream,
+            )
 
     @devices.require_context
     def __setitem__(self, key, value):
@@ -672,12 +714,10 @@ class DeviceNDArray(DeviceNDArrayBase):
 
     @devices.require_context
     def setitem(self, key, value, stream=0):
-        """Do `__setitem__(key, value)` with CUDA stream
-        """
+        """Do `__setitem__(key, value)` with CUDA stream"""
         return self._do_setitem(key, value, stream=stream)
 
     def _do_setitem(self, key, value, stream=0):
-
         stream = self._default_stream(stream)
 
         # If the array didn't have a default stream, and the user didn't provide
@@ -706,23 +746,26 @@ class DeviceNDArray(DeviceNDArrayBase):
             strides=strides,
             dtype=self.dtype,
             gpu_data=newdata,
-            stream=stream)
+            stream=stream,
+        )
 
         # (2) prepare RHS
 
         rhs, _ = auto_device(value, stream=stream, user_explicit=True)
         if rhs.ndim > lhs.ndim:
-            raise ValueError("Can't assign %s-D array to %s-D self" % (
-                rhs.ndim,
-                lhs.ndim))
+            raise ValueError(
+                "Can't assign %s-D array to %s-D self" % (rhs.ndim, lhs.ndim)
+            )
         rhs_shape = np.ones(lhs.ndim, dtype=np.int64)
         # negative indices would not work if rhs.ndim == 0
-        rhs_shape[lhs.ndim - rhs.ndim:] = rhs.shape
+        rhs_shape[lhs.ndim - rhs.ndim :] = rhs.shape
         rhs = rhs.reshape(*rhs_shape)
         for i, (l, r) in enumerate(zip(lhs.shape, rhs.shape)):
             if r != 1 and l != r:
-                raise ValueError("Can't copy sequence with size %d to array "
-                                 "axis %d with dimension %d" % ( r, i, l))
+                raise ValueError(
+                    "Can't copy sequence with size %d to array "
+                    "axis %d with dimension %d" % (r, i, l)
+                )
 
         # (3) do the copy
 
@@ -751,6 +794,7 @@ class IpcArrayHandle(object):
             some_code(ipc_array)
         # ipc_array is dead at this point
     """
+
     def __init__(self, ipc_handle, array_desc):
         self._array_desc = array_desc
         self._ipc_handle = ipc_handle
@@ -798,8 +842,9 @@ class ManagedNDArray(DeviceNDArrayBase, np.ndarray):
 
 def from_array_like(ary, stream=0, gpu_data=None):
     "Create a DeviceNDArray object that is like ary."
-    return DeviceNDArray(ary.shape, ary.strides, ary.dtype, stream=stream,
-                         gpu_data=gpu_data)
+    return DeviceNDArray(
+        ary.shape, ary.strides, ary.dtype, stream=stream, gpu_data=gpu_data
+    )
 
 
 def from_record_like(rec, stream=0, gpu_data=None):
@@ -841,15 +886,17 @@ def is_contiguous(ary):
     return True
 
 
-errmsg_contiguous_buffer = ("Array contains non-contiguous buffer and cannot "
-                            "be transferred as a single memory region. Please "
-                            "ensure contiguous buffer with numpy "
-                            ".ascontiguousarray()")
+errmsg_contiguous_buffer = (
+    "Array contains non-contiguous buffer and cannot "
+    "be transferred as a single memory region. Please "
+    "ensure contiguous buffer with numpy "
+    ".ascontiguousarray()"
+)
 
 
 def sentry_contiguous(ary):
     core = array_core(ary)
-    if not core.flags['C_CONTIGUOUS'] and not core.flags['F_CONTIGUOUS']:
+    if not core.flags["C_CONTIGUOUS"] and not core.flags["F_CONTIGUOUS"]:
         raise ValueError(errmsg_contiguous_buffer)
 
 
@@ -861,7 +908,7 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
     """
     if _driver.is_device_memory(obj):
         return obj, False
-    elif hasattr(obj, '__cuda_array_interface__'):
+    elif hasattr(obj, "__cuda_array_interface__"):
        return numba.cuda.as_cuda_array(obj), False
    else:
        if isinstance(obj, np.void):
@@ -873,9 +920,8 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
             # into this function (with no overhead -- copies -- for `obj`s
             # that are already `ndarray`s.
             obj = np.array(
-                obj,
-                copy=False if numpy_version < (2, 0) else None,
-                subok=True)
+                obj, copy=False if numpy_version < (2, 0) else None, subok=True
+            )
         sentry_contiguous(obj)
         devobj = from_array_like(obj, stream=stream)
         if copy:
@@ -883,13 +929,14 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
                 config.CUDA_WARN_ON_IMPLICIT_COPY
                 and not config.DISABLE_PERFORMANCE_WARNINGS
             ):
-                if (
-                    not user_explicit and
-                    (not isinstance(obj, DeviceNDArray)
-                     and isinstance(obj, np.ndarray))
+                if not user_explicit and (
+                    not isinstance(obj, DeviceNDArray)
+                    and isinstance(obj, np.ndarray)
                 ):
-                    msg = ("Host array used in CUDA kernel will incur "
-                           "copy overhead to/from device.")
+                    msg = (
+                        "Host array used in CUDA kernel will incur "
+                        "copy overhead to/from device."
+                    )
                     warn(NumbaPerformanceWarning(msg))
             devobj.copy_to_device(obj, stream=stream)
         return devobj, True
@@ -898,13 +945,16 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
 def check_array_compatibility(ary1, ary2):
     ary1sq, ary2sq = ary1.squeeze(), ary2.squeeze()
     if ary1.dtype != ary2.dtype:
-        raise TypeError('incompatible dtype: %s vs. %s' %
-                        (ary1.dtype, ary2.dtype))
+        raise TypeError(
+            "incompatible dtype: %s vs. %s" % (ary1.dtype, ary2.dtype)
+        )
     if ary1sq.shape != ary2sq.shape:
-        raise ValueError('incompatible shape: %s vs. %s' %
-                         (ary1.shape, ary2.shape))
+        raise ValueError(
+            "incompatible shape: %s vs. %s" % (ary1.shape, ary2.shape)
+        )
     # We check strides only if the size is nonzero, because strides are
     # irrelevant (and can differ) for zero-length copies.
     if ary1.size and ary1sq.strides != ary2sq.strides:
-        raise ValueError('incompatible strides: %s vs. %s' %
-                         (ary1.strides, ary2.strides))
+        raise ValueError(
+            "incompatible strides: %s vs. %s" % (ary1.strides, ary2.strides)
+        )
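The hunks above are Black-style reformatting of the device array implementation (double quotes, exploded call arguments); the public behaviour of the API is unchanged. For reference only, a minimal usage sketch of that API follows. It is not part of the diff and assumes numba-cuda is installed and a CUDA-capable GPU is available:

    import numpy as np
    from numba import cuda

    host = np.arange(16, dtype=np.float32).reshape(4, 4)

    d_arr = cuda.to_device(host)      # host->device copy (routed through auto_device/from_array_like)
    d_view = d_arr.reshape(2, 8)      # reshape a contiguous DeviceNDArray without copying
    d_flat = d_arr.ravel(order="C")   # flatten; raises if the array is not contiguous
    result = d_arr.copy_to_host()     # device->host copy back into a NumPy array

    assert np.array_equal(result, host)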