PyPI - numba-cuda - Versions diffs - 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

numba-cuda 0.8.1 (py3-none-any.whl) → 0.10.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (237) hide show

_numba_cuda_redirector.py +17 -13
numba_cuda/VERSION +1 -1
numba_cuda/_version.py +4 -1
numba_cuda/numba/cuda/__init__.py +6 -2
numba_cuda/numba/cuda/api.py +129 -86
numba_cuda/numba/cuda/api_util.py +3 -3
numba_cuda/numba/cuda/args.py +12 -16
numba_cuda/numba/cuda/cg.py +6 -6
numba_cuda/numba/cuda/codegen.py +74 -43
numba_cuda/numba/cuda/compiler.py +246 -114
numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
numba_cuda/numba/cuda/cuda_paths.py +293 -99
numba_cuda/numba/cuda/cudadecl.py +93 -79
numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
numba_cuda/numba/cuda/cudadrv/error.py +6 -2
numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
numba_cuda/numba/cuda/cudaimpl.py +296 -275
numba_cuda/numba/cuda/cudamath.py +1 -1
numba_cuda/numba/cuda/debuginfo.py +99 -7
numba_cuda/numba/cuda/decorators.py +87 -45
numba_cuda/numba/cuda/descriptor.py +1 -1
numba_cuda/numba/cuda/device_init.py +68 -18
numba_cuda/numba/cuda/deviceufunc.py +143 -98
numba_cuda/numba/cuda/dispatcher.py +300 -213
numba_cuda/numba/cuda/errors.py +13 -10
numba_cuda/numba/cuda/extending.py +55 -1
numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
numba_cuda/numba/cuda/initialize.py +5 -3
numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
numba_cuda/numba/cuda/intrinsics.py +203 -28
numba_cuda/numba/cuda/kernels/reduction.py +13 -13
numba_cuda/numba/cuda/kernels/transpose.py +3 -6
numba_cuda/numba/cuda/libdevice.py +317 -317
numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
numba_cuda/numba/cuda/locks.py +16 -0
numba_cuda/numba/cuda/lowering.py +43 -0
numba_cuda/numba/cuda/mathimpl.py +62 -57
numba_cuda/numba/cuda/models.py +1 -5
numba_cuda/numba/cuda/nvvmutils.py +103 -88
numba_cuda/numba/cuda/printimpl.py +9 -5
numba_cuda/numba/cuda/random.py +46 -36
numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
numba_cuda/numba/cuda/runtime/__init__.py +1 -1
numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
numba_cuda/numba/cuda/runtime/nrt.py +48 -43
numba_cuda/numba/cuda/simulator/__init__.py +22 -12
numba_cuda/numba/cuda/simulator/api.py +38 -22
numba_cuda/numba/cuda/simulator/compiler.py +2 -2
numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
numba_cuda/numba/cuda/simulator/kernel.py +43 -34
numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
numba_cuda/numba/cuda/simulator/reduction.py +1 -0
numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
numba_cuda/numba/cuda/simulator_init.py +2 -4
numba_cuda/numba/cuda/stubs.py +134 -108
numba_cuda/numba/cuda/target.py +92 -47
numba_cuda/numba/cuda/testing.py +24 -19
numba_cuda/numba/cuda/tests/__init__.py +14 -12
numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
numba_cuda/numba/cuda/types.py +5 -2
numba_cuda/numba/cuda/ufuncs.py +382 -362
numba_cuda/numba/cuda/utils.py +2 -2
numba_cuda/numba/cuda/vector_types.py +5 -3
numba_cuda/numba/cuda/vectorizers.py +38 -33
{numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
numba_cuda-0.10.0.dist-info/RECORD +263 -0
{numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
numba_cuda-0.8.1.dist-info/RECORD +0 -251
{numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
{numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0

_numba_cuda_redirector.py CHANGED Viewed

@@ -4,11 +4,14 @@ import pathlib
 import sys
 import warnings
-multiple_locations_msg = ("Multiple submodule search locations for {}. "
-                          "Cannot redirect numba.cuda to numba_cuda")
+multiple_locations_msg = (
+    "Multiple submodule search locations for {}. "
+    "Cannot redirect numba.cuda to numba_cuda"
+)
-no_spec_msg = ("Couldn't get spec for {}. "
-               "Cannot redirect numba.cuda to numba_cuda")
+no_spec_msg = (
+    "Couldn't get spec for {}. Cannot redirect numba.cuda to numba_cuda"
+)
 class NumbaCudaFinder(importlib.abc.MetaPathFinder):
@@ -19,17 +22,17 @@ class NumbaCudaFinder(importlib.abc.MetaPathFinder):
         if self.initialized is not None:
             return self.initialized
-        numba_spec = importlib.util.find_spec('numba')
+        numba_spec = importlib.util.find_spec("numba")
         if numba_spec is None:
-            warnings.warn(no_spec_msg.format('numba'))
+            warnings.warn(no_spec_msg.format("numba"))
             self.initialized = False
             return False
-        numba_cuda_spec = importlib.util.find_spec('numba_cuda')
+        numba_cuda_spec = importlib.util.find_spec("numba_cuda")
         if numba_spec is None:
-            warnings.warn(no_spec_msg.format('numba_cuda'))
+            warnings.warn(no_spec_msg.format("numba_cuda"))
             self.initialized = False
             return False
@@ -37,19 +40,19 @@ class NumbaCudaFinder(importlib.abc.MetaPathFinder):
         numba_cuda_search_locations = numba_cuda_spec.submodule_search_locations
         if len(numba_search_locations) != 1:
-            warnings.warn(multiple_locations_msg.format('numba'))
+            warnings.warn(multiple_locations_msg.format("numba"))
             self.initialized = False
             return False
         if len(numba_cuda_search_locations) != 1:
-            warnings.warn(multiple_locations_msg.format('numba_cuda'))
+            warnings.warn(multiple_locations_msg.format("numba_cuda"))
             self.initialized = False
             return False
         self.numba_path = numba_search_locations[0]
         location = numba_cuda_search_locations[0]
-        self.numba_cuda_path = str((pathlib.Path(location) / 'numba'))
+        self.numba_cuda_path = str((pathlib.Path(location) / "numba"))
         self.initialized = True
         return True
@@ -64,8 +67,9 @@ class NumbaCudaFinder(importlib.abc.MetaPathFinder):
                 # Re-entrancy - return and carry on
                 return None
-            oot_path = [p.replace(self.numba_path, self.numba_cuda_path)
-                        for p in path]
+            oot_path = [
+                p.replace(self.numba_path, self.numba_cuda_path) for p in path
+            ]
             for finder in sys.meta_path:
                 try:
                     spec = finder.find_spec(name, oot_path, target)

numba_cuda/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.8.1
1	+ 0.10.0

numba_cuda/_version.py CHANGED Viewed

@@ -15,5 +15,8 @@
 import importlib.resources
 __version__ = (
-    importlib.resources.files("numba_cuda").joinpath("VERSION").read_text().strip()
+    importlib.resources.files("numba_cuda")
+    .joinpath("VERSION")
+    .read_text()
+    .strip()
 )

numba_cuda/numba/cuda/__init__.py CHANGED Viewed

@@ -7,8 +7,12 @@ else:
     from .device_init import *
     from .device_init import _auto_device
-from numba.cuda.compiler import (compile, compile_for_current_device,
-                                 compile_ptx, compile_ptx_for_current_device)
+from numba.cuda.compiler import (
+    compile,
+    compile_for_current_device,
+    compile_ptx,
+    compile_ptx_for_current_device,
+)
 # This is the out-of-tree NVIDIA-maintained target. This is reported in Numba
 # sysinfo (`numba -s`):

numba_cuda/numba/cuda/api.py CHANGED Viewed

@@ -2,7 +2,6 @@
 API that are reported to numba.cuda
 """
 import contextlib
 import os
@@ -28,35 +27,37 @@ def from_cuda_array_interface(desc, owner=None, sync=True):
     If ``sync`` is ``True``, then the imported stream (if present) will be
     synchronized.
     """
-    version = desc.get('version')
+    version = desc.get("version")
     # Mask introduced in version 1
     if 1 <= version:
-        mask = desc.get('mask')
+        mask = desc.get("mask")
         # Would ideally be better to detect if the mask is all valid
         if mask is not None:
-            raise NotImplementedError('Masked arrays are not supported')
+            raise NotImplementedError("Masked arrays are not supported")
-    shape = desc['shape']
-    strides = desc.get('strides')
-    dtype = np.dtype(desc['typestr'])
+    shape = desc["shape"]
+    strides = desc.get("strides")
+    dtype = np.dtype(desc["typestr"])
     shape, strides, dtype = prepare_shape_strides_dtype(
-        shape, strides, dtype, order='C')
+        shape, strides, dtype, order="C"
+    )
     size = driver.memory_size_from_info(shape, strides, dtype.itemsize)
-    devptr = driver.get_devptr_for_active_ctx(desc['data'][0])
+    devptr = driver.get_devptr_for_active_ctx(desc["data"][0])
     data = driver.MemoryPointer(
-        current_context(), devptr, size=size, owner=owner)
-    stream_ptr = desc.get('stream', None)
+        current_context(), devptr, size=size, owner=owner
+    )
+    stream_ptr = desc.get("stream", None)
     if stream_ptr is not None:
         stream = external_stream(stream_ptr)
         if sync and config.CUDA_ARRAY_INTERFACE_SYNC:
             stream.synchronize()
     else:
-        stream = 0 # No "Numba default stream", not the CUDA default stream
-    da = devicearray.DeviceNDArray(shape=shape, strides=strides,
-                                   dtype=dtype, gpu_data=data,
-                                   stream=stream)
+        stream = 0  # No "Numba default stream", not the CUDA default stream
+    da = devicearray.DeviceNDArray(
+        shape=shape, strides=strides, dtype=dtype, gpu_data=data, stream=stream
+    )
     return da
@@ -73,8 +74,9 @@ def as_cuda_array(obj, sync=True):
     if not is_cuda_array(obj):
         raise TypeError("*obj* doesn't implement the cuda array interface.")
     else:
-        return from_cuda_array_interface(obj.__cuda_array_interface__,
-                                         owner=obj, sync=sync)
+        return from_cuda_array_interface(
+            obj.__cuda_array_interface__, owner=obj, sync=sync
+        )
 def is_cuda_array(obj):
@@ -82,7 +84,7 @@ def is_cuda_array(obj):
     Does not verify the validity of the interface.
     """
-    return hasattr(obj, '__cuda_array_interface__')
+    return hasattr(obj, "__cuda_array_interface__")
 def is_float16_supported():
@@ -125,8 +127,9 @@ def to_device(obj, stream=0, copy=True, to=None):
         hary = d_ary.copy_to_host(stream=stream)
     """
     if to is None:
-        to, new = devicearray.auto_device(obj, stream=stream, copy=copy,
-                                          user_explicit=True)
+        to, new = devicearray.auto_device(
+            obj, stream=stream, copy=copy, user_explicit=True
+        )
         return to
     if copy:
         to.copy_to_device(obj, stream=stream)
@@ -134,20 +137,28 @@ def to_device(obj, stream=0, copy=True, to=None):
 @require_context
-def device_array(shape, dtype=np.float64, strides=None, order='C', stream=0):
+def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
     """device_array(shape, dtype=np.float64, strides=None, order='C', stream=0)
     Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
     """
-    shape, strides, dtype = prepare_shape_strides_dtype(shape, strides, dtype,
-                                                        order)
-    return devicearray.DeviceNDArray(shape=shape, strides=strides, dtype=dtype,
-                                     stream=stream)
+    shape, strides, dtype = prepare_shape_strides_dtype(
+        shape, strides, dtype, order
+    )
+    return devicearray.DeviceNDArray(
+        shape=shape, strides=strides, dtype=dtype, stream=stream
+    )
 @require_context
-def managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
-                  attach_global=True):
+def managed_array(
+    shape,
+    dtype=np.float64,
+    strides=None,
+    order="C",
+    stream=0,
+    attach_global=True,
+):
     """managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                      attach_global=True)
@@ -163,37 +174,48 @@ def managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                           *host*, and memory is only accessible by devices
                           with Compute Capability 6.0 and later.
     """
-    shape, strides, dtype = prepare_shape_strides_dtype(shape, strides, dtype,
-                                                        order)
+    shape, strides, dtype = prepare_shape_strides_dtype(
+        shape, strides, dtype, order
+    )
     bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
-    buffer = current_context().memallocmanaged(bytesize,
-                                               attach_global=attach_global)
-    npary = np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order,
-                       buffer=buffer)
+    buffer = current_context().memallocmanaged(
+        bytesize, attach_global=attach_global
+    )
+    npary = np.ndarray(
+        shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer
+    )
     managedview = np.ndarray.view(npary, type=devicearray.ManagedNDArray)
     managedview.device_setup(buffer, stream=stream)
     return managedview
 @require_context
-def pinned_array(shape, dtype=np.float64, strides=None, order='C'):
+def pinned_array(shape, dtype=np.float64, strides=None, order="C"):
     """pinned_array(shape, dtype=np.float64, strides=None, order='C')
     Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
     (pagelocked).  Similar to :func:`np.empty() <numpy.empty>`.
     """
-    shape, strides, dtype = prepare_shape_strides_dtype(shape, strides, dtype,
-                                                        order)
-    bytesize = driver.memory_size_from_info(shape, strides,
-                                            dtype.itemsize)
+    shape, strides, dtype = prepare_shape_strides_dtype(
+        shape, strides, dtype, order
+    )
+    bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
     buffer = current_context().memhostalloc(bytesize)
-    return np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order,
-                      buffer=buffer)
+    return np.ndarray(
+        shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer
+    )
 @require_context
-def mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
-                 portable=False, wc=False):
+def mapped_array(
+    shape,
+    dtype=np.float64,
+    strides=None,
+    order="C",
+    stream=0,
+    portable=False,
+    wc=False,
+):
     """mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                     portable=False, wc=False)
@@ -206,12 +228,14 @@ def mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
         to write by the host and to read by the device, but slower to
         write by the host and slower to write by the device.
     """
-    shape, strides, dtype = prepare_shape_strides_dtype(shape, strides, dtype,
-                                                        order)
+    shape, strides, dtype = prepare_shape_strides_dtype(
+        shape, strides, dtype, order
+    )
     bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
     buffer = current_context().memhostalloc(bytesize, mapped=True)
-    npary = np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order,
-                       buffer=buffer)
+    npary = np.ndarray(
+        shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer
+    )
     mappedview = np.ndarray.view(npary, type=devicearray.MappedNDArray)
     mappedview.device_setup(buffer, stream=stream)
     return mappedview
@@ -243,8 +267,9 @@ def open_ipc_array(handle, shape, dtype, strides=None, offset=0):
         driver_handle.reserved[:] = handle
     # use *IpcHandle* to open the IPC memory
     ipchandle = driver.IpcHandle(None, driver_handle, size, offset=offset)
-    yield ipchandle.open_array(current_context(), shape=shape,
-                               strides=strides, dtype=dtype)
+    yield ipchandle.open_array(
+        current_context(), shape=shape, strides=strides, dtype=dtype
+    )
     ipchandle.close()
@@ -260,7 +285,7 @@ def _contiguous_strides_like_array(ary):
     """
     # Don't recompute strides if the default strides will be sufficient to
     # create a contiguous array.
-    if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
+    if ary.flags["C_CONTIGUOUS"] or ary.flags["F_CONTIGUOUS"] or ary.ndim <= 1:
         return None
     # Otherwise, we need to compute new strides using an algorithm adapted from
@@ -270,7 +295,7 @@ def _contiguous_strides_like_array(ary):
     # Stride permutation. E.g. a stride array (4, -2, 12) becomes
     # [(1, -2), (0, 4), (2, 12)]
-    strideperm = [ x for x in enumerate(ary.strides) ]
+    strideperm = [x for x in enumerate(ary.strides)]
     strideperm.sort(key=lambda x: x[1])
     # Compute new strides using permutation
@@ -283,10 +308,10 @@ def _contiguous_strides_like_array(ary):
 def _order_like_array(ary):
-    if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
-        return 'F'
+    if ary.flags["F_CONTIGUOUS"] and not ary.flags["C_CONTIGUOUS"]:
+        return "F"
     else:
-        return 'C'
+        return "C"
 def device_array_like(ary, stream=0):
@@ -296,8 +321,13 @@ def device_array_like(ary, stream=0):
     """
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
-    return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
-                        order=order, stream=stream)
+    return device_array(
+        shape=ary.shape,
+        dtype=ary.dtype,
+        strides=strides,
+        order=order,
+        stream=stream,
+    )
 def mapped_array_like(ary, stream=0, portable=False, wc=False):
@@ -307,8 +337,15 @@ def mapped_array_like(ary, stream=0, portable=False, wc=False):
     """
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
-    return mapped_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
-                        order=order, stream=stream, portable=portable, wc=wc)
+    return mapped_array(
+        shape=ary.shape,
+        dtype=ary.dtype,
+        strides=strides,
+        order=order,
+        stream=stream,
+        portable=portable,
+        wc=wc,
+    )
 def pinned_array_like(ary):
@@ -318,8 +355,9 @@ def pinned_array_like(ary):
     """
     strides = _contiguous_strides_like_array(ary)
     order = _order_like_array(ary)
-    return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
-                        order=order)
+    return pinned_array(
+        shape=ary.shape, dtype=ary.dtype, strides=strides, order=order
+    )
 # Stream helper
@@ -373,13 +411,15 @@ def external_stream(ptr):
 @require_context
 @contextlib.contextmanager
 def pinned(*arylist):
-    """A context manager for temporary pinning a sequence of host ndarrays.
-    """
+    """A context manager for temporary pinning a sequence of host ndarrays."""
     pmlist = []
     for ary in arylist:
-        pm = current_context().mempin(ary, driver.host_pointer(ary),
-                                      driver.host_memory_size(ary),
-                                      mapped=False)
+        pm = current_context().mempin(
+            ary,
+            driver.host_pointer(ary),
+            driver.host_memory_size(ary),
+            mapped=False,
+        )
         pmlist.append(pm)
     yield
@@ -387,16 +427,18 @@ def pinned(*arylist):
 @require_context
 @contextlib.contextmanager
 def mapped(*arylist, **kws):
-    """A context manager for temporarily mapping a sequence of host ndarrays.
-    """
-    assert not kws or 'stream' in kws, "Only accept 'stream' as keyword."
-    stream = kws.get('stream', 0)
+    """A context manager for temporarily mapping a sequence of host ndarrays."""
+    assert not kws or "stream" in kws, "Only accept 'stream' as keyword."
+    stream = kws.get("stream", 0)
     pmlist = []
     devarylist = []
     for ary in arylist:
-        pm = current_context().mempin(ary, driver.host_pointer(ary),
-                                      driver.host_memory_size(ary),
-                                      mapped=True)
+        pm = current_context().mempin(
+            ary,
+            driver.host_pointer(ary),
+            driver.host_memory_size(ary),
+            mapped=True,
+        )
         pmlist.append(pm)
         devary = devicearray.from_array_like(ary, gpu_data=pm, stream=stream)
         devarylist.append(devary)
@@ -427,6 +469,7 @@ event_elapsed_time = driver.event_elapsed_time
 # Device selection
 def select_device(device_id):
     """
     Make the context associated with device *device_id* the current context.
@@ -468,7 +511,7 @@ def detect():
     Returns a boolean indicating whether any supported devices were detected.
     """
     devlist = list_devices()
-    print('Found %d CUDA devices' % len(devlist))
+    print("Found %d CUDA devices" % len(devlist))
     supported_count = 0
     for dev in devlist:
         attrs = []
@@ -476,29 +519,29 @@ def detect():
         kernel_timeout = dev.KERNEL_EXEC_TIMEOUT
         tcc = dev.TCC_DRIVER
         fp32_to_fp64_ratio = dev.SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO
-        attrs += [('Compute Capability', '%d.%d' % cc)]
-        attrs += [('PCI Device ID', dev.PCI_DEVICE_ID)]
-        attrs += [('PCI Bus ID', dev.PCI_BUS_ID)]
-        attrs += [('UUID', dev.uuid)]
-        attrs += [('Watchdog', 'Enabled' if kernel_timeout else 'Disabled')]
+        attrs += [("Compute Capability", "%d.%d" % cc)]
+        attrs += [("PCI Device ID", dev.PCI_DEVICE_ID)]
+        attrs += [("PCI Bus ID", dev.PCI_BUS_ID)]
+        attrs += [("UUID", dev.uuid)]
+        attrs += [("Watchdog", "Enabled" if kernel_timeout else "Disabled")]
         if os.name == "nt":
-            attrs += [('Compute Mode', 'TCC' if tcc else 'WDDM')]
-        attrs += [('FP32/FP64 Performance Ratio', fp32_to_fp64_ratio)]
+            attrs += [("Compute Mode", "TCC" if tcc else "WDDM")]
+        attrs += [("FP32/FP64 Performance Ratio", fp32_to_fp64_ratio)]
         if cc < (3, 5):
-            support = '[NOT SUPPORTED: CC < 3.5]'
+            support = "[NOT SUPPORTED: CC < 3.5]"
         elif cc < (5, 0):
-            support = '[SUPPORTED (DEPRECATED)]'
+            support = "[SUPPORTED (DEPRECATED)]"
             supported_count += 1
         else:
-            support = '[SUPPORTED]'
+            support = "[SUPPORTED]"
             supported_count += 1
-        print('id %d    %20s %40s' % (dev.id, dev.name, support))
+        print("id %d    %20s %40s" % (dev.id, dev.name, support))
         for key, val in attrs:
-            print('%40s: %s' % (key, val))
+            print("%40s: %s" % (key, val))
-    print('Summary:')
-    print('\t%d/%d devices are supported' % (supported_count, len(devlist)))
+    print("Summary:")
+    print("\t%d/%d devices are supported" % (supported_count, len(devlist)))
     return supported_count > 0

numba_cuda/numba/cuda/api_util.py CHANGED Viewed

@@ -17,14 +17,14 @@ def _fill_stride_by_order(shape, dtype, order):
     if nd == 0:
         return ()
     strides = [0] * nd
-    if order == 'C':
+    if order == "C":
         strides[-1] = dtype.itemsize
         for d in reversed(range(nd - 1)):
             strides[d] = strides[d + 1] * shape[d + 1]
-    elif order == 'F':
+    elif order == "F":
         strides[0] = dtype.itemsize
         for d in range(1, nd):
             strides[d] = strides[d - 1] * shape[d - 1]
     else:
-        raise ValueError('must be either C/F order')
+        raise ValueError("must be either C/F order")
     return tuple(strides)

numba_cuda/numba/cuda/args.py CHANGED Viewed

@@ -2,6 +2,7 @@
 Hints to wrap Kernel arguments to indicate how to manage host-device
 memory transfers before & after the kernel call.
 """
 import abc
 from numba.core.typing.typeof import typeof, Purpose
@@ -31,9 +32,8 @@ class ArgHint(metaclass=abc.ABCMeta):
 class In(ArgHint):
     def to_device(self, retr, stream=0):
         from .cudadrv.devicearray import auto_device
-        devary, _ = auto_device(
-            self.value,
-            stream=stream)
+        devary, _ = auto_device(self.value, stream=stream)
         # A dummy writeback functor to keep devary alive until the kernel
         # is called.
         retr.append(lambda: devary)
@@ -43,10 +43,8 @@ class In(ArgHint):
 class Out(ArgHint):
     def to_device(self, retr, stream=0):
         from .cudadrv.devicearray import auto_device
-        devary, conv = auto_device(
-            self.value,
-            copy=False,
-            stream=stream)
+        devary, conv = auto_device(self.value, copy=False, stream=stream)
         if conv:
             retr.append(lambda: devary.copy_to_host(self.value, stream=stream))
         return devary
@@ -55,9 +53,8 @@ class Out(ArgHint):
 class InOut(ArgHint):
     def to_device(self, retr, stream=0):
         from .cudadrv.devicearray import auto_device
-        devary, conv = auto_device(
-            self.value,
-            stream=stream)
+        devary, conv = auto_device(self.value, stream=stream)
         if conv:
             retr.append(lambda: devary.copy_to_host(self.value, stream=stream))
         return devary
@@ -68,10 +65,9 @@ def wrap_arg(value, default=InOut):
 __all__ = [
-    'In',
-    'Out',
-    'InOut',
-    'ArgHint',
-    'wrap_arg',
+    "In",
+    "Out",
+    "InOut",
+    "ArgHint",
+    "wrap_arg",
 ]

numba_cuda/numba/cuda/cg.py CHANGED Viewed

@@ -26,13 +26,13 @@ def _this_grid(typingctx):
         one = context.get_constant(types.int32, 1)
         mod = builder.module
         return builder.call(
-            nvvmutils.declare_cudaCGGetIntrinsicHandle(mod),
-            (one,))
+            nvvmutils.declare_cudaCGGetIntrinsicHandle(mod), (one,)
+        )
     return sig, codegen
-@overload(this_grid, target='cuda')
+@overload(this_grid, target="cuda")
 def _ol_this_grid():
     def impl():
         return _this_grid()
@@ -48,13 +48,13 @@ def _grid_group_sync(typingctx, group):
         flags = context.get_constant(types.int32, 0)
         mod = builder.module
         return builder.call(
-            nvvmutils.declare_cudaCGSynchronize(mod),
-            (*args, flags))
+            nvvmutils.declare_cudaCGSynchronize(mod), (*args, flags)
+        )
     return sig, codegen
-@overload_method(GridGroupClass, 'sync', target='cuda')
+@overload_method(GridGroupClass, "sync", target="cuda")
 def _ol_grid_group_sync(group):
     def impl(group):
         return _grid_group_sync(group)

numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

numba-cuda 0.8.1py3-none-any.whl → 0.10.0py3-none-any.whl