numba-cuda 0.8.1-py3-none-any.whl → 0.9.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, exactly as they appear in their public registry. It is provided for informational purposes only.
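For readers who want to reproduce a comparison like this locally, the sketch below reads one file out of each wheel and prints a unified diff, using only the Python standard library. It assumes both wheels are already in the current directory (e.g. fetched with `pip download --no-deps numba-cuda==0.8.1` and `pip download --no-deps numba-cuda==0.9.0`); the member path is the file whose diff is rendered in detail further down.

    # Minimal sketch: diff one file between two locally downloaded wheels.
    import difflib
    import zipfile

    OLD_WHEEL = "numba_cuda-0.8.1-py3-none-any.whl"
    NEW_WHEEL = "numba_cuda-0.9.0-py3-none-any.whl"
    MEMBER = "numba_cuda/numba/cuda/cudadrv/devicearray.py"

    def read_member(wheel_path, member):
        # A wheel is a zip archive, so zipfile can read members directly.
        with zipfile.ZipFile(wheel_path) as wheel:
            return wheel.read(member).decode("utf-8").splitlines(keepends=True)

    diff = difflib.unified_diff(
        read_member(OLD_WHEEL, MEMBER),
        read_member(NEW_WHEEL, MEMBER),
        fromfile="0.8.1/" + MEMBER,
        tofile="0.9.0/" + MEMBER,
    )
    print("".join(diff))

The per-file change counts reported for this comparison are: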
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
--- numba_cuda/numba/cuda/cudadrv/devicearray.py (0.8.1)
+++ numba_cuda/numba/cuda/cudadrv/devicearray.py (0.9.0)
@@ -25,7 +25,7 @@ from numba.core.errors import NumbaPerformanceWarning
 from warnings import warn
 
 try:
-    lru_cache = getattr(functools, 'lru_cache')(None)
+    lru_cache = getattr(functools, "lru_cache")(None)
 except AttributeError:
     # Python 3.1 or lower
     def lru_cache(func):
@@ -34,7 +34,7 @@ except AttributeError:
 
 def is_cuda_ndarray(obj):
     "Check if an object is a CUDA ndarray"
-    return getattr(obj, '__cuda_ndarray__', False)
+    return getattr(obj, "__cuda_ndarray__", False)
 
 
 def verify_cuda_ndarray_interface(obj):
@@ -45,25 +45,25 @@ def verify_cuda_ndarray_interface(obj):
         if not hasattr(obj, attr):
             raise AttributeError(attr)
         if not isinstance(getattr(obj, attr), typ):
-            raise AttributeError('%s must be of type %s' % (attr, typ))
+            raise AttributeError("%s must be of type %s" % (attr, typ))
 
-    requires_attr('shape', tuple)
-    requires_attr('strides', tuple)
-    requires_attr('dtype', np.dtype)
-    requires_attr('size', int)
+    requires_attr("shape", tuple)
+    requires_attr("strides", tuple)
+    requires_attr("dtype", np.dtype)
+    requires_attr("size", int)
 
 
 def require_cuda_ndarray(obj):
     "Raises ValueError is is_cuda_ndarray(obj) evaluates False"
     if not is_cuda_ndarray(obj):
-        raise ValueError('require an cuda ndarray object')
+        raise ValueError("require an cuda ndarray object")
 
 
 class DeviceNDArrayBase(_devicearray.DeviceArray):
-    """A on GPU NDArray representation
-    """
+    """A on GPU NDArray representation"""
+
     __cuda_memory__ = True
-    __cuda_ndarray__ = True # There must be gpu_data attribute
+    __cuda_ndarray__ = True  # There must be gpu_data attribute
 
     def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
         """
@@ -88,9 +88,10 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         dtype = np.dtype(dtype)
         self.ndim = len(shape)
         if len(strides) != self.ndim:
-            raise ValueError('strides not match ndim')
-        self._dummy = dummyarray.Array.from_desc(0, shape, strides,
-                                                 dtype.itemsize)
+            raise ValueError("strides not match ndim")
+        self._dummy = dummyarray.Array.from_desc(
+            0, shape, strides, dtype.itemsize
+        )
         self.shape = tuple(shape)
         self.strides = tuple(strides)
         self.dtype = dtype
@@ -99,7 +100,8 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         if self.size > 0:
             if gpu_data is None:
                 self.alloc_size = _driver.memory_size_from_info(
-                    self.shape, self.strides, self.dtype.itemsize)
+                    self.shape, self.strides, self.dtype.itemsize
+                )
                 gpu_data = devices.get_context().memalloc(self.alloc_size)
             else:
                 self.alloc_size = _driver.device_memory_size(gpu_data)
@@ -109,8 +111,9 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
                 null = _driver.binding.CUdeviceptr(0)
             else:
                 null = c_void_p(0)
-            gpu_data = _driver.MemoryPointer(context=devices.get_context(),
-                                             pointer=null, size=0)
+            gpu_data = _driver.MemoryPointer(
+                context=devices.get_context(), pointer=null, size=0
+            )
             self.alloc_size = 0
 
         self.gpu_data = gpu_data
@@ -130,12 +133,12 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             ptr = 0
 
         return {
-            'shape': tuple(self.shape),
-            'strides': None if is_contiguous(self) else tuple(self.strides),
-            'data': (ptr, False),
-            'typestr': self.dtype.str,
-            'stream': int(self.stream) if self.stream != 0 else None,
-            'version': 3,
+            "shape": tuple(self.shape),
+            "strides": None if is_contiguous(self) else tuple(self.strides),
+            "data": (ptr, False),
+            "typestr": self.dtype.str,
+            "stream": int(self.stream) if self.stream != 0 else None,
+            "version": 3,
         }
 
     def bind(self, stream=0):
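The hunk above only requotes the keys of the version-3 `__cuda_array_interface__` dict; the values are built exactly as before. For orientation, inspecting the dict on a small device array looks roughly like this (a sketch assuming a CUDA-capable machine; the data pointer differs on every run):

    # Inspecting the interface dict assembled in the hunk above.
    import numpy as np
    from numba import cuda

    d_arr = cuda.to_device(np.zeros((2, 3), dtype=np.float32))
    print(d_arr.__cuda_array_interface__)
    # e.g. {'shape': (2, 3), 'strides': None, 'data': (139637976727552, False),
    #       'typestr': '<f4', 'stream': None, 'version': 3}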
@@ -160,6 +163,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             raise ValueError("invalid axes list %r" % (axes,))
         else:
             from numba.cuda.kernels.transpose import transpose
+
             return transpose(self)
 
     def _default_stream(self, stream):
@@ -186,20 +190,19 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         # layouts.
 
         broadcast = 0 in self.strides
-        if self.flags['C_CONTIGUOUS'] and not broadcast:
-            layout = 'C'
-        elif self.flags['F_CONTIGUOUS'] and not broadcast:
-            layout = 'F'
+        if self.flags["C_CONTIGUOUS"] and not broadcast:
+            layout = "C"
+        elif self.flags["F_CONTIGUOUS"] and not broadcast:
+            layout = "F"
         else:
-            layout = 'A'
+            layout = "A"
 
         dtype = numpy_support.from_dtype(self.dtype)
         return types.Array(dtype, self.ndim, layout)
 
     @property
     def device_ctypes_pointer(self):
-        """Returns the ctypes pointer to the GPU data buffer
-        """
+        """Returns the ctypes pointer to the GPU data buffer"""
         if self.gpu_data is None:
             if _driver.USE_NV_BINDING:
                 return _driver.binding.CUdeviceptr(0)
@@ -232,13 +235,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         # (i.e., in order to materialize a writable strided view)
         ary_core = np.array(
             ary_core,
-            order='C' if self_core.flags['C_CONTIGUOUS'] else 'F',
+            order="C" if self_core.flags["C_CONTIGUOUS"] else "F",
             subok=True,
-            copy=(not ary_core.flags['WRITEABLE'])
-            if numpy_version < (2, 0) else None)
+            copy=(not ary_core.flags["WRITEABLE"])
+            if numpy_version < (2, 0)
+            else None,
+        )
         check_array_compatibility(self_core, ary_core)
-        _driver.host_to_device(self, ary_core, self.alloc_size,
-                               stream=stream)
+        _driver.host_to_device(
+            self, ary_core, self.alloc_size, stream=stream
+        )
 
     @devices.require_context
     def copy_to_host(self, ary=None, stream=0):
@@ -264,7 +270,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         result_array = d_arr.copy_to_host()
         """
         if any(s < 0 for s in self.strides):
-            msg = 'D->H copy not implemented for negative strides: {}'
+            msg = "D->H copy not implemented for negative strides: {}"
             raise NotImplementedError(msg.format(self.strides))
         assert self.alloc_size >= 0, "Negative memory size"
         stream = self._default_stream(stream)
@@ -275,16 +281,22 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             hostary = ary
 
         if self.alloc_size != 0:
-            _driver.device_to_host(hostary, self, self.alloc_size,
-                                   stream=stream)
+            _driver.device_to_host(
+                hostary, self, self.alloc_size, stream=stream
+            )
 
         if ary is None:
             if self.size == 0:
-                hostary = np.ndarray(shape=self.shape, dtype=self.dtype,
-                                     buffer=hostary)
+                hostary = np.ndarray(
+                    shape=self.shape, dtype=self.dtype, buffer=hostary
+                )
             else:
-                hostary = np.ndarray(shape=self.shape, dtype=self.dtype,
-                                     strides=self.strides, buffer=hostary)
+                hostary = np.ndarray(
+                    shape=self.shape,
+                    dtype=self.dtype,
+                    strides=self.strides,
+                    buffer=hostary,
+                )
         return hostary
 
     def split(self, section, stream=0):
@@ -305,12 +317,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             end = min(begin + section, self.size)
             shape = (end - begin,)
             gpu_data = self.gpu_data.view(begin * itemsize, end * itemsize)
-            yield DeviceNDArray(shape, strides, dtype=self.dtype,
-                                stream=stream, gpu_data=gpu_data)
+            yield DeviceNDArray(
+                shape,
+                strides,
+                dtype=self.dtype,
+                stream=stream,
+                gpu_data=gpu_data,
+            )
 
     def as_cuda_arg(self):
-        """Returns a device memory object that is used as the argument.
-        """
+        """Returns a device memory object that is used as the argument."""
         return self.gpu_data
 
     def get_ipc_handle(self):
@@ -368,8 +384,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         )
 
         shape[-1], rem = divmod(
-            shape[-1] * self.dtype.itemsize,
-            dtype.itemsize
+            shape[-1] * self.dtype.itemsize, dtype.itemsize
         )
 
         if rem != 0:
@@ -398,14 +413,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
 
 
 class DeviceRecord(DeviceNDArrayBase):
-    '''
+    """
     An on-GPU record type
-    '''
+    """
+
     def __init__(self, dtype, stream=0, gpu_data=None):
         shape = ()
         strides = ()
-        super(DeviceRecord, self).__init__(shape, strides, dtype, stream,
-                                           gpu_data)
+        super(DeviceRecord, self).__init__(
+            shape, strides, dtype, stream, gpu_data
+        )
 
     @property
     def flags(self):
@@ -415,7 +432,7 @@ class DeviceRecord(DeviceNDArrayBase):
         with an existing `numpy.ndarray` (as the C- and F- contiguous flags
         aren't writeable).
         """
-        return dict(self._dummy.flags) # defensive copy
+        return dict(self._dummy.flags)  # defensive copy
 
     @property
     def _numba_type_(self):
@@ -431,8 +448,7 @@ class DeviceRecord(DeviceNDArrayBase):
 
     @devices.require_context
     def getitem(self, item, stream=0):
-        """Do `__getitem__(item)` with CUDA stream
-        """
+        """Do `__getitem__(item)` with CUDA stream"""
         return self._do_getitem(item, stream)
 
     def _do_getitem(self, item, stream=0):
@@ -442,22 +458,24 @@ class DeviceRecord(DeviceNDArrayBase):
 
         if typ.shape == ():
             if typ.names is not None:
-                return DeviceRecord(dtype=typ, stream=stream,
-                                    gpu_data=newdata)
+                return DeviceRecord(dtype=typ, stream=stream, gpu_data=newdata)
             else:
                 hostary = np.empty(1, dtype=typ)
-                _driver.device_to_host(dst=hostary, src=newdata,
-                                       size=typ.itemsize,
-                                       stream=stream)
+                _driver.device_to_host(
+                    dst=hostary, src=newdata, size=typ.itemsize, stream=stream
+                )
                 return hostary[0]
         else:
-            shape, strides, dtype = \
-                prepare_shape_strides_dtype(typ.shape,
-                                            None,
-                                            typ.subdtype[0],
-                                            'C')
-            return DeviceNDArray(shape=shape, strides=strides, dtype=dtype,
-                                 gpu_data=newdata, stream=stream)
+            shape, strides, dtype = prepare_shape_strides_dtype(
+                typ.shape, None, typ.subdtype[0], "C"
+            )
+            return DeviceNDArray(
+                shape=shape,
+                strides=strides,
+                dtype=dtype,
+                gpu_data=newdata,
+                stream=stream,
+            )
 
     @devices.require_context
     def __setitem__(self, key, value):
@@ -465,12 +483,10 @@ class DeviceRecord(DeviceNDArrayBase):
 
     @devices.require_context
     def setitem(self, key, value, stream=0):
-        """Do `__setitem__(key, value)` with CUDA stream
-        """
+        """Do `__setitem__(key, value)` with CUDA stream"""
         return self._do_setitem(key, value, stream=stream)
 
     def _do_setitem(self, key, value, stream=0):
-
         stream = self._default_stream(stream)
 
         # If the record didn't have a default stream, and the user didn't
@@ -515,6 +531,7 @@ def _assign_kernel(ndim):
         @cuda.jit
         def kernel(lhs, rhs):
             lhs[()] = rhs[()]
+
         return kernel
 
     @cuda.jit
@@ -531,9 +548,7 @@ def _assign_kernel(ndim):
 
         # [0, :] is the to-index (into `lhs`)
        # [1, :] is the from-index (into `rhs`)
-        idx = cuda.local.array(
-            shape=(2, ndim),
-            dtype=types.int64)
+        idx = cuda.local.array(shape=(2, ndim), dtype=types.int64)
 
         for i in range(ndim - 1, -1, -1):
             idx[0, i] = location % lhs.shape[i]
@@ -541,17 +556,19 @@ def _assign_kernel(ndim):
             location //= lhs.shape[i]
 
         lhs[to_fixed_tuple(idx[0], ndim)] = rhs[to_fixed_tuple(idx[1], ndim)]
+
     return kernel
 
 
 class DeviceNDArray(DeviceNDArrayBase):
-    '''
+    """
     An on-GPU array type
-    '''
+    """
+
     def is_f_contiguous(self):
-        '''
+        """
         Return true if the array is Fortran-contiguous.
-        '''
+        """
         return self._dummy.is_f_contig
 
     @property
@@ -562,12 +579,12 @@ class DeviceNDArray(DeviceNDArrayBase):
         with an existing `numpy.ndarray` (as the C- and F- contiguous flags
         aren't writeable).
         """
-        return dict(self._dummy.flags) # defensive copy
+        return dict(self._dummy.flags)  # defensive copy
 
     def is_c_contiguous(self):
-        '''
+        """
         Return true if the array is C-contiguous.
-        '''
+        """
         return self._dummy.is_c_contig
 
     def __array__(self, dtype=None, copy=None):
@@ -590,7 +607,7 @@ class DeviceNDArray(DeviceNDArrayBase):
         Reshape the array without changing its contents, similarly to
         :meth:`numpy.ndarray.reshape`. Example::
 
-            d_arr = d_arr.reshape(20, 50, order='F')
+            d_arr = d_arr.reshape(20, 50, order="F")
         """
         if len(newshape) == 1 and isinstance(newshape[0], (tuple, list)):
             newshape = newshape[0]
@@ -598,31 +615,43 @@ class DeviceNDArray(DeviceNDArrayBase):
         cls = type(self)
         if newshape == self.shape:
             # nothing to do
-            return cls(shape=self.shape, strides=self.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data)
+            return cls(
+                shape=self.shape,
+                strides=self.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+            )
 
         newarr, extents = self._dummy.reshape(*newshape, **kws)
 
         if extents == [self._dummy.extent]:
-            return cls(shape=newarr.shape, strides=newarr.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data)
+            return cls(
+                shape=newarr.shape,
+                strides=newarr.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+            )
         else:
             raise NotImplementedError("operation requires copying")
 
-    def ravel(self, order='C', stream=0):
-        '''
+    def ravel(self, order="C", stream=0):
+        """
         Flattens a contiguous array without changing its contents, similar to
         :meth:`numpy.ndarray.ravel`. If the array is not contiguous, raises an
         exception.
-        '''
+        """
         stream = self._default_stream(stream)
         cls = type(self)
         newarr, extents = self._dummy.ravel(order=order)
 
         if extents == [self._dummy.extent]:
-            return cls(shape=newarr.shape, strides=newarr.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data,
-                       stream=stream)
+            return cls(
+                shape=newarr.shape,
+                strides=newarr.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+                stream=stream,
+            )
 
         else:
             raise NotImplementedError("operation requires copying")
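Both `reshape` and `ravel` above construct a new `DeviceNDArray` over the same `gpu_data` allocation, and fall through to `NotImplementedError("operation requires copying")` when a view is impossible. A short sketch of the view behaviour (assumes a CUDA-capable machine):

    # reshape()/ravel() return views; no device memory is copied.
    import numpy as np
    from numba import cuda

    d_arr = cuda.to_device(np.arange(1000.0))
    d_2d = d_arr.reshape(20, 50)     # same underlying allocation
    d_flat = d_2d.ravel()            # order="C" by default, still a view
    print(d_2d.shape, d_flat.shape)  # (20, 50) (1000,)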
@@ -633,8 +662,7 @@ class DeviceNDArray(DeviceNDArrayBase):
 
     @devices.require_context
     def getitem(self, item, stream=0):
-        """Do `__getitem__(item)` with CUDA stream
-        """
+        """Do `__getitem__(item)` with CUDA stream"""
         return self._do_getitem(item, stream)
 
     def _do_getitem(self, item, stream=0):
@@ -649,22 +677,36 @@ class DeviceNDArray(DeviceNDArrayBase):
             if not arr.is_array:
                 # Check for structured array type (record)
                 if self.dtype.names is not None:
-                    return DeviceRecord(dtype=self.dtype, stream=stream,
-                                        gpu_data=newdata)
+                    return DeviceRecord(
+                        dtype=self.dtype, stream=stream, gpu_data=newdata
+                    )
                 else:
                     # Element indexing
                     hostary = np.empty(1, dtype=self.dtype)
-                    _driver.device_to_host(dst=hostary, src=newdata,
-                                           size=self._dummy.itemsize,
-                                           stream=stream)
+                    _driver.device_to_host(
+                        dst=hostary,
+                        src=newdata,
+                        size=self._dummy.itemsize,
+                        stream=stream,
+                    )
                     return hostary[0]
             else:
-                return cls(shape=arr.shape, strides=arr.strides,
-                           dtype=self.dtype, gpu_data=newdata, stream=stream)
+                return cls(
+                    shape=arr.shape,
+                    strides=arr.strides,
+                    dtype=self.dtype,
+                    gpu_data=newdata,
+                    stream=stream,
+                )
         else:
             newdata = self.gpu_data.view(*arr.extent)
-            return cls(shape=arr.shape, strides=arr.strides,
-                       dtype=self.dtype, gpu_data=newdata, stream=stream)
+            return cls(
+                shape=arr.shape,
+                strides=arr.strides,
+                dtype=self.dtype,
+                gpu_data=newdata,
+                stream=stream,
+            )
 
     @devices.require_context
     def __setitem__(self, key, value):
@@ -672,12 +714,10 @@ class DeviceNDArray(DeviceNDArrayBase):
 
     @devices.require_context
     def setitem(self, key, value, stream=0):
-        """Do `__setitem__(key, value)` with CUDA stream
-        """
+        """Do `__setitem__(key, value)` with CUDA stream"""
         return self._do_setitem(key, value, stream=stream)
 
     def _do_setitem(self, key, value, stream=0):
-
         stream = self._default_stream(stream)
 
         # If the array didn't have a default stream, and the user didn't provide
@@ -706,23 +746,26 @@ class DeviceNDArray(DeviceNDArrayBase):
             strides=strides,
             dtype=self.dtype,
             gpu_data=newdata,
-            stream=stream)
+            stream=stream,
+        )
 
         # (2) prepare RHS
 
         rhs, _ = auto_device(value, stream=stream, user_explicit=True)
         if rhs.ndim > lhs.ndim:
-            raise ValueError("Can't assign %s-D array to %s-D self" % (
-                rhs.ndim,
-                lhs.ndim))
+            raise ValueError(
+                "Can't assign %s-D array to %s-D self" % (rhs.ndim, lhs.ndim)
+            )
         rhs_shape = np.ones(lhs.ndim, dtype=np.int64)
         # negative indices would not work if rhs.ndim == 0
-        rhs_shape[lhs.ndim - rhs.ndim:] = rhs.shape
+        rhs_shape[lhs.ndim - rhs.ndim :] = rhs.shape
         rhs = rhs.reshape(*rhs_shape)
         for i, (l, r) in enumerate(zip(lhs.shape, rhs.shape)):
             if r != 1 and l != r:
-                raise ValueError("Can't copy sequence with size %d to array "
-                                 "axis %d with dimension %d" % (r, i, l))
+                raise ValueError(
+                    "Can't copy sequence with size %d to array "
+                    "axis %d with dimension %d" % (r, i, l)
+                )
 
         # (3) do the copy
 
@@ -751,6 +794,7 @@ class IpcArrayHandle(object):
         some_code(ipc_array)
         # ipc_array is dead at this point
     """
+
     def __init__(self, ipc_handle, array_desc):
         self._array_desc = array_desc
         self._ipc_handle = ipc_handle
@@ -798,8 +842,9 @@ class ManagedNDArray(DeviceNDArrayBase, np.ndarray):
 
 def from_array_like(ary, stream=0, gpu_data=None):
     "Create a DeviceNDArray object that is like ary."
-    return DeviceNDArray(ary.shape, ary.strides, ary.dtype, stream=stream,
-                         gpu_data=gpu_data)
+    return DeviceNDArray(
+        ary.shape, ary.strides, ary.dtype, stream=stream, gpu_data=gpu_data
+    )
 
 
 def from_record_like(rec, stream=0, gpu_data=None):
@@ -841,15 +886,17 @@ def is_contiguous(ary):
     return True
 
 
-errmsg_contiguous_buffer = ("Array contains non-contiguous buffer and cannot "
-                            "be transferred as a single memory region. Please "
-                            "ensure contiguous buffer with numpy "
-                            ".ascontiguousarray()")
+errmsg_contiguous_buffer = (
+    "Array contains non-contiguous buffer and cannot "
+    "be transferred as a single memory region. Please "
+    "ensure contiguous buffer with numpy "
+    ".ascontiguousarray()"
+)
 
 
 def sentry_contiguous(ary):
     core = array_core(ary)
-    if not core.flags['C_CONTIGUOUS'] and not core.flags['F_CONTIGUOUS']:
+    if not core.flags["C_CONTIGUOUS"] and not core.flags["F_CONTIGUOUS"]:
         raise ValueError(errmsg_contiguous_buffer)
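`sentry_contiguous` above rejects host arrays that are neither C- nor F-contiguous, because they cannot be transferred as a single memory region. A small sketch of both outcomes (assumes a CUDA-capable machine):

    # A column slice is neither C- nor F-contiguous, so the transfer
    # is refused until the caller makes a contiguous copy.
    import numpy as np
    from numba import cuda

    a = np.arange(16.0).reshape(4, 4)
    view = a[:, :2]                  # non-contiguous host view
    try:
        cuda.to_device(view)
    except ValueError as e:
        print(e)                     # "Array contains non-contiguous buffer ..."
    cuda.to_device(np.ascontiguousarray(view))  # succeeds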
@@ -861,7 +908,7 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
     """
     if _driver.is_device_memory(obj):
         return obj, False
-    elif hasattr(obj, '__cuda_array_interface__'):
+    elif hasattr(obj, "__cuda_array_interface__"):
         return numba.cuda.as_cuda_array(obj), False
     else:
         if isinstance(obj, np.void):
@@ -873,9 +920,8 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
         # into this function (with no overhead -- copies -- for `obj`s
         # that are already `ndarray`s.
         obj = np.array(
-            obj,
-            copy=False if numpy_version < (2, 0) else None,
-            subok=True)
+            obj, copy=False if numpy_version < (2, 0) else None, subok=True
+        )
         sentry_contiguous(obj)
         devobj = from_array_like(obj, stream=stream)
         if copy:
@@ -883,13 +929,14 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
             config.CUDA_WARN_ON_IMPLICIT_COPY
             and not config.DISABLE_PERFORMANCE_WARNINGS
         ):
-            if (
-                not user_explicit and
-                (not isinstance(obj, DeviceNDArray)
-                 and isinstance(obj, np.ndarray))
+            if not user_explicit and (
+                not isinstance(obj, DeviceNDArray)
+                and isinstance(obj, np.ndarray)
             ):
-                msg = ("Host array used in CUDA kernel will incur "
-                       "copy overhead to/from device.")
+                msg = (
+                    "Host array used in CUDA kernel will incur "
+                    "copy overhead to/from device."
+                )
                 warn(NumbaPerformanceWarning(msg))
             devobj.copy_to_device(obj, stream=stream)
     return devobj, True
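The `auto_device` hunks above are likewise formatting-only: passing a host `numpy.ndarray` directly to a kernel launch still works, but still triggers an implicit transfer plus the `NumbaPerformanceWarning` whose message is rewrapped above. A sketch contrasting the two paths (the kernel is an illustrative example, not taken from the diff; assumes a CUDA-capable machine):

    # Implicit vs. explicit host-to-device transfer around a launch.
    import numpy as np
    from numba import cuda

    @cuda.jit
    def scale(arr, factor):
        i = cuda.grid(1)
        if i < arr.size:
            arr[i] *= factor

    host_arr = np.arange(256, dtype=np.float32)

    # Host array: auto_device() copies it over and warns about the overhead.
    scale[1, 256](host_arr, 2.0)

    # Explicit transfer: no warning; copy back only when the result is needed.
    d_arr = cuda.to_device(host_arr)
    scale[1, 256](d_arr, 2.0)
    result = d_arr.copy_to_host()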
@@ -898,13 +945,16 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
 def check_array_compatibility(ary1, ary2):
     ary1sq, ary2sq = ary1.squeeze(), ary2.squeeze()
     if ary1.dtype != ary2.dtype:
-        raise TypeError('incompatible dtype: %s vs. %s' %
-                        (ary1.dtype, ary2.dtype))
+        raise TypeError(
+            "incompatible dtype: %s vs. %s" % (ary1.dtype, ary2.dtype)
+        )
     if ary1sq.shape != ary2sq.shape:
-        raise ValueError('incompatible shape: %s vs. %s' %
-                         (ary1.shape, ary2.shape))
+        raise ValueError(
+            "incompatible shape: %s vs. %s" % (ary1.shape, ary2.shape)
+        )
     # We check strides only if the size is nonzero, because strides are
     # irrelevant (and can differ) for zero-length copies.
     if ary1.size and ary1sq.strides != ary2sq.strides:
-        raise ValueError('incompatible strides: %s vs. %s' %
-                         (ary1.strides, ary2.strides))
+        raise ValueError(
+            "incompatible strides: %s vs. %s" % (ary1.strides, ary2.strides)
+        )