numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -25,7 +25,7 @@ from numba.core.errors import NumbaPerformanceWarning
 from warnings import warn
 
 try:
-    lru_cache = getattr(functools, 'lru_cache')(None)
+    lru_cache = getattr(functools, "lru_cache")(None)
 except AttributeError:
     # Python 3.1 or lower
     def lru_cache(func):
@@ -34,7 +34,7 @@ except AttributeError:
 
 def is_cuda_ndarray(obj):
     "Check if an object is a CUDA ndarray"
-    return getattr(obj, '__cuda_ndarray__', False)
+    return getattr(obj, "__cuda_ndarray__", False)
 
 
 def verify_cuda_ndarray_interface(obj):
@@ -45,25 +45,25 @@ def verify_cuda_ndarray_interface(obj):
         if not hasattr(obj, attr):
             raise AttributeError(attr)
         if not isinstance(getattr(obj, attr), typ):
-            raise AttributeError('%s must be of type %s' % (attr, typ))
+            raise AttributeError("%s must be of type %s" % (attr, typ))
 
-    requires_attr('shape', tuple)
-    requires_attr('strides', tuple)
-    requires_attr('dtype', np.dtype)
-    requires_attr('size', int)
+    requires_attr("shape", tuple)
+    requires_attr("strides", tuple)
+    requires_attr("dtype", np.dtype)
+    requires_attr("size", int)
 
 
 def require_cuda_ndarray(obj):
     "Raises ValueError is is_cuda_ndarray(obj) evaluates False"
     if not is_cuda_ndarray(obj):
-        raise ValueError('require an cuda ndarray object')
+        raise ValueError("require an cuda ndarray object")
 
 
 class DeviceNDArrayBase(_devicearray.DeviceArray):
-    """A on GPU NDArray representation
-    """
+    """A on GPU NDArray representation"""
+
     __cuda_memory__ = True
-    __cuda_ndarray__ = True # There must be gpu_data attribute
+    __cuda_ndarray__ = True  # There must be gpu_data attribute
 
     def __init__(self, shape, strides, dtype, stream=0, gpu_data=None):
         """
@@ -88,9 +88,10 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         dtype = np.dtype(dtype)
         self.ndim = len(shape)
         if len(strides) != self.ndim:
-            raise ValueError('strides not match ndim')
-        self._dummy = dummyarray.Array.from_desc(0, shape, strides,
-                                                 dtype.itemsize)
+            raise ValueError("strides not match ndim")
+        self._dummy = dummyarray.Array.from_desc(
+            0, shape, strides, dtype.itemsize
+        )
         self.shape = tuple(shape)
         self.strides = tuple(strides)
         self.dtype = dtype
@@ -99,7 +100,8 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         if self.size > 0:
             if gpu_data is None:
                 self.alloc_size = _driver.memory_size_from_info(
-                    self.shape, self.strides, self.dtype.itemsize)
+                    self.shape, self.strides, self.dtype.itemsize
+                )
                 gpu_data = devices.get_context().memalloc(self.alloc_size)
             else:
                 self.alloc_size = _driver.device_memory_size(gpu_data)
@@ -109,8 +111,9 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
                 null = _driver.binding.CUdeviceptr(0)
             else:
                 null = c_void_p(0)
-            gpu_data = _driver.MemoryPointer(context=devices.get_context(),
-                                             pointer=null, size=0)
+            gpu_data = _driver.MemoryPointer(
+                context=devices.get_context(), pointer=null, size=0
+            )
             self.alloc_size = 0
 
         self.gpu_data = gpu_data
@@ -130,12 +133,12 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             ptr = 0
 
         return {
-            'shape': tuple(self.shape),
-            'strides': None if is_contiguous(self) else tuple(self.strides),
-            'data': (ptr, False),
-            'typestr': self.dtype.str,
-            'stream': int(self.stream) if self.stream != 0 else None,
-            'version': 3,
+            "shape": tuple(self.shape),
+            "strides": None if is_contiguous(self) else tuple(self.strides),
+            "data": (ptr, False),
+            "typestr": self.dtype.str,
+            "stream": int(self.stream) if self.stream != 0 else None,
+            "version": 3,
         }
 
     def bind(self, stream=0):
@@ -160,6 +163,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             raise ValueError("invalid axes list %r" % (axes,))
         else:
             from numba.cuda.kernels.transpose import transpose
+
             return transpose(self)
 
     def _default_stream(self, stream):
@@ -186,20 +190,19 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         # layouts.
 
         broadcast = 0 in self.strides
-        if self.flags['C_CONTIGUOUS'] and not broadcast:
-            layout = 'C'
-        elif self.flags['F_CONTIGUOUS'] and not broadcast:
-            layout = 'F'
+        if self.flags["C_CONTIGUOUS"] and not broadcast:
+            layout = "C"
+        elif self.flags["F_CONTIGUOUS"] and not broadcast:
+            layout = "F"
         else:
-            layout = 'A'
+            layout = "A"
 
         dtype = numpy_support.from_dtype(self.dtype)
         return types.Array(dtype, self.ndim, layout)
 
     @property
     def device_ctypes_pointer(self):
-        """Returns the ctypes pointer to the GPU data buffer
-        """
+        """Returns the ctypes pointer to the GPU data buffer"""
         if self.gpu_data is None:
             if _driver.USE_NV_BINDING:
                 return _driver.binding.CUdeviceptr(0)
@@ -232,13 +235,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
         # (i.e., in order to materialize a writable strided view)
         ary_core = np.array(
             ary_core,
-            order='C' if self_core.flags['C_CONTIGUOUS'] else 'F',
+            order="C" if self_core.flags["C_CONTIGUOUS"] else "F",
             subok=True,
-            copy=(not ary_core.flags['WRITEABLE'])
-            if numpy_version < (2, 0) else None)
+            copy=(not ary_core.flags["WRITEABLE"])
+            if numpy_version < (2, 0)
+            else None,
+        )
         check_array_compatibility(self_core, ary_core)
-        _driver.host_to_device(self, ary_core, self.alloc_size,
-                               stream=stream)
+        _driver.host_to_device(
+            self, ary_core, self.alloc_size, stream=stream
+        )
 
     @devices.require_context
     def copy_to_host(self, ary=None, stream=0):
@@ -264,7 +270,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             result_array = d_arr.copy_to_host()
         """
         if any(s < 0 for s in self.strides):
-            msg = 'D->H copy not implemented for negative strides: {}'
+            msg = "D->H copy not implemented for negative strides: {}"
             raise NotImplementedError(msg.format(self.strides))
         assert self.alloc_size >= 0, "Negative memory size"
         stream = self._default_stream(stream)
@@ -275,16 +281,22 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             hostary = ary
 
         if self.alloc_size != 0:
-            _driver.device_to_host(hostary, self, self.alloc_size,
-                                   stream=stream)
+            _driver.device_to_host(
+                hostary, self, self.alloc_size, stream=stream
+            )
 
         if ary is None:
             if self.size == 0:
-                hostary = np.ndarray(shape=self.shape, dtype=self.dtype,
-                                     buffer=hostary)
+                hostary = np.ndarray(
+                    shape=self.shape, dtype=self.dtype, buffer=hostary
+                )
             else:
-                hostary = np.ndarray(shape=self.shape, dtype=self.dtype,
-                                     strides=self.strides, buffer=hostary)
+                hostary = np.ndarray(
+                    shape=self.shape,
+                    dtype=self.dtype,
+                    strides=self.strides,
+                    buffer=hostary,
+                )
         return hostary
 
     def split(self, section, stream=0):
@@ -305,12 +317,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             end = min(begin + section, self.size)
             shape = (end - begin,)
             gpu_data = self.gpu_data.view(begin * itemsize, end * itemsize)
-            yield DeviceNDArray(shape, strides, dtype=self.dtype, stream=stream,
-                                gpu_data=gpu_data)
+            yield DeviceNDArray(
+                shape,
+                strides,
+                dtype=self.dtype,
+                stream=stream,
+                gpu_data=gpu_data,
+            )
 
     def as_cuda_arg(self):
-        """Returns a device memory object that is used as the argument.
-        """
+        """Returns a device memory object that is used as the argument."""
         return self.gpu_data
 
     def get_ipc_handle(self):
@@ -368,8 +384,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
             )
 
             shape[-1], rem = divmod(
-                shape[-1] * self.dtype.itemsize,
-                dtype.itemsize
+                shape[-1] * self.dtype.itemsize, dtype.itemsize
             )
 
             if rem != 0:
@@ -398,14 +413,16 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
 
 
 class DeviceRecord(DeviceNDArrayBase):
-    '''
+    """
     An on-GPU record type
-    '''
+    """
+
     def __init__(self, dtype, stream=0, gpu_data=None):
         shape = ()
         strides = ()
-        super(DeviceRecord, self).__init__(shape, strides, dtype, stream,
-                                           gpu_data)
+        super(DeviceRecord, self).__init__(
+            shape, strides, dtype, stream, gpu_data
+        )
 
     @property
     def flags(self):
@@ -415,7 +432,7 @@ class DeviceRecord(DeviceNDArrayBase):
         with an existing `numpy.ndarray` (as the C- and F- contiguous flags
         aren't writeable).
         """
-        return dict(self._dummy.flags) # defensive copy
+        return dict(self._dummy.flags)  # defensive copy
 
     @property
     def _numba_type_(self):
@@ -431,8 +448,7 @@ class DeviceRecord(DeviceNDArrayBase):
 
     @devices.require_context
     def getitem(self, item, stream=0):
-        """Do `__getitem__(item)` with CUDA stream
-        """
+        """Do `__getitem__(item)` with CUDA stream"""
         return self._do_getitem(item, stream)
 
     def _do_getitem(self, item, stream=0):
@@ -442,22 +458,24 @@ class DeviceRecord(DeviceNDArrayBase):
 
         if typ.shape == ():
             if typ.names is not None:
-                return DeviceRecord(dtype=typ, stream=stream,
-                                    gpu_data=newdata)
+                return DeviceRecord(dtype=typ, stream=stream, gpu_data=newdata)
             else:
                 hostary = np.empty(1, dtype=typ)
-                _driver.device_to_host(dst=hostary, src=newdata,
-                                       size=typ.itemsize,
-                                       stream=stream)
+                _driver.device_to_host(
+                    dst=hostary, src=newdata, size=typ.itemsize, stream=stream
+                )
             return hostary[0]
         else:
-            shape, strides, dtype = \
-                prepare_shape_strides_dtype(typ.shape,
-                                            None,
-                                            typ.subdtype[0], 'C')
-            return DeviceNDArray(shape=shape, strides=strides,
-                                 dtype=dtype, gpu_data=newdata,
-                                 stream=stream)
+            shape, strides, dtype = prepare_shape_strides_dtype(
+                typ.shape, None, typ.subdtype[0], "C"
+            )
+            return DeviceNDArray(
+                shape=shape,
+                strides=strides,
+                dtype=dtype,
+                gpu_data=newdata,
+                stream=stream,
+            )
 
     @devices.require_context
     def __setitem__(self, key, value):
@@ -465,12 +483,10 @@ class DeviceRecord(DeviceNDArrayBase):
 
     @devices.require_context
     def setitem(self, key, value, stream=0):
-        """Do `__setitem__(key, value)` with CUDA stream
-        """
+        """Do `__setitem__(key, value)` with CUDA stream"""
        return self._do_setitem(key, value, stream=stream)
 
     def _do_setitem(self, key, value, stream=0):
-
         stream = self._default_stream(stream)
 
         # If the record didn't have a default stream, and the user didn't
@@ -515,6 +531,7 @@ def _assign_kernel(ndim):
         @cuda.jit
         def kernel(lhs, rhs):
             lhs[()] = rhs[()]
+
         return kernel
 
     @cuda.jit
@@ -531,9 +548,7 @@
 
         # [0, :] is the to-index (into `lhs`)
         # [1, :] is the from-index (into `rhs`)
-        idx = cuda.local.array(
-            shape=(2, ndim),
-            dtype=types.int64)
+        idx = cuda.local.array(shape=(2, ndim), dtype=types.int64)
 
         for i in range(ndim - 1, -1, -1):
             idx[0, i] = location % lhs.shape[i]
@@ -541,17 +556,19 @@
             location //= lhs.shape[i]
 
         lhs[to_fixed_tuple(idx[0], ndim)] = rhs[to_fixed_tuple(idx[1], ndim)]
+
     return kernel
 
 
 class DeviceNDArray(DeviceNDArrayBase):
-    '''
+    """
     An on-GPU array type
-    '''
+    """
+
     def is_f_contiguous(self):
-        '''
+        """
         Return true if the array is Fortran-contiguous.
-        '''
+        """
         return self._dummy.is_f_contig
 
     @property
@@ -562,12 +579,12 @@ class DeviceNDArray(DeviceNDArrayBase):
         with an existing `numpy.ndarray` (as the C- and F- contiguous flags
         aren't writeable).
         """
-        return dict(self._dummy.flags) # defensive copy
+        return dict(self._dummy.flags)  # defensive copy
 
     def is_c_contiguous(self):
-        '''
+        """
         Return true if the array is C-contiguous.
-        '''
+        """
         return self._dummy.is_c_contig
 
     def __array__(self, dtype=None, copy=None):
@@ -590,7 +607,7 @@ class DeviceNDArray(DeviceNDArrayBase):
         Reshape the array without changing its contents, similarly to
         :meth:`numpy.ndarray.reshape`. Example::
 
-            d_arr = d_arr.reshape(20, 50, order='F')
+            d_arr = d_arr.reshape(20, 50, order="F")
         """
         if len(newshape) == 1 and isinstance(newshape[0], (tuple, list)):
             newshape = newshape[0]
@@ -598,31 +615,43 @@ class DeviceNDArray(DeviceNDArrayBase):
         cls = type(self)
         if newshape == self.shape:
             # nothing to do
-            return cls(shape=self.shape, strides=self.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data)
+            return cls(
+                shape=self.shape,
+                strides=self.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+            )
 
         newarr, extents = self._dummy.reshape(*newshape, **kws)
 
         if extents == [self._dummy.extent]:
-            return cls(shape=newarr.shape, strides=newarr.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data)
+            return cls(
+                shape=newarr.shape,
+                strides=newarr.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+            )
         else:
             raise NotImplementedError("operation requires copying")
 
-    def ravel(self, order='C', stream=0):
-        '''
+    def ravel(self, order="C", stream=0):
+        """
         Flattens a contiguous array without changing its contents, similar to
         :meth:`numpy.ndarray.ravel`. If the array is not contiguous, raises an
         exception.
-        '''
+        """
         stream = self._default_stream(stream)
         cls = type(self)
         newarr, extents = self._dummy.ravel(order=order)
 
         if extents == [self._dummy.extent]:
-            return cls(shape=newarr.shape, strides=newarr.strides,
-                       dtype=self.dtype, gpu_data=self.gpu_data,
-                       stream=stream)
+            return cls(
+                shape=newarr.shape,
+                strides=newarr.strides,
+                dtype=self.dtype,
+                gpu_data=self.gpu_data,
+                stream=stream,
+            )
 
         else:
             raise NotImplementedError("operation requires copying")
@@ -633,8 +662,7 @@ class DeviceNDArray(DeviceNDArrayBase):
 
     @devices.require_context
     def getitem(self, item, stream=0):
-        """Do `__getitem__(item)` with CUDA stream
-        """
+        """Do `__getitem__(item)` with CUDA stream"""
         return self._do_getitem(item, stream)
 
     def _do_getitem(self, item, stream=0):
@@ -649,22 +677,36 @@ class DeviceNDArray(DeviceNDArrayBase):
             if not arr.is_array:
                 # Check for structured array type (record)
                 if self.dtype.names is not None:
-                    return DeviceRecord(dtype=self.dtype, stream=stream,
-                                        gpu_data=newdata)
+                    return DeviceRecord(
+                        dtype=self.dtype, stream=stream, gpu_data=newdata
+                    )
                 else:
                     # Element indexing
                     hostary = np.empty(1, dtype=self.dtype)
-                    _driver.device_to_host(dst=hostary, src=newdata,
-                                           size=self._dummy.itemsize,
-                                           stream=stream)
+                    _driver.device_to_host(
+                        dst=hostary,
+                        src=newdata,
+                        size=self._dummy.itemsize,
+                        stream=stream,
+                    )
                 return hostary[0]
             else:
-                return cls(shape=arr.shape, strides=arr.strides,
-                           dtype=self.dtype, gpu_data=newdata, stream=stream)
+                return cls(
+                    shape=arr.shape,
+                    strides=arr.strides,
+                    dtype=self.dtype,
+                    gpu_data=newdata,
+                    stream=stream,
+                )
         else:
             newdata = self.gpu_data.view(*arr.extent)
-            return cls(shape=arr.shape, strides=arr.strides,
-                       dtype=self.dtype, gpu_data=newdata, stream=stream)
+            return cls(
+                shape=arr.shape,
+                strides=arr.strides,
+                dtype=self.dtype,
+                gpu_data=newdata,
+                stream=stream,
+            )
 
     @devices.require_context
     def __setitem__(self, key, value):
@@ -672,12 +714,10 @@ class DeviceNDArray(DeviceNDArrayBase):
 
     @devices.require_context
     def setitem(self, key, value, stream=0):
-        """Do `__setitem__(key, value)` with CUDA stream
-        """
+        """Do `__setitem__(key, value)` with CUDA stream"""
         return self._do_setitem(key, value, stream=stream)
 
     def _do_setitem(self, key, value, stream=0):
-
         stream = self._default_stream(stream)
 
         # If the array didn't have a default stream, and the user didn't provide
@@ -706,23 +746,26 @@ class DeviceNDArray(DeviceNDArrayBase):
             strides=strides,
             dtype=self.dtype,
             gpu_data=newdata,
-            stream=stream)
+            stream=stream,
+        )
 
         # (2) prepare RHS
 
         rhs, _ = auto_device(value, stream=stream, user_explicit=True)
         if rhs.ndim > lhs.ndim:
-            raise ValueError("Can't assign %s-D array to %s-D self" % (
-                rhs.ndim,
-                lhs.ndim))
+            raise ValueError(
+                "Can't assign %s-D array to %s-D self" % (rhs.ndim, lhs.ndim)
+            )
         rhs_shape = np.ones(lhs.ndim, dtype=np.int64)
         # negative indices would not work if rhs.ndim == 0
-        rhs_shape[lhs.ndim - rhs.ndim:] = rhs.shape
+        rhs_shape[lhs.ndim - rhs.ndim :] = rhs.shape
         rhs = rhs.reshape(*rhs_shape)
         for i, (l, r) in enumerate(zip(lhs.shape, rhs.shape)):
             if r != 1 and l != r:
-                raise ValueError("Can't copy sequence with size %d to array "
-                                 "axis %d with dimension %d" % ( r, i, l))
+                raise ValueError(
+                    "Can't copy sequence with size %d to array "
+                    "axis %d with dimension %d" % (r, i, l)
+                )
 
         # (3) do the copy
 
@@ -751,6 +794,7 @@ class IpcArrayHandle(object):
             some_code(ipc_array)
         # ipc_array is dead at this point
     """
+
     def __init__(self, ipc_handle, array_desc):
         self._array_desc = array_desc
         self._ipc_handle = ipc_handle
@@ -798,8 +842,9 @@ class ManagedNDArray(DeviceNDArrayBase, np.ndarray):
 
 def from_array_like(ary, stream=0, gpu_data=None):
     "Create a DeviceNDArray object that is like ary."
-    return DeviceNDArray(ary.shape, ary.strides, ary.dtype, stream=stream,
-                         gpu_data=gpu_data)
+    return DeviceNDArray(
+        ary.shape, ary.strides, ary.dtype, stream=stream, gpu_data=gpu_data
+    )
 
 
 def from_record_like(rec, stream=0, gpu_data=None):
@@ -841,15 +886,17 @@ def is_contiguous(ary):
     return True
 
 
-errmsg_contiguous_buffer = ("Array contains non-contiguous buffer and cannot "
-                            "be transferred as a single memory region. Please "
-                            "ensure contiguous buffer with numpy "
-                            ".ascontiguousarray()")
+errmsg_contiguous_buffer = (
+    "Array contains non-contiguous buffer and cannot "
+    "be transferred as a single memory region. Please "
+    "ensure contiguous buffer with numpy "
+    ".ascontiguousarray()"
+)
 
 
 def sentry_contiguous(ary):
     core = array_core(ary)
-    if not core.flags['C_CONTIGUOUS'] and not core.flags['F_CONTIGUOUS']:
+    if not core.flags["C_CONTIGUOUS"] and not core.flags["F_CONTIGUOUS"]:
         raise ValueError(errmsg_contiguous_buffer)
 
 
@@ -861,7 +908,7 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
     """
     if _driver.is_device_memory(obj):
         return obj, False
-    elif hasattr(obj, '__cuda_array_interface__'):
+    elif hasattr(obj, "__cuda_array_interface__"):
        return numba.cuda.as_cuda_array(obj), False
    else:
        if isinstance(obj, np.void):
@@ -873,9 +920,8 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
             # into this function (with no overhead -- copies -- for `obj`s
             # that are already `ndarray`s.
             obj = np.array(
-                obj,
-                copy=False if numpy_version < (2, 0) else None,
-                subok=True)
+                obj, copy=False if numpy_version < (2, 0) else None, subok=True
+            )
         sentry_contiguous(obj)
         devobj = from_array_like(obj, stream=stream)
         if copy:
@@ -883,13 +929,14 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
                 config.CUDA_WARN_ON_IMPLICIT_COPY
                 and not config.DISABLE_PERFORMANCE_WARNINGS
             ):
-                if (
-                    not user_explicit and
-                    (not isinstance(obj, DeviceNDArray)
-                     and isinstance(obj, np.ndarray))
+                if not user_explicit and (
+                    not isinstance(obj, DeviceNDArray)
+                    and isinstance(obj, np.ndarray)
                 ):
-                    msg = ("Host array used in CUDA kernel will incur "
-                           "copy overhead to/from device.")
+                    msg = (
+                        "Host array used in CUDA kernel will incur "
+                        "copy overhead to/from device."
+                    )
                     warn(NumbaPerformanceWarning(msg))
             devobj.copy_to_device(obj, stream=stream)
         return devobj, True
@@ -898,13 +945,16 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
 def check_array_compatibility(ary1, ary2):
     ary1sq, ary2sq = ary1.squeeze(), ary2.squeeze()
     if ary1.dtype != ary2.dtype:
-        raise TypeError('incompatible dtype: %s vs. %s' %
-                        (ary1.dtype, ary2.dtype))
+        raise TypeError(
+            "incompatible dtype: %s vs. %s" % (ary1.dtype, ary2.dtype)
+        )
     if ary1sq.shape != ary2sq.shape:
-        raise ValueError('incompatible shape: %s vs. %s' %
-                         (ary1.shape, ary2.shape))
+        raise ValueError(
+            "incompatible shape: %s vs. %s" % (ary1.shape, ary2.shape)
+        )
     # We check strides only if the size is nonzero, because strides are
     # irrelevant (and can differ) for zero-length copies.
     if ary1.size and ary1sq.strides != ary2sq.strides:
-        raise ValueError('incompatible strides: %s vs. %s' %
-                         (ary1.strides, ary2.strides))
+        raise ValueError(
+            "incompatible strides: %s vs. %s" % (ary1.strides, ary2.strides)
+        )
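The hunks above are Black-style reformatting of the device array implementation (double quotes, exploded call arguments); the public behaviour of the API is unchanged. For reference only, a minimal usage sketch of that API follows. It is not part of the diff and assumes numba-cuda is installed and a CUDA-capable GPU is available:

    import numpy as np
    from numba import cuda

    host = np.arange(16, dtype=np.float32).reshape(4, 4)

    d_arr = cuda.to_device(host)      # host->device copy (routed through auto_device/from_array_like)
    d_view = d_arr.reshape(2, 8)      # reshape a contiguous DeviceNDArray without copying
    d_flat = d_arr.ravel(order="C")   # flatten; raises if the array is not contiguous
    result = d_arr.copy_to_host()     # device->host copy back into a NumPy array

    assert np.array_equal(result, host)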