numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,7 @@ class CudaArrayIndexing(CUDATestCase):
11
11
  def test_index_1d(self):
12
12
  arr = np.arange(10)
13
13
  darr = cuda.to_device(arr)
14
- x, = arr.shape
14
+ (x,) = arr.shape
15
15
  for i in range(-x, x):
16
16
  self.assertEqual(arr[i], darr[i])
17
17
  with self.assertRaises(IndexError):
@@ -58,7 +58,6 @@ class CudaArrayIndexing(CUDATestCase):
58
58
 
59
59
 
60
60
  class CudaArrayStridedSlice(CUDATestCase):
61
-
62
61
  def test_strided_index_1d(self):
63
62
  arr = np.arange(10)
64
63
  darr = cuda.to_device(arr)
@@ -71,8 +70,9 @@ class CudaArrayStridedSlice(CUDATestCase):
71
70
 
72
71
  for i in range(arr.shape[0]):
73
72
  for j in range(arr.shape[1]):
74
- np.testing.assert_equal(arr[i::2, j::2],
75
- darr[i::2, j::2].copy_to_host())
73
+ np.testing.assert_equal(
74
+ arr[i::2, j::2], darr[i::2, j::2].copy_to_host()
75
+ )
76
76
 
77
77
  def test_strided_index_3d(self):
78
78
  arr = np.arange(6 * 7 * 8).reshape(6, 7, 8)
@@ -83,7 +83,8 @@ class CudaArrayStridedSlice(CUDATestCase):
83
83
  for k in range(arr.shape[2]):
84
84
  np.testing.assert_equal(
85
85
  arr[i::2, j::2, k::2],
86
- darr[i::2, j::2, k::2].copy_to_host())
86
+ darr[i::2, j::2, k::2].copy_to_host(),
87
+ )
87
88
 
88
89
 
89
90
  class CudaArraySlicing(CUDATestCase):
@@ -96,7 +97,7 @@ class CudaArraySlicing(CUDATestCase):
96
97
  self.assertTrue(np.all(expect == got))
97
98
 
98
99
  def test_prefix_2d(self):
99
- arr = np.arange(3 ** 2).reshape(3, 3)
100
+ arr = np.arange(3**2).reshape(3, 3)
100
101
  darr = cuda.to_device(arr)
101
102
  for i in range(arr.shape[0]):
102
103
  for j in range(arr.shape[1]):
@@ -129,39 +130,45 @@ class CudaArraySlicing(CUDATestCase):
129
130
  self.assertTrue(np.all(expect == got))
130
131
 
131
132
  def test_select_f(self):
132
- a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order='F')
133
+ a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order="F")
133
134
  da = cuda.to_device(a)
134
135
 
135
136
  for i in range(a.shape[0]):
136
137
  for j in range(a.shape[1]):
137
- self.assertTrue(np.array_equal(da[i, j, :].copy_to_host(),
138
- a[i, j, :]))
138
+ self.assertTrue(
139
+ np.array_equal(da[i, j, :].copy_to_host(), a[i, j, :])
140
+ )
139
141
  for j in range(a.shape[2]):
140
- self.assertTrue(np.array_equal(da[i, :, j].copy_to_host(),
141
- a[i, :, j]))
142
+ self.assertTrue(
143
+ np.array_equal(da[i, :, j].copy_to_host(), a[i, :, j])
144
+ )
142
145
  for i in range(a.shape[1]):
143
146
  for j in range(a.shape[2]):
144
- self.assertTrue(np.array_equal(da[:, i, j].copy_to_host(),
145
- a[:, i, j]))
147
+ self.assertTrue(
148
+ np.array_equal(da[:, i, j].copy_to_host(), a[:, i, j])
149
+ )
146
150
 
147
151
  def test_select_c(self):
148
- a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order='C')
152
+ a = np.arange(5 * 6 * 7).reshape(5, 6, 7, order="C")
149
153
  da = cuda.to_device(a)
150
154
 
151
155
  for i in range(a.shape[0]):
152
156
  for j in range(a.shape[1]):
153
- self.assertTrue(np.array_equal(da[i, j, :].copy_to_host(),
154
- a[i, j, :]))
157
+ self.assertTrue(
158
+ np.array_equal(da[i, j, :].copy_to_host(), a[i, j, :])
159
+ )
155
160
  for j in range(a.shape[2]):
156
- self.assertTrue(np.array_equal(da[i, :, j].copy_to_host(),
157
- a[i, :, j]))
161
+ self.assertTrue(
162
+ np.array_equal(da[i, :, j].copy_to_host(), a[i, :, j])
163
+ )
158
164
  for i in range(a.shape[1]):
159
165
  for j in range(a.shape[2]):
160
- self.assertTrue(np.array_equal(da[:, i, j].copy_to_host(),
161
- a[:, i, j]))
166
+ self.assertTrue(
167
+ np.array_equal(da[:, i, j].copy_to_host(), a[:, i, j])
168
+ )
162
169
 
163
170
  def test_prefix_select(self):
164
- arr = np.arange(5 * 7).reshape(5, 7, order='F')
171
+ arr = np.arange(5 * 7).reshape(5, 7, order="F")
165
172
 
166
173
  darr = cuda.to_device(arr)
167
174
  self.assertTrue(np.all(darr[:1, 1].copy_to_host() == arr[:1, 1]))
@@ -170,15 +177,15 @@ class CudaArraySlicing(CUDATestCase):
170
177
  arr = np.arange(10)
171
178
  darr = cuda.to_device(arr)
172
179
  for i, j in product(range(-10, 10), repeat=2):
173
- np.testing.assert_array_equal(arr[i:j],
174
- darr[i:j].copy_to_host())
180
+ np.testing.assert_array_equal(arr[i:j], darr[i:j].copy_to_host())
175
181
 
176
182
  def test_negative_slicing_2d(self):
177
183
  arr = np.arange(12).reshape(3, 4)
178
184
  darr = cuda.to_device(arr)
179
185
  for x, y, w, s in product(range(-4, 4), repeat=4):
180
- np.testing.assert_array_equal(arr[x:y, w:s],
181
- darr[x:y, w:s].copy_to_host())
186
+ np.testing.assert_array_equal(
187
+ arr[x:y, w:s], darr[x:y, w:s].copy_to_host()
188
+ )
182
189
 
183
190
  def test_empty_slice_1d(self):
184
191
  arr = np.arange(5)
@@ -186,12 +193,12 @@ class CudaArraySlicing(CUDATestCase):
186
193
  for i in range(darr.shape[0]):
187
194
  np.testing.assert_array_equal(darr[i:i].copy_to_host(), arr[i:i])
188
195
  # empty slice of empty slice
189
- self.assertFalse(darr[:0][:0].copy_to_host())
196
+ self.assertFalse(darr[:0][:0].copy_to_host().size > 0)
190
197
  # out-of-bound slice just produces empty slices
191
- np.testing.assert_array_equal(darr[:0][:1].copy_to_host(),
192
- arr[:0][:1])
193
- np.testing.assert_array_equal(darr[:0][-1:].copy_to_host(),
194
- arr[:0][-1:])
198
+ np.testing.assert_array_equal(darr[:0][:1].copy_to_host(), arr[:0][:1])
199
+ np.testing.assert_array_equal(
200
+ darr[:0][-1:].copy_to_host(), arr[:0][-1:]
201
+ )
195
202
 
196
203
  def test_empty_slice_2d(self):
197
204
  arr = np.arange(5 * 7).reshape(5, 7)
@@ -199,11 +206,12 @@ class CudaArraySlicing(CUDATestCase):
199
206
  np.testing.assert_array_equal(darr[:0].copy_to_host(), arr[:0])
200
207
  np.testing.assert_array_equal(darr[3, :0].copy_to_host(), arr[3, :0])
201
208
  # empty slice of empty slice
202
- self.assertFalse(darr[:0][:0].copy_to_host())
209
+ self.assertFalse(darr[:0][:0].copy_to_host().size > 0)
203
210
  # out-of-bound slice just produces empty slices
204
211
  np.testing.assert_array_equal(darr[:0][:1].copy_to_host(), arr[:0][:1])
205
- np.testing.assert_array_equal(darr[:0][-1:].copy_to_host(),
206
- arr[:0][-1:])
212
+ np.testing.assert_array_equal(
213
+ darr[:0][-1:].copy_to_host(), arr[:0][-1:]
214
+ )
207
215
 
208
216
 
209
217
  class CudaArraySetting(CUDATestCase):
@@ -292,7 +300,8 @@ class CudaArraySetting(CUDATestCase):
292
300
  "Can't assign 3-D array to 1-D self", # device
293
301
  "could not broadcast input array from shape (2,3) "
294
302
  "into shape (35,)", # simulator, NP >= 1.20
295
- ])
303
+ ],
304
+ )
296
305
 
297
306
  def test_incompatible_shape(self):
298
307
  darr = cuda.to_device(np.arange(5))
@@ -306,57 +315,67 @@ class CudaArraySetting(CUDATestCase):
306
315
  "Can't copy sequence with size 2 to array axis 0 with "
307
316
  "dimension 5", # device
308
317
  "could not broadcast input array from shape (2,) into "
309
- "shape (5,)", # simulator, NP >= 1.20
310
- ])
318
+ "shape (5,)", # simulator, NP >= 1.20
319
+ ],
320
+ )
311
321
 
312
- @skip_on_cudasim('cudasim does not use streams and operates synchronously')
322
+ @skip_on_cudasim("cudasim does not use streams and operates synchronously")
313
323
  def test_sync(self):
314
324
  # There should be a synchronization when no stream is supplied
315
325
  darr = cuda.to_device(np.arange(5))
316
326
 
317
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
318
- return_value=None) as mock_sync:
327
+ with patch.object(
328
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
329
+ ) as mock_sync:
319
330
  darr[0] = 10
320
331
 
321
332
  mock_sync.assert_called_once()
322
333
 
323
- @skip_on_cudasim('cudasim does not use streams and operates synchronously')
334
+ @skip_on_cudasim("cudasim does not use streams and operates synchronously")
324
335
  def test_no_sync_default_stream(self):
325
336
  # There should not be a synchronization when the array has a default
326
337
  # stream, whether it is the default stream, the legacy default stream,
327
338
  # the per-thread default stream, or another stream.
328
- streams = (cuda.stream(), cuda.default_stream(),
329
- cuda.legacy_default_stream(),
330
- cuda.per_thread_default_stream())
339
+ streams = (
340
+ cuda.stream(),
341
+ cuda.default_stream(),
342
+ cuda.legacy_default_stream(),
343
+ cuda.per_thread_default_stream(),
344
+ )
331
345
 
332
346
  for stream in streams:
333
347
  darr = cuda.to_device(np.arange(5), stream=stream)
334
348
 
335
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
336
- return_value=None) as mock_sync:
349
+ with patch.object(
350
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
351
+ ) as mock_sync:
337
352
  darr[0] = 10
338
353
 
339
354
  mock_sync.assert_not_called()
340
355
 
341
- @skip_on_cudasim('cudasim does not use streams and operates synchronously')
356
+ @skip_on_cudasim("cudasim does not use streams and operates synchronously")
342
357
  def test_no_sync_supplied_stream(self):
343
358
  # There should not be a synchronization when a stream is supplied for
344
359
  # the setitem call, whether it is the default stream, the legacy default
345
360
  # stream, the per-thread default stream, or another stream.
346
- streams = (cuda.stream(), cuda.default_stream(),
347
- cuda.legacy_default_stream(),
348
- cuda.per_thread_default_stream())
361
+ streams = (
362
+ cuda.stream(),
363
+ cuda.default_stream(),
364
+ cuda.legacy_default_stream(),
365
+ cuda.per_thread_default_stream(),
366
+ )
349
367
 
350
368
  for stream in streams:
351
369
  darr = cuda.to_device(np.arange(5))
352
370
 
353
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
354
- return_value=None) as mock_sync:
371
+ with patch.object(
372
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
373
+ ) as mock_sync:
355
374
  darr.setitem(0, 10, stream=stream)
356
375
 
357
376
  mock_sync.assert_not_called()
358
377
 
359
- @unittest.skip('Requires PR #6367')
378
+ @unittest.skip("Requires PR #6367")
360
379
  def test_issue_6505(self):
361
380
  # On Windows, the writes to ary_v would not be visible prior to the
362
381
  # assertion, due to the assignment being done with a kernel launch that
@@ -365,11 +384,11 @@ class CudaArraySetting(CUDATestCase):
365
384
  ary = cuda.mapped_array(2, dtype=np.int32)
366
385
  ary[:] = 0
367
386
 
368
- ary_v = ary.view('u1')
387
+ ary_v = ary.view("u1")
369
388
  ary_v[1] = 1
370
389
  ary_v[5] = 1
371
390
  self.assertEqual(sum(ary), 512)
372
391
 
373
392
 
374
- if __name__ == '__main__':
393
+ if __name__ == "__main__":
375
394
  unittest.main()
@@ -17,5 +17,5 @@ class TestCudaAutoContext(CUDATestCase):
17
17
  self.assertTrue(np.allclose(A, newA))
18
18
 
19
19
 
20
- if __name__ == '__main__':
20
+ if __name__ == "__main__":
21
21
  unittest.main()
@@ -1,7 +1,10 @@
1
1
  import numpy as np
2
2
  import ctypes
3
- from numba.cuda.cudadrv.devicearray import (DeviceRecord, from_record_like,
4
- auto_device)
3
+ from numba.cuda.cudadrv.devicearray import (
4
+ DeviceRecord,
5
+ from_record_like,
6
+ auto_device,
7
+ )
5
8
  from numba.cuda.testing import unittest, CUDATestCase
6
9
  from numba.cuda.testing import skip_on_cudasim
7
10
  from numba.np import numpy_support
@@ -11,43 +14,37 @@ N_CHARS = 5
11
14
 
12
15
  recordtype = np.dtype(
13
16
  [
14
- ('a', np.float64),
15
- ('b', np.int32),
16
- ('c', np.complex64),
17
- ('d', (np.str_, N_CHARS))
17
+ ("a", np.float64),
18
+ ("b", np.int32),
19
+ ("c", np.complex64),
20
+ ("d", (np.str_, N_CHARS)),
18
21
  ],
19
- align=True
22
+ align=True,
20
23
  )
21
24
 
22
- recordwitharray = np.dtype(
23
- [
24
- ('g', np.int32),
25
- ('h', np.float32, 2)
26
- ],
27
- align=True
28
- )
25
+ recordwitharray = np.dtype([("g", np.int32), ("h", np.float32, 2)], align=True)
29
26
 
30
- recwithmat = np.dtype([('i', np.int32),
31
- ('j', np.float32, (3, 3))])
27
+ recwithmat = np.dtype([("i", np.int32), ("j", np.float32, (3, 3))])
32
28
 
33
- recwithrecwithmat = np.dtype([('x', np.int32), ('y', recwithmat)])
29
+ recwithrecwithmat = np.dtype([("x", np.int32), ("y", recwithmat)])
34
30
 
35
31
 
36
- @skip_on_cudasim('Device Record API unsupported in the simulator')
32
+ @skip_on_cudasim("Device Record API unsupported in the simulator")
37
33
  class TestCudaDeviceRecord(CUDATestCase):
38
34
  """
39
35
  Tests the DeviceRecord class with np.void host types.
40
36
  """
37
+
41
38
  def setUp(self):
42
39
  super().setUp()
43
40
  self._create_data(np.zeros)
44
41
 
45
42
  def _create_data(self, array_ctor):
46
- self.dtype = np.dtype([('a', np.int32), ('b', np.float32)], align=True)
43
+ self.dtype = np.dtype([("a", np.int32), ("b", np.float32)], align=True)
47
44
  self.hostz = array_ctor(1, self.dtype)[0]
48
45
  self.hostnz = array_ctor(1, self.dtype)[0]
49
- self.hostnz['a'] = 10
50
- self.hostnz['b'] = 11.0
46
+ self.hostnz["a"] = 10
47
+ self.hostnz["b"] = 11.0
51
48
 
52
49
  def _check_device_record(self, reference, rec):
53
50
  self.assertEqual(rec.shape, tuple())
@@ -111,21 +108,22 @@ class TestCudaDeviceRecordWithRecord(TestCudaDeviceRecord):
111
108
  """
112
109
  Tests the DeviceRecord class with np.record host types
113
110
  """
111
+
114
112
  def setUp(self):
115
113
  CUDATestCase.setUp(self)
116
114
  self._create_data(np.recarray)
117
115
 
118
116
 
119
- @skip_on_cudasim('Structured array attr access not supported in simulator')
117
+ @skip_on_cudasim("Structured array attr access not supported in simulator")
120
118
  class TestRecordDtypeWithStructArrays(CUDATestCase):
121
- '''
119
+ """
122
120
  Test operation of device arrays on structured arrays.
123
- '''
121
+ """
124
122
 
125
123
  def _createSampleArrays(self):
126
124
  self.sample1d = cuda.device_array(3, dtype=recordtype)
127
125
  self.samplerec1darr = cuda.device_array(1, dtype=recordwitharray)[0]
128
- self.samplerecmat = cuda.device_array(1,dtype=recwithmat)[0]
126
+ self.samplerecmat = cuda.device_array(1, dtype=recwithmat)[0]
129
127
 
130
128
  def setUp(self):
131
129
  super().setUp()
@@ -134,46 +132,46 @@ class TestRecordDtypeWithStructArrays(CUDATestCase):
134
132
  ary = self.sample1d
135
133
  for i in range(ary.size):
136
134
  x = i + 1
137
- ary[i]['a'] = x / 2
138
- ary[i]['b'] = x
139
- ary[i]['c'] = x * 1j
140
- ary[i]['d'] = str(x) * N_CHARS
135
+ ary[i]["a"] = x / 2
136
+ ary[i]["b"] = x
137
+ ary[i]["c"] = x * 1j
138
+ ary[i]["d"] = str(x) * N_CHARS
141
139
 
142
140
  def test_structured_array1(self):
143
141
  ary = self.sample1d
144
142
  for i in range(self.sample1d.size):
145
143
  x = i + 1
146
- self.assertEqual(ary[i]['a'], x / 2)
147
- self.assertEqual(ary[i]['b'], x)
148
- self.assertEqual(ary[i]['c'], x * 1j)
149
- self.assertEqual(ary[i]['d'], str(x) * N_CHARS)
144
+ self.assertEqual(ary[i]["a"], x / 2)
145
+ self.assertEqual(ary[i]["b"], x)
146
+ self.assertEqual(ary[i]["c"], x * 1j)
147
+ self.assertEqual(ary[i]["d"], str(x) * N_CHARS)
150
148
 
151
149
  def test_structured_array2(self):
152
150
  ary = self.samplerec1darr
153
- ary['g'] = 2
154
- ary['h'][0] = 3.0
155
- ary['h'][1] = 4.0
156
- self.assertEqual(ary['g'], 2)
157
- self.assertEqual(ary['h'][0], 3.0)
158
- self.assertEqual(ary['h'][1], 4.0)
151
+ ary["g"] = 2
152
+ ary["h"][0] = 3.0
153
+ ary["h"][1] = 4.0
154
+ self.assertEqual(ary["g"], 2)
155
+ self.assertEqual(ary["h"][0], 3.0)
156
+ self.assertEqual(ary["h"][1], 4.0)
159
157
 
160
158
  def test_structured_array3(self):
161
159
  ary = self.samplerecmat
162
- mat = np.array([[5.0, 10.0, 15.0],
163
- [20.0, 25.0, 30.0],
164
- [35.0, 40.0, 45.0]],
165
- dtype=np.float32).reshape(3,3)
166
- ary['j'][:] = mat
167
- np.testing.assert_equal(ary['j'], mat)
160
+ mat = np.array(
161
+ [[5.0, 10.0, 15.0], [20.0, 25.0, 30.0], [35.0, 40.0, 45.0]],
162
+ dtype=np.float32,
163
+ ).reshape(3, 3)
164
+ ary["j"][:] = mat
165
+ np.testing.assert_equal(ary["j"], mat)
168
166
 
169
167
  def test_structured_array4(self):
170
168
  arr = np.zeros(1, dtype=recwithrecwithmat)
171
169
  d_arr = cuda.to_device(arr)
172
- d_arr[0]['y']['i'] = 1
173
- self.assertEqual(d_arr[0]['y']['i'], 1)
174
- d_arr[0]['y']['j'][0, 0] = 2.0
175
- self.assertEqual(d_arr[0]['y']['j'][0, 0], 2.0)
170
+ d_arr[0]["y"]["i"] = 1
171
+ self.assertEqual(d_arr[0]["y"]["i"], 1)
172
+ d_arr[0]["y"]["j"][0, 0] = 2.0
173
+ self.assertEqual(d_arr[0]["y"]["j"][0, 0], 2.0)
176
174
 
177
175
 
178
- if __name__ == '__main__':
176
+ if __name__ == "__main__":
179
177
  unittest.main()
@@ -1,13 +1,17 @@
1
1
  from ctypes import byref, c_int, c_void_p, sizeof
2
2
 
3
- from numba.cuda.cudadrv.driver import (host_to_device, device_to_host, driver,
4
- launch_kernel)
3
+ from numba.cuda.cudadrv.driver import (
4
+ host_to_device,
5
+ device_to_host,
6
+ driver,
7
+ launch_kernel,
8
+ )
5
9
  from numba.cuda.cudadrv import devices, drvapi, driver as _driver
6
10
  from numba.cuda.testing import unittest, CUDATestCase
7
11
  from numba.cuda.testing import skip_on_cudasim
8
12
 
9
13
 
10
- ptx1 = '''
14
+ ptx1 = """
11
15
  .version 1.4
12
16
  .target sm_10, map_f64_to_f32
13
17
 
@@ -29,9 +33,9 @@ $LDWbegin__Z10helloworldPi:
29
33
  exit;
30
34
  $LDWend__Z10helloworldPi:
31
35
  } // _Z10helloworldPi
32
- '''
36
+ """
33
37
 
34
- ptx2 = '''
38
+ ptx2 = """
35
39
  .version 3.0
36
40
  .target sm_20
37
41
  .address_size 64
@@ -57,10 +61,10 @@ ptx2 = '''
57
61
  .loc 2 7 2
58
62
  ret;
59
63
  }
60
- '''
64
+ """
61
65
 
62
66
 
63
- @skip_on_cudasim('CUDA Driver API unsupported in the simulator')
67
+ @skip_on_cudasim("CUDA Driver API unsupported in the simulator")
64
68
  class TestCudaDriver(CUDATestCase):
65
69
  def setUp(self):
66
70
  super().setUp()
@@ -79,7 +83,7 @@ class TestCudaDriver(CUDATestCase):
79
83
 
80
84
  def test_cuda_driver_basic(self):
81
85
  module = self.context.create_module_ptx(self.ptx)
82
- function = module.get_function('_Z10helloworldPi')
86
+ function = module.get_function("_Z10helloworldPi")
83
87
 
84
88
  array = (c_int * 100)()
85
89
 
@@ -93,12 +97,18 @@ class TestCudaDriver(CUDATestCase):
93
97
  ptr = c_void_p(int(ptr))
94
98
  stream = _driver.binding.CUstream(stream)
95
99
 
96
- launch_kernel(function.handle, # Kernel
97
- 1, 1, 1, # gx, gy, gz
98
- 100, 1, 1, # bx, by, bz
99
- 0, # dynamic shared mem
100
- stream, # stream
101
- [ptr]) # arguments
100
+ launch_kernel(
101
+ function.handle, # Kernel
102
+ 1,
103
+ 1,
104
+ 1, # gx, gy, gz
105
+ 100,
106
+ 1,
107
+ 1, # bx, by, bz
108
+ 0, # dynamic shared mem
109
+ stream, # stream
110
+ [ptr],
111
+ ) # arguments
102
112
 
103
113
  device_to_host(array, memory, sizeof(array))
104
114
  for i, v in enumerate(array):
@@ -108,7 +118,7 @@ class TestCudaDriver(CUDATestCase):
108
118
 
109
119
  def test_cuda_driver_stream_operations(self):
110
120
  module = self.context.create_module_ptx(self.ptx)
111
- function = module.get_function('_Z10helloworldPi')
121
+ function = module.get_function("_Z10helloworldPi")
112
122
 
113
123
  array = (c_int * 100)()
114
124
 
@@ -122,12 +132,18 @@ class TestCudaDriver(CUDATestCase):
122
132
  if _driver.USE_NV_BINDING:
123
133
  ptr = c_void_p(int(ptr))
124
134
 
125
- launch_kernel(function.handle, # Kernel
126
- 1, 1, 1, # gx, gy, gz
127
- 100, 1, 1, # bx, by, bz
128
- 0, # dynamic shared mem
129
- stream.handle, # stream
130
- [ptr]) # arguments
135
+ launch_kernel(
136
+ function.handle, # Kernel
137
+ 1,
138
+ 1,
139
+ 1, # gx, gy, gz
140
+ 100,
141
+ 1,
142
+ 1, # bx, by, bz
143
+ 0, # dynamic shared mem
144
+ stream.handle, # stream
145
+ [ptr],
146
+ ) # arguments
131
147
 
132
148
  device_to_host(array, memory, sizeof(array), stream=stream)
133
149
 
@@ -193,17 +209,19 @@ class TestCudaDriver(CUDATestCase):
193
209
 
194
210
  def test_cuda_driver_occupancy(self):
195
211
  module = self.context.create_module_ptx(self.ptx)
196
- function = module.get_function('_Z10helloworldPi')
212
+ function = module.get_function("_Z10helloworldPi")
197
213
 
198
- value = self.context.get_active_blocks_per_multiprocessor(function,
199
- 128, 128)
214
+ value = self.context.get_active_blocks_per_multiprocessor(
215
+ function, 128, 128
216
+ )
200
217
  self.assertTrue(value > 0)
201
218
 
202
219
  def b2d(bs):
203
220
  return bs
204
221
 
205
- grid, block = self.context.get_max_potential_block_size(function, b2d,
206
- 128, 128)
222
+ grid, block = self.context.get_max_potential_block_size(
223
+ function, b2d, 128, 128
224
+ )
207
225
  self.assertTrue(grid > 0)
208
226
  self.assertTrue(block > 0)
209
227
 
@@ -221,15 +239,15 @@ class TestDevice(CUDATestCase):
221
239
  # 4122) pertaining to versions and variants, so we do not extract and
222
240
  # validate the values of these bits.
223
241
 
224
- h = '[0-9a-f]{%d}'
242
+ h = "[0-9a-f]{%d}"
225
243
  h4 = h % 4
226
244
  h8 = h % 8
227
245
  h12 = h % 12
228
- uuid_format = f'^GPU-{h8}-{h4}-{h4}-{h4}-{h12}$'
246
+ uuid_format = f"^GPU-{h8}-{h4}-{h4}-{h4}-{h12}$"
229
247
 
230
248
  dev = devices.get_context().device
231
249
  self.assertRegex(dev.uuid, uuid_format)
232
250
 
233
251
 
234
- if __name__ == '__main__':
252
+ if __name__ == "__main__":
235
253
  unittest.main()
@@ -3,7 +3,7 @@ from numba.cuda.testing import skip_on_cudasim, skip_unless_conda_cudatoolkit
3
3
  from numba.misc.findlib import find_lib
4
4
 
5
5
 
6
- @skip_on_cudasim('Library detection unsupported in the simulator')
6
+ @skip_on_cudasim("Library detection unsupported in the simulator")
7
7
  @skip_unless_conda_cudatoolkit
8
8
  class TestLibraryDetection(unittest.TestCase):
9
9
  def test_detect(self):
@@ -13,10 +13,10 @@ class TestLibraryDetection(unittest.TestCase):
13
13
  PyCulib (and potentially others) rely on Numba's library finding
14
14
  capacity to find and subsequently load these libraries.
15
15
  """
16
- core_libs = ['nvvm']
16
+ core_libs = ["nvvm"]
17
17
  for l in core_libs:
18
18
  self.assertNotEqual(find_lib(l), [])
19
19
 
20
20
 
21
- if __name__ == '__main__':
21
+ if __name__ == "__main__":
22
22
  unittest.main()