numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -13,9 +13,11 @@ import unittest
13
13
  # Signatures to test with - these are all homogeneous in dtype, so the output
14
14
  # dtype should match the input dtype - the output should not have been cast
15
15
  # upwards, as reported in #8400: https://github.com/numba/numba/issues/8400
16
- signatures = [int32(int32, int32),
17
- float32(float32, float32),
18
- float64(float64, float64)]
16
+ signatures = [
17
+ int32(int32, int32),
18
+ float32(float32, float32),
19
+ float64(float64, float64),
20
+ ]
19
21
 
20
22
  # The order here is chosen such that each subsequent dtype might have been
21
23
  # casted to a previously-used dtype. This is unlikely to be an issue for CUDA,
@@ -25,16 +27,16 @@ signatures = [int32(int32, int32),
25
27
  dtypes = (np.float64, np.float32, np.int32)
26
28
 
27
29
  # NumPy ndarray orders
28
- orders = ('C', 'F')
30
+ orders = ("C", "F")
29
31
 
30
32
  # Input sizes corresponding to operations:
31
33
  # - Less than one warp,
32
34
  # - Less than one block,
33
35
  # - Greater than one block (i.e. many blocks)
34
- input_sizes = (8, 100, 2 ** 10 + 1)
36
+ input_sizes = (8, 100, 2**10 + 1)
35
37
 
36
38
 
37
- @skip_on_cudasim('ufunc API unsupported in the simulator')
39
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
38
40
  class TestCUDAVectorize(CUDATestCase):
39
41
  # Presumably chosen as an odd number unlikely to coincide with the total
40
42
  # thread count, and large enough to ensure a significant number of blocks
@@ -42,8 +44,7 @@ class TestCUDAVectorize(CUDATestCase):
42
44
  N = 1000001
43
45
 
44
46
  def test_scalar(self):
45
-
46
- @vectorize(signatures, target='cuda')
47
+ @vectorize(signatures, target="cuda")
47
48
  def vector_add(a, b):
48
49
  return a + b
49
50
 
@@ -53,8 +54,7 @@ class TestCUDAVectorize(CUDATestCase):
53
54
  self.assertEqual(c, a + b)
54
55
 
55
56
  def test_1d(self):
56
-
57
- @vectorize(signatures, target='cuda')
57
+ @vectorize(signatures, target="cuda")
58
58
  def vector_add(a, b):
59
59
  return a + b
60
60
 
@@ -66,8 +66,7 @@ class TestCUDAVectorize(CUDATestCase):
66
66
  self.assertEqual(actual.dtype, ty)
67
67
 
68
68
  def test_1d_async(self):
69
-
70
- @vectorize(signatures, target='cuda')
69
+ @vectorize(signatures, target="cuda")
71
70
  def vector_add(a, b):
72
71
  return a + b
73
72
 
@@ -86,8 +85,7 @@ class TestCUDAVectorize(CUDATestCase):
86
85
  self.assertEqual(actual.dtype, ty)
87
86
 
88
87
  def test_nd(self):
89
-
90
- @vectorize(signatures, target='cuda')
88
+ @vectorize(signatures, target="cuda")
91
89
  def vector_add(a, b):
92
90
  return a + b
93
91
 
@@ -102,7 +100,7 @@ class TestCUDAVectorize(CUDATestCase):
102
100
  self.assertEqual(actual.dtype, dtype)
103
101
 
104
102
  def test_output_arg(self):
105
- @vectorize(signatures, target='cuda')
103
+ @vectorize(signatures, target="cuda")
106
104
  def vector_add(a, b):
107
105
  return a + b
108
106
 
@@ -117,7 +115,7 @@ class TestCUDAVectorize(CUDATestCase):
117
115
  self.assertEqual(expected.dtype, actual.dtype)
118
116
 
119
117
  def test_reduce(self):
120
- @vectorize(signatures, target='cuda')
118
+ @vectorize(signatures, target="cuda")
121
119
  def vector_add(a, b):
122
120
  return a + b
123
121
 
@@ -135,8 +133,7 @@ class TestCUDAVectorize(CUDATestCase):
135
133
  self.assertEqual(dtype, actual.dtype)
136
134
 
137
135
  def test_reduce_async(self):
138
-
139
- @vectorize(signatures, target='cuda')
136
+ @vectorize(signatures, target="cuda")
140
137
  def vector_add(a, b):
141
138
  return a + b
142
139
 
@@ -153,7 +150,7 @@ class TestCUDAVectorize(CUDATestCase):
153
150
  self.assertEqual(dtype, actual.dtype)
154
151
 
155
152
  def test_manual_transfer(self):
156
- @vectorize(signatures, target='cuda')
153
+ @vectorize(signatures, target="cuda")
157
154
  def vector_add(a, b):
158
155
  return a + b
159
156
 
@@ -166,7 +163,7 @@ class TestCUDAVectorize(CUDATestCase):
166
163
  self.assertEqual(expected.dtype, actual.dtype)
167
164
 
168
165
  def test_ufunc_output_2d(self):
169
- @vectorize(signatures, target='cuda')
166
+ @vectorize(signatures, target="cuda")
170
167
  def vector_add(a, b):
171
168
  return a + b
172
169
 
@@ -181,7 +178,7 @@ class TestCUDAVectorize(CUDATestCase):
181
178
  self.assertEqual(expected.dtype, actual.dtype)
182
179
 
183
180
  def check_tuple_arg(self, a, b):
184
- @vectorize(signatures, target='cuda')
181
+ @vectorize(signatures, target="cuda")
185
182
  def vector_add(a, b):
186
183
  return a + b
187
184
 
@@ -194,7 +191,7 @@ class TestCUDAVectorize(CUDATestCase):
194
191
  self.check_tuple_arg(a, b)
195
192
 
196
193
  def test_namedtuple_arg(self):
197
- Point = namedtuple('Point', ('x', 'y', 'z'))
194
+ Point = namedtuple("Point", ("x", "y", "z"))
198
195
  a = Point(x=1.0, y=2.0, z=3.0)
199
196
  b = Point(x=4.0, y=5.0, z=6.0)
200
197
  self.check_tuple_arg(a, b)
@@ -206,7 +203,7 @@ class TestCUDAVectorize(CUDATestCase):
206
203
  self.check_tuple_arg(a, b)
207
204
 
208
205
  def test_tuple_of_namedtuple_arg(self):
209
- Point = namedtuple('Point', ('x', 'y', 'z'))
206
+ Point = namedtuple("Point", ("x", "y", "z"))
210
207
  a = (Point(x=1.0, y=2.0, z=3.0), Point(x=1.5, y=2.5, z=3.5))
211
208
  b = (Point(x=4.0, y=5.0, z=6.0), Point(x=4.5, y=5.5, z=6.5))
212
209
  self.check_tuple_arg(a, b)
@@ -216,17 +213,17 @@ class TestCUDAVectorize(CUDATestCase):
216
213
  ys1 = xs1 + 2
217
214
  xs2 = np.arange(10, dtype=np.int32) * 2
218
215
  ys2 = xs2 + 1
219
- Points = namedtuple('Points', ('xs', 'ys'))
216
+ Points = namedtuple("Points", ("xs", "ys"))
220
217
  a = Points(xs=xs1, ys=ys1)
221
218
  b = Points(xs=xs2, ys=ys2)
222
219
  self.check_tuple_arg(a, b)
223
220
 
224
221
  def test_name_attribute(self):
225
- @vectorize('f8(f8)', target='cuda')
222
+ @vectorize("f8(f8)", target="cuda")
226
223
  def bar(x):
227
- return x ** 2
224
+ return x**2
228
225
 
229
- self.assertEqual(bar.__name__, 'bar')
226
+ self.assertEqual(bar.__name__, "bar")
230
227
 
231
228
  def test_no_transfer_for_device_data(self):
232
229
  # Initialize test data on the device prior to banning host <-> device
@@ -238,15 +235,15 @@ class TestCUDAVectorize(CUDATestCase):
238
235
  # A mock of a CUDA function that always raises a CudaAPIError
239
236
 
240
237
  def raising_transfer(*args, **kwargs):
241
- raise CudaAPIError(999, 'Transfer not allowed')
238
+ raise CudaAPIError(999, "Transfer not allowed")
242
239
 
243
240
  # Use the mock for transfers between the host and device
244
241
 
245
- old_HtoD = getattr(driver, 'cuMemcpyHtoD', None)
246
- old_DtoH = getattr(driver, 'cuMemcpyDtoH', None)
242
+ old_HtoD = getattr(driver, "cuMemcpyHtoD", None)
243
+ old_DtoH = getattr(driver, "cuMemcpyDtoH", None)
247
244
 
248
- setattr(driver, 'cuMemcpyHtoD', raising_transfer)
249
- setattr(driver, 'cuMemcpyDtoH', raising_transfer)
245
+ setattr(driver, "cuMemcpyHtoD", raising_transfer)
246
+ setattr(driver, "cuMemcpyDtoH", raising_transfer)
250
247
 
251
248
  # Ensure that the mock functions are working as expected
252
249
 
@@ -260,7 +257,7 @@ class TestCUDAVectorize(CUDATestCase):
260
257
  # Check that defining and calling a ufunc with data on the device
261
258
  # induces no transfers
262
259
 
263
- @vectorize(['float32(float32)'], target='cuda')
260
+ @vectorize(["float32(float32)"], target="cuda")
264
261
  def func(noise):
265
262
  return noise + 1.0
266
263
 
@@ -270,14 +267,14 @@ class TestCUDAVectorize(CUDATestCase):
270
267
  # no original implementation, simply remove ours.
271
268
 
272
269
  if old_HtoD is not None:
273
- setattr(driver, 'cuMemcpyHtoD', old_HtoD)
270
+ setattr(driver, "cuMemcpyHtoD", old_HtoD)
274
271
  else:
275
272
  del driver.cuMemcpyHtoD
276
273
  if old_DtoH is not None:
277
- setattr(driver, 'cuMemcpyDtoH', old_DtoH)
274
+ setattr(driver, "cuMemcpyDtoH", old_DtoH)
278
275
  else:
279
276
  del driver.cuMemcpyDtoH
280
277
 
281
278
 
282
- if __name__ == '__main__':
279
+ if __name__ == "__main__":
283
280
  unittest.main()
@@ -4,17 +4,17 @@ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
4
4
  import unittest
5
5
 
6
6
 
7
- @skip_on_cudasim('ufunc API unsupported in the simulator')
7
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
8
8
  class TestVectorizeComplex(CUDATestCase):
9
9
  def test_vectorize_complex(self):
10
- @vectorize(['complex128(complex128)'], target='cuda')
10
+ @vectorize(["complex128(complex128)"], target="cuda")
11
11
  def vcomp(a):
12
- return a * a + 1.
12
+ return a * a + 1.0
13
13
 
14
14
  A = np.arange(5, dtype=np.complex128)
15
15
  B = vcomp(A)
16
- self.assertTrue(np.allclose(A * A + 1., B))
16
+ self.assertTrue(np.allclose(A * A + 1.0, B))
17
17
 
18
18
 
19
- if __name__ == '__main__':
19
+ if __name__ == "__main__":
20
20
  unittest.main()
@@ -1,21 +1,25 @@
1
1
  import numpy as np
2
2
 
3
3
  from numba import vectorize, cuda
4
- from numba.tests.npyufunc.test_vectorize_decor import BaseVectorizeDecor, \
5
- BaseVectorizeNopythonArg, BaseVectorizeUnrecognizedArg
4
+ from numba.tests.npyufunc.test_vectorize_decor import (
5
+ BaseVectorizeDecor,
6
+ BaseVectorizeNopythonArg,
7
+ BaseVectorizeUnrecognizedArg,
8
+ )
6
9
  from numba.cuda.testing import skip_on_cudasim, CUDATestCase
7
10
  import unittest
8
11
 
9
12
 
10
- @skip_on_cudasim('ufunc API unsupported in the simulator')
13
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
11
14
  class TestVectorizeDecor(CUDATestCase, BaseVectorizeDecor):
12
15
  """
13
16
  Runs the tests from BaseVectorizeDecor with the CUDA target.
14
17
  """
15
- target = 'cuda'
16
18
 
19
+ target = "cuda"
17
20
 
18
- @skip_on_cudasim('ufunc API unsupported in the simulator')
21
+
22
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
19
23
  class TestGPUVectorizeBroadcast(CUDATestCase):
20
24
  def test_broadcast(self):
21
25
  a = np.random.randn(100, 3, 1)
@@ -24,7 +28,7 @@ class TestGPUVectorizeBroadcast(CUDATestCase):
24
28
  def fn(a, b):
25
29
  return a - b
26
30
 
27
- @vectorize(['float64(float64,float64)'], target='cuda')
31
+ @vectorize(["float64(float64,float64)"], target="cuda")
28
32
  def fngpu(a, b):
29
33
  return a - b
30
34
 
@@ -43,7 +47,7 @@ class TestGPUVectorizeBroadcast(CUDATestCase):
43
47
  def fn(a, b):
44
48
  return a - b
45
49
 
46
- @vectorize(['float64(float64,float64)'], target='cuda')
50
+ @vectorize(["float64(float64,float64)"], target="cuda")
47
51
  def fngpu(a, b):
48
52
  return a - b
49
53
 
@@ -52,18 +56,18 @@ class TestGPUVectorizeBroadcast(CUDATestCase):
52
56
  np.testing.assert_almost_equal(expect, got.copy_to_host())
53
57
 
54
58
 
55
- @skip_on_cudasim('ufunc API unsupported in the simulator')
59
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
56
60
  class TestVectorizeNopythonArg(BaseVectorizeNopythonArg, CUDATestCase):
57
61
  def test_target_cuda_nopython(self):
58
62
  warnings = ["nopython kwarg for cuda target is redundant"]
59
- self._test_target_nopython('cuda', warnings)
63
+ self._test_target_nopython("cuda", warnings)
60
64
 
61
65
 
62
- @skip_on_cudasim('ufunc API unsupported in the simulator')
66
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
63
67
  class TestVectorizeUnrecognizedArg(BaseVectorizeUnrecognizedArg, CUDATestCase):
64
68
  def test_target_cuda_unrecognized_arg(self):
65
- self._test_target_unrecognized_arg('cuda')
69
+ self._test_target_unrecognized_arg("cuda")
66
70
 
67
71
 
68
- if __name__ == '__main__':
72
+ if __name__ == "__main__":
69
73
  unittest.main()
@@ -5,19 +5,19 @@ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
5
5
  import unittest
6
6
 
7
7
 
8
- @skip_on_cudasim('ufunc API unsupported in the simulator')
8
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
9
9
  class TestCudaVectorizeDeviceCall(CUDATestCase):
10
10
  def test_cuda_vectorize_device_call(self):
11
-
12
11
  @cuda.jit(float32(float32, float32, float32), device=True)
13
12
  def cu_device_fn(x, y, z):
14
- return x ** y / z
13
+ return x**y / z
15
14
 
16
15
  def cu_ufunc(x, y, z):
17
16
  return cu_device_fn(x, y, z)
18
17
 
19
- ufunc = vectorize([float32(float32, float32, float32)], target='cuda')(
20
- cu_ufunc)
18
+ ufunc = vectorize([float32(float32, float32, float32)], target="cuda")(
19
+ cu_ufunc
20
+ )
21
21
 
22
22
  N = 100
23
23
 
@@ -27,10 +27,10 @@ class TestCudaVectorizeDeviceCall(CUDATestCase):
27
27
 
28
28
  out = ufunc(X, Y, Z)
29
29
 
30
- gold = (X ** Y) / Z
30
+ gold = (X**Y) / Z
31
31
 
32
32
  self.assertTrue(np.allclose(out, gold))
33
33
 
34
34
 
35
- if __name__ == '__main__':
35
+ if __name__ == "__main__":
36
36
  unittest.main()
@@ -7,11 +7,10 @@ import unittest
7
7
  sig = [float64(float64, float64)]
8
8
 
9
9
 
10
- @skip_on_cudasim('ufunc API unsupported in the simulator')
10
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
11
11
  class TestCUDAVectorizeScalarArg(CUDATestCase):
12
-
13
12
  def test_vectorize_scalar_arg(self):
14
- @vectorize(sig, target='cuda')
13
+ @vectorize(sig, target="cuda")
15
14
  def vector_add(a, b):
16
15
  return a + b
17
16
 
@@ -20,11 +19,11 @@ class TestCUDAVectorizeScalarArg(CUDATestCase):
20
19
  v = vector_add(1.0, dA)
21
20
 
22
21
  np.testing.assert_array_almost_equal(
23
- v.copy_to_host(),
24
- np.arange(1, 11, dtype=np.float64))
22
+ v.copy_to_host(), np.arange(1, 11, dtype=np.float64)
23
+ )
25
24
 
26
25
  def test_vectorize_all_scalars(self):
27
- @vectorize(sig, target='cuda')
26
+ @vectorize(sig, target="cuda")
28
27
  def vector_add(a, b):
29
28
  return a + b
30
29
 
@@ -33,5 +32,5 @@ class TestCUDAVectorizeScalarArg(CUDATestCase):
33
32
  np.testing.assert_almost_equal(2.0, v)
34
33
 
35
34
 
36
- if __name__ == '__main__':
35
+ if __name__ == "__main__":
37
36
  unittest.main()
@@ -7,27 +7,27 @@ from numba.core import config
7
7
  import warnings
8
8
 
9
9
 
10
- @skip_on_cudasim('cudasim does not raise performance warnings')
10
+ @skip_on_cudasim("cudasim does not raise performance warnings")
11
11
  class TestWarnings(CUDATestCase):
12
12
  def test_inefficient_launch_configuration(self):
13
13
  @cuda.jit
14
14
  def kernel():
15
15
  pass
16
16
 
17
- with override_config('CUDA_LOW_OCCUPANCY_WARNINGS', 1):
17
+ with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
18
18
  with warnings.catch_warnings(record=True) as w:
19
19
  kernel[1, 1]()
20
20
 
21
21
  self.assertEqual(w[0].category, NumbaPerformanceWarning)
22
- self.assertIn('Grid size', str(w[0].message))
23
- self.assertIn('low occupancy', str(w[0].message))
22
+ self.assertIn("Grid size", str(w[0].message))
23
+ self.assertIn("low occupancy", str(w[0].message))
24
24
 
25
25
  def test_efficient_launch_configuration(self):
26
26
  @cuda.jit
27
27
  def kernel():
28
28
  pass
29
29
 
30
- with override_config('CUDA_LOW_OCCUPANCY_WARNINGS', 1):
30
+ with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
31
31
  with warnings.catch_warnings(record=True) as w:
32
32
  kernel[256, 256]()
33
33
 
@@ -40,14 +40,15 @@ class TestWarnings(CUDATestCase):
40
40
 
41
41
  N = 10
42
42
  arr_f32 = np.zeros(N, dtype=np.float32)
43
- with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1):
43
+ with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
44
44
  with warnings.catch_warnings(record=True) as w:
45
45
  foo[1, N](arr_f32, N)
46
46
 
47
47
  self.assertEqual(w[0].category, NumbaPerformanceWarning)
48
- self.assertIn('Host array used in CUDA kernel will incur',
49
- str(w[0].message))
50
- self.assertIn('copy overhead', str(w[0].message))
48
+ self.assertIn(
49
+ "Host array used in CUDA kernel will incur", str(w[0].message)
50
+ )
51
+ self.assertIn("copy overhead", str(w[0].message))
51
52
 
52
53
  def test_pinned_warn_on_host_array(self):
53
54
  @cuda.jit
@@ -57,14 +58,15 @@ class TestWarnings(CUDATestCase):
57
58
  N = 10
58
59
  ary = cuda.pinned_array(N, dtype=np.float32)
59
60
 
60
- with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1):
61
+ with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
61
62
  with warnings.catch_warnings(record=True) as w:
62
63
  foo[1, N](ary, N)
63
64
 
64
65
  self.assertEqual(w[0].category, NumbaPerformanceWarning)
65
- self.assertIn('Host array used in CUDA kernel will incur',
66
- str(w[0].message))
67
- self.assertIn('copy overhead', str(w[0].message))
66
+ self.assertIn(
67
+ "Host array used in CUDA kernel will incur", str(w[0].message)
68
+ )
69
+ self.assertIn("copy overhead", str(w[0].message))
68
70
 
69
71
  def test_nowarn_on_mapped_array(self):
70
72
  @cuda.jit
@@ -74,7 +76,7 @@ class TestWarnings(CUDATestCase):
74
76
  N = 10
75
77
  ary = cuda.mapped_array(N, dtype=np.float32)
76
78
 
77
- with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1):
79
+ with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
78
80
  with warnings.catch_warnings(record=True) as w:
79
81
  foo[1, N](ary, N)
80
82
 
@@ -89,7 +91,7 @@ class TestWarnings(CUDATestCase):
89
91
  N = 10
90
92
  ary = cuda.managed_array(N, dtype=np.float32)
91
93
 
92
- with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1):
94
+ with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
93
95
  with warnings.catch_warnings(record=True) as w:
94
96
  foo[1, N](ary, N)
95
97
 
@@ -103,7 +105,7 @@ class TestWarnings(CUDATestCase):
103
105
  N = 10
104
106
  ary = cuda.device_array(N, dtype=np.float32)
105
107
 
106
- with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1):
108
+ with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
107
109
  with warnings.catch_warnings(record=True) as w:
108
110
  foo[1, N](ary, N)
109
111
 
@@ -114,14 +116,14 @@ class TestWarnings(CUDATestCase):
114
116
  cuda.jit(debug=True, opt=True)
115
117
 
116
118
  self.assertEqual(len(w), 1)
117
- self.assertIn('not supported by CUDA', str(w[0].message))
119
+ self.assertIn("not supported by CUDA", str(w[0].message))
118
120
 
119
121
  def test_warn_on_debug_and_opt_default(self):
120
122
  with warnings.catch_warnings(record=True) as w:
121
123
  cuda.jit(debug=True)
122
124
 
123
125
  self.assertEqual(len(w), 1)
124
- self.assertIn('not supported by CUDA', str(w[0].message))
126
+ self.assertIn("not supported by CUDA", str(w[0].message))
125
127
 
126
128
  def test_no_warn_on_debug_and_no_opt(self):
127
129
  with warnings.catch_warnings(record=True) as w:
@@ -136,8 +138,8 @@ class TestWarnings(CUDATestCase):
136
138
  self.assertEqual(len(w), 0)
137
139
 
138
140
  def test_no_warn_on_debug_and_opt_with_config(self):
139
- with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
140
- with override_config('OPT', config._OptLevel(0)):
141
+ with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
142
+ with override_config("OPT", config._OptLevel(0)):
141
143
  with warnings.catch_warnings(record=True) as w:
142
144
  cuda.jit()
143
145
 
@@ -148,30 +150,30 @@ class TestWarnings(CUDATestCase):
148
150
 
149
151
  self.assertEqual(len(w), 0)
150
152
 
151
- with override_config('OPT', config._OptLevel(0)):
153
+ with override_config("OPT", config._OptLevel(0)):
152
154
  with warnings.catch_warnings(record=True) as w:
153
155
  cuda.jit(debug=True)
154
156
 
155
157
  self.assertEqual(len(w), 0)
156
158
 
157
159
  def test_warn_on_debug_and_opt_with_config(self):
158
- with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
159
- for opt in (1, 2, 3, 'max'):
160
- with override_config('OPT', config._OptLevel(opt)):
160
+ with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
161
+ for opt in (1, 2, 3, "max"):
162
+ with override_config("OPT", config._OptLevel(opt)):
161
163
  with warnings.catch_warnings(record=True) as w:
162
164
  cuda.jit()
163
165
 
164
166
  self.assertEqual(len(w), 1)
165
- self.assertIn('not supported by CUDA', str(w[0].message))
167
+ self.assertIn("not supported by CUDA", str(w[0].message))
166
168
 
167
- for opt in (1, 2, 3, 'max'):
168
- with override_config('OPT', config._OptLevel(opt)):
169
+ for opt in (1, 2, 3, "max"):
170
+ with override_config("OPT", config._OptLevel(opt)):
169
171
  with warnings.catch_warnings(record=True) as w:
170
172
  cuda.jit(debug=True)
171
173
 
172
174
  self.assertEqual(len(w), 1)
173
- self.assertIn('not supported by CUDA', str(w[0].message))
175
+ self.assertIn("not supported by CUDA", str(w[0].message))
174
176
 
175
177
 
176
- if __name__ == '__main__':
178
+ if __name__ == "__main__":
177
179
  unittest.main()