numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -3,9 +3,14 @@ from __future__ import print_function
3
3
  import numpy as np
4
4
 
5
5
  from numba import config, cuda, int32
6
- from numba.cuda.testing import (unittest, CUDATestCase, skip_on_cudasim,
7
- skip_unless_cc_60, skip_if_cudadevrt_missing,
8
- skip_if_mvc_enabled)
6
+ from numba.cuda.testing import (
7
+ unittest,
8
+ CUDATestCase,
9
+ skip_on_cudasim,
10
+ skip_unless_cc_60,
11
+ skip_if_cudadevrt_missing,
12
+ skip_if_mvc_enabled,
13
+ )
9
14
 
10
15
 
11
16
  @cuda.jit
@@ -47,7 +52,7 @@ def sequential_rows(M):
47
52
 
48
53
 
49
54
  @skip_if_cudadevrt_missing
50
- @skip_if_mvc_enabled('CG not supported with MVC')
55
+ @skip_if_mvc_enabled("CG not supported with MVC")
51
56
  class TestCudaCooperativeGroups(CUDATestCase):
52
57
  @skip_unless_cc_60
53
58
  def test_this_grid(self):
@@ -55,11 +60,12 @@ class TestCudaCooperativeGroups(CUDATestCase):
55
60
  this_grid[1, 1](A)
56
61
 
57
62
  # Ensure the kernel executed beyond the call to cuda.this_grid()
58
- self.assertFalse(np.isnan(A[0]), 'Value was not set')
63
+ self.assertFalse(np.isnan(A[0]), "Value was not set")
59
64
 
60
65
  @skip_unless_cc_60
61
- @skip_on_cudasim("Simulator doesn't differentiate between normal and "
62
- "cooperative kernels")
66
+ @skip_on_cudasim(
67
+ "Simulator doesn't differentiate between normal and cooperative kernels"
68
+ )
63
69
  def test_this_grid_is_cooperative(self):
64
70
  A = np.full(1, fill_value=np.nan)
65
71
  this_grid[1, 1](A)
@@ -74,11 +80,12 @@ class TestCudaCooperativeGroups(CUDATestCase):
74
80
  sync_group[1, 1](A)
75
81
 
76
82
  # Ensure the kernel executed beyond the call to cuda.sync_group()
77
- self.assertFalse(np.isnan(A[0]), 'Value was not set')
83
+ self.assertFalse(np.isnan(A[0]), "Value was not set")
78
84
 
79
85
  @skip_unless_cc_60
80
- @skip_on_cudasim("Simulator doesn't differentiate between normal and "
81
- "cooperative kernels")
86
+ @skip_on_cudasim(
87
+ "Simulator doesn't differentiate between normal and cooperative kernels"
88
+ )
82
89
  def test_sync_group_is_cooperative(self):
83
90
  A = np.full(1, fill_value=np.nan)
84
91
  sync_group[1, 1](A)
@@ -99,7 +106,7 @@ class TestCudaCooperativeGroups(CUDATestCase):
99
106
  for key, overload in no_sync.overloads.items():
100
107
  self.assertFalse(overload.cooperative)
101
108
  for link in overload._codelibrary._linking_files:
102
- self.assertNotIn('cudadevrt', link)
109
+ self.assertNotIn("cudadevrt", link)
103
110
 
104
111
  @skip_unless_cc_60
105
112
  def test_sync_at_matrix_row(self):
@@ -113,7 +120,7 @@ class TestCudaCooperativeGroups(CUDATestCase):
113
120
  blockdim = 32
114
121
  griddim = A.shape[1] // blockdim
115
122
 
116
- sig = (int32[:,::1],)
123
+ sig = (int32[:, ::1],)
117
124
  c_sequential_rows = cuda.jit(sig)(sequential_rows)
118
125
 
119
126
  overload = c_sequential_rows.overloads[sig]
@@ -133,7 +140,7 @@ class TestCudaCooperativeGroups(CUDATestCase):
133
140
  # doesn't error, and that varying the number of dimensions of the block
134
141
  # whilst keeping the total number of threads constant doesn't change
135
142
  # the maximum to validate some of the logic.
136
- sig = (int32[:,::1],)
143
+ sig = (int32[:, ::1],)
137
144
  c_sequential_rows = cuda.jit(sig)(sequential_rows)
138
145
  overload = c_sequential_rows.overloads[sig]
139
146
  blocks1d = overload.max_cooperative_grid_blocks(256)
@@ -143,5 +150,5 @@ class TestCudaCooperativeGroups(CUDATestCase):
143
150
  self.assertEqual(blocks1d, blocks3d)
144
151
 
145
152
 
146
- if __name__ == '__main__':
153
+ if __name__ == "__main__":
147
154
  unittest.main()
@@ -9,12 +9,13 @@ from numba.tests.support import linux_only, override_config
9
9
  from unittest.mock import call, patch
10
10
 
11
11
 
12
- @skip_on_cudasim('CUDA Array Interface is not supported in the simulator')
12
+ @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
13
13
  class TestCudaArrayInterface(ContextResettingTestCase):
14
14
  def assertPointersEqual(self, a, b):
15
15
  if driver.USE_NV_BINDING:
16
- self.assertEqual(int(a.device_ctypes_pointer),
17
- int(b.device_ctypes_pointer))
16
+ self.assertEqual(
17
+ int(a.device_ctypes_pointer), int(b.device_ctypes_pointer)
18
+ )
18
19
 
19
20
  def test_as_cuda_array(self):
20
21
  h_arr = np.arange(10)
@@ -37,7 +38,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
37
38
  else:
38
39
  return stream.handle.value
39
40
 
40
- @skip_if_external_memmgr('Ownership not relevant with external memmgr')
41
+ @skip_if_external_memmgr("Ownership not relevant with external memmgr")
41
42
  def test_ownership(self):
42
43
  # Get the deallocation queue
43
44
  ctx = cuda.current_context()
@@ -82,7 +83,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
82
83
  np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr + val)
83
84
 
84
85
  def test_ufunc_arg(self):
85
- @vectorize(['f8(f8, f8)'], target='cuda')
86
+ @vectorize(["f8(f8, f8)"], target="cuda")
86
87
  def vadd(a, b):
87
88
  return a + b
88
89
 
@@ -99,7 +100,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
99
100
  np.testing.assert_array_equal(returned.copy_to_host(), h_arr + val)
100
101
 
101
102
  def test_gufunc_arg(self):
102
- @guvectorize(['(f8, f8, f8[:])'], '(),()->()', target='cuda')
103
+ @guvectorize(["(f8, f8, f8[:])"], "(),()->()", target="cuda")
103
104
  def vadd(inp, val, out):
104
105
  out[0] = inp + val
105
106
 
@@ -118,8 +119,8 @@ class TestCudaArrayInterface(ContextResettingTestCase):
118
119
 
119
120
  def test_array_views(self):
120
121
  """Views created via array interface support:
121
- - Strided slices
122
- - Strided slices
122
+ - Strided slices
123
+ - Strided slices
123
124
  """
124
125
  h_arr = np.random.random(10)
125
126
  c_arr = cuda.to_device(h_arr)
@@ -148,23 +149,22 @@ class TestCudaArrayInterface(ContextResettingTestCase):
148
149
  self.assertEqual(arr[::2].strides, arr_strided.strides)
149
150
  self.assertEqual(arr[::2].dtype.itemsize, arr_strided.dtype.itemsize)
150
151
  self.assertEqual(arr[::2].alloc_size, arr_strided.alloc_size)
151
- self.assertEqual(arr[::2].nbytes,
152
- arr_strided.size * arr_strided.dtype.itemsize)
152
+ self.assertEqual(
153
+ arr[::2].nbytes, arr_strided.size * arr_strided.dtype.itemsize
154
+ )
153
155
 
154
156
  # __setitem__ interface propagates into external array
155
157
 
156
158
  # Writes to a slice
157
159
  arr[:5] = np.pi
158
160
  np.testing.assert_array_equal(
159
- c_arr.copy_to_host(),
160
- np.concatenate((np.full(5, np.pi), h_arr[5:]))
161
+ c_arr.copy_to_host(), np.concatenate((np.full(5, np.pi), h_arr[5:]))
161
162
  )
162
163
 
163
164
  # Writes to a slice from a view
164
165
  arr[:5] = arr[5:]
165
166
  np.testing.assert_array_equal(
166
- c_arr.copy_to_host(),
167
- np.concatenate((h_arr[5:], h_arr[5:]))
167
+ c_arr.copy_to_host(), np.concatenate((h_arr[5:], h_arr[5:]))
168
168
  )
169
169
 
170
170
  # Writes through a view
@@ -177,10 +177,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
177
177
  c_arr.copy_to_host()[::2],
178
178
  np.full(5, np.pi),
179
179
  )
180
- np.testing.assert_array_equal(
181
- c_arr.copy_to_host()[1::2],
182
- h_arr[1::2]
183
- )
180
+ np.testing.assert_array_equal(c_arr.copy_to_host()[1::2], h_arr[1::2])
184
181
 
185
182
  def test_negative_strided_issue(self):
186
183
  # issue #3705
@@ -188,7 +185,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
188
185
  c_arr = cuda.to_device(h_arr)
189
186
 
190
187
  def base_offset(orig, sliced):
191
- return sliced['data'][0] - orig['data'][0]
188
+ return sliced["data"][0] - orig["data"][0]
192
189
 
193
190
  h_ai = h_arr.__array_interface__
194
191
  c_ai = c_arr.__cuda_array_interface__
@@ -202,8 +199,8 @@ class TestCudaArrayInterface(ContextResettingTestCase):
202
199
  base_offset(c_ai, c_ai_sliced),
203
200
  )
204
201
  # Check shape and strides are correct
205
- self.assertEqual(h_ai_sliced['shape'], c_ai_sliced['shape'])
206
- self.assertEqual(h_ai_sliced['strides'], c_ai_sliced['strides'])
202
+ self.assertEqual(h_ai_sliced["shape"], c_ai_sliced["shape"])
203
+ self.assertEqual(h_ai_sliced["strides"], c_ai_sliced["strides"])
207
204
 
208
205
  def test_negative_strided_copy_to_host(self):
209
206
  # issue #3705
@@ -212,28 +209,28 @@ class TestCudaArrayInterface(ContextResettingTestCase):
212
209
  sliced = c_arr[::-1]
213
210
  with self.assertRaises(NotImplementedError) as raises:
214
211
  sliced.copy_to_host()
215
- expected_msg = 'D->H copy not implemented for negative strides'
212
+ expected_msg = "D->H copy not implemented for negative strides"
216
213
  self.assertIn(expected_msg, str(raises.exception))
217
214
 
218
215
  def test_masked_array(self):
219
216
  h_arr = np.random.random(10)
220
- h_mask = np.random.randint(2, size=10, dtype='bool')
217
+ h_mask = np.random.randint(2, size=10, dtype="bool")
221
218
  c_arr = cuda.to_device(h_arr)
222
219
  c_mask = cuda.to_device(h_mask)
223
220
 
224
221
  # Manually create a masked CUDA Array Interface dictionary
225
222
  masked_cuda_array_interface = c_arr.__cuda_array_interface__.copy()
226
- masked_cuda_array_interface['mask'] = c_mask
223
+ masked_cuda_array_interface["mask"] = c_mask
227
224
 
228
225
  with self.assertRaises(NotImplementedError) as raises:
229
226
  cuda.from_cuda_array_interface(masked_cuda_array_interface)
230
- expected_msg = 'Masked arrays are not supported'
227
+ expected_msg = "Masked arrays are not supported"
231
228
  self.assertIn(expected_msg, str(raises.exception))
232
229
 
233
230
  def test_zero_size_array(self):
234
231
  # for #4175
235
232
  c_arr = cuda.device_array(0)
236
- self.assertEqual(c_arr.__cuda_array_interface__['data'][0], 0)
233
+ self.assertEqual(c_arr.__cuda_array_interface__["data"][0], 0)
237
234
 
238
235
  @cuda.jit
239
236
  def add_one(arr):
@@ -249,49 +246,49 @@ class TestCudaArrayInterface(ContextResettingTestCase):
249
246
  # for #4175
250
247
  # First, test C-contiguous array
251
248
  c_arr = cuda.device_array((2, 3, 4))
252
- self.assertEqual(c_arr.__cuda_array_interface__['strides'], None)
249
+ self.assertEqual(c_arr.__cuda_array_interface__["strides"], None)
253
250
 
254
251
  # Second, test non C-contiguous array
255
252
  c_arr = c_arr[:, 1, :]
256
- self.assertNotEqual(c_arr.__cuda_array_interface__['strides'], None)
253
+ self.assertNotEqual(c_arr.__cuda_array_interface__["strides"], None)
257
254
 
258
255
  def test_consuming_strides(self):
259
256
  hostarray = np.arange(10).reshape(2, 5)
260
257
  devarray = cuda.to_device(hostarray)
261
258
  face = devarray.__cuda_array_interface__
262
- self.assertIsNone(face['strides'])
259
+ self.assertIsNone(face["strides"])
263
260
  got = cuda.from_cuda_array_interface(face).copy_to_host()
264
261
  np.testing.assert_array_equal(got, hostarray)
265
- self.assertTrue(got.flags['C_CONTIGUOUS'])
262
+ self.assertTrue(got.flags["C_CONTIGUOUS"])
266
263
  # Try non-NULL strides
267
- face['strides'] = hostarray.strides
268
- self.assertIsNotNone(face['strides'])
264
+ face["strides"] = hostarray.strides
265
+ self.assertIsNotNone(face["strides"])
269
266
  got = cuda.from_cuda_array_interface(face).copy_to_host()
270
267
  np.testing.assert_array_equal(got, hostarray)
271
- self.assertTrue(got.flags['C_CONTIGUOUS'])
268
+ self.assertTrue(got.flags["C_CONTIGUOUS"])
272
269
 
273
270
  def test_produce_no_stream(self):
274
271
  c_arr = cuda.device_array(10)
275
- self.assertIsNone(c_arr.__cuda_array_interface__['stream'])
272
+ self.assertIsNone(c_arr.__cuda_array_interface__["stream"])
276
273
 
277
274
  mapped_arr = cuda.mapped_array(10)
278
- self.assertIsNone(mapped_arr.__cuda_array_interface__['stream'])
275
+ self.assertIsNone(mapped_arr.__cuda_array_interface__["stream"])
279
276
 
280
277
  @linux_only
281
278
  def test_produce_managed_no_stream(self):
282
279
  managed_arr = cuda.managed_array(10)
283
- self.assertIsNone(managed_arr.__cuda_array_interface__['stream'])
280
+ self.assertIsNone(managed_arr.__cuda_array_interface__["stream"])
284
281
 
285
282
  def test_produce_stream(self):
286
283
  s = cuda.stream()
287
284
  c_arr = cuda.device_array(10, stream=s)
288
- cai_stream = c_arr.__cuda_array_interface__['stream']
285
+ cai_stream = c_arr.__cuda_array_interface__["stream"]
289
286
  stream_value = self.get_stream_value(s)
290
287
  self.assertEqual(stream_value, cai_stream)
291
288
 
292
289
  s = cuda.stream()
293
290
  mapped_arr = cuda.mapped_array(10, stream=s)
294
- cai_stream = mapped_arr.__cuda_array_interface__['stream']
291
+ cai_stream = mapped_arr.__cuda_array_interface__["stream"]
295
292
  stream_value = self.get_stream_value(s)
296
293
  self.assertEqual(stream_value, cai_stream)
297
294
 
@@ -299,7 +296,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
299
296
  def test_produce_managed_stream(self):
300
297
  s = cuda.stream()
301
298
  managed_arr = cuda.managed_array(10, stream=s)
302
- cai_stream = managed_arr.__cuda_array_interface__['stream']
299
+ cai_stream = managed_arr.__cuda_array_interface__["stream"]
303
300
  stream_value = self.get_stream_value(s)
304
301
  self.assertEqual(stream_value, cai_stream)
305
302
 
@@ -327,8 +324,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
327
324
  # Create a foreign array with no stream
328
325
  f_arr = ForeignArray(cuda.device_array(10))
329
326
 
330
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
331
- return_value=None) as mock_sync:
327
+ with patch.object(
328
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
329
+ ) as mock_sync:
332
330
  cuda.as_cuda_array(f_arr)
333
331
 
334
332
  # Ensure the synchronize method of a stream was not called
@@ -339,8 +337,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
339
337
  s = cuda.stream()
340
338
  f_arr = ForeignArray(cuda.device_array(10, stream=s))
341
339
 
342
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
343
- return_value=None) as mock_sync:
340
+ with patch.object(
341
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
342
+ ) as mock_sync:
344
343
  cuda.as_cuda_array(f_arr)
345
344
 
346
345
  # Ensure the synchronize method of a stream was called
@@ -354,9 +353,10 @@ class TestCudaArrayInterface(ContextResettingTestCase):
354
353
  # Set sync to false before testing. The test suite should generally be
355
354
  # run with sync enabled, but stash the old value just in case it is
356
355
  # not.
357
- with override_config('CUDA_ARRAY_INTERFACE_SYNC', False):
358
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
359
- return_value=None) as mock_sync:
356
+ with override_config("CUDA_ARRAY_INTERFACE_SYNC", False):
357
+ with patch.object(
358
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
359
+ ) as mock_sync:
360
360
  cuda.as_cuda_array(f_arr)
361
361
 
362
362
  # Ensure the synchronize method of a stream was not called
@@ -370,8 +370,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
370
370
  def f(x):
371
371
  pass
372
372
 
373
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
374
- return_value=None) as mock_sync:
373
+ with patch.object(
374
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
375
+ ) as mock_sync:
375
376
  f[1, 1](f_arr)
376
377
 
377
378
  # Ensure the synchronize method of a stream was not called
@@ -386,8 +387,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
386
387
  def f(x):
387
388
  pass
388
389
 
389
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
390
- return_value=None) as mock_sync:
390
+ with patch.object(
391
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
392
+ ) as mock_sync:
391
393
  f[1, 1](f_arr)
392
394
 
393
395
  # Ensure the synchronize method of a stream was called
@@ -404,8 +406,9 @@ class TestCudaArrayInterface(ContextResettingTestCase):
404
406
  def f(x, y):
405
407
  pass
406
408
 
407
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
408
- return_value=None) as mock_sync:
409
+ with patch.object(
410
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
411
+ ) as mock_sync:
409
412
  f[1, 1](f_arr1, f_arr2)
410
413
 
411
414
  # Ensure that synchronize was called twice
@@ -418,13 +421,15 @@ class TestCudaArrayInterface(ContextResettingTestCase):
418
421
  f_arr1 = ForeignArray(cuda.device_array(10, stream=s1))
419
422
  f_arr2 = ForeignArray(cuda.device_array(10, stream=s2))
420
423
 
421
- with override_config('CUDA_ARRAY_INTERFACE_SYNC', False):
424
+ with override_config("CUDA_ARRAY_INTERFACE_SYNC", False):
425
+
422
426
  @cuda.jit
423
427
  def f(x, y):
424
428
  pass
425
429
 
426
- with patch.object(cuda.cudadrv.driver.Stream, 'synchronize',
427
- return_value=None) as mock_sync:
430
+ with patch.object(
431
+ cuda.cudadrv.driver.Stream, "synchronize", return_value=None
432
+ ) as mock_sync:
428
433
  f[1, 1](f_arr1, f_arr2)
429
434
 
430
435
  # Ensure that synchronize was not called
@@ -80,11 +80,12 @@ class TestCudaJitNoTypes(CUDATestCase):
80
80
  def test_jit_debug_simulator(self):
81
81
  # Ensure that the jit decorator accepts the debug kwarg when the
82
82
  # simulator is in use - see Issue #6615.
83
- with override_config('ENABLE_CUDASIM', 1):
83
+ with override_config("ENABLE_CUDASIM", 1):
84
+
84
85
  @cuda.jit(debug=True, opt=False)
85
86
  def f(x):
86
87
  pass
87
88
 
88
89
 
89
- if __name__ == '__main__':
90
+ if __name__ == "__main__":
90
91
  unittest.main()
@@ -13,9 +13,9 @@ class TestCudaDateTime(CUDATestCase):
13
13
  for i in range(cuda.grid(1), delta.size, cuda.gridsize(1)):
14
14
  delta[i] = end[i] - start[i]
15
15
 
16
- arr1 = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
16
+ arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
17
17
  arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
18
- delta = np.zeros_like(arr1, dtype='timedelta64[D]')
18
+ delta = np.zeros_like(arr1, dtype="timedelta64[D]")
19
19
 
20
20
  foo[1, 32](arr1, arr2, delta)
21
21
 
@@ -27,11 +27,12 @@ class TestCudaDateTime(CUDATestCase):
27
27
  for i in range(cuda.grid(1), matches.size, cuda.gridsize(1)):
28
28
  matches[i] = dates[i] == target
29
29
  outdelta[i] = dates[i] - delta
30
- arr1 = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
31
- target = arr1[5] # datetime
30
+
31
+ arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
32
+ target = arr1[5] # datetime
32
33
  delta = arr1[6] - arr1[5] # timedelta
33
34
  matches = np.zeros_like(arr1, dtype=np.bool_)
34
- outdelta = np.zeros_like(arr1, dtype='datetime64[D]')
35
+ outdelta = np.zeros_like(arr1, dtype="datetime64[D]")
35
36
 
36
37
  foo[1, 32](arr1, target, delta, matches, outdelta)
37
38
  where = matches.nonzero()
@@ -39,56 +40,59 @@ class TestCudaDateTime(CUDATestCase):
39
40
  self.assertEqual(list(where), [5])
40
41
  self.assertPreciseEqual(outdelta, arr1 - delta)
41
42
 
42
- @skip_on_cudasim('ufunc API unsupported in the simulator')
43
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
43
44
  def test_ufunc(self):
44
- datetime_t = from_dtype(np.dtype('datetime64[D]'))
45
+ datetime_t = from_dtype(np.dtype("datetime64[D]"))
45
46
 
46
- @vectorize([(datetime_t, datetime_t)], target='cuda')
47
+ @vectorize([(datetime_t, datetime_t)], target="cuda")
47
48
  def timediff(start, end):
48
49
  return end - start
49
50
 
50
- arr1 = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
51
+ arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
51
52
  arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
52
53
 
53
54
  delta = timediff(arr1, arr2)
54
55
 
55
56
  self.assertPreciseEqual(delta, arr2 - arr1)
56
57
 
57
- @skip_on_cudasim('ufunc API unsupported in the simulator')
58
+ @skip_on_cudasim("ufunc API unsupported in the simulator")
58
59
  def test_gufunc(self):
59
- datetime_t = from_dtype(np.dtype('datetime64[D]'))
60
- timedelta_t = from_dtype(np.dtype('timedelta64[D]'))
61
-
62
- @guvectorize([(datetime_t, datetime_t, timedelta_t[:])], '(),()->()',
63
- target='cuda')
60
+ datetime_t = from_dtype(np.dtype("datetime64[D]"))
61
+ timedelta_t = from_dtype(np.dtype("timedelta64[D]"))
62
+
63
+ @guvectorize(
64
+ [(datetime_t, datetime_t, timedelta_t[:])],
65
+ "(),()->()",
66
+ target="cuda",
67
+ )
64
68
  def timediff(start, end, out):
65
69
  out[0] = end - start
66
70
 
67
- arr1 = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
71
+ arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
68
72
  arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
69
73
 
70
74
  delta = timediff(arr1, arr2)
71
75
 
72
76
  self.assertPreciseEqual(delta, arr2 - arr1)
73
77
 
74
- @skip_on_cudasim('no .copy_to_host() in the simulator')
78
+ @skip_on_cudasim("no .copy_to_host() in the simulator")
75
79
  def test_datetime_view_as_int64(self):
76
- arr = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
80
+ arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
77
81
  darr = cuda.to_device(arr)
78
82
  viewed = darr.view(np.int64)
79
83
  self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
80
84
  self.assertEqual(viewed.gpu_data, darr.gpu_data)
81
85
 
82
- @skip_on_cudasim('no .copy_to_host() in the simulator')
86
+ @skip_on_cudasim("no .copy_to_host() in the simulator")
83
87
  def test_timedelta_view_as_int64(self):
84
- arr = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
88
+ arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
85
89
  arr = arr - (arr - 1)
86
- self.assertEqual(arr.dtype, np.dtype('timedelta64[D]'))
90
+ self.assertEqual(arr.dtype, np.dtype("timedelta64[D]"))
87
91
  darr = cuda.to_device(arr)
88
92
  viewed = darr.view(np.int64)
89
93
  self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
90
94
  self.assertEqual(viewed.gpu_data, darr.gpu_data)
91
95
 
92
96
 
93
- if __name__ == '__main__':
97
+ if __name__ == "__main__":
94
98
  unittest.main()
@@ -2,8 +2,11 @@ import numpy as np
2
2
 
3
3
  from numba.core.utils import PYVERSION
4
4
  from numba.cuda.testing import skip_on_cudasim, CUDATestCase
5
- from numba.tests.support import (override_config, captured_stderr,
6
- captured_stdout)
5
+ from numba.tests.support import (
6
+ override_config,
7
+ captured_stderr,
8
+ captured_stdout,
9
+ )
7
10
  from numba import cuda, float64
8
11
  import unittest
9
12
 
@@ -13,9 +16,8 @@ def simple_cuda(A, B):
13
16
  B[i] = A[i] + 1.5
14
17
 
15
18
 
16
- @skip_on_cudasim('Simulator does not produce debug dumps')
19
+ @skip_on_cudasim("Simulator does not produce debug dumps")
17
20
  class TestDebugOutput(CUDATestCase):
18
-
19
21
  def compile_simple_cuda(self):
20
22
  with captured_stderr() as err:
21
23
  with captured_stdout() as out:
@@ -34,14 +36,14 @@ class TestDebugOutput(CUDATestCase):
34
36
  self.assertRaises(AssertionError, *args, **kwargs)
35
37
 
36
38
  def check_debug_output(self, out, enabled_dumps):
37
- all_dumps = dict.fromkeys(['bytecode', 'cfg', 'ir', 'llvm',
38
- 'assembly'],
39
- False)
39
+ all_dumps = dict.fromkeys(
40
+ ["bytecode", "cfg", "ir", "llvm", "assembly"], False
41
+ )
40
42
  for name in enabled_dumps:
41
43
  assert name in all_dumps
42
44
  all_dumps[name] = True
43
45
  for name, enabled in sorted(all_dumps.items()):
44
- check_meth = getattr(self, '_check_dump_%s' % name)
46
+ check_meth = getattr(self, "_check_dump_%s" % name)
45
47
  if enabled:
46
48
  check_meth(out)
47
49
  else:
@@ -50,50 +52,50 @@ class TestDebugOutput(CUDATestCase):
50
52
  def _check_dump_bytecode(self, out):
51
53
  if PYVERSION > (3, 10):
52
54
  # binop with arg=0 is binary add, see CPython dis.py and opcode.py
53
- self.assertIn('BINARY_OP(arg=0', out)
55
+ self.assertIn("BINARY_OP(arg=0", out)
54
56
  else:
55
- self.assertIn('BINARY_ADD', out)
57
+ self.assertIn("BINARY_ADD", out)
56
58
 
57
59
  def _check_dump_cfg(self, out):
58
- self.assertIn('CFG dominators', out)
60
+ self.assertIn("CFG dominators", out)
59
61
 
60
62
  def _check_dump_ir(self, out):
61
- self.assertIn('--IR DUMP: simple_cuda--', out)
62
- self.assertIn('const(float, 1.5)', out)
63
+ self.assertIn("--IR DUMP: simple_cuda--", out)
64
+ self.assertIn("const(float, 1.5)", out)
63
65
 
64
66
  def _check_dump_llvm(self, out):
65
- self.assertIn('--LLVM DUMP', out)
67
+ self.assertIn("--LLVM DUMP", out)
66
68
  self.assertIn('!"kernel", i32 1', out)
67
69
 
68
70
  def _check_dump_assembly(self, out):
69
- self.assertIn('--ASSEMBLY simple_cuda', out)
70
- self.assertIn('Generated by NVIDIA NVVM Compiler', out)
71
+ self.assertIn("--ASSEMBLY simple_cuda", out)
72
+ self.assertIn("Generated by NVIDIA NVVM Compiler", out)
71
73
 
72
74
  def test_dump_bytecode(self):
73
- with override_config('DUMP_BYTECODE', True):
75
+ with override_config("DUMP_BYTECODE", True):
74
76
  out = self.compile_simple_cuda()
75
- self.check_debug_output(out, ['bytecode'])
77
+ self.check_debug_output(out, ["bytecode"])
76
78
 
77
79
  def test_dump_ir(self):
78
- with override_config('DUMP_IR', True):
80
+ with override_config("DUMP_IR", True):
79
81
  out = self.compile_simple_cuda()
80
- self.check_debug_output(out, ['ir'])
82
+ self.check_debug_output(out, ["ir"])
81
83
 
82
84
  def test_dump_cfg(self):
83
- with override_config('DUMP_CFG', True):
85
+ with override_config("DUMP_CFG", True):
84
86
  out = self.compile_simple_cuda()
85
- self.check_debug_output(out, ['cfg'])
87
+ self.check_debug_output(out, ["cfg"])
86
88
 
87
89
  def test_dump_llvm(self):
88
- with override_config('DUMP_LLVM', True):
90
+ with override_config("DUMP_LLVM", True):
89
91
  out = self.compile_simple_cuda()
90
- self.check_debug_output(out, ['llvm'])
92
+ self.check_debug_output(out, ["llvm"])
91
93
 
92
94
  def test_dump_assembly(self):
93
- with override_config('DUMP_ASSEMBLY', True):
95
+ with override_config("DUMP_ASSEMBLY", True):
94
96
  out = self.compile_simple_cuda()
95
- self.check_debug_output(out, ['assembly'])
97
+ self.check_debug_output(out, ["assembly"])
96
98
 
97
99
 
98
- if __name__ == '__main__':
100
+ if __name__ == "__main__":
99
101
  unittest.main()