numba-cuda 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
Files changed (227)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from numba.tests.support import (override_config, captured_stdout)
1
+ from numba.tests.support import override_config, captured_stdout
2
2
  from numba.cuda.testing import skip_on_cudasim
3
3
  from numba import cuda
4
4
  from numba.core import types
@@ -8,7 +8,7 @@ import re
8
8
  import unittest
9
9
 
10
10
 
11
- @skip_on_cudasim('Simulator does not produce debug dumps')
11
+ @skip_on_cudasim("Simulator does not produce debug dumps")
12
12
  class TestCudaDebugInfo(CUDATestCase):
13
13
  """
14
14
  These tests only checks the compiled PTX for debuginfo section
@@ -49,7 +49,7 @@ class TestCudaDebugInfo(CUDATestCase):
49
49
  self._check(foo, sig=(types.int32[:],), expect=True)
50
50
 
51
51
  def test_environment_override(self):
52
- with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
52
+ with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
53
53
  # Using default value
54
54
  @cuda.jit(opt=False)
55
55
  def foo(x):
@@ -86,7 +86,7 @@ class TestCudaDebugInfo(CUDATestCase):
86
86
 
87
87
  llvm_ir = f.inspect_llvm(sig)
88
88
  # A varible name starting with "bool" in the debug metadata
89
- pat = r'!DILocalVariable\(.*name:\s+\"bool'
89
+ pat = r"!DILocalVariable\(.*name:\s+\"bool"
90
90
  match = re.compile(pat).search(llvm_ir)
91
91
  self.assertIsNone(match, msg=llvm_ir)
92
92
 
@@ -106,7 +106,7 @@ class TestCudaDebugInfo(CUDATestCase):
106
106
  mdnode_id = match.group(1)
107
107
 
108
108
  # verify the DIBasicType has correct encoding attribute DW_ATE_boolean
109
- pat = rf'!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean'
109
+ pat = rf"!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean"
110
110
  match = re.compile(pat).search(llvm_ir)
111
111
  self.assertIsNotNone(match, msg=llvm_ir)
112
112
 
@@ -133,14 +133,17 @@ class TestCudaDebugInfo(CUDATestCase):
133
133
 
134
134
  llvm_ir = f.inspect_llvm(sig)
135
135
 
136
- defines = [line for line in llvm_ir.splitlines()
137
- if 'define void @"_ZN6cudapy' in line]
136
+ defines = [
137
+ line
138
+ for line in llvm_ir.splitlines()
139
+ if 'define void @"_ZN6cudapy' in line
140
+ ]
138
141
 
139
142
  # Make sure we only found one definition
140
143
  self.assertEqual(len(defines), 1)
141
144
 
142
145
  wrapper_define = defines[0]
143
- self.assertIn('!dbg', wrapper_define)
146
+ self.assertIn("!dbg", wrapper_define)
144
147
 
145
148
  def test_debug_function_calls_internal_impl(self):
146
149
  # Calling a function in a module generated from an implementation
@@ -198,16 +201,16 @@ class TestCudaDebugInfo(CUDATestCase):
198
201
  debug_opts = itertools.product(*[(True, False)] * 3)
199
202
 
200
203
  for kernel_debug, f1_debug, f2_debug in debug_opts:
201
- with self.subTest(kernel_debug=kernel_debug,
202
- f1_debug=f1_debug,
203
- f2_debug=f2_debug):
204
- self._test_chained_device_function(kernel_debug,
205
- f1_debug,
206
- f2_debug)
207
-
208
- def _test_chained_device_function_two_calls(self, kernel_debug, f1_debug,
209
- f2_debug):
210
-
204
+ with self.subTest(
205
+ kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
206
+ ):
207
+ self._test_chained_device_function(
208
+ kernel_debug, f1_debug, f2_debug
209
+ )
210
+
211
+ def _test_chained_device_function_two_calls(
212
+ self, kernel_debug, f1_debug, f2_debug
213
+ ):
211
214
  @cuda.jit(device=True, debug=f2_debug, opt=False)
212
215
  def f2(x):
213
216
  return x + 1
@@ -232,12 +235,12 @@ class TestCudaDebugInfo(CUDATestCase):
232
235
  debug_opts = itertools.product(*[(True, False)] * 3)
233
236
 
234
237
  for kernel_debug, f1_debug, f2_debug in debug_opts:
235
- with self.subTest(kernel_debug=kernel_debug,
236
- f1_debug=f1_debug,
237
- f2_debug=f2_debug):
238
- self._test_chained_device_function_two_calls(kernel_debug,
239
- f1_debug,
240
- f2_debug)
238
+ with self.subTest(
239
+ kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
240
+ ):
241
+ self._test_chained_device_function_two_calls(
242
+ kernel_debug, f1_debug, f2_debug
243
+ )
241
244
 
242
245
  def test_chained_device_three_functions(self):
243
246
  # Like test_chained_device_function, but with enough functions (three)
@@ -278,13 +281,13 @@ class TestCudaDebugInfo(CUDATestCase):
278
281
  llvm_ir = f.inspect_llvm(sig)
279
282
 
280
283
  # extract the metadata node id from `types` field of DISubroutineType
281
- pat = r'!DISubroutineType\(types:\s+!(\d+)\)'
284
+ pat = r"!DISubroutineType\(types:\s+!(\d+)\)"
282
285
  match = re.compile(pat).search(llvm_ir)
283
286
  self.assertIsNotNone(match, msg=llvm_ir)
284
287
  mdnode_id = match.group(1)
285
288
 
286
289
  # extract the metadata node ids from the flexible node of types
287
- pat = rf'!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}'
290
+ pat = rf"!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}"
288
291
  match = re.compile(pat).search(llvm_ir)
289
292
  self.assertIsNotNone(match, msg=llvm_ir)
290
293
  mdnode_id1 = match.group(1)
@@ -303,10 +306,10 @@ class TestCudaDebugInfo(CUDATestCase):
303
306
 
304
307
  def test_kernel_args_types_dump(self):
305
308
  # see issue#135
306
- with override_config('DUMP_LLVM', 1):
309
+ with override_config("DUMP_LLVM", 1):
307
310
  with captured_stdout():
308
311
  self._test_kernel_args_types()
309
312
 
310
313
 
311
- if __name__ == '__main__':
314
+ if __name__ == "__main__":
312
315
  unittest.main()
@@ -3,8 +3,13 @@ import cffi
3
3
 
4
4
  import numpy as np
5
5
 
6
- from numba.cuda.testing import (skip_if_curand_kernel_missing, skip_on_cudasim,
7
- test_data_dir, unittest, CUDATestCase)
6
+ from numba.cuda.testing import (
7
+ skip_if_curand_kernel_missing,
8
+ skip_on_cudasim,
9
+ test_data_dir,
10
+ unittest,
11
+ CUDATestCase,
12
+ )
8
13
  from numba import cuda, jit, float32, int32, types
9
14
  from numba.core.errors import TypingError
10
15
  from numba.tests.support import skip_unless_cffi
@@ -12,9 +17,7 @@ from types import ModuleType
12
17
 
13
18
 
14
19
  class TestDeviceFunc(CUDATestCase):
15
-
16
20
  def test_use_add2f(self):
17
-
18
21
  @cuda.jit("float32(float32, float32)", device=True)
19
22
  def add2f(a, b):
20
23
  return a + b
@@ -33,7 +36,6 @@ class TestDeviceFunc(CUDATestCase):
33
36
  self.assertTrue(np.all(ary == exp), (ary, exp))
34
37
 
35
38
  def test_indirect_add2f(self):
36
-
37
39
  @cuda.jit("float32(float32, float32)", device=True)
38
40
  def add2f(a, b):
39
41
  return a + b
@@ -74,12 +76,12 @@ class TestDeviceFunc(CUDATestCase):
74
76
 
75
77
  self._check_cpu_dispatcher(add)
76
78
 
77
- @skip_on_cudasim('not supported in cudasim')
79
+ @skip_on_cudasim("not supported in cudasim")
78
80
  def test_cpu_dispatcher_invalid(self):
79
81
  # Test invalid usage
80
82
  # Explicit signature disables compilation, which also disable
81
83
  # compiling on CUDA.
82
- @jit('(i4, i4)')
84
+ @jit("(i4, i4)")
83
85
  def add(a, b):
84
86
  return a + b
85
87
 
@@ -95,7 +97,7 @@ class TestDeviceFunc(CUDATestCase):
95
97
  def add(a, b):
96
98
  return a + b
97
99
 
98
- mymod = ModuleType(name='mymod')
100
+ mymod = ModuleType(name="mymod")
99
101
  mymod.add = add
100
102
  del add
101
103
 
@@ -109,7 +111,7 @@ class TestDeviceFunc(CUDATestCase):
109
111
  add_kernel[1, ary.size](ary)
110
112
  np.testing.assert_equal(expect, ary)
111
113
 
112
- @skip_on_cudasim('not supported in cudasim')
114
+ @skip_on_cudasim("not supported in cudasim")
113
115
  def test_inspect_llvm(self):
114
116
  @cuda.jit(device=True)
115
117
  def foo(x, y):
@@ -120,13 +122,13 @@ class TestDeviceFunc(CUDATestCase):
120
122
 
121
123
  fname = cres.fndesc.mangled_name
122
124
  # Verify that the function name has "foo" in it as in the python name
123
- self.assertIn('foo', fname)
125
+ self.assertIn("foo", fname)
124
126
 
125
127
  llvm = foo.inspect_llvm(args)
126
128
  # Check that the compiled function name is in the LLVM.
127
129
  self.assertIn(fname, llvm)
128
130
 
129
- @skip_on_cudasim('not supported in cudasim')
131
+ @skip_on_cudasim("not supported in cudasim")
130
132
  def test_inspect_asm(self):
131
133
  @cuda.jit(device=True)
132
134
  def foo(x, y):
@@ -137,13 +139,13 @@ class TestDeviceFunc(CUDATestCase):
137
139
 
138
140
  fname = cres.fndesc.mangled_name
139
141
  # Verify that the function name has "foo" in it as in the python name
140
- self.assertIn('foo', fname)
142
+ self.assertIn("foo", fname)
141
143
 
142
144
  ptx = foo.inspect_asm(args)
143
145
  # Check that the compiled function name is in the PTX
144
146
  self.assertIn(fname, ptx)
145
147
 
146
- @skip_on_cudasim('not supported in cudasim')
148
+ @skip_on_cudasim("not supported in cudasim")
147
149
  def test_inspect_sass_disallowed(self):
148
150
  @cuda.jit(device=True)
149
151
  def foo(x, y):
@@ -152,10 +154,11 @@ class TestDeviceFunc(CUDATestCase):
152
154
  with self.assertRaises(RuntimeError) as raises:
153
155
  foo.inspect_sass((int32, int32))
154
156
 
155
- self.assertIn('Cannot inspect SASS of a device function',
156
- str(raises.exception))
157
+ self.assertIn(
158
+ "Cannot inspect SASS of a device function", str(raises.exception)
159
+ )
157
160
 
158
- @skip_on_cudasim('cudasim will allow calling any function')
161
+ @skip_on_cudasim("cudasim will allow calling any function")
159
162
  def test_device_func_as_kernel_disallowed(self):
160
163
  @cuda.jit(device=True)
161
164
  def f():
@@ -164,10 +167,12 @@ class TestDeviceFunc(CUDATestCase):
164
167
  with self.assertRaises(RuntimeError) as raises:
165
168
  f[1, 1]()
166
169
 
167
- self.assertIn('Cannot compile a device function as a kernel',
168
- str(raises.exception))
170
+ self.assertIn(
171
+ "Cannot compile a device function as a kernel",
172
+ str(raises.exception),
173
+ )
169
174
 
170
- @skip_on_cudasim('cudasim ignores casting by jit decorator signature')
175
+ @skip_on_cudasim("cudasim ignores casting by jit decorator signature")
171
176
  def test_device_casting(self):
172
177
  # Ensure that casts to the correct type are forced when calling a
173
178
  # device function with a signature. This test ensures that:
@@ -176,20 +181,23 @@ class TestDeviceFunc(CUDATestCase):
176
181
  # shouldn't
177
182
  # - We insert a cast when calling rgba, as opposed to failing to type.
178
183
 
179
- @cuda.jit('int32(int32, int32, int32, int32)', device=True)
184
+ @cuda.jit("int32(int32, int32, int32, int32)", device=True)
180
185
  def rgba(r, g, b, a):
181
- return (((r & 0xFF) << 16) |
182
- ((g & 0xFF) << 8) |
183
- ((b & 0xFF) << 0) |
184
- ((a & 0xFF) << 24))
186
+ return (
187
+ ((r & 0xFF) << 16)
188
+ | ((g & 0xFF) << 8)
189
+ | ((b & 0xFF) << 0)
190
+ | ((a & 0xFF) << 24)
191
+ )
185
192
 
186
193
  @cuda.jit
187
194
  def rgba_caller(x, channels):
188
195
  x[0] = rgba(channels[0], channels[1], channels[2], channels[3])
189
196
 
190
197
  x = cuda.device_array(1, dtype=np.int32)
191
- channels = cuda.to_device(np.asarray([1.0, 2.0, 3.0, 4.0],
192
- dtype=np.float32))
198
+ channels = cuda.to_device(
199
+ np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
200
+ )
193
201
 
194
202
  rgba_caller[1, 1](x, channels)
195
203
 
@@ -259,32 +267,31 @@ int random_number(unsigned int *out, unsigned long long seed)
259
267
  }""")
260
268
 
261
269
 
262
- @skip_on_cudasim('External functions unsupported in the simulator')
270
+ @skip_on_cudasim("External functions unsupported in the simulator")
263
271
  class TestDeclareDevice(CUDATestCase):
264
-
265
272
  def check_api(self, decl):
266
- self.assertEqual(decl.name, 'f1')
273
+ self.assertEqual(decl.name, "f1")
267
274
  self.assertEqual(decl.sig.args, (float32[:],))
268
275
  self.assertEqual(decl.sig.return_type, int32)
269
276
 
270
277
  def test_declare_device_signature(self):
271
- f1 = cuda.declare_device('f1', int32(float32[:]))
278
+ f1 = cuda.declare_device("f1", int32(float32[:]))
272
279
  self.check_api(f1)
273
280
 
274
281
  def test_declare_device_string(self):
275
- f1 = cuda.declare_device('f1', 'int32(float32[:])')
282
+ f1 = cuda.declare_device("f1", "int32(float32[:])")
276
283
  self.check_api(f1)
277
284
 
278
285
  def test_bad_declare_device_tuple(self):
279
- with self.assertRaisesRegex(TypeError, 'Return type'):
280
- cuda.declare_device('f1', (float32[:],))
286
+ with self.assertRaisesRegex(TypeError, "Return type"):
287
+ cuda.declare_device("f1", (float32[:],))
281
288
 
282
289
  def test_bad_declare_device_string(self):
283
- with self.assertRaisesRegex(TypeError, 'Return type'):
284
- cuda.declare_device('f1', '(float32[:],)')
290
+ with self.assertRaisesRegex(TypeError, "Return type"):
291
+ cuda.declare_device("f1", "(float32[:],)")
285
292
 
286
293
  def test_link_cu_source(self):
287
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
294
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
288
295
 
289
296
  @cuda.jit
290
297
  def kernel(r, x):
@@ -301,7 +308,7 @@ class TestDeclareDevice(CUDATestCase):
301
308
 
302
309
  def _test_link_multiple_sources(self, link_type):
303
310
  link = link_type([times2_cu, times4_cu])
304
- times4 = cuda.declare_device('times4', 'int32(int32)', link=link)
311
+ times4 = cuda.declare_device("times4", "int32(int32)", link=link)
305
312
 
306
313
  @cuda.jit
307
314
  def kernel(r, x):
@@ -360,7 +367,7 @@ class TestDeclareDevice(CUDATestCase):
360
367
  np.testing.assert_equal(x[0], 323845807)
361
368
 
362
369
  def test_declared_in_called_function(self):
363
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
370
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
364
371
 
365
372
  @cuda.jit
366
373
  def device_func(x):
@@ -380,7 +387,7 @@ class TestDeclareDevice(CUDATestCase):
380
387
  np.testing.assert_equal(r, x * 2)
381
388
 
382
389
  def test_declared_in_called_function_twice(self):
383
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
390
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
384
391
 
385
392
  @cuda.jit
386
393
  def device_func_1(x):
@@ -404,7 +411,7 @@ class TestDeclareDevice(CUDATestCase):
404
411
  np.testing.assert_equal(r, x * 2)
405
412
 
406
413
  def test_declared_in_called_function_two_calls(self):
407
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
414
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
408
415
 
409
416
  @cuda.jit
410
417
  def device_func(x):
@@ -424,7 +431,7 @@ class TestDeclareDevice(CUDATestCase):
424
431
  np.testing.assert_equal(r, x * 6)
425
432
 
426
433
  def test_call_declared_function_twice(self):
427
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
434
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
428
435
 
429
436
  @cuda.jit
430
437
  def kernel(r, x):
@@ -440,7 +447,7 @@ class TestDeclareDevice(CUDATestCase):
440
447
  np.testing.assert_equal(r, x * 6)
441
448
 
442
449
  def test_declared_in_called_function_and_parent(self):
443
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
450
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
444
451
 
445
452
  @cuda.jit
446
453
  def device_func(x):
@@ -460,8 +467,8 @@ class TestDeclareDevice(CUDATestCase):
460
467
  np.testing.assert_equal(r, x * 4)
461
468
 
462
469
  def test_call_two_different_declared_functions(self):
463
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
464
- times3 = cuda.declare_device('times3', 'int32(int32)', link=times3_cu)
470
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
471
+ times3 = cuda.declare_device("times3", "int32(int32)", link=times3_cu)
465
472
 
466
473
  @cuda.jit
467
474
  def kernel(r, x):
@@ -477,5 +484,5 @@ class TestDeclareDevice(CUDATestCase):
477
484
  np.testing.assert_equal(r, x * 5)
478
485
 
479
486
 
480
- if __name__ == '__main__':
487
+ if __name__ == "__main__":
481
488
  unittest.main()
@@ -15,19 +15,18 @@ def add_kernel(r, x, y):
15
15
  r[0] = x + y
16
16
 
17
17
 
18
- @skip_on_cudasim('Specialization not implemented in the simulator')
18
+ @skip_on_cudasim("Specialization not implemented in the simulator")
19
19
  class TestDispatcherSpecialization(CUDATestCase):
20
20
  def _test_no_double_specialize(self, dispatcher, ty):
21
-
22
21
  with self.assertRaises(RuntimeError) as e:
23
22
  dispatcher.specialize(ty)
24
23
 
25
- self.assertIn('Dispatcher already specialized', str(e.exception))
24
+ self.assertIn("Dispatcher already specialized", str(e.exception))
26
25
 
27
26
  def test_no_double_specialize_sig_same_types(self):
28
27
  # Attempting to specialize a kernel jitted with a signature is illegal,
29
28
  # even for the same types the kernel is already specialized for.
30
- @cuda.jit('void(float32[::1])')
29
+ @cuda.jit("void(float32[::1])")
31
30
  def f(x):
32
31
  pass
33
32
 
@@ -45,7 +44,7 @@ class TestDispatcherSpecialization(CUDATestCase):
45
44
 
46
45
  def test_no_double_specialize_sig_diff_types(self):
47
46
  # Attempting to specialize a kernel jitted with a signature is illegal.
48
- @cuda.jit('void(int32[::1])')
47
+ @cuda.jit("void(int32[::1])")
49
48
  def f(x):
50
49
  pass
51
50
 
@@ -132,13 +131,13 @@ class TestDispatcher(CUDATestCase):
132
131
  self.assertEqual(r[0], add(12300000000, 456))
133
132
 
134
133
  # Now force compilation of only a single specialization
135
- c_add = cuda.jit('(i4[::1], i4, i4)')(add_kernel)
134
+ c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
136
135
  r = np.zeros(1, dtype=np.int32)
137
136
 
138
137
  c_add[1, 1](r, 123, 456)
139
138
  self.assertPreciseEqual(r[0], add(123, 456))
140
139
 
141
- @skip_on_cudasim('Simulator ignores signature')
140
+ @skip_on_cudasim("Simulator ignores signature")
142
141
  @unittest.expectedFailure
143
142
  def test_coerce_input_types_unsafe(self):
144
143
  # Implicit (unsafe) conversion of float to int, originally from
@@ -149,25 +148,24 @@ class TestDispatcher(CUDATestCase):
149
148
  #
150
149
  # This test is marked as xfail until future changes enable this
151
150
  # behavior.
152
- c_add = cuda.jit('(i4[::1], i4, i4)')(add_kernel)
151
+ c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
153
152
  r = np.zeros(1, dtype=np.int32)
154
153
 
155
154
  c_add[1, 1](r, 12.3, 45.6)
156
155
  self.assertPreciseEqual(r[0], add(12, 45))
157
156
 
158
- @skip_on_cudasim('Simulator ignores signature')
157
+ @skip_on_cudasim("Simulator ignores signature")
159
158
  def test_coerce_input_types_unsafe_complex(self):
160
159
  # Implicit conversion of complex to int disallowed
161
- c_add = cuda.jit('(i4[::1], i4, i4)')(add_kernel)
160
+ c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
162
161
  r = np.zeros(1, dtype=np.int32)
163
162
 
164
163
  with self.assertRaises(TypeError):
165
164
  c_add[1, 1](r, 12.3, 45.6j)
166
165
 
167
- @skip_on_cudasim('Simulator does not track overloads')
166
+ @skip_on_cudasim("Simulator does not track overloads")
168
167
  def test_ambiguous_new_version(self):
169
- """Test compiling new version in an ambiguous case
170
- """
168
+ """Test compiling new version in an ambiguous case"""
171
169
  c_add = cuda.jit(add_kernel)
172
170
 
173
171
  r = np.zeros(1, dtype=np.float64)
@@ -190,8 +188,9 @@ class TestDispatcher(CUDATestCase):
190
188
  # to (float, int) or (int, float) with equal weight.
191
189
  c_add[1, 1](r, 1, 1)
192
190
  self.assertAlmostEqual(r[0], INT + INT)
193
- self.assertEqual(len(c_add.overloads), 4, "didn't compile a new "
194
- "version")
191
+ self.assertEqual(
192
+ len(c_add.overloads), 4, "didn't compile a new version"
193
+ )
195
194
 
196
195
  @skip_on_cudasim("Simulator doesn't support concurrent kernels")
197
196
  def test_lock(self):
@@ -245,8 +244,10 @@ class TestDispatcher(CUDATestCase):
245
244
 
246
245
  def test_explicit_signatures_strings(self):
247
246
  # Check with a list of strings for signatures
248
- sigs = ["(int64[::1], int64, int64)",
249
- "(float64[::1], float64, float64)"]
247
+ sigs = [
248
+ "(int64[::1], int64, int64)",
249
+ "(float64[::1], float64, float64)",
250
+ ]
250
251
  self._test_explicit_signatures(sigs)
251
252
 
252
253
  def test_explicit_signatures_tuples(self):
@@ -256,26 +257,31 @@ class TestDispatcher(CUDATestCase):
256
257
 
257
258
  def test_explicit_signatures_signatures(self):
258
259
  # Check with a list of Signature objects for signatures
259
- sigs = [void(int64[::1], int64, int64),
260
- void(float64[::1], float64, float64)]
260
+ sigs = [
261
+ void(int64[::1], int64, int64),
262
+ void(float64[::1], float64, float64),
263
+ ]
261
264
  self._test_explicit_signatures(sigs)
262
265
 
263
266
  def test_explicit_signatures_mixed(self):
264
267
  # Check when we mix types of signature objects in a list of signatures
265
268
 
266
269
  # Tuple and string
267
- sigs = [(int64[::1], int64, int64),
268
- "(float64[::1], float64, float64)"]
270
+ sigs = [(int64[::1], int64, int64), "(float64[::1], float64, float64)"]
269
271
  self._test_explicit_signatures(sigs)
270
272
 
271
273
  # Tuple and Signature object
272
- sigs = [(int64[::1], int64, int64),
273
- void(float64[::1], float64, float64)]
274
+ sigs = [
275
+ (int64[::1], int64, int64),
276
+ void(float64[::1], float64, float64),
277
+ ]
274
278
  self._test_explicit_signatures(sigs)
275
279
 
276
280
  # Signature object and string
277
- sigs = [void(int64[::1], int64, int64),
278
- "(float64[::1], float64, float64)"]
281
+ sigs = [
282
+ void(int64[::1], int64, int64),
283
+ "(float64[::1], float64, float64)",
284
+ ]
279
285
  self._test_explicit_signatures(sigs)
280
286
 
281
287
  def test_explicit_signatures_same_type_class(self):
@@ -284,8 +290,10 @@ class TestDispatcher(CUDATestCase):
284
290
  # that dispatch is differentiated on the types of x and y only, to
285
291
  # closely preserve the intent of the original test from
286
292
  # numba.tests.test_dispatcher)
287
- sigs = ["(float64[::1], float32, float32)",
288
- "(float64[::1], float64, float64)"]
293
+ sigs = [
294
+ "(float64[::1], float32, float32)",
295
+ "(float64[::1], float64, float64)",
296
+ ]
289
297
  f = cuda.jit(sigs)(add_kernel)
290
298
 
291
299
  r = np.zeros(1, dtype=np.float64)
@@ -296,13 +304,17 @@ class TestDispatcher(CUDATestCase):
296
304
  f[1, 1](r, 1, 2**-25)
297
305
  self.assertPreciseEqual(r[0], 1.0000000298023224)
298
306
 
299
- @skip_on_cudasim('No overload resolution in the simulator')
307
+ @skip_on_cudasim("No overload resolution in the simulator")
300
308
  def test_explicit_signatures_ambiguous_resolution(self):
301
309
  # Fail to resolve ambiguity between the two best overloads
302
310
  # (Also deliberate float64[::1] for the first argument in all cases)
303
- f = cuda.jit(["(float64[::1], float32, float64)",
304
- "(float64[::1], float64, float32)",
305
- "(float64[::1], int64, int64)"])(add_kernel)
311
+ f = cuda.jit(
312
+ [
313
+ "(float64[::1], float32, float64)",
314
+ "(float64[::1], float64, float32)",
315
+ "(float64[::1], int64, int64)",
316
+ ]
317
+ )(add_kernel)
306
318
  with self.assertRaises(TypeError) as cm:
307
319
  r = np.zeros(1, dtype=np.float64)
308
320
  f[1, 1](r, 1.0, 2.0)
@@ -317,12 +329,12 @@ class TestDispatcher(CUDATestCase):
317
329
  r"\(Array\(float64, 1, 'C', False, aligned=True\), float32,"
318
330
  r" float64\) -> none\n"
319
331
  r"\(Array\(float64, 1, 'C', False, aligned=True\), float64,"
320
- r" float32\) -> none"
332
+ r" float32\) -> none",
321
333
  )
322
334
  # The integer signature is not part of the best matches
323
335
  self.assertNotIn("int64", str(cm.exception))
324
336
 
325
- @skip_on_cudasim('Simulator does not use _prepare_args')
337
+ @skip_on_cudasim("Simulator does not use _prepare_args")
326
338
  @unittest.expectedFailure
327
339
  def test_explicit_signatures_unsafe(self):
328
340
  # These tests are from test_explicit_signatures, but have to be xfail
@@ -336,8 +348,10 @@ class TestDispatcher(CUDATestCase):
336
348
  self.assertPreciseEqual(r[0], 3)
337
349
  self.assertEqual(len(f.overloads), 1, f.overloads)
338
350
 
339
- sigs = ["(int64[::1], int64, int64)",
340
- "(float64[::1], float64, float64)"]
351
+ sigs = [
352
+ "(int64[::1], int64, int64)",
353
+ "(float64[::1], float64, float64)",
354
+ ]
341
355
  f = cuda.jit(sigs)(add_kernel)
342
356
  r = np.zeros(1, dtype=np.float64)
343
357
  # Approximate match (int32 -> float64 is a safe conversion)
@@ -414,7 +428,7 @@ class TestDispatcher(CUDATestCase):
414
428
  f[1, 1](r, 1.5, 2.5)
415
429
  self.assertPreciseEqual(r[0], 4.0)
416
430
 
417
- @skip_on_cudasim('CUDA Simulator does not force casting')
431
+ @skip_on_cudasim("CUDA Simulator does not force casting")
418
432
  def test_explicit_signatures_device_unsafe(self):
419
433
  # These tests are from test_explicit_signatures. The device function
420
434
  # variant of these tests can succeed on CUDA because the compilation
@@ -489,17 +503,15 @@ class TestDispatcherKernelProperties(CUDATestCase):
489
503
  # provides the same values as getting the registers per thread for
490
504
  # individual signatures.
491
505
  regs_per_thread_all = pi_sin_array.get_regs_per_thread()
492
- self.assertEqual(regs_per_thread_all[sig_f32.args],
493
- regs_per_thread_f32)
494
- self.assertEqual(regs_per_thread_all[sig_f64.args],
495
- regs_per_thread_f64)
506
+ self.assertEqual(regs_per_thread_all[sig_f32.args], regs_per_thread_f32)
507
+ self.assertEqual(regs_per_thread_all[sig_f64.args], regs_per_thread_f64)
496
508
 
497
509
  if regs_per_thread_f32 == regs_per_thread_f64:
498
510
  # If the register usage is the same for both variants, there may be
499
511
  # a bug, but this may also be an artifact of the compiler / driver
500
512
  # / device combination, so produce an informational message only.
501
- print('f32 and f64 variant thread usages are equal.')
502
- print('This may warrant some investigation. Devices:')
513
+ print("f32 and f64 variant thread usages are equal.")
514
+ print("This may warrant some investigation. Devices:")
503
515
  cuda.detect()
504
516
 
505
517
  def test_get_regs_per_thread_specialized(self):
@@ -696,5 +708,5 @@ class TestDispatcherKernelProperties(CUDATestCase):
696
708
  self.assertGreaterEqual(local_mem_per_thread, N * 4)
697
709
 
698
710
 
699
- if __name__ == '__main__':
711
+ if __name__ == "__main__":
700
712
  unittest.main()