numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -15,19 +15,18 @@ def add_kernel(r, x, y):
15
15
  r[0] = x + y
16
16
 
17
17
 
18
- @skip_on_cudasim('Specialization not implemented in the simulator')
18
+ @skip_on_cudasim("Specialization not implemented in the simulator")
19
19
  class TestDispatcherSpecialization(CUDATestCase):
20
20
  def _test_no_double_specialize(self, dispatcher, ty):
21
-
22
21
  with self.assertRaises(RuntimeError) as e:
23
22
  dispatcher.specialize(ty)
24
23
 
25
- self.assertIn('Dispatcher already specialized', str(e.exception))
24
+ self.assertIn("Dispatcher already specialized", str(e.exception))
26
25
 
27
26
  def test_no_double_specialize_sig_same_types(self):
28
27
  # Attempting to specialize a kernel jitted with a signature is illegal,
29
28
  # even for the same types the kernel is already specialized for.
30
- @cuda.jit('void(float32[::1])')
29
+ @cuda.jit("void(float32[::1])")
31
30
  def f(x):
32
31
  pass
33
32
 
@@ -45,7 +44,7 @@ class TestDispatcherSpecialization(CUDATestCase):
45
44
 
46
45
  def test_no_double_specialize_sig_diff_types(self):
47
46
  # Attempting to specialize a kernel jitted with a signature is illegal.
48
- @cuda.jit('void(int32[::1])')
47
+ @cuda.jit("void(int32[::1])")
49
48
  def f(x):
50
49
  pass
51
50
 
@@ -132,13 +131,13 @@ class TestDispatcher(CUDATestCase):
132
131
  self.assertEqual(r[0], add(12300000000, 456))
133
132
 
134
133
  # Now force compilation of only a single specialization
135
- c_add = cuda.jit('(i4[::1], i4, i4)')(add_kernel)
134
+ c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
136
135
  r = np.zeros(1, dtype=np.int32)
137
136
 
138
137
  c_add[1, 1](r, 123, 456)
139
138
  self.assertPreciseEqual(r[0], add(123, 456))
140
139
 
141
- @skip_on_cudasim('Simulator ignores signature')
140
+ @skip_on_cudasim("Simulator ignores signature")
142
141
  @unittest.expectedFailure
143
142
  def test_coerce_input_types_unsafe(self):
144
143
  # Implicit (unsafe) conversion of float to int, originally from
@@ -149,25 +148,24 @@ class TestDispatcher(CUDATestCase):
149
148
  #
150
149
  # This test is marked as xfail until future changes enable this
151
150
  # behavior.
152
- c_add = cuda.jit('(i4[::1], i4, i4)')(add_kernel)
151
+ c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
153
152
  r = np.zeros(1, dtype=np.int32)
154
153
 
155
154
  c_add[1, 1](r, 12.3, 45.6)
156
155
  self.assertPreciseEqual(r[0], add(12, 45))
157
156
 
158
- @skip_on_cudasim('Simulator ignores signature')
157
+ @skip_on_cudasim("Simulator ignores signature")
159
158
  def test_coerce_input_types_unsafe_complex(self):
160
159
  # Implicit conversion of complex to int disallowed
161
- c_add = cuda.jit('(i4[::1], i4, i4)')(add_kernel)
160
+ c_add = cuda.jit("(i4[::1], i4, i4)")(add_kernel)
162
161
  r = np.zeros(1, dtype=np.int32)
163
162
 
164
163
  with self.assertRaises(TypeError):
165
164
  c_add[1, 1](r, 12.3, 45.6j)
166
165
 
167
- @skip_on_cudasim('Simulator does not track overloads')
166
+ @skip_on_cudasim("Simulator does not track overloads")
168
167
  def test_ambiguous_new_version(self):
169
- """Test compiling new version in an ambiguous case
170
- """
168
+ """Test compiling new version in an ambiguous case"""
171
169
  c_add = cuda.jit(add_kernel)
172
170
 
173
171
  r = np.zeros(1, dtype=np.float64)
@@ -190,8 +188,9 @@ class TestDispatcher(CUDATestCase):
190
188
  # to (float, int) or (int, float) with equal weight.
191
189
  c_add[1, 1](r, 1, 1)
192
190
  self.assertAlmostEqual(r[0], INT + INT)
193
- self.assertEqual(len(c_add.overloads), 4, "didn't compile a new "
194
- "version")
191
+ self.assertEqual(
192
+ len(c_add.overloads), 4, "didn't compile a new version"
193
+ )
195
194
 
196
195
  @skip_on_cudasim("Simulator doesn't support concurrent kernels")
197
196
  def test_lock(self):
@@ -245,8 +244,10 @@ class TestDispatcher(CUDATestCase):
245
244
 
246
245
  def test_explicit_signatures_strings(self):
247
246
  # Check with a list of strings for signatures
248
- sigs = ["(int64[::1], int64, int64)",
249
- "(float64[::1], float64, float64)"]
247
+ sigs = [
248
+ "(int64[::1], int64, int64)",
249
+ "(float64[::1], float64, float64)",
250
+ ]
250
251
  self._test_explicit_signatures(sigs)
251
252
 
252
253
  def test_explicit_signatures_tuples(self):
@@ -256,26 +257,31 @@ class TestDispatcher(CUDATestCase):
256
257
 
257
258
  def test_explicit_signatures_signatures(self):
258
259
  # Check with a list of Signature objects for signatures
259
- sigs = [void(int64[::1], int64, int64),
260
- void(float64[::1], float64, float64)]
260
+ sigs = [
261
+ void(int64[::1], int64, int64),
262
+ void(float64[::1], float64, float64),
263
+ ]
261
264
  self._test_explicit_signatures(sigs)
262
265
 
263
266
  def test_explicit_signatures_mixed(self):
264
267
  # Check when we mix types of signature objects in a list of signatures
265
268
 
266
269
  # Tuple and string
267
- sigs = [(int64[::1], int64, int64),
268
- "(float64[::1], float64, float64)"]
270
+ sigs = [(int64[::1], int64, int64), "(float64[::1], float64, float64)"]
269
271
  self._test_explicit_signatures(sigs)
270
272
 
271
273
  # Tuple and Signature object
272
- sigs = [(int64[::1], int64, int64),
273
- void(float64[::1], float64, float64)]
274
+ sigs = [
275
+ (int64[::1], int64, int64),
276
+ void(float64[::1], float64, float64),
277
+ ]
274
278
  self._test_explicit_signatures(sigs)
275
279
 
276
280
  # Signature object and string
277
- sigs = [void(int64[::1], int64, int64),
278
- "(float64[::1], float64, float64)"]
281
+ sigs = [
282
+ void(int64[::1], int64, int64),
283
+ "(float64[::1], float64, float64)",
284
+ ]
279
285
  self._test_explicit_signatures(sigs)
280
286
 
281
287
  def test_explicit_signatures_same_type_class(self):
@@ -284,8 +290,10 @@ class TestDispatcher(CUDATestCase):
284
290
  # that dispatch is differentiated on the types of x and y only, to
285
291
  # closely preserve the intent of the original test from
286
292
  # numba.tests.test_dispatcher)
287
- sigs = ["(float64[::1], float32, float32)",
288
- "(float64[::1], float64, float64)"]
293
+ sigs = [
294
+ "(float64[::1], float32, float32)",
295
+ "(float64[::1], float64, float64)",
296
+ ]
289
297
  f = cuda.jit(sigs)(add_kernel)
290
298
 
291
299
  r = np.zeros(1, dtype=np.float64)
@@ -296,13 +304,17 @@ class TestDispatcher(CUDATestCase):
296
304
  f[1, 1](r, 1, 2**-25)
297
305
  self.assertPreciseEqual(r[0], 1.0000000298023224)
298
306
 
299
- @skip_on_cudasim('No overload resolution in the simulator')
307
+ @skip_on_cudasim("No overload resolution in the simulator")
300
308
  def test_explicit_signatures_ambiguous_resolution(self):
301
309
  # Fail to resolve ambiguity between the two best overloads
302
310
  # (Also deliberate float64[::1] for the first argument in all cases)
303
- f = cuda.jit(["(float64[::1], float32, float64)",
304
- "(float64[::1], float64, float32)",
305
- "(float64[::1], int64, int64)"])(add_kernel)
311
+ f = cuda.jit(
312
+ [
313
+ "(float64[::1], float32, float64)",
314
+ "(float64[::1], float64, float32)",
315
+ "(float64[::1], int64, int64)",
316
+ ]
317
+ )(add_kernel)
306
318
  with self.assertRaises(TypeError) as cm:
307
319
  r = np.zeros(1, dtype=np.float64)
308
320
  f[1, 1](r, 1.0, 2.0)
@@ -317,12 +329,12 @@ class TestDispatcher(CUDATestCase):
317
329
  r"\(Array\(float64, 1, 'C', False, aligned=True\), float32,"
318
330
  r" float64\) -> none\n"
319
331
  r"\(Array\(float64, 1, 'C', False, aligned=True\), float64,"
320
- r" float32\) -> none"
332
+ r" float32\) -> none",
321
333
  )
322
334
  # The integer signature is not part of the best matches
323
335
  self.assertNotIn("int64", str(cm.exception))
324
336
 
325
- @skip_on_cudasim('Simulator does not use _prepare_args')
337
+ @skip_on_cudasim("Simulator does not use _prepare_args")
326
338
  @unittest.expectedFailure
327
339
  def test_explicit_signatures_unsafe(self):
328
340
  # These tests are from test_explicit_signatures, but have to be xfail
@@ -336,8 +348,10 @@ class TestDispatcher(CUDATestCase):
336
348
  self.assertPreciseEqual(r[0], 3)
337
349
  self.assertEqual(len(f.overloads), 1, f.overloads)
338
350
 
339
- sigs = ["(int64[::1], int64, int64)",
340
- "(float64[::1], float64, float64)"]
351
+ sigs = [
352
+ "(int64[::1], int64, int64)",
353
+ "(float64[::1], float64, float64)",
354
+ ]
341
355
  f = cuda.jit(sigs)(add_kernel)
342
356
  r = np.zeros(1, dtype=np.float64)
343
357
  # Approximate match (int32 -> float64 is a safe conversion)
@@ -414,7 +428,7 @@ class TestDispatcher(CUDATestCase):
414
428
  f[1, 1](r, 1.5, 2.5)
415
429
  self.assertPreciseEqual(r[0], 4.0)
416
430
 
417
- @skip_on_cudasim('CUDA Simulator does not force casting')
431
+ @skip_on_cudasim("CUDA Simulator does not force casting")
418
432
  def test_explicit_signatures_device_unsafe(self):
419
433
  # These tests are from test_explicit_signatures. The device function
420
434
  # variant of these tests can succeed on CUDA because the compilation
@@ -489,17 +503,15 @@ class TestDispatcherKernelProperties(CUDATestCase):
489
503
  # provides the same values as getting the registers per thread for
490
504
  # individual signatures.
491
505
  regs_per_thread_all = pi_sin_array.get_regs_per_thread()
492
- self.assertEqual(regs_per_thread_all[sig_f32.args],
493
- regs_per_thread_f32)
494
- self.assertEqual(regs_per_thread_all[sig_f64.args],
495
- regs_per_thread_f64)
506
+ self.assertEqual(regs_per_thread_all[sig_f32.args], regs_per_thread_f32)
507
+ self.assertEqual(regs_per_thread_all[sig_f64.args], regs_per_thread_f64)
496
508
 
497
509
  if regs_per_thread_f32 == regs_per_thread_f64:
498
510
  # If the register usage is the same for both variants, there may be
499
511
  # a bug, but this may also be an artifact of the compiler / driver
500
512
  # / device combination, so produce an informational message only.
501
- print('f32 and f64 variant thread usages are equal.')
502
- print('This may warrant some investigation. Devices:')
513
+ print("f32 and f64 variant thread usages are equal.")
514
+ print("This may warrant some investigation. Devices:")
503
515
  cuda.detect()
504
516
 
505
517
  def test_get_regs_per_thread_specialized(self):
@@ -696,5 +708,5 @@ class TestDispatcherKernelProperties(CUDATestCase):
696
708
  self.assertGreaterEqual(local_mem_per_thread, N * 4)
697
709
 
698
710
 
699
- if __name__ == '__main__':
711
+ if __name__ == "__main__":
700
712
  unittest.main()
@@ -6,24 +6,24 @@ import numpy as np
6
6
 
7
7
  from numba import int16, int32
8
8
  from numba import cuda, vectorize, njit
9
+ from numba.core import types
9
10
  from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
10
11
  from numba.tests.enum_usecases import (
11
12
  Color,
12
13
  Shape,
13
14
  Planet,
14
15
  RequestError,
15
- IntEnumWithNegatives
16
+ IntEnumWithNegatives,
16
17
  )
17
18
 
18
19
 
19
20
  class EnumTest(CUDATestCase):
20
-
21
21
  pairs = [
22
22
  (Color.red, Color.red),
23
23
  (Color.red, Color.green),
24
24
  (Planet.EARTH, Planet.EARTH),
25
25
  (Planet.VENUS, Planet.MARS),
26
- (Shape.circle, IntEnumWithNegatives.two) # IntEnum, same value
26
+ (Shape.circle, IntEnumWithNegatives.two), # IntEnum, same value
27
27
  ]
28
28
 
29
29
  def test_compare(self):
@@ -45,7 +45,7 @@ class EnumTest(CUDATestCase):
45
45
  def f(out):
46
46
  # Lookup of an enum member on its class
47
47
  out[0] = Color.red == Color.green
48
- out[1] = Color['red'] == Color['green']
48
+ out[1] = Color["red"] == Color["green"]
49
49
 
50
50
  cuda_f = cuda.jit(f)
51
51
  got = np.zeros((2,), dtype=np.bool_)
@@ -106,16 +106,33 @@ class EnumTest(CUDATestCase):
106
106
  def test_vectorize(self):
107
107
  def f(x):
108
108
  if x != RequestError.not_found:
109
- return RequestError['internal_error']
109
+ return RequestError["internal_error"]
110
110
  else:
111
111
  return RequestError.dummy
112
112
 
113
- cuda_func = vectorize("int64(int64)", target='cuda')(f)
113
+ cuda_func = vectorize("int64(int64)", target="cuda")(f)
114
114
  arr = np.array([2, 404, 500, 404], dtype=np.int64)
115
115
  expected = np.array([f(x) for x in arr], dtype=np.int64)
116
116
  got = cuda_func(arr)
117
117
  self.assertPreciseEqual(expected, got)
118
118
 
119
+ def test_int_enum_no_conversion(self):
120
+ # Ported from Numba PR #10047: "Fix IntEnumMember.can_convert_to() when
121
+ # no conversions found", https://github.com/numba/numba/pull/10047.
122
+
123
+ # The original test is intended to ensure that
124
+ # IntEnumMember.can_convert_to() handles the case when the typing
125
+ # context's can_convert() method returns None to signal no possible
126
+ # conversion. In Numba-CUDA, we had to patch the CUDA target context to
127
+ # work around this issue, because we cannot guarantee that the
128
+ # IntEnumMember method can be patched before instances are created.
129
+ ctx = cuda.descriptor.cuda_target.typing_context
130
+
131
+ int_enum_type = types.IntEnumMember(Shape, types.int64)
132
+ # Conversion of an int enum member to a 1D array would be invalid
133
+ invalid_toty = types.int64[::1]
134
+ self.assertIsNone(ctx.can_convert(int_enum_type, invalid_toty))
135
+
119
136
 
120
- if __name__ == '__main__':
137
+ if __name__ == "__main__":
121
138
  unittest.main()
@@ -17,34 +17,49 @@ class TestJitErrors(CUDATestCase):
17
17
 
18
18
  with self.assertRaises(ValueError) as raises:
19
19
  kernfunc[(1, 2, 3, 4), (5, 6)]
20
- self.assertIn("griddim must be a sequence of 1, 2 or 3 integers, "
21
- "got [1, 2, 3, 4]",
22
- str(raises.exception))
20
+ self.assertIn(
21
+ "griddim must be a sequence of 1, 2 or 3 integers, "
22
+ "got [1, 2, 3, 4]",
23
+ str(raises.exception),
24
+ )
23
25
 
24
26
  with self.assertRaises(ValueError) as raises:
25
- kernfunc[(1, 2,), (3, 4, 5, 6)]
26
- self.assertIn("blockdim must be a sequence of 1, 2 or 3 integers, "
27
- "got [3, 4, 5, 6]",
28
- str(raises.exception))
27
+ kernfunc[
28
+ (
29
+ 1,
30
+ 2,
31
+ ),
32
+ (3, 4, 5, 6),
33
+ ]
34
+ self.assertIn(
35
+ "blockdim must be a sequence of 1, 2 or 3 integers, "
36
+ "got [3, 4, 5, 6]",
37
+ str(raises.exception),
38
+ )
29
39
 
30
40
  def test_non_integral_dims(self):
31
41
  kernfunc = cuda.jit(noop)
32
42
 
33
43
  with self.assertRaises(TypeError) as raises:
34
44
  kernfunc[2.0, 3]
35
- self.assertIn("griddim must be a sequence of integers, got [2.0]",
36
- str(raises.exception))
45
+ self.assertIn(
46
+ "griddim must be a sequence of integers, got [2.0]",
47
+ str(raises.exception),
48
+ )
37
49
 
38
50
  with self.assertRaises(TypeError) as raises:
39
51
  kernfunc[2, 3.0]
40
- self.assertIn("blockdim must be a sequence of integers, got [3.0]",
41
- str(raises.exception))
52
+ self.assertIn(
53
+ "blockdim must be a sequence of integers, got [3.0]",
54
+ str(raises.exception),
55
+ )
42
56
 
43
57
  def _test_unconfigured(self, kernfunc):
44
58
  with self.assertRaises(ValueError) as raises:
45
59
  kernfunc(0)
46
- self.assertIn("launch configuration was not specified",
47
- str(raises.exception))
60
+ self.assertIn(
61
+ "launch configuration was not specified", str(raises.exception)
62
+ )
48
63
 
49
64
  def test_unconfigured_typed_cudakernel(self):
50
65
  kernfunc = cuda.jit("void(int32)")(noop)
@@ -54,7 +69,7 @@ class TestJitErrors(CUDATestCase):
54
69
  kernfunc = cuda.jit(noop)
55
70
  self._test_unconfigured(kernfunc)
56
71
 
57
- @skip_on_cudasim('TypingError does not occur on simulator')
72
+ @skip_on_cudasim("TypingError does not occur on simulator")
58
73
  def test_typing_error(self):
59
74
  # see #5860, this is present to catch changes to error reporting
60
75
  # accidentally breaking the CUDA target
@@ -75,5 +90,5 @@ class TestJitErrors(CUDATestCase):
75
90
  self.assertIn("NameError: name 'floor' is not defined", excstr)
76
91
 
77
92
 
78
- if __name__ == '__main__':
93
+ if __name__ == "__main__":
79
94
  unittest.main()
@@ -83,20 +83,19 @@ class TestException(CUDATestCase):
83
83
  x[i] += x[i] // y[i]
84
84
 
85
85
  n = 32
86
- got_x = 1. / (np.arange(n) + 0.01)
87
- got_y = 1. / (np.arange(n) + 0.01)
86
+ got_x = 1.0 / (np.arange(n) + 0.01)
87
+ got_y = 1.0 / (np.arange(n) + 0.01)
88
88
  problematic[1, n](got_x, got_y)
89
89
 
90
- expect_x = 1. / (np.arange(n) + 0.01)
91
- expect_y = 1. / (np.arange(n) + 0.01)
90
+ expect_x = 1.0 / (np.arange(n) + 0.01)
91
+ expect_y = 1.0 / (np.arange(n) + 0.01)
92
92
  oracle[1, n](expect_x, expect_y)
93
93
 
94
94
  np.testing.assert_almost_equal(expect_x, got_x)
95
95
  np.testing.assert_almost_equal(expect_y, got_y)
96
96
 
97
97
  def test_raise_causing_warp_diverge(self):
98
- """Test case for issue #2655.
99
- """
98
+ """Test case for issue #2655."""
100
99
  self.case_raise_causing_warp_diverge(with_debug_mode=False)
101
100
 
102
101
  # The following two cases relate to Issue #7806: Division by zero stops the
@@ -117,8 +116,8 @@ class TestException(CUDATestCase):
117
116
 
118
117
  f[1, 1](r, x, y)
119
118
 
120
- self.assertTrue(np.isinf(r[0]), 'Expected inf from div by zero')
121
- self.assertEqual(r[1], y[0], 'Expected execution to continue')
119
+ self.assertTrue(np.isinf(r[0]), "Expected inf from div by zero")
120
+ self.assertEqual(r[1], y[0], "Expected execution to continue")
122
121
 
123
122
  def test_zero_division_error_in_debug(self):
124
123
  # When debug is True:
@@ -146,15 +145,15 @@ class TestException(CUDATestCase):
146
145
  with self.assertRaises(exc):
147
146
  f[1, 1](r, x, y)
148
147
 
149
- self.assertEqual(r[0], 0, 'Expected result to be left unset')
150
- self.assertEqual(r[1], 0, 'Expected execution to stop')
148
+ self.assertEqual(r[0], 0, "Expected result to be left unset")
149
+ self.assertEqual(r[1], 0, "Expected execution to stop")
151
150
 
152
151
  @xfail_unless_cudasim
153
152
  def test_raise_in_device_function(self):
154
153
  # This is an expected failure because reporting of exceptions raised in
155
154
  # device functions does not work correctly - see Issue #8036:
156
155
  # https://github.com/numba/numba/issues/8036
157
- msg = 'Device Function Error'
156
+ msg = "Device Function Error"
158
157
 
159
158
  @cuda.jit(device=True)
160
159
  def f():
@@ -170,5 +169,5 @@ class TestException(CUDATestCase):
170
169
  self.assertIn(msg, str(raises.exception))
171
170
 
172
171
 
173
- if __name__ == '__main__':
172
+ if __name__ == "__main__":
174
173
  unittest.main()
@@ -8,12 +8,13 @@ class Interval:
8
8
  """
9
9
  A half-open interval on the real number line.
10
10
  """
11
+
11
12
  def __init__(self, lo, hi):
12
13
  self.lo = lo
13
14
  self.hi = hi
14
15
 
15
16
  def __repr__(self):
16
- return 'Interval(%f, %f)' % (self.lo, self.hi)
17
+ return "Interval(%f, %f)" % (self.lo, self.hi)
17
18
 
18
19
  @property
19
20
  def width(self):
@@ -32,16 +33,23 @@ def sum_intervals(i, j):
32
33
 
33
34
  if not config.ENABLE_CUDASIM:
34
35
  from numba.core import cgutils
35
- from numba.core.extending import (lower_builtin, make_attribute_wrapper,
36
- models, register_model, type_callable,
37
- typeof_impl)
36
+ from numba.core.extending import (
37
+ lower_builtin,
38
+ models,
39
+ type_callable,
40
+ typeof_impl,
41
+ )
38
42
  from numba.core.typing.templates import AttributeTemplate
39
43
  from numba.cuda.cudadecl import registry as cuda_registry
40
44
  from numba.cuda.cudaimpl import lower_attr as cuda_lower_attr
45
+ from numba.cuda.extending import (
46
+ register_model,
47
+ make_attribute_wrapper,
48
+ )
41
49
 
42
50
  class IntervalType(types.Type):
43
51
  def __init__(self):
44
- super().__init__(name='Interval')
52
+ super().__init__(name="Interval")
45
53
 
46
54
  interval_type = IntervalType()
47
55
 
@@ -54,19 +62,20 @@ if not config.ENABLE_CUDASIM:
54
62
  def typer(lo, hi):
55
63
  if isinstance(lo, types.Float) and isinstance(hi, types.Float):
56
64
  return interval_type
65
+
57
66
  return typer
58
67
 
59
68
  @register_model(IntervalType)
60
69
  class IntervalModel(models.StructModel):
61
70
  def __init__(self, dmm, fe_type):
62
71
  members = [
63
- ('lo', types.float64),
64
- ('hi', types.float64),
72
+ ("lo", types.float64),
73
+ ("hi", types.float64),
65
74
  ]
66
75
  models.StructModel.__init__(self, dmm, fe_type, members)
67
76
 
68
- make_attribute_wrapper(IntervalType, 'lo', 'lo')
69
- make_attribute_wrapper(IntervalType, 'hi', 'hi')
77
+ make_attribute_wrapper(IntervalType, "lo", "lo")
78
+ make_attribute_wrapper(IntervalType, "hi", "hi")
70
79
 
71
80
  @lower_builtin(Interval, types.Float, types.Float)
72
81
  def impl_interval(context, builder, sig, args):
@@ -84,14 +93,14 @@ if not config.ENABLE_CUDASIM:
84
93
  def resolve_width(self, mod):
85
94
  return types.float64
86
95
 
87
- @cuda_lower_attr(IntervalType, 'width')
96
+ @cuda_lower_attr(IntervalType, "width")
88
97
  def cuda_Interval_width(context, builder, sig, arg):
89
98
  lo = builder.extract_value(arg, 0)
90
99
  hi = builder.extract_value(arg, 1)
91
100
  return builder.fsub(hi, lo)
92
101
 
93
102
 
94
- @skip_on_cudasim('Extensions not supported in the simulator')
103
+ @skip_on_cudasim("Extensions not supported in the simulator")
95
104
  class TestExtending(CUDATestCase):
96
105
  def test_attributes(self):
97
106
  @cuda.jit
@@ -151,5 +160,5 @@ class TestExtending(CUDATestCase):
151
160
  np.testing.assert_allclose(r, expected)
152
161
 
153
162
 
154
- if __name__ == '__main__':
163
+ if __name__ == "__main__":
155
164
  unittest.main()