numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -12,18 +12,17 @@ from numba.tests.enum_usecases import (
12
12
  Shape,
13
13
  Planet,
14
14
  RequestError,
15
- IntEnumWithNegatives
15
+ IntEnumWithNegatives,
16
16
  )
17
17
 
18
18
 
19
19
  class EnumTest(CUDATestCase):
20
-
21
20
  pairs = [
22
21
  (Color.red, Color.red),
23
22
  (Color.red, Color.green),
24
23
  (Planet.EARTH, Planet.EARTH),
25
24
  (Planet.VENUS, Planet.MARS),
26
- (Shape.circle, IntEnumWithNegatives.two) # IntEnum, same value
25
+ (Shape.circle, IntEnumWithNegatives.two), # IntEnum, same value
27
26
  ]
28
27
 
29
28
  def test_compare(self):
@@ -45,7 +44,7 @@ class EnumTest(CUDATestCase):
45
44
  def f(out):
46
45
  # Lookup of an enum member on its class
47
46
  out[0] = Color.red == Color.green
48
- out[1] = Color['red'] == Color['green']
47
+ out[1] = Color["red"] == Color["green"]
49
48
 
50
49
  cuda_f = cuda.jit(f)
51
50
  got = np.zeros((2,), dtype=np.bool_)
@@ -106,16 +105,16 @@ class EnumTest(CUDATestCase):
106
105
  def test_vectorize(self):
107
106
  def f(x):
108
107
  if x != RequestError.not_found:
109
- return RequestError['internal_error']
108
+ return RequestError["internal_error"]
110
109
  else:
111
110
  return RequestError.dummy
112
111
 
113
- cuda_func = vectorize("int64(int64)", target='cuda')(f)
112
+ cuda_func = vectorize("int64(int64)", target="cuda")(f)
114
113
  arr = np.array([2, 404, 500, 404], dtype=np.int64)
115
114
  expected = np.array([f(x) for x in arr], dtype=np.int64)
116
115
  got = cuda_func(arr)
117
116
  self.assertPreciseEqual(expected, got)
118
117
 
119
118
 
120
- if __name__ == '__main__':
119
+ if __name__ == "__main__":
121
120
  unittest.main()
@@ -17,34 +17,49 @@ class TestJitErrors(CUDATestCase):
17
17
 
18
18
  with self.assertRaises(ValueError) as raises:
19
19
  kernfunc[(1, 2, 3, 4), (5, 6)]
20
- self.assertIn("griddim must be a sequence of 1, 2 or 3 integers, "
21
- "got [1, 2, 3, 4]",
22
- str(raises.exception))
20
+ self.assertIn(
21
+ "griddim must be a sequence of 1, 2 or 3 integers, "
22
+ "got [1, 2, 3, 4]",
23
+ str(raises.exception),
24
+ )
23
25
 
24
26
  with self.assertRaises(ValueError) as raises:
25
- kernfunc[(1, 2,), (3, 4, 5, 6)]
26
- self.assertIn("blockdim must be a sequence of 1, 2 or 3 integers, "
27
- "got [3, 4, 5, 6]",
28
- str(raises.exception))
27
+ kernfunc[
28
+ (
29
+ 1,
30
+ 2,
31
+ ),
32
+ (3, 4, 5, 6),
33
+ ]
34
+ self.assertIn(
35
+ "blockdim must be a sequence of 1, 2 or 3 integers, "
36
+ "got [3, 4, 5, 6]",
37
+ str(raises.exception),
38
+ )
29
39
 
30
40
  def test_non_integral_dims(self):
31
41
  kernfunc = cuda.jit(noop)
32
42
 
33
43
  with self.assertRaises(TypeError) as raises:
34
44
  kernfunc[2.0, 3]
35
- self.assertIn("griddim must be a sequence of integers, got [2.0]",
36
- str(raises.exception))
45
+ self.assertIn(
46
+ "griddim must be a sequence of integers, got [2.0]",
47
+ str(raises.exception),
48
+ )
37
49
 
38
50
  with self.assertRaises(TypeError) as raises:
39
51
  kernfunc[2, 3.0]
40
- self.assertIn("blockdim must be a sequence of integers, got [3.0]",
41
- str(raises.exception))
52
+ self.assertIn(
53
+ "blockdim must be a sequence of integers, got [3.0]",
54
+ str(raises.exception),
55
+ )
42
56
 
43
57
  def _test_unconfigured(self, kernfunc):
44
58
  with self.assertRaises(ValueError) as raises:
45
59
  kernfunc(0)
46
- self.assertIn("launch configuration was not specified",
47
- str(raises.exception))
60
+ self.assertIn(
61
+ "launch configuration was not specified", str(raises.exception)
62
+ )
48
63
 
49
64
  def test_unconfigured_typed_cudakernel(self):
50
65
  kernfunc = cuda.jit("void(int32)")(noop)
@@ -54,7 +69,7 @@ class TestJitErrors(CUDATestCase):
54
69
  kernfunc = cuda.jit(noop)
55
70
  self._test_unconfigured(kernfunc)
56
71
 
57
- @skip_on_cudasim('TypingError does not occur on simulator')
72
+ @skip_on_cudasim("TypingError does not occur on simulator")
58
73
  def test_typing_error(self):
59
74
  # see #5860, this is present to catch changes to error reporting
60
75
  # accidentally breaking the CUDA target
@@ -75,5 +90,5 @@ class TestJitErrors(CUDATestCase):
75
90
  self.assertIn("NameError: name 'floor' is not defined", excstr)
76
91
 
77
92
 
78
- if __name__ == '__main__':
93
+ if __name__ == "__main__":
79
94
  unittest.main()
@@ -83,20 +83,19 @@ class TestException(CUDATestCase):
83
83
  x[i] += x[i] // y[i]
84
84
 
85
85
  n = 32
86
- got_x = 1. / (np.arange(n) + 0.01)
87
- got_y = 1. / (np.arange(n) + 0.01)
86
+ got_x = 1.0 / (np.arange(n) + 0.01)
87
+ got_y = 1.0 / (np.arange(n) + 0.01)
88
88
  problematic[1, n](got_x, got_y)
89
89
 
90
- expect_x = 1. / (np.arange(n) + 0.01)
91
- expect_y = 1. / (np.arange(n) + 0.01)
90
+ expect_x = 1.0 / (np.arange(n) + 0.01)
91
+ expect_y = 1.0 / (np.arange(n) + 0.01)
92
92
  oracle[1, n](expect_x, expect_y)
93
93
 
94
94
  np.testing.assert_almost_equal(expect_x, got_x)
95
95
  np.testing.assert_almost_equal(expect_y, got_y)
96
96
 
97
97
  def test_raise_causing_warp_diverge(self):
98
- """Test case for issue #2655.
99
- """
98
+ """Test case for issue #2655."""
100
99
  self.case_raise_causing_warp_diverge(with_debug_mode=False)
101
100
 
102
101
  # The following two cases relate to Issue #7806: Division by zero stops the
@@ -117,8 +116,8 @@ class TestException(CUDATestCase):
117
116
 
118
117
  f[1, 1](r, x, y)
119
118
 
120
- self.assertTrue(np.isinf(r[0]), 'Expected inf from div by zero')
121
- self.assertEqual(r[1], y[0], 'Expected execution to continue')
119
+ self.assertTrue(np.isinf(r[0]), "Expected inf from div by zero")
120
+ self.assertEqual(r[1], y[0], "Expected execution to continue")
122
121
 
123
122
  def test_zero_division_error_in_debug(self):
124
123
  # When debug is True:
@@ -146,15 +145,15 @@ class TestException(CUDATestCase):
146
145
  with self.assertRaises(exc):
147
146
  f[1, 1](r, x, y)
148
147
 
149
- self.assertEqual(r[0], 0, 'Expected result to be left unset')
150
- self.assertEqual(r[1], 0, 'Expected execution to stop')
148
+ self.assertEqual(r[0], 0, "Expected result to be left unset")
149
+ self.assertEqual(r[1], 0, "Expected execution to stop")
151
150
 
152
151
  @xfail_unless_cudasim
153
152
  def test_raise_in_device_function(self):
154
153
  # This is an expected failure because reporting of exceptions raised in
155
154
  # device functions does not work correctly - see Issue #8036:
156
155
  # https://github.com/numba/numba/issues/8036
157
- msg = 'Device Function Error'
156
+ msg = "Device Function Error"
158
157
 
159
158
  @cuda.jit(device=True)
160
159
  def f():
@@ -170,5 +169,5 @@ class TestException(CUDATestCase):
170
169
  self.assertIn(msg, str(raises.exception))
171
170
 
172
171
 
173
- if __name__ == '__main__':
172
+ if __name__ == "__main__":
174
173
  unittest.main()
@@ -8,12 +8,13 @@ class Interval:
8
8
  """
9
9
  A half-open interval on the real number line.
10
10
  """
11
+
11
12
  def __init__(self, lo, hi):
12
13
  self.lo = lo
13
14
  self.hi = hi
14
15
 
15
16
  def __repr__(self):
16
- return 'Interval(%f, %f)' % (self.lo, self.hi)
17
+ return "Interval(%f, %f)" % (self.lo, self.hi)
17
18
 
18
19
  @property
19
20
  def width(self):
@@ -32,16 +33,21 @@ def sum_intervals(i, j):
32
33
 
33
34
  if not config.ENABLE_CUDASIM:
34
35
  from numba.core import cgutils
35
- from numba.core.extending import (lower_builtin, make_attribute_wrapper,
36
- models, register_model, type_callable,
37
- typeof_impl)
36
+ from numba.core.extending import (
37
+ lower_builtin,
38
+ make_attribute_wrapper,
39
+ models,
40
+ register_model,
41
+ type_callable,
42
+ typeof_impl,
43
+ )
38
44
  from numba.core.typing.templates import AttributeTemplate
39
45
  from numba.cuda.cudadecl import registry as cuda_registry
40
46
  from numba.cuda.cudaimpl import lower_attr as cuda_lower_attr
41
47
 
42
48
  class IntervalType(types.Type):
43
49
  def __init__(self):
44
- super().__init__(name='Interval')
50
+ super().__init__(name="Interval")
45
51
 
46
52
  interval_type = IntervalType()
47
53
 
@@ -54,19 +60,20 @@ if not config.ENABLE_CUDASIM:
54
60
  def typer(lo, hi):
55
61
  if isinstance(lo, types.Float) and isinstance(hi, types.Float):
56
62
  return interval_type
63
+
57
64
  return typer
58
65
 
59
66
  @register_model(IntervalType)
60
67
  class IntervalModel(models.StructModel):
61
68
  def __init__(self, dmm, fe_type):
62
69
  members = [
63
- ('lo', types.float64),
64
- ('hi', types.float64),
70
+ ("lo", types.float64),
71
+ ("hi", types.float64),
65
72
  ]
66
73
  models.StructModel.__init__(self, dmm, fe_type, members)
67
74
 
68
- make_attribute_wrapper(IntervalType, 'lo', 'lo')
69
- make_attribute_wrapper(IntervalType, 'hi', 'hi')
75
+ make_attribute_wrapper(IntervalType, "lo", "lo")
76
+ make_attribute_wrapper(IntervalType, "hi", "hi")
70
77
 
71
78
  @lower_builtin(Interval, types.Float, types.Float)
72
79
  def impl_interval(context, builder, sig, args):
@@ -84,14 +91,14 @@ if not config.ENABLE_CUDASIM:
84
91
  def resolve_width(self, mod):
85
92
  return types.float64
86
93
 
87
- @cuda_lower_attr(IntervalType, 'width')
94
+ @cuda_lower_attr(IntervalType, "width")
88
95
  def cuda_Interval_width(context, builder, sig, arg):
89
96
  lo = builder.extract_value(arg, 0)
90
97
  hi = builder.extract_value(arg, 1)
91
98
  return builder.fsub(hi, lo)
92
99
 
93
100
 
94
- @skip_on_cudasim('Extensions not supported in the simulator')
101
+ @skip_on_cudasim("Extensions not supported in the simulator")
95
102
  class TestExtending(CUDATestCase):
96
103
  def test_attributes(self):
97
104
  @cuda.jit
@@ -151,5 +158,5 @@ class TestExtending(CUDATestCase):
151
158
  np.testing.assert_allclose(r, expected)
152
159
 
153
160
 
154
- if __name__ == '__main__':
161
+ if __name__ == "__main__":
155
162
  unittest.main()
@@ -5,8 +5,7 @@ from numba.cuda.compiler import compile_ptx_for_current_device, compile_ptx
5
5
  from math import cos, sin, tan, exp, log, log10, log2, pow, tanh
6
6
  from operator import truediv
7
7
  import numpy as np
8
- from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
9
- skip_unless_cc_75)
8
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_75
10
9
  import unittest
11
10
 
12
11
 
@@ -24,10 +23,9 @@ class FastMathCriterion:
24
23
  test.assertTrue(all(i not in prec for i in self.prec_unexpected))
25
24
 
26
25
 
27
- @skip_on_cudasim('Fastmath and PTX inspection not available on cudasim')
26
+ @skip_on_cudasim("Fastmath and PTX inspection not available on cudasim")
28
27
  class TestFastMathOption(CUDATestCase):
29
28
  def _test_fast_math_common(self, pyfunc, sig, device, criterion):
30
-
31
29
  # Test jit code path
32
30
  fastver = cuda.jit(sig, device=device, fastmath=True)(pyfunc)
33
31
  precver = cuda.jit(sig, device=device)(pyfunc)
@@ -40,9 +38,7 @@ class TestFastMathOption(CUDATestCase):
40
38
  fastptx, _ = compile_ptx_for_current_device(
41
39
  pyfunc, sig, device=device, fastmath=True
42
40
  )
43
- precptx, _ = compile_ptx_for_current_device(
44
- pyfunc, sig, device=device
45
- )
41
+ precptx, _ = compile_ptx_for_current_device(pyfunc, sig, device=device)
46
42
 
47
43
  criterion.check(self, fastptx, precptx)
48
44
 
@@ -69,7 +65,9 @@ class TestFastMathOption(CUDATestCase):
69
65
 
70
66
  self._test_fast_math_common(
71
67
  kernel,
72
- (float32[::1], float32, float32), device=False, criterion=criterion
68
+ (float32[::1], float32, float32),
69
+ device=False,
70
+ criterion=criterion,
73
71
  )
74
72
  self._test_fast_math_common(
75
73
  device, (float32, float32), device=True, criterion=criterion
@@ -79,39 +77,41 @@ class TestFastMathOption(CUDATestCase):
79
77
  self._test_fast_math_unary(
80
78
  cos,
81
79
  FastMathCriterion(
82
- fast_expected=['cos.approx.ftz.f32 '],
83
- prec_unexpected=['cos.approx.ftz.f32 ']
84
- )
80
+ fast_expected=["cos.approx.ftz.f32 "],
81
+ prec_unexpected=["cos.approx.ftz.f32 "],
82
+ ),
85
83
  )
86
84
 
87
85
  def test_sinf(self):
88
86
  self._test_fast_math_unary(
89
87
  sin,
90
88
  FastMathCriterion(
91
- fast_expected=['sin.approx.ftz.f32 '],
92
- prec_unexpected=['sin.approx.ftz.f32 ']
93
- )
89
+ fast_expected=["sin.approx.ftz.f32 "],
90
+ prec_unexpected=["sin.approx.ftz.f32 "],
91
+ ),
94
92
  )
95
93
 
96
94
  def test_tanf(self):
97
95
  self._test_fast_math_unary(
98
96
  tan,
99
- FastMathCriterion(fast_expected=[
100
- 'sin.approx.ftz.f32 ',
101
- 'cos.approx.ftz.f32 ',
102
- 'div.approx.ftz.f32 '
103
- ], prec_unexpected=['sin.approx.ftz.f32 '])
97
+ FastMathCriterion(
98
+ fast_expected=[
99
+ "sin.approx.ftz.f32 ",
100
+ "cos.approx.ftz.f32 ",
101
+ "div.approx.ftz.f32 ",
102
+ ],
103
+ prec_unexpected=["sin.approx.ftz.f32 "],
104
+ ),
104
105
  )
105
106
 
106
107
  @skip_unless_cc_75
107
108
  def test_tanhf(self):
108
-
109
109
  self._test_fast_math_unary(
110
110
  tanh,
111
111
  FastMathCriterion(
112
- fast_expected=['tanh.approx.f32 '],
113
- prec_unexpected=['tanh.approx.f32 ']
114
- )
112
+ fast_expected=["tanh.approx.f32 "],
113
+ prec_unexpected=["tanh.approx.f32 "],
114
+ ),
115
115
  )
116
116
 
117
117
  def test_tanhf_compile_ptx(self):
@@ -119,74 +119,85 @@ class TestFastMathOption(CUDATestCase):
119
119
  r[0] = tanh(x)
120
120
 
121
121
  def tanh_common_test(cc, criterion):
122
- fastptx, _ = compile_ptx(tanh_kernel, (float32[::1], float32),
123
- fastmath=True, cc=cc)
124
- precptx, _ = compile_ptx(tanh_kernel, (float32[::1], float32),
125
- cc=cc)
122
+ fastptx, _ = compile_ptx(
123
+ tanh_kernel, (float32[::1], float32), fastmath=True, cc=cc
124
+ )
125
+ precptx, _ = compile_ptx(
126
+ tanh_kernel, (float32[::1], float32), cc=cc
127
+ )
126
128
  criterion.check(self, fastptx, precptx)
127
129
 
128
- tanh_common_test(cc=(7, 5), criterion=FastMathCriterion(
129
- fast_expected=['tanh.approx.f32 '],
130
- prec_unexpected=['tanh.approx.f32 ']
131
- ))
130
+ tanh_common_test(
131
+ cc=(7, 5),
132
+ criterion=FastMathCriterion(
133
+ fast_expected=["tanh.approx.f32 "],
134
+ prec_unexpected=["tanh.approx.f32 "],
135
+ ),
136
+ )
132
137
 
133
- tanh_common_test(cc=(7, 0),
134
- criterion=FastMathCriterion(
135
- fast_expected=['ex2.approx.ftz.f32 ',
136
- 'rcp.approx.ftz.f32 '],
137
- prec_unexpected=['tanh.approx.f32 ']))
138
+ tanh_common_test(
139
+ cc=(7, 0),
140
+ criterion=FastMathCriterion(
141
+ fast_expected=["ex2.approx.ftz.f32 ", "rcp.approx.ftz.f32 "],
142
+ prec_unexpected=["tanh.approx.f32 "],
143
+ ),
144
+ )
138
145
 
139
146
  def test_expf(self):
140
147
  self._test_fast_math_unary(
141
148
  exp,
142
149
  FastMathCriterion(
143
- fast_unexpected=['fma.rn.f32 '],
144
- prec_expected=['fma.rn.f32 ']
145
- )
150
+ fast_unexpected=["fma.rn.f32 "], prec_expected=["fma.rn.f32 "]
151
+ ),
146
152
  )
147
153
 
148
154
  def test_logf(self):
149
155
  # Look for constant used to convert from log base 2 to log base e
150
156
  self._test_fast_math_unary(
151
- log, FastMathCriterion(
152
- fast_expected=['lg2.approx.ftz.f32 ', '0f3F317218'],
153
- prec_unexpected=['lg2.approx.ftz.f32 '],
154
- )
157
+ log,
158
+ FastMathCriterion(
159
+ fast_expected=["lg2.approx.ftz.f32 ", "0f3F317218"],
160
+ prec_unexpected=["lg2.approx.ftz.f32 "],
161
+ ),
155
162
  )
156
163
 
157
164
  def test_log10f(self):
158
165
  # Look for constant used to convert from log base 2 to log base 10
159
166
  self._test_fast_math_unary(
160
- log10, FastMathCriterion(
161
- fast_expected=['lg2.approx.ftz.f32 ', '0f3E9A209B'],
162
- prec_unexpected=['lg2.approx.ftz.f32 ']
163
- )
167
+ log10,
168
+ FastMathCriterion(
169
+ fast_expected=["lg2.approx.ftz.f32 ", "0f3E9A209B"],
170
+ prec_unexpected=["lg2.approx.ftz.f32 "],
171
+ ),
164
172
  )
165
173
 
166
174
  def test_log2f(self):
167
175
  self._test_fast_math_unary(
168
- log2, FastMathCriterion(
169
- fast_expected=['lg2.approx.ftz.f32 '],
170
- prec_unexpected=['lg2.approx.ftz.f32 ']
171
- )
176
+ log2,
177
+ FastMathCriterion(
178
+ fast_expected=["lg2.approx.ftz.f32 "],
179
+ prec_unexpected=["lg2.approx.ftz.f32 "],
180
+ ),
172
181
  )
173
182
 
174
183
  def test_powf(self):
175
184
  self._test_fast_math_binary(
176
- pow, FastMathCriterion(
177
- fast_expected=['lg2.approx.ftz.f32 '],
178
- prec_unexpected=['lg2.approx.ftz.f32 '],
179
- )
185
+ pow,
186
+ FastMathCriterion(
187
+ fast_expected=["lg2.approx.ftz.f32 "],
188
+ prec_unexpected=["lg2.approx.ftz.f32 "],
189
+ ),
180
190
  )
181
191
 
182
192
  def test_divf(self):
183
193
  self._test_fast_math_binary(
184
- truediv, FastMathCriterion(
185
- fast_expected=['div.approx.ftz.f32 '],
186
- fast_unexpected=['div.rn.f32'],
187
- prec_expected=['div.rn.f32'],
188
- prec_unexpected=['div.approx.ftz.f32 '],
189
- )
194
+ truediv,
195
+ FastMathCriterion(
196
+ fast_expected=["div.approx.ftz.f32 "],
197
+ fast_unexpected=["div.rn.f32"],
198
+ prec_expected=["div.rn.f32"],
199
+ prec_unexpected=["div.approx.ftz.f32 "],
200
+ ),
190
201
  )
191
202
 
192
203
  def test_divf_exception(self):
@@ -232,13 +243,13 @@ class TestFastMathOption(CUDATestCase):
232
243
  # https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div
233
244
 
234
245
  # The fast version should use the "fast, approximate divide" variant
235
- self.assertIn('div.approx.f32', fastver.inspect_asm(sig))
246
+ self.assertIn("div.approx.f32", fastver.inspect_asm(sig))
236
247
  # The precise version should use the "IEEE 754 compliant rounding"
237
248
  # variant, and neither of the "approximate divide" variants.
238
- self.assertIn('div.rn.f32', precver.inspect_asm(sig))
239
- self.assertNotIn('div.approx.f32', precver.inspect_asm(sig))
240
- self.assertNotIn('div.full.f32', precver.inspect_asm(sig))
249
+ self.assertIn("div.rn.f32", precver.inspect_asm(sig))
250
+ self.assertNotIn("div.approx.f32", precver.inspect_asm(sig))
251
+ self.assertNotIn("div.full.f32", precver.inspect_asm(sig))
241
252
 
242
253
 
243
- if __name__ == '__main__':
254
+ if __name__ == "__main__":
244
255
  unittest.main()
@@ -44,9 +44,11 @@ class TestForAll(CUDATestCase):
44
44
  # negative element count.
45
45
  with self.assertRaises(ValueError) as raises:
46
46
  foo.forall(-1)
47
- self.assertIn("Can't create ForAll with negative task count",
48
- str(raises.exception))
47
+ self.assertIn(
48
+ "Can't create ForAll with negative task count",
49
+ str(raises.exception),
50
+ )
49
51
 
50
52
 
51
- if __name__ == '__main__':
53
+ if __name__ == "__main__":
52
54
  unittest.main()
@@ -17,13 +17,15 @@ class TestFreeVar(CUDATestCase):
17
17
  @cuda.jit("(float32[::1], intp)")
18
18
  def foo(A, i):
19
19
  "Dummy function"
20
- sdata = cuda.shared.array(size, # size is freevar
21
- dtype=nbtype) # nbtype is freevar
20
+ sdata = cuda.shared.array(
21
+ size, # size is freevar
22
+ dtype=nbtype,
23
+ ) # nbtype is freevar
22
24
  A[i] = sdata[i]
23
25
 
24
26
  A = np.arange(2, dtype="float32")
25
27
  foo[1, 1](A, 0)
26
28
 
27
29
 
28
- if __name__ == '__main__':
30
+ if __name__ == "__main__":
29
31
  unittest.main()
@@ -62,5 +62,5 @@ class TestCudaFrexpLdexp(CUDATestCase):
62
62
  self.template_test_ldexp(np.float64, float64)
63
63
 
64
64
 
65
- if __name__ == '__main__':
65
+ if __name__ == "__main__":
66
66
  unittest.main()
@@ -29,8 +29,7 @@ def coop_smem2d(ary):
29
29
 
30
30
  class TestCudaTestGlobal(CUDATestCase):
31
31
  def test_global_int_const(self):
32
- """Test simple_smem
33
- """
32
+ """Test simple_smem"""
34
33
  compiled = cuda.jit("void(int32[:])")(simple_smem)
35
34
 
36
35
  nelem = 100
@@ -41,8 +40,7 @@ class TestCudaTestGlobal(CUDATestCase):
41
40
 
42
41
  @unittest.SkipTest
43
42
  def test_global_tuple_const(self):
44
- """Test coop_smem2d
45
- """
43
+ """Test coop_smem2d"""
46
44
  compiled = cuda.jit("void(float32[:,:])")(coop_smem2d)
47
45
 
48
46
  shape = 10, 20
@@ -56,5 +54,5 @@ class TestCudaTestGlobal(CUDATestCase):
56
54
  self.assertTrue(np.allclose(ary, exp))
57
55
 
58
56
 
59
- if __name__ == '__main__':
57
+ if __name__ == "__main__":
60
58
  unittest.main()