numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -5,8 +5,7 @@ from numba.cuda.compiler import compile_ptx_for_current_device, compile_ptx
5
5
  from math import cos, sin, tan, exp, log, log10, log2, pow, tanh
6
6
  from operator import truediv
7
7
  import numpy as np
8
- from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
9
- skip_unless_cc_75)
8
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_75
10
9
  import unittest
11
10
 
12
11
 
@@ -24,10 +23,9 @@ class FastMathCriterion:
24
23
  test.assertTrue(all(i not in prec for i in self.prec_unexpected))
25
24
 
26
25
 
27
- @skip_on_cudasim('Fastmath and PTX inspection not available on cudasim')
26
+ @skip_on_cudasim("Fastmath and PTX inspection not available on cudasim")
28
27
  class TestFastMathOption(CUDATestCase):
29
28
  def _test_fast_math_common(self, pyfunc, sig, device, criterion):
30
-
31
29
  # Test jit code path
32
30
  fastver = cuda.jit(sig, device=device, fastmath=True)(pyfunc)
33
31
  precver = cuda.jit(sig, device=device)(pyfunc)
@@ -40,9 +38,7 @@ class TestFastMathOption(CUDATestCase):
40
38
  fastptx, _ = compile_ptx_for_current_device(
41
39
  pyfunc, sig, device=device, fastmath=True
42
40
  )
43
- precptx, _ = compile_ptx_for_current_device(
44
- pyfunc, sig, device=device
45
- )
41
+ precptx, _ = compile_ptx_for_current_device(pyfunc, sig, device=device)
46
42
 
47
43
  criterion.check(self, fastptx, precptx)
48
44
 
@@ -69,7 +65,9 @@ class TestFastMathOption(CUDATestCase):
69
65
 
70
66
  self._test_fast_math_common(
71
67
  kernel,
72
- (float32[::1], float32, float32), device=False, criterion=criterion
68
+ (float32[::1], float32, float32),
69
+ device=False,
70
+ criterion=criterion,
73
71
  )
74
72
  self._test_fast_math_common(
75
73
  device, (float32, float32), device=True, criterion=criterion
@@ -79,39 +77,41 @@ class TestFastMathOption(CUDATestCase):
79
77
  self._test_fast_math_unary(
80
78
  cos,
81
79
  FastMathCriterion(
82
- fast_expected=['cos.approx.ftz.f32 '],
83
- prec_unexpected=['cos.approx.ftz.f32 ']
84
- )
80
+ fast_expected=["cos.approx.ftz.f32 "],
81
+ prec_unexpected=["cos.approx.ftz.f32 "],
82
+ ),
85
83
  )
86
84
 
87
85
  def test_sinf(self):
88
86
  self._test_fast_math_unary(
89
87
  sin,
90
88
  FastMathCriterion(
91
- fast_expected=['sin.approx.ftz.f32 '],
92
- prec_unexpected=['sin.approx.ftz.f32 ']
93
- )
89
+ fast_expected=["sin.approx.ftz.f32 "],
90
+ prec_unexpected=["sin.approx.ftz.f32 "],
91
+ ),
94
92
  )
95
93
 
96
94
  def test_tanf(self):
97
95
  self._test_fast_math_unary(
98
96
  tan,
99
- FastMathCriterion(fast_expected=[
100
- 'sin.approx.ftz.f32 ',
101
- 'cos.approx.ftz.f32 ',
102
- 'div.approx.ftz.f32 '
103
- ], prec_unexpected=['sin.approx.ftz.f32 '])
97
+ FastMathCriterion(
98
+ fast_expected=[
99
+ "sin.approx.ftz.f32 ",
100
+ "cos.approx.ftz.f32 ",
101
+ "div.approx.ftz.f32 ",
102
+ ],
103
+ prec_unexpected=["sin.approx.ftz.f32 "],
104
+ ),
104
105
  )
105
106
 
106
107
  @skip_unless_cc_75
107
108
  def test_tanhf(self):
108
-
109
109
  self._test_fast_math_unary(
110
110
  tanh,
111
111
  FastMathCriterion(
112
- fast_expected=['tanh.approx.f32 '],
113
- prec_unexpected=['tanh.approx.f32 ']
114
- )
112
+ fast_expected=["tanh.approx.f32 "],
113
+ prec_unexpected=["tanh.approx.f32 "],
114
+ ),
115
115
  )
116
116
 
117
117
  def test_tanhf_compile_ptx(self):
@@ -119,74 +119,85 @@ class TestFastMathOption(CUDATestCase):
119
119
  r[0] = tanh(x)
120
120
 
121
121
  def tanh_common_test(cc, criterion):
122
- fastptx, _ = compile_ptx(tanh_kernel, (float32[::1], float32),
123
- fastmath=True, cc=cc)
124
- precptx, _ = compile_ptx(tanh_kernel, (float32[::1], float32),
125
- cc=cc)
122
+ fastptx, _ = compile_ptx(
123
+ tanh_kernel, (float32[::1], float32), fastmath=True, cc=cc
124
+ )
125
+ precptx, _ = compile_ptx(
126
+ tanh_kernel, (float32[::1], float32), cc=cc
127
+ )
126
128
  criterion.check(self, fastptx, precptx)
127
129
 
128
- tanh_common_test(cc=(7, 5), criterion=FastMathCriterion(
129
- fast_expected=['tanh.approx.f32 '],
130
- prec_unexpected=['tanh.approx.f32 ']
131
- ))
130
+ tanh_common_test(
131
+ cc=(7, 5),
132
+ criterion=FastMathCriterion(
133
+ fast_expected=["tanh.approx.f32 "],
134
+ prec_unexpected=["tanh.approx.f32 "],
135
+ ),
136
+ )
132
137
 
133
- tanh_common_test(cc=(7, 0),
134
- criterion=FastMathCriterion(
135
- fast_expected=['ex2.approx.ftz.f32 ',
136
- 'rcp.approx.ftz.f32 '],
137
- prec_unexpected=['tanh.approx.f32 ']))
138
+ tanh_common_test(
139
+ cc=(7, 0),
140
+ criterion=FastMathCriterion(
141
+ fast_expected=["ex2.approx.ftz.f32 ", "rcp.approx.ftz.f32 "],
142
+ prec_unexpected=["tanh.approx.f32 "],
143
+ ),
144
+ )
138
145
 
139
146
  def test_expf(self):
140
147
  self._test_fast_math_unary(
141
148
  exp,
142
149
  FastMathCriterion(
143
- fast_unexpected=['fma.rn.f32 '],
144
- prec_expected=['fma.rn.f32 ']
145
- )
150
+ fast_unexpected=["fma.rn.f32 "], prec_expected=["fma.rn.f32 "]
151
+ ),
146
152
  )
147
153
 
148
154
  def test_logf(self):
149
155
  # Look for constant used to convert from log base 2 to log base e
150
156
  self._test_fast_math_unary(
151
- log, FastMathCriterion(
152
- fast_expected=['lg2.approx.ftz.f32 ', '0f3F317218'],
153
- prec_unexpected=['lg2.approx.ftz.f32 '],
154
- )
157
+ log,
158
+ FastMathCriterion(
159
+ fast_expected=["lg2.approx.ftz.f32 ", "0f3F317218"],
160
+ prec_unexpected=["lg2.approx.ftz.f32 "],
161
+ ),
155
162
  )
156
163
 
157
164
  def test_log10f(self):
158
165
  # Look for constant used to convert from log base 2 to log base 10
159
166
  self._test_fast_math_unary(
160
- log10, FastMathCriterion(
161
- fast_expected=['lg2.approx.ftz.f32 ', '0f3E9A209B'],
162
- prec_unexpected=['lg2.approx.ftz.f32 ']
163
- )
167
+ log10,
168
+ FastMathCriterion(
169
+ fast_expected=["lg2.approx.ftz.f32 ", "0f3E9A209B"],
170
+ prec_unexpected=["lg2.approx.ftz.f32 "],
171
+ ),
164
172
  )
165
173
 
166
174
  def test_log2f(self):
167
175
  self._test_fast_math_unary(
168
- log2, FastMathCriterion(
169
- fast_expected=['lg2.approx.ftz.f32 '],
170
- prec_unexpected=['lg2.approx.ftz.f32 ']
171
- )
176
+ log2,
177
+ FastMathCriterion(
178
+ fast_expected=["lg2.approx.ftz.f32 "],
179
+ prec_unexpected=["lg2.approx.ftz.f32 "],
180
+ ),
172
181
  )
173
182
 
174
183
  def test_powf(self):
175
184
  self._test_fast_math_binary(
176
- pow, FastMathCriterion(
177
- fast_expected=['lg2.approx.ftz.f32 '],
178
- prec_unexpected=['lg2.approx.ftz.f32 '],
179
- )
185
+ pow,
186
+ FastMathCriterion(
187
+ fast_expected=["lg2.approx.ftz.f32 "],
188
+ prec_unexpected=["lg2.approx.ftz.f32 "],
189
+ ),
180
190
  )
181
191
 
182
192
  def test_divf(self):
183
193
  self._test_fast_math_binary(
184
- truediv, FastMathCriterion(
185
- fast_expected=['div.approx.ftz.f32 '],
186
- fast_unexpected=['div.rn.f32'],
187
- prec_expected=['div.rn.f32'],
188
- prec_unexpected=['div.approx.ftz.f32 '],
189
- )
194
+ truediv,
195
+ FastMathCriterion(
196
+ fast_expected=["div.approx.ftz.f32 "],
197
+ fast_unexpected=["div.rn.f32"],
198
+ prec_expected=["div.rn.f32"],
199
+ prec_unexpected=["div.approx.ftz.f32 "],
200
+ ),
190
201
  )
191
202
 
192
203
  def test_divf_exception(self):
@@ -232,13 +243,13 @@ class TestFastMathOption(CUDATestCase):
232
243
  # https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div
233
244
 
234
245
  # The fast version should use the "fast, approximate divide" variant
235
- self.assertIn('div.approx.f32', fastver.inspect_asm(sig))
246
+ self.assertIn("div.approx.f32", fastver.inspect_asm(sig))
236
247
  # The precise version should use the "IEEE 754 compliant rounding"
237
248
  # variant, and neither of the "approximate divide" variants.
238
- self.assertIn('div.rn.f32', precver.inspect_asm(sig))
239
- self.assertNotIn('div.approx.f32', precver.inspect_asm(sig))
240
- self.assertNotIn('div.full.f32', precver.inspect_asm(sig))
249
+ self.assertIn("div.rn.f32", precver.inspect_asm(sig))
250
+ self.assertNotIn("div.approx.f32", precver.inspect_asm(sig))
251
+ self.assertNotIn("div.full.f32", precver.inspect_asm(sig))
241
252
 
242
253
 
243
- if __name__ == '__main__':
254
+ if __name__ == "__main__":
244
255
  unittest.main()
@@ -44,9 +44,11 @@ class TestForAll(CUDATestCase):
44
44
  # negative element count.
45
45
  with self.assertRaises(ValueError) as raises:
46
46
  foo.forall(-1)
47
- self.assertIn("Can't create ForAll with negative task count",
48
- str(raises.exception))
47
+ self.assertIn(
48
+ "Can't create ForAll with negative task count",
49
+ str(raises.exception),
50
+ )
49
51
 
50
52
 
51
- if __name__ == '__main__':
53
+ if __name__ == "__main__":
52
54
  unittest.main()
@@ -17,13 +17,15 @@ class TestFreeVar(CUDATestCase):
17
17
  @cuda.jit("(float32[::1], intp)")
18
18
  def foo(A, i):
19
19
  "Dummy function"
20
- sdata = cuda.shared.array(size, # size is freevar
21
- dtype=nbtype) # nbtype is freevar
20
+ sdata = cuda.shared.array(
21
+ size, # size is freevar
22
+ dtype=nbtype,
23
+ ) # nbtype is freevar
22
24
  A[i] = sdata[i]
23
25
 
24
26
  A = np.arange(2, dtype="float32")
25
27
  foo[1, 1](A, 0)
26
28
 
27
29
 
28
- if __name__ == '__main__':
30
+ if __name__ == "__main__":
29
31
  unittest.main()
@@ -62,5 +62,5 @@ class TestCudaFrexpLdexp(CUDATestCase):
62
62
  self.template_test_ldexp(np.float64, float64)
63
63
 
64
64
 
65
- if __name__ == '__main__':
65
+ if __name__ == "__main__":
66
66
  unittest.main()
@@ -29,8 +29,7 @@ def coop_smem2d(ary):
29
29
 
30
30
  class TestCudaTestGlobal(CUDATestCase):
31
31
  def test_global_int_const(self):
32
- """Test simple_smem
33
- """
32
+ """Test simple_smem"""
34
33
  compiled = cuda.jit("void(int32[:])")(simple_smem)
35
34
 
36
35
  nelem = 100
@@ -41,8 +40,7 @@ class TestCudaTestGlobal(CUDATestCase):
41
40
 
42
41
  @unittest.SkipTest
43
42
  def test_global_tuple_const(self):
44
- """Test coop_smem2d
45
- """
43
+ """Test coop_smem2d"""
46
44
  compiled = cuda.jit("void(float32[:,:])")(coop_smem2d)
47
45
 
48
46
  shape = 10, 20
@@ -56,5 +54,5 @@ class TestCudaTestGlobal(CUDATestCase):
56
54
  self.assertTrue(np.allclose(ary, exp))
57
55
 
58
56
 
59
- if __name__ == '__main__':
57
+ if __name__ == "__main__":
60
58
  unittest.main()