numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -7,12 +7,13 @@ from numba import cuda
7
7
  from numba.cuda.testing import skip_on_cudasim, CUDATestCase
8
8
  import unittest
9
9
 
10
- has_mp_get_context = hasattr(mp, 'get_context')
11
- is_unix = os.name == 'posix'
10
+ has_mp_get_context = hasattr(mp, "get_context")
11
+ is_unix = os.name == "posix"
12
12
 
13
13
 
14
14
  def fork_test(q):
15
15
  from numba.cuda.cudadrv.error import CudaDriverError
16
+
16
17
  try:
17
18
  cuda.to_device(np.arange(1))
18
19
  except CudaDriverError as e:
@@ -21,17 +22,17 @@ def fork_test(q):
21
22
  q.put(None)
22
23
 
23
24
 
24
- @skip_on_cudasim('disabled for cudasim')
25
+ @skip_on_cudasim("disabled for cudasim")
25
26
  class TestMultiprocessing(CUDATestCase):
26
- @unittest.skipUnless(has_mp_get_context, 'requires mp.get_context')
27
- @unittest.skipUnless(is_unix, 'requires Unix')
27
+ @unittest.skipUnless(has_mp_get_context, "requires mp.get_context")
28
+ @unittest.skipUnless(is_unix, "requires Unix")
28
29
  def test_fork(self):
29
30
  """
30
31
  Test fork detection.
31
32
  """
32
33
  cuda.current_context() # force cuda initialize
33
34
  # fork in process that also uses CUDA
34
- ctx = mp.get_context('fork')
35
+ ctx = mp.get_context("fork")
35
36
  q = ctx.Queue()
36
37
  proc = ctx.Process(target=fork_test, args=[q])
37
38
  proc.start()
@@ -39,8 +40,8 @@ class TestMultiprocessing(CUDATestCase):
39
40
  proc.join()
40
41
  # there should be an exception raised in the child process
41
42
  self.assertIsNotNone(exc)
42
- self.assertIn('CUDA initialized before forking', str(exc))
43
+ self.assertIn("CUDA initialized before forking", str(exc))
43
44
 
44
45
 
45
- if __name__ == '__main__':
46
+ if __name__ == "__main__":
46
47
  unittest.main()
@@ -3,8 +3,11 @@ import threading
3
3
  import multiprocessing
4
4
  import numpy as np
5
5
  from numba import cuda
6
- from numba.cuda.testing import (skip_on_cudasim, skip_under_cuda_memcheck,
7
- CUDATestCase)
6
+ from numba.cuda.testing import (
7
+ skip_on_cudasim,
8
+ skip_under_cuda_memcheck,
9
+ CUDATestCase,
10
+ )
8
11
  import unittest
9
12
 
10
13
  try:
@@ -15,7 +18,7 @@ else:
15
18
  has_concurrent_futures = True
16
19
 
17
20
 
18
- has_mp_get_context = hasattr(multiprocessing, 'get_context')
21
+ has_mp_get_context = hasattr(multiprocessing, "get_context")
19
22
 
20
23
 
21
24
  def check_concurrent_compiling():
@@ -41,15 +44,14 @@ def spawn_process_entry(q):
41
44
  # Catch anything that goes wrong in the threads
42
45
  except: # noqa: E722
43
46
  msg = traceback.format_exc()
44
- q.put('\n'.join(['', '=' * 80, msg]))
47
+ q.put("\n".join(["", "=" * 80, msg]))
45
48
  else:
46
49
  q.put(None)
47
50
 
48
51
 
49
- @skip_under_cuda_memcheck('Hangs cuda-memcheck')
50
- @skip_on_cudasim('disabled for cudasim')
52
+ @skip_under_cuda_memcheck("Hangs cuda-memcheck")
53
+ @skip_on_cudasim("disabled for cudasim")
51
54
  class TestMultiThreadCompiling(CUDATestCase):
52
-
53
55
  @unittest.skipIf(not has_concurrent_futures, "no concurrent.futures")
54
56
  def test_concurrent_compiling(self):
55
57
  check_concurrent_compiling()
@@ -59,7 +61,7 @@ class TestMultiThreadCompiling(CUDATestCase):
59
61
  # force CUDA context init
60
62
  cuda.get_current_device()
61
63
  # use "spawn" to avoid inheriting the CUDA context
62
- ctx = multiprocessing.get_context('spawn')
64
+ ctx = multiprocessing.get_context("spawn")
63
65
 
64
66
  q = ctx.Queue()
65
67
  p = ctx.Process(target=spawn_process_entry, args=(q,))
@@ -70,7 +72,7 @@ class TestMultiThreadCompiling(CUDATestCase):
70
72
  p.join()
71
73
  if err is not None:
72
74
  raise AssertionError(err)
73
- self.assertEqual(p.exitcode, 0, 'test failed in child process')
75
+ self.assertEqual(p.exitcode, 0, "test failed in child process")
74
76
 
75
77
  def test_invalid_context_error_with_d2h(self):
76
78
  def d2h(arr, out):
@@ -97,5 +99,5 @@ class TestMultiThreadCompiling(CUDATestCase):
97
99
  np.testing.assert_equal(darr.copy_to_host(), arr)
98
100
 
99
101
 
100
- if __name__ == '__main__':
102
+ if __name__ == "__main__":
101
103
  unittest.main()
@@ -45,5 +45,5 @@ class TestCudaNonDet(CUDATestCase):
45
45
  np.testing.assert_array_almost_equal(dF.copy_to_host(), E)
46
46
 
47
47
 
48
- if __name__ == '__main__':
48
+ if __name__ == "__main__":
49
49
  unittest.main()
@@ -1,6 +1,10 @@
1
1
  import numpy as np
2
- from numba.cuda.testing import (unittest, CUDATestCase, skip_unless_cc_53,
3
- skip_on_cudasim)
2
+ from numba.cuda.testing import (
3
+ unittest,
4
+ CUDATestCase,
5
+ skip_unless_cc_53,
6
+ skip_on_cudasim,
7
+ )
4
8
  from numba import cuda
5
9
  from numba.core.types import f2, b1
6
10
  from numba.cuda import compile_ptx
@@ -73,12 +77,12 @@ def simple_fp16_ne(ary, a, b):
73
77
  ary[0] = a != b
74
78
 
75
79
 
76
- @cuda.jit('b1(f2, f2)', device=True)
80
+ @cuda.jit("b1(f2, f2)", device=True)
77
81
  def hlt_func_1(x, y):
78
82
  return x < y
79
83
 
80
84
 
81
- @cuda.jit('b1(f2, f2)', device=True)
85
+ @cuda.jit("b1(f2, f2)", device=True)
82
86
  def hlt_func_2(x, y):
83
87
  return x < y
84
88
 
@@ -116,6 +120,7 @@ class TestOperatorModule(CUDATestCase):
116
120
  """
117
121
  Test if operator module is supported by the CUDA target.
118
122
  """
123
+
119
124
  def operator_template(self, op):
120
125
  @cuda.jit
121
126
  def foo(a, b):
@@ -146,8 +151,12 @@ class TestOperatorModule(CUDATestCase):
146
151
 
147
152
  @skip_unless_cc_53
148
153
  def test_fp16_binary(self):
149
- functions = (simple_fp16add, simple_fp16sub, simple_fp16mul,
150
- simple_fp16_div_scalar)
154
+ functions = (
155
+ simple_fp16add,
156
+ simple_fp16sub,
157
+ simple_fp16mul,
158
+ simple_fp16_div_scalar,
159
+ )
151
160
  ops = (operator.add, operator.sub, operator.mul, operator.truediv)
152
161
 
153
162
  for fn, op in zip(functions, ops):
@@ -162,10 +171,10 @@ class TestOperatorModule(CUDATestCase):
162
171
  expected = op(arg1, arg2)
163
172
  np.testing.assert_allclose(got, expected)
164
173
 
165
- @skip_on_cudasim('Compilation unsupported in the simulator')
174
+ @skip_on_cudasim("Compilation unsupported in the simulator")
166
175
  def test_fp16_binary_ptx(self):
167
176
  functions = (simple_fp16add, simple_fp16sub, simple_fp16mul)
168
- instrs = ('add.f16', 'sub.f16', 'mul.f16')
177
+ instrs = ("add.f16", "sub.f16", "mul.f16")
169
178
  args = (f2[:], f2, f2)
170
179
  for fn, instr in zip(functions, instrs):
171
180
  with self.subTest(instr=instr):
@@ -174,11 +183,14 @@ class TestOperatorModule(CUDATestCase):
174
183
 
175
184
  @skip_unless_cc_53
176
185
  def test_mixed_fp16_binary_arithmetic(self):
177
- functions = (simple_fp16add, simple_fp16sub, simple_fp16mul,
178
- simple_fp16_div_scalar)
186
+ functions = (
187
+ simple_fp16add,
188
+ simple_fp16sub,
189
+ simple_fp16mul,
190
+ simple_fp16_div_scalar,
191
+ )
179
192
  ops = (operator.add, operator.sub, operator.mul, operator.truediv)
180
- types = (np.int8, np.int16, np.int32, np.int64,
181
- np.float32, np.float64)
193
+ types = (np.int8, np.int16, np.int32, np.int64, np.float32, np.float64)
182
194
  for (fn, op), ty in itertools.product(zip(functions, ops), types):
183
195
  with self.subTest(op=op, ty=ty):
184
196
  kernel = cuda.jit(fn)
@@ -192,10 +204,10 @@ class TestOperatorModule(CUDATestCase):
192
204
  expected = op(arg1, arg2)
193
205
  np.testing.assert_allclose(got, expected)
194
206
 
195
- @skip_on_cudasim('Compilation unsupported in the simulator')
207
+ @skip_on_cudasim("Compilation unsupported in the simulator")
196
208
  def test_fp16_inplace_binary_ptx(self):
197
209
  functions = (simple_fp16_iadd, simple_fp16_isub, simple_fp16_imul)
198
- instrs = ('add.f16', 'sub.f16', 'mul.f16')
210
+ instrs = ("add.f16", "sub.f16", "mul.f16")
199
211
  args = (f2[:], f2)
200
212
 
201
213
  for fn, instr in zip(functions, instrs):
@@ -205,8 +217,12 @@ class TestOperatorModule(CUDATestCase):
205
217
 
206
218
  @skip_unless_cc_53
207
219
  def test_fp16_inplace_binary(self):
208
- functions = (simple_fp16_iadd, simple_fp16_isub, simple_fp16_imul,
209
- simple_fp16_idiv)
220
+ functions = (
221
+ simple_fp16_iadd,
222
+ simple_fp16_isub,
223
+ simple_fp16_imul,
224
+ simple_fp16_idiv,
225
+ )
210
226
  ops = (operator.iadd, operator.isub, operator.imul, operator.itruediv)
211
227
 
212
228
  for fn, op in zip(functions, ops):
@@ -236,26 +252,37 @@ class TestOperatorModule(CUDATestCase):
236
252
  expected = op(arg1)
237
253
  np.testing.assert_allclose(got, expected)
238
254
 
239
- @skip_on_cudasim('Compilation unsupported in the simulator')
255
+ @skip_on_cudasim("Compilation unsupported in the simulator")
240
256
  def test_fp16_neg_ptx(self):
241
257
  args = (f2[:], f2)
242
258
  ptx, _ = compile_ptx(simple_fp16neg, args, cc=(5, 3))
243
- self.assertIn('neg.f16', ptx)
259
+ self.assertIn("neg.f16", ptx)
244
260
 
245
- @skip_on_cudasim('Compilation unsupported in the simulator')
261
+ @skip_on_cudasim("Compilation unsupported in the simulator")
246
262
  def test_fp16_abs_ptx(self):
247
263
  args = (f2[:], f2)
248
264
  ptx, _ = compile_ptx(simple_fp16abs, args, cc=(5, 3))
249
265
 
250
- self.assertIn('abs.f16', ptx)
266
+ self.assertIn("abs.f16", ptx)
251
267
 
252
268
  @skip_unless_cc_53
253
269
  def test_fp16_comparison(self):
254
- functions = (simple_fp16_gt, simple_fp16_ge,
255
- simple_fp16_lt, simple_fp16_le,
256
- simple_fp16_eq, simple_fp16_ne)
257
- ops = (operator.gt, operator.ge, operator.lt, operator.le,
258
- operator.eq, operator.ne)
270
+ functions = (
271
+ simple_fp16_gt,
272
+ simple_fp16_ge,
273
+ simple_fp16_lt,
274
+ simple_fp16_le,
275
+ simple_fp16_eq,
276
+ simple_fp16_ne,
277
+ )
278
+ ops = (
279
+ operator.gt,
280
+ operator.ge,
281
+ operator.lt,
282
+ operator.le,
283
+ operator.eq,
284
+ operator.ne,
285
+ )
259
286
 
260
287
  for fn, op in zip(functions, ops):
261
288
  with self.subTest(op=op):
@@ -271,16 +298,25 @@ class TestOperatorModule(CUDATestCase):
271
298
 
272
299
  @skip_unless_cc_53
273
300
  def test_mixed_fp16_comparison(self):
274
- functions = (simple_fp16_gt, simple_fp16_ge,
275
- simple_fp16_lt, simple_fp16_le,
276
- simple_fp16_eq, simple_fp16_ne)
277
- ops = (operator.gt, operator.ge, operator.lt, operator.le,
278
- operator.eq, operator.ne)
279
- types = (np.int8, np.int16, np.int32, np.int64,
280
- np.float32, np.float64)
281
-
282
- for (fn, op), ty in itertools.product(zip(functions, ops),
283
- types):
301
+ functions = (
302
+ simple_fp16_gt,
303
+ simple_fp16_ge,
304
+ simple_fp16_lt,
305
+ simple_fp16_le,
306
+ simple_fp16_eq,
307
+ simple_fp16_ne,
308
+ )
309
+ ops = (
310
+ operator.gt,
311
+ operator.ge,
312
+ operator.lt,
313
+ operator.le,
314
+ operator.eq,
315
+ operator.ne,
316
+ )
317
+ types = (np.int8, np.int16, np.int32, np.int64, np.float32, np.float64)
318
+
319
+ for (fn, op), ty in itertools.product(zip(functions, ops), types):
284
320
  with self.subTest(op=op, ty=ty):
285
321
  kernel = cuda.jit(fn)
286
322
 
@@ -294,48 +330,68 @@ class TestOperatorModule(CUDATestCase):
294
330
 
295
331
  @skip_unless_cc_53
296
332
  def test_multiple_float16_comparisons(self):
297
- functions = (test_multiple_hcmp_1,
298
- test_multiple_hcmp_2,
299
- test_multiple_hcmp_3,
300
- test_multiple_hcmp_4,
301
- test_multiple_hcmp_5)
333
+ functions = (
334
+ test_multiple_hcmp_1,
335
+ test_multiple_hcmp_2,
336
+ test_multiple_hcmp_3,
337
+ test_multiple_hcmp_4,
338
+ test_multiple_hcmp_5,
339
+ )
302
340
  for fn in functions:
303
341
  with self.subTest(fn=fn):
304
342
  compiled = cuda.jit("void(b1[:], f2, f2, f2)")(fn)
305
343
  ary = np.zeros(1, dtype=np.bool_)
306
- arg1 = np.float16(2.)
307
- arg2 = np.float16(3.)
308
- arg3 = np.float16(4.)
344
+ arg1 = np.float16(2.0)
345
+ arg2 = np.float16(3.0)
346
+ arg3 = np.float16(4.0)
309
347
  compiled[1, 1](ary, arg1, arg2, arg3)
310
348
  self.assertTrue(ary[0])
311
349
 
312
350
  @skip_unless_cc_53
313
351
  def test_multiple_float16_comparisons_false(self):
314
- functions = (test_multiple_hcmp_1,
315
- test_multiple_hcmp_2,
316
- test_multiple_hcmp_3,
317
- test_multiple_hcmp_4,
318
- test_multiple_hcmp_5)
352
+ functions = (
353
+ test_multiple_hcmp_1,
354
+ test_multiple_hcmp_2,
355
+ test_multiple_hcmp_3,
356
+ test_multiple_hcmp_4,
357
+ test_multiple_hcmp_5,
358
+ )
319
359
  for fn in functions:
320
360
  with self.subTest(fn=fn):
321
361
  compiled = cuda.jit("void(b1[:], f2, f2, f2)")(fn)
322
362
  ary = np.zeros(1, dtype=np.bool_)
323
- arg1 = np.float16(2.)
324
- arg2 = np.float16(3.)
325
- arg3 = np.float16(1.)
363
+ arg1 = np.float16(2.0)
364
+ arg2 = np.float16(3.0)
365
+ arg3 = np.float16(1.0)
326
366
  compiled[1, 1](ary, arg1, arg2, arg3)
327
367
  self.assertFalse(ary[0])
328
368
 
329
- @skip_on_cudasim('Compilation unsupported in the simulator')
369
+ @skip_on_cudasim("Compilation unsupported in the simulator")
330
370
  def test_fp16_comparison_ptx(self):
331
- functions = (simple_fp16_gt, simple_fp16_ge,
332
- simple_fp16_lt, simple_fp16_le,
333
- simple_fp16_eq, simple_fp16_ne)
334
- ops = (operator.gt, operator.ge, operator.lt, operator.le,
335
- operator.eq, operator.ne)
336
- opstring = ('setp.gt.f16', 'setp.ge.f16',
337
- 'setp.lt.f16', 'setp.le.f16',
338
- 'setp.eq.f16', 'setp.ne.f16')
371
+ functions = (
372
+ simple_fp16_gt,
373
+ simple_fp16_ge,
374
+ simple_fp16_lt,
375
+ simple_fp16_le,
376
+ simple_fp16_eq,
377
+ simple_fp16_ne,
378
+ )
379
+ ops = (
380
+ operator.gt,
381
+ operator.ge,
382
+ operator.lt,
383
+ operator.le,
384
+ operator.eq,
385
+ operator.ne,
386
+ )
387
+ opstring = (
388
+ "setp.gt.f16",
389
+ "setp.ge.f16",
390
+ "setp.lt.f16",
391
+ "setp.le.f16",
392
+ "setp.eq.f16",
393
+ "setp.ne.f16",
394
+ )
339
395
  args = (b1[:], f2, f2)
340
396
 
341
397
  for fn, op, s in zip(functions, ops, opstring):
@@ -343,51 +399,79 @@ class TestOperatorModule(CUDATestCase):
343
399
  ptx, _ = compile_ptx(fn, args, cc=(5, 3))
344
400
  self.assertIn(s, ptx)
345
401
 
346
- @skip_on_cudasim('Compilation unsupported in the simulator')
402
+ @skip_on_cudasim("Compilation unsupported in the simulator")
347
403
  def test_fp16_int8_comparison_ptx(self):
348
404
  # Test that int8 can be safely converted to fp16
349
405
  # in a comparison
350
- functions = (simple_fp16_gt, simple_fp16_ge,
351
- simple_fp16_lt, simple_fp16_le,
352
- simple_fp16_eq, simple_fp16_ne)
353
- ops = (operator.gt, operator.ge, operator.lt, operator.le,
354
- operator.eq, operator.ne)
355
-
356
- opstring = {operator.gt:'setp.gt.f16',
357
- operator.ge:'setp.ge.f16',
358
- operator.lt:'setp.lt.f16',
359
- operator.le:'setp.le.f16',
360
- operator.eq:'setp.eq.f16',
361
- operator.ne:'setp.ne.f16'}
406
+ functions = (
407
+ simple_fp16_gt,
408
+ simple_fp16_ge,
409
+ simple_fp16_lt,
410
+ simple_fp16_le,
411
+ simple_fp16_eq,
412
+ simple_fp16_ne,
413
+ )
414
+ ops = (
415
+ operator.gt,
416
+ operator.ge,
417
+ operator.lt,
418
+ operator.le,
419
+ operator.eq,
420
+ operator.ne,
421
+ )
422
+
423
+ opstring = {
424
+ operator.gt: "setp.gt.f16",
425
+ operator.ge: "setp.ge.f16",
426
+ operator.lt: "setp.lt.f16",
427
+ operator.le: "setp.le.f16",
428
+ operator.eq: "setp.eq.f16",
429
+ operator.ne: "setp.ne.f16",
430
+ }
362
431
  for fn, op in zip(functions, ops):
363
432
  with self.subTest(op=op):
364
433
  args = (b1[:], f2, from_dtype(np.int8))
365
434
  ptx, _ = compile_ptx(fn, args, cc=(5, 3))
366
435
  self.assertIn(opstring[op], ptx)
367
436
 
368
- @skip_on_cudasim('Compilation unsupported in the simulator')
437
+ @skip_on_cudasim("Compilation unsupported in the simulator")
369
438
  def test_mixed_fp16_comparison_promotion_ptx(self):
370
- functions = (simple_fp16_gt, simple_fp16_ge,
371
- simple_fp16_lt, simple_fp16_le,
372
- simple_fp16_eq, simple_fp16_ne)
373
- ops = (operator.gt, operator.ge, operator.lt, operator.le,
374
- operator.eq, operator.ne)
375
-
376
- types_promote = (np.int16, np.int32, np.int64,
377
- np.float32, np.float64)
378
- opstring = {operator.gt:'setp.gt.',
379
- operator.ge:'setp.ge.',
380
- operator.lt:'setp.lt.',
381
- operator.le:'setp.le.',
382
- operator.eq:'setp.eq.',
383
- operator.ne:'setp.neu.'}
384
- opsuffix = {np.dtype('int32'): 'f64',
385
- np.dtype('int64'): 'f64',
386
- np.dtype('float32'): 'f32',
387
- np.dtype('float64'): 'f64'}
388
-
389
- for (fn, op), ty in itertools.product(zip(functions, ops),
390
- types_promote):
439
+ functions = (
440
+ simple_fp16_gt,
441
+ simple_fp16_ge,
442
+ simple_fp16_lt,
443
+ simple_fp16_le,
444
+ simple_fp16_eq,
445
+ simple_fp16_ne,
446
+ )
447
+ ops = (
448
+ operator.gt,
449
+ operator.ge,
450
+ operator.lt,
451
+ operator.le,
452
+ operator.eq,
453
+ operator.ne,
454
+ )
455
+
456
+ types_promote = (np.int16, np.int32, np.int64, np.float32, np.float64)
457
+ opstring = {
458
+ operator.gt: "setp.gt.",
459
+ operator.ge: "setp.ge.",
460
+ operator.lt: "setp.lt.",
461
+ operator.le: "setp.le.",
462
+ operator.eq: "setp.eq.",
463
+ operator.ne: "setp.neu.",
464
+ }
465
+ opsuffix = {
466
+ np.dtype("int32"): "f64",
467
+ np.dtype("int64"): "f64",
468
+ np.dtype("float32"): "f32",
469
+ np.dtype("float64"): "f64",
470
+ }
471
+
472
+ for (fn, op), ty in itertools.product(
473
+ zip(functions, ops), types_promote
474
+ ):
391
475
  with self.subTest(op=op, ty=ty):
392
476
  arg2_ty = np.result_type(np.float16, ty)
393
477
  args = (b1[:], f2, from_dtype(arg2_ty))
@@ -397,5 +481,5 @@ class TestOperatorModule(CUDATestCase):
397
481
  self.assertIn(ops, ptx)
398
482
 
399
483
 
400
- if __name__ == '__main__':
484
+ if __name__ == "__main__":
401
485
  unittest.main()
@@ -18,10 +18,10 @@ def device_func(x, y, z):
18
18
  # the test function were more complex it may be possible to isolate additional
19
19
  # fragments of PTX we could check for the absence / presence of, but removal of
20
20
  # the use of local memory is a good indicator that optimization was applied.
21
- removed_by_opt = ( '__local_depot0',)
21
+ removed_by_opt = ("__local_depot0",)
22
22
 
23
23
 
24
- @skip_on_cudasim('Simulator does not optimize code')
24
+ @skip_on_cudasim("Simulator does not optimize code")
25
25
  class TestOptimization(CUDATestCase):
26
26
  def test_eager_opt(self):
27
27
  # Optimization should occur by default
@@ -74,7 +74,7 @@ class TestOptimization(CUDATestCase):
74
74
  sig = (float64, float64, float64)
75
75
  device = cuda.jit(sig, device=True)(device_func)
76
76
  ptx = device.inspect_asm(sig)
77
- self.assertIn('fma.rn.f64', ptx)
77
+ self.assertIn("fma.rn.f64", ptx)
78
78
 
79
79
  def test_device_noopt(self):
80
80
  # Optimization disabled
@@ -82,8 +82,8 @@ class TestOptimization(CUDATestCase):
82
82
  device = cuda.jit(sig, device=True, opt=False)(device_func)
83
83
  ptx = device.inspect_asm(sig)
84
84
  # Fused-multiply adds should be disabled when not optimizing
85
- self.assertNotIn('fma.rn.f64', ptx)
85
+ self.assertNotIn("fma.rn.f64", ptx)
86
86
 
87
87
 
88
- if __name__ == '__main__':
88
+ if __name__ == "__main__":
89
89
  unittest.main()