numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -3,8 +3,12 @@ import sys
3
3
  import subprocess
4
4
  import threading
5
5
  from numba import cuda
6
- from numba.cuda.testing import (unittest, CUDATestCase, skip_on_cudasim,
7
- skip_under_cuda_memcheck)
6
+ from numba.cuda.testing import (
7
+ unittest,
8
+ CUDATestCase,
9
+ skip_on_cudasim,
10
+ skip_under_cuda_memcheck,
11
+ )
8
12
  from numba.tests.support import captured_stdout
9
13
 
10
14
 
@@ -14,21 +18,19 @@ class TestCudaDetect(CUDATestCase):
14
18
  with captured_stdout() as out:
15
19
  cuda.detect()
16
20
  output = out.getvalue()
17
- self.assertIn('Found', output)
18
- self.assertIn('CUDA devices', output)
21
+ self.assertIn("Found", output)
22
+ self.assertIn("CUDA devices", output)
19
23
 
20
24
 
21
- @skip_under_cuda_memcheck('Hangs cuda-memcheck')
25
+ @skip_under_cuda_memcheck("Hangs cuda-memcheck")
22
26
  class TestCUDAFindLibs(CUDATestCase):
23
-
24
27
  def run_cmd(self, cmdline, env):
25
- popen = subprocess.Popen(cmdline,
26
- stdout=subprocess.PIPE,
27
- stderr=subprocess.PIPE,
28
- env=env)
28
+ popen = subprocess.Popen(
29
+ cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
30
+ )
29
31
 
30
32
  # finish in 5 minutes or kill it
31
- timeout = threading.Timer(5 * 60., popen.kill)
33
+ timeout = threading.Timer(5 * 60.0, popen.kill)
32
34
  try:
33
35
  timeout.start()
34
36
  out, err = popen.communicate()
@@ -51,8 +53,8 @@ class TestCUDAFindLibs(CUDATestCase):
51
53
  cmdline = [sys.executable, "-c", code]
52
54
  return self.run_cmd(cmdline, env_copy)
53
55
 
54
- @skip_on_cudasim('Simulator does not hit device library search code path')
55
- @unittest.skipIf(not sys.platform.startswith('linux'), "linux only")
56
+ @skip_on_cudasim("Simulator does not hit device library search code path")
57
+ @unittest.skipIf(not sys.platform.startswith("linux"), "linux only")
56
58
  def test_cuda_find_lib_errors(self):
57
59
  """
58
60
  This tests that the find_libs works as expected in the case of an
@@ -60,7 +62,7 @@ class TestCUDAFindLibs(CUDATestCase):
60
62
  """
61
63
  # one of these is likely to exist on linux, it's also unlikely that
62
64
  # someone has extracted the contents of libdevice into here!
63
- locs = ['lib', 'lib64']
65
+ locs = ["lib", "lib64"]
64
66
 
65
67
  looking_for = None
66
68
  for l in locs:
@@ -71,11 +73,12 @@ class TestCUDAFindLibs(CUDATestCase):
71
73
  # This is the testing part, the test will only run if there's a valid
72
74
  # path in which to look
73
75
  if looking_for is not None:
74
- out, err = self.run_test_in_separate_process("NUMBA_CUDA_DRIVER",
75
- looking_for)
76
+ out, err = self.run_test_in_separate_process(
77
+ "NUMBA_CUDA_DRIVER", looking_for
78
+ )
76
79
  self.assertTrue(out is not None)
77
80
  self.assertTrue(err is not None)
78
81
 
79
82
 
80
- if __name__ == '__main__':
83
+ if __name__ == "__main__":
81
84
  unittest.main()
@@ -8,6 +8,7 @@ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
8
8
  from numba.tests.support import linux_only
9
9
 
10
10
  if not config.ENABLE_CUDASIM:
11
+
11
12
  class DeviceOnlyEMMPlugin(cuda.HostOnlyCUDAMemoryManager):
12
13
  """
13
14
  Dummy EMM Plugin implementation for testing. It memorises which plugin
@@ -56,8 +57,9 @@ if not config.ENABLE_CUDASIM:
56
57
  # the reference count drops to zero.
57
58
  ctx = weakref.proxy(self.context)
58
59
  ptr = ctypes.c_void_p(alloc_count)
59
- return cuda.cudadrv.driver.AutoFreePointer(ctx, ptr, size,
60
- finalizer=finalizer)
60
+ return cuda.cudadrv.driver.AutoFreePointer(
61
+ ctx, ptr, size, finalizer=finalizer
62
+ )
61
63
 
62
64
  def initialize(self):
63
65
  # No special initialization needed.
@@ -97,7 +99,7 @@ if not config.ENABLE_CUDASIM:
97
99
  return 2
98
100
 
99
101
 
100
- @skip_on_cudasim('EMM Plugins not supported on CUDA simulator')
102
+ @skip_on_cudasim("EMM Plugins not supported on CUDA simulator")
101
103
  class TestDeviceOnlyEMMPlugin(CUDATestCase):
102
104
  """
103
105
  Tests that the API of an EMM Plugin that implements device allocations
@@ -175,7 +177,7 @@ class TestDeviceOnlyEMMPlugin(CUDATestCase):
175
177
  self.assertIn("Dummy IPC handle for alloc 1", ipch._ipc_handle)
176
178
 
177
179
 
178
- @skip_on_cudasim('EMM Plugins not supported on CUDA simulator')
180
+ @skip_on_cudasim("EMM Plugins not supported on CUDA simulator")
179
181
  class TestBadEMMPluginVersion(CUDATestCase):
180
182
  """
181
183
  Ensure that Numba rejects EMM Plugins with incompatible version
@@ -185,8 +187,8 @@ class TestBadEMMPluginVersion(CUDATestCase):
185
187
  def test_bad_plugin_version(self):
186
188
  with self.assertRaises(RuntimeError) as raises:
187
189
  cuda.set_memory_manager(BadVersionEMMPlugin)
188
- self.assertIn('version 1 required', str(raises.exception))
190
+ self.assertIn("version 1 required", str(raises.exception))
189
191
 
190
192
 
191
- if __name__ == '__main__':
193
+ if __name__ == "__main__":
192
194
  unittest.main()
@@ -34,5 +34,5 @@ class TestCudaEvent(CUDATestCase):
34
34
  evtstart.elapsed_time(evtend)
35
35
 
36
36
 
37
- if __name__ == '__main__':
37
+ if __name__ == "__main__":
38
38
  unittest.main()
@@ -10,10 +10,9 @@ class TestHostAlloc(ContextResettingTestCase):
10
10
  mem = cuda.current_context().memhostalloc(n, mapped=True)
11
11
 
12
12
  dtype = np.dtype(np.uint8)
13
- ary = np.ndarray(shape=n // dtype.itemsize, dtype=dtype,
14
- buffer=mem)
13
+ ary = np.ndarray(shape=n // dtype.itemsize, dtype=dtype, buffer=mem)
15
14
 
16
- magic = 0xab
15
+ magic = 0xAB
17
16
  driver.device_memset(mem, magic, n)
18
17
 
19
18
  self.assertTrue(np.all(ary == magic))
@@ -46,8 +45,10 @@ class TestHostAlloc(ContextResettingTestCase):
46
45
  self.assertTrue(sum(ary != 0) == 0)
47
46
 
48
47
  def test_host_operators(self):
49
- for ary in [cuda.mapped_array(10, dtype=np.uint32),
50
- cuda.pinned_array(10, dtype=np.uint32)]:
48
+ for ary in [
49
+ cuda.mapped_array(10, dtype=np.uint32),
50
+ cuda.pinned_array(10, dtype=np.uint32),
51
+ ]:
51
52
  ary[:] = range(10)
52
53
  self.assertTrue(sum(ary + 1) == 55)
53
54
  self.assertTrue(sum((ary + 1) * 2 - 1) == 100)
@@ -55,11 +56,11 @@ class TestHostAlloc(ContextResettingTestCase):
55
56
  self.assertTrue(sum(ary <= 5) == 6)
56
57
  self.assertTrue(sum(ary > 6) == 3)
57
58
  self.assertTrue(sum(ary >= 6) == 4)
58
- self.assertTrue(sum(ary ** 2) == 285)
59
+ self.assertTrue(sum(ary**2) == 285)
59
60
  self.assertTrue(sum(ary // 2) == 20)
60
61
  self.assertTrue(sum(ary / 2.0) == 22.5)
61
62
  self.assertTrue(sum(ary % 2) == 5)
62
63
 
63
64
 
64
- if __name__ == '__main__':
65
+ if __name__ == "__main__":
65
66
  unittest.main()
@@ -9,7 +9,7 @@ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
9
9
 
10
10
  # A mock of cuInit that always raises a CudaAPIError
11
11
  def cuInit_raising(arg):
12
- raise CudaAPIError(999, 'CUDA_ERROR_UNKNOWN')
12
+ raise CudaAPIError(999, "CUDA_ERROR_UNKNOWN")
13
13
 
14
14
 
15
15
  # Test code to run in a child that patches driver.cuInit to a variant that
@@ -82,45 +82,45 @@ def cuda_disabled_error_test(result_queue):
82
82
  result_queue.put((success, msg))
83
83
 
84
84
 
85
- @skip_on_cudasim('CUDA Simulator does not initialize driver')
85
+ @skip_on_cudasim("CUDA Simulator does not initialize driver")
86
86
  class TestInit(CUDATestCase):
87
87
  def _test_init_failure(self, target, expected):
88
88
  # Run the initialization failure test in a separate subprocess
89
- ctx = mp.get_context('spawn')
89
+ ctx = mp.get_context("spawn")
90
90
  result_queue = ctx.Queue()
91
91
  proc = ctx.Process(target=target, args=(result_queue,))
92
92
  proc.start()
93
- proc.join(30) # should complete within 30s
93
+ proc.join(30) # should complete within 30s
94
94
  success, msg = result_queue.get()
95
95
 
96
96
  # Ensure the child process raised an exception during initialization
97
97
  # before checking the message
98
98
  if not success:
99
- self.fail('CudaSupportError not raised')
99
+ self.fail("CudaSupportError not raised")
100
100
 
101
101
  self.assertIn(expected, msg)
102
102
 
103
103
  def test_init_failure_raising(self):
104
- expected = 'Error at driver init: CUDA_ERROR_UNKNOWN (999)'
104
+ expected = "Error at driver init: CUDA_ERROR_UNKNOWN (999)"
105
105
  self._test_init_failure(cuInit_raising_test, expected)
106
106
 
107
107
  def test_init_failure_error(self):
108
- expected = 'CUDA_ERROR_UNKNOWN (999)'
108
+ expected = "CUDA_ERROR_UNKNOWN (999)"
109
109
  self._test_init_failure(initialization_error_test, expected)
110
110
 
111
111
  def _test_cuda_disabled(self, target):
112
112
  # Uses _test_init_failure to launch the test in a separate subprocess
113
113
  # with CUDA disabled.
114
- cuda_disabled = os.environ.get('NUMBA_DISABLE_CUDA')
115
- os.environ['NUMBA_DISABLE_CUDA'] = "1"
114
+ cuda_disabled = os.environ.get("NUMBA_DISABLE_CUDA")
115
+ os.environ["NUMBA_DISABLE_CUDA"] = "1"
116
116
  try:
117
- expected = 'CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1'
117
+ expected = "CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1"
118
118
  self._test_init_failure(cuda_disabled_test, expected)
119
119
  finally:
120
120
  if cuda_disabled is not None:
121
- os.environ['NUMBA_DISABLE_CUDA'] = cuda_disabled
121
+ os.environ["NUMBA_DISABLE_CUDA"] = cuda_disabled
122
122
  else:
123
- os.environ.pop('NUMBA_DISABLE_CUDA')
123
+ os.environ.pop("NUMBA_DISABLE_CUDA")
124
124
 
125
125
  def test_cuda_disabled_raising(self):
126
126
  self._test_cuda_disabled(cuda_disabled_test)
@@ -135,5 +135,5 @@ class TestInit(CUDATestCase):
135
135
  self.assertIsNone(cuda.cuda_error())
136
136
 
137
137
 
138
- if __name__ == '__main__':
138
+ if __name__ == "__main__":
139
139
  unittest.main()
@@ -5,20 +5,23 @@ from numba.cuda.testing import unittest, ContextResettingTestCase
5
5
  from numba.cuda.testing import skip_on_cudasim
6
6
 
7
7
 
8
- @skip_on_cudasim('Inline PTX cannot be used in the simulator')
8
+ @skip_on_cudasim("Inline PTX cannot be used in the simulator")
9
9
  class TestCudaInlineAsm(ContextResettingTestCase):
10
10
  def test_inline_rsqrt(self):
11
11
  mod = ir.Module(__name__)
12
- mod.triple = 'nvptx64-nvidia-cuda'
12
+ mod.triple = "nvptx64-nvidia-cuda"
13
13
  nvvm.add_ir_version(mod)
14
14
  fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(ir.FloatType())])
15
- fn = ir.Function(mod, fnty, 'cu_rsqrt')
16
- bldr = ir.IRBuilder(fn.append_basic_block('entry'))
15
+ fn = ir.Function(mod, fnty, "cu_rsqrt")
16
+ bldr = ir.IRBuilder(fn.append_basic_block("entry"))
17
17
 
18
18
  rsqrt_approx_fnty = ir.FunctionType(ir.FloatType(), [ir.FloatType()])
19
- inlineasm = ir.InlineAsm(rsqrt_approx_fnty,
20
- 'rsqrt.approx.f32 $0, $1;',
21
- '=f,f', side_effect=True)
19
+ inlineasm = ir.InlineAsm(
20
+ rsqrt_approx_fnty,
21
+ "rsqrt.approx.f32 $0, $1;",
22
+ "=f,f",
23
+ side_effect=True,
24
+ )
22
25
  val = bldr.load(fn.args[0])
23
26
  res = bldr.call(inlineasm, [val])
24
27
 
@@ -30,8 +33,8 @@ class TestCudaInlineAsm(ContextResettingTestCase):
30
33
  nvvm.set_cuda_kernel(fn)
31
34
  nvvmir = str(mod)
32
35
  ptx = nvvm.compile_ir(nvvmir)
33
- self.assertTrue('rsqrt.approx.f32' in str(ptx))
36
+ self.assertTrue("rsqrt.approx.f32" in str(ptx))
34
37
 
35
38
 
36
- if __name__ == '__main__':
39
+ if __name__ == "__main__":
37
40
  unittest.main()
@@ -1,10 +1,9 @@
1
1
  import numpy as np
2
2
  import warnings
3
3
  from numba.cuda.testing import unittest
4
- from numba.cuda.testing import (skip_on_cudasim, skip_if_cuda_includes_missing)
4
+ from numba.cuda.testing import skip_on_cudasim, skip_if_cuda_includes_missing
5
5
  from numba.cuda.testing import CUDATestCase, test_data_dir
6
- from numba.cuda.cudadrv.driver import (CudaAPIError, Linker,
7
- LinkerError)
6
+ from numba.cuda.cudadrv.driver import CudaAPIError, Linker, LinkerError
8
7
  from numba.cuda.cudadrv.error import NvrtcError
9
8
  from numba.cuda import require_context
10
9
  from numba.tests.support import ignore_internal_warnings
@@ -103,25 +102,24 @@ def simple_lmem(A, B, dty):
103
102
  B[i] = C[i]
104
103
 
105
104
 
106
- @skip_on_cudasim('Linking unsupported in the simulator')
105
+ @skip_on_cudasim("Linking unsupported in the simulator")
107
106
  class TestLinker(CUDATestCase):
108
- _NUMBA_NVIDIA_BINDING_0_ENV = {'NUMBA_CUDA_USE_NVIDIA_BINDING': '0'}
107
+ _NUMBA_NVIDIA_BINDING_0_ENV = {"NUMBA_CUDA_USE_NVIDIA_BINDING": "0"}
109
108
 
110
109
  @require_context
111
110
  def test_linker_basic(self):
112
- '''Simply go through the constructor and destructor
113
- '''
111
+ """Simply go through the constructor and destructor"""
114
112
  linker = Linker.new(cc=(5, 3))
115
113
  del linker
116
114
 
117
115
  def _test_linking(self, eager):
118
116
  global bar # must be a global; other it is recognized as a freevar
119
- bar = cuda.declare_device('bar', 'int32(int32)')
117
+ bar = cuda.declare_device("bar", "int32(int32)")
120
118
 
121
- link = str(test_data_dir / 'jitlink.ptx')
119
+ link = str(test_data_dir / "jitlink.ptx")
122
120
 
123
121
  if eager:
124
- args = ['void(int32[:], int32[:])']
122
+ args = ["void(int32[:], int32[:])"]
125
123
  else:
126
124
  args = []
127
125
 
@@ -144,9 +142,9 @@ class TestLinker(CUDATestCase):
144
142
  self._test_linking(eager=True)
145
143
 
146
144
  def test_linking_cu(self):
147
- bar = cuda.declare_device('bar', 'int32(int32)')
145
+ bar = cuda.declare_device("bar", "int32(int32)")
148
146
 
149
- link = str(test_data_dir / 'jitlink.cu')
147
+ link = str(test_data_dir / "jitlink.cu")
150
148
 
151
149
  @cuda.jit(link=[link])
152
150
  def kernel(r, x):
@@ -165,36 +163,37 @@ class TestLinker(CUDATestCase):
165
163
  np.testing.assert_array_equal(r, expected)
166
164
 
167
165
  def test_linking_cu_log_warning(self):
168
- bar = cuda.declare_device('bar', 'int32(int32)')
166
+ bar = cuda.declare_device("bar", "int32(int32)")
169
167
 
170
- link = str(test_data_dir / 'warn.cu')
168
+ link = str(test_data_dir / "warn.cu")
171
169
 
172
170
  with warnings.catch_warnings(record=True) as w:
173
171
  ignore_internal_warnings()
174
172
 
175
- @cuda.jit('void(int32)', link=[link])
173
+ @cuda.jit("void(int32)", link=[link])
176
174
  def kernel(x):
177
175
  bar(x)
178
176
 
179
- self.assertEqual(len(w), 1, 'Expected warnings from NVRTC')
177
+ self.assertEqual(len(w), 1, "Expected warnings from NVRTC")
180
178
  # Check the warning refers to the log messages
181
- self.assertIn('NVRTC log messages', str(w[0].message))
179
+ self.assertIn("NVRTC log messages", str(w[0].message))
182
180
  # Check the message pertaining to the unused variable is provided
183
- self.assertIn('declared but never referenced', str(w[0].message))
181
+ self.assertIn("declared but never referenced", str(w[0].message))
184
182
 
185
183
  def test_linking_cu_error(self):
186
- bar = cuda.declare_device('bar', 'int32(int32)')
184
+ bar = cuda.declare_device("bar", "int32(int32)")
187
185
 
188
- link = str(test_data_dir / 'error.cu')
186
+ link = str(test_data_dir / "error.cu")
189
187
 
190
188
  with self.assertRaises(NvrtcError) as e:
191
- @cuda.jit('void(int32)', link=[link])
189
+
190
+ @cuda.jit("void(int32)", link=[link])
192
191
  def kernel(x):
193
192
  bar(x)
194
193
 
195
194
  msg = e.exception.args[0]
196
195
  # Check the error message refers to the NVRTC compile
197
- self.assertIn('NVRTC Compilation failure', msg)
196
+ self.assertIn("NVRTC Compilation failure", msg)
198
197
  # Check the expected error in the CUDA source is reported
199
198
  self.assertIn('identifier "SYNTAX" is undefined', msg)
200
199
  # Check the filename is reported correctly
@@ -203,33 +202,37 @@ class TestLinker(CUDATestCase):
203
202
  def test_linking_unknown_filetype_error(self):
204
203
  expected_err = "Don't know how to link file with extension .cuh"
205
204
  with self.assertRaisesRegex(RuntimeError, expected_err):
206
- @cuda.jit('void()', link=['header.cuh'])
205
+
206
+ @cuda.jit("void()", link=["header.cuh"])
207
207
  def kernel():
208
208
  pass
209
209
 
210
210
  def test_linking_file_with_no_extension_error(self):
211
211
  expected_err = "Don't know how to link file with no extension"
212
212
  with self.assertRaisesRegex(RuntimeError, expected_err):
213
- @cuda.jit('void()', link=['data'])
213
+
214
+ @cuda.jit("void()", link=["data"])
214
215
  def kernel():
215
216
  pass
216
217
 
217
218
  @skip_if_cuda_includes_missing
218
219
  def test_linking_cu_cuda_include(self):
219
- link = str(test_data_dir / 'cuda_include.cu')
220
+ link = str(test_data_dir / "cuda_include.cu")
220
221
 
221
222
  # An exception will be raised when linking this kernel due to the
222
223
  # compile failure if CUDA includes cannot be found by Nvrtc.
223
- @cuda.jit('void()', link=[link])
224
+ @cuda.jit("void()", link=[link])
224
225
  def kernel():
225
226
  pass
226
227
 
227
228
  def test_try_to_link_nonexistent(self):
228
229
  with self.assertRaises(LinkerError) as e:
229
- @cuda.jit('void(int32[::1])', link=['nonexistent.a'])
230
+
231
+ @cuda.jit("void(int32[::1])", link=["nonexistent.a"])
230
232
  def f(x):
231
233
  x[0] = 0
232
- self.assertIn('nonexistent.a not found', e.exception.args)
234
+
235
+ self.assertIn("nonexistent.a not found", e.exception.args)
233
236
 
234
237
  def test_set_registers_no_max(self):
235
238
  """Ensure that the jitted kernel used in the test_set_registers_* tests
@@ -276,7 +279,8 @@ class TestLinker(CUDATestCase):
276
279
  def test_get_shared_mem_per_specialized(self):
277
280
  compiled = cuda.jit(simple_smem)
278
281
  compiled_specialized = compiled.specialize(
279
- np.zeros(100, dtype=np.int32), np.float64)
282
+ np.zeros(100, dtype=np.int32), np.float64
283
+ )
280
284
  shared_mem_size = compiled_specialized.get_shared_mem_per_block()
281
285
  self.assertEqual(shared_mem_size, 800)
282
286
 
@@ -307,11 +311,12 @@ class TestLinker(CUDATestCase):
307
311
  compiled_specialized = compiled.specialize(
308
312
  np.zeros(LMEM_SIZE, dtype=np.int32),
309
313
  np.zeros(LMEM_SIZE, dtype=np.int32),
310
- np.float64)
314
+ np.float64,
315
+ )
311
316
  local_mem_size = compiled_specialized.get_local_mem_per_thread()
312
317
  calc_size = np.dtype(np.float64).itemsize * LMEM_SIZE
313
318
  self.assertGreaterEqual(local_mem_size, calc_size)
314
319
 
315
320
 
316
- if __name__ == '__main__':
321
+ if __name__ == "__main__":
317
322
  unittest.main()
@@ -7,11 +7,10 @@ from numba.cuda.testing import skip_on_cudasim, skip_on_arm
7
7
  from numba.tests.support import linux_only
8
8
 
9
9
 
10
- @skip_on_cudasim('CUDA Driver API unsupported in the simulator')
10
+ @skip_on_cudasim("CUDA Driver API unsupported in the simulator")
11
11
  @linux_only
12
- @skip_on_arm('Managed Alloc support is experimental/untested on ARM')
12
+ @skip_on_arm("Managed Alloc support is experimental/untested on ARM")
13
13
  class TestManagedAlloc(ContextResettingTestCase):
14
-
15
14
  def get_total_gpu_memory(self):
16
15
  # We use a driver function to directly get the total GPU memory because
17
16
  # an EMM plugin may report something different (or not implement
@@ -48,7 +47,9 @@ class TestManagedAlloc(ContextResettingTestCase):
48
47
  def test_managed_alloc_driver_undersubscribe(self):
49
48
  msg = "Managed memory unsupported prior to CC 3.0"
50
49
  self.skip_if_cc_major_lt(3, msg)
51
- self._test_managed_alloc_driver(0.5)
50
+ # We keep the allocation small so that it doesn't hang on GPUs
51
+ # with large memory (H100)
52
+ self._test_managed_alloc_driver(0.1)
52
53
 
53
54
  # This test is skipped by default because it is easy to hang the machine
54
55
  # for a very long time or get OOM killed if the GPU memory size is >50% of
@@ -85,7 +86,7 @@ class TestManagedAlloc(ContextResettingTestCase):
85
86
  n_elems = n_bytes // dtype.itemsize
86
87
  ary = np.ndarray(shape=n_elems, dtype=dtype, buffer=mem)
87
88
 
88
- magic = 0xab
89
+ magic = 0xAB
89
90
  device_memset(mem, magic, n_bytes)
90
91
  ctx.synchronize()
91
92
 
@@ -102,7 +103,7 @@ class TestManagedAlloc(ContextResettingTestCase):
102
103
  ary.fill(123.456)
103
104
  self.assertTrue(all(ary == 123.456))
104
105
 
105
- @cuda.jit('void(double[:])')
106
+ @cuda.jit("void(double[:])")
106
107
  def kernel(x):
107
108
  i = cuda.grid(1)
108
109
  if i < x.shape[0]:
@@ -123,5 +124,5 @@ class TestManagedAlloc(ContextResettingTestCase):
123
124
  self._test_managed_array(attach_global=False)
124
125
 
125
126
 
126
- if __name__ == '__main__':
127
+ if __name__ == "__main__":
127
128
  unittest.main()