numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from numba.tests.support import (override_config, captured_stdout)
1
+ from numba.tests.support import override_config, captured_stdout
2
2
  from numba.cuda.testing import skip_on_cudasim
3
3
  from numba import cuda
4
4
  from numba.core import types
@@ -8,7 +8,7 @@ import re
8
8
  import unittest
9
9
 
10
10
 
11
- @skip_on_cudasim('Simulator does not produce debug dumps')
11
+ @skip_on_cudasim("Simulator does not produce debug dumps")
12
12
  class TestCudaDebugInfo(CUDATestCase):
13
13
  """
14
14
  These tests only check the compiled PTX for the debuginfo section
@@ -49,7 +49,7 @@ class TestCudaDebugInfo(CUDATestCase):
49
49
  self._check(foo, sig=(types.int32[:],), expect=True)
50
50
 
51
51
  def test_environment_override(self):
52
- with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
52
+ with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
53
53
  # Using default value
54
54
  @cuda.jit(opt=False)
55
55
  def foo(x):
@@ -86,7 +86,7 @@ class TestCudaDebugInfo(CUDATestCase):
86
86
 
87
87
  llvm_ir = f.inspect_llvm(sig)
88
88
  # A variable name starting with "bool" in the debug metadata
89
- pat = r'!DILocalVariable\(.*name:\s+\"bool'
89
+ pat = r"!DILocalVariable\(.*name:\s+\"bool"
90
90
  match = re.compile(pat).search(llvm_ir)
91
91
  self.assertIsNone(match, msg=llvm_ir)
92
92
 
@@ -106,7 +106,7 @@ class TestCudaDebugInfo(CUDATestCase):
106
106
  mdnode_id = match.group(1)
107
107
 
108
108
  # verify the DIBasicType has correct encoding attribute DW_ATE_boolean
109
- pat = rf'!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean'
109
+ pat = rf"!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean"
110
110
  match = re.compile(pat).search(llvm_ir)
111
111
  self.assertIsNotNone(match, msg=llvm_ir)
112
112
 
@@ -133,14 +133,17 @@ class TestCudaDebugInfo(CUDATestCase):
133
133
 
134
134
  llvm_ir = f.inspect_llvm(sig)
135
135
 
136
- defines = [line for line in llvm_ir.splitlines()
137
- if 'define void @"_ZN6cudapy' in line]
136
+ defines = [
137
+ line
138
+ for line in llvm_ir.splitlines()
139
+ if 'define void @"_ZN6cudapy' in line
140
+ ]
138
141
 
139
142
  # Make sure we only found one definition
140
143
  self.assertEqual(len(defines), 1)
141
144
 
142
145
  wrapper_define = defines[0]
143
- self.assertIn('!dbg', wrapper_define)
146
+ self.assertIn("!dbg", wrapper_define)
144
147
 
145
148
  def test_debug_function_calls_internal_impl(self):
146
149
  # Calling a function in a module generated from an implementation
@@ -198,16 +201,16 @@ class TestCudaDebugInfo(CUDATestCase):
198
201
  debug_opts = itertools.product(*[(True, False)] * 3)
199
202
 
200
203
  for kernel_debug, f1_debug, f2_debug in debug_opts:
201
- with self.subTest(kernel_debug=kernel_debug,
202
- f1_debug=f1_debug,
203
- f2_debug=f2_debug):
204
- self._test_chained_device_function(kernel_debug,
205
- f1_debug,
206
- f2_debug)
207
-
208
- def _test_chained_device_function_two_calls(self, kernel_debug, f1_debug,
209
- f2_debug):
210
-
204
+ with self.subTest(
205
+ kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
206
+ ):
207
+ self._test_chained_device_function(
208
+ kernel_debug, f1_debug, f2_debug
209
+ )
210
+
211
+ def _test_chained_device_function_two_calls(
212
+ self, kernel_debug, f1_debug, f2_debug
213
+ ):
211
214
  @cuda.jit(device=True, debug=f2_debug, opt=False)
212
215
  def f2(x):
213
216
  return x + 1
@@ -232,12 +235,12 @@ class TestCudaDebugInfo(CUDATestCase):
232
235
  debug_opts = itertools.product(*[(True, False)] * 3)
233
236
 
234
237
  for kernel_debug, f1_debug, f2_debug in debug_opts:
235
- with self.subTest(kernel_debug=kernel_debug,
236
- f1_debug=f1_debug,
237
- f2_debug=f2_debug):
238
- self._test_chained_device_function_two_calls(kernel_debug,
239
- f1_debug,
240
- f2_debug)
238
+ with self.subTest(
239
+ kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
240
+ ):
241
+ self._test_chained_device_function_two_calls(
242
+ kernel_debug, f1_debug, f2_debug
243
+ )
241
244
 
242
245
  def test_chained_device_three_functions(self):
243
246
  # Like test_chained_device_function, but with enough functions (three)
@@ -278,13 +281,13 @@ class TestCudaDebugInfo(CUDATestCase):
278
281
  llvm_ir = f.inspect_llvm(sig)
279
282
 
280
283
  # extract the metadata node id from `types` field of DISubroutineType
281
- pat = r'!DISubroutineType\(types:\s+!(\d+)\)'
284
+ pat = r"!DISubroutineType\(types:\s+!(\d+)\)"
282
285
  match = re.compile(pat).search(llvm_ir)
283
286
  self.assertIsNotNone(match, msg=llvm_ir)
284
287
  mdnode_id = match.group(1)
285
288
 
286
289
  # extract the metadata node ids from the flexible node of types
287
- pat = rf'!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}'
290
+ pat = rf"!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}"
288
291
  match = re.compile(pat).search(llvm_ir)
289
292
  self.assertIsNotNone(match, msg=llvm_ir)
290
293
  mdnode_id1 = match.group(1)
@@ -303,10 +306,56 @@ class TestCudaDebugInfo(CUDATestCase):
303
306
 
304
307
  def test_kernel_args_types_dump(self):
305
308
  # see issue#135
306
- with override_config('DUMP_LLVM', 1):
309
+ with override_config("DUMP_LLVM", 1):
307
310
  with captured_stdout():
308
311
  self._test_kernel_args_types()
309
312
 
313
+ def test_llvm_dbg_value(self):
314
+ sig = (types.int32, types.int32)
315
+
316
+ @cuda.jit("void(int32, int32)", debug=True, opt=False)
317
+ def f(x, y):
318
+ z = x # noqa: F841
319
+ z = 100 # noqa: F841
320
+ z = y # noqa: F841
321
+ z = True # noqa: F841
322
+
323
+ llvm_ir = f.inspect_llvm(sig)
324
+ # Verify the call to llvm.dbg.declare is replaced by llvm.dbg.value
325
+ pat1 = r'call void @"llvm.dbg.declare"'
326
+ match = re.compile(pat1).search(llvm_ir)
327
+ self.assertIsNone(match, msg=llvm_ir)
328
+ pat2 = r'call void @"llvm.dbg.value"'
329
+ match = re.compile(pat2).search(llvm_ir)
330
+ self.assertIsNotNone(match, msg=llvm_ir)
331
+
332
+ def test_no_user_var_alias(self):
333
+ sig = (types.int32, types.int32)
334
+
335
+ @cuda.jit("void(int32, int32)", debug=True, opt=False)
336
+ def f(x, y):
337
+ z = x # noqa: F841
338
+ z = y # noqa: F841
339
+
340
+ llvm_ir = f.inspect_llvm(sig)
341
+ pat = r'!DILocalVariable.*name:\s+"z\$1".*'
342
+ match = re.compile(pat).search(llvm_ir)
343
+ self.assertIsNone(match, msg=llvm_ir)
344
+
345
+ def test_no_literal_type(self):
346
+ sig = (types.int32,)
347
+
348
+ @cuda.jit("void(int32)", debug=True, opt=False)
349
+ def f(x):
350
+ z = x # noqa: F841
351
+ z = 100 # noqa: F841
352
+ z = True # noqa: F841
353
+
354
+ llvm_ir = f.inspect_llvm(sig)
355
+ pat = r'!DIBasicType.*name:\s+"Literal.*'
356
+ match = re.compile(pat).search(llvm_ir)
357
+ self.assertIsNone(match, msg=llvm_ir)
358
+
310
359
 
311
- if __name__ == '__main__':
360
+ if __name__ == "__main__":
312
361
  unittest.main()
@@ -3,8 +3,13 @@ import cffi
3
3
 
4
4
  import numpy as np
5
5
 
6
- from numba.cuda.testing import (skip_if_curand_kernel_missing, skip_on_cudasim,
7
- test_data_dir, unittest, CUDATestCase)
6
+ from numba.cuda.testing import (
7
+ skip_if_curand_kernel_missing,
8
+ skip_on_cudasim,
9
+ test_data_dir,
10
+ unittest,
11
+ CUDATestCase,
12
+ )
8
13
  from numba import cuda, jit, float32, int32, types
9
14
  from numba.core.errors import TypingError
10
15
  from numba.tests.support import skip_unless_cffi
@@ -12,9 +17,7 @@ from types import ModuleType
12
17
 
13
18
 
14
19
  class TestDeviceFunc(CUDATestCase):
15
-
16
20
  def test_use_add2f(self):
17
-
18
21
  @cuda.jit("float32(float32, float32)", device=True)
19
22
  def add2f(a, b):
20
23
  return a + b
@@ -33,7 +36,6 @@ class TestDeviceFunc(CUDATestCase):
33
36
  self.assertTrue(np.all(ary == exp), (ary, exp))
34
37
 
35
38
  def test_indirect_add2f(self):
36
-
37
39
  @cuda.jit("float32(float32, float32)", device=True)
38
40
  def add2f(a, b):
39
41
  return a + b
@@ -74,12 +76,12 @@ class TestDeviceFunc(CUDATestCase):
74
76
 
75
77
  self._check_cpu_dispatcher(add)
76
78
 
77
- @skip_on_cudasim('not supported in cudasim')
79
+ @skip_on_cudasim("not supported in cudasim")
78
80
  def test_cpu_dispatcher_invalid(self):
79
81
  # Test invalid usage
80
82
  # Explicit signature disables compilation, which also disables
81
83
  # compiling on CUDA.
82
- @jit('(i4, i4)')
84
+ @jit("(i4, i4)")
83
85
  def add(a, b):
84
86
  return a + b
85
87
 
@@ -95,7 +97,7 @@ class TestDeviceFunc(CUDATestCase):
95
97
  def add(a, b):
96
98
  return a + b
97
99
 
98
- mymod = ModuleType(name='mymod')
100
+ mymod = ModuleType(name="mymod")
99
101
  mymod.add = add
100
102
  del add
101
103
 
@@ -109,7 +111,7 @@ class TestDeviceFunc(CUDATestCase):
109
111
  add_kernel[1, ary.size](ary)
110
112
  np.testing.assert_equal(expect, ary)
111
113
 
112
- @skip_on_cudasim('not supported in cudasim')
114
+ @skip_on_cudasim("not supported in cudasim")
113
115
  def test_inspect_llvm(self):
114
116
  @cuda.jit(device=True)
115
117
  def foo(x, y):
@@ -120,13 +122,13 @@ class TestDeviceFunc(CUDATestCase):
120
122
 
121
123
  fname = cres.fndesc.mangled_name
122
124
  # Verify that the function name has "foo" in it as in the python name
123
- self.assertIn('foo', fname)
125
+ self.assertIn("foo", fname)
124
126
 
125
127
  llvm = foo.inspect_llvm(args)
126
128
  # Check that the compiled function name is in the LLVM.
127
129
  self.assertIn(fname, llvm)
128
130
 
129
- @skip_on_cudasim('not supported in cudasim')
131
+ @skip_on_cudasim("not supported in cudasim")
130
132
  def test_inspect_asm(self):
131
133
  @cuda.jit(device=True)
132
134
  def foo(x, y):
@@ -137,13 +139,13 @@ class TestDeviceFunc(CUDATestCase):
137
139
 
138
140
  fname = cres.fndesc.mangled_name
139
141
  # Verify that the function name has "foo" in it as in the python name
140
- self.assertIn('foo', fname)
142
+ self.assertIn("foo", fname)
141
143
 
142
144
  ptx = foo.inspect_asm(args)
143
145
  # Check that the compiled function name is in the PTX
144
146
  self.assertIn(fname, ptx)
145
147
 
146
- @skip_on_cudasim('not supported in cudasim')
148
+ @skip_on_cudasim("not supported in cudasim")
147
149
  def test_inspect_sass_disallowed(self):
148
150
  @cuda.jit(device=True)
149
151
  def foo(x, y):
@@ -152,10 +154,11 @@ class TestDeviceFunc(CUDATestCase):
152
154
  with self.assertRaises(RuntimeError) as raises:
153
155
  foo.inspect_sass((int32, int32))
154
156
 
155
- self.assertIn('Cannot inspect SASS of a device function',
156
- str(raises.exception))
157
+ self.assertIn(
158
+ "Cannot inspect SASS of a device function", str(raises.exception)
159
+ )
157
160
 
158
- @skip_on_cudasim('cudasim will allow calling any function')
161
+ @skip_on_cudasim("cudasim will allow calling any function")
159
162
  def test_device_func_as_kernel_disallowed(self):
160
163
  @cuda.jit(device=True)
161
164
  def f():
@@ -164,10 +167,12 @@ class TestDeviceFunc(CUDATestCase):
164
167
  with self.assertRaises(RuntimeError) as raises:
165
168
  f[1, 1]()
166
169
 
167
- self.assertIn('Cannot compile a device function as a kernel',
168
- str(raises.exception))
170
+ self.assertIn(
171
+ "Cannot compile a device function as a kernel",
172
+ str(raises.exception),
173
+ )
169
174
 
170
- @skip_on_cudasim('cudasim ignores casting by jit decorator signature')
175
+ @skip_on_cudasim("cudasim ignores casting by jit decorator signature")
171
176
  def test_device_casting(self):
172
177
  # Ensure that casts to the correct type are forced when calling a
173
178
  # device function with a signature. This test ensures that:
@@ -176,20 +181,23 @@ class TestDeviceFunc(CUDATestCase):
176
181
  # shouldn't
177
182
  # - We insert a cast when calling rgba, as opposed to failing to type.
178
183
 
179
- @cuda.jit('int32(int32, int32, int32, int32)', device=True)
184
+ @cuda.jit("int32(int32, int32, int32, int32)", device=True)
180
185
  def rgba(r, g, b, a):
181
- return (((r & 0xFF) << 16) |
182
- ((g & 0xFF) << 8) |
183
- ((b & 0xFF) << 0) |
184
- ((a & 0xFF) << 24))
186
+ return (
187
+ ((r & 0xFF) << 16)
188
+ | ((g & 0xFF) << 8)
189
+ | ((b & 0xFF) << 0)
190
+ | ((a & 0xFF) << 24)
191
+ )
185
192
 
186
193
  @cuda.jit
187
194
  def rgba_caller(x, channels):
188
195
  x[0] = rgba(channels[0], channels[1], channels[2], channels[3])
189
196
 
190
197
  x = cuda.device_array(1, dtype=np.int32)
191
- channels = cuda.to_device(np.asarray([1.0, 2.0, 3.0, 4.0],
192
- dtype=np.float32))
198
+ channels = cuda.to_device(
199
+ np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
200
+ )
193
201
 
194
202
  rgba_caller[1, 1](x, channels)
195
203
 
@@ -259,32 +267,31 @@ int random_number(unsigned int *out, unsigned long long seed)
259
267
  }""")
260
268
 
261
269
 
262
- @skip_on_cudasim('External functions unsupported in the simulator')
270
+ @skip_on_cudasim("External functions unsupported in the simulator")
263
271
  class TestDeclareDevice(CUDATestCase):
264
-
265
272
  def check_api(self, decl):
266
- self.assertEqual(decl.name, 'f1')
273
+ self.assertEqual(decl.name, "f1")
267
274
  self.assertEqual(decl.sig.args, (float32[:],))
268
275
  self.assertEqual(decl.sig.return_type, int32)
269
276
 
270
277
  def test_declare_device_signature(self):
271
- f1 = cuda.declare_device('f1', int32(float32[:]))
278
+ f1 = cuda.declare_device("f1", int32(float32[:]))
272
279
  self.check_api(f1)
273
280
 
274
281
  def test_declare_device_string(self):
275
- f1 = cuda.declare_device('f1', 'int32(float32[:])')
282
+ f1 = cuda.declare_device("f1", "int32(float32[:])")
276
283
  self.check_api(f1)
277
284
 
278
285
  def test_bad_declare_device_tuple(self):
279
- with self.assertRaisesRegex(TypeError, 'Return type'):
280
- cuda.declare_device('f1', (float32[:],))
286
+ with self.assertRaisesRegex(TypeError, "Return type"):
287
+ cuda.declare_device("f1", (float32[:],))
281
288
 
282
289
  def test_bad_declare_device_string(self):
283
- with self.assertRaisesRegex(TypeError, 'Return type'):
284
- cuda.declare_device('f1', '(float32[:],)')
290
+ with self.assertRaisesRegex(TypeError, "Return type"):
291
+ cuda.declare_device("f1", "(float32[:],)")
285
292
 
286
293
  def test_link_cu_source(self):
287
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
294
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
288
295
 
289
296
  @cuda.jit
290
297
  def kernel(r, x):
@@ -301,7 +308,7 @@ class TestDeclareDevice(CUDATestCase):
301
308
 
302
309
  def _test_link_multiple_sources(self, link_type):
303
310
  link = link_type([times2_cu, times4_cu])
304
- times4 = cuda.declare_device('times4', 'int32(int32)', link=link)
311
+ times4 = cuda.declare_device("times4", "int32(int32)", link=link)
305
312
 
306
313
  @cuda.jit
307
314
  def kernel(r, x):
@@ -360,7 +367,7 @@ class TestDeclareDevice(CUDATestCase):
360
367
  np.testing.assert_equal(x[0], 323845807)
361
368
 
362
369
  def test_declared_in_called_function(self):
363
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
370
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
364
371
 
365
372
  @cuda.jit
366
373
  def device_func(x):
@@ -380,7 +387,7 @@ class TestDeclareDevice(CUDATestCase):
380
387
  np.testing.assert_equal(r, x * 2)
381
388
 
382
389
  def test_declared_in_called_function_twice(self):
383
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
390
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
384
391
 
385
392
  @cuda.jit
386
393
  def device_func_1(x):
@@ -404,7 +411,7 @@ class TestDeclareDevice(CUDATestCase):
404
411
  np.testing.assert_equal(r, x * 2)
405
412
 
406
413
  def test_declared_in_called_function_two_calls(self):
407
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
414
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
408
415
 
409
416
  @cuda.jit
410
417
  def device_func(x):
@@ -424,7 +431,7 @@ class TestDeclareDevice(CUDATestCase):
424
431
  np.testing.assert_equal(r, x * 6)
425
432
 
426
433
  def test_call_declared_function_twice(self):
427
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
434
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
428
435
 
429
436
  @cuda.jit
430
437
  def kernel(r, x):
@@ -440,7 +447,7 @@ class TestDeclareDevice(CUDATestCase):
440
447
  np.testing.assert_equal(r, x * 6)
441
448
 
442
449
  def test_declared_in_called_function_and_parent(self):
443
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
450
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
444
451
 
445
452
  @cuda.jit
446
453
  def device_func(x):
@@ -460,8 +467,8 @@ class TestDeclareDevice(CUDATestCase):
460
467
  np.testing.assert_equal(r, x * 4)
461
468
 
462
469
  def test_call_two_different_declared_functions(self):
463
- times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
464
- times3 = cuda.declare_device('times3', 'int32(int32)', link=times3_cu)
470
+ times2 = cuda.declare_device("times2", "int32(int32)", link=times2_cu)
471
+ times3 = cuda.declare_device("times3", "int32(int32)", link=times3_cu)
465
472
 
466
473
  @cuda.jit
467
474
  def kernel(r, x):
@@ -477,5 +484,5 @@ class TestDeclareDevice(CUDATestCase):
477
484
  np.testing.assert_equal(r, x * 5)
478
485
 
479
486
 
480
- if __name__ == '__main__':
487
+ if __name__ == "__main__":
481
488
  unittest.main()