numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -136,5 +136,5 @@ class Math_isnan(ConcreteTemplate):
136
136
  class Math_modf(ConcreteTemplate):
137
137
  cases = [
138
138
  signature(types.UniTuple(types.float64, 2), types.float64),
139
- signature(types.UniTuple(types.float32, 2), types.float32)
139
+ signature(types.UniTuple(types.float32, 2), types.float32),
140
140
  ]
@@ -1,5 +1,5 @@
1
1
  from llvmlite import ir
2
- from numba.core import types
2
+ from numba.core import types, cgutils
3
3
  from numba.core.debuginfo import DIBuilder
4
4
  from numba.cuda.types import GridGroup
5
5
 
@@ -7,9 +7,14 @@ _BYTE_SIZE = 8
7
7
 
8
8
 
9
9
  class CUDADIBuilder(DIBuilder):
10
+ def __init__(self, module, filepath, cgctx, directives_only):
11
+ super().__init__(module, filepath, cgctx, directives_only)
12
+ # Cache for local variable metadata type and line deduplication
13
+ self._vartypelinemap = {}
10
14
 
11
15
  def _var_type(self, lltype, size, datamodel=None):
12
16
  is_bool = False
17
+ is_int_literal = False
13
18
  is_grid_group = False
14
19
 
15
20
  if isinstance(lltype, ir.IntType):
@@ -21,24 +26,111 @@ class CUDADIBuilder(DIBuilder):
21
26
  name = str(datamodel.fe_type)
22
27
  if isinstance(datamodel.fe_type, types.Boolean):
23
28
  is_bool = True
29
+ if isinstance(datamodel.fe_type, types.BooleanLiteral):
30
+ name = "bool"
31
+ elif isinstance(datamodel.fe_type, types.Integer):
32
+ if isinstance(datamodel.fe_type, types.IntegerLiteral):
33
+ name = f"int{_BYTE_SIZE * size}"
34
+ is_int_literal = True
24
35
  elif isinstance(datamodel.fe_type, GridGroup):
25
36
  is_grid_group = True
26
37
 
27
- if is_bool or is_grid_group:
38
+ if is_bool or is_int_literal or is_grid_group:
28
39
  m = self.module
29
40
  bitsize = _BYTE_SIZE * size
30
41
  # Boolean type workaround until upstream Numba is fixed
31
42
  if is_bool:
32
43
  ditok = "DW_ATE_boolean"
44
+ elif is_int_literal:
45
+ ditok = "DW_ATE_signed"
33
46
  # GridGroup type should use numba.cuda implementation
34
47
  elif is_grid_group:
35
48
  ditok = "DW_ATE_unsigned"
36
49
 
37
- return m.add_debug_info('DIBasicType', {
38
- 'name': name,
39
- 'size': bitsize,
40
- 'encoding': ir.DIToken(ditok),
41
- })
50
+ return m.add_debug_info(
51
+ "DIBasicType",
52
+ {
53
+ "name": name,
54
+ "size": bitsize,
55
+ "encoding": ir.DIToken(ditok),
56
+ },
57
+ )
42
58
 
43
59
  # For other cases, use upstream Numba implementation
44
60
  return super()._var_type(lltype, size, datamodel=datamodel)
61
+
62
+ def mark_variable(
63
+ self,
64
+ builder,
65
+ allocavalue,
66
+ name,
67
+ lltype,
68
+ size,
69
+ line,
70
+ datamodel=None,
71
+ argidx=None,
72
+ ):
73
+ if name.startswith("$") or "." in name:
74
+ # Do not emit llvm.dbg.declare on user variable alias
75
+ return
76
+ else:
77
+ int_type = (ir.IntType,)
78
+ real_type = ir.FloatType, ir.DoubleType
79
+ if isinstance(lltype, int_type + real_type):
80
+ # Start with scalar variable, switching llvm.dbg.declare
81
+ # to llvm.dbg.value
82
+ return
83
+ else:
84
+ return super().mark_variable(
85
+ builder,
86
+ allocavalue,
87
+ name,
88
+ lltype,
89
+ size,
90
+ line,
91
+ datamodel,
92
+ argidx,
93
+ )
94
+
95
+ def update_variable(
96
+ self,
97
+ builder,
98
+ value,
99
+ name,
100
+ lltype,
101
+ size,
102
+ line,
103
+ datamodel=None,
104
+ argidx=None,
105
+ ):
106
+ m = self.module
107
+ fnty = ir.FunctionType(ir.VoidType(), [ir.MetaDataType()] * 3)
108
+ decl = cgutils.get_or_insert_function(m, fnty, "llvm.dbg.value")
109
+
110
+ mdtype = self._var_type(lltype, size, datamodel)
111
+ index = name.find(".")
112
+ if index >= 0:
113
+ name = name[:index]
114
+ # Merge DILocalVariable nodes with same name and type but different
115
+ # lines. Use the cached [(name, type) -> line] info to deduplicate
116
+ # metadata. Use the lltype as part of key.
117
+ key = (name, lltype)
118
+ if key in self._vartypelinemap:
119
+ line = self._vartypelinemap[key]
120
+ else:
121
+ self._vartypelinemap[key] = line
122
+ arg_index = 0 if argidx is None else argidx
123
+ mdlocalvar = m.add_debug_info(
124
+ "DILocalVariable",
125
+ {
126
+ "name": name,
127
+ "arg": arg_index,
128
+ "scope": self.subprograms[-1],
129
+ "file": self.difile,
130
+ "line": line,
131
+ "type": mdtype,
132
+ },
133
+ )
134
+ mdexpr = m.add_debug_info("DIExpression", {})
135
+
136
+ return builder.call(decl, [value, mdlocalvar, mdexpr])
@@ -6,13 +6,24 @@ from numba.cuda.dispatcher import CUDADispatcher
6
6
  from numba.cuda.simulator.kernel import FakeCUDAKernel
7
7
 
8
8
 
9
- _msg_deprecated_signature_arg = ("Deprecated keyword argument `{0}`. "
10
- "Signatures should be passed as the first "
11
- "positional argument.")
12
-
13
-
14
- def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
15
- opt=None, lineinfo=False, cache=False, **kws):
9
+ _msg_deprecated_signature_arg = (
10
+ "Deprecated keyword argument `{0}`. "
11
+ "Signatures should be passed as the first "
12
+ "positional argument."
13
+ )
14
+
15
+
16
+ def jit(
17
+ func_or_sig=None,
18
+ device=False,
19
+ inline="never",
20
+ link=[],
21
+ debug=None,
22
+ opt=None,
23
+ lineinfo=False,
24
+ cache=False,
25
+ **kws,
26
+ ):
16
27
  """
17
28
  JIT compile a Python function for CUDA GPUs.
18
29
 
@@ -55,39 +66,52 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
55
66
  """
56
67
 
57
68
  if link and config.ENABLE_CUDASIM:
58
- raise NotImplementedError('Cannot link PTX in the simulator')
69
+ raise NotImplementedError("Cannot link PTX in the simulator")
59
70
 
60
- if kws.get('boundscheck'):
71
+ if kws.get("boundscheck"):
61
72
  raise NotImplementedError("bounds checking is not supported for CUDA")
62
73
 
63
- if kws.get('argtypes') is not None:
64
- msg = _msg_deprecated_signature_arg.format('argtypes')
74
+ if kws.get("argtypes") is not None:
75
+ msg = _msg_deprecated_signature_arg.format("argtypes")
65
76
  raise DeprecationError(msg)
66
- if kws.get('restype') is not None:
67
- msg = _msg_deprecated_signature_arg.format('restype')
77
+ if kws.get("restype") is not None:
78
+ msg = _msg_deprecated_signature_arg.format("restype")
68
79
  raise DeprecationError(msg)
69
- if kws.get('bind') is not None:
70
- msg = _msg_deprecated_signature_arg.format('bind')
80
+ if kws.get("bind") is not None:
81
+ msg = _msg_deprecated_signature_arg.format("bind")
71
82
  raise DeprecationError(msg)
72
83
 
84
+ if isinstance(inline, bool):
85
+ DeprecationWarning(
86
+ "Passing bool to inline argument is deprecated, please refer to "
87
+ "Numba's documentation on inlining: "
88
+ "https://numba.readthedocs.io/en/stable/developer/inlining.html"
89
+ )
90
+
91
+ inline = "always" if inline else "never"
92
+
73
93
  debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
74
94
  opt = (config.OPT != 0) if opt is None else opt
75
- fastmath = kws.get('fastmath', False)
76
- extensions = kws.get('extensions', [])
95
+ fastmath = kws.get("fastmath", False)
96
+ extensions = kws.get("extensions", [])
77
97
 
78
98
  if debug and opt:
79
- msg = ("debug=True with opt=True "
80
- "is not supported by CUDA. This may result in a crash"
81
- " - set debug=False or opt=False.")
99
+ msg = (
100
+ "debug=True with opt=True "
101
+ "is not supported by CUDA. This may result in a crash"
102
+ " - set debug=False or opt=False."
103
+ )
82
104
  warn(NumbaInvalidConfigWarning(msg))
83
105
 
84
106
  if debug and lineinfo:
85
- msg = ("debug and lineinfo are mutually exclusive. Use debug to get "
86
- "full debug info (this disables some optimizations), or "
87
- "lineinfo for line info only with code generation unaffected.")
107
+ msg = (
108
+ "debug and lineinfo are mutually exclusive. Use debug to get "
109
+ "full debug info (this disables some optimizations), or "
110
+ "lineinfo for line info only with code generation unaffected."
111
+ )
88
112
  warn(NumbaInvalidConfigWarning(msg))
89
113
 
90
- if device and kws.get('link'):
114
+ if device and kws.get("link"):
91
115
  raise ValueError("link keyword invalid for device function")
92
116
 
93
117
  if sigutils.is_signature(func_or_sig):
@@ -101,19 +125,22 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
101
125
 
102
126
  if signatures is not None:
103
127
  if config.ENABLE_CUDASIM:
128
+
104
129
  def jitwrapper(func):
105
130
  return FakeCUDAKernel(func, device=device, fastmath=fastmath)
131
+
106
132
  return jitwrapper
107
133
 
108
134
  def _jit(func):
109
135
  targetoptions = kws.copy()
110
- targetoptions['debug'] = debug
111
- targetoptions['lineinfo'] = lineinfo
112
- targetoptions['link'] = link
113
- targetoptions['opt'] = opt
114
- targetoptions['fastmath'] = fastmath
115
- targetoptions['device'] = device
116
- targetoptions['extensions'] = extensions
136
+ targetoptions["debug"] = debug
137
+ targetoptions["lineinfo"] = lineinfo
138
+ targetoptions["link"] = link
139
+ targetoptions["opt"] = opt
140
+ targetoptions["fastmath"] = fastmath
141
+ targetoptions["device"] = device
142
+ targetoptions["inline"] = inline
143
+ targetoptions["extensions"] = extensions
117
144
 
118
145
  disp = CUDADispatcher(func, targetoptions=targetoptions)
119
146
 
@@ -128,6 +155,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
128
155
 
129
156
  if device:
130
157
  from numba.core import typeinfer
158
+
131
159
  with typeinfer.register_dispatcher(disp):
132
160
  disp.compile_device(argtypes, restype)
133
161
  else:
@@ -142,29 +170,43 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
142
170
  else:
143
171
  if func_or_sig is None:
144
172
  if config.ENABLE_CUDASIM:
173
+
145
174
  def autojitwrapper(func):
146
- return FakeCUDAKernel(func, device=device,
147
- fastmath=fastmath)
175
+ return FakeCUDAKernel(
176
+ func, device=device, fastmath=fastmath
177
+ )
148
178
  else:
179
+
149
180
  def autojitwrapper(func):
150
- return jit(func, device=device, debug=debug, opt=opt,
151
- lineinfo=lineinfo, link=link, cache=cache, **kws)
181
+ return jit(
182
+ func,
183
+ device=device,
184
+ inline=inline,
185
+ debug=debug,
186
+ opt=opt,
187
+ lineinfo=lineinfo,
188
+ link=link,
189
+ cache=cache,
190
+ **kws,
191
+ )
152
192
 
153
193
  return autojitwrapper
154
194
  # func_or_sig is a function
155
195
  else:
156
196
  if config.ENABLE_CUDASIM:
157
- return FakeCUDAKernel(func_or_sig, device=device,
158
- fastmath=fastmath)
197
+ return FakeCUDAKernel(
198
+ func_or_sig, device=device, fastmath=fastmath
199
+ )
159
200
  else:
160
201
  targetoptions = kws.copy()
161
- targetoptions['debug'] = debug
162
- targetoptions['lineinfo'] = lineinfo
163
- targetoptions['opt'] = opt
164
- targetoptions['link'] = link
165
- targetoptions['fastmath'] = fastmath
166
- targetoptions['device'] = device
167
- targetoptions['extensions'] = extensions
202
+ targetoptions["debug"] = debug
203
+ targetoptions["lineinfo"] = lineinfo
204
+ targetoptions["opt"] = opt
205
+ targetoptions["link"] = link
206
+ targetoptions["fastmath"] = fastmath
207
+ targetoptions["device"] = device
208
+ targetoptions["inline"] = inline
209
+ targetoptions["extensions"] = extensions
168
210
  disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
169
211
 
170
212
  if cache:
@@ -191,7 +233,7 @@ def declare_device(name, sig, link=None):
191
233
 
192
234
  argtypes, restype = sigutils.normalize_signature(sig)
193
235
  if restype is None:
194
- msg = 'Return type must be provided for device declarations'
236
+ msg = "Return type must be provided for device declarations"
195
237
  raise TypeError(msg)
196
238
 
197
239
  return declare_device_function(name, restype, argtypes, link)
@@ -30,4 +30,4 @@ class CUDATarget(TargetDescriptor):
30
30
  return self._targetctx
31
31
 
32
32
 
33
- cuda_target = CUDATarget('cuda')
33
+ cuda_target = CUDATarget("cuda")
@@ -1,21 +1,61 @@
1
1
  # Re export
2
2
  import sys
3
3
  from numba.cuda import cg
4
- from .stubs import (threadIdx, blockIdx, blockDim, gridDim, laneid, warpsize,
5
- syncwarp, shared, local, const, atomic,
6
- shfl_sync_intrinsic, vote_sync_intrinsic, match_any_sync,
7
- match_all_sync, threadfence_block, threadfence_system,
8
- threadfence, selp, popc, brev, clz, ffs, fma, cbrt,
9
- activemask, lanemask_lt, nanosleep, fp16,
10
- _vector_type_stubs)
11
- from .intrinsics import (grid, gridsize, syncthreads, syncthreads_and,
12
- syncthreads_count, syncthreads_or)
4
+ from .stubs import (
5
+ threadIdx,
6
+ blockIdx,
7
+ blockDim,
8
+ gridDim,
9
+ laneid,
10
+ warpsize,
11
+ syncwarp,
12
+ shared,
13
+ local,
14
+ const,
15
+ atomic,
16
+ vote_sync_intrinsic,
17
+ match_any_sync,
18
+ match_all_sync,
19
+ threadfence_block,
20
+ threadfence_system,
21
+ threadfence,
22
+ selp,
23
+ popc,
24
+ brev,
25
+ clz,
26
+ ffs,
27
+ fma,
28
+ cbrt,
29
+ activemask,
30
+ lanemask_lt,
31
+ nanosleep,
32
+ fp16,
33
+ _vector_type_stubs,
34
+ )
35
+ from .intrinsics import (
36
+ grid,
37
+ gridsize,
38
+ syncthreads,
39
+ syncthreads_and,
40
+ syncthreads_count,
41
+ syncthreads_or,
42
+ shfl_sync,
43
+ shfl_up_sync,
44
+ shfl_down_sync,
45
+ shfl_xor_sync,
46
+ )
13
47
  from .cudadrv.error import CudaSupportError
14
- from numba.cuda.cudadrv.driver import (BaseCUDAMemoryManager,
15
- HostOnlyCUDAMemoryManager,
16
- GetIpcHandleMixin, MemoryPointer,
17
- MappedMemory, PinnedMemory, MemoryInfo,
18
- IpcHandle, set_memory_manager)
48
+ from numba.cuda.cudadrv.driver import (
49
+ BaseCUDAMemoryManager,
50
+ HostOnlyCUDAMemoryManager,
51
+ GetIpcHandleMixin,
52
+ MemoryPointer,
53
+ MappedMemory,
54
+ PinnedMemory,
55
+ MemoryInfo,
56
+ IpcHandle,
57
+ set_memory_manager,
58
+ )
19
59
  from numba.cuda.cudadrv.runtime import runtime
20
60
  from .cudadrv import nvvm
21
61
  from numba.cuda import initialize
@@ -26,13 +66,23 @@ from .api import *
26
66
  from .api import _auto_device
27
67
  from .args import In, Out, InOut
28
68
 
29
- from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
30
- shfl_sync, shfl_up_sync, shfl_down_sync,
31
- shfl_xor_sync)
69
+ from .intrinsic_wrapper import (
70
+ all_sync,
71
+ any_sync,
72
+ eq_sync,
73
+ ballot_sync,
74
+ )
32
75
 
33
76
  from .kernels import reduction
34
77
  from numba.cuda.cudadrv.linkable_code import (
35
- Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
78
+ Archive,
79
+ CUSource,
80
+ Cubin,
81
+ Fatbin,
82
+ LinkableCode,
83
+ LTOIR,
84
+ Object,
85
+ PTXSource,
36
86
  )
37
87
 
38
88
  reduce = Reduce = reduction.Reduce