numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487):
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,978 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from collections import namedtuple
5
+ from numba.cuda.tests.support import override_config, captured_stdout
6
+ from numba.cuda.testing import skip_on_cudasim
7
+ from numba import cuda
8
+ from numba.cuda import types
9
+ from numba.cuda.np import numpy_support
10
+ from numba.cuda.testing import CUDATestCase
11
+ from numba.cuda.core import config
12
+ from textwrap import dedent
13
+ import math
14
+ import itertools
15
+ import re
16
+ import unittest
17
+ import warnings
18
+ from numba.cuda.core.errors import NumbaDebugInfoWarning
19
+ from numba.cuda.tests.support import ignore_internal_warnings
20
+ import numpy as np
21
+ import inspect
22
+
23
+
24
+ @skip_on_cudasim("Simulator does not produce debug dumps")
25
+ class TestCudaDebugInfo(CUDATestCase):
26
+ """
27
+ These tests only check the compiled PTX for the debuginfo section
28
+ """
29
+
30
+ def _getasm(self, fn, sig):
31
+ fn.compile(sig)
32
+ return fn.inspect_asm(sig)
33
+
34
+ def _check(self, fn, sig, expect):
35
+ asm = self._getasm(fn, sig=sig)
36
+ re_section_dbginfo = re.compile(r"\.section\s+\.debug_info\s+{")
37
+ match = re_section_dbginfo.search(asm)
38
+ assertfn = self.assertIsNotNone if expect else self.assertIsNone
39
+ assertfn(match, msg=asm)
40
+
41
def test_no_debuginfo_in_asm(self):
    """A kernel jitted with ``debug=False`` must not emit a debug section."""

    def foo(x):
        x[0] = 1

    # Apply the decorator explicitly rather than with the ``@`` syntax.
    kernel = cuda.jit(debug=False, opt=False)(foo)
    self._check(kernel, sig=(types.int32[:],), expect=False)
47
+
48
def test_debuginfo_in_asm(self):
    """A kernel jitted with ``debug=True`` must emit a debug section."""

    def foo(x):
        x[0] = 1

    # Apply the decorator explicitly rather than with the ``@`` syntax.
    kernel = cuda.jit(debug=True, opt=False)(foo)
    self._check(kernel, sig=(types.int32[:],), expect=True)
54
+
55
def test_environment_override(self):
    """Check ``CUDA_DEBUGINFO_DEFAULT`` against an explicit ``debug`` flag."""
    int32_array_sig = (types.int32[:],)

    with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
        # No ``debug`` option given: the overridden default is in effect.
        @cuda.jit(opt=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=int32_array_sig, expect=True)

        # An explicit ``debug=False`` takes precedence over the default.
        @cuda.jit(debug=False)
        def bar(x):
            x[0] = 1

        self._check(bar, sig=int32_array_sig, expect=False)
70
+
71
def test_issue_5835(self):
    """Eagerly compile a debug, unoptimized kernel; it must not crash."""

    def f(x):
        x[0] = 0

    # Invalid debug metadata used to segfault NVVM whenever a function was
    # compiled with debug turned on and optimization off. Passing the
    # signature makes compilation eager, and completing it is the whole test.
    cuda.jit((types.int32[::1],), debug=True, opt=False)(f)
78
+
79
def test_issue_9888(self):
    """Compiler-created symbols must not show up as DILocalVariable."""
    # See Numba PR #9888: https://github.com/numba/numba/pull/9888
    sig = (types.boolean,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(cond):
        if cond:
            x = 1  # noqa: F841
        else:
            x = 0  # noqa: F841

    ir_text = f.inspect_llvm(sig)
    # Look for a variable whose name starts with "bool" in the debug
    # metadata; none is expected.
    bool_var = re.search(r"!DILocalVariable\(.*name:\s+\"bool", ir_text)
    self.assertIsNone(bool_var, msg=ir_text)
96
+
97
def test_bool_type(self):
    """The type of a comparison result is encoded as ``DW_ATE_boolean``."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x == y  # noqa: F841

    ir_text = f.inspect_llvm(sig)

    # Pull the metadata node id out of the `type` field of the
    # DILocalVariable that describes `z`.
    var_match = re.search(
        r'!DILocalVariable\(.*name:\s+"z".*type:\s+!(\d+)', ir_text
    )
    self.assertIsNotNone(var_match, msg=ir_text)
    type_node = var_match.group(1)

    # That node must be a DIBasicType carrying DW_ATE_boolean.
    type_match = re.search(
        rf"!{type_node}\s+=\s+!DIBasicType\(.*DW_ATE_boolean", ir_text
    )
    self.assertIsNotNone(type_match, msg=ir_text)
116
+
117
def test_grid_group_type(self):
    """A grid group local is described by a 64-bit "GridGroup" basic type."""
    sig = (types.int32,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(x):
        grid = cuda.cg.this_grid()  # noqa: F841

    ir_text = f.inspect_llvm(sig)

    grid_type = re.search(
        r'!DIBasicType\(.*DW_ATE_unsigned, name: "GridGroup", size: 64',
        ir_text,
    )
    self.assertIsNotNone(grid_type, msg=ir_text)
129
+
130
@unittest.skip("Wrappers no longer exist")
def test_wrapper_has_debuginfo(self):
    """(Skipped) The lone cudapy definition line carries a ``!dbg`` marker."""
    sig = (types.int32[::1],)

    @cuda.jit(sig, debug=True, opt=0)
    def f(x):
        x[0] = 1

    ir_text = f.inspect_llvm(sig)

    marker = 'define void @"_ZN6cudapy'
    defines = list(filter(lambda line: marker in line, ir_text.splitlines()))

    # Exactly one definition is expected to match.
    self.assertEqual(len(defines), 1)

    wrapper_define = defines[0]
    self.assertIn("!dbg", wrapper_define)
151
+
152
def test_debug_function_calls_internal_impl(self):
    """Debug compilation succeeds when an internal implementation is called."""
    # Calling a function that lives in a module generated from an
    # implementation internal to Numba means several modules have to be
    # compiled with NVVM: the internal implementation and the caller. Two
    # modules are involved here because `in (2, 3)` is implemented with:
    #
    #   numba::cpython::listobj::in_seq::$3clocals$3e::seq_contains_impl$242(
    #       UniTuple<long long, 2>,
    #       int
    #   )
    #
    # Condensed from the reproducer in Issue 5311:
    # https://github.com/numba/numba/issues/5311#issuecomment-674206587

    def f(inp, outp):
        outp[0] = 1 if inp[0] in (2, 3) else 3

    # Supplying the signature compiles eagerly; completing is the test.
    cuda.jit((types.int32[:], types.int32[:]), debug=True, opt=False)(f)
169
+
170
def test_debug_function_calls_device_function(self):
    """Debug compilation succeeds when a kernel calls a device function."""
    # A device function call means NVVM compiles more than one module: one
    # for the caller and one for the callee. This verifies that doing so
    # with debug enabled does not produce an NVVM error.

    @cuda.jit(device=True, debug=True, opt=0)
    def threadid():
        return cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x

    def kernel(arr):
        i = cuda.grid(1)
        if i < len(arr):
            arr[i] = threadid()

    # Supplying the signature compiles the kernel eagerly.
    cuda.jit((types.int32[:],), debug=True, opt=0)(kernel)
184
+
185
def _test_chained_device_function(self, kernel_debug, f1_debug, f2_debug):
    """Launch a kernel -> f1 -> f2 call chain with the given debug flags."""
    # Options shared by both device functions.
    device_opts = dict(device=True, opt=False)

    @cuda.jit(debug=f2_debug, **device_opts)
    def f2(x):
        return x + 1

    @cuda.jit(debug=f1_debug, **device_opts)
    def f1(x, y):
        return x - f2(y)

    @cuda.jit((types.int32, types.int32), debug=kernel_debug, opt=False)
    def kernel(x, y):
        f1(x, y)

    launch = kernel[1, 1]
    launch(1, 2)
199
+
200
+ def test_chained_device_function(self):
201
+ # Calling a device function that calls another device function from a
202
+ # kernel with should succeed regardless of which jit decorators have
203
+ # debug=True. See Issue #7159.
204
+
205
+ debug_opts = itertools.product(*[(True, False)] * 3)
206
+
207
+ for kernel_debug, f1_debug, f2_debug in debug_opts:
208
+ with self.subTest(
209
+ kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
210
+ ):
211
+ self._test_chained_device_function(
212
+ kernel_debug, f1_debug, f2_debug
213
+ )
214
+
215
def _test_chained_device_function_two_calls(
    self, kernel_debug, f1_debug, f2_debug
):
    """Launch a kernel calling f1 (which calls f2) and f2 directly."""
    # Options shared by both device functions.
    device_opts = dict(device=True, opt=False)

    @cuda.jit(debug=f2_debug, **device_opts)
    def f2(x):
        return x + 1

    @cuda.jit(debug=f1_debug, **device_opts)
    def f1(x, y):
        return x - f2(y)

    @cuda.jit(debug=kernel_debug, opt=False)
    def kernel(x, y):
        f1(x, y)
        f2(x)

    launch = kernel[1, 1]
    launch(1, 2)
232
+
233
+ def test_chained_device_function_two_calls(self):
234
+ # Calling a device function that calls a leaf device function from a
235
+ # kernel, and calling the leaf device function from the kernel should
236
+ # succeed, regardless of which jit decorators have debug=True. See
237
+ # Issue #7159.
238
+
239
+ debug_opts = itertools.product(*[(True, False)] * 3)
240
+
241
+ for kernel_debug, f1_debug, f2_debug in debug_opts:
242
+ with self.subTest(
243
+ kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
244
+ ):
245
+ self._test_chained_device_function_two_calls(
246
+ kernel_debug, f1_debug, f2_debug
247
+ )
248
+
249
def test_chained_device_three_functions(self):
    """Launch a kernel over a three-deep chain of device functions."""
    # Like test_chained_device_function, but with enough functions (three)
    # to ensure that the recursion visits all the way down the call tree
    # when fixing linkage of functions for debug.

    def three_device_fns(kernel_debug, leaf_debug):
        @cuda.jit(device=True, debug=leaf_debug, opt=False)
        def f3(x):
            return x * x

        @cuda.jit(device=True)
        def f2(x):
            return f3(x) + 1

        @cuda.jit(device=True)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit(debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)

        kernel[1, 1](1, 2)

    # Cover debug on the kernel, on the leaf, on both, and on neither, in
    # the order (True, True), (True, False), (False, True), (False, False).
    for kernel_debug, leaf_debug in itertools.product((True, False), repeat=2):
        three_device_fns(kernel_debug=kernel_debug, leaf_debug=leaf_debug)
277
+
278
def _test_kernel_args_types(self):
    """Check that both kernel arguments are described as signed "int32"."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x + y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # Node id held in the `types` field of the DISubroutineType.
    subroutine = re.search(r"!DISubroutineType\(types:\s+!(\d+)\)", llvm_ir)
    self.assertIsNotNone(subroutine, msg=llvm_ir)
    types_node = subroutine.group(1)

    # That node lists the per-argument type nodes; two are expected.
    type_list = re.search(
        rf"!{types_node}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}", llvm_ir
    )
    self.assertIsNotNone(type_list, msg=llvm_ir)

    # Each listed node must be the expected basic type.
    for arg_node in (type_list.group(1), type_list.group(2)):
        pat = rf'!{arg_node}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"'  # noqa: E501
        self.assertIsNotNone(re.search(pat, llvm_ir), msg=llvm_ir)
307
+
308
+ def test_kernel_args_types(self):
309
+ self._test_kernel_args_types()
310
+
311
def test_kernel_args_types_dump(self):
    """Run the kernel argument type checks with ``DUMP_LLVM`` set to 1."""
    # see issue#135
    # stdout is captured while the checks run under the override.
    with override_config("DUMP_LLVM", 1), captured_stdout():
        self._test_kernel_args_types()
316
+
317
def test_kernel_args_names(self):
    """Kernel argument names in the IR carry no ``arg.`` prefix."""
    sig = (types.int32,)

    @cuda.jit("void(int32)", debug=True, opt=False)
    def f(x):
        z = x  # noqa: F841

    ir_text = f.inspect_llvm(sig)

    # The definition must name the argument plainly ...
    plain_name = re.search(r"define void @.*\(i32 %\"x\"\)", ir_text)
    self.assertIsNotNone(plain_name, msg=ir_text)
    # ... and never with the "arg." prefix.
    prefixed_name = re.search(r"define void @.*\(i32 %\"arg\.x\"\)", ir_text)
    self.assertIsNone(prefixed_name, msg=ir_text)
333
+
334
def test_llvm_dbg_value(self):
    """The IR calls ``llvm.dbg.value`` and never ``llvm.dbg.declare``."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z1 = x  # noqa: F841
        z2 = 100  # noqa: F841
        z3 = y  # noqa: F841
        z4 = True  # noqa: F841

    ir_text = f.inspect_llvm(sig)

    # Calls to llvm.dbg.declare must have been replaced by llvm.dbg.value.
    declare_call = re.search(r'call void @"llvm.dbg.declare"', ir_text)
    self.assertIsNone(declare_call, msg=ir_text)
    value_call = re.search(r'call void @"llvm.dbg.value"', ir_text)
    self.assertIsNotNone(value_call, msg=ir_text)
352
+
353
def test_llvm_dbg_value_range(self):
    # Checks the stores and llvm.dbg.value calls emitted for `second`, a
    # variable assigned in both branches of an `if`. The expectations are
    # the FileCheck directives in the kernel's docstring.
    sig = (types.int64,)

    @cuda.jit("void(int64,)", debug=True, opt=False)
    def foo(x):
        """
        CHECK: store i1 true, i1* %"second.1"
        CHECK: call void @"llvm.dbg.value"
        CHECK: store i1 true, i1* %"second.2"
        CHECK: call void @"llvm.dbg.value"

        CHECK: %[[VAL_1:.*]] = load i1, i1* %"second.2"
        CHECK: %[[VAL_2:.*]] = load i1, i1* %[[VAL_3:.*]]
        CHECK: store i1 %[[VAL_1]], i1* %[[VAL_3]]
        CHECK: call void @"llvm.dbg.value"(metadata i1 %[[VAL_1]], metadata ![[VAL_4:[0-9]+]]

        CHECK: ![[VAL_4]] = !DILocalVariable{{.+}}name: "second"
        """
        if x > 0:
            second = x > 10
        else:
            second = True
        if second:
            pass

    # The kernel docstring doubles as the FileCheck pattern for its own IR.
    ir = foo.inspect_llvm()[sig]
    self.assertFileCheckMatches(ir, foo.__doc__)
380
+
381
def test_no_user_var_alias(self):
    """A reassigned variable is not reported under the name ``z$1``."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x  # noqa: F841
        z = y  # noqa: F841

    ir_text = f.inspect_llvm(sig)

    aliased = re.search(r'!DILocalVariable.*name:\s+"z\$1".*', ir_text)
    self.assertIsNone(aliased, msg=ir_text)
393
+
394
def test_no_literal_type(self):
    """No DIBasicType in the IR has a name starting with ``Literal``."""
    sig = (types.int32,)

    @cuda.jit("void(int32)", debug=True, opt=False)
    def f(x):
        z = x  # noqa: F841
        z = 100  # noqa: F841
        z = True  # noqa: F841

    ir_text = f.inspect_llvm(sig)

    literal_type = re.search(r'!DIBasicType.*name:\s+"Literal.*', ir_text)
    self.assertIsNone(literal_type, msg=ir_text)
407
+
408
@unittest.skipIf(
    config.CUDA_DEBUG_POLY, "Uses old union format, not variant_part"
)
def test_union_poly_types(self):
    """A variable assigned several types is described as a DWARF union.

    The check walks the metadata graph: the DILocalVariable for ``foo``
    points at a union DICompositeType, whose ``elements`` node lists three
    member nodes, each of which must be the expected DW_TAG_member.
    """
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        foo = 100  # noqa: F841
        foo = 2.34  # noqa: F841
        foo = True  # noqa: F841
        foo = 200  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # Extract the type node id
    pat1 = r'!DILocalVariable\(.*name: "foo".*type: !(\d+)\)'
    match = re.compile(pat1).search(llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
    type_id = match.group(1)

    # Verify the union type and extract the elements node id
    pat2 = rf"!{type_id} = distinct !DICompositeType\(elements: !(\d+),.*size: 64, tag: DW_TAG_union_type\)"  # noqa: E501
    match = re.compile(pat2).search(llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
    elements_id = match.group(1)

    # Extract the member node ids. The pattern is anchored on the elements
    # node found above so that an unrelated three-entry metadata tuple
    # elsewhere in the module cannot satisfy it.
    pat3 = rf"!{elements_id} = !{{ !(\d+), !(\d+), !(\d+) }}"
    match = re.compile(pat3).search(llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)

    # Verify the member nodes. The parentheses are escaped so they match
    # the literal text of the metadata node rather than forming a group.
    expected_members = (("_bool", 8), ("_float64", 64), ("_int64", 64))
    for member_id, (name, size) in zip(match.groups(), expected_members):
        pat = rf'!{member_id} = !DIDerivedType\(.*name: "{name}", size: {size}, tag: DW_TAG_member\)'  # noqa: E501
        member = re.compile(pat).search(llvm_ir)
        self.assertIsNotNone(member, msg=llvm_ir)
449
+
450
def test_union_debug(self):
    """Run a debug kernel that rebinds ``foo`` and check what it stores."""

    @cuda.jit("void(u8, int64[::1])", debug=True, opt=False)
    def a_union_use_case(arg, results):
        foo = 1
        foo = arg
        if foo < 1:
            foo = 2
            return
        bar = foo == 0
        results[0] = 1 if not bar else 0

    expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"

    with captured_stdout() as out:
        host_zeros = np.zeros(16, dtype=np.int64)
        results = cuda.to_device(host_zeros)
        a_union_use_case[1, 1](100, results)
        print(results.copy_to_host())

    # The printed array is compared through the captured stdout text.
    printed = out.getvalue()
    self.assertIn(expected, printed)
467
+
468
@unittest.skipUnless(config.CUDA_DEBUG_POLY, "CUDA_DEBUG_POLY not enabled")
def test_poly_variant_part(self):
    """Test polymorphic variables with DW_TAG_variant_part.

    This test verifies that when CUDA_DEBUG_POLY is enabled,
    polymorphic variables generate proper DWARF5 variant_part
    debug information with discriminator and variant members.
    """
    # Typed constant: i8 0, i8 1, etc. | Node reference: !123, !456
    # The chosen FileCheck fragment is substituted for the EXTRADATA
    # placeholder in the pattern below.
    if config.CUDA_DEBUG_POLY_USE_TYPED_CONST:
        extradata_pattern = "i8 {{[0-9]+}}"
    else:
        extradata_pattern = "{{![0-9]+}}"

    @cuda.jit("void()", debug=True, opt=False)
    def f():
        foo = 100  # noqa: F841
        foo = 3.14  # noqa: F841
        foo = True  # noqa: F841
        foo = np.int32(42)  # noqa: F841

    # The kernel takes no arguments, so its IR is keyed by the empty tuple.
    llvm_ir = f.inspect_llvm()[tuple()]

    # Build FileCheck pattern dynamically based on config
    # Capture node IDs and verify the hierarchical structure
    check_pattern = """
    CHECK-DAG: !DILocalVariable({{.*}}name: "foo"{{.*}}type: [[WRAPPER:![0-9]+]]
    CHECK-DAG: [[WRAPPER]] = !DICompositeType({{.*}}elements: [[ELEMENTS:![0-9]+]]{{.*}}name: "variant_wrapper_struct"{{.*}}size: 128{{.*}}tag: DW_TAG_structure_type)
    CHECK-DAG: [[ELEMENTS]] = !{ [[DISC:![0-9]+]], [[VPART:![0-9]+]] }
    CHECK-DAG: [[DISC]] = !DIDerivedType({{.*}}name: "discriminator-{{[0-9]+}}"{{.*}}size: 8{{.*}}tag: DW_TAG_member)
    CHECK-DAG: [[VPART]] = !DICompositeType({{.*}}discriminator: [[DISC]]{{.*}}elements: [[VMEMBERS:![0-9]+]]{{.*}}tag: DW_TAG_variant_part)
    CHECK-DAG: [[VMEMBERS]] = !{ [[VM1:![0-9]+]], [[VM2:![0-9]+]], [[VM3:![0-9]+]], [[VM4:![0-9]+]] }
    CHECK-DAG: [[VM1]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_bool"{{.*}}offset: 8{{.*}}tag: DW_TAG_member)
    CHECK-DAG: [[VM2]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_float64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
    CHECK-DAG: [[VM3]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int32"{{.*}}offset: 32{{.*}}tag: DW_TAG_member)
    CHECK-DAG: [[VM4]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
    """.replace("EXTRADATA", extradata_pattern)

    self.assertFileCheckMatches(llvm_ir, check_pattern)
507
+
508
def test_DW_LANG(self):
    # Checks the attributes of the DICompileUnit emitted for a debug
    # kernel; the expectations are the FileCheck directives in the
    # kernel's docstring.
    @cuda.jit(debug=True, opt=False)
    def foo():
        """
        CHECK: distinct !DICompileUnit
        CHECK-SAME: emissionKind: FullDebug
        CHECK-SAME: isOptimized: true
        CHECK-SAME: language: DW_LANG_C_plus_plus
        CHECK-SAME: producer: "clang (Numba)"
        """
        pass

    # No signature was given to the decorator, so launch the kernel once
    # before inspecting its IR.
    foo[1, 1]()

    # The kernel takes no arguments, so its IR is keyed by the empty tuple.
    llvm_ir = foo.inspect_llvm()[tuple()]
    self.assertFileCheckMatches(llvm_ir, foo.__doc__)
524
+
525
def test_DILocation(self):
    """Tests that DILocation information is reasonable.

    The kernel `foo` produces LLVM like:
    define function() {
    entry:
      alloca
      store 0 to alloca
      <arithmetic for doing the operations on b, c, d>
      setup for print
      branch
    other_labels:
    ... <elided>
    }

    The following checks that:
    * the alloca and store have no !dbg
    * the arithmetic occurs in the order defined and with !dbg
    * that the !dbg entries are monotonically increasing in value with
      source line number
    """
    sig = (types.float64,)

    @cuda.jit(sig, debug=True, opt=False)
    def foo(a):
        """
        CHECK-LABEL: define void @{{.+}}foo
        CHECK: entry:

        CHECK: %[[VAL_0:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_0]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_1:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_1]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_2:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_2]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_3:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_3]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_4:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_4]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_5:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_5]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_6:.*]] = alloca i8*
        CHECK-NOT: !dbg
        CHECK: store i8* null, i8** %[[VAL_6]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_7:.*]] = alloca i8*
        CHECK-NOT: !dbg
        CHECK: store i8* null, i8** %[[VAL_7]]
        CHECK-NOT: !dbg

        CHECK: br label %"[[ENTRY:.+]]"
        CHECK-NOT: !dbg
        CHECK: [[ENTRY]]:

        CHECK: fadd{{.+}} !dbg ![[DBGADD:[0-9]+]]
        CHECK: fmul{{.+}} !dbg ![[DBGMUL:[0-9]+]]
        CHECK: fdiv{{.+}} !dbg ![[DBGDIV:[0-9]+]]

        CHECK: ![[DBGADD]] = !DILocation
        CHECK: ![[DBGMUL]] = !DILocation
        CHECK: ![[DBGDIV]] = !DILocation
        """
        b = a + 1.23
        c = b * 2.34
        a = b / c

    # The kernel docstring doubles as the FileCheck pattern for its own IR.
    ir = foo.inspect_llvm()[sig]
    self.assertFileCheckMatches(ir, foo.__doc__)
605
+
606
def test_missing_source(self):
    """A debug kernel built from ``exec``'d source warns exactly once."""
    strsrc = """
    def foo():
        pass
    """
    # Define the function through exec and fetch it from the namespace the
    # code was executed in.
    namespace = {}
    exec(dedent(strsrc), {}, namespace)
    foo = cuda.jit(debug=True, opt=False)(namespace["foo"])

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", NumbaDebugInfoWarning)
        ignore_internal_warnings()
        foo[1, 1]()

    self.assertEqual(len(caught), 1)
    found = caught[0]
    self.assertEqual(found.category, NumbaDebugInfoWarning)
    text = str(found.message)
    # The warning must carry the expected message ...
    self.assertIn("Could not find source for function", text)
    # ... and mention the function it is about.
    self.assertIn(str(foo.py_func), text)
628
+
629
def test_no_if_op_bools_declared(self):
    # Checks that no llvm.dbg.declare line of the device function's IR
    # mentions "bool".
    @cuda.jit(
        "int64(boolean, boolean)",
        debug=True,
        opt=False,
        _dbg_optnone=True,
        device=True,
    )
    def choice(cond1, cond2):
        """
        CHECK: define void @{{.+}}choices
        """
        # NOTE(review): the CHECK directive in the docstring above is never
        # passed to assertFileCheckMatches in this test - confirm whether
        # it is still intended.
        if cond1 and cond2:
            return 1
        else:
            return 2

    # Use the first (only) compiled signature to look up the IR.
    ir_content = choice.inspect_llvm()[choice.signatures[0]]
    # We should not declare variables used as the condition in if ops.
    # See Numba PR #9888: https://github.com/numba/numba/pull/9888

    for line in ir_content.splitlines():
        if "llvm.dbg.declare" in line:
            self.assertNotIn("bool", line)
653
+
654
def test_llvm_inliner_flag_conflict(self):
    # bar will be marked as 'alwaysinline', but when DEBUGINFO_DEFAULT is
    # set functions are not marked as 'alwaysinline' and this results in a
    # conflict. baz will not be marked as 'alwaysinline' as a result of
    # DEBUGINFO_DEFAULT

    @cuda.jit(forceinline=True)
    def bar(x):
        return math.sin(x)

    @cuda.jit(forceinline=False)
    def baz(x):
        return math.cos(x)

    @cuda.jit(opt=True)
    def foo(x, y):
        """
        CHECK-LABEL: define void @{{.+}}foo
        CHECK: call i32 @"[[BAR:.+]]"(
        CHECK: call i32 @"[[BAZ:.+]]"(

        CHECK-DAG: declare i32 @"[[BAR]]"({{.+}}alwaysinline
        CHECK-DAG: declare i32 @"[[BAZ]]"(
        CHECK-DAG: define linkonce_odr i32 @"[[BAR]]"({{.+}}alwaysinline
        CHECK-DAG: define linkonce_odr i32 @"[[BAZ]]"(
        """
        a = bar(y)
        b = baz(y)
        x[0] = a + b

    # check it compiles
    with override_config("DEBUGINFO_DEFAULT", 1):
        result = cuda.device_array(1, dtype=np.float32)
        foo[1, 1](result, np.pi)
        # NOTE(review): the array returned by copy_to_host() is discarded;
        # the comparison below indexes the device array directly.
        result.copy_to_host()

    result_host = math.sin(np.pi) + math.cos(np.pi)
    self.assertPreciseEqual(result[0], result_host)

    # The kernel docstring doubles as the FileCheck pattern for its own IR.
    ir_content = foo.inspect_llvm()[foo.signatures[0]]
    self.assertFileCheckMatches(ir_content, foo.__doc__)

    # Check that the device functions call the appropriate device
    # math functions and have the correct attributes.
    self.assertFileCheckMatches(
        ir_content,
        """
        CHECK: define linkonce_odr i32 @{{.+}}bar
        CHECK-SAME: alwaysinline
        CHECK-NEXT: {
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: br label {{.*}}
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: call double @"__nv_sin"
        CHECK-NEXT: store double {{.*}}, double* {{.*}}
        CHECK-NEXT: ret i32 0
        CHECK-NEXT: }
        """,
    )

    self.assertFileCheckMatches(
        ir_content,
        """
        CHECK: define linkonce_odr i32 @{{.+}}baz
        CHECK-NOT: alwaysinline
        CHECK-NEXT: {
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: br label {{.*}}
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: call double @"__nv_cos"
        CHECK-NEXT: store double {{.*}}, double* {{.*}}
        CHECK-NEXT: ret i32 0
        CHECK-NEXT: }
        """,
    )
729
+
730
def test_DILocation_versioned_variables(self):
    """Tests that DILocation information for versions of variables matches
    up to their definition site."""

    @cuda.jit(debug=True, opt=False)
    def foo(dest, n):
        """
        CHECK: define void @{{.+}}foo
        CHECK: store i64 5, i64* %"c{{.+}} !dbg ![[STORE5:.+]]
        CHECK: store i64 1, i64* %"c{{.+}} !dbg ![[STORE1:.+]]
        CHECK: [[STORE5]] = !DILocation(
        CHECK: [[STORE1]] = !DILocation(
        """
        if n:
            c = 5
        else:
            c = 1
        dest[0] = c

    # The kernel's own source text is searched further down to compute the
    # expected line numbers, so the kernel body above must not gain lines
    # containing the substrings that search looks for.
    foo_source_lines, foo_source_lineno = inspect.getsourcelines(
        foo.py_func
    )

    result = cuda.device_array(1, dtype=np.int32)
    foo[1, 1](result, 1)
    result.copy_to_host()
    self.assertEqual(result[0], 5)

    ir_content = foo.inspect_llvm()[foo.signatures[0]]
    self.assertFileCheckMatches(ir_content, foo.__doc__)

    # Collect lines pertaining to the function `foo` and debuginfo
    # metadata
    lines = ir_content.splitlines()
    debuginfo_equals = re.compile(r"!(\d+) = ")
    debug_info_lines = list(
        filter(lambda x: debuginfo_equals.search(x), lines)
    )

    function_start_regex = re.compile(r"define void @.+foo")
    function_start_lines = list(
        filter(
            lambda x: function_start_regex.search(x[1]), enumerate(lines)
        )
    )
    function_end_lines = list(
        filter(lambda x: x[1] == "}", enumerate(lines))
    )
    # Slice from the first matching `define` line up to the first line
    # that is exactly "}".
    foo_ir_lines = lines[
        function_start_lines[0][0] : function_end_lines[0][0]
    ]

    # Check the if condition's debuginfo
    cond_branch = list(filter(lambda x: "br i1" in x, foo_ir_lines))
    self.assertEqual(len(cond_branch), 1)
    self.assertIn("!dbg", cond_branch[0])
    # Everything after "!dbg" on the instruction line is the node reference.
    cond_branch_dbginfo_node = cond_branch[0].split("!dbg")[1].strip()
    cond_branch_dbginfos = list(
        filter(
            lambda x: cond_branch_dbginfo_node + " = " in x,
            debug_info_lines,
        )
    )
    self.assertEqual(len(cond_branch_dbginfos), 1)
    cond_branch_dbginfo = cond_branch_dbginfos[0]

    # Check debuginfo for the store instructions
    store_1_lines = list(filter(lambda x: "store i64 1" in x, foo_ir_lines))
    store_5_lines = list(filter(lambda x: "store i64 5" in x, foo_ir_lines))

    self.assertEqual(len(store_1_lines), 2)
    self.assertEqual(len(store_5_lines), 2)

    # Both stores of a given constant must reference the same !dbg node.
    store_1_dbginfo_set = set(
        map(lambda x: x.split("!dbg")[1].strip(), store_1_lines)
    )
    store_5_dbginfo_set = set(
        map(lambda x: x.split("!dbg")[1].strip(), store_5_lines)
    )
    self.assertEqual(len(store_1_dbginfo_set), 1)
    self.assertEqual(len(store_5_dbginfo_set), 1)
    store_1_dbginfo_node = store_1_dbginfo_set.pop()
    store_5_dbginfo_node = store_5_dbginfo_set.pop()
    store_1_dbginfos = list(
        filter(
            lambda x: store_1_dbginfo_node + " = " in x, debug_info_lines
        )
    )
    store_5_dbginfos = list(
        filter(
            lambda x: store_5_dbginfo_node + " = " in x, debug_info_lines
        )
    )
    self.assertEqual(len(store_1_dbginfos), 1)
    self.assertEqual(len(store_5_dbginfos), 1)
    store_1_dbginfo = store_1_dbginfos[0]
    store_5_dbginfo = store_5_dbginfos[0]

    # Ensure the line numbers match what we expect based on the Python source
    line_number_regex = re.compile(r"line: (\d+)")
    LineNumbers = namedtuple(
        "LineNumbers", ["cond_branch", "store_5", "store_1"]
    )
    line_number_matches = LineNumbers(
        *map(
            lambda x: line_number_regex.search(x),
            [cond_branch_dbginfo, store_5_dbginfo, store_1_dbginfo],
        )
    )
    self.assertTrue(
        all(
            map(
                lambda x: x is not None,
                line_number_matches,
            )
        )
    )
    line_numbers = LineNumbers(
        *map(
            lambda x: int(x.group(1)),
            line_number_matches,
        )
    )
    # Expected values: the offset of each matching source line within the
    # kernel's source, added to the line number the kernel source starts at.
    # LineNumbers takes exactly three values, so exactly three source lines
    # must match, in the field order declared above.
    source_line_numbers = LineNumbers(
        *map(
            lambda x: x[0] + foo_source_lineno,
            filter(
                lambda x: "c = " in x[1] or "if n:" in x[1],
                enumerate(foo_source_lines),
            ),
        )
    )
    self.assertEqual(line_numbers, source_line_numbers)
863
+
864
def test_debuginfo_asm(self):
    # Jits the same Python function twice, with and without debug, and
    # checks the resulting assembly for a `.debug` section.
    def foo():
        pass

    foo_debug = cuda.jit(debug=True, opt=False)(foo)
    # Launch once so that a signature exists to look the assembly up by.
    foo_debug[1, 1]()
    asm = foo_debug.inspect_asm()[foo_debug.signatures[0]]
    self.assertFileCheckMatches(
        asm,
        """
        CHECK: .section{{.+}}.debug
        """,
    )

    foo_nodebug = cuda.jit(debug=False)(foo)
    foo_nodebug[1, 1]()
    asm = foo_nodebug.inspect_asm()[foo_nodebug.signatures[0]]
    self.assertFileCheckMatches(
        asm,
        """
        CHECK-NOT: .section{{.+}}.debug
        """,
    )
887
+
888
# FileCheck directives shared by the shared-memory address class tests.
# Each chain walks from the local variable to the pointer type of its
# "data" member:
# shared_arr -> composite -> elements[4] (data field at index 4) -> pointer with dwarfAddressSpace: 8
# local_arr -> composite -> elements[4] (data field at index 4) -> pointer without dwarfAddressSpace: 8
address_class_filechecks = r"""
    CHECK-DAG: [[SHARED_VAR:![0-9]+]] = !DILocalVariable({{.*}}name: "shared_arr"{{.*}}type: [[SHARED_COMPOSITE:![0-9]+]]
    CHECK-DAG: [[SHARED_COMPOSITE]] = {{.*}}!DICompositeType(elements: [[SHARED_ELEMENTS:![0-9]+]]
    CHECK-DAG: [[SHARED_ELEMENTS]] = !{{{.*}}, {{.*}}, {{.*}}, {{.*}}, [[SHARED_DATA:![0-9]+]], {{.*}}, {{.*}}}
    CHECK-DAG: [[SHARED_DATA]] = !DIDerivedType(baseType: [[SHARED_PTR:![0-9]+]], name: "data"
    CHECK-DAG: [[SHARED_PTR]] = !DIDerivedType({{.*}}dwarfAddressSpace: 8{{.*}}tag: DW_TAG_pointer_type

    CHECK-DAG: [[LOCAL_VAR:![0-9]+]] = !DILocalVariable({{.*}}name: "local_arr"{{.*}}type: [[LOCAL_COMPOSITE:![0-9]+]]
    CHECK-DAG: [[LOCAL_COMPOSITE]] = {{.*}}!DICompositeType(elements: [[LOCAL_ELEMENTS:![0-9]+]]
    CHECK-DAG: [[LOCAL_ELEMENTS]] = !{{{.*}}, {{.*}}, {{.*}}, {{.*}}, [[LOCAL_DATA:![0-9]+]], {{.*}}, {{.*}}}
    CHECK-DAG: [[LOCAL_DATA]] = !DIDerivedType(baseType: [[LOCAL_PTR:![0-9]+]], name: "data"
    CHECK-DAG: [[LOCAL_PTR]] = !DIDerivedType(baseType: {{.*}}tag: DW_TAG_pointer_type
    CHECK-NOT: [[LOCAL_PTR]]{{.*}}dwarfAddressSpace: 8
"""
904
+
905
def _test_shared_memory_address_class(self, dtype):
    """Test that shared memory arrays have correct DWARF address class.

    The pointer type behind the ``data`` member of the shared array must
    carry ``dwarfAddressSpace: 8`` in its debug metadata, while the
    corresponding pointer of the regular local array must not. The
    expectations are the FileCheck directives held in
    ``address_class_filechecks``.

    ``dtype`` is used as the kernel argument type (through
    ``numpy_support.from_dtype``), as the element type of both arrays, and
    is called as ``dtype(0)`` inside the kernel.
    """
    sig = (numpy_support.from_dtype(dtype),)

    # The names `shared_arr` and `local_arr` are referenced by the FileCheck
    # directives and must stay as they are.
    @cuda.jit(sig, debug=True, opt=False)
    def kernel_with_shared(data):
        shared_arr = cuda.shared.array(32, dtype=dtype)
        local_arr = cuda.local.array(32, dtype=dtype)
        idx = cuda.grid(1)
        if idx < 32:
            shared_arr[idx] = data + idx
            local_arr[idx] = data * 2 + idx
        cuda.syncthreads()
        if idx == 0:
            result = dtype(0)
            for i in range(32):
                result += shared_arr[i] + local_arr[i]

    # Only the generated IR is inspected; the kernel is never launched here.
    llvm_ir = kernel_with_shared.inspect_llvm(sig)

    self.assertFileCheckMatches(llvm_ir, self.address_class_filechecks)
931
+
932
+ def test_shared_memory_address_class_int32(self):
933
+ self._test_shared_memory_address_class(np.int32)
934
+
935
+ def test_shared_memory_address_class_complex64(self):
936
+ self._test_shared_memory_address_class(np.complex64)
937
+
938
+ def test_shared_memory_address_class_boolean(self):
939
+ self._test_shared_memory_address_class(np.bool)
940
+
941
+ def test_shared_memory_address_class_float16(self):
942
+ self._test_shared_memory_address_class(np.float16)
943
+
944
def test_shared_memory_address_class_record(self):
    # Record-dtype variant of the shared memory address class check. The
    # kernel is written out here because it accesses the record fields
    # `a` and `b` individually.
    dtype = np.dtype(
        [
            ("a", np.int32),
            ("b", np.float32),
        ]
    )
    sig = (numpy_support.from_dtype(dtype),)

    # The names `shared_arr` and `local_arr` are referenced by the FileCheck
    # directives and must stay as they are.
    @cuda.jit(sig, debug=True, opt=False)
    def kernel_with_shared(data):
        shared_arr = cuda.shared.array(32, dtype=dtype)
        local_arr = cuda.local.array(32, dtype=dtype)
        result = cuda.local.array(1, dtype=dtype)
        idx = cuda.grid(1)
        if idx < 32:
            shared_arr[idx].a = data.a + idx
            local_arr[idx].a = data.a * 2 + idx
            shared_arr[idx].b = data.b + idx
            local_arr[idx].b = data.b * 2 + idx
        cuda.syncthreads()
        if idx == 0:
            result[0].a = 0
            result[0].b = 0.0
            for i in range(32):
                result[0].a += shared_arr[i].a + local_arr[i].a
                result[0].b += shared_arr[i].b + local_arr[i].b

    # Only the generated IR is inspected; the kernel is never launched here.
    llvm_ir = kernel_with_shared.inspect_llvm(sig)

    self.assertFileCheckMatches(llvm_ir, self.address_class_filechecks)
975
+
976
+
977
# Run the tests through unittest when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()