numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,889 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from collections import namedtuple
5
+ from numba.cuda.tests.support import override_config, captured_stdout
6
+ from numba.cuda.testing import skip_on_cudasim
7
+ from numba import cuda
8
+ from numba.cuda import types
9
+ from numba.cuda.testing import CUDATestCase
10
+ from numba.cuda.core import config
11
+ from textwrap import dedent
12
+ import math
13
+ import itertools
14
+ import re
15
+ import unittest
16
+ import warnings
17
+ from numba.cuda.core.errors import NumbaDebugInfoWarning
18
+ from numba.cuda.tests.support import ignore_internal_warnings
19
+ import numpy as np
20
+ import inspect
21
+
22
+
23
+ @skip_on_cudasim("Simulator does not produce debug dumps")
24
+ class TestCudaDebugInfo(CUDATestCase):
25
+ """
26
+ These tests only check the compiled PTX and LLVM IR for debug information
27
+ """
28
+
29
+ def _getasm(self, fn, sig):
30
+ fn.compile(sig)
31
+ return fn.inspect_asm(sig)
32
+
33
+ def _check(self, fn, sig, expect):
34
+ asm = self._getasm(fn, sig=sig)
35
+ re_section_dbginfo = re.compile(r"\.section\s+\.debug_info\s+{")
36
+ match = re_section_dbginfo.search(asm)
37
+ assertfn = self.assertIsNotNone if expect else self.assertIsNone
38
+ assertfn(match, msg=asm)
39
+
40
def test_no_debuginfo_in_asm(self):
    """With ``debug=False`` the asm must not have a ``.debug_info`` section."""
    array_sig = (types.int32[:],)

    @cuda.jit(debug=False, opt=False)
    def foo(x):
        x[0] = 1

    self._check(foo, sig=array_sig, expect=False)
46
+
47
def test_debuginfo_in_asm(self):
    """With ``debug=True`` the asm must contain a ``.debug_info`` section."""
    array_sig = (types.int32[:],)

    @cuda.jit(debug=True, opt=False)
    def foo(x):
        x[0] = 1

    self._check(foo, sig=array_sig, expect=True)
53
+
54
def test_environment_override(self):
    """CUDA_DEBUGINFO_DEFAULT sets the default; explicit ``debug`` wins."""
    array_sig = (types.int32[:],)

    with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
        # No ``debug`` argument given: debug info is expected because of
        # the overridden default.
        @cuda.jit(opt=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=array_sig, expect=True)

        # An explicit ``debug=False`` must take precedence over the default.
        @cuda.jit(debug=False)
        def bar(x):
            x[0] = 1

        self._check(bar, sig=array_sig, expect=False)
69
+
70
def test_issue_5835(self):
    # Regression test: invalid debug metadata used to segfault NVVM whenever
    # a function was compiled with debug on and optimization off. Passing the
    # signature to the decorator compiles eagerly, so defining the kernel
    # without a crash is the whole test.
    sig = (types.int32[::1],)

    @cuda.jit(sig, debug=True, opt=False)
    def f(x):
        x[0] = 0
77
+
78
def test_issue_9888(self):
    # Compiler-created symbols must not be emitted as DILocalVariable.
    # See Numba PR #9888: https://github.com/numba/numba/pull/9888
    sig = (types.boolean,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(cond):
        if cond:
            x = 1  # noqa: F841
        else:
            x = 0  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)
    # No variable in the debug metadata may have a name starting with "bool".
    bool_named_var = re.search(r"!DILocalVariable\(.*name:\s+\"bool", llvm_ir)
    self.assertIsNone(bool_named_var, msg=llvm_ir)
95
+
96
def test_bool_type(self):
    """The debug type of a comparison result is encoded as DW_ATE_boolean."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x == y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # Pull the metadata node id out of the `type` field of the
    # DILocalVariable describing "z".
    var_match = re.search(
        r'!DILocalVariable\(.*name:\s+"z".*type:\s+!(\d+)', llvm_ir
    )
    self.assertIsNotNone(var_match, msg=llvm_ir)
    type_node = var_match.group(1)

    # That node must be a DIBasicType with the DW_ATE_boolean encoding.
    type_pat = rf"!{type_node}\s+=\s+!DIBasicType\(.*DW_ATE_boolean"
    self.assertIsNotNone(re.search(type_pat, llvm_ir), msg=llvm_ir)
115
+
116
def test_grid_group_type(self):
    """A grid group is described as a 64-bit unsigned "GridGroup" type."""
    sig = (types.int32,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(x):
        grid = cuda.cg.this_grid()  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    grid_group_type = re.search(
        r'!DIBasicType\(.*DW_ATE_unsigned, name: "GridGroup", size: 64',
        llvm_ir,
    )
    self.assertIsNotNone(grid_group_type, msg=llvm_ir)
128
+
129
@unittest.skip("Wrappers no longer exist")
def test_wrapper_has_debuginfo(self):
    """(Skipped) The single kernel definition line must carry ``!dbg``."""
    sig = (types.int32[::1],)

    @cuda.jit(sig, debug=True, opt=0)
    def f(x):
        x[0] = 1

    defines = []
    for ir_line in f.inspect_llvm(sig).splitlines():
        if 'define void @"_ZN6cudapy' in ir_line:
            defines.append(ir_line)

    # Exactly one matching definition is expected.
    self.assertEqual(len(defines), 1)
    self.assertIn("!dbg", defines[0])
150
+
151
def test_debug_function_calls_internal_impl(self):
    # Calling into a module generated from an implementation internal to
    # Numba means several modules are compiled with NVVM: the internal
    # implementation and the caller. Two modules are involved here because
    # `in (2, 3)` is implemented with:
    #
    #   numba::cpython::listobj::in_seq::$3clocals$3e::seq_contains_impl$242(
    #       UniTuple<long long, 2>,
    #       int
    #   )
    #
    # Condensed from the reproducer in Issue 5311:
    # https://github.com/numba/numba/issues/5311#issuecomment-674206587
    sig = (types.int32[:], types.int32[:])

    @cuda.jit(sig, debug=True, opt=False)
    def f(inp, outp):
        outp[0] = 1 if inp[0] in (2, 3) else 3
168
+
169
def test_debug_function_calls_device_function(self):
    # A kernel that calls a device function makes NVVM compile more than one
    # module (one for the caller, one for the callee). This checks that no
    # NVVM error is raised in that situation.
    debug_flags = dict(debug=True, opt=0)
    kernel_sig = (types.int32[:],)

    @cuda.jit(device=True, **debug_flags)
    def threadid():
        return cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x

    @cuda.jit(kernel_sig, **debug_flags)
    def kernel(arr):
        i = cuda.grid(1)
        if i < len(arr):
            arr[i] = threadid()
183
+
184
def _test_chained_device_function(self, kernel_debug, f1_debug, f2_debug):
    """Launch ``kernel -> f1 -> f2`` with the given per-function debug flags.

    All three functions are compiled with ``opt=False``; only the ``debug``
    setting of each varies. The kernel is launched once on a 1x1 grid.
    """
    kernel_sig = (types.int32, types.int32)

    @cuda.jit(device=True, debug=f2_debug, opt=False)
    def f2(x):
        return x + 1

    @cuda.jit(device=True, debug=f1_debug, opt=False)
    def f1(x, y):
        return x - f2(y)

    @cuda.jit(kernel_sig, debug=kernel_debug, opt=False)
    def kernel(x, y):
        f1(x, y)

    launch = kernel[1, 1]
    launch(1, 2)
198
+
199
def test_chained_device_function(self):
    # A kernel calling a device function that in turn calls another device
    # function should succeed regardless of which of the three jit
    # decorators have debug=True. See Issue #7159.
    flags = (True, False)

    for kernel_debug, f1_debug, f2_debug in itertools.product(
        flags, flags, flags
    ):
        with self.subTest(
            kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
        ):
            self._test_chained_device_function(
                kernel_debug, f1_debug, f2_debug
            )
213
+
214
def _test_chained_device_function_two_calls(
    self, kernel_debug, f1_debug, f2_debug
):
    """Launch a kernel that calls ``f1`` (which calls ``f2``) and ``f2``.

    Every function is compiled with ``opt=False``; only the ``debug``
    setting of each varies. The kernel has no explicit signature, so it is
    compiled when launched on the 1x1 grid.
    """

    @cuda.jit(device=True, debug=f2_debug, opt=False)
    def f2(x):
        return x + 1

    @cuda.jit(device=True, debug=f1_debug, opt=False)
    def f1(x, y):
        return x - f2(y)

    @cuda.jit(debug=kernel_debug, opt=False)
    def kernel(x, y):
        f1(x, y)
        f2(x)

    launch = kernel[1, 1]
    launch(1, 2)
231
+
232
def test_chained_device_function_two_calls(self):
    # A kernel that calls a device function (which calls a leaf device
    # function) and also calls that leaf directly should succeed regardless
    # of which jit decorators have debug=True. See Issue #7159.
    flags = (True, False)

    for kernel_debug, f1_debug, f2_debug in itertools.product(
        flags, flags, flags
    ):
        with self.subTest(
            kernel_debug=kernel_debug, f1_debug=f1_debug, f2_debug=f2_debug
        ):
            self._test_chained_device_function_two_calls(
                kernel_debug, f1_debug, f2_debug
            )
247
+
248
def test_chained_device_three_functions(self):
    # Like test_chained_device_function, but with three device functions so
    # that the recursion which fixes up linkage for debug has to visit the
    # call tree all the way down.
    def three_device_fns(kernel_debug, leaf_debug):
        @cuda.jit(device=True, debug=leaf_debug, opt=False)
        def f3(x):
            return x * x

        @cuda.jit(device=True)
        def f2(x):
            return f3(x) + 1

        @cuda.jit(device=True)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit(debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)

        kernel[1, 1](1, 2)

    # Cover every combination: debug on the kernel, on the leaf, on both,
    # and on neither (kernel flag varies slowest, True first).
    for kernel_debug in (True, False):
        for leaf_debug in (True, False):
            three_device_fns(
                kernel_debug=kernel_debug, leaf_debug=leaf_debug
            )
276
+
277
def _test_kernel_args_types(self):
    """Check that both kernel arguments are described as signed ``int32``."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x + y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # DISubroutineType `types` field -> id of the node listing the types.
    subroutine = re.search(r"!DISubroutineType\(types:\s+!(\d+)\)", llvm_ir)
    self.assertIsNotNone(subroutine, msg=llvm_ir)
    types_node = subroutine.group(1)

    # That node is expected to list exactly two members; capture both ids.
    members = re.search(
        rf"!{types_node}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}", llvm_ir
    )
    self.assertIsNotNone(members, msg=llvm_ir)

    # Each member must be a signed DIBasicType named "int32".
    for member_id in members.groups():
        member_pat = (
            rf"!{member_id}\s+=\s+!DIBasicType\("
            r'.*DW_ATE_signed,\s+name:\s+"int32"'
        )
        self.assertIsNotNone(re.search(member_pat, llvm_ir), msg=llvm_ir)
306
+
307
def test_kernel_args_types(self):
    """Run the kernel-argument type checks without any config override."""
    self._test_kernel_args_types()
309
+
310
def test_kernel_args_types_dump(self):
    # Repeat the kernel-argument type checks with DUMP_LLVM switched on
    # (see issue #135), capturing stdout while they run.
    with override_config("DUMP_LLVM", 1), captured_stdout():
        self._test_kernel_args_types()
315
+
316
def test_kernel_args_names(self):
    """The kernel argument keeps its name, without an ``arg.`` prefix."""
    sig = (types.int32,)

    @cuda.jit("void(int32)", debug=True, opt=False)
    def f(x):
        z = x  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # The definition must take the argument as %"x" ...
    plain_name = re.search(r"define void @.*\(i32 %\"x\"\)", llvm_ir)
    self.assertIsNotNone(plain_name, msg=llvm_ir)
    # ... and never as %"arg.x".
    prefixed_name = re.search(r"define void @.*\(i32 %\"arg\.x\"\)", llvm_ir)
    self.assertIsNone(prefixed_name, msg=llvm_ir)
332
+
333
def test_llvm_dbg_value(self):
    """Variables are tracked with llvm.dbg.value, not llvm.dbg.declare."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z1 = x  # noqa: F841
        z2 = 100  # noqa: F841
        z3 = y  # noqa: F841
        z4 = True  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # No call to llvm.dbg.declare may remain ...
    declare_call = re.search(r'call void @"llvm.dbg.declare"', llvm_ir)
    self.assertIsNone(declare_call, msg=llvm_ir)
    # ... and at least one call to llvm.dbg.value must be present.
    value_call = re.search(r'call void @"llvm.dbg.value"', llvm_ir)
    self.assertIsNotNone(value_call, msg=llvm_ir)
351
+
352
def test_llvm_dbg_value_range(self):
    # The kernel docstring is a FileCheck script that is matched against
    # the LLVM IR generated for the kernel; it must stay as written.
    sig = (types.int64,)

    @cuda.jit("void(int64,)", debug=True, opt=False)
    def foo(x):
        """
        CHECK: store i1 true, i1* %"second.1"
        CHECK: call void @"llvm.dbg.value"
        CHECK: store i1 true, i1* %"second.2"
        CHECK: call void @"llvm.dbg.value"

        CHECK: %[[VAL_1:.*]] = load i1, i1* %"second.2"
        CHECK: %[[VAL_2:.*]] = load i1, i1* %[[VAL_3:.*]]
        CHECK: store i1 %[[VAL_1]], i1* %[[VAL_3]]
        CHECK: call void @"llvm.dbg.value"(metadata i1 %[[VAL_1]], metadata ![[VAL_4:[0-9]+]]

        CHECK: ![[VAL_4]] = !DILocalVariable{{.+}}name: "second"
        """
        if x > 0:
            second = x > 10
        else:
            second = True
        if second:
            pass

    llvm_by_sig = foo.inspect_llvm()
    self.assertFileCheckMatches(llvm_by_sig[sig], foo.__doc__)
379
+
380
def test_no_user_var_alias(self):
    """Re-assigning "z" must not produce a versioned ``z$1`` debug variable."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x  # noqa: F841
        z = y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)
    aliased_var = re.search(r'!DILocalVariable.*name:\s+"z\$1".*', llvm_ir)
    self.assertIsNone(aliased_var, msg=llvm_ir)
392
+
393
def test_no_literal_type(self):
    """No DIBasicType may carry a name that starts with "Literal"."""
    sig = (types.int32,)

    @cuda.jit("void(int32)", debug=True, opt=False)
    def f(x):
        z = x  # noqa: F841
        z = 100  # noqa: F841
        z = True  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)
    literal_type = re.search(r'!DIBasicType.*name:\s+"Literal.*', llvm_ir)
    self.assertIsNone(literal_type, msg=llvm_ir)
406
+
407
@unittest.skipIf(
    config.CUDA_DEBUG_POLY, "Uses old union format, not variant_part"
)
def test_union_poly_types(self):
    """A variable assigned several types is described as a 64-bit union.

    The debug metadata is walked top-down: the DILocalVariable for "foo",
    the union DICompositeType it refers to, that union's element list, and
    finally each of the three member DIDerivedType nodes.
    """
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        foo = 100  # noqa: F841
        foo = 2.34  # noqa: F841
        foo = True  # noqa: F841
        foo = 200  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # Extract the type node id of "foo".
    pat1 = r'!DILocalVariable\(.*name: "foo".*type: !(\d+)\)'
    match = re.search(pat1, llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
    union_id = match.group(1)

    # Verify the union type and extract the id of its elements node.
    pat2 = rf"!{union_id} = distinct !DICompositeType\(elements: !(\d+),.*size: 64, tag: DW_TAG_union_type\)"  # noqa: E501
    match = re.search(pat2, llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)
    elements_id = match.group(1)

    # Extract the member node ids from *that* elements node. Anchoring on
    # its id prevents an unrelated three-element metadata tuple elsewhere
    # in the IR from being picked up instead.
    pat3 = rf"!{elements_id} = !{{ !(\d+), !(\d+), !(\d+) }}"
    match = re.search(pat3, llvm_ir)
    self.assertIsNotNone(match, msg=llvm_ir)

    # Verify the member nodes. The parentheses are escaped so that they
    # match literally rather than forming a regex group.
    expected_members = (("_bool", 8), ("_float64", 64), ("_int64", 64))
    for member_id, (name, size) in zip(match.groups(), expected_members):
        member_pat = rf'!{member_id} = !DIDerivedType\(.*name: "{name}", size: {size}, tag: DW_TAG_member\)'  # noqa: E501
        self.assertIsNotNone(re.search(member_pat, llvm_ir), msg=llvm_ir)
448
+
449
def test_union_debug(self):
    """Run a debug kernel that rebinds ``foo`` and check the stored result."""

    @cuda.jit("void(u8, int64[::1])", debug=True, opt=False)
    def a_union_use_case(arg, results):
        foo = 1
        foo = arg
        if foo < 1:
            foo = 2
            return
        bar = foo == 0
        results[0] = 1 if not bar else 0

    expected = "[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"

    with captured_stdout() as out:
        results = cuda.to_device(np.zeros(16, dtype=np.int64))
        a_union_use_case[1, 1](100, results)
        print(results.copy_to_host())

    # The printed host copy must show a 1 in slot 0 and zeros elsewhere.
    printed = out.getvalue()
    self.assertIn(expected, printed)
466
+
467
@unittest.skipUnless(config.CUDA_DEBUG_POLY, "CUDA_DEBUG_POLY not enabled")
def test_poly_variant_part(self):
    """Check DW_TAG_variant_part debug info for a polymorphic variable.

    Only runs when CUDA_DEBUG_POLY is enabled. The FileCheck script
    verifies the wrapper struct, its discriminator member, the
    variant_part node and the four variant members generated for ``foo``.
    """
    # The `extraData` of each variant member is either a typed constant
    # (i8 0, i8 1, ...) or a node reference (!123, !456), depending on
    # configuration.
    extradata_pattern = (
        "i8 {{[0-9]+}}"
        if config.CUDA_DEBUG_POLY_USE_TYPED_CONST
        else "{{![0-9]+}}"
    )

    @cuda.jit("void()", debug=True, opt=False)
    def f():
        foo = 100  # noqa: F841
        foo = 3.14  # noqa: F841
        foo = True  # noqa: F841
        foo = np.int32(42)  # noqa: F841

    llvm_by_sig = f.inspect_llvm()
    llvm_ir = llvm_by_sig[tuple()]

    # The script captures node ids so the hierarchy is checked link by
    # link; EXTRADATA is a placeholder filled in from the config above.
    check_template = """
        CHECK-DAG: !DILocalVariable({{.*}}name: "foo"{{.*}}type: [[WRAPPER:![0-9]+]]
        CHECK-DAG: [[WRAPPER]] = !DICompositeType({{.*}}elements: [[ELEMENTS:![0-9]+]]{{.*}}name: "variant_wrapper_struct"{{.*}}size: 128{{.*}}tag: DW_TAG_structure_type)
        CHECK-DAG: [[ELEMENTS]] = !{ [[DISC:![0-9]+]], [[VPART:![0-9]+]] }
        CHECK-DAG: [[DISC]] = !DIDerivedType({{.*}}name: "discriminator-{{[0-9]+}}"{{.*}}size: 8{{.*}}tag: DW_TAG_member)
        CHECK-DAG: [[VPART]] = !DICompositeType({{.*}}discriminator: [[DISC]]{{.*}}elements: [[VMEMBERS:![0-9]+]]{{.*}}tag: DW_TAG_variant_part)
        CHECK-DAG: [[VMEMBERS]] = !{ [[VM1:![0-9]+]], [[VM2:![0-9]+]], [[VM3:![0-9]+]], [[VM4:![0-9]+]] }
        CHECK-DAG: [[VM1]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_bool"{{.*}}offset: 8{{.*}}tag: DW_TAG_member)
        CHECK-DAG: [[VM2]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_float64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
        CHECK-DAG: [[VM3]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int32"{{.*}}offset: 32{{.*}}tag: DW_TAG_member)
        CHECK-DAG: [[VM4]] = !DIDerivedType({{.*}}extraData: EXTRADATA{{.*}}name: "_int64"{{.*}}offset: 64{{.*}}tag: DW_TAG_member)
        """
    check_pattern = check_template.replace("EXTRADATA", extradata_pattern)

    self.assertFileCheckMatches(llvm_ir, check_pattern)
506
+
507
def test_DW_LANG(self):
    # The kernel docstring is the FileCheck script describing the expected
    # DICompileUnit attributes; it is matched against the kernel's LLVM IR.
    @cuda.jit(debug=True, opt=False)
    def foo():
        """
        CHECK: distinct !DICompileUnit
        CHECK-SAME: emissionKind: FullDebug
        CHECK-SAME: isOptimized: true
        CHECK-SAME: language: DW_LANG_C_plus_plus
        CHECK-SAME: producer: "clang (Numba)"
        """
        pass

    # Launching compiles the kernel for the empty argument tuple.
    launch = foo[1, 1]
    launch()

    llvm_by_sig = foo.inspect_llvm()
    self.assertFileCheckMatches(llvm_by_sig[tuple()], foo.__doc__)
523
+
524
def test_DILocation(self):
    """Check that the DILocation information is reasonable.

    The kernel `foo` produces LLVM like:
    define function() {
    entry:
      alloca
      store 0 to alloca
      <arithmetic for doing the operations on b, c, d>
      setup for print
      branch
    other_labels:
    ... <elided>
    }

    The FileCheck script in the kernel docstring verifies that:
    * the allocas and their initialising stores carry no !dbg
    * the arithmetic appears in source order, each instruction with !dbg
    * the !dbg nodes are defined as DILocation entries, in the same order
    """
    sig = (types.float64,)

    @cuda.jit(sig, debug=True, opt=False)
    def foo(a):
        """
        CHECK-LABEL: define void @{{.+}}foo
        CHECK: entry:

        CHECK: %[[VAL_0:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_0]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_1:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_1]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_2:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_2]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_3:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_3]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_4:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_4]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_5:.*]] = alloca double
        CHECK-NOT: !dbg
        CHECK: store double 0.0, double* %[[VAL_5]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_6:.*]] = alloca i8*
        CHECK-NOT: !dbg
        CHECK: store i8* null, i8** %[[VAL_6]]
        CHECK-NOT: !dbg
        CHECK: %[[VAL_7:.*]] = alloca i8*
        CHECK-NOT: !dbg
        CHECK: store i8* null, i8** %[[VAL_7]]
        CHECK-NOT: !dbg

        CHECK: br label %"[[ENTRY:.+]]"
        CHECK-NOT: !dbg
        CHECK: [[ENTRY]]:

        CHECK: fadd{{.+}} !dbg ![[DBGADD:[0-9]+]]
        CHECK: fmul{{.+}} !dbg ![[DBGMUL:[0-9]+]]
        CHECK: fdiv{{.+}} !dbg ![[DBGDIV:[0-9]+]]

        CHECK: ![[DBGADD]] = !DILocation
        CHECK: ![[DBGMUL]] = !DILocation
        CHECK: ![[DBGDIV]] = !DILocation
        """
        b = a + 1.23
        c = b * 2.34
        a = b / c

    llvm_by_sig = foo.inspect_llvm()
    self.assertFileCheckMatches(llvm_by_sig[sig], foo.__doc__)
604
+
605
def test_missing_source(self):
    """A kernel built from ``exec``-ed code emits one missing-source warning."""
    strsrc = """
    def foo():
        pass
    """
    # Define the function via exec so it is not backed by a source file.
    namespace = {}
    exec(dedent(strsrc), {}, namespace)
    foo = cuda.jit(debug=True, opt=False)(namespace["foo"])

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", NumbaDebugInfoWarning)
        ignore_internal_warnings()
        foo[1, 1]()

    # Exactly one warning of the expected category ...
    self.assertEqual(len(caught), 1)
    warning = caught[0]
    self.assertEqual(warning.category, NumbaDebugInfoWarning)
    text = str(warning.message)
    # ... carrying the right message ...
    self.assertIn("Could not find source for function", text)
    # ... and naming the offending function.
    self.assertIn(str(foo.py_func), text)
627
+
628
def test_no_if_op_bools_declared(self):
    # Variables used only as the condition of an `if` should not be
    # declared in the debug info.
    # See Numba PR #9888: https://github.com/numba/numba/pull/9888
    #
    # NOTE(review): the CHECK line in the kernel docstring is never passed
    # to FileCheck here, and it names "choices" rather than "choice".
    # Confirm whether it was meant to be asserted.
    # NOTE(review): only lines mentioning llvm.dbg.declare are inspected; if
    # the IR contains none, the loop below checks nothing. Verify.
    @cuda.jit(
        "int64(boolean, boolean)",
        debug=True,
        opt=False,
        _dbg_optnone=True,
        device=True,
    )
    def choice(cond1, cond2):
        """
        CHECK: define void @{{.+}}choices
        """
        if cond1 and cond2:
            return 1
        else:
            return 2

    llvm_by_sig = choice.inspect_llvm()
    ir_content = llvm_by_sig[choice.signatures[0]]

    declare_lines = [
        ir_line
        for ir_line in ir_content.splitlines()
        if "llvm.dbg.declare" in ir_line
    ]
    for declare_line in declare_lines:
        self.assertNotIn("bool", declare_line)
652
+
653
def test_llvm_inliner_flag_conflict(self):
    # bar will be marked as 'alwaysinline', but when DEBUGINFO_DEFAULT is
    # set functions are not marked as 'alwaysinline', which results in a
    # conflict. baz will not be marked as 'alwaysinline' as a result of
    # DEBUGINFO_DEFAULT.

    @cuda.jit(forceinline=True)
    def bar(x):
        return math.sin(x)

    @cuda.jit(forceinline=False)
    def baz(x):
        return math.cos(x)

    @cuda.jit(opt=True)
    def foo(x, y):
        """
        CHECK-LABEL: define void @{{.+}}foo
        CHECK: call i32 @"[[BAR:.+]]"(
        CHECK: call i32 @"[[BAZ:.+]]"(

        CHECK-DAG: declare i32 @"[[BAR]]"({{.+}}alwaysinline
        CHECK-DAG: declare i32 @"[[BAZ]]"(
        CHECK-DAG: define linkonce_odr i32 @"[[BAR]]"({{.+}}alwaysinline
        CHECK-DAG: define linkonce_odr i32 @"[[BAZ]]"(
        """
        a = bar(y)
        b = baz(y)
        x[0] = a + b

    # FileCheck scripts for the two device functions: each must call the
    # matching device math function, and only bar may be 'alwaysinline'.
    bar_checks = """
        CHECK: define linkonce_odr i32 @{{.+}}bar
        CHECK-SAME: alwaysinline
        CHECK-NEXT: {
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: br label {{.*}}
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: call double @"__nv_sin"
        CHECK-NEXT: store double {{.*}}, double* {{.*}}
        CHECK-NEXT: ret i32 0
        CHECK-NEXT: }
        """
    baz_checks = """
        CHECK: define linkonce_odr i32 @{{.+}}baz
        CHECK-NOT: alwaysinline
        CHECK-NEXT: {
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: br label {{.*}}
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: call double @"__nv_cos"
        CHECK-NEXT: store double {{.*}}, double* {{.*}}
        CHECK-NEXT: ret i32 0
        CHECK-NEXT: }
        """

    # Check it compiles (and runs) with the debug-info default enabled.
    with override_config("DEBUGINFO_DEFAULT", 1):
        result = cuda.device_array(1, dtype=np.float32)
        foo[1, 1](result, np.pi)
        result.copy_to_host()

    # NOTE(review): `result` holds float32 while `result_host` is a Python
    # float; confirm assertPreciseEqual accepts this comparison as intended.
    result_host = math.sin(np.pi) + math.cos(np.pi)
    self.assertPreciseEqual(result[0], result_host)

    llvm_by_sig = foo.inspect_llvm()
    ir_content = llvm_by_sig[foo.signatures[0]]
    self.assertFileCheckMatches(ir_content, foo.__doc__)
    self.assertFileCheckMatches(ir_content, bar_checks)
    self.assertFileCheckMatches(ir_content, baz_checks)
728
+
729
def test_DILocation_versioned_variables(self):
    """Tests that DILocation information for versions of variables matches
    up to their definition site.

    Besides the FileCheck script in the kernel docstring, the `line:` of
    the DILocation attached to the conditional branch and to the stores of
    5 and 1 is compared with the position of `if n:`, `c = 5` and `c = 1`
    in the kernel's Python source.
    """

    @cuda.jit(debug=True, opt=False)
    def foo(dest, n):
        """
        CHECK: define void @{{.+}}foo
        CHECK: store i64 5, i64* %"c{{.+}} !dbg ![[STORE5:.+]]
        CHECK: store i64 1, i64* %"c{{.+}} !dbg ![[STORE1:.+]]
        CHECK: [[STORE5]] = !DILocation(
        CHECK: [[STORE1]] = !DILocation(
        """
        if n:
            c = 5
        else:
            c = 1
        dest[0] = c

    foo_source_lines, foo_source_lineno = inspect.getsourcelines(
        foo.py_func
    )

    result = cuda.device_array(1, dtype=np.int32)
    foo[1, 1](result, 1)
    # Assert on the array that was copied back instead of discarding it.
    host_result = result.copy_to_host()
    self.assertEqual(host_result[0], 5)

    ir_content = foo.inspect_llvm()[foo.signatures[0]]
    self.assertFileCheckMatches(ir_content, foo.__doc__)

    # Collect the lines pertaining to the function `foo` and the debuginfo
    # metadata definitions.
    lines = ir_content.splitlines()
    debuginfo_equals = re.compile(r"!(\d+) = ")
    debug_info_lines = [ln for ln in lines if debuginfo_equals.search(ln)]

    function_start_regex = re.compile(r"define void @.+foo")
    function_starts = [
        idx
        for idx, ln in enumerate(lines)
        if function_start_regex.search(ln)
    ]
    self.assertTrue(function_starts, msg=ir_content)
    start = function_starts[0]
    # Look for the closing brace only *after* the function start, so that
    # an earlier definition in the module cannot truncate the slice.
    function_ends = [
        idx for idx in range(start, len(lines)) if lines[idx] == "}"
    ]
    self.assertTrue(function_ends, msg=ir_content)
    foo_ir_lines = lines[start : function_ends[0]]

    def dbg_node(ir_line):
        # Text following "!dbg" on an instruction line, e.g. "!42".
        return ir_line.split("!dbg")[1].strip()

    def unique_definition(node):
        # The single metadata line that defines `node`.
        definitions = [ln for ln in debug_info_lines if node + " = " in ln]
        self.assertEqual(len(definitions), 1)
        return definitions[0]

    # Check the if condition's debuginfo.
    cond_branch = [ln for ln in foo_ir_lines if "br i1" in ln]
    self.assertEqual(len(cond_branch), 1)
    self.assertIn("!dbg", cond_branch[0])
    cond_branch_dbginfo = unique_definition(dbg_node(cond_branch[0]))

    # Check debuginfo for the store instructions: each constant is stored
    # twice, and both stores must share one debug location.
    store_1_lines = [ln for ln in foo_ir_lines if "store i64 1" in ln]
    store_5_lines = [ln for ln in foo_ir_lines if "store i64 5" in ln]
    self.assertEqual(len(store_1_lines), 2)
    self.assertEqual(len(store_5_lines), 2)

    store_1_nodes = {dbg_node(ln) for ln in store_1_lines}
    store_5_nodes = {dbg_node(ln) for ln in store_5_lines}
    self.assertEqual(len(store_1_nodes), 1)
    self.assertEqual(len(store_5_nodes), 1)
    store_1_dbginfo = unique_definition(store_1_nodes.pop())
    store_5_dbginfo = unique_definition(store_5_nodes.pop())

    # Ensure the line numbers match what we expect from the Python source.
    line_number_regex = re.compile(r"line: (\d+)")
    LineNumbers = namedtuple(
        "LineNumbers", ["cond_branch", "store_5", "store_1"]
    )
    line_number_matches = LineNumbers(
        *(
            line_number_regex.search(dbginfo)
            for dbginfo in (
                cond_branch_dbginfo,
                store_5_dbginfo,
                store_1_dbginfo,
            )
        )
    )
    self.assertTrue(all(m is not None for m in line_number_matches))
    line_numbers = LineNumbers(
        *(int(m.group(1)) for m in line_number_matches)
    )
    # Source order is `if n:`, `c = 5`, `c = 1`, which lines up with the
    # field order of LineNumbers.
    source_line_numbers = LineNumbers(
        *(
            offset + foo_source_lineno
            for offset, src in enumerate(foo_source_lines)
            if "c = " in src or "if n:" in src
        )
    )
    self.assertEqual(line_numbers, source_line_numbers)
862
+
863
def test_debuginfo_asm(self):
    """The asm has a ``.debug`` section only when built with ``debug=True``."""

    def foo():
        pass

    def launch_and_get_asm(kernel):
        # Launch once so a specialization exists, then fetch its assembly.
        kernel[1, 1]()
        asm_by_sig = kernel.inspect_asm()
        return asm_by_sig[kernel.signatures[0]]

    foo_debug = cuda.jit(debug=True, opt=False)(foo)
    self.assertFileCheckMatches(
        launch_and_get_asm(foo_debug),
        """
        CHECK: .section{{.+}}.debug
        """,
    )

    foo_nodebug = cuda.jit(debug=False)(foo)
    self.assertFileCheckMatches(
        launch_and_get_asm(foo_nodebug),
        """
        CHECK-NOT: .section{{.+}}.debug
        """,
    )
886
+
887
+
888
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()