numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1951 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from collections import namedtuple, defaultdict
5
+ import operator
6
+ import warnings
7
+ from functools import partial
8
+
9
+ from llvmlite import ir as llvm_ir
10
+
11
+ from numba.cuda import HAS_NUMBA
12
+ from numba.cuda.core import ir
13
+ from numba.cuda import debuginfo, cgutils, utils, typing, types
14
+ from numba.cuda.core import (
15
+ ir_utils,
16
+ targetconfig,
17
+ funcdesc,
18
+ config,
19
+ generators,
20
+ removerefctpass,
21
+ )
22
+
23
+ from numba.cuda.core.errors import (
24
+ LoweringError,
25
+ new_error_context,
26
+ TypingError,
27
+ LiteralTypingError,
28
+ UnsupportedError,
29
+ NumbaDebugInfoWarning,
30
+ )
31
+ from numba.cuda.core.funcdesc import default_mangler
32
+ from numba.cuda.core.environment import Environment
33
+ from numba.cuda.core.analysis import compute_use_defs, must_use_alloca
34
+ from numba.cuda.misc.firstlinefinder import get_func_body_first_lineno
35
+ from numba.cuda.misc.coverage_support import get_registered_loc_notify
36
+
37
+
38
+ _VarArgItem = namedtuple("_VarArgItem", ("vararg", "index"))
39
+
40
+
41
class BaseLower(object):
    """
    Lower IR to LLVM

    Base class driving the translation of one function's IR blocks into an
    LLVM IR module. Subclasses provide ``lower_inst`` (and a
    ``GeneratorLower`` attribute when generators are lowered) and may
    override the ``init``/``pre_*``/``post_*`` hooks.
    """

    def __init__(self, context, library, fndesc, func_ir, metadata=None):
        """
        Parameters
        ----------
        context :
            Target context; a subtarget specialised with the environment and
            *fndesc* is stored as ``self.context``.
        library :
            Code library used to create and later receive the LLVM module.
        fndesc :
            Function descriptor of the function being lowered.
        func_ir :
            The function IR (its blocks are copied in sorted key order).
        metadata : optional
            Stored as-is on ``self.metadata``.
        """
        self.library = library
        self.fndesc = fndesc
        self.blocks = dict(sorted(func_ir.blocks.items()))
        self.func_ir = func_ir
        self.generator_info = func_ir.generator_info
        self.metadata = metadata
        # May be None when no flags are on the config stack.
        self.flags = targetconfig.ConfigStack.top_or_none()

        # Initialize LLVM
        self.module = self.library.create_ir_module(self.fndesc.unique_name)

        # Python execution environment (will be available to the compiled
        # function).
        self.env = Environment.from_fndesc(self.fndesc)

        # Internal states
        self.blkmap = {}
        self.pending_phis = {}
        self.varmap = {}
        self.firstblk = min(self.blocks.keys())
        self.loc = -1

        # Specializes the target context as seen inside the Lowerer
        # This adds:
        #  - environment: the python execution environment
        self.context = context.subtarget(
            environment=self.env, fndesc=self.fndesc
        )

        # Debuginfo
        dibuildercls = (
            self.context.DIBuilder
            if self.context.enable_debuginfo
            else debuginfo.DummyDIBuilder
        )

        # debuginfo def location
        self.defn_loc = self._compute_def_location()

        # Treat "no flags available" as "directives-only not requested",
        # consistent with how subclasses guard against ``flags is None``.
        directives_only = (
            self.flags.dbg_directives_only
            if self.flags is not None
            else False
        )
        self.debuginfo = dibuildercls(
            module=self.module,
            filepath=func_ir.loc.filename,
            cgctx=context,
            directives_only=directives_only,
        )

        # Loc notify objects
        self._loc_notify_registry = get_registered_loc_notify()

        # Subclass initialization
        self.init()

    @property
    def call_conv(self):
        """The calling convention object of the (sub)target context."""
        return self.context.call_conv

    def init(self):
        """Subclass hook invoked at the end of ``__init__``; no-op here."""
        pass

    def init_pyapi(self):
        """
        Init the Python API and Environment Manager for the function being
        lowered.

        Does nothing if ``self.pyapi`` is already set.
        """
        if self.pyapi is not None:
            return
        self.pyapi = self.context.get_python_api(self.builder)

        # Store environment argument for later use
        self.env_manager = self.context.get_env_manager(self.builder)
        self.env_body = self.env_manager.env_body
        self.envarg = self.env_manager.env_ptr

    def _compute_def_location(self):
        """
        Return the location used as the function definition line for
        debug info.

        Emits a ``NumbaDebugInfoWarning`` when debug info is enabled and the
        first body line of the function cannot be determined.
        """
        # Debuginfo requires source to be accurate. Find it and warn if not
        # found. If it's not found, use the func_ir line + 1, this assumes that
        # the function definition is decorated with a 1 line jit decorator.
        defn_loc = self.func_ir.loc.with_lineno(self.func_ir.loc.line + 1)
        if self.context.enable_debuginfo:
            fn = self.func_ir.func_id.func
            optional_lno = get_func_body_first_lineno(fn)
            if optional_lno is not None:
                # -1 as lines start at 1 and this is an offset.
                offset = optional_lno - 1
                defn_loc = self.func_ir.loc.with_lineno(offset)
            else:
                msg = (
                    "Could not find source for function: "
                    f"{self.func_ir.func_id.func}. Debug line information "
                    "may be inaccurate."
                )
                warnings.warn(NumbaDebugInfoWarning(msg))
        return defn_loc

    def pre_lower(self):
        """
        Called before lowering all blocks.
        """
        # A given Lower object can be used for several LL functions
        # (for generators) and it's important to use a new API and
        # EnvironmentManager.
        self.pyapi = None
        self.debuginfo.mark_subprogram(
            function=self.builder.function,
            qualname=self.fndesc.qualname,
            argnames=self.fndesc.args,
            argtypes=self.fndesc.argtypes,
            line=self.defn_loc.line,
        )

        # When full debug info is enabled, disable inlining where possible, to
        # improve the quality of the debug experience. 'alwaysinline' functions
        # cannot have inlining disabled.
        attributes = self.builder.function.attributes
        flags = self.flags
        # Without flags there is no debug-info request to honour.
        full_debug = (
            flags is not None
            and flags.debuginfo
            and not flags.dbg_directives_only
        )
        if full_debug and "alwaysinline" not in attributes:
            attributes.add("noinline")

    def post_lower(self):
        """
        Called after all blocks are lowered
        """
        self.debuginfo.finalize()
        for notify in self._loc_notify_registry:
            notify.close()

    def pre_block(self, block):
        """
        Called before lowering a block.
        """

    def post_block(self, block):
        """
        Called after lowering a block.
        """

    def return_dynamic_exception(self, exc_class, exc_args, nb_types, loc=None):
        """Delegate to the calling convention's ``return_dynamic_user_exc``,
        tagging the call with this function's name."""
        self.call_conv.return_dynamic_user_exc(
            self.builder,
            exc_class,
            exc_args,
            nb_types,
            loc=loc,
            func_name=self.func_ir.func_id.func_name,
        )

    def return_exception(self, exc_class, exc_args=None, loc=None):
        """Propagate exception to the caller."""
        self.call_conv.return_user_exc(
            self.builder,
            exc_class,
            exc_args,
            loc=loc,
            func_name=self.func_ir.func_id.func_name,
        )

    def set_exception(self, exc_class, exc_args=None, loc=None):
        """Set exception state in the current function."""
        self.call_conv.set_static_user_exc(
            self.builder,
            exc_class,
            exc_args,
            loc=loc,
            func_name=self.func_ir.func_id.func_name,
        )

    def emit_environment_object(self):
        """Emit a pointer to hold the Environment object."""
        # Define global for the environment and initialize it to NULL
        envname = self.context.get_env_name(self.fndesc)
        self.context.declare_env_global(self.module, envname)

    def lower(self):
        """
        Lower the whole function (or generator) and hand the resulting
        module to the library.
        """
        # Emit the Env into the module
        self.emit_environment_object()
        if self.generator_info is None:
            self.genlower = None
            self.lower_normal_function(self.fndesc)
        else:
            self.genlower = self.GeneratorLower(self)
            self.gentype = self.genlower.gentype

            self.genlower.lower_init_func(self)
            self.genlower.lower_next_func(self)
            if self.gentype.has_finalizer:
                self.genlower.lower_finalize_func(self)

        if config.DUMP_LLVM:
            utils.dump_llvm(self.fndesc, self.module)

        # Special optimization to remove NRT on functions that do not need it.
        if self.context.enable_nrt and self.generator_info is None:
            removerefctpass.remove_unnecessary_nrt_usage(
                self.function, context=self.context, fndesc=self.fndesc
            )

        # Run target specific post lowering transformation
        self.context.post_lowering(self.module, self.library)

        # Materialize LLVM Module
        self.library.add_ir_module(self.module)

    def extract_function_arguments(self):
        """Decode the LLVM function arguments via the calling convention,
        cache them on ``self.fnargs`` and return them."""
        self.fnargs = self.call_conv.decode_arguments(
            self.builder, self.fndesc.argtypes, self.function
        )
        return self.fnargs

    def lower_normal_function(self, fndesc):
        """
        Lower non-generator *fndesc*.
        """
        self.setup_function(fndesc)

        # Init argument values
        self.extract_function_arguments()
        entry_block_tail = self.lower_function_body()

        # Close tail of entry block, do not emit debug metadata else the
        # unconditional jump gets associated with the metadata from the function
        # body end.
        with debuginfo.suspend_emission(self.builder):
            self.builder.position_at_end(entry_block_tail)
            self.builder.branch(self.blkmap[self.firstblk])

    def lower_function_body(self):
        """
        Lower the current function's body, and return the entry block.
        """
        # Init Python blocks
        for offset in self.blocks:
            bname = "B%s" % offset
            self.blkmap[offset] = self.function.append_basic_block(bname)

        self.pre_lower()
        # pre_lower() may have changed the current basic block
        entry_block_tail = self.builder.basic_block

        self.debug_print(
            "# function begin: {0}".format(self.fndesc.unique_name)
        )

        # Lower all blocks
        for offset, block in self.blocks.items():
            bb = self.blkmap[offset]
            self.builder.position_at_end(bb)
            self.debug_print(f"# lower block: {offset}")
            self.lower_block(block)
        self.post_lower()
        return entry_block_tail

    def lower_block(self, block):
        """
        Lower the given block.
        """
        self.pre_block(block)
        for inst in block.body:
            self.loc = inst.loc
            # Errors raised while lowering are reported with this location.
            defaulterrcls = partial(LoweringError, loc=self.loc)
            with new_error_context(
                'lowering "{inst}" at {loc}',
                inst=inst,
                loc=self.loc,
                errcls_=defaulterrcls,
            ):
                self.lower_inst(inst)
        self.post_block(block)

    def create_cpython_wrapper(self, release_gil=False):
        """
        Create CPython wrapper(s) around this function (or generator).
        """
        if self.genlower:
            self.context.create_cpython_wrapper(
                self.library,
                self.genlower.gendesc,
                self.env,
                self.call_helper,
                release_gil=release_gil,
            )
        self.context.create_cpython_wrapper(
            self.library,
            self.fndesc,
            self.env,
            self.call_helper,
            release_gil=release_gil,
        )

    def create_cfunc_wrapper(self):
        """
        Create C wrapper around this function.

        Raises ``UnsupportedError`` for generators.
        """
        if self.genlower:
            raise UnsupportedError("generator as a first-class function type")
        self.context.create_cfunc_wrapper(
            self.library, self.fndesc, self.env, self.call_helper
        )

    def setup_function(self, fndesc):
        """Declare the LLVM function for *fndesc*, create its entry block,
        the IR builder and the call helper."""
        # Setup function
        self.function = self.context.declare_function(self.module, fndesc)
        # No flags means no request to disable optimisation.
        if self.flags is not None and self.flags.dbg_optnone:
            attrset = self.function.attributes
            if "alwaysinline" not in attrset:
                attrset.add("optnone")
                attrset.add("noinline")
        self.entry_block = self.function.append_basic_block("entry")
        self.builder = llvm_ir.IRBuilder(self.entry_block)
        self.call_helper = self.call_conv.init_call_helper(self.builder)

    def typeof(self, varname):
        """Return the type recorded for *varname* in the function's typemap."""
        return self.fndesc.typemap[varname]

    def notify_loc(self, loc: ir.Loc) -> None:
        """Called when a new instruction with the given `loc` is about to be
        lowered.
        """
        for notify_obj in self._loc_notify_registry:
            notify_obj.notify(loc)

    def debug_print(self, msg):
        """Emit *msg* through the context's ``debug_print`` when
        ``config.DEBUG_JIT`` is set."""
        if config.DEBUG_JIT:
            self.context.debug_print(
                self.builder, f"DEBUGJIT [{self.fndesc.qualname}]: {msg}"
            )

    def print_variable(self, msg, varname):
        """Helper to emit ``print(msg, varname)`` for debugging.

        Parameters
        ----------
        msg : str
            Literal string to be printed.
        varname : str
            A variable name whose value will be printed.
        """
        argtys = (types.literal(msg), self.fndesc.typemap[varname])
        args = (
            self.context.get_dummy_value(),
            self.loadvar(varname),
        )
        sig = typing.signature(types.none, *argtys)

        impl = self.context.get_function(print, sig)
        impl(self.builder, args)
393
+
394
+
395
+ class Lower(BaseLower):
396
+ GeneratorLower = generators.GeneratorLower
397
+
398
def init(self):
    """Subclass initialisation hook: run the base hook, then precompute
    the set of singly assigned variables."""
    super(Lower, self).init()
    # Populates ``_singly_assigned_vars`` and ``_blk_local_varmap``.
    self._find_singly_assigned_variable()
402
+
403
+ @property
404
+ def _disable_sroa_like_opt(self):
405
+ """Flags that the SROA like optimisation that Numba performs (which
406
+ prevent alloca and subsequent load/store for locals) should be disabled.
407
+ Currently, this is conditional solely on the presence of a request for
408
+ the emission of debug information."""
409
+ if self.flags is None:
410
+ return False
411
+
412
+ return self.flags.debuginfo and not self.flags.dbg_directives_only
413
+
414
+ def _find_singly_assigned_variable(self):
415
+ func_ir = self.func_ir
416
+ blocks = func_ir.blocks
417
+
418
+ sav = set()
419
+
420
+ if not self.func_ir.func_id.is_generator:
421
+ use_defs = compute_use_defs(blocks)
422
+ alloca_vars = must_use_alloca(blocks)
423
+
424
+ # Compute where variables are defined
425
+ var_assign_map = defaultdict(set)
426
+ for blk, vl in use_defs.defmap.items():
427
+ for var in vl:
428
+ var_assign_map[var].add(blk)
429
+
430
+ # Compute where variables are used
431
+ var_use_map = defaultdict(set)
432
+ for blk, vl in use_defs.usemap.items():
433
+ for var in vl:
434
+ var_use_map[var].add(blk)
435
+
436
+ # Keep only variables that are defined locally and used locally
437
+ for var in var_assign_map:
438
+ if var not in alloca_vars and len(var_assign_map[var]) == 1:
439
+ # Usemap does not keep locally defined variables.
440
+ if len(var_use_map[var]) == 0:
441
+ # Ensure that the variable is not defined multiple times
442
+ # in the block
443
+ [defblk] = var_assign_map[var]
444
+ assign_stmts = self.blocks[defblk].find_insts(ir.Assign)
445
+ assigns = [
446
+ stmt
447
+ for stmt in assign_stmts
448
+ if stmt.target.name == var
449
+ ]
450
+ if len(assigns) == 1:
451
+ sav.add(var)
452
+
453
+ self._singly_assigned_vars = sav
454
+ self._blk_local_varmap = {}
455
+
456
def pre_block(self, block):
    """Per-block setup run before the block's instructions are lowered.

    Records the block as ``self._cur_ir_block`` and, when the block
    contains a call to ``eh.exception_check`` and ends in a branch, marks
    the builder as being inside a TRY block.
    """
    from numba.cuda.core.unsafe import eh

    super(Lower, self).pre_block(block)
    self._cur_ir_block = block

    # NOTE(review): ``self.firstblk`` is a key of ``self.blocks`` while
    # ``block`` is a block object, so this comparison may never be true --
    # confirm before relying on the slot pre-allocation below.
    if block == self.firstblk:
        # create slots for all the vars, irrespective of whether they are
        # initialized, SSA will pick this up and warn users about using
        # uninitialized variables. Slots are added as alloca in the first
        # block
        self.builder.position_at_end(self.blkmap[self.firstblk])
        all_names = set()
        # NOTE: this loop rebinds ``block`` (kept as in the original).
        for block in self.blocks.values():
            all_names.update(
                del_stmt.value for del_stmt in block.find_insts(ir.Del)
            )
        for name in all_names:
            self._alloca_var(name, self.typeof(name))

    # Detect if we are in a TRY block by looking for a call to
    # `eh.exception_check`.
    for call in block.find_exprs(op="call"):
        defn = ir_utils.guard(
            ir_utils.get_definition,
            self.func_ir,
            call.func,
        )
        if (
            isinstance(defn, ir.Global)
            and defn.value is eh.exception_check
            and isinstance(block.terminator, ir.Branch)
        ):
            # NOTE: This hacks in an attribute for call_conv to
            # pick up. This hack is no longer needed when
            # all old-style implementations are gone.
            self.builder._in_try_block = {
                "target": self.blkmap[block.terminator.truebr]
            }
            break
495
+
496
def post_block(self, block):
    """Per-block clean-up: drop the ``_in_try_block`` marker from the
    builder if one was set for this block."""
    try:
        delattr(self.builder, "_in_try_block")
    except AttributeError:
        # No marker was set; nothing to clean up.
        pass
502
+
503
def lower_inst(self, inst):
    """Lower a single IR statement.

    Dispatches on the statement class. Statements implemented through a
    registered function (set-item, del-item, set-attr) return whatever
    that implementation returns; all other statements return ``None``.

    Raises
    ------
    NotImplementedError
        If the statement class is not handled.
    """
    # Set debug location for all subsequent LL instructions
    self.debuginfo.mark_location(self.builder, self.loc.line)
    self.notify_loc(self.loc)
    self.debug_print(str(inst))

    if isinstance(inst, ir.Assign):
        target_name = inst.target.name
        lowered = self.lower_assign(self.typeof(target_name), inst)
        argidx = None
        # If this is a store from an arg, like x = arg.x then tell debuginfo
        # that this is the arg
        if isinstance(inst.value, ir.Arg):
            # NOTE: debug location is the `def <func>` line
            self.debuginfo.mark_location(self.builder, self.defn_loc.line)
            argidx = inst.value.index + 1  # args start at 1
        self.storevar(lowered, target_name, argidx=argidx)
        return

    if isinstance(inst, ir.Branch):
        cond_name = inst.cond.name
        cond = self.loadvar(cond_name)
        true_bb = self.blkmap[inst.truebr]
        false_bb = self.blkmap[inst.falsebr]

        pred = self.context.cast(
            self.builder, cond, self.typeof(cond_name), types.boolean
        )
        assert pred.type == llvm_ir.IntType(1), (
            "cond is not i1: %s" % pred.type
        )
        self.builder.cbranch(pred, true_bb, false_bb)
        return

    if isinstance(inst, ir.Jump):
        self.builder.branch(self.blkmap[inst.target])
        return

    if isinstance(inst, ir.Return):
        if self.generator_info:
            # StopIteration
            self.genlower.return_from_generator(self)
            return
        ret_name = inst.value.name
        val = self.loadvar(ret_name)
        oty = self.typeof(ret_name)
        ty = self.fndesc.restype
        if isinstance(ty, types.Optional):
            # If returning an optional type
            self.call_conv.return_optional_value(self.builder, ty, oty, val)
            return
        assert ty == oty, (
            "type '{}' does not match return type '{}'".format(oty, ty)
        )
        retval = self.context.get_return_value(self.builder, ty, val)
        self.call_conv.return_value(self.builder, retval)
        return

    if isinstance(inst, ir.PopBlock):
        return  # this is just a marker

    if isinstance(inst, ir.StaticSetItem):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        try:
            impl = self.context.get_function("static_setitem", signature)
        except NotImplementedError:
            # No static implementation: use the generic set-item with the
            # index variable.
            return self.lower_setitem(
                inst.target, inst.index_var, inst.value, signature
            )
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        value = self.context.cast(
            self.builder,
            value,
            self.typeof(inst.value.name),
            signature.args[2],
        )
        return impl(self.builder, (target, inst.index, value))

    if isinstance(inst, ir.Print):
        self.lower_print(inst)
        return

    if isinstance(inst, ir.SetItem):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        return self.lower_setitem(
            inst.target, inst.index, inst.value, signature
        )

    if isinstance(inst, ir.StoreMap):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        return self.lower_setitem(inst.dct, inst.key, inst.value, signature)

    if isinstance(inst, ir.DelItem):
        target = self.loadvar(inst.target.name)
        index = self.loadvar(inst.index.name)

        targetty = self.typeof(inst.target.name)
        indexty = self.typeof(inst.index.name)

        signature = self.fndesc.calltypes[inst]
        assert signature is not None

        typingctx = self.context.typing_context
        fnop = typingctx.resolve_value_type(operator.delitem)
        callsig = fnop.get_call_type(typingctx, signature.args, {})
        impl = self.context.get_function(fnop, callsig)

        assert targetty == signature.args[0]
        index = self.context.cast(
            self.builder, index, indexty, signature.args[1]
        )
        return impl(self.builder, (target, index))

    if isinstance(inst, ir.Del):
        self.delvar(inst.value)
        return

    if isinstance(inst, ir.SetAttr):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        signature = self.fndesc.calltypes[inst]

        targetty = self.typeof(inst.target.name)
        valuety = self.typeof(inst.value.name)
        assert signature is not None
        assert signature.args[0] == targetty
        impl = self.context.get_setattr(inst.attr, signature)

        # Convert argument to match
        value = self.context.cast(
            self.builder, value, valuety, signature.args[1]
        )
        return impl(self.builder, (target, value))

    if isinstance(inst, ir.DynamicRaise):
        self.lower_dynamic_raise(inst)
        return

    if isinstance(inst, ir.DynamicTryRaise):
        self.lower_try_dynamic_raise(inst)
        return

    if isinstance(inst, ir.StaticRaise):
        self.lower_static_raise(inst)
        return

    if isinstance(inst, ir.StaticTryRaise):
        self.lower_static_try_raise(inst)
        return

    raise NotImplementedError(type(inst))
651
+
652
def lower_setitem(self, target_var, index_var, value_var, signature):
    """Lower ``target[index] = value`` through the ``operator.setitem``
    implementation selected for ``signature.args``.

    The index and value are cast to the signature's argument types; an
    optional-typed target is cast to its underlying type. Returns the
    result of the implementation call.
    """
    # Load order is kept: target, value, index.
    target = self.loadvar(target_var.name)
    value = self.loadvar(value_var.name)
    index = self.loadvar(index_var.name)

    targetty = self.typeof(target_var.name)
    valuety = self.typeof(value_var.name)
    indexty = self.typeof(index_var.name)

    typingctx = self.context.typing_context
    fnop = typingctx.resolve_value_type(operator.setitem)
    callsig = fnop.get_call_type(typingctx, signature.args, {})
    impl = self.context.get_function(fnop, callsig)

    # Convert argument to match
    if not isinstance(targetty, types.Optional):
        unlit = types.unliteral
        assert unlit(targetty) == unlit(signature.args[0])
    else:
        target = self.context.cast(
            self.builder, target, targetty, targetty.type
        )

    index = self.context.cast(self.builder, index, indexty, signature.args[1])
    value = self.context.cast(self.builder, value, valuety, signature.args[2])

    return impl(self.builder, (target, index, value))
687
+
688
def lower_try_dynamic_raise(self, inst):
    """Lower a dynamic raise located inside a try block.

    Numba is a bit limited in what it can do with exceptions in a try
    block, so the static try-raise lowering is reused unchanged.
    """
    self.lower_static_try_raise(inst)
692
+
693
def lower_dynamic_raise(self, inst):
    """Lower a raise whose arguments may include runtime values.

    Each ``ir.Var`` argument is loaded and incref'ed and paired with its
    Numba type; any other argument is passed through as-is with a type of
    ``None``.
    """
    lowered = []  # (value, numba type or None) per exception argument
    for exc_arg in inst.exc_args:
        if not isinstance(exc_arg, ir.Var):
            lowered.append((exc_arg, None))
            continue
        # dynamic values
        typ = self.typeof(exc_arg.name)
        val = self.loadvar(exc_arg.name)
        self.incref(typ, val)
        lowered.append((val, typ))

    args = tuple(val for val, _ in lowered)
    nb_types = tuple(typ for _, typ in lowered)
    self.return_dynamic_exception(
        inst.exc_class, args, nb_types, loc=self.loc
    )
712
+
713
def lower_static_raise(self, inst):
    """Lower a raise with statically known class and arguments by
    returning the exception to the caller."""
    exc_class = inst.exc_class
    if exc_class is not None:
        self.return_exception(exc_class, inst.exc_args, loc=self.loc)
        return
    # Reraise
    self.return_exception(None, loc=self.loc)
719
+
720
def lower_static_try_raise(self, inst):
    """Lower a statically known raise inside a try block by setting the
    exception state in the current function."""
    exc_class = inst.exc_class
    if exc_class is not None:
        self.set_exception(exc_class, inst.exc_args, loc=self.loc)
        return
    # Reraise
    self.set_exception(None, loc=self.loc)
726
+
727
def lower_assign(self, ty, inst):
    """Lower the right-hand side of an assignment and return the
    resulting value, typed as *ty*.

    Raises
    ------
    NotImplementedError
        If the kind of right-hand side is not handled.
    """
    rhs = inst.value

    # In nopython mode, closure vars are frozen like globals
    if isinstance(rhs, (ir.Const, ir.Global, ir.FreeVar)):
        res = self.context.get_constant_generic(self.builder, ty, rhs.value)
        self.incref(ty, res)
        return res

    if isinstance(rhs, ir.Expr):
        return self.lower_expr(ty, rhs)

    if isinstance(rhs, ir.Var):
        loaded = self.loadvar(rhs.name)
        res = self.context.cast(
            self.builder, loaded, self.typeof(rhs.name), ty
        )
        self.incref(ty, res)
        return res

    if isinstance(rhs, ir.Arg):
        # Suspend debug info else all the arg repacking ends up being
        # associated with some line or other and it's actually just a detail
        # of Numba's CC.
        with debuginfo.suspend_emission(self.builder):
            # Cast from the argument type to the local variable type
            # (note the "arg.FOO" convention as used in typeinfer)
            argty = self.typeof("arg." + rhs.name)
            if not isinstance(argty, types.Omitted):
                res = self.context.cast(
                    self.builder, self.fnargs[rhs.index], argty, ty
                )
            else:
                pyval = argty.value
                typingctx = self.context.typing_context
                valty = typingctx.resolve_value_type_prefer_literal(pyval)
                # use the type of the constant value
                const = self.context.get_constant_generic(
                    self.builder,
                    valty,
                    pyval,
                )
                # cast it to the variable type
                res = self.context.cast(self.builder, const, valty, ty)
            self.incref(ty, res)
            return res

    if isinstance(rhs, ir.Yield):
        res = self.lower_yield(ty, rhs)
        self.incref(ty, res)
        return res

    raise NotImplementedError(type(rhs), rhs)
779
+
780
def lower_yield(self, retty, inst):
    """Lower a ``yield``: suspend, return the yielded value to the caller,
    emit the resumption point, and return the value of the yield
    expression itself (the constant ``None`` typed as *retty*)."""
    yield_point = self.generator_info.yield_points[inst.index]
    assert yield_point.inst is inst
    helper = generators.LowerYield(self, yield_point, yield_point.live_vars)
    helper.lower_yield_suspend()

    # Yield to caller
    yielded_name = inst.value.name
    val = self.loadvar(yielded_name)
    typ = self.typeof(yielded_name)
    actual_rettyp = self.gentype.yield_type

    # cast the local val to the type yielded
    yret = self.context.cast(self.builder, val, typ, actual_rettyp)

    # get the return repr of yielded value
    retval = self.context.get_return_value(self.builder, actual_rettyp, yret)

    # return
    self.call_conv.return_value(self.builder, retval)

    # Resumption point
    helper.lower_yield_resume()
    # None is returned by the yield expression
    return self.context.get_constant_generic(self.builder, retty, None)
807
+
808
def lower_binop(self, resty, expr, op):
    """Lower a binary operation and return its result cast to *resty*.

    Implementations specialised on statically known operands are tried
    first (both static, then static left, then static right); the generic
    implementation for the typed signature is the fallback.
    """
    typingctx = self.context.typing_context
    # Map the operator to its Numba function type.
    op = typingctx.resolve_value_type(op)

    static_lhs = expr.static_lhs
    static_rhs = expr.static_rhs
    lty = self.typeof(expr.lhs.name)
    rty = self.typeof(expr.rhs.name)
    lhs = self.loadvar(expr.lhs.name)
    rhs = self.loadvar(expr.rhs.name)

    # Convert argument to match
    signature = self.fndesc.calltypes[expr]
    lhs = self.context.cast(self.builder, lhs, lty, signature.args[0])
    rhs = self.context.cast(self.builder, rhs, rty, signature.args[1])

    def cast_result(res):
        return self.context.cast(
            self.builder, res, signature.return_type, resty
        )

    def try_static_impl(tys, args):
        # Returns None when an operand is undefined, typing fails, or no
        # implementation exists for the static signature.
        for arg in args:
            if arg is ir.UNDEFINED:
                return None
        try:
            if isinstance(op, types.Function):
                static_sig = op.get_call_type(typingctx, tys, {})
            else:
                static_sig = typing.signature(signature.return_type, *tys)
        except TypingError:
            return None
        try:
            static_impl = self.context.get_function(op, static_sig)
            return static_impl(self.builder, args)
        except NotImplementedError:
            return None

    # First try with static operands, if known
    lit_lhs = _lit_or_omitted(static_lhs)
    lit_rhs = _lit_or_omitted(static_rhs)
    attempts = (
        ((lit_lhs, lit_rhs), (static_lhs, static_rhs)),
        ((lit_lhs, rty), (static_lhs, rhs)),
        ((lty, lit_rhs), (lhs, static_rhs)),
    )
    for tys, args in attempts:
        res = try_static_impl(tys, args)
        if res is not None:
            return cast_result(res)

    # Normal implementation for generic arguments
    sig = op.get_call_type(typingctx, signature.args, {})
    impl = self.context.get_function(op, sig)
    return cast_result(impl(self.builder, (lhs, rhs)))
879
+
880
def lower_getitem(self, resty, expr, value, index, signature):
    """Lower ``value[index]`` through the ``operator.getitem``
    implementation for ``signature.args`` and return the result cast to
    *resty*."""
    baseval = self.loadvar(value.name)
    indexval = self.loadvar(index.name)

    # Get implementation of getitem
    typingctx = self.context.typing_context
    fnop = typingctx.resolve_value_type(operator.getitem)
    callsig = fnop.get_call_type(typingctx, signature.args, {})
    impl = self.context.get_function(fnop, callsig)

    # Cast each operand from its variable type to the signature's type.
    operands = (
        (baseval, self.typeof(value.name)),
        (indexval, self.typeof(index.name)),
    )
    castvals = [
        self.context.cast(self.builder, val, fromty, toty)
        for (val, fromty), toty in zip(operands, signature.args)
    ]
    res = impl(self.builder, castvals)
    return self.context.cast(self.builder, res, signature.return_type, resty)
903
+
904
def _cast_var(self, var, ty):
    """
    Cast a Numba IR variable to the given Numba type, returning a
    low-level value.

    *var* may also be a ``_VarArgItem``, in which case the referenced
    element is extracted from the loaded ``*args`` tuple first.
    """
    if not isinstance(var, _VarArgItem):
        varty = self.typeof(var.name)
        val = self.loadvar(var.name)
    else:
        tuple_name = var.vararg.name
        varty = self.typeof(tuple_name)[var.index]
        val = self.builder.extract_value(self.loadvar(tuple_name), var.index)
    return self.context.cast(self.builder, val, varty, ty)
918
+
919
def fold_call_args(self, fnty, signature, pos_args, vararg, kw_args):
    """Return the list of low-level argument values for a call, cast to
    the argument types of *signature*.

    ``*args`` elements are appended to the positional arguments. When the
    signature carries a Python signature, keyword arguments and defaults
    are folded with ``typing.fold_arguments``; otherwise keyword arguments
    are rejected with ``NotImplementedError``.
    """
    if vararg:
        # Inject *args from function call
        # The lowering will be done in _cast_var() above.
        tp_vararg = self.typeof(vararg.name)
        assert isinstance(tp_vararg, types.BaseTuple)
        star_items = [_VarArgItem(vararg, i) for i in range(len(tp_vararg))]
        pos_args = pos_args + star_items

    # Fold keyword arguments and resolve default argument values
    pysig = signature.pysig
    if pysig is None:
        if kw_args:
            raise NotImplementedError(
                "unsupported keyword arguments when calling %s" % (fnty,)
            )
        return [
            self._cast_var(var, sigty)
            for var, sigty in zip(pos_args, signature.args)
        ]

    def normal_handler(index, param, var):
        return self._cast_var(var, signature.args[index])

    def default_handler(index, param, default):
        return self.context.get_constant_generic(
            self.builder, signature.args[index], default
        )

    def stararg_handler(index, param, vars):
        stararg_ty = signature.args[index]
        assert isinstance(stararg_ty, types.BaseTuple), stararg_ty
        values = [
            self._cast_var(var, sigty) for var, sigty in zip(vars, stararg_ty)
        ]
        return cgutils.make_anonymous_struct(self.builder, values)

    return typing.fold_arguments(
        pysig,
        pos_args,
        dict(kw_args),
        normal_handler,
        default_handler,
        stararg_handler,
    )
968
+
969
def lower_print(self, inst):
    """
    Lower a ir.Print()
    """
    # We handle this, as far as possible, as a normal call to built-in
    # print(). This will make it easy to undo the special ir.Print
    # rewrite when it becomes unnecessary (e.g. when we have native
    # strings).
    sig = self.fndesc.calltypes[inst]
    assert sig.return_type == types.none
    fnty = self.context.typing_context.resolve_value_type(print)

    # Fix the call signature to inject any constant-inferred
    # string argument
    pos_tys = list(sig.args)
    pos_args = list(inst.args)
    for position, _ in enumerate(pos_args):
        if position not in inst.consts:
            continue
        pyval = inst.consts[position]
        if isinstance(pyval, str):
            pos_tys[position] = types.literal(pyval)

    fixed_sig = typing.signature(sig.return_type, *pos_tys).replace(
        pysig=sig.pysig
    )

    # Arguments are folded against the original signature; the fixed one
    # only selects the implementation.
    argvals = self.fold_call_args(fnty, sig, pos_args, inst.vararg, {})
    impl = self.context.get_function(print, fixed_sig)
    impl(self.builder, argvals)
997
+
998
def lower_call(self, resty, expr):
    """
    Lower a call expression and return its value cast to ``resty``.

    The callee's type selects one of the ``_lower_call_*`` helpers. A
    ``None`` result from the helper is accepted only for ``void`` return
    types; otherwise a ``LoweringError`` is raised.
    """
    signature = self.fndesc.calltypes[expr]
    self.debug_print("# lower_call: expr = {0}".format(expr))
    if isinstance(signature.return_type, types.Phantom):
        return self.context.get_dummy_value()

    fnty = self.typeof(expr.func.name)

    # Ordered (type, handler) pairs; the first matching type wins and
    # anything else goes through normal function resolution.
    specialised = (
        (types.ObjModeDispatcher, self._lower_call_ObjModeDispatcher),
        (types.ExternalFunction, self._lower_call_ExternalFunction),
        (
            types.ExternalFunctionPointer,
            self._lower_call_ExternalFunctionPointer,
        ),
        (types.RecursiveCall, self._lower_call_RecursiveCall),
    )
    for callee_kind, handler in specialised:
        if isinstance(fnty, callee_kind):
            break
    else:
        handler = self._lower_call_normal
    res = handler(fnty, expr, signature)

    # A None result means "dummy value" for void functions and is an
    # error for everything else.
    if res is None:
        if signature.return_type != types.void:
            raise LoweringError(
                msg="non-void function returns None from implementation",
                loc=self.loc,
            )
        res = self.context.get_dummy_value()

    return self.context.cast(
        self.builder, res, signature.return_type, resty
    )
1038
+
1039
def _lower_call_ObjModeDispatcher(self, fnty, expr, signature):
    """
    Lower a call whose callee type is an object-mode dispatcher.

    The native arguments are boxed into Python objects, the dispatcher
    loaded through ``ObjModeUtils`` is called while the GIL is held, and
    the returned object is unboxed using ``fnty.dispatcher.output_types``.
    Both the "call failed" and the "unboxing failed" paths return through
    ``self.call_conv.return_exc``.

    Returns the unboxed native value (``native.value``).
    ``signature`` is accepted but not read in this method.
    """
    from numba.cuda.core.pythonapi import ObjModeUtils

    self.init_pyapi()
    # Acquire the GIL
    gil_state = self.pyapi.gil_ensure()
    # Fix types
    argnames = [a.name for a in expr.args]
    argtypes = [self.typeof(a) for a in argnames]
    argvalues = [self.loadvar(a) for a in argnames]
    for v, ty in zip(argvalues, argtypes):
        # Because .from_native_value steals the reference
        self.incref(ty, v)

    # Box every native argument into a Python object.
    argobjs = [
        self.pyapi.from_native_value(atyp, aval, self.env_manager)
        for atyp, aval in zip(argtypes, argvalues)
    ]

    # Load objmode dispatcher
    callee = ObjModeUtils(self.pyapi).load_dispatcher(fnty, argtypes)
    # Make Call
    ret_obj = self.pyapi.call_function_objargs(callee, argobjs)
    # A null return object signals that the call raised.
    has_exception = cgutils.is_null(self.builder, ret_obj)
    with self.builder.if_else(has_exception) as (then, orelse):
        # Handles exception
        # This branch must exit the function
        with then:
            # Clean arg
            for obj in argobjs:
                self.pyapi.decref(obj)

            # Release the GIL
            self.pyapi.gil_release(gil_state)

            # Return and signal exception
            self.call_conv.return_exc(self.builder)

        # Handles normal return
        with orelse:
            # Fix output value
            native = self.pyapi.to_native_value(
                fnty.dispatcher.output_types,
                ret_obj,
            )
            output = native.value

            # Release objs
            self.pyapi.decref(ret_obj)
            for obj in argobjs:
                self.pyapi.decref(obj)

            # cleanup output
            if callable(native.cleanup):
                native.cleanup()

            # Release the GIL
            self.pyapi.gil_release(gil_state)

            # Error during unboxing
            with self.builder.if_then(native.is_error):
                self.call_conv.return_exc(self.builder)

            # NOTE(review): the return is assumed to sit inside the
            # ``orelse`` context; confirm against the pristine file, since
            # this copy had its indentation stripped.
            return output
1103
+
1104
def _lower_call_ExternalFunction(self, fnty, expr, signature):
    """
    Lower a call to a named external function.

    The symbol is declared in the current module from ``fnty.symbol`` and
    ``fnty.sig`` and then called with the folded argument values.
    """
    self.debug_print("# external function")
    folded = self.fold_call_args(
        fnty, signature, expr.args, expr.vararg, expr.kws
    )

    ext_sig = fnty.sig
    descriptor = funcdesc.ExternalFunctionDescriptor(
        fnty.symbol, ext_sig.return_type, ext_sig.args
    )
    declared = self.context.declare_external_function(
        self.builder.module, descriptor
    )
    return self.context.call_external_function(
        self.builder, declared, descriptor.argtypes, folded
    )
1126
+
1127
+ def _lower_call_ExternalFunctionPointer(self, fnty, expr, signature):
1128
+ # Handle a C function pointer
1129
+ self.debug_print("# calling external function pointer")
1130
+ argvals = self.fold_call_args(
1131
+ fnty,
1132
+ signature,
1133
+ expr.args,
1134
+ expr.vararg,
1135
+ expr.kws,
1136
+ )
1137
+ pointer = self.loadvar(expr.func.name)
1138
+ # If the external function pointer uses libpython
1139
+ if fnty.requires_gil:
1140
+ self.init_pyapi()
1141
+ # Acquire the GIL
1142
+ gil_state = self.pyapi.gil_ensure()
1143
+ # Make PyObjects
1144
+ newargvals = []
1145
+ pyvals = []
1146
+ for exptyp, gottyp, aval in zip(
1147
+ fnty.sig.args, signature.args, argvals
1148
+ ):
1149
+ # Adjust argument values to pyobjects
1150
+ if exptyp == types.ffi_forced_object:
1151
+ self.incref(gottyp, aval)
1152
+ obj = self.pyapi.from_native_value(
1153
+ gottyp,
1154
+ aval,
1155
+ self.env_manager,
1156
+ )
1157
+ newargvals.append(obj)
1158
+ pyvals.append(obj)
1159
+ else:
1160
+ newargvals.append(aval)
1161
+
1162
+ # Call external function
1163
+ res = self.context.call_function_pointer(
1164
+ self.builder,
1165
+ pointer,
1166
+ newargvals,
1167
+ fnty.cconv,
1168
+ )
1169
+ # Release PyObjects
1170
+ for obj in pyvals:
1171
+ self.pyapi.decref(obj)
1172
+
1173
+ # Release the GIL
1174
+ self.pyapi.gil_release(gil_state)
1175
+ # If the external function pointer does NOT use libpython
1176
+ else:
1177
+ res = self.context.call_function_pointer(
1178
+ self.builder,
1179
+ pointer,
1180
+ argvals,
1181
+ fnty.cconv,
1182
+ )
1183
+ return res
1184
+
1185
+ def _lower_call_RecursiveCall(self, fnty, expr, signature):
1186
+ # Recursive call
1187
+ argvals = self.fold_call_args(
1188
+ fnty,
1189
+ signature,
1190
+ expr.args,
1191
+ expr.vararg,
1192
+ expr.kws,
1193
+ )
1194
+ rec_ov = fnty.get_overloads(signature.args)
1195
+ mangler = self.context.mangler or default_mangler
1196
+ abi_tags = self.fndesc.abi_tags
1197
+ mangled_name = mangler(
1198
+ rec_ov.qualname, signature.args, abi_tags=abi_tags, uid=rec_ov.uid
1199
+ )
1200
+ # special case self recursion
1201
+ if self.builder.function.name.startswith(mangled_name):
1202
+ res = self.context.call_internal(
1203
+ self.builder,
1204
+ self.fndesc,
1205
+ signature,
1206
+ argvals,
1207
+ )
1208
+ else:
1209
+ res = self.context.call_unresolved(
1210
+ self.builder,
1211
+ mangled_name,
1212
+ signature,
1213
+ argvals,
1214
+ )
1215
+ return res
1216
+
1217
def _lower_call_normal(self, fnty, expr, signature):
    """
    Lower a call using normal function resolution.

    The implementation is looked up on the CUDA target context when
    ``expr.target`` is set, otherwise on ``self.context``. For signatures
    with a receiver, the function object is loaded and passed as the
    first argument.
    """
    self.debug_print("# calling normal function: {0}".format(fnty))
    self.debug_print("# signature: {0}".format(signature))

    if not isinstance(fnty, types.ObjModeDispatcher):
        argvals = self.fold_call_args(
            fnty, signature, expr.args, expr.vararg, expr.kws
        )
    else:
        argvals = expr.func.args

    # Pick the context that resolves the implementation.
    if expr.target is None:
        lookup_ctx = self.context
    else:
        from numba.cuda.descriptor import cuda_target

        lookup_ctx = cuda_target.target_context
    impl = lookup_ctx.get_function(fnty, signature)

    if signature.recvr:
        # For a bound function the "self" object is the function object;
        # prepend it to the arguments.
        argvals = [self.loadvar(expr.func.name), *argvals]

    return impl(self.builder, argvals, self.loc)
1248
+
1249
def lower_expr(self, resty, expr):
    """
    Lower the IR expression ``expr`` and return the resulting value.

    The branch taken is selected by ``expr.op``; ``resty`` is the type the
    result is expected to have. Ops that match none of the explicit
    branches are looked up in ``self.context.special_ops``.

    Raises ``LoweringError`` for a ``"phi"`` op and ``NotImplementedError``
    when no branch handles ``expr.op``.
    """
    if expr.op == "binop":
        return self.lower_binop(resty, expr, expr.fn)
    elif expr.op == "inplace_binop":
        lty = self.typeof(expr.lhs.name)
        if lty.mutable:
            return self.lower_binop(resty, expr, expr.fn)
        else:
            # inplace operators on non-mutable types reuse the same
            # definition as the corresponding copying operators.
            return self.lower_binop(resty, expr, expr.immutable_fn)
    elif expr.op == "unary":
        val = self.loadvar(expr.value.name)
        typ = self.typeof(expr.value.name)
        func_ty = self.context.typing_context.resolve_value_type(expr.fn)
        # Get function
        signature = self.fndesc.calltypes[expr]
        impl = self.context.get_function(func_ty, signature)
        # Convert argument to match
        val = self.context.cast(self.builder, val, typ, signature.args[0])
        res = impl(self.builder, [val])
        # Convert the result to the expected type
        res = self.context.cast(
            self.builder, res, signature.return_type, resty
        )
        return res

    elif expr.op == "call":
        res = self.lower_call(resty, expr)
        return res

    elif expr.op == "pair_first":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        res = self.context.pair_first(self.builder, val, ty)
        self.incref(resty, res)
        return res

    elif expr.op == "pair_second":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        res = self.context.pair_second(self.builder, val, ty)
        self.incref(resty, res)
        return res

    elif expr.op in ("getiter", "iternext"):
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        signature = self.fndesc.calltypes[expr]
        # The op name itself is the key used to look up the implementation
        impl = self.context.get_function(expr.op, signature)
        [fty] = signature.args
        castval = self.context.cast(self.builder, val, ty, fty)
        res = impl(self.builder, (castval,))
        res = self.context.cast(
            self.builder, res, signature.return_type, resty
        )
        return res

    elif expr.op == "exhaust_iter":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        # Unpack optional
        if isinstance(ty, types.Optional):
            val = self.context.cast(self.builder, val, ty, ty.type)
            ty = ty.type

        # If we have a tuple, we needn't do anything
        # (and we can't iterate over the heterogeneous ones).
        if isinstance(ty, types.BaseTuple):
            assert ty == resty
            self.incref(ty, val)
            return val

        itemty = ty.iterator_type.yield_type
        # Start from an undefined tuple and fill it item by item
        tup = self.context.get_constant_undef(resty)
        pairty = types.Pair(itemty, types.boolean)
        getiter_sig = typing.signature(ty.iterator_type, ty)
        getiter_impl = self.context.get_function("getiter", getiter_sig)
        iternext_sig = typing.signature(pairty, ty.iterator_type)
        iternext_impl = self.context.get_function("iternext", iternext_sig)
        iterobj = getiter_impl(self.builder, (val,))
        # We call iternext() as many times as desired (`expr.count`).
        for i in range(expr.count):
            pair = iternext_impl(self.builder, (iterobj,))
            is_valid = self.context.pair_second(self.builder, pair, pairty)
            # Running out of items early is reported as a ValueError
            with cgutils.if_unlikely(
                self.builder, self.builder.not_(is_valid)
            ):
                self.return_exception(ValueError, loc=self.loc)
            item = self.context.pair_first(self.builder, pair, pairty)
            tup = self.builder.insert_value(tup, item, i)

        # Call iternext() once more to check that the iterator
        # is exhausted.
        pair = iternext_impl(self.builder, (iterobj,))
        is_valid = self.context.pair_second(self.builder, pair, pairty)
        with cgutils.if_unlikely(self.builder, is_valid):
            self.return_exception(ValueError, loc=self.loc)

        self.decref(ty.iterator_type, iterobj)
        return tup

    elif expr.op == "getattr":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)

        if isinstance(resty, types.BoundFunction):
            # if we are getting out a method, assume we have typed this
            # properly and just build a bound function object
            casted = self.context.cast(self.builder, val, ty, resty.this)
            res = self.context.get_bound_function(
                self.builder, casted, resty.this
            )
            self.incref(resty, res)
            return res
        else:
            impl = self.context.get_getattr(ty, expr.attr)
            attrty = self.context.typing_context.resolve_getattr(
                ty, expr.attr
            )

            if impl is None:
                # ignore the attribute
                return self.context.get_dummy_value()
            else:
                res = impl(self.context, self.builder, ty, val, expr.attr)

            # Cast the attribute type to the expected output type
            res = self.context.cast(self.builder, res, attrty, resty)
            return res

    elif expr.op == "static_getitem":
        signature = typing.signature(
            resty,
            self.typeof(expr.value.name),
            _lit_or_omitted(expr.index),
        )
        try:
            # Both get_function() and the returned implementation can
            # raise NotImplementedError if the types aren't supported
            impl = self.context.get_function("static_getitem", signature)
            return impl(
                self.builder, (self.loadvar(expr.value.name), expr.index)
            )
        except NotImplementedError:
            # Without an index variable there is nothing to fall back on
            if expr.index_var is None:
                raise
            # Fall back on the generic getitem() implementation
            # for this type.
            signature = self.fndesc.calltypes[expr]
            return self.lower_getitem(
                resty, expr, expr.value, expr.index_var, signature
            )
    elif expr.op == "typed_getitem":
        signature = typing.signature(
            resty,
            self.typeof(expr.value.name),
            self.typeof(expr.index.name),
        )
        impl = self.context.get_function("typed_getitem", signature)
        return impl(
            self.builder,
            (self.loadvar(expr.value.name), self.loadvar(expr.index.name)),
        )
    elif expr.op == "getitem":
        signature = self.fndesc.calltypes[expr]
        return self.lower_getitem(
            resty, expr, expr.value, expr.index, signature
        )

    elif expr.op == "build_tuple":
        itemvals = [self.loadvar(i.name) for i in expr.items]
        itemtys = [self.typeof(i.name) for i in expr.items]
        # Cast each item to the corresponding element type of ``resty``
        castvals = [
            self.context.cast(self.builder, val, fromty, toty)
            for val, toty, fromty in zip(itemvals, resty, itemtys)
        ]
        tup = self.context.make_tuple(self.builder, resty, castvals)
        self.incref(resty, tup)
        return tup

    elif expr.op == "build_list":
        itemvals = [self.loadvar(i.name) for i in expr.items]
        itemtys = [self.typeof(i.name) for i in expr.items]
        if isinstance(resty, types.LiteralList):
            # A literal list is built as a tuple of its element types
            castvals = [
                self.context.cast(self.builder, val, fromty, toty)
                for val, toty, fromty in zip(itemvals, resty.types, itemtys)
            ]
            tup = self.context.make_tuple(
                self.builder, types.Tuple(resty.types), castvals
            )
            self.incref(resty, tup)
            return tup
        else:
            castvals = [
                self.context.cast(self.builder, val, fromty, resty.dtype)
                for val, fromty in zip(itemvals, itemtys)
            ]
            return self.context.build_list(self.builder, resty, castvals)

    elif expr.op == "build_set":
        # Insert in reverse order, as Python does
        items = expr.items[::-1]
        itemvals = [self.loadvar(i.name) for i in items]
        itemtys = [self.typeof(i.name) for i in items]
        castvals = [
            self.context.cast(self.builder, val, fromty, resty.dtype)
            for val, fromty in zip(itemvals, itemtys)
        ]
        return self.context.build_set(self.builder, resty, castvals)

    elif expr.op == "build_map":
        items = expr.items
        keys, values = [], []
        key_types, value_types = [], []
        # Collect the value and type of every key/value pair
        for k, v in items:
            key = self.loadvar(k.name)
            keytype = self.typeof(k.name)
            val = self.loadvar(v.name)
            valtype = self.typeof(v.name)
            keys.append(key)
            values.append(val)
            key_types.append(keytype)
            value_types.append(valtype)
        return self.context.build_map(
            self.builder,
            resty,
            list(zip(key_types, value_types)),
            list(zip(keys, values)),
        )

    elif expr.op == "cast":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        castval = self.context.cast(self.builder, val, ty, resty)
        self.incref(resty, castval)
        return castval

    elif expr.op == "phi":
        raise LoweringError("PHI not stripped")

    elif expr.op == "null":
        return self.context.get_constant_null(resty)

    elif expr.op == "undef":
        # Numba does not raise an UnboundLocalError for undefined variables.
        # The variable is set to zero.
        return self.context.get_constant_null(resty)

    elif expr.op in self.context.special_ops:
        # Target-specific ops are lowered by a handler registered on the
        # context, which receives this lowerer and the expression
        res = self.context.special_ops[expr.op](self, expr)
        return res

    raise NotImplementedError(expr)
1503
+
1504
+ def _alloca_var(self, name, fetype):
1505
+ """
1506
+ Ensure the given variable has an allocated stack slot (if needed).
1507
+ """
1508
+ if name in self.varmap:
1509
+ # quit early
1510
+ return
1511
+
1512
+ # If the name is used in multiple blocks or lowering with debuginfo...
1513
+ if (
1514
+ name not in self._singly_assigned_vars
1515
+ ) or self._disable_sroa_like_opt:
1516
+ # If not already defined, allocate it
1517
+ ptr = self.alloca(name, fetype)
1518
+ # Remember the pointer
1519
+ self.varmap[name] = ptr
1520
+
1521
def getvar(self, name):
    """
    Return the pointer to the stack slot of variable ``name``.

    A slot is allocated on demand when the variable has none yet.
    """
    sroa_enabled = not self._disable_sroa_like_opt
    if sroa_enabled:
        # Slot-less variables must never be requested through here.
        assert name not in self._blk_local_varmap
        assert name not in self._singly_assigned_vars

    if name not in self.varmap:
        # Allocate undefined variable as needed.
        # NOTE: Py3.12 use of LOAD_FAST_AND_CLEAR will allow a variable to
        # be referenced before it is defined.
        self._alloca_var(name, self.typeof(name))

    return self.varmap[name]
1534
+
1535
def loadvar(self, name):
    """
    Return the current value of variable ``name``.

    Block-local values are returned directly when the SROA-like
    optimisation is enabled; otherwise the value is loaded from the
    variable's stack slot.
    """
    block_locals = self._blk_local_varmap
    if name in block_locals and not self._disable_sroa_like_opt:
        return block_locals[name]

    slot = self.getvar(name)
    if name not in self.func_ir.arg_names:
        return self.builder.load(slot)

    # Don't associate debuginfo with the load for a function arg, else it
    # creates instructions ahead of the first source line of the function,
    # which causes problems with breaking on the function symbol (it hits
    # the symbol, not the first line).
    with debuginfo.suspend_emission(self.builder):
        return self.builder.load(slot)
1552
+
1553
def storevar(self, value, name, argidx=None):
    """
    Store ``value`` into variable ``name``.

    ``argidx`` is given when the store belongs to a function argument; in
    that case the previous value is not released and the variable is
    marked in the debug info at the function definition line.

    Raises ``AssertionError`` when the LLVM type of ``value`` differs from
    the pointee type of the variable's slot.
    """
    fetype = self.typeof(name)
    # Define if not already
    self._alloca_var(name, fetype)

    # Singly-assigned variables are tracked as plain values, no store.
    if name in self._singly_assigned_vars and not self._disable_sroa_like_opt:
        self._blk_local_varmap[name] = value
        return

    is_argument = argidx is not None
    if not is_argument:
        # Release the value previously held by the variable; not needed
        # for an argument.
        self.decref(fetype, self.loadvar(name))

    # Stack-stored variable
    slot = self.getvar(name)
    if value.type != slot.type.pointee:
        raise AssertionError(
            (
                "Storing {value.type} to ptr of {ptr.type.pointee} "
                "('{name}'). FE type {fetype}"
            ).format(value=value, ptr=slot, fetype=fetype, name=name)
        )

    if not is_argument:
        self.builder.store(value, slot)
        return

    # The store is associated with a function argument (i.e. it follows
    # the reassembly of structs that the calling convention splatted into
    # many args), so mark the variable as such.
    with debuginfo.suspend_emission(self.builder):
        self.builder.store(value, slot)
    defn = self.defn_loc  # the line with `def <func>`
    lltype = self.context.get_value_type(fetype)
    nbytes = self.context.get_abi_sizeof(lltype)
    model = self.context.data_model_manager[fetype]
    self.debuginfo.mark_variable(
        self.builder,
        slot,
        name=name,
        lltype=lltype,
        size=nbytes,
        line=defn.line,
        datamodel=model,
        argidx=argidx,
    )
1605
+
1606
def delvar(self, name):
    """
    Delete variable ``name``, releasing the value it holds.

    A block-local value is released directly; a stack-stored value is
    loaded, released, and its slot is then zero-filled.
    """
    fetype = self.typeof(name)

    # Out-of-order deletion: the variable can no longer be treated as
    # singly assigned.
    if not (name in self._blk_local_varmap or self._disable_sroa_like_opt):
        self._singly_assigned_vars.discard(name)

    # Define if not already (may happen if the variable is deleted at the
    # beginning of a loop, but only set later in the loop)
    self._alloca_var(name, fetype)

    if name in self._blk_local_varmap and not self._disable_sroa_like_opt:
        self.decref(fetype, self._blk_local_varmap[name])
        return

    slot = self.getvar(name)
    self.decref(fetype, self.builder.load(slot))
    # Zero-fill variable to avoid double frees on subsequent dels
    self.builder.store(llvm_ir.Constant(slot.type.pointee, None), slot)
1632
+
1633
def alloca(self, name, type):
    """
    Allocate a stack slot for a variable of frontend type ``type``.

    The LLVM value type and data model are looked up on the context and
    the allocation itself is delegated to ``alloca_lltype``.
    """
    ctx = self.context
    value_type = ctx.get_value_type(type)
    model = ctx.data_model_manager[type]
    return self.alloca_lltype(name, value_type, datamodel=model)
1637
+
1638
def alloca_lltype(self, name, lltype, datamodel=None):
    """
    Allocate a stack slot of LLVM type ``lltype`` and return its pointer.

    Debug info is emitted for the slot unless the name starts with ``$``
    or the name is one of the function's arguments.
    """
    # Names starting with "$" are not user variables.
    is_temporary = name.startswith("$")
    # Allocate space for variable
    slot = cgutils.alloca_once(self.builder, lltype, name=name, zfill=False)

    # Don't associate debuginfo with the alloca for a function arg; that
    # is handled by the first store to the alloca so that repacking the
    # splatted args from the calling convention is dealt with.
    if is_temporary or name in self.func_ir.arg_names:
        return slot

    # Emit debug info for user variable
    nbytes = self.context.get_abi_sizeof(lltype)
    self.debuginfo.mark_variable(
        self.builder,
        slot,
        name=name,
        lltype=lltype,
        size=nbytes,
        line=self.loc.line,
        datamodel=datamodel,
    )
    return slot
1661
+
1662
def incref(self, typ, val):
    """Emit an NRT incref for ``val`` of type ``typ``; no-op without NRT."""
    ctx = self.context
    if ctx.enable_nrt:
        ctx.nrt.incref(self.builder, typ, val)
1667
+
1668
def decref(self, typ, val):
    """Emit an NRT decref for ``val`` of type ``typ``; no-op without NRT."""
    if self.context.enable_nrt:
        # Do not associate the decref with a "use": it creates "jumpy"
        # line info, as decrefs usually sit where the ir.Del nodes are,
        # which is at the end of the block.
        with debuginfo.suspend_emission(self.builder):
            self.context.nrt.decref(self.builder, typ, val)
1677
+
1678
+
1679
class CUDALower(Lower):
    """
    ``Lower`` subclass that adds debug-info oriented variable handling.

    When debug info is enabled and the SROA-like optimisation is disabled,
    source-level variables that are assigned values of more than one type
    ("polymorphic" variables) share one stack slot per source name, and
    debug-value updates are emitted after stores to scalar variables.
    """

    def storevar(self, value, name, argidx=None):
        """
        Store the value into the given variable.

        With ``config.CUDA_DEBUG_POLY`` enabled, a polymorphic variable is
        written into its shared slot as a one-byte discriminant at offset 0
        followed by the data at a byte offset equal to the ABI size of the
        value's type. All other stores go through the parent
        implementation and may then be followed by a debug-value update.
        """
        # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
        if config.CUDA_DEBUG_POLY:
            # The source-level name is the part before the first "."
            src_name = name.split(".")[0]
            if src_name in self.poly_var_typ_map:
                # Ensure allocation happens first (if needed)
                fetype = self.typeof(name)
                self._alloca_var(name, fetype)
                # Discriminant and data are located in the same union
                ptr = self.poly_var_loc_map[src_name]
                # Firstly write discriminant to the beginning of union as i8
                dtype = types.UnionType(self.poly_var_typ_map[src_name])
                # Compute discriminant = index of type in sorted union
                if isinstance(fetype, types.Literal):
                    lookup_type = fetype.literal_type
                else:
                    lookup_type = fetype
                discriminant_val = list(dtype.types).index(lookup_type)
                # Bitcast union pointer directly to i8* and write
                # discriminant at offset 0
                discriminant_ptr = self.builder.bitcast(
                    ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
                )
                discriminant_i8 = llvm_ir.Constant(
                    llvm_ir.IntType(8), discriminant_val
                )
                self.builder.store(discriminant_i8, discriminant_ptr)
                # Secondly write data at offset = sizeof(fetype) in bytes
                lltype = self.context.get_value_type(fetype)
                sizeof_bytes = self.context.get_abi_sizeof(lltype)
                # Bitcast to i8* and use byte-level GEP
                byte_ptr = self.builder.bitcast(
                    ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
                )
                data_byte_ptr = self.builder.gep(
                    byte_ptr,
                    [llvm_ir.Constant(llvm_ir.IntType(64), sizeof_bytes)],
                )
                # Cast to the correct type pointer
                castptr = self.builder.bitcast(
                    data_byte_ptr, llvm_ir.PointerType(lltype)
                )
                self.builder.store(value, castptr)
                # The polymorphic store is complete; skip the parent store
                # and the debug-value emission below
                return

        # For non-polymorphic variables, use parent implementation
        super().storevar(value, name, argidx)

        # Emit llvm.dbg.value instead of llvm.dbg.declare for local scalar
        # variables immediately after a store instruction.
        if (
            self.context.enable_debuginfo
            # Conditions used to elide stores in parent method
            and self.store_var_needed(name)
        ):
            fetype = self.typeof(name)
            lltype = self.context.get_value_type(fetype)
            int_type = (llvm_ir.IntType,)
            real_type = llvm_ir.FloatType, llvm_ir.DoubleType
            # Only integer and floating-point values get a debug value
            if isinstance(lltype, int_type + real_type):
                sizeof = self.context.get_abi_sizeof(lltype)
                datamodel = self.context.data_model_manager[fetype]
                # Argument stores are attributed to the definition line
                line = self.loc.line if argidx is None else self.defn_loc.line
                if not name.startswith("$"):
                    # Emit debug value for user variable
                    src_name = name.split(".")[0]
                    if src_name not in self.poly_var_typ_map:
                        # Insert the llvm.dbg.value intrinsic call
                        self.debuginfo.update_variable(
                            self.builder,
                            value,
                            src_name,
                            lltype,
                            sizeof,
                            line,
                            datamodel,
                            argidx,
                        )
                elif isinstance(value, llvm_ir.LoadInstr):
                    # Emit debug value for user variable that falls out of the
                    # coverage of dbg.value range per basic block
                    ld_name = value.operands[0].name
                    if not ld_name.startswith(("$", ".")):
                        src_name = ld_name.split(".")[0]
                        if (
                            src_name not in self.poly_var_typ_map
                            # Not yet covered by the dbg.value range
                            and src_name not in self.dbg_val_names
                        ):
                            # If the loaded name matches a function
                            # argument, use its 1-based position as argidx
                            for index, item in enumerate(self.fnargs):
                                if item.name == src_name:
                                    argidx = index + 1
                                    break
                            # Insert the llvm.dbg.value intrinsic call
                            self.debuginfo.update_variable(
                                self.builder,
                                value,
                                src_name,
                                lltype,
                                sizeof,
                                line,
                                datamodel,
                                argidx,
                            )

    def pre_block(self, block):
        """
        Called before lowering ``block``.

        Resets ``self.dbg_val_names`` and, when debug info is enabled and
        the SROA-like optimisation is disabled, fills it with the source
        names of the non-temporary variables assigned in the block.
        """
        super().pre_block(block)

        # dbg.value range covered names
        self.dbg_val_names = set()

        if self.context.enable_debuginfo and self._disable_sroa_like_opt:
            for x in block.find_insts(ir.Assign):
                # Skip temporaries
                if x.target.name.startswith("$"):
                    continue
                ssa_name = x.target.name
                src_name = ssa_name.split(".")[0]
                if src_name not in self.dbg_val_names:
                    self.dbg_val_names.add(src_name)

    def pre_lower(self):
        """
        Called before lowering all blocks.

        Initialises the bookkeeping for polymorphic variables and, when
        debug info is enabled and the SROA-like optimisation is disabled,
        records every source name whose versioned assignments have more
        than one distinct type.
        """
        super().pre_lower()

        # source name -> set of types assigned to it (more than one type)
        self.poly_var_typ_map = {}
        # source name -> pointer to the shared slot
        self.poly_var_loc_map = {}
        # versioned names that are backed by a shared slot
        self.poly_var_set = set()
        # whether the shared slots have already been zero-filled
        self.poly_cleaned = False
        # key of the last block (the largest key in ``self.blocks``)
        self.lastblk = max(self.blocks.keys())

        # When debug info is enabled, walk through function body and mark
        # variables with polymorphic types.
        if self.context.enable_debuginfo and self._disable_sroa_like_opt:
            poly_map = {}
            # pre-scan all blocks
            for block in self.blocks.values():
                for x in block.find_insts(ir.Assign):
                    if x.target.name.startswith("$"):
                        continue
                    ssa_name = x.target.name
                    src_name = ssa_name.split(".")[0]
                    # Check all the multi-versioned targets
                    if len(x.target.versioned_names) > 0:
                        fetype = self.typeof(ssa_name)
                        if src_name not in poly_map:
                            poly_map[src_name] = set()
                        # deduplicate polymorphic types
                        if isinstance(fetype, types.Literal):
                            fetype = fetype.literal_type
                        poly_map[src_name].add(fetype)
            # Filter out multi-versioned but single typed variables
            self.poly_var_typ_map = {
                k: v for k, v in poly_map.items() if len(v) > 1
            }

    def _alloca_var(self, name, fetype):
        """
        Ensure the given variable has an allocated stack slot (if needed).

        A polymorphic variable is registered in ``self.poly_var_set`` and
        gets one shared slot per source name, allocated on first use; all
        other variables are handled by the parent implementation.
        """
        # If the name is not handled yet and a store is needed
        if name not in self.varmap and self.store_var_needed(name):
            src_name = name.split(".")[0]
            if src_name in self.poly_var_typ_map:
                self.poly_var_set.add(name)
                if src_name not in self.poly_var_loc_map:
                    dtype = types.UnionType(self.poly_var_typ_map[src_name])
                    datamodel = self.context.data_model_manager[dtype]
                    # UnionType has sorted set of types, max at last index
                    maxsizetype = dtype.types[-1]
                    if config.CUDA_DEBUG_POLY:
                        # allocate double the max element size to house
                        # [discriminant + data]
                        aggr_type = types.UniTuple(maxsizetype, 2)
                    else:
                        # allocate single element for data only
                        aggr_type = types.UniTuple(maxsizetype, 1)
                    lltype = self.context.get_value_type(aggr_type)
                    ptr = self.alloca_lltype(src_name, lltype, datamodel)
                    # save the location of the union type for polymorphic var
                    self.poly_var_loc_map[src_name] = ptr
                # Polymorphic variables never get a per-name slot
                return

        super()._alloca_var(name, fetype)

    def store_var_needed(self, name):
        """
        Return whether a store is needed for ``name``: true when the name
        is not singly assigned or the SROA-like optimisation is disabled.
        """
        # Check the conditions used to elide stores in parent class,
        # e.g. in method storevar() and _alloca_var()
        return (
            # used in multiple blocks
            name not in self._singly_assigned_vars
            # lowering with debuginfo
            or self._disable_sroa_like_opt
        )

    def delvar(self, name):
        """
        Delete the given variable.

        For a polymorphic variable the shared slot is loaded and released;
        the shared slots are zero-filled once, when a deletion happens in
        the last block. Other variables use the parent implementation.
        """
        if name in self.poly_var_set:
            fetype = self.typeof(name)
            src_name = name.split(".")[0]
            ptr = self.poly_var_loc_map[src_name]
            self.decref(fetype, self.builder.load(ptr))
            if (
                self._cur_ir_block == self.blocks[self.lastblk]
                and not self.poly_cleaned
            ):
                # Zero-fill the debug union for polymorphic only
                # at the last block
                for v in self.poly_var_loc_map.values():
                    self.builder.store(
                        llvm_ir.Constant(v.type.pointee, None), v
                    )
                self.poly_cleaned = True
            return

        super().delvar(name)

    def getvar(self, name):
        """
        Get a pointer to the given variable's slot.

        For a polymorphic variable the returned pointer is the shared slot
        cast to a pointer to the variable's own LLVM type; with
        ``config.CUDA_DEBUG_POLY`` enabled it first skips a byte offset
        equal to the ABI size of that type.
        """
        if name in self.poly_var_set:
            src_name = name.split(".")[0]
            fetype = self.typeof(name)
            lltype = self.context.get_value_type(fetype)
            ptr = self.poly_var_loc_map[src_name]

            if config.CUDA_DEBUG_POLY:
                # With CUDA_DEBUG_POLY enabled, read value at
                # offset = sizeof(fetype) in bytes
                sizeof_bytes = self.context.get_abi_sizeof(lltype)
                # Bitcast to i8* and use byte-level GEP
                byte_ptr = self.builder.bitcast(
                    ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
                )
                value_byte_ptr = self.builder.gep(
                    byte_ptr,
                    [llvm_ir.Constant(llvm_ir.IntType(64), sizeof_bytes)],
                )
                # Cast to the correct type pointer
                castptr = self.builder.bitcast(
                    value_byte_ptr, llvm_ir.PointerType(lltype)
                )
            else:
                # Otherwise, just bitcast to the correct type
                castptr = self.builder.bitcast(ptr, llvm_ir.PointerType(lltype))
            return castptr
        else:
            return super().getvar(name)
1935
+
1936
+
1937
def _lit_or_omitted(value):
    """Return ``types.literal(value)`` when ``value`` can be typed as a
    literal; otherwise return ``types.Omitted(value)``.

    When ``HAS_NUMBA`` is set, the ``LiteralTypingError`` from
    ``numba.core.errors`` is treated as a failure as well.
    """
    caught = [LiteralTypingError]
    if HAS_NUMBA:
        from numba.core.errors import (
            LiteralTypingError as CoreLiteralTypingError,
        )

        caught.append(CoreLiteralTypingError)

    try:
        return types.literal(value)
    except tuple(caught):
        return types.Omitted(value)