numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487):
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1980 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from collections import namedtuple, defaultdict
5
+ import operator
6
+ import warnings
7
+ from functools import partial
8
+
9
+ from llvmlite import ir as llvm_ir
10
+
11
+ from numba.cuda import HAS_NUMBA
12
+ from numba.cuda.core import ir
13
+ from numba.cuda import debuginfo, cgutils, utils, typing, types
14
+ from numba import cuda
15
+ from numba.cuda.core import (
16
+ ir_utils,
17
+ targetconfig,
18
+ funcdesc,
19
+ config,
20
+ generators,
21
+ removerefctpass,
22
+ )
23
+
24
+ from numba.cuda.core.errors import (
25
+ LoweringError,
26
+ new_error_context,
27
+ TypingError,
28
+ LiteralTypingError,
29
+ UnsupportedError,
30
+ NumbaDebugInfoWarning,
31
+ )
32
+ from numba.cuda.core.funcdesc import default_mangler
33
+ from numba.cuda.core.environment import Environment
34
+ from numba.cuda.core.analysis import compute_use_defs, must_use_alloca
35
+ from numba.cuda.misc.firstlinefinder import get_func_body_first_lineno
36
+ from numba.cuda.misc.coverage_support import get_registered_loc_notify
37
+
38
+
39
+ _VarArgItem = namedtuple("_VarArgItem", ("vararg", "index"))
40
+
41
+
42
+ class BaseLower(object):
43
+ """
44
+ Lower IR to LLVM
45
+ """
46
+
47
def __init__(self, context, library, fndesc, func_ir, metadata=None):
    """
    Set up the state required to lower *func_ir*.

    Parameters
    ----------
    context
        Target context; a sub-target bound to this function's environment
        and descriptor is stored on ``self.context``.
    library
        Code library used to create the IR module for this function.
    fndesc
        Function descriptor of the function being lowered.
    func_ir
        The Numba IR of the function.
    metadata
        Optional metadata object, stored as-is.
    """
    self.library = library
    self.fndesc = fndesc
    self.func_ir = func_ir
    self.metadata = metadata
    self.generator_info = func_ir.generator_info
    # Blocks are stored in ascending key order.
    self.blocks = dict(sorted(func_ir.blocks.items()))
    self.flags = targetconfig.ConfigStack.top_or_none()

    # The LLVM module receiving the generated code
    self.module = self.library.create_ir_module(self.fndesc.unique_name)

    # Execution environment made available to the compiled function
    self.env = Environment.from_fndesc(self.fndesc)

    # Bookkeeping used while lowering
    self.blkmap = {}
    self.pending_phis = {}
    self.varmap = {}
    self.firstblk = min(self.blocks.keys())
    self.loc = -1

    # The context seen inside the lowerer additionally knows about the
    # environment and the function descriptor.
    self.context = context.subtarget(
        environment=self.env, fndesc=self.fndesc
    )

    # Pick the debug info builder: the real one only when debug info is on.
    if self.context.enable_debuginfo:
        dibuildercls = self.context.DIBuilder
    else:
        dibuildercls = debuginfo.DummyDIBuilder

    # Location of the function definition, for debug info
    self.defn_loc = self._compute_def_location()

    self.debuginfo = dibuildercls(
        module=self.module,
        filepath=func_ir.loc.filename,
        cgctx=context,
        directives_only=self.flags.dbg_directives_only,
    )

    # Objects to be told about each source location being lowered
    self._loc_notify_registry = get_registered_loc_notify()

    # Let subclasses finish their own setup
    self.init()
100
+
101
@property
def call_conv(self):
    """The ``call_conv`` attribute of the lowerer's target context."""
    target_context = self.context
    return target_context.call_conv
104
+
105
def init(self):
    """Subclass initialisation hook, invoked at the end of ``__init__``.

    The base implementation does nothing.
    """
    return None
107
+
108
def init_pyapi(self):
    """
    Create the Python API helper and the environment manager for the
    function being lowered, unless ``self.pyapi`` is already set.
    """
    if self.pyapi is None:
        self.pyapi = self.context.get_python_api(self.builder)

        # Keep the environment manager and its pieces around for later use
        manager = self.context.get_env_manager(self.builder)
        self.env_manager = manager
        self.env_body = manager.env_body
        self.envarg = manager.env_ptr
121
+
122
+ def _compute_def_location(self):
123
+ # Debuginfo requires source to be accurate. Find it and warn if not
124
+ # found. If it's not found, use the func_ir line + 1, this assumes that
125
+ # the function definition is decorated with a 1 line jit decorator.
126
+ defn_loc = self.func_ir.loc.with_lineno(self.func_ir.loc.line + 1)
127
+ if self.context.enable_debuginfo:
128
+ fn = self.func_ir.func_id.func
129
+ optional_lno = get_func_body_first_lineno(fn)
130
+ if optional_lno is not None:
131
+ # -1 as lines start at 1 and this is an offset.
132
+ offset = optional_lno - 1
133
+ defn_loc = self.func_ir.loc.with_lineno(offset)
134
+ else:
135
+ msg = (
136
+ "Could not find source for function: "
137
+ f"{self.func_ir.func_id.func}. Debug line information "
138
+ "may be inaccurate."
139
+ )
140
+ warnings.warn(NumbaDebugInfoWarning(msg))
141
+ return defn_loc
142
+
143
def pre_lower(self):
    """
    Hook run before any block is lowered.

    Resets ``self.pyapi``, registers the function with the debug info
    builder, and marks the function ``noinline`` when full debug info is
    requested.
    """
    # One Lower object may produce several LLVM functions (generators), and
    # each of them needs a fresh Python API / EnvironmentManager.
    self.pyapi = None

    descriptor = self.fndesc
    self.debuginfo.mark_subprogram(
        function=self.builder.function,
        qualname=descriptor.qualname,
        argnames=descriptor.args,
        argtypes=descriptor.argtypes,
        line=self.defn_loc.line,
    )

    # Inlining hurts the debugging experience, so turn it off under full
    # debug info. Functions marked 'alwaysinline' cannot be changed.
    fn_attrs = self.builder.function.attributes
    flags = self.flags
    if flags.debuginfo and not flags.dbg_directives_only:
        if "alwaysinline" not in fn_attrs:
            fn_attrs.add("noinline")
166
+
167
def post_lower(self):
    """
    Hook run once every block has been lowered: finalizes the debug info
    and closes each registered location-notify object.
    """
    self.debuginfo.finalize()
    for listener in self._loc_notify_registry:
        listener.close()
174
+
175
def pre_block(self, block):
    """
    Hook run right before *block* is lowered. Does nothing by default.
    """
    return None
179
+
180
def post_block(self, block):
    """
    Hook run right after *block* has been lowered. Does nothing by default.
    """
    return None
184
+
185
def return_dynamic_exception(self, exc_class, exc_args, nb_types, loc=None):
    """
    Forward a dynamic user exception to the calling convention's
    ``return_dynamic_user_exc``, tagging it with the name of the function
    being lowered.
    """
    convention = self.call_conv
    convention.return_dynamic_user_exc(
        self.builder,
        exc_class,
        exc_args,
        nb_types,
        loc=loc,
        func_name=self.func_ir.func_id.func_name,
    )
194
+
195
def return_exception(self, exc_class, exc_args=None, loc=None):
    """Propagate an exception to the caller through the calling convention."""
    convention = self.call_conv
    convention.return_user_exc(
        self.builder,
        exc_class,
        exc_args,
        loc=loc,
        func_name=self.func_ir.func_id.func_name,
    )
204
+
205
def set_exception(self, exc_class, exc_args=None, loc=None):
    """Record an exception state in the current function through the
    calling convention's ``set_static_user_exc``."""
    convention = self.call_conv
    convention.set_static_user_exc(
        self.builder,
        exc_class,
        exc_args,
        loc=loc,
        func_name=self.func_ir.func_id.func_name,
    )
214
+
215
def emit_environment_object(self):
    """Declare, in the current module, the global that holds the
    Environment object for this function."""
    ctx = self.context
    ctx.declare_env_global(self.module, ctx.get_env_name(self.fndesc))
220
+
221
def lower(self):
    """
    Lower the whole function (or generator) and add the resulting LLVM
    module to the library.
    """
    # The environment global goes into the module first
    self.emit_environment_object()

    if self.generator_info is not None:
        # Generators are lowered as an init function, a next function and,
        # when the generator type has one, a finalizer.
        self.genlower = self.GeneratorLower(self)
        self.gentype = self.genlower.gentype

        self.genlower.lower_init_func(self)
        self.genlower.lower_next_func(self)
        if self.gentype.has_finalizer:
            self.genlower.lower_finalize_func(self)
    else:
        self.genlower = None
        self.lower_normal_function(self.fndesc)

    if config.DUMP_LLVM:
        utils.dump_llvm(self.fndesc, self.module)

    # Strip NRT usage from plain functions that turn out not to need it
    if self.context.enable_nrt and self.generator_info is None:
        removerefctpass.remove_unnecessary_nrt_usage(
            self.function, context=self.context, fndesc=self.fndesc
        )

    # Target-specific transformation after lowering
    self.context.post_lowering(self.module, self.library)

    # Hand the finished module to the library
    self.library.add_ir_module(self.module)
250
+
251
def extract_function_arguments(self):
    """Decode the function's arguments via the calling convention, store
    them on ``self.fnargs`` and return them."""
    decoded = self.call_conv.decode_arguments(
        self.builder, self.fndesc.argtypes, self.function
    )
    self.fnargs = decoded
    return decoded
256
+
257
def lower_normal_function(self, fndesc):
    """
    Lower the non-generator function described by *fndesc*.
    """
    self.setup_function(fndesc)

    # Decode the incoming arguments, then lower the body
    self.extract_function_arguments()
    tail_of_entry = self.lower_function_body()

    # Terminate the entry block with a jump to the first lowered block.
    # Debug metadata emission is suspended so that this unconditional jump
    # is not associated with the metadata of the end of the function body.
    with debuginfo.suspend_emission(self.builder):
        self.builder.position_at_end(tail_of_entry)
        first_bb = self.blkmap[self.firstblk]
        self.builder.branch(first_bb)
273
+
274
def lower_function_body(self):
    """
    Lower the body of the current function.

    Returns the basic block the builder was positioned in right after
    ``pre_lower()`` ran, i.e. the tail of the entry block.
    """
    # One LLVM basic block per IR block
    for offset in self.blocks:
        self.blkmap[offset] = self.function.append_basic_block(
            "B%s" % offset
        )

    self.pre_lower()
    # pre_lower() may have moved the builder to a different basic block
    entry_block_tail = self.builder.basic_block

    self.debug_print(f"# function begin: {self.fndesc.unique_name}")

    # Lower the blocks one after another
    for offset, ir_block in self.blocks.items():
        self.builder.position_at_end(self.blkmap[offset])
        self.debug_print(f"# lower block: {offset}")
        self.lower_block(ir_block)

    self.post_lower()
    return entry_block_tail
299
+
300
def lower_block(self, block):
    """
    Lower every statement of *block*, bracketed by the ``pre_block`` and
    ``post_block`` hooks.
    """
    self.pre_block(block)
    for stmt in block.body:
        self.loc = stmt.loc
        # Errors are reported as LoweringError at the statement's location
        # by default.
        make_error = partial(LoweringError, loc=self.loc)
        error_scope = new_error_context(
            'lowering "{inst}" at {loc}',
            inst=stmt,
            loc=self.loc,
            errcls_=make_error,
        )
        with error_scope:
            self.lower_inst(stmt)
    self.post_block(block)
316
+
317
def create_cpython_wrapper(self, release_gil=False):
    """
    Build the CPython wrapper for this function. When a generator was
    lowered, a wrapper for the generator descriptor is built first.
    """
    descriptors = []
    if self.genlower:
        descriptors.append(self.genlower.gendesc)
    descriptors.append(self.fndesc)

    for descriptor in descriptors:
        self.context.create_cpython_wrapper(
            self.library,
            descriptor,
            self.env,
            self.call_helper,
            release_gil=release_gil,
        )
336
+
337
def create_cfunc_wrapper(self):
    """
    Build the C wrapper for this function.

    Raises ``UnsupportedError`` when a generator was lowered.
    """
    if self.genlower:
        raise UnsupportedError("generator as a first-class function type")
    ctx = self.context
    ctx.create_cfunc_wrapper(
        self.library, self.fndesc, self.env, self.call_helper
    )
346
+
347
def setup_function(self, fndesc):
    """
    Declare the LLVM function for *fndesc*, create its entry block and an
    IR builder positioned in it, and initialise the call helper.
    """
    self.function = self.context.declare_function(self.module, fndesc)

    if self.flags.dbg_optnone:
        fn_attrs = self.function.attributes
        # Left alone when the function is marked 'alwaysinline'.
        if "alwaysinline" not in fn_attrs:
            for attr in ("optnone", "noinline"):
                fn_attrs.add(attr)

    self.entry_block = self.function.append_basic_block("entry")
    self.builder = llvm_ir.IRBuilder(self.entry_block)
    self.call_helper = self.call_conv.init_call_helper(self.builder)
358
+
359
def typeof(self, varname):
    """Look up the type recorded for *varname* in the function
    descriptor's typemap."""
    typemap = self.fndesc.typemap
    return typemap[varname]
361
+
362
def notify_loc(self, loc: ir.Loc) -> None:
    """Tell every registered location-notify object that an instruction at
    `loc` is about to be lowered.
    """
    for listener in self._loc_notify_registry:
        listener.notify(loc)
368
+
369
def debug_print(self, msg):
    """Emit a debug print of *msg*, prefixed with the function's qualified
    name, but only when ``config.DEBUG_JIT`` is set."""
    if not config.DEBUG_JIT:
        return
    text = f"DEBUGJIT [{self.fndesc.qualname}]: {msg}"
    self.context.debug_print(self.builder, text)
374
+
375
def print_variable(self, msg, varname):
    """Debugging helper that emits ``print(msg, varname)``.

    Parameters
    ----------
    msg : str
        Literal string to be printed.
    varname : str
        Name of the variable whose value is printed.
    """
    msg_ty = types.literal(msg)
    var_ty = self.fndesc.typemap[varname]
    # The literal message carries its value in its type, so a dummy value
    # is passed for it.
    values = (
        self.context.get_dummy_value(),
        self.loadvar(varname),
    )
    print_sig = typing.signature(types.none, msg_ty, var_ty)

    print_impl = self.context.get_function(print, print_sig)
    print_impl(self.builder, values)
394
+
395
+
396
+ class Lower(BaseLower):
397
+ GeneratorLower = generators.GeneratorLower
398
+
399
def init(self):
    """Run the base initialisation hook, then work out which variables are
    assigned exactly once."""
    super(Lower, self).init()
    self._find_singly_assigned_variable()
403
+
404
+ @property
405
+ def _disable_sroa_like_opt(self):
406
+ """Flags that the SROA like optimisation that Numba performs (which
407
+ prevent alloca and subsequent load/store for locals) should be disabled.
408
+ Currently, this is conditional solely on the presence of a request for
409
+ the emission of debug information."""
410
+ if self.flags is None:
411
+ return False
412
+
413
+ return self.flags.debuginfo and not self.flags.dbg_directives_only
414
+
415
+ def _find_singly_assigned_variable(self):
416
+ func_ir = self.func_ir
417
+ blocks = func_ir.blocks
418
+
419
+ sav = set()
420
+
421
+ if not self.func_ir.func_id.is_generator:
422
+ use_defs = compute_use_defs(blocks)
423
+ alloca_vars = must_use_alloca(blocks)
424
+
425
+ # Compute where variables are defined
426
+ var_assign_map = defaultdict(set)
427
+ for blk, vl in use_defs.defmap.items():
428
+ for var in vl:
429
+ var_assign_map[var].add(blk)
430
+
431
+ # Compute where variables are used
432
+ var_use_map = defaultdict(set)
433
+ for blk, vl in use_defs.usemap.items():
434
+ for var in vl:
435
+ var_use_map[var].add(blk)
436
+
437
+ # Keep only variables that are defined locally and used locally
438
+ for var in var_assign_map:
439
+ if var not in alloca_vars and len(var_assign_map[var]) == 1:
440
+ # Usemap does not keep locally defined variables.
441
+ if len(var_use_map[var]) == 0:
442
+ # Ensure that the variable is not defined multiple times
443
+ # in the block
444
+ [defblk] = var_assign_map[var]
445
+ assign_stmts = self.blocks[defblk].find_insts(ir.Assign)
446
+ assigns = [
447
+ stmt
448
+ for stmt in assign_stmts
449
+ if stmt.target.name == var
450
+ ]
451
+ if len(assigns) == 1:
452
+ sav.add(var)
453
+
454
+ self._singly_assigned_vars = sav
455
+ self._blk_local_varmap = {}
456
+
457
def pre_block(self, block):
    """
    Hook run before *block* is lowered.

    Records *block* as the current IR block and, if the block contains a
    call to ``eh.exception_check`` and ends in a branch, attaches an
    ``_in_try_block`` attribute to the builder naming the branch's true
    target.
    """
    from numba.cuda.core.unsafe import eh

    super(Lower, self).pre_block(block)
    self._cur_ir_block = block

    # NOTE(review): `block` is an IR block while `self.firstblk` is a key
    # of `self.blocks`; confirm whether this comparison can ever be true.
    if block == self.firstblk:
        # create slots for all the vars, irrespective of whether they are
        # initialized, SSA will pick this up and warn users about using
        # uninitialized variables. Slots are added as alloca in the first
        # block
        bb = self.blkmap[self.firstblk]
        self.builder.position_at_end(bb)
        # A dedicated loop name is used here so that the `block` parameter
        # is not rebound; the try-block detection below must look at the
        # block being lowered.
        all_names = {
            del_stmt.value
            for ir_block in self.blocks.values()
            for del_stmt in ir_block.find_insts(ir.Del)
        }
        for name in all_names:
            fetype = self.typeof(name)
            self._alloca_var(name, fetype)

    # Detect if we are in a TRY block by looking for a call to
    # `eh.exception_check`.
    for call in block.find_exprs(op="call"):
        defn = ir_utils.guard(
            ir_utils.get_definition,
            self.func_ir,
            call.func,
        )
        if defn is not None and isinstance(defn, ir.Global):
            if defn.value is eh.exception_check:
                if isinstance(block.terminator, ir.Branch):
                    targetblk = self.blkmap[block.terminator.truebr]
                    # NOTE: This hacks in an attribute for call_conv to
                    # pick up. This hack is no longer needed when
                    # all old-style implementations are gone.
                    self.builder._in_try_block = {"target": targetblk}
                    break
496
+
497
def post_block(self, block):
    """Remove the ``_in_try_block`` marker from the builder, if present."""
    try:
        delattr(self.builder, "_in_try_block")
    except AttributeError:
        # Nothing to clean up
        pass
503
+
504
def lower_inst(self, inst):
    """
    Lower one Numba IR statement.

    The statement kind is determined with ``isinstance`` checks tried in a
    fixed order. Kinds handled through a looked-up implementation
    (``StaticSetItem``, ``SetItem``, ``StoreMap``, ``DelItem``,
    ``SetAttr``) return what that implementation returns; all other kinds
    return ``None``.

    Raises ``NotImplementedError`` for an unrecognised statement kind.
    """
    # Debug location for all LLVM instructions emitted from here on
    self.debuginfo.mark_location(self.builder, self.loc.line)
    self.notify_loc(self.loc)
    self.debug_print(str(inst))

    if isinstance(inst, ir.Assign):
        target_ty = self.typeof(inst.target.name)
        lowered = self.lower_assign(target_ty, inst)
        arg_position = None
        if isinstance(inst.value, ir.Arg):
            # A store from an argument (x = arg.x): tell debuginfo which
            # argument it is, located at the `def <func>` line.
            self.debuginfo.mark_location(self.builder, self.defn_loc.line)
            arg_position = inst.value.index + 1  # args start at 1
        self.storevar(lowered, inst.target.name, argidx=arg_position)
        return None

    if isinstance(inst, ir.Branch):
        cond_val = self.loadvar(inst.cond.name)
        true_bb = self.blkmap[inst.truebr]
        false_bb = self.blkmap[inst.falsebr]

        cond_ty = self.typeof(inst.cond.name)
        predicate = self.context.cast(
            self.builder, cond_val, cond_ty, types.boolean
        )
        assert predicate.type == llvm_ir.IntType(1), (
            "cond is not i1: %s" % predicate.type
        )
        self.builder.cbranch(predicate, true_bb, false_bb)
        return None

    if isinstance(inst, ir.Jump):
        self.builder.branch(self.blkmap[inst.target])
        return None

    if isinstance(inst, ir.Return):
        if self.generator_info:
            # StopIteration
            self.genlower.return_from_generator(self)
            return None
        ret_val = self.loadvar(inst.value.name)
        val_ty = self.typeof(inst.value.name)
        ret_ty = self.fndesc.restype
        if isinstance(ret_ty, types.Optional):
            # Optional return types go through their own return path
            self.call_conv.return_optional_value(
                self.builder, ret_ty, val_ty, ret_val
            )
            return None
        assert ret_ty == val_ty, (
            "type '{}' does not match return type '{}'".format(val_ty, ret_ty)
        )
        packed = self.context.get_return_value(self.builder, ret_ty, ret_val)
        self.call_conv.return_value(self.builder, packed)
        return None

    if isinstance(inst, ir.PopBlock):
        # Only a marker, nothing to emit
        return None

    if isinstance(inst, ir.StaticSetItem):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        try:
            impl = self.context.get_function("static_setitem", signature)
        except NotImplementedError:
            # No static implementation: use the generic setitem path
            return self.lower_setitem(
                inst.target, inst.index_var, inst.value, signature
            )
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        value_ty = self.typeof(inst.value.name)
        value = self.context.cast(
            self.builder, value, value_ty, signature.args[2]
        )
        return impl(self.builder, (target, inst.index, value))

    if isinstance(inst, ir.Print):
        self.lower_print(inst)
        return None

    if isinstance(inst, ir.SetItem):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        return self.lower_setitem(
            inst.target, inst.index, inst.value, signature
        )

    if isinstance(inst, ir.StoreMap):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        return self.lower_setitem(inst.dct, inst.key, inst.value, signature)

    if isinstance(inst, ir.DelItem):
        target = self.loadvar(inst.target.name)
        index = self.loadvar(inst.index.name)

        target_ty = self.typeof(inst.target.name)
        index_ty = self.typeof(inst.index.name)

        signature = self.fndesc.calltypes[inst]
        assert signature is not None

        tyctx = self.context.typing_context
        fnop = tyctx.resolve_value_type(operator.delitem)
        callsig = fnop.get_call_type(
            self.context.typing_context,
            signature.args,
            {},
        )
        impl = self.context.get_function(fnop, callsig)

        assert target_ty == signature.args[0]
        index = self.context.cast(
            self.builder, index, index_ty, signature.args[1]
        )

        return impl(self.builder, (target, index))

    if isinstance(inst, ir.Del):
        self.delvar(inst.value)
        return None

    if isinstance(inst, ir.SetAttr):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        signature = self.fndesc.calltypes[inst]

        target_ty = self.typeof(inst.target.name)
        value_ty = self.typeof(inst.value.name)
        assert signature is not None
        assert signature.args[0] == target_ty
        impl = self.context.get_setattr(inst.attr, signature)

        # Cast the value to the type the implementation expects
        value = self.context.cast(
            self.builder, value, value_ty, signature.args[1]
        )

        return impl(self.builder, (target, value))

    if isinstance(inst, ir.DynamicRaise):
        self.lower_dynamic_raise(inst)
        return None

    if isinstance(inst, ir.DynamicTryRaise):
        self.lower_try_dynamic_raise(inst)
        return None

    if isinstance(inst, ir.StaticRaise):
        self.lower_static_raise(inst)
        return None

    if isinstance(inst, ir.StaticTryRaise):
        self.lower_static_try_raise(inst)
        return None

    raise NotImplementedError(type(inst))
652
+
653
def lower_setitem(self, target_var, index_var, value_var, signature):
    """
    Lower ``target[index] = value`` through the ``operator.setitem``
    implementation resolved for *signature*, casting the operands to the
    signature's argument types. Returns the implementation's result.
    """
    target = self.loadvar(target_var.name)
    value = self.loadvar(value_var.name)
    index = self.loadvar(index_var.name)

    target_ty = self.typeof(target_var.name)
    value_ty = self.typeof(value_var.name)
    index_ty = self.typeof(index_var.name)

    tyctx = self.context.typing_context
    fnop = tyctx.resolve_value_type(operator.setitem)
    callsig = fnop.get_call_type(
        self.context.typing_context,
        signature.args,
        {},
    )
    impl = self.context.get_function(fnop, callsig)

    # Bring each operand to the type the implementation expects
    if not isinstance(target_ty, types.Optional):
        unlit = types.unliteral
        assert unlit(target_ty) == unlit(signature.args[0])
    else:
        target = self.context.cast(
            self.builder, target, target_ty, target_ty.type
        )

    index = self.context.cast(
        self.builder, index, index_ty, signature.args[1]
    )
    value = self.context.cast(
        self.builder, value, value_ty, signature.args[2]
    )

    return impl(self.builder, (target, index, value))
688
+
689
def lower_try_dynamic_raise(self, inst):
    """Lower a dynamic raise located inside a try block.

    What Numba can do with exceptions inside a try block is limited, so
    the static try-raise lowering is reused as-is.
    """
    self.lower_static_try_raise(inst)
693
+
694
def lower_dynamic_raise(self, inst):
    """
    Lower a raise whose arguments may include runtime values.

    IR variables among the arguments are loaded and incref'ed and paired
    with their Numba type; any other argument is passed through unchanged
    with a type of ``None``.
    """
    values = []
    value_types = []
    for item in inst.exc_args:
        if isinstance(item, ir.Var):
            # A runtime value
            item_ty = self.typeof(item.name)
            item_val = self.loadvar(item.name)
            self.incref(item_ty, item_val)
        else:
            item_ty, item_val = None, item
        value_types.append(item_ty)
        values.append(item_val)

    self.return_dynamic_exception(
        inst.exc_class, tuple(values), tuple(value_types), loc=self.loc
    )
713
+
714
def lower_static_raise(self, inst):
    """Lower a static raise; an ``exc_class`` of ``None`` is a re-raise."""
    if inst.exc_class is not None:
        self.return_exception(inst.exc_class, inst.exc_args, loc=self.loc)
    else:
        # Reraise
        self.return_exception(None, loc=self.loc)
720
+
721
def lower_static_try_raise(self, inst):
    """Lower a static raise inside a try block by setting the exception
    state; an ``exc_class`` of ``None`` is a re-raise."""
    if inst.exc_class is not None:
        self.set_exception(inst.exc_class, inst.exc_args, loc=self.loc)
    else:
        # Reraise
        self.set_exception(None, loc=self.loc)
727
+
728
def lower_assign(self, ty, inst):
    """
    Lower the right-hand side of the assignment *inst* and return the
    resulting value, cast to the target type *ty* where a cast applies.

    Raises ``NotImplementedError`` for an unsupported right-hand side.
    """
    rhs = inst.value

    # In nopython mode, closure vars are frozen like globals
    if isinstance(rhs, (ir.Const, ir.Global, ir.FreeVar)):
        result = self.context.get_constant_generic(
            self.builder, ty, rhs.value
        )
        self.incref(ty, result)
        return result

    if isinstance(rhs, ir.Expr):
        return self.lower_expr(ty, rhs)

    if isinstance(rhs, ir.Var):
        source_val = self.loadvar(rhs.name)
        source_ty = self.typeof(rhs.name)
        result = self.context.cast(self.builder, source_val, source_ty, ty)
        self.incref(ty, result)
        return result

    if isinstance(rhs, ir.Arg):
        # Argument repacking is a detail of Numba's calling convention, so
        # debug info emission is suspended to avoid tying it to some line.
        with debuginfo.suspend_emission(self.builder):
            # Argument types are stored under "arg.FOO" (the convention
            # used in typeinfer); cast from there to the variable type.
            arg_ty = self.typeof("arg." + rhs.name)
            if not isinstance(arg_ty, types.Omitted):
                incoming = self.fnargs[rhs.index]
                result = self.context.cast(
                    self.builder, incoming, arg_ty, ty
                )
            else:
                default = arg_ty.value
                tyctx = self.context.typing_context
                default_ty = tyctx.resolve_value_type_prefer_literal(default)
                # Build the constant using the type of the value itself...
                const = self.context.get_constant_generic(
                    self.builder,
                    default_ty,
                    default,
                )
                # ...and then cast it to the variable type
                result = self.context.cast(
                    self.builder, const, default_ty, ty
                )
            self.incref(ty, result)
            return result

    if isinstance(rhs, ir.Yield):
        result = self.lower_yield(ty, rhs)
        self.incref(ty, result)
        return result

    raise NotImplementedError(type(rhs), rhs)
780
+
781
def lower_yield(self, retty, inst):
    """
    Lower a yield: suspend, return the yielded value to the caller, emit
    the resumption point, and return a ``None`` constant of type *retty*
    as the value of the yield expression.
    """
    yield_point = self.generator_info.yield_points[inst.index]
    assert yield_point.inst is inst
    helper = generators.LowerYield(self, yield_point, yield_point.live_vars)
    helper.lower_yield_suspend()

    # Value handed to the caller
    local_val = self.loadvar(inst.value.name)
    local_ty = self.typeof(inst.value.name)
    yield_ty = self.gentype.yield_type

    # Cast the local value to the generator's yield type
    yielded = self.context.cast(self.builder, local_val, local_ty, yield_ty)

    # Convert it to its return representation and return it
    packed = self.context.get_return_value(
        self.builder,
        yield_ty,
        yielded,
    )
    self.call_conv.return_value(self.builder, packed)

    # Execution continues here on resumption
    helper.lower_yield_resume()
    # The yield expression itself evaluates to None
    return self.context.get_constant_generic(self.builder, retty, None)
808
+
809
def lower_binop(self, resty, expr, op):
    """
    Lower the binary operation *expr* using operator *op* and return the
    result cast to *resty*.

    Implementations taking statically known operands are tried first
    (both static, then static lhs only, then static rhs only); the generic
    implementation is used when none of them applies.
    """
    # Map operator.the_op to the corresponding function type
    op = self.context.typing_context.resolve_value_type(op)

    lhs_var = expr.lhs
    rhs_var = expr.rhs
    static_lhs = expr.static_lhs
    static_rhs = expr.static_rhs
    lty = self.typeof(lhs_var.name)
    rty = self.typeof(rhs_var.name)
    lhs = self.loadvar(lhs_var.name)
    rhs = self.loadvar(rhs_var.name)

    # Cast the operands to the argument types of the call signature
    signature = self.fndesc.calltypes[expr]
    lhs = self.context.cast(self.builder, lhs, lty, signature.args[0])
    rhs = self.context.cast(self.builder, rhs, rty, signature.args[1])

    def cast_result(res):
        return self.context.cast(
            self.builder, res, signature.return_type, resty
        )

    def try_static_impl(tys, args):
        # Returns None when no static implementation is applicable.
        if any(a is ir.UNDEFINED for a in args):
            return None
        try:
            if isinstance(op, types.Function):
                static_sig = op.get_call_type(
                    self.context.typing_context, tys, {}
                )
            else:
                static_sig = typing.signature(signature.return_type, *tys)
        except TypingError:
            return None
        try:
            static_impl = self.context.get_function(op, static_sig)
            return static_impl(self.builder, args)
        except NotImplementedError:
            return None

    def static_candidates():
        # Produced lazily so each candidate is only built when it is tried.
        yield (
            (_lit_or_omitted(static_lhs), _lit_or_omitted(static_rhs)),
            (static_lhs, static_rhs),
        )
        yield (
            (_lit_or_omitted(static_lhs), rty),
            (static_lhs, rhs),
        )
        yield (
            (lty, _lit_or_omitted(static_rhs)),
            (lhs, static_rhs),
        )

    for candidate_tys, candidate_args in static_candidates():
        res = try_static_impl(candidate_tys, candidate_args)
        if res is not None:
            return cast_result(res)

    # Generic implementation for non-static operands
    sig = op.get_call_type(self.context.typing_context, signature.args, {})
    impl = self.context.get_function(op, sig)
    res = impl(self.builder, (lhs, rhs))
    return cast_result(res)
880
+
881
def lower_getitem(self, resty, expr, value, index, signature):
    """
    Lower ``value[index]`` through the ``operator.getitem`` implementation
    resolved for *signature* and return the result cast to *resty*.
    """
    base_val = self.loadvar(value.name)
    index_val = self.loadvar(index.name)

    # Resolve the getitem implementation
    tyctx = self.context.typing_context
    fnop = tyctx.resolve_value_type(operator.getitem)
    callsig = fnop.get_call_type(
        self.context.typing_context,
        signature.args,
        {},
    )
    impl = self.context.get_function(fnop, callsig)

    # Cast both operands to the argument types of the signature
    operands = (
        (base_val, self.typeof(value.name)),
        (index_val, self.typeof(index.name)),
    )
    cast_operands = []
    for (operand, operand_ty), wanted_ty in zip(operands, signature.args):
        cast_operands.append(
            self.context.cast(self.builder, operand, operand_ty, wanted_ty)
        )

    res = impl(self.builder, cast_operands)
    return self.context.cast(
        self.builder, res, signature.return_type, resty
    )
904
+
905
def _cast_var(self, var, ty):
    """
    Load a Numba IR variable (or one element of a ``*args`` tuple, given
    as a ``_VarArgItem``) and return it as a low-level value cast to the
    Numba type *ty*.
    """
    if not isinstance(var, _VarArgItem):
        source_ty = self.typeof(var.name)
        source_val = self.loadvar(var.name)
    else:
        # Pick element `var.index` out of the *args tuple
        source_ty = self.typeof(var.vararg.name)[var.index]
        source_val = self.builder.extract_value(
            self.loadvar(var.vararg.name), var.index
        )
    return self.context.cast(self.builder, source_val, source_ty, ty)
919
+
920
def fold_call_args(self, fnty, signature, pos_args, vararg, kw_args):
    """
    Produce the low-level argument values for a call.

    ``*args`` (when given) is expanded into positional items. With a
    Python signature available, keyword arguments and defaults are folded
    through ``typing.fold_arguments``; without one, keyword arguments are
    rejected with ``NotImplementedError`` and the positional arguments are
    cast one by one to the signature's argument types.
    """
    if vararg:
        # Expand *args; the items are lowered later by _cast_var()
        vararg_ty = self.typeof(vararg.name)
        assert isinstance(vararg_ty, types.BaseTuple)
        expanded = [_VarArgItem(vararg, i) for i in range(len(vararg_ty))]
        pos_args = pos_args + expanded

    pysig = signature.pysig
    if pysig is not None:

        def normal_handler(index, param, var):
            return self._cast_var(var, signature.args[index])

        def default_handler(index, param, default):
            return self.context.get_constant_generic(
                self.builder, signature.args[index], default
            )

        def stararg_handler(index, param, vars):
            stararg_ty = signature.args[index]
            assert isinstance(stararg_ty, types.BaseTuple), stararg_ty
            values = [
                self._cast_var(var, sigty)
                for var, sigty in zip(vars, stararg_ty)
            ]
            return cgutils.make_anonymous_struct(self.builder, values)

        # Fold keyword arguments and fill in default argument values
        return typing.fold_arguments(
            pysig,
            pos_args,
            dict(kw_args),
            normal_handler,
            default_handler,
            stararg_handler,
        )

    if kw_args:
        raise NotImplementedError(
            "unsupported keyword arguments when calling %s" % (fnty,)
        )
    return [
        self._cast_var(var, sigty)
        for var, sigty in zip(pos_args, signature.args)
    ]
969
+
970
def lower_print(self, inst):
    """
    Lower an ``ir.Print`` statement.

    As far as possible this is treated like an ordinary call to the
    built-in ``print()``, which keeps the special ``ir.Print`` rewrite easy
    to undo once it is no longer needed.
    """
    sig = self.fndesc.calltypes[inst]
    assert sig.return_type == types.none
    fnty = self.context.typing_context.resolve_value_type(print)

    # Arguments inferred to be constant strings get a literal type in the
    # signature used to look up the implementation.
    pos_args = list(inst.args)
    pos_tys = list(sig.args)
    for position in range(len(pos_args)):
        if position not in inst.consts:
            continue
        const_val = inst.consts[position]
        if isinstance(const_val, str):
            pos_tys[position] = types.literal(const_val)

    fixed_sig = typing.signature(sig.return_type, *pos_tys)
    fixed_sig = fixed_sig.replace(pysig=sig.pysig)

    argvals = self.fold_call_args(fnty, sig, pos_args, inst.vararg, {})
    print_impl = self.context.get_function(print, fixed_sig)
    print_impl(self.builder, argvals)
998
+
999
def lower_call(self, resty, expr):
    """
    Lower a call expression and cast its result to ``resty``.

    The function type of the callee selects one of the specialised
    ``_lower_call_*`` helpers. A phantom return type short-circuits to the
    context's dummy value.
    """
    signature = self.fndesc.calltypes[expr]
    self.debug_print("# lower_call: expr = {0}".format(expr))
    if isinstance(signature.return_type, types.Phantom):
        return self.context.get_dummy_value()

    fnty = self.typeof(expr.func.name)

    # Pick the lowering routine matching the callee's function type.
    if isinstance(fnty, types.ObjModeDispatcher):
        lower_with = self._lower_call_ObjModeDispatcher
    elif isinstance(fnty, types.ExternalFunction):
        lower_with = self._lower_call_ExternalFunction
    elif isinstance(fnty, types.ExternalFunctionPointer):
        lower_with = self._lower_call_ExternalFunctionPointer
    elif isinstance(fnty, types.RecursiveCall):
        lower_with = self._lower_call_RecursiveCall
    else:
        lower_with = self._lower_call_normal
    res = lower_with(fnty, expr, signature)

    # A None result is only acceptable for void functions, where it stands
    # for the dummy value; anything else indicates a broken implementation.
    if res is None:
        returns_void = signature.return_type == types.void
        if not returns_void:
            raise LoweringError(
                msg="non-void function returns None from implementation",
                loc=self.loc,
            )
        res = self.context.get_dummy_value()

    return self.context.cast(
        self.builder, res, signature.return_type, resty
    )
1039
+
1040
def _lower_call_ObjModeDispatcher(self, fnty, expr, signature):
    """
    Lower a call to an object-mode dispatcher.

    The native arguments are boxed into Python objects while holding the
    GIL, the dispatcher is called, and the returned object is unboxed back
    into a native value. Both the call failing and the unboxing failing
    make the generated function return with an exception.
    """
    from numba.cuda.core.pythonapi import ObjModeUtils

    self.init_pyapi()
    pyapi = self.pyapi
    builder = self.builder

    # Hold the GIL for the whole boxed call.
    gil_state = pyapi.gil_ensure()

    # Collect types and loaded values of the call arguments.
    names = [arg.name for arg in expr.args]
    arg_tys = [self.typeof(nm) for nm in names]
    arg_vals = [self.loadvar(nm) for nm in names]
    # from_native_value steals a reference, so take one per argument first.
    for ty, native_val in zip(arg_tys, arg_vals):
        self.incref(ty, native_val)

    boxed_args = [
        pyapi.from_native_value(ty, native_val, self.env_manager)
        for ty, native_val in zip(arg_tys, arg_vals)
    ]

    # Fetch the object-mode dispatcher and invoke it.
    callee = ObjModeUtils(pyapi).load_dispatcher(fnty, arg_tys)
    ret_obj = pyapi.call_function_objargs(callee, boxed_args)
    call_failed = cgutils.is_null(builder, ret_obj)
    with builder.if_else(call_failed) as (on_error, on_success):
        # Exception path: this branch must leave the function.
        with on_error:
            for boxed in boxed_args:
                pyapi.decref(boxed)

            pyapi.gil_release(gil_state)

            # Propagate the exception to the caller.
            self.call_conv.return_exc(builder)

        # Regular path: unbox the result.
        with on_success:
            native = pyapi.to_native_value(
                fnty.dispatcher.output_types,
                ret_obj,
            )
            output = native.value

            # Drop the Python objects that are no longer needed.
            pyapi.decref(ret_obj)
            for boxed in boxed_args:
                pyapi.decref(boxed)

            # Run the unboxing cleanup hook when one was provided.
            if callable(native.cleanup):
                native.cleanup()

            pyapi.gil_release(gil_state)

            # Unboxing itself may have failed.
            with builder.if_then(native.is_error):
                self.call_conv.return_exc(builder)

    return output
1104
+
1105
def _lower_call_ExternalFunction(self, fnty, expr, signature):
    """
    Lower a call to a named external function.

    The symbol is declared in the current module from the function type's
    own signature and then called with the folded arguments.
    """
    self.debug_print("# external function")
    folded = self.fold_call_args(
        fnty, signature, expr.args, expr.vararg, expr.kws
    )

    ext_sig = fnty.sig
    descriptor = funcdesc.ExternalFunctionDescriptor(
        fnty.symbol, ext_sig.return_type, ext_sig.args
    )
    declared = self.context.declare_external_function(
        self.builder.module, descriptor
    )
    return self.context.call_external_function(
        self.builder, declared, descriptor.argtypes, folded
    )
1127
+
1128
def _lower_call_ExternalFunctionPointer(self, fnty, expr, signature):
    """
    Lower a call through a C function pointer.

    When the function type requires the GIL, the call is made while
    holding it and every argument declared as ``ffi_forced_object`` is
    boxed into a Python object for the duration of the call.
    """
    self.debug_print("# calling external function pointer")
    argvals = self.fold_call_args(
        fnty, signature, expr.args, expr.vararg, expr.kws
    )
    pointer = self.loadvar(expr.func.name)

    # Plain native pointer: no libpython involvement, call it directly.
    if not fnty.requires_gil:
        return self.context.call_function_pointer(
            self.builder, pointer, argvals, fnty.cconv
        )

    # The callee uses libpython: take the GIL before touching objects.
    self.init_pyapi()
    gil_state = self.pyapi.gil_ensure()

    call_args = []
    boxed = []
    for expected, actual, native in zip(
        fnty.sig.args, signature.args, argvals
    ):
        if expected == types.ffi_forced_object:
            # Box the value; the extra reference is taken before boxing.
            self.incref(actual, native)
            obj = self.pyapi.from_native_value(
                actual, native, self.env_manager
            )
            call_args.append(obj)
            boxed.append(obj)
        else:
            call_args.append(native)

    res = self.context.call_function_pointer(
        self.builder, pointer, call_args, fnty.cconv
    )

    # Drop the temporary Python objects, then give the GIL back.
    for obj in boxed:
        self.pyapi.decref(obj)
    self.pyapi.gil_release(gil_state)
    return res
1185
+
1186
def _lower_call_RecursiveCall(self, fnty, expr, signature):
    """
    Lower a recursive call.

    The callee's mangled name is computed from its overload; when the
    function being built already carries that name the call is emitted as
    an internal (self-recursive) call, otherwise as an unresolved call.
    """
    folded = self.fold_call_args(
        fnty, signature, expr.args, expr.vararg, expr.kws
    )
    overload = fnty.get_overloads(signature.args)
    mangle = self.context.mangler or default_mangler
    target_name = mangle(
        overload.qualname,
        signature.args,
        abi_tags=self.fndesc.abi_tags,
        uid=overload.uid,
    )

    # Self recursion is special-cased.
    if self.builder.function.name.startswith(target_name):
        return self.context.call_internal(
            self.builder, self.fndesc, signature, folded
        )
    return self.context.call_unresolved(
        self.builder, target_name, signature, folded
    )
1217
+
1218
def _lower_call_normal(self, fnty, expr, signature):
    """
    Lower a call resolved through the regular function registry.

    The implementation is looked up in the CUDA target context when the
    expression names a target, and in this lowering's own context
    otherwise. For bound functions the receiver is prepended to the
    arguments.
    """
    self.debug_print("# calling normal function: {0}".format(fnty))
    self.debug_print("# signature: {0}".format(signature))

    if isinstance(fnty, types.ObjModeDispatcher):
        argvals = expr.func.args
    else:
        argvals = self.fold_call_args(
            fnty, signature, expr.args, expr.vararg, expr.kws
        )

    if expr.target is None:
        impl = self.context.get_function(fnty, signature)
    else:
        from numba.cuda.descriptor import cuda_target

        impl = cuda_target.target_context.get_function(fnty, signature)

    if signature.recvr:
        # For a bound function the function object itself is the "self"
        # reference; it becomes the first argument.
        receiver = self.loadvar(expr.func.name)
        argvals = [receiver, *argvals]

    return impl(self.builder, argvals, self.loc)
1249
+
1250
def lower_expr(self, resty, expr):
    """
    Lower an expression node, dispatching on ``expr.op``.

    ``resty`` is the type expected for the result; most branches cast the
    produced value to it before returning. Operations not handled inline
    are looked up in ``self.context.special_ops``; if the op is unknown
    there as well, NotImplementedError is raised with the expression.
    """
    if expr.op == "binop":
        return self.lower_binop(resty, expr, expr.fn)
    elif expr.op == "inplace_binop":
        lty = self.typeof(expr.lhs.name)
        if lty.mutable:
            return self.lower_binop(resty, expr, expr.fn)
        else:
            # inplace operators on non-mutable types reuse the same
            # definition as the corresponding copying operators.
            return self.lower_binop(resty, expr, expr.immutable_fn)
    elif expr.op == "unary":
        val = self.loadvar(expr.value.name)
        typ = self.typeof(expr.value.name)
        func_ty = self.context.typing_context.resolve_value_type(expr.fn)
        # Get function
        signature = self.fndesc.calltypes[expr]
        impl = self.context.get_function(func_ty, signature)
        # Convert argument to match
        val = self.context.cast(self.builder, val, typ, signature.args[0])
        res = impl(self.builder, [val])
        res = self.context.cast(
            self.builder, res, signature.return_type, resty
        )
        return res

    elif expr.op == "call":
        res = self.lower_call(resty, expr)
        return res

    elif expr.op == "pair_first":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        res = self.context.pair_first(self.builder, val, ty)
        # The extracted member is returned with its own reference.
        self.incref(resty, res)
        return res

    elif expr.op == "pair_second":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        res = self.context.pair_second(self.builder, val, ty)
        # The extracted member is returned with its own reference.
        self.incref(resty, res)
        return res

    elif expr.op in ("getiter", "iternext"):
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        signature = self.fndesc.calltypes[expr]
        # The op name itself is the key of the implementation.
        impl = self.context.get_function(expr.op, signature)
        [fty] = signature.args
        castval = self.context.cast(self.builder, val, ty, fty)
        res = impl(self.builder, (castval,))
        res = self.context.cast(
            self.builder, res, signature.return_type, resty
        )
        return res

    elif expr.op == "exhaust_iter":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        # Unpack optional
        if isinstance(ty, types.Optional):
            val = self.context.cast(self.builder, val, ty, ty.type)
            ty = ty.type

        # If we have a tuple, we needn't do anything
        # (and we can't iterate over the heterogeneous ones).
        if isinstance(ty, types.BaseTuple):
            assert ty == resty
            self.incref(ty, val)
            return val

        itemty = ty.iterator_type.yield_type
        # Start from an undefined tuple and fill it item by item.
        tup = self.context.get_constant_undef(resty)
        pairty = types.Pair(itemty, types.boolean)
        getiter_sig = typing.signature(ty.iterator_type, ty)
        getiter_impl = self.context.get_function("getiter", getiter_sig)
        iternext_sig = typing.signature(pairty, ty.iterator_type)
        iternext_impl = self.context.get_function("iternext", iternext_sig)
        iterobj = getiter_impl(self.builder, (val,))
        # We call iternext() as many times as desired (`expr.count`).
        for i in range(expr.count):
            pair = iternext_impl(self.builder, (iterobj,))
            is_valid = self.context.pair_second(self.builder, pair, pairty)
            # Running out of items early is reported as a ValueError.
            with cgutils.if_unlikely(
                self.builder, self.builder.not_(is_valid)
            ):
                self.return_exception(ValueError, loc=self.loc)
            item = self.context.pair_first(self.builder, pair, pairty)
            tup = self.builder.insert_value(tup, item, i)

        # Call iternext() once more to check that the iterator
        # is exhausted.
        pair = iternext_impl(self.builder, (iterobj,))
        is_valid = self.context.pair_second(self.builder, pair, pairty)
        with cgutils.if_unlikely(self.builder, is_valid):
            self.return_exception(ValueError, loc=self.loc)

        self.decref(ty.iterator_type, iterobj)
        return tup

    elif expr.op == "getattr":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)

        if isinstance(resty, types.BoundFunction):
            # if we are getting out a method, assume we have typed this
            # properly and just build a bound function object
            casted = self.context.cast(self.builder, val, ty, resty.this)
            res = self.context.get_bound_function(
                self.builder, casted, resty.this
            )
            self.incref(resty, res)
            return res
        else:
            impl = self.context.get_getattr(ty, expr.attr)
            attrty = self.context.typing_context.resolve_getattr(
                ty, expr.attr
            )

            if impl is None:
                # ignore the attribute
                return self.context.get_dummy_value()
            else:
                res = impl(self.context, self.builder, ty, val, expr.attr)

            # Cast the attribute type to the expected output type
            res = self.context.cast(self.builder, res, attrty, resty)
            return res

    elif expr.op == "static_getitem":
        signature = typing.signature(
            resty,
            self.typeof(expr.value.name),
            _lit_or_omitted(expr.index),
        )
        try:
            # Both get_function() and the returned implementation can
            # raise NotImplementedError if the types aren't supported
            impl = self.context.get_function("static_getitem", signature)
            return impl(
                self.builder, (self.loadvar(expr.value.name), expr.index)
            )
        except NotImplementedError:
            # Without an index variable there is nothing to fall back on.
            if expr.index_var is None:
                raise
            # Fall back on the generic getitem() implementation
            # for this type.
            signature = self.fndesc.calltypes[expr]
            return self.lower_getitem(
                resty, expr, expr.value, expr.index_var, signature
            )
    elif expr.op == "typed_getitem":
        signature = typing.signature(
            resty,
            self.typeof(expr.value.name),
            self.typeof(expr.index.name),
        )
        impl = self.context.get_function("typed_getitem", signature)
        return impl(
            self.builder,
            (self.loadvar(expr.value.name), self.loadvar(expr.index.name)),
        )
    elif expr.op == "getitem":
        signature = self.fndesc.calltypes[expr]
        return self.lower_getitem(
            resty, expr, expr.value, expr.index, signature
        )

    elif expr.op == "build_tuple":
        itemvals = [self.loadvar(i.name) for i in expr.items]
        itemtys = [self.typeof(i.name) for i in expr.items]
        # Cast every item to the member type at its position in resty.
        castvals = [
            self.context.cast(self.builder, val, fromty, toty)
            for val, toty, fromty in zip(itemvals, resty, itemtys)
        ]
        tup = self.context.make_tuple(self.builder, resty, castvals)
        self.incref(resty, tup)
        return tup

    elif expr.op == "build_list":
        itemvals = [self.loadvar(i.name) for i in expr.items]
        itemtys = [self.typeof(i.name) for i in expr.items]
        if isinstance(resty, types.LiteralList):
            # A literal list is built as a tuple of its member types.
            castvals = [
                self.context.cast(self.builder, val, fromty, toty)
                for val, toty, fromty in zip(itemvals, resty.types, itemtys)
            ]
            tup = self.context.make_tuple(
                self.builder, types.Tuple(resty.types), castvals
            )
            self.incref(resty, tup)
            return tup
        else:
            # A regular list casts every item to the list's dtype.
            castvals = [
                self.context.cast(self.builder, val, fromty, resty.dtype)
                for val, fromty in zip(itemvals, itemtys)
            ]
            return self.context.build_list(self.builder, resty, castvals)

    elif expr.op == "build_set":
        # Insert in reverse order, as Python does
        items = expr.items[::-1]
        itemvals = [self.loadvar(i.name) for i in items]
        itemtys = [self.typeof(i.name) for i in items]
        castvals = [
            self.context.cast(self.builder, val, fromty, resty.dtype)
            for val, fromty in zip(itemvals, itemtys)
        ]
        return self.context.build_set(self.builder, resty, castvals)

    elif expr.op == "build_map":
        items = expr.items
        keys, values = [], []
        key_types, value_types = [], []
        # Gather loaded values and types of every key/value pair.
        for k, v in items:
            key = self.loadvar(k.name)
            keytype = self.typeof(k.name)
            val = self.loadvar(v.name)
            valtype = self.typeof(v.name)
            keys.append(key)
            values.append(val)
            key_types.append(keytype)
            value_types.append(valtype)
        return self.context.build_map(
            self.builder,
            resty,
            list(zip(key_types, value_types)),
            list(zip(keys, values)),
        )

    elif expr.op == "cast":
        val = self.loadvar(expr.value.name)
        ty = self.typeof(expr.value.name)
        castval = self.context.cast(self.builder, val, ty, resty)
        self.incref(resty, castval)
        return castval

    elif expr.op == "phi":
        # PHI nodes are expected to be gone by the time lowering runs.
        raise LoweringError("PHI not stripped")

    elif expr.op == "null":
        return self.context.get_constant_null(resty)

    elif expr.op == "undef":
        # Numba does not raise an UnboundLocalError for undefined variables.
        # The variable is set to zero.
        return self.context.get_constant_null(resty)

    elif expr.op in self.context.special_ops:
        # Ops registered on the context receive the lowerer and the
        # expression.
        res = self.context.special_ops[expr.op](self, expr)
        return res

    raise NotImplementedError(expr)
1504
+
1505
def _alloca_var(self, name, fetype):
    """
    Make sure ``name`` owns a stack slot when one is required.

    Nothing happens when a slot is already recorded, or when the variable
    is singly assigned and the SROA-like optimisation is active.
    """
    if name in self.varmap:
        # Already allocated.
        return

    # Singly-assigned names need no slot unless the SROA-like optimisation
    # is disabled (e.g. when lowering with debuginfo).
    slot_elided = (
        name in self._singly_assigned_vars
        and not self._disable_sroa_like_opt
    )
    if slot_elided:
        return

    # Allocate and remember the pointer.
    self.varmap[name] = self.alloca(name, fetype)
1521
+
1522
def getvar(self, name):
    """
    Return the pointer to the stack slot of variable ``name``.
    """
    sroa_active = not self._disable_sroa_like_opt
    if sroa_active:
        # With the SROA-like optimisation on, such names have no slot.
        assert name not in self._blk_local_varmap
        assert name not in self._singly_assigned_vars

    if name not in self.varmap:
        # Allocate the undefined variable on demand.
        # NOTE: Py3.12's LOAD_FAST_AND_CLEAR permits referencing a variable
        # before it has been defined.
        fetype = self.typeof(name)
        self._alloca_var(name, fetype)

    return self.varmap[name]
1535
+
1536
def loadvar(self, name):
    """
    Return the current value of variable ``name``.
    """
    # Block-local values are served directly while the SROA-like
    # optimisation is active.
    if name in self._blk_local_varmap and not self._disable_sroa_like_opt:
        return self._blk_local_varmap[name]

    slot = self.getvar(name)

    if name not in self.func_ir.arg_names:
        return self.builder.load(slot)

    # Loads of function arguments carry no debuginfo: otherwise
    # instructions would appear ahead of the first source line of the
    # function, and breaking on the function symbol would stop at the
    # symbol instead of the first line.
    with debuginfo.suspend_emission(self.builder):
        return self.builder.load(slot)
1553
+
1554
def storevar(self, value, name, argidx=None):
    """
    Store ``value`` into variable ``name``.

    ``argidx`` is given when the store belongs to a function argument; the
    variable is then also registered with the debug info emitter.
    """
    fetype = self.typeof(name)
    # Create the slot first if the variable has none yet.
    self._alloca_var(name, fetype)

    # Singly-assigned names are tracked as plain values, no memory store.
    if name in self._singly_assigned_vars and not self._disable_sroa_like_opt:
        self._blk_local_varmap[name] = value
        return

    from_argument = argidx is not None
    if not from_argument:
        # Release whatever the variable held before; arguments have no
        # previous value to clean up.
        self.decref(fetype, self.loadvar(name))

    # The variable lives on the stack.
    ptr = self.getvar(name)
    if value.type != ptr.type.pointee:
        template = (
            "Storing {value.type} to ptr of {ptr.type.pointee} "
            "('{name}'). FE type {fetype}"
        )
        raise AssertionError(
            template.format(value=value, ptr=ptr, fetype=fetype, name=name)
        )

    if not from_argument:
        self.builder.store(value, ptr)
        return

    # The store is tied to a function argument (it follows the reassembly
    # of structs that the calling convention splatted into many args), so
    # mark the variable accordingly.
    with debuginfo.suspend_emission(self.builder):
        self.builder.store(value, ptr)
    defn = self.defn_loc  # the line with `def <func>`
    lltype = self.context.get_value_type(fetype)
    nbytes = self.context.get_abi_sizeof(lltype)
    model = self.context.data_model_manager[fetype]
    self.debuginfo.mark_variable(
        self.builder,
        ptr,
        name=name,
        lltype=lltype,
        size=nbytes,
        line=defn.line,
        datamodel=model,
        argidx=argidx,
    )
1606
+
1607
def delvar(self, name):
    """
    Delete variable ``name``.
    """
    fetype = self.typeof(name)

    # Out-of-order deletion: the name has no block-local value, so it can
    # no longer be treated as singly assigned.
    if (
        name not in self._blk_local_varmap
        and not self._disable_sroa_like_opt
    ):
        self._singly_assigned_vars.discard(name)

    # The variable may not exist yet, e.g. when it is deleted at the start
    # of a loop but only assigned later in the loop body.
    self._alloca_var(name, fetype)

    if name in self._blk_local_varmap and not self._disable_sroa_like_opt:
        self.decref(fetype, self._blk_local_varmap[name])
        return

    slot = self.getvar(name)
    self.decref(fetype, self.builder.load(slot))
    # Zero-fill the slot so later deletions cannot free it twice.
    self.builder.store(llvm_ir.Constant(slot.type.pointee, None), slot)
1633
+
1634
def alloca(self, name, type):
    """
    Allocate a stack slot for ``name`` holding a value of frontend ``type``.

    The LLVM value type and the data model are looked up on the context and
    the allocation itself is delegated to ``alloca_lltype``.
    """
    ctx = self.context
    value_type = ctx.get_value_type(type)
    model = ctx.data_model_manager[type]
    return self.alloca_lltype(name, value_type, datamodel=model)
1638
+
1639
def alloca_lltype(self, name, lltype, datamodel=None):
    """
    Allocate a stack slot of LLVM type ``lltype`` named ``name``.

    User variables (names not starting with "$") that are not function
    arguments are additionally registered with the debug info emitter.
    Returns the pointer to the slot.
    """
    # Names starting with "$" are not user variables.
    user_variable = not name.startswith("$")
    slot = cgutils.alloca_once(self.builder, lltype, name=name, zfill=False)

    # Function arguments get their debuginfo from the first store into the
    # slot instead, so that repacking of the args splatted by the calling
    # convention is taken care of.
    if not user_variable or name in self.func_ir.arg_names:
        return slot

    nbytes = self.context.get_abi_sizeof(lltype)
    self.debuginfo.mark_variable(
        self.builder,
        slot,
        name=name,
        lltype=lltype,
        size=nbytes,
        line=self.loc.line,
        datamodel=datamodel,
    )
    return slot
1662
+
1663
def incref(self, typ, val):
    """
    Emit an NRT incref for ``val`` of type ``typ``; a no-op when the
    context has NRT disabled.
    """
    ctx = self.context
    if ctx.enable_nrt:
        ctx.nrt.incref(self.builder, typ, val)
1668
+
1669
def decref(self, typ, val):
    """
    Emit an NRT decref for ``val`` of type ``typ``; a no-op when the
    context has NRT disabled.
    """
    ctx = self.context
    if ctx.enable_nrt:
        # Keep the decref out of the line info: decrefs usually sit where
        # the ir.Del nodes are, at the end of the block, and attributing
        # them to a "use" makes the line info "jumpy".
        with debuginfo.suspend_emission(self.builder):
            ctx.nrt.decref(self.builder, typ, val)
1678
+
1679
+
1680
+ class CUDALower(Lower):
1681
+ def _is_shared_array_call(self, fnty):
1682
+ # Check if function type is a cuda.shared.array call
1683
+ if not hasattr(fnty, "typing_key"):
1684
+ return False
1685
+ return fnty.typing_key is cuda.shared.array
1686
+
1687
+ def _lower_call_normal(self, fnty, expr, signature):
1688
+ # Set flag for subsequent store to track shared address space
1689
+ if self.context.enable_debuginfo and self._is_shared_array_call(fnty):
1690
+ self._pending_shared_store = True
1691
+
1692
+ return super()._lower_call_normal(fnty, expr, signature)
1693
+
1694
+ def storevar(self, value, name, argidx=None):
1695
+ """
1696
+ Store the value into the given variable.
1697
+ """
1698
+ # Track address space for debug info
1699
+ if self.context.enable_debuginfo and self._pending_shared_store:
1700
+ from numba.cuda.cudadrv import nvvm
1701
+
1702
+ self._addrspace_map[name] = nvvm.ADDRSPACE_SHARED
1703
+ if not name.startswith("$") and not name.startswith("."):
1704
+ self._pending_shared_store = False
1705
+
1706
+ # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
1707
+ if config.CUDA_DEBUG_POLY:
1708
+ src_name = name.split(".")[0]
1709
+ if src_name in self.poly_var_typ_map:
1710
+ # Ensure allocation happens first (if needed)
1711
+ fetype = self.typeof(name)
1712
+ self._alloca_var(name, fetype)
1713
+ # Discriminant and data are located in the same union
1714
+ ptr = self.poly_var_loc_map[src_name]
1715
+ # Firstly write discriminant to the beginning of union as i8
1716
+ dtype = types.UnionType(self.poly_var_typ_map[src_name])
1717
+ # Compute discriminant = index of type in sorted union
1718
+ if isinstance(fetype, types.Literal):
1719
+ lookup_type = fetype.literal_type
1720
+ else:
1721
+ lookup_type = fetype
1722
+ discriminant_val = list(dtype.types).index(lookup_type)
1723
+ # Bitcast union pointer directly to i8* and write
1724
+ # discriminant at offset 0
1725
+ discriminant_ptr = self.builder.bitcast(
1726
+ ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
1727
+ )
1728
+ discriminant_i8 = llvm_ir.Constant(
1729
+ llvm_ir.IntType(8), discriminant_val
1730
+ )
1731
+ self.builder.store(discriminant_i8, discriminant_ptr)
1732
+ # Secondly write data at offset = sizeof(fetype) in bytes
1733
+ lltype = self.context.get_value_type(fetype)
1734
+ sizeof_bytes = self.context.get_abi_sizeof(lltype)
1735
+ # Bitcast to i8* and use byte-level GEP
1736
+ byte_ptr = self.builder.bitcast(
1737
+ ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
1738
+ )
1739
+ data_byte_ptr = self.builder.gep(
1740
+ byte_ptr,
1741
+ [llvm_ir.Constant(llvm_ir.IntType(64), sizeof_bytes)],
1742
+ )
1743
+ # Cast to the correct type pointer
1744
+ castptr = self.builder.bitcast(
1745
+ data_byte_ptr, llvm_ir.PointerType(lltype)
1746
+ )
1747
+ self.builder.store(value, castptr)
1748
+ return
1749
+
1750
+ # For non-polymorphic variables, use parent implementation
1751
+ super().storevar(value, name, argidx)
1752
+
1753
+ # Emit llvm.dbg.value instead of llvm.dbg.declare for local scalar
1754
+ # variables immediately after a store instruction.
1755
+ if (
1756
+ self.context.enable_debuginfo
1757
+ # Conditions used to elide stores in parent method
1758
+ and self.store_var_needed(name)
1759
+ ):
1760
+ fetype = self.typeof(name)
1761
+ lltype = self.context.get_value_type(fetype)
1762
+ int_type = (llvm_ir.IntType,)
1763
+ real_type = llvm_ir.FloatType, llvm_ir.DoubleType
1764
+ if isinstance(lltype, int_type + real_type):
1765
+ sizeof = self.context.get_abi_sizeof(lltype)
1766
+ datamodel = self.context.data_model_manager[fetype]
1767
+ line = self.loc.line if argidx is None else self.defn_loc.line
1768
+ if not name.startswith("$"):
1769
+ # Emit debug value for user variable
1770
+ src_name = name.split(".")[0]
1771
+ if src_name not in self.poly_var_typ_map:
1772
+ # Insert the llvm.dbg.value intrinsic call
1773
+ self.debuginfo.update_variable(
1774
+ self.builder,
1775
+ value,
1776
+ src_name,
1777
+ lltype,
1778
+ sizeof,
1779
+ line,
1780
+ datamodel,
1781
+ argidx,
1782
+ )
1783
+ elif isinstance(value, llvm_ir.LoadInstr):
1784
+ # Emit debug value for user variable that falls out of the
1785
+ # coverage of dbg.value range per basic block
1786
+ ld_name = value.operands[0].name
1787
+ if not ld_name.startswith(("$", ".")):
1788
+ src_name = ld_name.split(".")[0]
1789
+ if (
1790
+ src_name not in self.poly_var_typ_map
1791
+ # Not yet covered by the dbg.value range
1792
+ and src_name not in self.dbg_val_names
1793
+ ):
1794
+ for index, item in enumerate(self.fnargs):
1795
+ if item.name == src_name:
1796
+ argidx = index + 1
1797
+ break
1798
+ # Insert the llvm.dbg.value intrinsic call
1799
+ self.debuginfo.update_variable(
1800
+ self.builder,
1801
+ value,
1802
+ src_name,
1803
+ lltype,
1804
+ sizeof,
1805
+ line,
1806
+ datamodel,
1807
+ argidx,
1808
+ )
1809
+
1810
+ def pre_block(self, block):
1811
+ super().pre_block(block)
1812
+
1813
+ # dbg.value range covered names
1814
+ self.dbg_val_names = set()
1815
+
1816
+ if self.context.enable_debuginfo and self._disable_sroa_like_opt:
1817
+ for x in block.find_insts(ir.Assign):
1818
+ if x.target.name.startswith("$"):
1819
+ continue
1820
+ ssa_name = x.target.name
1821
+ src_name = ssa_name.split(".")[0]
1822
+ if src_name not in self.dbg_val_names:
1823
+ self.dbg_val_names.add(src_name)
1824
+
1825
+ def pre_lower(self):
1826
+ """
1827
+ Called before lowering all blocks.
1828
+ """
1829
+ super().pre_lower()
1830
+
1831
+ # Track address space for debug info
1832
+ self._addrspace_map = {}
1833
+ self._pending_shared_store = False
1834
+ if self.context.enable_debuginfo:
1835
+ self.debuginfo._set_addrspace_map(self._addrspace_map)
1836
+
1837
+ # Track polymorphic variables for debug info
1838
+ self.poly_var_typ_map = {}
1839
+ self.poly_var_loc_map = {}
1840
+ self.poly_var_set = set()
1841
+ self.poly_cleaned = False
1842
+ self.lastblk = max(self.blocks.keys())
1843
+
1844
+ # When debug info is enabled, walk through function body and mark
1845
+ # variables with polymorphic types.
1846
+ if self.context.enable_debuginfo and self._disable_sroa_like_opt:
1847
+ poly_map = {}
1848
+ # pre-scan all blocks
1849
+ for block in self.blocks.values():
1850
+ for x in block.find_insts(ir.Assign):
1851
+ if x.target.name.startswith("$"):
1852
+ continue
1853
+ ssa_name = x.target.name
1854
+ src_name = ssa_name.split(".")[0]
1855
+ # Check all the multi-versioned targets
1856
+ if len(x.target.versioned_names) > 0:
1857
+ fetype = self.typeof(ssa_name)
1858
+ if src_name not in poly_map:
1859
+ poly_map[src_name] = set()
1860
+ # deduplicate polymorphic types
1861
+ if isinstance(fetype, types.Literal):
1862
+ fetype = fetype.literal_type
1863
+ poly_map[src_name].add(fetype)
1864
+ # Filter out multi-versioned but single typed variables
1865
+ self.poly_var_typ_map = {
1866
+ k: v for k, v in poly_map.items() if len(v) > 1
1867
+ }
1868
+
1869
+ def _alloca_var(self, name, fetype):
1870
+ """
1871
+ Ensure the given variable has an allocated stack slot (if needed).
1872
+ """
1873
+ # If the name is not handled yet and a store is needed
1874
+ if name not in self.varmap and self.store_var_needed(name):
1875
+ src_name = name.split(".")[0]
1876
+ if src_name in self.poly_var_typ_map:
1877
+ self.poly_var_set.add(name)
1878
+ if src_name not in self.poly_var_loc_map:
1879
+ dtype = types.UnionType(self.poly_var_typ_map[src_name])
1880
+ datamodel = self.context.data_model_manager[dtype]
1881
+ # UnionType has sorted set of types, max at last index
1882
+ maxsizetype = dtype.types[-1]
1883
+ if config.CUDA_DEBUG_POLY:
1884
+ # allocate double the max element size to house
1885
+ # [discriminant + data]
1886
+ aggr_type = types.UniTuple(maxsizetype, 2)
1887
+ else:
1888
+ # allocate single element for data only
1889
+ aggr_type = types.UniTuple(maxsizetype, 1)
1890
+ lltype = self.context.get_value_type(aggr_type)
1891
+ ptr = self.alloca_lltype(src_name, lltype, datamodel)
1892
+ # save the location of the union type for polymorphic var
1893
+ self.poly_var_loc_map[src_name] = ptr
1894
+ return
1895
+
1896
+ super()._alloca_var(name, fetype)
1897
+
1898
def store_var_needed(self, name):
    """
    Tell whether a store to the variable ``name`` must be kept.

    Mirrors the conditions the parent class uses to elide stores (see
    its ``storevar()`` and ``_alloca_var()``): a store is needed when the
    name is not in ``self._singly_assigned_vars``; otherwise the answer is
    the value of ``self._disable_sroa_like_opt`` (set when lowering with
    debuginfo).
    """
    if name in self._singly_assigned_vars:
        # Singly assigned: only keep the store when the SROA-like
        # optimisation is disabled.
        return self._disable_sroa_like_opt
    # Used in multiple blocks.
    return True
1907
+
1908
def delvar(self, name):
    """
    Delete the given variable.

    Names outside ``self.poly_var_set`` are handed to the parent class.
    For a name in that set, the value loaded from the slot shared by its
    source name is decref'ed; additionally, the first time this happens
    while lowering the last block, every slot in ``self.poly_var_loc_map``
    is overwritten with a zero constant.
    """
    if name not in self.poly_var_set:
        super().delvar(name)
        return

    var_type = self.typeof(name)
    root = name.partition(".")[0]
    slot = self.poly_var_loc_map[root]
    self.decref(var_type, self.builder.load(slot))

    in_last_block = self._cur_ir_block == self.blocks[self.lastblk]
    if in_last_block and not self.poly_cleaned:
        # Zero-fill the debug union for polymorphic only
        # at the last block
        for union_slot in self.poly_var_loc_map.values():
            zero = llvm_ir.Constant(union_slot.type.pointee, None)
            self.builder.store(zero, union_slot)
        self.poly_cleaned = True
1931
+
1932
def getvar(self, name):
    """
    Get a pointer to the given variable's slot.

    For names in ``self.poly_var_set`` the pointer is derived from the
    slot shared by the source name (text before the first "."), cast to a
    pointer to the variable's own LLVM value type. All other names are
    resolved by the parent class.
    """
    if name not in self.poly_var_set:
        return super().getvar(name)

    root = name.partition(".")[0]
    value_lltype = self.context.get_value_type(self.typeof(name))
    slot = self.poly_var_loc_map[root]

    if not config.CUDA_DEBUG_POLY:
        # No debug layout: the data lives at the start of the slot.
        return self.builder.bitcast(slot, llvm_ir.PointerType(value_lltype))

    # With CUDA_DEBUG_POLY enabled, the value is read at a byte offset
    # equal to sizeof(value type).
    offset = llvm_ir.Constant(
        llvm_ir.IntType(64), self.context.get_abi_sizeof(value_lltype)
    )
    # View the slot as i8* so the GEP below advances in bytes.
    raw_ptr = self.builder.bitcast(
        slot, llvm_ir.PointerType(llvm_ir.IntType(8))
    )
    shifted_ptr = self.builder.gep(raw_ptr, [offset])
    # Reinterpret the shifted address as a pointer to the value type.
    return self.builder.bitcast(
        shifted_ptr, llvm_ir.PointerType(value_lltype)
    )
1964
+
1965
+
1966
def _lit_or_omitted(value):
    """Wrap ``value`` in a Literal type when possible.

    Returns ``types.literal(value)``; if that raises a literal typing
    error (this package's ``LiteralTypingError`` or, when ``HAS_NUMBA`` is
    true, the one from ``numba.core.errors``), returns
    ``types.Omitted(value)`` instead.
    """
    catchable = [LiteralTypingError]
    if HAS_NUMBA:
        from numba.core.errors import (
            LiteralTypingError as CoreLiteralTypingError,
        )

        catchable.append(CoreLiteralTypingError)

    try:
        literal = types.literal(value)
    except tuple(catchable):
        return types.Omitted(value)
    return literal