numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1787 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import types as pytypes # avoid confusion with numba.types
5
+ import copy
6
+ import ctypes
7
+ import numba.cuda.core.analysis
8
+ from numba.cuda import HAS_NUMBA
9
+ from numba.cuda import types, config, cgutils
10
+ from numba.cuda.core import ir
11
+ from numba.cuda.core import errors
12
+ from numba.cuda import typing, utils
13
+ from numba.cuda.core.ir_utils import (
14
+ next_label,
15
+ add_offset_to_labels,
16
+ replace_vars,
17
+ remove_dels,
18
+ rename_labels,
19
+ find_topo_order,
20
+ merge_adjacent_blocks,
21
+ GuardException,
22
+ require,
23
+ guard,
24
+ get_definition,
25
+ find_callname,
26
+ find_build_sequence,
27
+ get_np_ufunc_typ,
28
+ get_ir_of_code,
29
+ simplify_CFG,
30
+ canonicalize_array_math,
31
+ dead_code_elimination,
32
+ )
33
+
34
+ from numba.cuda.core.analysis import (
35
+ compute_cfg_from_blocks,
36
+ compute_use_defs,
37
+ compute_live_variables,
38
+ )
39
+ from numba.cuda.core.imputils import impl_ret_untracked
40
+ from numba.cuda.extending import intrinsic
41
+ from numba.cuda.typing import signature
42
+
43
+ from numba.cuda.core import postproc, rewrites
44
+ from numba.cuda.np.unsafe.ndarray import empty_inferred as unsafe_empty_inferred
45
+ import numpy as np
46
+ import operator
47
+
48
+ """
49
+ Variable enable_inline_arraycall is only used for testing purposes.
50
+ """
51
+ enable_inline_arraycall = True
52
+
53
+
54
def callee_ir_validator(func_ir):
    """Reject callee IR that the inliner cannot handle.

    Walks every ``ir.Assign`` in every block of ``func_ir`` and raises
    ``errors.UnsupportedError`` for the first one whose right-hand side is an
    ``ir.Yield``. Returns ``None`` when no such assignment is present.
    """
    for block in func_ir.blocks.values():
        for assignment in block.find_insts(ir.Assign):
            if not isinstance(assignment.value, ir.Yield):
                continue
            raise errors.UnsupportedError(
                "The use of yield in a closure is unsupported.",
                loc=assignment.loc,
            )
61
+
62
+
63
+ def _created_inlined_var_name(function_name, var_name):
64
+ """Creates a name for an inlined variable based on the function name and the
65
+ variable name. It does this "safely" to avoid the use of characters that are
66
+ illegal in python variable names as there are occasions when function
67
+ generation needs valid python name tokens."""
68
+ inlined_name = f"{function_name}.{var_name}"
69
+ # Replace angle brackets, e.g. "<locals>" is replaced with "_locals_"
70
+ new_name = inlined_name.replace("<", "_").replace(">", "_")
71
+ # The version "version" of the closure function e.g. foo$2 (id 2) is
72
+ # rewritten as "foo_v2". Further "." is also replaced with "_".
73
+ new_name = new_name.replace(".", "_").replace("$", "_v")
74
+ return new_name
75
+
76
+
77
class InlineClosureCallPass(object):
    """Pass that inlines calls to locally defined closures.

    It scans the function IR for ``call`` expressions whose callee is a
    ``make_function`` node (or a sequential ``reduce`` call) and splices the
    callee body into the call site. Optionally it then runs the array-call
    inlining over the loops of the resulting IR.
    """

    def __init__(self, func_ir, parallel_options, swapped=None, typed=False):
        self.func_ir = func_ir
        self.parallel_options = parallel_options
        # A fresh dict per instance when the caller does not supply one.
        self.swapped = {} if swapped is None else swapped
        self.typed = typed

    def run(self):
        """Run inline closure call pass."""
        func_ir = self.func_ir

        # The analyses below rely on ir.Del nodes; they are stripped again at
        # the end of this method.
        postproc.PostProcessor(func_ir).run(True)

        debug_print = _make_debug_print("InlineClosureCallPass")
        debug_print(f"START {func_ir.func_id.func_qualname}")

        modified = False
        pending = list(func_ir.blocks.items())
        while pending:
            _label, block = pending.pop()
            for index, stmt in enumerate(block.body):
                if not isinstance(stmt, ir.Assign):
                    continue
                expr = stmt.value
                if not (isinstance(expr, ir.Expr) and expr.op == "call"):
                    continue

                call_name = guard(find_callname, func_ir, expr)
                func_def = guard(get_definition, func_ir, expr.func)

                inlined = guard(
                    self._inline_reduction,
                    pending,
                    block,
                    index,
                    expr,
                    call_name,
                ) or guard(
                    self._inline_closure, pending, block, index, func_def
                )
                if inlined:
                    modified = True
                    # The block was split by the inliner; the remainder has
                    # been queued on `pending`, so stop scanning this one.
                    break

        if enable_inline_arraycall:
            if modified:
                # Closure inlining leaves chains of trivially connected
                # blocks behind; merge them before looking for loops.
                merge_adjacent_blocks(func_ir.blocks)
            cfg = compute_cfg_from_blocks(func_ir.blocks)
            debug_print("start inline arraycall")
            _debug_dump(cfg)
            loops = cfg.loops()
            # Largest loop body first, i.e. outer loops before inner ones.
            loop_keys = sorted(
                loops.keys(),
                key=lambda key: len(loops[key].body),
                reverse=True,
            )
            visited = []
            for key in loop_keys:
                visited.append(key)
                if guard(
                    _inline_arraycall,
                    func_ir,
                    cfg,
                    visited,
                    loops[key],
                    self.swapped,
                    self.parallel_options.comprehension,
                    self.typed,
                ):
                    modified = True
            if modified:
                _fix_nested_array(func_ir)

        if modified:
            # Inlining can make blocks unreachable (e.g. an inlined function
            # that unconditionally raises); drop those first.
            cfg = compute_cfg_from_blocks(func_ir.blocks)
            for dead_label in cfg.dead_nodes():
                del func_ir.blocks[dead_label]

            dead_code_elimination(func_ir)
            func_ir.blocks = rename_labels(func_ir.blocks)

        # Inlining is finished, so the ir.Del nodes are no longer needed.
        remove_dels(func_ir.blocks)

        debug_print("END")

    def _inline_reduction(self, work_list, block, i, expr, call_name):
        """Inline a sequential ``reduce(f, A[, v])`` call at ``block.body[i]``.

        Bails out through ``require`` (GuardException) when reductions are
        handled by the parallel options or the call is not ``reduce``.
        Raises ``TypeError`` for a wrong argument count. Returns ``True``
        after inlining.
        """
        # Parallel reductions are dealt with in ParforPass instead.
        require(not self.parallel_options.reduction)
        require(call_name in (("reduce", "builtins"), ("reduce", "_functools")))
        if not 2 <= len(expr.args) <= 3:
            raise TypeError(
                "invalid reduce call, "
                "two arguments are required (optional initial "
                "value can also be specified)"
            )
        check_reduce_func(self.func_ir, expr.args[0])

        # Template whose body replaces the reduce() call. It is compiled and
        # inlined, so it is kept exactly as written.
        def reduce_func(f, A, v=None):
            it = iter(A)
            if v is not None:
                s = v
            else:
                s = next(it)
            for a in it:
                s = f(s, a)
            return s

        inline_closure_call(
            self.func_ir,
            self.func_ir.func_id.func.__globals__,
            block,
            i,
            reduce_func,
            work_list=work_list,
            callee_validator=callee_ir_validator,
        )
        return True

    def _inline_closure(self, work_list, block, i, func_def):
        """Inline the closure defined by ``func_def`` at ``block.body[i]``.

        Bails out through ``require`` unless ``func_def`` is a
        ``make_function`` expression. Returns ``True`` after inlining.
        """
        is_closure = (
            isinstance(func_def, ir.Expr) and func_def.op == "make_function"
        )
        require(is_closure)
        inline_closure_call(
            self.func_ir,
            self.func_ir.func_id.func.__globals__,
            block,
            i,
            func_def,
            work_list=work_list,
            callee_validator=callee_ir_validator,
        )
        return True
227
+
228
+
229
def check_reduce_func(func_ir, func_var):
    """Validate the function passed to ``reduce`` and return it for inlining.

    ``func_var`` is looked up in ``func_ir``. Two kinds of definition are
    accepted:

    * an ``ir.FreeVar`` / ``ir.Global`` whose value exposes ``py_func`` (when
      Numba is installed the value must additionally be a ``CPUDispatcher``);
      the wrapped Python function is returned;
    * any object carrying a ``code`` or ``__code__`` attribute (for example a
      ``make_function`` expression); the object itself is returned.

    Raises:
        ValueError: the definition cannot be found or is not an accepted
            kind of function.
        TypeError: the function does not take exactly two arguments.
    """
    reduce_func = guard(get_definition, func_ir, func_var)
    if reduce_func is None:
        # Previously written with a backslash continuation inside the string
        # literal, which embedded the source indentation in the message.
        raise ValueError("Reduce function cannot be found for njit analysis")

    if isinstance(reduce_func, (ir.FreeVar, ir.Global)):
        if HAS_NUMBA:
            from numba.core.registry import CPUDispatcher

            if not isinstance(reduce_func.value, CPUDispatcher):
                raise ValueError("Invalid reduction function")

        # pull out the python function for inlining; a value without
        # `py_func` is reported like every other invalid reduction function
        # rather than escaping as an AttributeError.
        py_func = getattr(reduce_func.value, "py_func", None)
        if py_func is None:
            raise ValueError("Invalid reduction function")
        reduce_func = py_func
    elif not (hasattr(reduce_func, "code") or hasattr(reduce_func, "__code__")):
        raise ValueError("Invalid reduction function")

    f_code = (
        reduce_func.code
        if hasattr(reduce_func, "code")
        else reduce_func.__code__
    )
    if f_code.co_argcount != 2:
        raise TypeError("Reduction function should take 2 arguments")
    return reduce_func
257
+
258
+
259
class InlineWorker(object):
    """Inlining engine that accepts Python functions or ready-made Numba IR.

    Compared with `inline_closure_call`, the callee may be given as a function
    (which is first run through the untyped compiler pipeline, see
    `run_untyped_passes`) or directly as IR (see `inline_ir`). When the worker
    is created with both a typemap and calltypes, those maps are extended with
    the callee's typing information while inlining.
    """

    def __init__(
        self,
        typingctx=None,
        targetctx=None,
        locals=None,
        pipeline=None,
        flags=None,
        validator=callee_ir_validator,
        typemap=None,
        calltypes=None,
    ):
        """
        Create a worker. Every argument is optional, but some have to be
        supplied together:

        * ``targetctx``, ``locals``, ``pipeline`` and ``flags`` are either all
          given or all omitted; when all are given ``typingctx`` is required
          as well. A violation raises ``TypeError`` naming the missing one.
        * ``typemap`` and ``calltypes`` are either both given or both omitted,
          otherwise ``TypeError`` is raised.

        ``validator`` is a callable taking the callee IR; it is invoked before
        inlining so unsupported constructs (such as ``yield`` in a closure)
        can be reported with a clear error. Pass ``None`` to skip validation.
        """
        from numba.cuda.compiler import DefaultPassBuilder

        def _require(value, label):
            if value is None:
                raise TypeError(f"{label} must not be None")

        # The advanced compilation pipeline needs all four of these or none.
        compiler_settings = (
            (targetctx, "targetctx"),
            (locals, "locals"),
            (pipeline, "pipeline"),
            (flags, "flags"),
        )
        n_supplied = sum(value is not None for value, _ in compiler_settings)
        if 0 < n_supplied < len(compiler_settings):
            for value, label in compiler_settings:
                _require(value, label)
        elif n_supplied == len(compiler_settings):
            _require(typingctx, "typingctx")

        self._compiler_pipeline = DefaultPassBuilder.define_untyped_pipeline

        self.typingctx = typingctx
        self.targetctx = targetctx
        self.locals = locals
        self.pipeline = pipeline
        self.flags = flags
        self.validator = validator
        self.debug_print = _make_debug_print("InlineWorker")

        # Updating the type and call maps is only possible when both exist.
        have_typemap = typemap is not None
        have_calltypes = calltypes is not None
        if have_typemap != have_calltypes:
            msg = (
                "typemap and calltypes must both be either None or have a "
                "value, got: %s, %s"
            )
            raise TypeError(msg % (typemap, calltypes))
        self._permit_update_type_and_call_maps = have_typemap and have_calltypes
        self.typemap = typemap
        self.calltypes = calltypes

    def inline_ir(
        self, caller_ir, block, i, callee_ir, callee_freevars, arg_typs=None
    ):
        """Splice ``callee_ir`` into ``caller_ir`` at statement ``i`` of
        ``block``.

        ``callee_freevars`` names the callee's free variables (for IR built
        from a function ``func`` this is ``func.__code__.co_freevars``); those
        names are not renamed. When the worker holds a typemap and calltypes,
        ``arg_typs`` must be given and both maps are updated from it.

        Returns a 4-tuple: an unmodified copy of the callee IR, the inlined
        callee blocks, the mapping from original callee variable names to the
        new caller variables, and the list of ``(label, block)`` pairs added
        to ``caller_ir``.
        """

        def _clone_ir(source_ir):
            # Blocks are deep-copied because inlining mutates them in place.
            duplicate = source_ir.copy()
            duplicate.blocks = {
                lbl: copy.deepcopy(blk) for lbl, blk in source_ir.blocks.items()
            }
            return duplicate

        # Never touch the IR object handed in by the caller.
        callee_ir = _clone_ir(callee_ir)

        if self.validator is not None:
            self.validator(callee_ir)

        # Snapshot taken before any mutation; this is what gets returned.
        pristine_callee_ir = _clone_ir(callee_ir)
        caller_scope = block.scope
        call_stmt = block.body[i]
        call_expr = call_stmt.value
        inlined_blocks = callee_ir.blocks
        from numba.cuda.core import ir_utils

        # 1. shift the callee labels past every label already in use
        label_floor = max(
            ir_utils._the_max_label.next(),
            max(caller_ir.blocks.keys()),
        )
        inlined_blocks = add_offset_to_labels(inlined_blocks, label_floor + 1)
        inlined_blocks = simplify_CFG(inlined_blocks)
        callee_ir.blocks = inlined_blocks
        entry_label = min(inlined_blocks.keys())
        # keep the global label counter in ir_utils in step with the new max
        ir_utils._the_max_label.update(max(inlined_blocks.keys()))
        self.debug_print("After relabel")
        _debug_dump(callee_ir)

        # 2. give every callee local a fresh name in the caller's scope
        callee_scopes = _get_all_scopes(inlined_blocks)
        self.debug_print("callee_scopes = ", callee_scopes)
        # a single function is expected to have exactly one local scope
        assert len(callee_scopes) == 1
        callee_scope = callee_scopes[0]
        rename_map = {}
        # Iterate a snapshot: the callee scope is extended inside the loop.
        for var in tuple(callee_scope.localvars._con.values()):
            if var.name in callee_freevars:
                continue
            inlined_name = _created_inlined_var_name(
                callee_ir.func_id.unique_name, var.name
            )
            caller_var = caller_scope.redefine(inlined_name, loc=var.loc)
            # Register the name in the callee scope too, so the callee IR
            # stays internally consistent should it be converted to SSA for
            # the type/call map update below.
            callee_scope.redefine(inlined_name, loc=var.loc)
            rename_map[var.name] = caller_var
        self.debug_print("var_dict = ", rename_map)
        replace_vars(inlined_blocks, rename_map)
        self.debug_print("After local var rename")
        _debug_dump(callee_ir)

        # 3. work out the actual arguments for the callee's parameters
        callee_func = callee_ir.func_id.func
        args = _get_callee_args(
            call_expr, callee_func, block.body[i].loc, caller_ir
        )

        # 4. extend the typemap / calltypes when the worker is typed
        if self._permit_update_type_and_call_maps:
            if arg_typs is None:
                raise TypeError("arg_typs should have a value not None")
            self.update_type_and_call_maps(callee_ir, arg_typs)
            # the update replaces the blocks on callee_ir, so re-read them
            inlined_blocks = callee_ir.blocks

        self.debug_print("After arguments rename: ")
        _debug_dump(callee_ir)

        _replace_args_with(inlined_blocks, args)

        # 5. cut the caller block in two around the call statement
        added_blocks = []
        tail_block = ir.Block(caller_scope, block.loc)
        tail_block.body = block.body[i + 1 :]
        tail_label = next_label()
        caller_ir.blocks[tail_label] = tail_block
        added_blocks.append((tail_label, tail_block))
        block.body = block.body[:i]
        block.body.append(ir.Jump(entry_label, call_stmt.loc))

        # 6. turn each Return into an assignment to the call's target; the
        #    topological order is computed first, before returns become jumps
        #    to a label that is not part of the callee blocks
        topo_order = find_topo_order(inlined_blocks)
        _replace_returns(inlined_blocks, call_stmt.target, tail_label)

        # the call expression no longer defines the target
        target_name = call_stmt.target.name
        if target_name in caller_ir._definitions:
            # NOTE: control flow can give the target several definitions
            target_defs = caller_ir._definitions[target_name]
            if call_expr in target_defs:
                target_defs.remove(call_expr)

        # 7. add the callee blocks to the caller and record their definitions
        for label in topo_order:
            inlined = inlined_blocks[label]
            # inlined blocks live in the caller's scope from now on
            inlined.scope = caller_scope
            _add_definitions(caller_ir, inlined)
            caller_ir.blocks[label] = inlined
            added_blocks.append((label, inlined))
        self.debug_print("After merge in")
        _debug_dump(caller_ir)

        return pristine_callee_ir, inlined_blocks, rename_map, added_blocks

    def inline_function(self, caller_ir, block, i, function, arg_typs=None):
        """Inline the Python ``function`` at statement ``i`` of ``block``.

        The function is converted to IR with `run_untyped_passes` and handed
        to `inline_ir` together with its free variable names; the result of
        `inline_ir` is returned. ``arg_typs`` is forwarded unchanged.
        """
        callee_ir = self.run_untyped_passes(function)
        freevars = function.__code__.co_freevars
        return self.inline_ir(
            caller_ir, block, i, callee_ir, freevars, arg_typs=arg_typs
        )

    def run_untyped_passes(self, func, enable_ssa=False):
        """
        Push ``func`` through the untyped part of the compiler pipeline and
        return the resulting Numba IR.

        SSA conversion is off by default: the call site is not in SSA form
        and `inline_ir` relies on the callee matching it.
        """
        from numba.cuda.core.compiler import StateDict, _CompileStatus
        from numba.cuda.core.untyped_passes import ExtractByteCode
        from numba.cuda.core import bytecode

        state = StateDict()
        state.func_ir = None
        state.typingctx = self.typingctx
        state.targetctx = self.targetctx
        state.locals = self.locals
        state.pipeline = self.pipeline
        # The worker's own flags object is used (not a copy), so this also
        # sets enable_ssa on self.flags.
        state.flags = self.flags
        state.flags.enable_ssa = enable_ssa

        state.func_id = bytecode.FunctionIdentity.from_function(func)

        state.typemap = None
        state.calltypes = None
        state.type_annotation = None
        state.status = _CompileStatus(False)
        state.return_type = None
        state.metadata = {}

        ExtractByteCode().run_pass(state)
        # Placeholder argument types: they are not the real ones, but *some*
        # args are needed for the case where an object-mode lift is required.
        n_params = len(state.bc.func_id.pysig.parameters)
        state.args = (types.pyobject,) * n_params

        pass_manager = self._compiler_pipeline(state)

        pass_manager.finalize()
        pass_manager.run(state)
        return state.func_ir

    def update_type_and_call_maps(self, callee_ir, arg_typs):
        """Type ``callee_ir`` for ``arg_typs`` and merge the outcome into the
        worker's typemap and calltypes.

        Raises ``ValueError`` when the worker was created without a typemap
        and calltypes.
        """
        from numba.cuda.core.ssa import reconstruct_ssa
        from numba.cuda.core.typed_passes import PreLowerStripPhis

        if not self._permit_update_type_and_call_maps:
            msg = (
                "InlineWorker instance not configured correctly, typemap or "
                "calltypes missing in initialization."
            )
            raise ValueError(msg)
        from numba.cuda.core import typed_passes, ir_utils

        # prune dead branches first to simplify the IR and avoid inference
        # errors
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        numba.cuda.core.analysis.dead_branch_prune(callee_ir, arg_typs)
        # typing the callee may need SSA form
        callee_ir = reconstruct_ssa(callee_ir)
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        f_typemap, _f_return_type, f_calltypes, _ = (
            typed_passes.type_inference_stage(
                self.typingctx,
                self.targetctx,
                callee_ir,
                arg_typs,
                None,
            )
        )
        callee_ir = PreLowerStripPhis()._strip_phi_nodes(callee_ir)
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        canonicalize_array_math(
            callee_ir, f_typemap, f_calltypes, self.typingctx
        )
        # entries such as "arg.a" describe callee parameters; drop them
        # before merging into the worker's typemap
        for name in [key for key in f_typemap if key.startswith("arg.")]:
            f_typemap.pop(name)
        self.typemap.update(f_typemap)
        self.calltypes.update(f_calltypes)
552
+
553
+
554
def inline_closure_call(
    func_ir,
    glbls,
    block,
    i,
    callee,
    typingctx=None,
    targetctx=None,
    arg_typs=None,
    typemap=None,
    calltypes=None,
    work_list=None,
    callee_validator=None,
    replace_freevars=True,
):
    """Inline the body of `callee` at its callsite (`i`-th instruction of
    `block`)

    `func_ir` is the func_ir object of the caller function and `glbls` is its
    global variable environment (func_ir.func_id.func.__globals__).
    `block` is the IR block of the callsite and `i` is the index of the
    callsite's node. `callee` is either the called function or a
    make_function node. `typingctx`, `typemap` and `calltypes` are typing
    data structures of the caller, available if we are in a typed pass.
    `arg_typs` includes the types of the arguments at the callsite.
    `callee_validator` is an optional callable which can be used to validate the
    IR of the callee to ensure that it contains IR supported for inlining, it
    takes one argument, the func_ir of the callee.
    `work_list`, when given, receives every `(label, block)` pair added to
    `func_ir`. `replace_freevars` controls whether the callee's free variables
    are substituted with the values/variables of its closure.

    Returns IR blocks of the callee and the variable renaming dictionary used
    for them to facilitate further processing of new blocks.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    debug_print = _make_debug_print("inline_closure_call")
    debug_print("Found closure call: ", instr, " with callee = ", callee)
    # support both function object and make_function Expr
    callee_code = callee.code if hasattr(callee, "code") else callee.__code__
    callee_closure = (
        callee.closure if hasattr(callee, "closure") else callee.__closure__
    )
    from numba.cuda.core import ir_utils

    # first, get the IR of the callee
    if isinstance(callee, pytypes.FunctionType):
        from numba.cuda.compiler import run_frontend

        callee_ir = run_frontend(callee, inline_closures=True)
    else:
        callee_ir = get_ir_of_code(glbls, callee_code)

    # check that the contents of the callee IR is something that can be inlined
    # if a validator is supplied
    if callee_validator is not None:
        callee_validator(callee_ir)

    callee_blocks = callee_ir.blocks

    # 1. relabel callee_ir by adding an offset
    max_label = max(ir_utils._the_max_label.next(), max(func_ir.blocks.keys()))
    callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
    callee_blocks = simplify_CFG(callee_blocks)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())
    # reset globals in ir_utils before we use it
    ir_utils._the_max_label.update(max_label)
    debug_print("After relabel")
    _debug_dump(callee_ir)

    # 2. rename all local variables in callee_ir with new locals created in
    # func_ir
    callee_scopes = _get_all_scopes(callee_blocks)
    debug_print("callee_scopes = ", callee_scopes)
    # one function should only have one local scope
    assert len(callee_scopes) == 1
    callee_scope = callee_scopes[0]
    var_dict = {}
    for var in callee_scope.localvars._con.values():
        if var.name not in callee_code.co_freevars:
            inlined_name = _created_inlined_var_name(
                callee_ir.func_id.unique_name, var.name
            )
            new_var = scope.redefine(inlined_name, loc=var.loc)
            var_dict[var.name] = new_var
    debug_print("var_dict = ", var_dict)
    replace_vars(callee_blocks, var_dict)
    debug_print("After local var rename")
    _debug_dump(callee_ir)

    # 3. replace formal parameters with actual arguments
    args = _get_callee_args(call_expr, callee, block.body[i].loc, func_ir)

    debug_print("After arguments rename: ")
    _debug_dump(callee_ir)

    # 4. replace freevar with actual closure var
    if callee_closure and replace_freevars:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            # A tuple here is a real function's __closure__, i.e. cell
            # objects. Read them through the documented `cell_contents`
            # attribute instead of ctypes.pythonapi.PyCell_Get, which had to
            # rewrite restype/argtypes on a process-global function object on
            # every call.
            items = tuple(cell.cell_contents for cell in closure)
        else:
            assert isinstance(closure, ir.Expr) and closure.op == "build_tuple"
            items = closure.items
        assert len(callee_code.co_freevars) == len(items)
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        from numba.cuda.core import typed_passes

        # call branch pruning to simplify IR and avoid inference errors
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        numba.cuda.core.analysis.dead_branch_prune(callee_ir, arg_typs)
        try:
            [f_typemap, f_return_type, f_calltypes, _] = (
                typed_passes.type_inference_stage(
                    typingctx, targetctx, callee_ir, arg_typs, None
                )
            )
        except Exception:
            # NOTE(review): the retry is identical to the first attempt; it is
            # kept as-is so behaviour is unchanged - confirm whether it is
            # still needed.
            [f_typemap, f_return_type, f_calltypes, _] = (
                typed_passes.type_inference_stage(
                    typingctx, targetctx, callee_ir, arg_typs, None
                )
            )
        canonicalize_array_math(callee_ir, f_typemap, f_calltypes, typingctx)
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)
    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1 :]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)

    # remove the old definition of instr.target too
    if (
        instr.target.name in func_ir._definitions
        and call_expr in func_ir._definitions[instr.target.name]
    ):
        # NOTE: target can have multiple definitions due to control flow
        func_ir._definitions[instr.target.name].remove(call_expr)

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        # block scope must point to parent's
        inlined_block = callee_blocks[label]
        inlined_block.scope = scope
        _add_definitions(func_ir, inlined_block)
        func_ir.blocks[label] = inlined_block
        new_blocks.append((label, inlined_block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list is not None:
        # queue the (label, block) pairs so the caller scans them as well
        work_list.extend(new_blocks)
    return callee_blocks, var_dict
732
+
733
+
734
def _defaults_for_missing_args(callee, n_supplied, defaults):
    """Return the trailing entries of ``defaults`` for unsupplied parameters.

    Defaults belong to the *last* positional parameters of the callee. With
    ``n_supplied`` positional arguments already present, only the defaults of
    the remaining ``co_argcount - n_supplied`` parameters may be appended,
    otherwise later parameters get bound to an earlier parameter's default.
    When the parameter count cannot be determined, or the call supplies too
    few arguments for the defaults to complete it, all defaults are returned
    (the behaviour before this helper existed).
    """
    code = (
        callee.code
        if hasattr(callee, "code")
        else getattr(callee, "__code__", None)
    )
    n_params = getattr(code, "co_argcount", None)
    if n_params is None:
        return list(defaults)
    n_missing = n_params - n_supplied
    if n_missing <= 0:
        return []
    if n_missing > len(defaults):
        return list(defaults)
    return list(defaults[-n_missing:])


def _get_callee_args(call_expr, callee, loc, func_ir):
    """Get arguments for calling 'callee', including the default arguments.
    keyword arguments are currently only handled when 'callee' is a function.

    ``call_expr`` may be a ``call`` expression, a ``getattr`` expression or an
    operator/getitem expression; anything else raises ``TypeError``. A
    ``call`` using ``*args`` raises ``errors.UnsupportedError``. ``loc`` is
    the location given to constants created for default values and
    ``func_ir`` is used to resolve defaults of ``make_function`` callees.
    """
    from numba.cuda.core import ir_utils

    if call_expr.op == "call":
        args = list(call_expr.args)
        if call_expr.vararg:
            msg = "Calling a closure with *args is unsupported."
            raise errors.UnsupportedError(msg, call_expr.loc)
    elif call_expr.op == "getattr":
        args = [call_expr.value]
    elif ir_utils.is_operator_or_getitem(call_expr):
        args = call_expr.list_vars()
    else:
        raise TypeError("Unsupported ir.Expr.{}".format(call_expr.op))

    debug_print = _make_debug_print("inline_closure_call default handling")

    # handle defaults and kw arguments using pysignature if callee is function
    if isinstance(callee, pytypes.FunctionType):
        pysig = utils.pysignature(callee)
        normal_handler = lambda index, param, default: default
        default_handler = lambda index, param, default: ir.Const(default, loc)

        # Throw error for stararg
        # TODO: handle stararg
        def stararg_handler(index, param, default):
            raise NotImplementedError(
                "Stararg not supported in inliner for arg {} {}".format(
                    index, param
                )
            )

        kws = dict(call_expr.kws) if call_expr.op == "call" else {}
        return numba.cuda.typing.fold_arguments(
            pysig, args, kws, normal_handler, default_handler, stararg_handler
        )

    # TODO: handle arguments for make_function case similar to function
    # case above
    callee_defaults = (
        callee.defaults
        if hasattr(callee, "defaults")
        else callee.__defaults__
    )
    if callee_defaults:
        debug_print("defaults = ", callee_defaults)
        if isinstance(callee_defaults, tuple):
            # ir.Var entries are used directly; everything else (this is
            # predominantly kwargs from inlinable functions) becomes a
            # constant.
            default_values = [
                x if isinstance(x, ir.Var) else ir.Const(value=x, loc=loc)
                for x in callee_defaults
            ]
        elif isinstance(callee_defaults, (ir.Var, str)):
            default_tuple = func_ir.get_definition(callee_defaults)
            assert isinstance(default_tuple, ir.Expr)
            assert default_tuple.op == "build_tuple"
            default_values = [
                func_ir.get_definition(x) for x in default_tuple.items
            ]
        else:
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(
                    callee_defaults
                )
            )
        # Append only the defaults of parameters the call did not supply, so
        # that each one lands on its own parameter index.
        args = list(args) + _defaults_for_missing_args(
            callee, len(args), default_values
        )
    return args
813
+
814
+
815
+ def _make_debug_print(prefix):
816
+ def debug_print(*args):
817
+ if config.DEBUG_INLINE_CLOSURE:
818
+ print(prefix + ": " + "".join(str(x) for x in args))
819
+
820
+ return debug_print
821
+
822
+
823
def _debug_dump(func_ir):
    """Call ``func_ir.dump()`` when ``config.DEBUG_INLINE_CLOSURE`` is set."""
    if not config.DEBUG_INLINE_CLOSURE:
        return
    func_ir.dump()
826
+
827
+
828
+ def _get_all_scopes(blocks):
829
+ """Get all block-local scopes from an IR."""
830
+ all_scopes = []
831
+ for label, block in blocks.items():
832
+ if block.scope not in all_scopes:
833
+ all_scopes.append(block.scope)
834
+ return all_scopes
835
+
836
+
837
+ def _replace_args_with(blocks, args):
838
+ """
839
+ Replace ir.Arg(...) with real arguments from call site
840
+ """
841
+ for label, block in blocks.items():
842
+ assigns = block.find_insts(ir.Assign)
843
+ for stmt in assigns:
844
+ if isinstance(stmt.value, ir.Arg):
845
+ idx = stmt.value.index
846
+ assert idx < len(args)
847
+ stmt.value = args[idx]
848
+
849
+
850
+ def _replace_freevars(blocks, args):
851
+ """
852
+ Replace ir.FreeVar(...) with real variables from parent function
853
+ """
854
+ for label, block in blocks.items():
855
+ assigns = block.find_insts(ir.Assign)
856
+ for stmt in assigns:
857
+ if isinstance(stmt.value, ir.FreeVar):
858
+ idx = stmt.value.index
859
+ assert idx < len(args)
860
+ if isinstance(args[idx], ir.Var):
861
+ stmt.value = args[idx]
862
+ else:
863
+ stmt.value = ir.Const(args[idx], stmt.loc)
864
+
865
+
866
+ def _replace_returns(blocks, target, return_label):
867
+ """
868
+ Return return statement by assigning directly to target, and a jump.
869
+ """
870
+ for label, block in blocks.items():
871
+ casts = []
872
+ for i in range(len(block.body)):
873
+ stmt = block.body[i]
874
+ if isinstance(stmt, ir.Return):
875
+ assert i + 1 == len(block.body)
876
+ block.body[i] = ir.Assign(stmt.value, target, stmt.loc)
877
+ block.body.append(ir.Jump(return_label, stmt.loc))
878
+ # remove cast of the returned value
879
+ for cast in casts:
880
+ if cast.target.name == stmt.value.name:
881
+ cast.value = cast.value.value
882
+ elif (
883
+ isinstance(stmt, ir.Assign)
884
+ and isinstance(stmt.value, ir.Expr)
885
+ and stmt.value.op == "cast"
886
+ ):
887
+ casts.append(stmt)
888
+
889
+
890
def _add_definitions(func_ir, block):
    """
    Record the assignments of ``block`` in ``func_ir._definitions``.

    For every ``ir.Assign`` in the block, the assigned value is appended to
    the definition list stored under the target variable's name.
    """
    known_definitions = func_ir._definitions
    for assignment in block.find_insts(ir.Assign):
        known_definitions[assignment.target.name].append(assignment.value)
898
+
899
+
900
def _find_arraycall(func_ir, block):
    """Scan ``block`` from its first statement for "x = numpy.array(y)" or
    "x[..] = y", followed by the deletion of list ``y``.

    Returns a tuple ``(list_var, array_stmt_index, array_kws)`` where
    ``array_stmt_index`` is the position of the matched statement in
    ``block.body`` and ``array_kws`` holds the keyword arguments of the array
    call (empty for the SetItem form). Raises GuardException when no such
    statement is found or the list is not deleted after it.
    """
    array_var = None
    list_var = None
    list_dead_after_call = False

    for position, instr in enumerate(block.body):
        if isinstance(instr, ir.Del):
            # Stop the process if list_var becomes dead
            if list_var and array_var and instr.value == list_var.name:
                list_dead_after_call = True
                break
            continue

        if isinstance(instr, ir.Assign):
            # Found array_var = array(list_var)
            call_expr = instr.value
            callname = guard(find_callname, func_ir, call_expr)
            if callname == ("array", "numpy") and isinstance(
                call_expr.args[0], ir.Var
            ):
                list_var = call_expr.args[0]
                array_var = instr.target
                array_stmt_index = position
                array_kws = dict(call_expr.kws)
            continue

        if (
            isinstance(instr, ir.SetItem)
            and isinstance(instr.value, ir.Var)
            and not list_var
        ):
            list_var = instr.value
            # Found array_var[..] = list_var, the case for nested array
            array_var = instr.target
            target_def = get_definition(func_ir, array_var)
            require(guard(_find_unsafe_empty_inferred, func_ir, target_def))
            array_stmt_index = position
            array_kws = {}
            continue

        # Bail out otherwise
        break

    # require array_var is found, and list_var is dead after array_call.
    require(array_var and list_dead_after_call)
    _make_debug_print("find_array_call")(block.body[array_stmt_index])
    return list_var, array_stmt_index, array_kws
951
+
952
+
953
def _find_iter_range(func_ir, range_iter_var, swapped):
    """Recover the bounds of an iterator built from range(n) or range(m, n).

    Returns ``(start, stop, func_def)`` where ``func_def`` is the definition
    of the ``range`` global; ``start`` is the literal 0 for the one-argument
    form. The call is also recorded in ``swapped`` under the name of the
    range function variable. Raises GuardException for anything else.
    """
    log = _make_debug_print("find_iter_range")

    iter_defn = get_definition(func_ir, range_iter_var)
    log("range_iter_var = ", range_iter_var, " def = ", iter_defn)
    require(isinstance(iter_defn, ir.Expr) and iter_defn.op == "getiter")

    range_var = iter_defn.value
    call_defn = get_definition(func_ir, range_var)
    log("range_var = ", range_var, " range_def = ", call_defn)
    require(isinstance(call_defn, ir.Expr) and call_defn.op == "call")

    func_var = call_defn.func
    func_def = get_definition(func_ir, func_var)
    log("func_var = ", func_var, " func_def = ", func_def)
    require(isinstance(func_def, ir.Global) and func_def.value is range)

    call_args = call_defn.args
    swapping = [('"array comprehension"', "closure of"), call_defn.func.loc]
    if len(call_args) not in (1, 2):
        raise GuardException

    swapped[call_defn.func.name] = swapping
    if len(call_args) == 1:
        # The looked-up definition is not used; the call is retained so the
        # lookup itself still happens exactly as before.
        get_definition(func_ir, call_args[0], lhs_only=True)
        return (0, call_args[0], func_def)

    start = get_definition(func_ir, call_args[0], lhs_only=True)
    stop = get_definition(func_ir, call_args[1], lhs_only=True)
    return (start, stop, func_def)
984
+
985
+
986
@intrinsic
def length_of_iterator(typingctx, val):
    """
    Internal len(iter) used by array comprehensions (see inline_closurecall).

    Handles range iterators, list iterators, array iterators and homogeneous
    tuple iterators; any other iterator type raises a TypingError.
    """
    if isinstance(val, types.RangeIteratorType):
        yielded_type = val.yield_type

        def codegen(context, builder, sig, args):
            (iter_value,) = args
            from numba.cuda.cpython.rangeobj import range_impl_map

            struct_type = range_impl_map[yielded_type][1]
            proxy = cgutils.create_struct_proxy(struct_type)(
                context, builder, iter_value
            )
            # ``count`` is read twice on purpose, mirroring the established
            # access pattern on the struct proxy.
            count_type = proxy.count.type
            return impl_ret_untracked(
                context, builder, count_type, builder.load(proxy.count)
            )

        return signature(yielded_type, val), codegen

    if isinstance(val, types.ListIter):

        def codegen(context, builder, sig, args):
            (iter_value,) = args
            llvm_intp = context.get_value_type(types.intp)
            from numba.cuda.cpython.listobj import ListIterInstance

            list_iter = ListIterInstance(
                context, builder, sig.args[0], iter_value
            )
            return impl_ret_untracked(
                context, builder, llvm_intp, list_iter.size
            )

        return signature(types.intp, val), codegen

    if isinstance(val, types.ArrayIterator):

        def codegen(context, builder, sig, args):
            (iter_type,) = sig.args
            (iter_value,) = args
            llvm_intp = context.get_value_type(types.intp)
            helper = context.make_helper(builder, iter_type, value=iter_value)
            array_type = iter_type.array_type
            from numba.cuda.np.arrayobj import make_array

            array = make_array(array_type)(
                context, builder, value=helper.array
            )
            dims = cgutils.unpack_tuple(builder, array.shape)
            # array iterates along the outer dimension
            return impl_ret_untracked(context, builder, llvm_intp, dims[0])

        return signature(types.intp, val), codegen

    if isinstance(val, types.UniTupleIter):

        def codegen(context, builder, sig, args):
            (iter_type,) = sig.args
            tuple_type = iter_type.container
            llvm_intp = context.get_value_type(types.intp)
            length = llvm_intp(tuple_type.count)
            return impl_ret_untracked(context, builder, llvm_intp, length)

        return signature(types.intp, val), codegen

    msg = (
        "Unsupported iterator found in array comprehension, try "
        "preallocating the array and filling manually."
    )
    raise errors.TypingError(msg)
1052
+
1053
+
1054
def _inline_arraycall(
    func_ir, cfg, visited, loop, swapped, enable_prange=False, typed=False
):
    """Look for array(list) call in the exit block of a given loop, and turn
    list operations into array operations in the loop if the following
    conditions are met:
    1. The exit block contains an array call on the list;
    2. The list variable is no longer live after array call;
    3. The list is created in the loop entry block;
    4. The loop is created from an range iterator whose length is known prior
    to the loop;
    5. There is only one list_append operation on the list variable in the
    loop body;
    6. The block that contains list_append dominates the loop head, which
    ensures list length is the same as loop length;

    Parameters:
    func_ir       -- the function IR being rewritten (blocks and definitions
                     are modified in place on success).
    cfg           -- control flow graph of func_ir; queried for in_loops()
                     and predecessors().
    visited       -- collection of loop headers already processed; blocks
                     that belong to a loop not in it are ignored.
    loop          -- the loop to examine (uses .body, .header, .entries and
                     .exits).
    swapped       -- dict forwarded to _find_iter_range, which records the
                     range call it recognises.
    enable_prange -- accepted for interface compatibility; it is not read by
                     this function.
    typed         -- when False, the rewrites that rely on
                     length_of_iterator / unsafe_empty_inferred are refused
                     with a GuardException.

    Returns True once the rewrite has been applied. A failed condition check
    raises GuardException, so callers are expected to invoke this through
    guard().
    """
    debug_print = _make_debug_print("inline_arraycall")
    # There should only be one loop exit
    require(len(loop.exits) == 1)
    exit_block = next(iter(loop.exits))
    list_var, array_call_index, array_kws = _find_arraycall(
        func_ir,
        func_ir.blocks[exit_block],
    )

    # check if dtype is present in array call
    dtype_def = None
    dtype_mod_def = None
    if "dtype" in array_kws:
        require(isinstance(array_kws["dtype"], ir.Var))
        # We require that dtype argument to be a constant of getattr Expr, and
        # we'll remember its definition for later use.
        dtype_def = get_definition(func_ir, array_kws["dtype"])
        require(isinstance(dtype_def, ir.Expr) and dtype_def.op == "getattr")
        dtype_mod_def = get_definition(func_ir, dtype_def.value)

    list_var_def = get_definition(func_ir, list_var)
    debug_print("list_var = ", list_var, " def = ", list_var_def)
    if isinstance(list_var_def, ir.Expr) and list_var_def.op == "cast":
        list_var_def = get_definition(func_ir, list_var_def.value)
    # Check if the definition is a build_list
    require(
        isinstance(list_var_def, ir.Expr) and list_var_def.op == "build_list"
    )
    # The build_list must be empty
    require(len(list_var_def.items) == 0)

    # Look for list_append in "last" block in loop body, which should be a block
    # that is a post-dominator of the loop header.
    list_append_stmts = []
    for label in loop.body:
        # We have to consider blocks of this loop, but not sub-loops.
        # To achieve this, we require the set of "in_loops" of "label" to be
        # visited loops.
        in_visited_loops = [
            inner.header in visited for inner in cfg.in_loops(label)
        ]
        if not all(in_visited_loops):
            continue
        block = func_ir.blocks[label]
        debug_print("check loop body block ", label)
        for stmt in block.find_insts(ir.Assign):
            expr = stmt.value
            if isinstance(expr, ir.Expr) and expr.op == "call":
                func_def = get_definition(func_ir, expr.func)
                if (
                    isinstance(func_def, ir.Expr)
                    and func_def.op == "getattr"
                    and func_def.attr == "append"
                ):
                    receiver_def = get_definition(func_ir, func_def.value)
                    debug_print(
                        "list_def = ",
                        receiver_def,
                        receiver_def is list_var_def,
                    )
                    if receiver_def is list_var_def:
                        # found matching append call
                        list_append_stmts.append((label, block, stmt))

    # Require only one list_append, otherwise we won't know the indices
    require(len(list_append_stmts) == 1)
    append_block_label, append_block, append_stmt = list_append_stmts[0]

    # Check if append_block (besides loop entry) dominates loop header.
    # Since CFG doesn't give us this info without loop entry, we approximate
    # by checking if the predecessor set of the header block is the same
    # as loop_entries plus append_block, which is certainly more restrictive
    # than necessary, and can be relaxed if needed.
    preds = {pred for pred, _ in cfg.predecessors(loop.header)}
    expected_preds = loop.entries | {append_block_label}
    debug_print("preds = ", preds, expected_preds)
    require(preds == expected_preds)

    # Find iterator in loop header
    iter_vars = []
    iter_first_vars = []
    loop_header = func_ir.blocks[loop.header]
    for stmt in loop_header.find_insts(ir.Assign):
        expr = stmt.value
        if isinstance(expr, ir.Expr):
            if expr.op == "iternext":
                iter_def = get_definition(func_ir, expr.value)
                debug_print("iter_def = ", iter_def)
                iter_vars.append(expr.value)
            elif expr.op == "pair_first":
                iter_first_vars.append(stmt.target)

    # Require only one iterator in loop header
    require(len(iter_vars) == 1 and len(iter_first_vars) == 1)
    # variable that holds the iterator object
    iter_var = iter_vars[0]
    # variable that holds the value out of iterator
    iter_first_var = iter_first_vars[0]

    # Final requirement: only one loop entry, and we're going to modify it by:
    # 1. replacing the list definition with an array definition;
    # 2. adding a counter for the array iteration.
    require(len(loop.entries) == 1)
    loop_entry = func_ir.blocks[next(iter(loop.entries))]
    terminator = loop_entry.terminator
    scope = loop_entry.scope
    loc = loop_entry.loc
    stmts = []
    removed = []

    def is_removed(val, removed):
        """Tell whether ``val`` is a variable already dropped from the entry."""
        if isinstance(val, ir.Var):
            for x in removed:
                if x.name == val.name:
                    return True
        return False

    # Skip list construction and skip terminator, add the rest to stmts.
    # The comparison is against list_var_def (the build_list being replaced),
    # not against whichever ``.append`` receiver the scan above happened to
    # inspect last, which may belong to an unrelated list.
    for stmt in loop_entry.body[:-1]:
        if isinstance(stmt, ir.Assign) and (
            stmt.value is list_var_def or is_removed(stmt.value, removed)
        ):
            removed.append(stmt.target)
        else:
            stmts.append(stmt)
    debug_print("removed variables: ", removed)

    # Define an index_var to index the array.
    # If the range happens to be single step ranges like range(n), or
    # range(m, n), then the index_var correlates to iterator index; otherwise
    # we'll have to define a new counter.
    range_def = guard(_find_iter_range, func_ir, iter_var, swapped)
    index_var = scope.redefine("index", loc)
    if range_def and range_def[0] == 0:
        # iterator starts with 0, index_var can just be iter_first_var
        index_var = iter_first_var
    else:
        # index_var = -1 # starting the index with -1 since it will incremented
        # in loop header
        stmts.append(
            _new_definition(
                func_ir, index_var, ir.Const(value=-1, loc=loc), loc
            )
        )

    # Insert statement to get the size of the loop iterator
    size_var = scope.redefine("size", loc)
    if range_def:
        start, stop, range_func_def = range_def
        if start == 0:
            size_val = stop
        else:
            size_val = ir.Expr.binop(
                fn=operator.sub, lhs=stop, rhs=start, loc=loc
            )

    else:
        # this doesn't work in objmode as it's effectively untyped
        if typed:
            len_func_var = scope.redefine("len_func", loc)
            stmts.append(
                _new_definition(
                    func_ir,
                    len_func_var,
                    ir.Global(
                        "length_of_iterator", length_of_iterator, loc=loc
                    ),
                    loc,
                )
            )
            size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc)
        else:
            raise GuardException

    stmts.append(_new_definition(func_ir, size_var, size_val, loc))

    size_tuple_var = scope.redefine("size_tuple", loc)
    stmts.append(
        _new_definition(
            func_ir,
            size_tuple_var,
            ir.Expr.build_tuple(items=[size_var], loc=loc),
            loc,
        )
    )

    # Insert array allocation
    array_var = scope.redefine("array", loc)
    empty_func = scope.redefine("empty_func", loc)
    if dtype_def and dtype_mod_def:
        # when dtype is present, we'll call empty with dtype
        dtype_mod_var = scope.redefine("dtype_mod", loc)
        dtype_var = scope.redefine("dtype", loc)
        stmts.append(
            _new_definition(func_ir, dtype_mod_var, dtype_mod_def, loc)
        )
        stmts.append(
            _new_definition(
                func_ir,
                dtype_var,
                ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc),
                loc,
            )
        )
        stmts.append(
            _new_definition(
                func_ir, empty_func, ir.Global("empty", np.empty, loc=loc), loc
            )
        )
        array_kws = [("dtype", dtype_var)]
    else:
        # this doesn't work in objmode as it's effectively untyped
        if typed:
            # otherwise we'll call unsafe_empty_inferred
            stmts.append(
                _new_definition(
                    func_ir,
                    empty_func,
                    ir.Global(
                        "unsafe_empty_inferred", unsafe_empty_inferred, loc=loc
                    ),
                    loc,
                )
            )
            array_kws = []
        else:
            raise GuardException

    # array_var = empty_func(size_tuple_var)
    stmts.append(
        _new_definition(
            func_ir,
            array_var,
            ir.Expr.call(
                empty_func, (size_tuple_var,), list(array_kws), loc=loc
            ),
            loc,
        )
    )

    # Add back removed just in case they are used by something else
    for var in removed:
        stmts.append(_new_definition(func_ir, var, array_var, loc))

    # Add back terminator
    stmts.append(terminator)
    # Modify loop_entry
    loop_entry.body = stmts

    if range_def:
        if range_def[0] != 0:
            # when range doesn't start from 0, index_var becomes loop index
            # (iter_first_var) minus an offset (range_def[0])
            terminator = loop_header.terminator
            assert isinstance(terminator, ir.Branch)
            # find the block in the loop body that header jumps to
            block_id = terminator.truebr
            blk = func_ir.blocks[block_id]
            loc = blk.loc
            blk.body.insert(
                0,
                _new_definition(
                    func_ir,
                    index_var,
                    ir.Expr.binop(
                        fn=operator.sub,
                        lhs=iter_first_var,
                        rhs=range_def[0],
                        loc=loc,
                    ),
                    loc,
                ),
            )
    else:
        # Insert index_var increment to the end of loop header
        loc = loop_header.loc
        terminator = loop_header.terminator
        stmts = loop_header.body[0:-1]
        next_index_var = scope.redefine("next_index", loc)
        one = scope.redefine("one", loc)
        # one = 1
        stmts.append(
            _new_definition(func_ir, one, ir.Const(value=1, loc=loc), loc)
        )
        # next_index_var = index_var + 1
        stmts.append(
            _new_definition(
                func_ir,
                next_index_var,
                ir.Expr.binop(fn=operator.add, lhs=index_var, rhs=one, loc=loc),
                loc,
            )
        )
        # index_var = next_index_var
        stmts.append(_new_definition(func_ir, index_var, next_index_var, loc))
        stmts.append(terminator)
        loop_header.body = stmts

    # In append_block, change list_append into array assign
    for pos, candidate in enumerate(append_block.body):
        if candidate is append_stmt:
            debug_print("Replace append with SetItem")
            append_block.body[pos] = ir.SetItem(
                target=array_var,
                index=index_var,
                value=append_stmt.value.args[0],
                loc=append_stmt.loc,
            )

    # replace array call, by changing "a = array(b)" to "a = b"
    stmt = func_ir.blocks[exit_block].body[array_call_index]
    # stmt can be either array call or SetItem, we only replace array call
    if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
        stmt.value = array_var
        func_ir._definitions[stmt.target.name] = [stmt.value]

    return True
1385
+
1386
+
1387
def _find_unsafe_empty_inferred(func_ir, expr):
    """Check whether ``expr`` is a call to ``unsafe_empty_inferred``.

    Returns the result of comparing the callee's global value with
    ``unsafe_empty_inferred``. Raises GuardException (through ``require``)
    when ``expr`` is not a call expression or its callee is not defined by an
    ``ir.Global``.
    """
    require(isinstance(expr, ir.Expr) and expr.op == "call")
    callee = expr.func
    callee_def = get_definition(func_ir, callee)
    require(isinstance(callee_def, ir.Global))
    _make_debug_print("_find_unsafe_empty_inferred")(callee_def.value)
    return callee_def.value == unsafe_empty_inferred
1395
+
1396
+
1397
def _fix_nested_array(func_ir):
    """Look for assignment like: a[..] = b, where both a and b are numpy arrays,
    and try to eliminate array b by expanding a with an extra dimension.

    ``func_ir`` is modified in place: matching SetItem statements are removed
    from their block, the size tuple of the outer allocation is extended, and
    the inner allocation expression is turned into a getitem on the outer
    array. Nothing is returned.
    """
    blocks = func_ir.blocks
    cfg = compute_cfg_from_blocks(blocks)
    usedefs = compute_use_defs(blocks)
    empty_deadmap = {label: set() for label in blocks}
    livemap = compute_live_variables(cfg, blocks, usedefs.defmap, empty_deadmap)

    def find_array_def(arr):
        """Find numpy array definition such as
        arr = numba.unsafe.ndarray.empty_inferred(...).
        If it is arr = b[...], find array definition of b recursively.
        Raises GuardException when no such definition is reached.
        """
        arr_def = get_definition(func_ir, arr)
        _make_debug_print("find_array_def")(arr, arr_def)
        if isinstance(arr_def, ir.Expr):
            if guard(_find_unsafe_empty_inferred, func_ir, arr_def):
                return arr_def
            elif arr_def.op == "getitem":
                return find_array_def(arr_def.value)
        raise GuardException

    def fix_dependencies(expr, varlist):
        """Double check if all variables in varlist are defined before
        expr is used. Try to move constant definition when the check fails.
        Bails out by raising GuardException if it can't be moved.

        Returns the list of variables to use in place of ``varlist``; entries
        that were not yet defined are replaced by fresh variables bound to a
        copy of their constant, inserted right before the assignment of
        ``expr``.
        """
        debug_print = _make_debug_print("fix_dependencies")
        for label, block in blocks.items():
            scope = block.scope
            body = block.body
            defined = set()
            for i, inst in enumerate(body):
                if isinstance(inst, ir.Assign):
                    defined.add(inst.target.name)
                    if inst.value is expr:
                        new_varlist = []
                        # All constants that have to be re-materialised are
                        # collected first and spliced in together, so that a
                        # second one does not discard the first, and so the
                        # block is left untouched when we bail out.
                        hoisted_defs = []
                        for var in varlist:
                            # var must be defined before this inst, or live
                            # and not later defined.
                            if var.name in defined or (
                                var.name in livemap[label]
                                and var.name not in usedefs.defmap[label]
                            ):
                                debug_print(var.name, " already defined")
                                new_varlist.append(var)
                            else:
                                debug_print(var.name, " not yet defined")
                                var_def = get_definition(func_ir, var.name)
                                if not isinstance(var_def, ir.Const):
                                    raise GuardException
                                loc = var.loc
                                new_var = scope.redefine("new_var", loc)
                                new_const = ir.Const(var_def.value, loc)
                                hoisted_defs.append(
                                    _new_definition(
                                        func_ir, new_var, new_const, loc
                                    )
                                )
                                new_varlist.append(new_var)
                        if hoisted_defs:
                            new_body = []
                            new_body.extend(body[:i])
                            new_body.extend(hoisted_defs)
                            new_body.extend(body[i:])
                            block.body = new_body
                        return new_varlist
        # when expr is not found in block
        raise GuardException

    def fix_array_assign(stmt):
        """For assignment like lhs[idx] = rhs, where both lhs and rhs are
        arrays, do the following:
        1. find the definition of rhs, which has to be a call to
        numba.unsafe.ndarray.empty_inferred
        2. find the source array creation for lhs, insert an extra dimension of
        size of b.
        3. replace the definition of
        rhs = numba.unsafe.ndarray.empty_inferred(...) with rhs = lhs[idx]

        Returns True on success and raises GuardException otherwise.
        """
        require(isinstance(stmt, ir.SetItem))
        require(isinstance(stmt.value, ir.Var))
        debug_print = _make_debug_print("fix_array_assign")
        debug_print("found SetItem: ", stmt)
        lhs = stmt.target
        # Find the source array creation of lhs
        lhs_def = find_array_def(lhs)
        debug_print("found lhs_def: ", lhs_def)
        rhs_def = get_definition(func_ir, stmt.value)
        debug_print("found rhs_def: ", rhs_def)
        require(isinstance(rhs_def, ir.Expr))
        if rhs_def.op == "cast":
            rhs_def = get_definition(func_ir, rhs_def.value)
            require(isinstance(rhs_def, ir.Expr))
        require(_find_unsafe_empty_inferred(func_ir, rhs_def))
        # Find the array dimension of rhs
        dim_def = get_definition(func_ir, rhs_def.args[0])
        require(isinstance(dim_def, ir.Expr) and dim_def.op == "build_tuple")
        debug_print("dim_def = ", dim_def)
        extra_dims = [
            get_definition(func_ir, x, lhs_only=True) for x in dim_def.items
        ]
        debug_print("extra_dims = ", extra_dims)
        # Expand size tuple when creating lhs_def with extra_dims
        size_tuple_def = get_definition(func_ir, lhs_def.args[0])
        require(
            isinstance(size_tuple_def, ir.Expr)
            and size_tuple_def.op == "build_tuple"
        )
        debug_print("size_tuple_def = ", size_tuple_def)
        extra_dims = fix_dependencies(size_tuple_def, extra_dims)
        size_tuple_def.items += extra_dims
        # In-place modify rhs_def to be getitem
        rhs_def.op = "getitem"
        rhs_def.fn = operator.getitem
        rhs_def.value = get_definition(func_ir, lhs, lhs_only=True)
        rhs_def.index = stmt.index
        # Drop the attributes that only make sense for a call expression.
        del rhs_def._kws["func"]
        del rhs_def._kws["args"]
        del rhs_def._kws["vararg"]
        del rhs_def._kws["kws"]
        # success
        return True

    for label in find_topo_order(func_ir.blocks):
        block = func_ir.blocks[label]
        # Walk a snapshot: removing from the list being iterated would make
        # the loop skip the statement that follows each removed SetItem.
        for stmt in list(block.body):
            if guard(fix_array_assign, stmt):
                block.body.remove(stmt)
1527
+
1528
+
1529
def _new_definition(func_ir, var, value, loc):
    """Build ``var = value`` and make it the only recorded definition of var.

    Any definitions previously stored for ``var.name`` in
    ``func_ir._definitions`` are replaced by the single entry ``value``. The
    new ``ir.Assign`` is returned; inserting it into a block is left to the
    caller.
    """
    func_ir._definitions[var.name] = [value]
    assignment = ir.Assign(value, var, loc)
    return assignment
1532
+
1533
+
1534
@rewrites.register_rewrite("after-inference")
class RewriteArrayOfConsts(rewrites.Rewrite):
    """Rewrite that finds 1D array creations from a constant list and turns
    them into direct initialization of the array elements, so that the
    intermediate list is never built.
    """

    def __init__(self, state, *args, **kws):
        # The typing context is needed later by _inline_const_arraycall.
        self.typingctx = state.typingctx
        super().__init__(*args, **kws)

    def match(self, func_ir, block, typemap, calltypes):
        """Try the rewrite on ``block``; remember the new body if it applies.

        Returns True when a rewritten body was produced, False otherwise.
        """
        if len(calltypes) == 0:
            return False
        self.crnt_block = block
        rewritten = guard(
            _inline_const_arraycall,
            block,
            func_ir,
            self.typingctx,
            typemap,
            calltypes,
        )
        self.new_body = rewritten
        return rewritten is not None

    def apply(self):
        """Install the body computed by match() and return the block."""
        block = self.crnt_block
        block.body = self.new_body
        return block
1562
+
1563
+
1564
def _inline_const_arraycall(block, func_ir, context, typemap, calltypes):
    """Look for array(list) call where list is a constant list created by
    build_list, and turn them into direct array creation and initialization, if
    the following conditions are met:
    1. The build_list call immediately precedes the array call;
    2. The list variable is no longer live after array call;
    If any condition check fails, no modification will be made.

    The statements of ``block`` are scanned once, in order. ``typemap`` and
    ``calltypes`` receive entries for the variables and expressions that are
    created along the way, and ``func_ir._definitions`` is updated through
    _new_definition.

    Returns the new list of statements for the block when at least one
    rewrite took place, otherwise None.
    """
    debug_print = _make_debug_print("inline_const_arraycall")
    scope = block.scope

    def inline_array(array_var, expr, stmts, list_vars, dels):
        """Check to see if the given "array_var" is created from a list
        of constants, and try to inline the list definition as array
        initialization.

        Extra statements produced will be appended to "stmts".

        Returns True on success; a failed check raises GuardException through
        require(), so this helper is meant to be called via guard().
        """
        callname = guard(find_callname, func_ir, expr)
        require(callname and callname[1] == "numpy" and callname[0] == "array")
        require(expr.args[0].name in list_vars)
        ret_type = calltypes[expr].return_type
        require(
            isinstance(ret_type, types.ArrayCompatible) and ret_type.ndim == 1
        )
        loc = expr.loc
        list_var = expr.args[0]
        # Get the type of the array to be created.
        array_typ = typemap[array_var.name]
        debug_print("inline array_var = ", array_var, " list_var = ", list_var)
        # Get the element type of the array to be created.
        dtype = array_typ.dtype
        # Get the sequence of operations to provide values to the new array.
        seq, _ = find_build_sequence(func_ir, list_var)
        size = len(seq)
        # Create a tuple to pass to empty below to specify the new array size.
        size_var = scope.redefine("size", loc)
        size_tuple_var = scope.redefine("size_tuple", loc)
        size_typ = types.intp
        size_tuple_typ = types.UniTuple(size_typ, 1)
        typemap[size_var.name] = size_typ
        typemap[size_tuple_var.name] = size_tuple_typ
        stmts.append(
            _new_definition(func_ir, size_var, ir.Const(size, loc=loc), loc)
        )
        stmts.append(
            _new_definition(
                func_ir,
                size_tuple_var,
                ir.Expr.build_tuple(items=[size_var], loc=loc),
                loc,
            )
        )

        # The general approach is to create an empty array and then fill
        # the elements in one-by-one from their specification.

        # Get the numpy type to pass to empty.
        nptype = types.DType(dtype)

        # Create a variable to hold the numpy empty function.
        empty_func = scope.redefine("empty_func", loc)
        fnty = get_np_ufunc_typ(np.empty)
        context.resolve_function_type(fnty, (size_typ,), {"dtype": nptype})

        typemap[empty_func.name] = fnty

        stmts.append(
            _new_definition(
                func_ir, empty_func, ir.Global("empty", np.empty, loc=loc), loc
            )
        )

        # We pass two arguments to empty, first the size tuple and second
        # the dtype of the new array. Here, we created typ_var which is
        # the dtype argument of the new array. typ_var in turn is created
        # by getattr of the dtype string on the numpy module.

        # Create var for numpy module.
        g_np_var = scope.redefine("$np_g_var", loc)
        typemap[g_np_var.name] = types.misc.Module(np)
        g_np = ir.Global("np", np, loc)
        stmts.append(_new_definition(func_ir, g_np_var, g_np, loc))

        # Create var for result of numpy.<dtype>.
        typ_var = scope.redefine("$np_typ_var", loc)
        typemap[typ_var.name] = nptype
        dtype_str = str(dtype)
        # The attribute looked up on the numpy module for a "bool" dtype is
        # spelled "bool_".
        if dtype_str == "bool":
            dtype_str = "bool_"
        # Get dtype attribute of numpy module.
        np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc)
        stmts.append(_new_definition(func_ir, typ_var, np_typ_getattr, loc))

        # Create the call to numpy.empty passing the size and dtype var.
        # NOTE(review): the call is built with size_var, while size_tuple_var
        # defined above is not passed here - confirm this is intended.
        empty_call = ir.Expr.call(empty_func, [size_var, typ_var], {}, loc=loc)
        calltypes[empty_call] = typing.signature(array_typ, size_typ, nptype)
        stmts.append(_new_definition(func_ir, array_var, empty_call, loc))

        # Fill in the new empty array one-by-one.
        for i in range(size):
            index_var = scope.redefine("index", loc)
            index_typ = types.intp
            typemap[index_var.name] = index_typ
            stmts.append(
                _new_definition(func_ir, index_var, ir.Const(i, loc), loc)
            )
            setitem = ir.SetItem(array_var, index_var, seq[i], loc)
            calltypes[setitem] = typing.signature(
                types.none, array_typ, index_typ, dtype
            )
            stmts.append(setitem)

        # The deletions of the list items were held back by the caller; emit
        # them now that the items have been stored into the array.
        stmts.extend(dels)
        return True

    class State(object):
        """
        This class is used to hold the state in the following loop so as to make
        it easy to reset the state of the variables tracking the various
        statement kinds
        """

        def __init__(self):
            # list_vars keep track of the variable created from the latest
            # build_list instruction, as well as its synonyms.
            self.list_vars = []
            # dead_vars keep track of those in list_vars that are considered
            # dead.
            self.dead_vars = []
            # list_items keep track of the elements used in build_list.
            self.list_items = []
            # stmts accumulates the statements of the rewritten block body.
            self.stmts = []
            # dels keep track of the deletion of list_items, which will need to
            # be moved after array initialization.
            self.dels = []
            # tracks if a modification has taken place
            self.modified = False

        def reset(self):
            """
            Resets the internal state of the variables used for tracking.
            The accumulated statements and the modified flag are left as
            they are.
            """
            self.list_vars = []
            self.dead_vars = []
            self.list_items = []
            self.dels = []

        def list_var_used(self, inst):
            """
            Returns True if the list being analysed is used between the
            build_list and the array call.
            """
            return any([x.name in self.list_vars for x in inst.list_vars()])

    state = State()

    for inst in block.body:
        if isinstance(inst, ir.Assign):
            if isinstance(inst.value, ir.Var):
                # A plain copy of a tracked list variable creates a synonym.
                if inst.value.name in state.list_vars:
                    state.list_vars.append(inst.target.name)
                    state.stmts.append(inst)
                    continue
            elif isinstance(inst.value, ir.Expr):
                expr = inst.value
                if expr.op == "build_list":
                    # new build_list encountered, reset state
                    state.reset()
                    state.list_items = [x.name for x in expr.items]
                    state.list_vars = [inst.target.name]
                    state.stmts.append(inst)
                    continue
                elif expr.op == "call" and expr in calltypes:
                    # On success inline_array has already appended the
                    # replacement statements, so the call itself is dropped.
                    if guard(
                        inline_array,
                        inst.target,
                        expr,
                        state.stmts,
                        state.list_vars,
                        state.dels,
                    ):
                        state.modified = True
                        continue
        elif isinstance(inst, ir.Del):
            removed_var = inst.value
            if removed_var in state.list_items:
                # Held back; inline_array re-emits these after the array fill.
                state.dels.append(inst)
                continue
            elif removed_var in state.list_vars:
                # one of the list_vars is considered dead.
                state.dead_vars.append(removed_var)
                state.list_vars.remove(removed_var)
                state.stmts.append(inst)
                if state.list_vars == []:
                    # if all list_vars are considered dead, we need to filter
                    # them out from existing stmts to completely remove
                    # build_list.
                    # Note that if a translation didn't take place, dead_vars
                    # will also be empty when we reach this point.
                    body = []
                    for inst in state.stmts:
                        if (
                            isinstance(inst, ir.Assign)
                            and inst.target.name in state.dead_vars
                        ) or (
                            isinstance(inst, ir.Del)
                            and inst.value in state.dead_vars
                        ):
                            continue
                        body.append(inst)
                    state.stmts = body
                    state.dead_vars = []
                    state.modified = True
                    continue
        state.stmts.append(inst)

        # If the list is used in any capacity between build_list and array
        # call, then we must call off the translation for this list because
        # it could be mutated and list_items would no longer be applicable.
        if state.list_var_used(inst):
            state.reset()

    return state.stmts if state.modified else None