numba-cuda 0.18.1__py3-none-any.whl → 0.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (301) hide show
  1. _numba_cuda_redirector.pth +3 -0
  2. _numba_cuda_redirector.py +3 -0
  3. numba_cuda/VERSION +1 -1
  4. numba_cuda/__init__.py +2 -1
  5. numba_cuda/_version.py +2 -13
  6. numba_cuda/numba/cuda/__init__.py +4 -1
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +5 -2
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +4 -1
  9. numba_cuda/numba/cuda/api.py +5 -7
  10. numba_cuda/numba/cuda/api_util.py +3 -0
  11. numba_cuda/numba/cuda/args.py +3 -0
  12. numba_cuda/numba/cuda/bf16.py +3 -0
  13. numba_cuda/numba/cuda/cg.py +3 -0
  14. numba_cuda/numba/cuda/cgutils.py +3 -0
  15. numba_cuda/numba/cuda/codegen.py +3 -0
  16. numba_cuda/numba/cuda/compiler.py +10 -4
  17. numba_cuda/numba/cuda/core/caching.py +3 -0
  18. numba_cuda/numba/cuda/core/callconv.py +3 -0
  19. numba_cuda/numba/cuda/core/codegen.py +3 -0
  20. numba_cuda/numba/cuda/core/compiler.py +3 -0
  21. numba_cuda/numba/cuda/core/interpreter.py +3595 -0
  22. numba_cuda/numba/cuda/core/ir_utils.py +2644 -0
  23. numba_cuda/numba/cuda/core/sigutils.py +58 -0
  24. numba_cuda/numba/cuda/core/typed_passes.py +3 -0
  25. numba_cuda/numba/cuda/cuda_paths.py +12 -17
  26. numba_cuda/numba/cuda/cudadecl.py +4 -1
  27. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -0
  28. numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
  29. numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
  30. numba_cuda/numba/cuda/cudadrv/driver.py +7 -19
  31. numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
  32. numba_cuda/numba/cuda/cudadrv/dummyarray.py +3 -0
  33. numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  34. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  35. numba_cuda/numba/cuda/cudadrv/libs.py +4 -2
  36. numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
  37. numba_cuda/numba/cuda/cudadrv/mappings.py +3 -0
  38. numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
  39. numba_cuda/numba/cuda/cudadrv/nvrtc.py +47 -44
  40. numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -18
  41. numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
  42. numba_cuda/numba/cuda/cudadrv/runtime.py +15 -1
  43. numba_cuda/numba/cuda/cudaimpl.py +3 -0
  44. numba_cuda/numba/cuda/cudamath.py +4 -1
  45. numba_cuda/numba/cuda/debuginfo.py +3 -0
  46. numba_cuda/numba/cuda/decorators.py +7 -3
  47. numba_cuda/numba/cuda/descriptor.py +3 -0
  48. numba_cuda/numba/cuda/device_init.py +3 -0
  49. numba_cuda/numba/cuda/deviceufunc.py +5 -1
  50. numba_cuda/numba/cuda/dispatcher.py +6 -2
  51. numba_cuda/numba/cuda/errors.py +10 -0
  52. numba_cuda/numba/cuda/extending.py +4 -1
  53. numba_cuda/numba/cuda/flags.py +2 -0
  54. numba_cuda/numba/cuda/fp16.py +3 -0
  55. numba_cuda/numba/cuda/initialize.py +4 -0
  56. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
  57. numba_cuda/numba/cuda/intrinsics.py +3 -0
  58. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  59. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  60. numba_cuda/numba/cuda/kernels/reduction.py +3 -0
  61. numba_cuda/numba/cuda/kernels/transpose.py +3 -0
  62. numba_cuda/numba/cuda/libdevice.py +4 -0
  63. numba_cuda/numba/cuda/libdevicedecl.py +4 -1
  64. numba_cuda/numba/cuda/libdevicefuncs.py +4 -1
  65. numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
  66. numba_cuda/numba/cuda/locks.py +3 -0
  67. numba_cuda/numba/cuda/lowering.py +53 -16
  68. numba_cuda/numba/cuda/mathimpl.py +3 -0
  69. numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
  70. numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
  71. numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
  72. numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
  73. numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
  74. numba_cuda/numba/cuda/memory_management/nrt.py +5 -1
  75. numba_cuda/numba/cuda/models.py +3 -0
  76. numba_cuda/numba/cuda/nvvmutils.py +3 -0
  77. numba_cuda/numba/cuda/printimpl.py +3 -0
  78. numba_cuda/numba/cuda/random.py +3 -0
  79. numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
  80. numba_cuda/numba/cuda/serialize.py +3 -0
  81. numba_cuda/numba/cuda/simulator/__init__.py +3 -0
  82. numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
  83. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  84. numba_cuda/numba/cuda/simulator/api.py +4 -1
  85. numba_cuda/numba/cuda/simulator/bf16.py +3 -0
  86. numba_cuda/numba/cuda/simulator/compiler.py +3 -0
  87. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
  88. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +3 -0
  89. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
  90. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -7
  91. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
  93. numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
  94. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
  95. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
  96. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
  97. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
  98. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
  99. numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
  100. numba_cuda/numba/cuda/simulator/kernel.py +3 -0
  101. numba_cuda/numba/cuda/simulator/kernelapi.py +3 -0
  102. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
  103. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +3 -0
  104. numba_cuda/numba/cuda/simulator/reduction.py +3 -0
  105. numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
  106. numba_cuda/numba/cuda/simulator_init.py +3 -0
  107. numba_cuda/numba/cuda/stubs.py +3 -0
  108. numba_cuda/numba/cuda/target.py +4 -2
  109. numba_cuda/numba/cuda/testing.py +7 -6
  110. numba_cuda/numba/cuda/tests/__init__.py +3 -0
  111. numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
  112. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
  113. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  114. numba_cuda/numba/cuda/tests/core/test_serialize.py +3 -0
  115. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
  116. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
  117. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
  118. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
  119. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
  120. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +3 -0
  121. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
  122. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -0
  123. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
  124. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +4 -1
  125. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
  126. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +4 -1
  127. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
  128. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
  129. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
  130. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
  131. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
  132. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
  133. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +4 -1
  134. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +4 -1
  135. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +3 -0
  136. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +4 -1
  137. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +3 -0
  138. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +7 -6
  139. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -4
  140. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
  141. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
  142. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
  143. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
  144. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
  145. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
  146. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
  147. numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
  148. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
  149. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
  150. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
  151. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +3 -0
  152. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_array.py +3 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +3 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +4 -3
  160. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +4 -3
  161. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +149 -3
  164. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +4 -1
  166. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -4
  167. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +3 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +3 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +3 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +4 -1
  173. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +4 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +3 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
  176. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +23 -284
  177. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +4 -1
  179. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
  182. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +3 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -6
  184. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +4 -1
  190. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +3 -0
  194. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
  195. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
  196. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +4 -1
  197. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +298 -0
  198. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
  199. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
  200. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +3 -0
  201. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
  202. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +4 -1
  203. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
  204. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
  205. numba_cuda/numba/cuda/tests/cudapy/test_math.py +3 -0
  206. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +3 -0
  207. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
  208. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
  209. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
  210. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
  211. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
  212. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
  213. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
  214. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
  215. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
  216. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
  217. numba_cuda/numba/cuda/tests/cudapy/test_print.py +3 -0
  218. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
  219. numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
  220. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +3 -0
  221. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
  222. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +3 -0
  223. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
  224. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +3 -0
  225. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
  226. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +3 -0
  227. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
  228. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
  229. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +3 -0
  230. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
  231. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +4 -1
  232. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +3 -0
  233. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +3 -0
  234. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
  235. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
  236. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +3 -0
  237. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
  238. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
  239. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +8 -1
  240. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +3 -0
  241. numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
  242. numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
  243. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
  244. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  245. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
  246. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  247. numba_cuda/numba/cuda/tests/data/error.cu +5 -0
  248. numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
  249. numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
  250. numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
  251. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
  252. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  253. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
  254. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
  255. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
  256. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -0
  257. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +4 -1
  258. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -1
  259. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +4 -1
  260. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +4 -1
  261. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +4 -1
  262. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +4 -1
  263. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
  264. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +4 -1
  265. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +4 -1
  266. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +4 -1
  267. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +4 -1
  268. numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
  269. numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
  270. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
  271. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
  272. numba_cuda/numba/cuda/tests/nocuda/test_import.py +4 -1
  273. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +3 -0
  274. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
  275. numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
  276. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -2
  277. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
  278. numba_cuda/numba/cuda/tests/support.py +755 -0
  279. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +6 -3
  280. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +6 -2
  281. numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
  282. numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
  283. numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
  284. numba_cuda/numba/cuda/types.py +3 -0
  285. numba_cuda/numba/cuda/typing/__init__.py +11 -0
  286. numba_cuda/numba/cuda/typing/templates.py +1448 -0
  287. numba_cuda/numba/cuda/ufuncs.py +3 -0
  288. numba_cuda/numba/cuda/utils.py +3 -0
  289. numba_cuda/numba/cuda/vector_types.py +6 -3
  290. numba_cuda/numba/cuda/vectorizers.py +3 -0
  291. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/METADATA +25 -29
  292. numba_cuda-0.19.1.dist-info/RECORD +302 -0
  293. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/licenses/LICENSE +1 -0
  294. numba_cuda-0.19.1.dist-info/licenses/LICENSE.numba +24 -0
  295. numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -3749
  296. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -2683
  297. numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -3794
  298. numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -2614
  299. numba_cuda-0.18.1.dist-info/RECORD +0 -296
  300. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/WHEEL +0 -0
  301. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2644 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2017 Intel Corporation
2
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ # SPDX-License-Identifier: BSD-2-Clause
4
+
5
+ import numpy
6
+ import math
7
+
8
+ import types as pytypes
9
+ import collections
10
+ import warnings
11
+
12
+ import numba
13
+ from numba.core.extending import _Intrinsic
14
+ from numba.core import types, typing, ir, analysis, postproc, rewrites, config
15
+ from numba.core.typing.templates import signature
16
+ from numba.core.analysis import (
17
+ compute_live_map,
18
+ compute_use_defs,
19
+ compute_cfg_from_blocks,
20
+ )
21
+ from numba.core.errors import (
22
+ TypingError,
23
+ UnsupportedError,
24
+ NumbaPendingDeprecationWarning,
25
+ CompilerError,
26
+ )
27
+
28
+ import copy
29
+
30
# Module-wide counter; each call to mk_unique_var() consumes one value.
_unique_var_count = 0


def mk_unique_var(prefix):
    """Return ``prefix`` followed by a dot and the next counter value.

    The module-level counter ``_unique_var_count`` is incremented after the
    name has been built, so successive calls yield ``prefix.0``,
    ``prefix.1`` and so on, regardless of which prefix is used.
    """
    global _unique_var_count
    name = prefix + "." + str(_unique_var_count)
    _unique_var_count += 1
    return name
38
+
39
+
40
class _MaxLabel:
    """Holder for the largest label value seen so far."""

    def __init__(self, value=0):
        self._value = value

    def next(self):
        """Advance the stored value by one and return the new value."""
        self._value = self._value + 1
        return self._value

    def update(self, newval):
        """Raise the stored value to ``newval`` unless it is already larger."""
        self._value = self._value if self._value > newval else newval


# Single shared instance; the class name is removed right afterwards so
# that no further instances can be created through it.
_the_max_label = _MaxLabel()
del _MaxLabel
54
+
55
+
56
def get_unused_var_name(prefix, var_table):
    """Return the first name ``prefix0``, ``prefix1``, ... that is not
    contained in ``var_table``.
    """
    index = 0
    candidate = prefix + str(index)
    while candidate in var_table:
        index += 1
        candidate = prefix + str(index)
    return candidate
66
+
67
+
68
def next_label():
    """Advance the shared ``_the_max_label`` counter and return its new value."""
    label = _the_max_label.next()
    return label
70
+
71
+
72
def mk_alloc(
    typingctx, typemap, calltypes, lhs, size_var, dtype, scope, loc, lhs_typ
):
    """generate an array allocation with np.empty() and return list of nodes.
    size_var can be an int variable or tuple of int variables.
    lhs_typ is the type of the array being allocated.

    Notes on the arguments, as used by the code below:

    * ``typemap`` and ``calltypes`` are only written to when they are truthy;
      a falsy value (e.g. ``None`` or an empty mapping) disables the type
      bookkeeping for the generated variables and calls.
    * ``lhs`` is the assignment target of the final allocation statement.
    * ``size_var`` entries may be plain ``int`` constants; those are first
      assigned to fresh variables via ``convert_size_to_var``.
    * If ``lhs_typ`` has an ``__allocate__`` attribute, allocation is fully
      delegated to it and its return value is returned unchanged.
    * If ``lhs_typ.layout == "F"`` the array is allocated with ``np.empty``
      typed with layout "C" and then passed through ``np.asfortranarray``.

    Returns the list of IR nodes to insert (unless ``__allocate__`` took
    over, in which case whatever it returns).
    """
    out = []
    ndims = 1
    size_typ = types.intp
    if isinstance(size_var, tuple):
        if len(size_var) == 1:
            # A 1-tuple is treated like a scalar size.
            size_var = size_var[0]
            size_var = convert_size_to_var(size_var, typemap, scope, loc, out)
        else:
            # tuple_var = build_tuple([size_var...])
            ndims = len(size_var)
            tuple_var = ir.Var(scope, mk_unique_var("$tuple_var"), loc)
            if typemap:
                typemap[tuple_var.name] = types.containers.UniTuple(
                    types.intp, ndims
                )
            # constant sizes need to be assigned to vars
            new_sizes = [
                convert_size_to_var(s, typemap, scope, loc, out)
                for s in size_var
            ]
            tuple_call = ir.Expr.build_tuple(new_sizes, loc)
            tuple_assign = ir.Assign(tuple_call, tuple_var, loc)
            out.append(tuple_assign)
            size_var = tuple_var
            size_typ = types.containers.UniTuple(types.intp, ndims)
    # Types that know how to allocate themselves take over from here; they
    # receive the nodes generated so far through the trailing ``out`` argument.
    if hasattr(lhs_typ, "__allocate__"):
        return lhs_typ.__allocate__(
            typingctx,
            typemap,
            calltypes,
            lhs,
            size_var,
            dtype,
            scope,
            loc,
            lhs_typ,
            size_typ,
            out,
        )
    # g_np_var = Global(numpy)
    g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
    if typemap:
        typemap[g_np_var.name] = types.misc.Module(numpy)
    g_np = ir.Global("np", numpy, loc)
    g_np_assign = ir.Assign(g_np, g_np_var, loc)
    # attr call: empty_attr = getattr(g_np_var, empty)
    empty_attr_call = ir.Expr.getattr(g_np_var, "empty", loc)
    attr_var = ir.Var(scope, mk_unique_var("$empty_attr_attr"), loc)
    if typemap:
        typemap[attr_var.name] = get_np_ufunc_typ(numpy.empty)
    attr_assign = ir.Assign(empty_attr_call, attr_var, loc)
    # Assume str(dtype) returns a valid type
    dtype_str = str(dtype)
    # alloc call: lhs = empty_attr(size_var, typ_var)
    typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc)
    if typemap:
        typemap[typ_var.name] = types.functions.NumberClass(dtype)
    # If dtype is a datetime/timedelta with a unit,
    # then it won't return a valid type and instead can be created
    # with a string. i.e. "datetime64[ns]")
    if (
        isinstance(dtype, (types.NPDatetime, types.NPTimedelta))
        and dtype.unit != ""
    ):
        typename_const = ir.Const(dtype_str, loc)
        typ_var_assign = ir.Assign(typename_const, typ_var, loc)
    else:
        if dtype_str == "bool":
            # empty doesn't like 'bool' sometimes (e.g. kmeans example)
            dtype_str = "bool_"
        # typ_var = getattr(g_np_var, <dtype name>)
        np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc)
        typ_var_assign = ir.Assign(np_typ_getattr, typ_var, loc)
    alloc_call = ir.Expr.call(attr_var, [size_var, typ_var], (), loc)

    if calltypes:
        # NOTE(review): this reads typemap[attr_var.name], which is only
        # populated above when typemap is truthy - confirm callers never pass
        # calltypes without a typemap.
        cac = typemap[attr_var.name].get_call_type(
            typingctx, [size_typ, types.functions.NumberClass(dtype)], {}
        )
        # By default, all calls to "empty" are typed as returning a standard
        # NumPy ndarray. If we are allocating a ndarray subclass here then
        # just change the return type to be that of the subclass.
        cac._return_type = (
            lhs_typ.copy(layout="C") if lhs_typ.layout == "F" else lhs_typ
        )
        calltypes[alloc_call] = cac
    if lhs_typ.layout == "F":
        # The np.empty result goes into a temporary typed with layout "C";
        # lhs then receives np.asfortranarray(temporary).
        empty_c_typ = lhs_typ.copy(layout="C")
        empty_c_var = ir.Var(scope, mk_unique_var("$empty_c_var"), loc)
        if typemap:
            typemap[empty_c_var.name] = lhs_typ.copy(layout="C")
        empty_c_assign = ir.Assign(alloc_call, empty_c_var, loc)

        # attr call: asfortranarray = getattr(g_np_var, asfortranarray)
        asfortranarray_attr_call = ir.Expr.getattr(
            g_np_var, "asfortranarray", loc
        )
        afa_attr_var = ir.Var(
            scope, mk_unique_var("$asfortran_array_attr"), loc
        )
        if typemap:
            typemap[afa_attr_var.name] = get_np_ufunc_typ(numpy.asfortranarray)
        afa_attr_assign = ir.Assign(asfortranarray_attr_call, afa_attr_var, loc)
        # call asfortranarray
        asfortranarray_call = ir.Expr.call(afa_attr_var, [empty_c_var], (), loc)
        if calltypes:
            calltypes[asfortranarray_call] = typemap[
                afa_attr_var.name
            ].get_call_type(typingctx, [empty_c_typ], {})

        asfortranarray_assign = ir.Assign(asfortranarray_call, lhs, loc)

        out.extend(
            [
                g_np_assign,
                attr_assign,
                typ_var_assign,
                empty_c_assign,
                afa_attr_assign,
                asfortranarray_assign,
            ]
        )
    else:
        # Any other layout: assign the np.empty call result straight to lhs.
        alloc_assign = ir.Assign(alloc_call, lhs, loc)
        out.extend([g_np_assign, attr_assign, typ_var_assign, alloc_assign])

    return out
205
+
206
+
207
def convert_size_to_var(size_var, typemap, scope, loc, nodes):
    """Return an ``ir.Var`` holding ``size_var``.

    An ``ir.Var`` is returned unchanged.  An ``int`` is assigned to a fresh
    ``$alloc_size`` variable; the assignment is appended to ``nodes`` and,
    when ``typemap`` is truthy, the new variable is typed as ``types.intp``.
    Anything else trips the assertion.
    """
    if not isinstance(size_var, int):
        assert isinstance(size_var, ir.Var)
        return size_var

    const_var = ir.Var(scope, mk_unique_var("$alloc_size"), loc)
    if typemap:
        typemap[const_var.name] = types.intp
    nodes.append(ir.Assign(ir.Const(size_var, loc), const_var, loc))
    return const_var
217
+
218
+
219
def get_np_ufunc_typ(func):
    """get type of the incoming function from builtin registry

    The ``globals`` of ``typing.npydecl.registry`` are searched first, then
    those of ``typing.templates.builtin_registry``; the type paired with the
    first key equal to ``func`` is returned.

    Raises:
        RuntimeError: if neither registry has an entry for ``func``.
    """
    registries = (typing.npydecl.registry, typing.templates.builtin_registry)
    for registry in registries:
        for key, typ in registry.globals:
            if key == func:
                return typ
    # Build one message string; passing the pieces as separate exception
    # arguments would render the error as a tuple repr.
    raise RuntimeError("type for func {} not found".format(func))
228
+
229
+
230
def mk_range_block(typemap, start, stop, step, calltypes, scope, loc):
    """Build the IR block that sets up iteration over ``range``.

    The block loads the ``range`` global, calls it with the arguments
    produced by ``_mk_range_args``, takes an iterator over the result and
    copies that iterator into a fresh ``$phi`` variable.  Entries for the
    new variables and calls are written to ``typemap`` and ``calltypes``.
    The block ends in a jump whose target is the placeholder ``-1``; the
    caller has to set the real target label.
    """
    # range_fn = Global(range)
    range_fn = ir.Var(scope, mk_unique_var("$range_g_var"), loc)
    typemap[range_fn.name] = get_global_func_typ(range)
    load_range = ir.Assign(ir.Global("range", range, loc), range_fn, loc)

    const_nodes, call_args = _mk_range_args(
        typemap, start, stop, step, scope, loc
    )

    # range_obj = call range_fn(start, stop, step)
    range_expr = ir.Expr.call(range_fn, call_args, (), loc)
    calltypes[range_expr] = typemap[range_fn.name].get_call_type(
        typing.Context(), [types.intp] * len(call_args), {}
    )
    range_obj = ir.Var(scope, mk_unique_var("$range_c_var"), loc)
    typemap[range_obj.name] = types.iterators.RangeType(types.intp)
    call_range = ir.Assign(range_expr, range_obj, loc)

    # range_iter = getiter(range_obj)
    getiter_expr = ir.Expr.getiter(range_obj, loc)
    if config.USE_LEGACY_TYPE_SYSTEM:
        iter_typ, state_typ = types.range_iter64_type, types.range_state64_type
    else:
        iter_typ, state_typ = types.range_iter_type, types.range_state_type
    calltypes[getiter_expr] = signature(iter_typ, state_typ)
    range_iter = ir.Var(scope, mk_unique_var("$iter_var"), loc)
    typemap[range_iter.name] = types.iterators.RangeIteratorType(types.intp)
    take_iter = ir.Assign(getiter_expr, range_iter, loc)

    # $phi = range_iter
    phi = ir.Var(scope, mk_unique_var("$phi"), loc)
    typemap[phi.name] = types.iterators.RangeIteratorType(types.intp)
    copy_to_phi = ir.Assign(range_iter, phi, loc)

    # jump to the loop header (placeholder label)
    to_header = ir.Jump(-1, loc)

    block = ir.Block(scope, loc)
    block.body = const_nodes + [
        load_range,
        call_range,
        take_iter,
        copy_to_phi,
        to_header,
    ]
    return block
276
+
277
+
278
def _mk_range_args(typemap, start, stop, step, scope, loc):
    """Build the argument variables for a ``range`` call.

    Returns ``(nodes, args)``: ``nodes`` are the constant assignments
    created for arguments given as ``int``, and ``args`` is the list of
    ``ir.Var`` to pass to ``range``: just the stop when ``start == 0`` and
    ``step == 1``, start and stop when only ``step == 1``, otherwise all
    three.  Each argument must be an ``ir.Var`` or an ``int``.
    """
    nodes = []

    def as_var(value, var_prefix):
        # Variables pass through; ints get assigned to a fresh intp variable.
        if isinstance(value, ir.Var):
            return value
        assert isinstance(value, int)
        const_var = ir.Var(scope, mk_unique_var(var_prefix), loc)
        if typemap:
            typemap[const_var.name] = types.intp
        nodes.append(ir.Assign(ir.Const(value, loc), const_var, loc))
        return const_var

    stop_var = as_var(stop, "$range_stop")
    if start == 0 and step == 1:
        return nodes, [stop_var]

    start_var = as_var(start, "$range_start")
    if step == 1:
        return nodes, [start_var, stop_var]

    step_var = as_var(step, "$range_step")
    return nodes, [start_var, stop_var, step_var]
315
+
316
+
317
def get_global_func_typ(func):
    """Look up the type registered for ``func`` in the builtin registry.

    Raises ``RuntimeError`` when no key in
    ``typing.templates.builtin_registry.globals`` equals ``func``.
    """
    not_found = object()
    entries = typing.templates.builtin_registry.globals
    typ = next((v for k, v in entries if k == func), not_found)
    if typ is not_found:
        raise RuntimeError("func type not found {}".format(func))
    return typ
323
+
324
+
325
def mk_loop_header(typemap, phi_var, calltypes, scope, loc):
    """Build the IR block that acts as the header of a range loop.

    The block calls ``iternext`` on ``phi_var``, splits the resulting pair
    into its first and second elements, copies the first element into a
    fresh ``$phi`` variable and branches on the second one.  Both branch
    targets are the placeholder ``-1``; the caller has to set the real
    labels.  ``typemap`` and ``calltypes`` are updated for the new
    variables and the ``iternext`` call.
    """
    pair_typ = types.containers.Pair(types.intp, types.boolean)

    # next_pair = iternext(phi_var)
    next_pair = ir.Var(scope, mk_unique_var("$iternext_var"), loc)
    typemap[next_pair.name] = pair_typ
    next_expr = ir.Expr.iternext(phi_var, loc)
    if config.USE_LEGACY_TYPE_SYSTEM:
        iter_typ = types.range_iter64_type
    else:
        iter_typ = types.range_iter_type
    calltypes[next_expr] = signature(
        types.containers.Pair(types.intp, types.boolean), iter_typ
    )
    advance = ir.Assign(next_expr, next_pair, loc)

    # value = pair_first(next_pair)
    value = ir.Var(scope, mk_unique_var("$pair_first_var"), loc)
    typemap[value.name] = types.intp
    take_value = ir.Assign(ir.Expr.pair_first(next_pair, loc), value, loc)

    # is_valid = pair_second(next_pair)
    is_valid = ir.Var(scope, mk_unique_var("$pair_second_var"), loc)
    typemap[is_valid.name] = types.boolean
    take_valid = ir.Assign(ir.Expr.pair_second(next_pair, loc), is_valid, loc)

    # $phi = value
    phi_b = ir.Var(scope, mk_unique_var("$phi"), loc)
    typemap[phi_b.name] = types.intp
    copy_value = ir.Assign(value, phi_b, loc)

    # branch is_valid body_block out_block (placeholder labels)
    branch = ir.Branch(is_valid, -1, -1, loc)

    header = ir.Block(scope, loc)
    header.body = [advance, take_value, take_valid, copy_value, branch]
    return header
368
+
369
+
370
def legalize_names(varnames):
    """Map variable names to names that are legal as parameter names.

    Every ``_`` is doubled, then every ``$`` and ``.`` is replaced by a
    single ``_``.

    Parameters
    ----------
    varnames : iterable of str
        Names to convert.  A name that occurs more than once is mapped once.

    Returns
    -------
    dict
        Mapping of each original name to its legalized name.

    Raises
    ------
    AssertionError
        If two different input names legalize to the same output name
        (for example ``"a$b"`` and ``"a.b"``).
    """
    var_map = {}
    # Legal names handed out so far.  The uniqueness check has to look at
    # these (the *values* of var_map); checking the keys, which are the
    # original names, misses real collisions and can flag unrelated names.
    used_names = set()
    for var in varnames:
        if var in var_map:
            continue
        new_name = var.replace("_", "__").replace("$", "_").replace(".", "_")
        assert new_name not in used_names, (
            "legalized name %r is produced by more than one variable"
            % (new_name,)
        )
        used_names.add(new_name)
        var_map[var] = new_name
    return var_map
380
+
381
+
382
def get_name_var_table(blocks):
    """Return a ``{name: ir.Var}`` dict of the variables visited in ``blocks``.

    The table is filled by running ``visit_vars`` with a callback that
    records each variable and returns it unchanged.  If a name is visited
    several times, the object seen last is the one kept.
    """
    table = {}

    def record(var, acc):
        acc[var.name] = var
        return var

    visit_vars(blocks, record, table)
    return table
392
+
393
+
394
def replace_var_names(blocks, namedict):
    """Rename variables in ``blocks`` in place.

    ``namedict`` maps an old variable name to a new variable name.  Renames
    are followed transitively (``a -> b`` and ``b -> c`` turns ``a`` into
    ``c``); each replacement is a new ``ir.Var`` that keeps the scope and
    location of the variable it replaces.
    """
    # Entries mapping a name to itself are dropped: the renaming loop below
    # would never terminate on them.
    effective = {old: new for old, new in namedict.items() if old != new}

    def rename(var, mapping):
        assert isinstance(var, ir.Var)
        while var.name in mapping:
            var = ir.Var(var.scope, mapping[var.name], var.loc)
        return var

    visit_vars(blocks, rename, effective)
409
+
410
+
411
def replace_var_callback(var, vardict):
    """Visitor callback that substitutes ``var`` according to ``vardict``.

    ``vardict`` maps a variable name to the ``ir.Var`` that should replace
    it.  Replacements are followed until a name without an entry is reached.
    The result is a fresh ``ir.Var`` built from the replacement's scope, name
    and location, or ``var`` itself when it has no entry.
    """
    assert isinstance(var, ir.Var)
    current = var
    while current.name in vardict:
        replacement = vardict[current.name]
        # an entry mapping a name to itself would loop forever
        assert replacement.name != current.name
        current = ir.Var(replacement.scope, replacement.name, replacement.loc)
    return current
418
+
419
+
420
def replace_vars(blocks, vardict):
    """Replace variables in ``blocks`` in place.

    ``vardict`` maps a variable name to the ``ir.Var`` that replaces it.
    Entries whose replacement carries the same name as the key are ignored,
    since following them would never terminate.
    """
    effective = {
        name: new_var for name, new_var in vardict.items() if name != new_var.name
    }
    visit_vars(blocks, replace_var_callback, effective)
428
+
429
+
430
def replace_vars_stmt(stmt, vardict):
    """Replace the variables of a single statement in place.

    ``vardict`` maps a variable name to its replacement ``ir.Var``.  Unlike
    ``replace_vars``, the dictionary is used as given (no entries are
    filtered out).
    """
    visit_vars_stmt(stmt, callback=replace_var_callback, cbdata=vardict)
432
+
433
+
434
def replace_vars_inner(node, vardict):
    """Replace variables inside ``node`` and return the resulting node.

    ``vardict`` maps a variable name to its replacement ``ir.Var``; the
    traversal itself is done by ``visit_vars_inner``.
    """
    replaced = visit_vars_inner(
        node, callback=replace_var_callback, cbdata=vardict
    )
    return replaced
436
+
437
+
438
# Registry used by packages that define their own IR node types.
# Format: {node type: function}.  A registered function is responsible for
# visiting the variables of statements of its node type.
visit_vars_extensions = dict()
441
+
442
+
443
def visit_vars(blocks, callback, cbdata):
    """Visit the variables of every statement in every block.

    ``blocks`` maps labels to blocks; each statement of each block body is
    handed to ``visit_vars_stmt`` together with ``callback`` and ``cbdata``.
    Statements are updated in place and nothing is returned.
    """
    for blk in blocks.values():
        for statement in blk.body:
            visit_vars_stmt(statement, callback, cbdata)
451
+
452
+
453
def visit_vars_stmt(stmt, callback, cbdata):
    """Visit the variables of one IR statement, updating it in place.

    If the statement is an instance of a type registered in
    ``visit_vars_extensions``, the first matching registered function handles
    it and nothing else is done.  Otherwise the relevant attributes of the
    built-in statement types are passed through ``visit_vars_inner`` and
    reassigned.  Statement types that are not listed are left untouched.
    """
    # let external calls handle stmt if type matches
    for node_type, handler in visit_vars_extensions.items():
        if isinstance(stmt, node_type):
            handler(stmt, callback, cbdata)
            return

    def visit(node):
        return visit_vars_inner(node, callback, cbdata)

    # (statement type, attributes rewritten in this order).  ``None`` marks
    # the two types that need dedicated handling below.  The order of the
    # entries is the order in which the types are tested.
    field_table = (
        (ir.Assign, ("target", "value")),
        (ir.Arg, ("name",)),
        (ir.Return, ("value",)),
        (ir.Raise, ("exception",)),
        (ir.Branch, ("cond",)),
        (ir.Jump, ("target",)),
        (ir.Del, None),
        (ir.DelAttr, ("target", "attr")),
        (ir.SetAttr, ("target", "attr", "value")),
        (ir.DelItem, ("target", "index")),
        (ir.StaticSetItem, ("target", "index_var", "value")),
        (ir.SetItem, ("target", "index", "value")),
        (ir.Print, None),
    )

    for node_type, fields in field_table:
        if not isinstance(stmt, node_type):
            continue
        if node_type is ir.Del:
            # Because Del takes only a var name, we make up by
            # constructing a temporary variable.
            temp = visit(ir.Var(None, stmt.value, stmt.loc))
            stmt.value = temp.name
        elif node_type is ir.Print:
            stmt.args = [visit(arg) for arg in stmt.args]
        else:
            for field in fields:
                setattr(stmt, field, visit(getattr(stmt, field)))
        return
    # TODO: raise NotImplementedError("no replacement for IR node: ", stmt)
502
+
503
+
504
def visit_vars_inner(node, callback, cbdata):
    """Apply ``callback`` to every variable reachable from ``node``.

    * ``ir.Var``: returns ``callback(node, cbdata)``.
    * ``list`` / ``tuple``: returns a new list / tuple of visited elements.
    * ``ir.Expr``: every entry of ``node._kws`` is visited and stored back;
      the same expression object is returned.
    * ``ir.Yield``: ``node.value`` is visited and stored back; the same
      object is returned.
    * anything else is returned unchanged.
    """
    if isinstance(node, ir.Var):
        return callback(node, cbdata)
    if isinstance(node, list):
        return [visit_vars_inner(item, callback, cbdata) for item in node]
    if isinstance(node, tuple):
        return tuple(visit_vars_inner(item, callback, cbdata) for item in node)

    if isinstance(node, ir.Expr):
        operands = node._kws
        for key in operands:
            operands[key] = visit_vars_inner(operands[key], callback, cbdata)
    elif isinstance(node, ir.Yield):
        node.value = visit_vars_inner(node.value, callback, cbdata)
    return node
522
+
523
+
524
# Format: {node type: function}.  A registered function is called as
# ``f(inst, offset)`` for every instruction that is an instance of its type.
add_offset_to_labels_extensions = dict()


def add_offset_to_labels(blocks, offset):
    """Shift all block labels and jump/branch targets by ``offset``.

    Returns a new ``{label + offset: block}`` dict.  The block objects are
    reused: a trailing ``ir.Jump`` or ``ir.Branch`` is replaced inside the
    block body by a new terminator with shifted targets, and functions
    registered in ``add_offset_to_labels_extensions`` are invoked on matching
    instructions.
    """
    shifted = {}
    for label, blk in blocks.items():
        body = blk.body
        # some parfor last blocks might be empty
        last = body[-1] if body else None
        for inst in body or ():
            for node_type, shift in add_offset_to_labels_extensions.items():
                if isinstance(inst, node_type):
                    shift(inst, offset)
        if isinstance(last, ir.Jump):
            blk.body[-1] = ir.Jump(last.target + offset, last.loc)
        if isinstance(last, ir.Branch):
            blk.body[-1] = ir.Branch(
                last.cond,
                last.truebr + offset,
                last.falsebr + offset,
                last.loc,
            )
        shifted[label + offset] = blk
    return shifted
547
+
548
+
549
# Format: {node type: function}.  A registered function is called as
# ``f(inst)`` and returns the largest label used inside such an instruction.
find_max_label_extensions = dict()


def find_max_label(blocks):
    """Return the largest label in ``blocks``, never less than 0.

    Besides the block labels themselves, the values reported by functions
    registered in ``find_max_label_extensions`` for matching instructions are
    taken into account.
    """
    highest = 0
    for label, blk in blocks.items():
        for inst in blk.body or ():
            for node_type, inner_max in find_max_label_extensions.items():
                if isinstance(inst, node_type):
                    highest = max(highest, inner_max(inst))
        highest = max(highest, label)
    return highest
565
+
566
+
567
def flatten_labels(blocks):
    """Renumber the blocks so that labels are ``0 .. len(blocks) - 1``.

    New labels follow the order returned by ``find_topo_order``, which makes
    the result easier to read and useful for comparing CFGs.  Jump and branch
    terminators are rewritten to the new labels.  Returns the renumbered
    ``{label: block}`` dict.
    """
    # first bulk move the labels out of the rewrite range
    moved = add_offset_to_labels(blocks, find_max_label(blocks) + 1)
    order = find_topo_order(moved)
    new_label_of = {old: position for position, old in enumerate(order)}

    flattened = {}
    for old_label in order:
        blk = moved[old_label]
        # some parfor last blocks might be empty
        last = blk.body[-1] if blk.body else None
        if isinstance(last, ir.Jump):
            blk.body[-1] = ir.Jump(new_label_of[last.target], last.loc)
        if isinstance(last, ir.Branch):
            blk.body[-1] = ir.Branch(
                last.cond,
                new_label_of[last.truebr],
                new_label_of[last.falsebr],
                last.loc,
            )
        flattened[new_label_of[old_label]] = blk
    return flattened
594
+
595
+
596
def remove_dels(blocks):
    """Drop every ``ir.Del`` statement from the bodies of ``blocks``.

    Each block receives a new body list; nothing is returned.
    """
    for blk in blocks.values():
        blk.body = [stmt for stmt in blk.body if not isinstance(stmt, ir.Del)]
605
+
606
+
607
def remove_args(blocks):
    """Drop every assignment whose value is an ``ir.Arg`` node.

    Each block receives a new body list; nothing is returned.
    """
    for blk in blocks.values():
        blk.body = [
            stmt
            for stmt in blk.body
            if not (isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg))
        ]
617
+
618
+
619
def dead_code_elimination(
    func_ir, typemap=None, alias_map=None, arg_aliases=None
):
    """Eliminate dead code from ``func_ir`` and leave the IR valid on exit.

    ``remove_dead`` is applied repeatedly until it reports that nothing was
    removed.  If at least one pass removed something, a
    ``postproc.PostProcessor`` is run on the IR afterwards.
    """
    changed = False
    while True:
        removed_something = remove_dead(
            func_ir.blocks,
            func_ir.arg_names,
            func_ir,
            typemap,
            alias_map,
            arg_aliases,
        )
        if not removed_something:
            break
        changed = True

    if changed:
        postproc.PostProcessor(func_ir).run()
639
+
640
+
641
def remove_dead(
    blocks, args, func_ir, typemap=None, alias_map=None, arg_aliases=None
):
    """Run one pass of dead code elimination using liveness and CFG info.

    Alias information is computed with ``find_potential_aliases`` unless both
    ``alias_map`` and ``arg_aliases`` are supplied.  Every block is then
    cleaned by ``remove_dead_block``, seeded with the variables used by its
    terminator plus the live-in sets of its successors.

    Returns True if something has been removed, or False if nothing is
    removed.
    """
    cfg = compute_cfg_from_blocks(blocks)
    usedefs = compute_use_defs(blocks)
    live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap)
    call_table, _ = get_call_table(blocks)
    if alias_map is None or arg_aliases is None:
        alias_map, arg_aliases = find_potential_aliases(
            blocks, args, typemap, func_ir
        )
    if config.DEBUG_ARRAY_OPT >= 1:
        debug_items = (
            ("args:", args),
            ("alias map:", alias_map),
            ("arg_aliases:", arg_aliases),
            ("live_map:", live_map),
            ("usemap:", usedefs.usemap),
            ("defmap:", usedefs.defmap),
        )
        for caption, value in debug_items:
            print(caption, value)
    # keep set for easier search
    alias_set = set(alias_map)

    anything_removed = False
    for label, block in blocks.items():
        # variables used by the terminator are live at the end of the block
        live_out = set(v.name for v in block.terminator.list_vars())
        if config.DEBUG_ARRAY_OPT >= 2:
            print("remove_dead processing block", label, live_out)
        # ... and so is everything live on entry to a successor
        for succ_label, _data in cfg.successors(label):
            if config.DEBUG_ARRAY_OPT >= 2:
                print("succ live_map", succ_label, live_map[succ_label])
            live_out |= live_map[succ_label]
        anything_removed |= remove_dead_block(
            block,
            live_out,
            call_table,
            arg_aliases,
            alias_map,
            alias_set,
            func_ir,
            typemap,
        )

    return anything_removed
688
+
689
+
690
# other packages that define new nodes add calls to remove dead code in them
# format: {type:function}
# A registered function is looked up by the exact type of the statement and
# called as f(stmt, lives, lives_n_aliases, arg_aliases, alias_map, func_ir,
# typemap); it returns the statement to keep, or None to drop it.
remove_dead_extensions = {}


def remove_dead_block(
    block,
    lives,
    call_table,
    arg_aliases,
    alias_map,
    alias_set,
    func_ir,
    typemap,
):
    """remove dead code using liveness info.
    Mutable arguments (e.g. arrays) that are not definitely assigned are live
    after return of function.

    The statements of ``block`` (terminator excluded) are walked backwards
    and ``block.body`` is replaced by the list of statements that were kept.

    Parameters
    ----------
    block : block whose body is rewritten in place
    lives : set of variable names live at the end of the block; it is
        updated in place while walking backwards
    call_table : passed on to ``has_no_side_effect``
    arg_aliases : set of names treated as always live
    alias_map : dict of name -> set of alias names
    alias_set : set of names that have an entry in ``alias_map``
    func_ir, typemap : only forwarded to ``remove_dead_extensions`` handlers

    Returns
    -------
    bool
        True if a statement was removed through one of the paths that set
        the flag (see the note on ``ir.SetItem`` below).
    """
    # TODO: find mutable args that are not definitely assigned instead of
    # assuming all args are live after return
    removed = False

    # add statements in reverse order
    new_body = [block.terminator]
    # for each statement in reverse order, excluding terminator
    for stmt in reversed(block.body[:-1]):
        if config.DEBUG_ARRAY_OPT >= 2:
            print("remove_dead_block", stmt)
        # aliases of lives are also live
        alias_lives = set()
        init_alias_lives = lives & alias_set
        for v in init_alias_lives:
            alias_lives |= alias_map[v]
        lives_n_aliases = lives | alias_lives | arg_aliases

        # let external calls handle stmt if type matches
        if type(stmt) in remove_dead_extensions:
            f = remove_dead_extensions[type(stmt)]
            stmt = f(
                stmt,
                lives,
                lives_n_aliases,
                arg_aliases,
                alias_map,
                func_ir,
                typemap,
            )
            if stmt is None:
                if config.DEBUG_ARRAY_OPT >= 2:
                    print("Statement was removed.")
                removed = True
                continue

        # ignore assignments that their lhs is not live or lhs==rhs
        if isinstance(stmt, ir.Assign):
            lhs = stmt.target
            rhs = stmt.value
            if lhs.name not in lives and has_no_side_effect(
                rhs, lives_n_aliases, call_table
            ):
                if config.DEBUG_ARRAY_OPT >= 2:
                    print("Statement was removed.")
                removed = True
                continue
            if isinstance(rhs, ir.Var) and lhs.name == rhs.name:
                if config.DEBUG_ARRAY_OPT >= 2:
                    print("Statement was removed.")
                removed = True
                continue
            # TODO: remove other nodes like SetItem etc.

        # a Del of a variable that is not live is itself dead
        if isinstance(stmt, ir.Del):
            if stmt.value not in lives:
                if config.DEBUG_ARRAY_OPT >= 2:
                    print("Statement was removed.")
                removed = True
                continue

        # a store into something that is neither live nor aliased is dead
        if isinstance(stmt, ir.SetItem):
            name = stmt.target.name
            if name not in lives_n_aliases:
                if config.DEBUG_ARRAY_OPT >= 2:
                    print("Statement was removed.")
                # NOTE(review): unlike the other removal paths, ``removed``
                # is not set to True here -- confirm whether intentional.
                continue

        # The statement is kept: update liveness for the statements above it.
        if type(stmt) in analysis.ir_extension_usedefs:
            def_func = analysis.ir_extension_usedefs[type(stmt)]
            uses, defs = def_func(stmt)
            lives -= defs
            lives |= uses
        else:
            lives |= {v.name for v in stmt.list_vars()}
            if isinstance(stmt, ir.Assign):
                # make sure lhs is not used in rhs, e.g. a = g(a)
                if isinstance(stmt.value, ir.Expr):
                    rhs_vars = {v.name for v in stmt.value.list_vars()}
                    if lhs.name not in rhs_vars:
                        lives.remove(lhs.name)
                else:
                    lives.remove(lhs.name)

        new_body.append(stmt)
    # statements were collected back to front
    new_body.reverse()
    block.body = new_body
    return removed
796
+
797
+
798
# List of functions called as ``f(rhs, lives, call_list)`` by
# ``has_no_side_effect``; a truthy result marks the call as removable.
remove_call_handlers = list()


def remove_dead_random_call(rhs, lives, call_list):
    """Tell whether a ``numpy.random`` call can be removed when dead.

    Returns True when ``call_list`` is ``[name, "random", numpy]`` and
    ``name`` is neither ``"seed"`` nor ``"shuffle"``; False otherwise.
    ``rhs`` and ``lives`` are accepted for the handler signature and unused.
    """
    is_numpy_random = len(call_list) == 3 and call_list[1:] == ["random", numpy]
    if not is_numpy_random:
        return False
    return call_list[0] not in {"seed", "shuffle"}


remove_call_handlers.append(remove_dead_random_call)
809
+
810
+
811
def has_no_side_effect(rhs, lives, call_table):
    """Returns True if this expression has no side effects that
    would prevent re-ordering.

    Calls are considered free of side effects only when their entry in
    ``call_table`` matches one of the known cases below or is accepted by a
    function in ``remove_call_handlers``.  An ``inplace_binop`` is free of
    side effects only if its left operand is not in ``lives``.  ``ir.Yield``
    and ``pair_first`` expressions are never reported as side-effect free.
    """
    from numba.parfors import array_analysis, parfor
    from numba.misc.special import prange

    is_expr = isinstance(rhs, ir.Expr)

    if is_expr and rhs.op == "call":
        func_name = rhs.func.name
        if func_name not in call_table or call_table[func_name] == []:
            return False
        call_list = call_table[func_name]
        known_safe = (
            ["empty", numpy],
            [slice],
            ["stencil", numba],
            ["log", numpy],
            ["dtype", numpy],
            [array_analysis.wrap_index],
            [prange],
            ["prange", numba],
            ["pndindex", numba],
            [parfor.internal_prange],
            ["ceil", math],
            [max],
            [int],
        )
        if any(call_list == entry for entry in known_safe):
            return True
        callee = call_list[0]
        if isinstance(callee, _Intrinsic) and callee._name in (
            "empty_inferred",
            "unsafe_empty_inferred",
        ):
            return True
        from numba.core.registry import CPUDispatcher
        from numba.np.linalg import dot_3_mv_check_args

        if (
            isinstance(callee, CPUDispatcher)
            and callee.py_func == dot_3_mv_check_args
        ):
            return True
        return any(
            handler(rhs, lives, call_list) for handler in remove_call_handlers
        )

    if is_expr and rhs.op == "inplace_binop":
        return rhs.lhs.name not in lives
    if isinstance(rhs, ir.Yield):
        return False
    # don't remove pair_first since prange looks for it
    return not (is_expr and rhs.op == "pair_first")
863
+
864
+
865
# List of functions called as ``f(rhs, lives, call_list)`` by ``is_pure``;
# a truthy result marks the call as pure.
is_pure_extensions = list()


def is_pure(rhs, lives, call_table):
    """Returns True if every time this expression is evaluated it
    returns the same result.  This is not the case for things
    like calls to numpy.random.

    Calls are pure only when their ``call_table`` entry is one of the known
    cases below or is accepted by a function in ``is_pure_extensions``.
    ``getiter`` / ``iternext`` expressions and ``ir.Yield`` are never pure;
    everything else is.
    """
    if isinstance(rhs, ir.Expr):
        op = rhs.op
        if op == "call":
            func_name = rhs.func.name
            if func_name not in call_table or call_table[func_name] == []:
                return False
            call_list = call_table[func_name]
            known_pure = (
                [slice],
                ["log", numpy],
                ["empty", numpy],
                ["ceil", math],
                [max],
                [int],
            )
            if any(call_list == entry for entry in known_pure):
                return True
            return any(
                check(rhs, lives, call_list) for check in is_pure_extensions
            )
        if op in ("getiter", "iternext"):
            return False
    if isinstance(rhs, ir.Yield):
        return False
    return True
897
+
898
+
899
def is_const_call(module_name, func_name):
    """Returns True if there is no state in the given module changed by the
    given function.

    Currently only ``numpy.empty`` is recognised.
    """
    if module_name != "numpy":
        return False
    return func_name in ["empty"]
905
+
906
+
907
# Registries consulted by ``find_potential_aliases``.
# alias_analysis_extensions: {instruction type: function}
# alias_func_extensions: {call name as returned by find_callname: function}
alias_analysis_extensions = dict()
alias_func_extensions = dict()
909
+
910
+
911
def get_canonical_alias(v, alias_map):
    """Return a canonical representative for ``v``.

    If ``v`` has an entry in ``alias_map``, the smallest element of its alias
    set (in sorted order) is returned; otherwise ``v`` itself.
    """
    if v in alias_map:
        ordered = sorted(alias_map[v])
        return ordered[0]
    return v
917
+
918
+
919
def find_potential_aliases(
    blocks, args, typemap, func_ir, alias_map=None, arg_aliases=None
):
    """find all array aliases and argument aliases to avoid remove as dead

    Every assignment in ``blocks`` is inspected; when its left-hand side is
    not of an immutable type and its value may share storage with another
    variable, the pair is recorded with ``_add_alias``.  Alias sets are then
    merged transitively.

    Parameters
    ----------
    blocks : dict of label -> block to analyse
    args : iterable of argument names
    typemap : dict of name -> type, or None (then nothing is treated as
        immutable, see ``is_immutable_type``)
    func_ir : function IR; its ``_definitions`` are rebuilt as a side effect
    alias_map : optional dict of name -> set of names, extended in place
    arg_aliases : optional set of names aliasing an argument, extended in
        place; defaults to the arguments that are not of an immutable type

    Returns
    -------
    (alias_map, arg_aliases)
    """
    if alias_map is None:
        alias_map = {}
    if arg_aliases is None:
        arg_aliases = set(a for a in args if not is_immutable_type(a, typemap))

    # update definitions since they are not guaranteed to be up-to-date
    # FIXME keep definitions up-to-date to avoid the need for rebuilding
    func_ir._definitions = build_definitions(func_ir.blocks)
    np_alias_funcs = ["ravel", "transpose", "reshape"]

    for bl in blocks.values():
        for instr in bl.body:
            # give registered handlers a chance to record aliases first
            if type(instr) in alias_analysis_extensions:
                f = alias_analysis_extensions[type(instr)]
                f(instr, args, typemap, func_ir, alias_map, arg_aliases)
            if isinstance(instr, ir.Assign):
                expr = instr.value
                lhs = instr.target.name
                # only mutable types can alias
                if is_immutable_type(lhs, typemap):
                    continue
                # plain copy a = b
                if isinstance(expr, ir.Var) and lhs != expr.name:
                    _add_alias(lhs, expr.name, alias_map, arg_aliases)
                # subarrays like A = B[0] for 2D B
                if isinstance(expr, ir.Expr) and (
                    expr.op == "cast"
                    or expr.op in ["getitem", "static_getitem"]
                ):
                    _add_alias(lhs, expr.value.name, alias_map, arg_aliases)
                # result of an in-place operation aliases its left operand
                if isinstance(expr, ir.Expr) and expr.op == "inplace_binop":
                    _add_alias(lhs, expr.lhs.name, alias_map, arg_aliases)
                # array attributes like A.T
                if (
                    isinstance(expr, ir.Expr)
                    and expr.op == "getattr"
                    and expr.attr in ["T", "ctypes", "flat"]
                ):
                    _add_alias(lhs, expr.value.name, alias_map, arg_aliases)
                # a = b.c. a should alias b
                if (
                    isinstance(expr, ir.Expr)
                    and expr.op == "getattr"
                    and expr.attr not in ["shape"]
                    and expr.value.name in arg_aliases
                ):
                    _add_alias(lhs, expr.value.name, alias_map, arg_aliases)
                # calls that can create aliases such as B = A.ravel()
                if isinstance(expr, ir.Expr) and expr.op == "call":
                    fdef = guard(find_callname, func_ir, expr, typemap)
                    # TODO: sometimes gufunc backend creates duplicate code
                    # causing find_callname to fail. Example: test_argmax
                    # ignored here since those cases don't create aliases
                    # but should be fixed in general
                    if fdef is None:
                        continue
                    fname, fmod = fdef
                    if fdef in alias_func_extensions:
                        alias_func = alias_func_extensions[fdef]
                        alias_func(lhs, expr.args, alias_map, arg_aliases)
                    # function form, e.g. numpy.ravel(A): alias first argument
                    if fmod == "numpy" and fname in np_alias_funcs:
                        _add_alias(
                            lhs, expr.args[0].name, alias_map, arg_aliases
                        )
                    # method form, e.g. A.ravel(): alias the receiver variable
                    if isinstance(fmod, ir.Var) and fname in np_alias_funcs:
                        _add_alias(lhs, fmod.name, alias_map, arg_aliases)

    # copy to avoid changing size during iteration
    old_alias_map = copy.deepcopy(alias_map)
    # combine all aliases transitively
    for v in old_alias_map:
        for w in old_alias_map[v]:
            alias_map[v] |= alias_map[w]
        # after merging, every direct alias shares the same set object as v
        for w in old_alias_map[v]:
            alias_map[w] = alias_map[v]

    return alias_map, arg_aliases
999
+
1000
+
1001
+ def _add_alias(lhs, rhs, alias_map, arg_aliases):
1002
+ if rhs in arg_aliases:
1003
+ arg_aliases.add(lhs)
1004
+ else:
1005
+ if rhs not in alias_map:
1006
+ alias_map[rhs] = set()
1007
+ if lhs not in alias_map:
1008
+ alias_map[lhs] = set()
1009
+ alias_map[rhs].add(lhs)
1010
+ alias_map[lhs].add(rhs)
1011
+ return
1012
+
1013
+
1014
def is_immutable_type(var, typemap):
    """Tell whether the type recorded for ``var`` is known to be immutable.

    Returns True for number, NumPy datetime-like and range types and for the
    string type.  Everything else, including a missing ``typemap`` or a name
    without an entry, is conservatively treated as mutable (False).
    """
    # Conservatively, assume mutable if type not available
    if typemap is None or var not in typemap:
        return False
    typ = typemap[var]
    # TODO: add more immutable types
    immutable_classes = (
        types.Number,
        types.scalars._NPDatetimeBase,
        types.iterators.RangeType,
    )
    if isinstance(typ, immutable_classes) or typ == types.string:
        return True
    # conservatively, assume mutable
    return False
1033
+
1034
+
1035
def copy_propagate(blocks, typemap):
    """Compute copy propagation information for each block.

    Fixed-point iteration on the data flow equations:
        in_b = intersect(predec(B))
        out_b = gen_b | (in_b - kill_b)

    Returns ``(in_copies, out_copies)``: two dicts mapping a block label to
    the set of ``(lhs, rhs)`` copies available on entry to / exit from the
    block.
    """
    cfg = compute_cfg_from_blocks(blocks)
    entry = cfg.entry_point()

    # format: dict of block labels to copies as tuples
    # label -> (l,r)
    (gen_copies, all_copies, kill_copies, in_copies, out_copies) = (
        init_copy_propagate_data(blocks, entry, typemap)
    )

    previous = None
    current = copy.deepcopy(out_copies)
    # comparison works since dictionary of built-in types
    while previous != current:
        for label in blocks:
            if label == entry:
                continue
            preds = [pred for pred, _data in cfg.predecessors(label)]
            # in_b = intersect(predec(B))
            available = out_copies[preds[0]].copy()
            in_copies[label] = available
            for pred in preds:
                available &= out_copies[pred]

            # out_b = gen_b | (in_b - kill_b)
            out_copies[label] = gen_copies[label] | (
                available - kill_copies[label]
            )
        previous, current = current, copy.deepcopy(out_copies)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("copy propagate out_copies:", out_copies)
    return in_copies, out_copies
1071
+
1072
+
1073
def init_copy_propagate_data(blocks, entry, typemap):
    """Get the initial condition of the copy propagation data flow.

    Parameters
    ----------
    blocks : dict of label -> block
    entry : label of the entry block
    typemap : passed on to ``get_block_copies``

    Returns
    -------
    tuple
        ``(gen_copies, all_copies, kill_copies, in_copies, out_copies)``
        where ``all_copies`` is the set of every ``(lhs, rhs)`` copy in the
        program and the others are dicts of label -> set of copies.
    """
    # gen is all definite copies, extra_kill is additional ones that may hit
    # for example, parfors can have control flow so they may hit extra copies
    gen_copies, extra_kill = get_block_copies(blocks, typemap)

    # set of all program copies
    all_copies = set()
    for gen_set in gen_copies.values():
        all_copies |= gen_set

    kill_copies = {}
    for label, gen_set in gen_copies.items():
        # Names assigned by a copy in this block.  This depends only on the
        # block, so it is computed once per block rather than once per copy.
        assigned = {lhs for lhs, _rhs in gen_set}
        killed = set()
        for lhs, rhs in all_copies:
            if lhs in extra_kill[label] or rhs in extra_kill[label]:
                killed.add((lhs, rhs))
            # a copy is killed if it is not in this block and lhs or rhs are
            # assigned in this block
            if (lhs, rhs) not in gen_set and (
                lhs in assigned or rhs in assigned
            ):
                killed.add((lhs, rhs))
        kill_copies[label] = killed

    # set initial values
    # all copies are in for all blocks except entry
    in_copies = {label: all_copies.copy() for label in blocks.keys()}
    in_copies[entry] = set()
    out_copies = {}
    for label in blocks.keys():
        # out_b = gen_b | (in_b - kill_b)
        out_copies[label] = gen_copies[label] | (
            in_copies[label] - kill_copies[label]
        )
    out_copies[entry] = gen_copies[entry]
    return (gen_copies, all_copies, kill_copies, in_copies, out_copies)
1107
+
1108
+
1109
# other packages that define new nodes add calls to get copies in them
# format: {type:function}
# A registered function is called as f(stmt, typemap) and returns a pair
# (gen_set, kill_set): the (lhs, rhs) copies the statement generates and the
# names it kills.
copy_propagate_extensions = {}


def get_block_copies(blocks, typemap):
    """get copies generated and killed by each block

    Parameters
    ----------
    blocks : dict of label -> block
    typemap : dict of variable name -> type

    Returns
    -------
    (block_copies, extra_kill)
        ``block_copies`` maps a label to the set of ``(lhs, rhs)`` name
        pairs that are copies at the end of the block; ``extra_kill`` maps a
        label to the set of names recorded as killed in the block.
    """
    block_copies = {}
    extra_kill = {}
    for label, block in blocks.items():
        assign_dict = {}
        extra_kill[label] = set()
        # assignments as dict to replace with latest value
        for stmt in block.body:
            for T, f in copy_propagate_extensions.items():
                if isinstance(stmt, T):
                    gen_set, kill_set = f(stmt, typemap)
                    for lhs, rhs in gen_set:
                        assign_dict[lhs] = rhs
                    # if a=b is in dict and b is killed, a is also killed
                    new_assign_dict = {}
                    for l, r in assign_dict.items():
                        if l not in kill_set and r not in kill_set:
                            new_assign_dict[l] = r
                        if r in kill_set:
                            extra_kill[label].add(l)
                    assign_dict = new_assign_dict
                    extra_kill[label] |= kill_set
            if isinstance(stmt, ir.Assign):
                lhs = stmt.target.name
                if isinstance(stmt.value, ir.Var):
                    rhs = stmt.value.name
                    # copy is valid only if same type (see
                    # TestCFunc.test_locals)
                    # Some transformations can produce assignments of the
                    # form A = A. We don't put these mapping in the
                    # copy propagation set because then you get cycles and
                    # infinite loops in the replacement phase.
                    if typemap[lhs] == typemap[rhs] and lhs != rhs:
                        assign_dict[lhs] = rhs
                        # a recorded copy does not count as a kill of lhs
                        continue
                if (
                    isinstance(stmt.value, ir.Expr)
                    and stmt.value.op == "inplace_binop"
                ):
                    in1_var = stmt.value.lhs.name
                    in1_typ = typemap[in1_var]
                    # inplace_binop assigns first operand if mutable
                    if not (
                        isinstance(in1_typ, types.Number)
                        or in1_typ == types.string
                    ):
                        extra_kill[label].add(in1_var)
                        # if a=b is in dict and b is killed, a is also killed
                        new_assign_dict = {}
                        for l, r in assign_dict.items():
                            if l != in1_var and r != in1_var:
                                new_assign_dict[l] = r
                            if r == in1_var:
                                extra_kill[label].add(l)
                        assign_dict = new_assign_dict
                # any other assignment kills its target
                extra_kill[label].add(lhs)
        block_cps = set(assign_dict.items())
        block_copies[label] = block_cps
    return block_copies, extra_kill
1174
+
1175
+
1176
# other packages that define new nodes add calls to apply copy propagate in them
# format: {type:function}
# A registered function is looked up by the exact type of the statement and
# called as f(stmt, var_dict, name_var_table, typemap, calltypes, save_copies).
apply_copy_propagate_extensions = {}


def apply_copy_propagate(
    blocks, in_copies, name_var_table, typemap, calltypes, save_copies=None
):
    """apply copy propagation to IR: replace variables when copies available

    Parameters
    ----------
    blocks : dict of label -> block; statements are rewritten in place
    in_copies : dict of label -> set of ``(lhs, rhs)`` name pairs available
        on entry to the block
    name_var_table : dict of variable name -> ir.Var
    typemap : dict of variable name -> type
    calltypes : call type table, passed to ``fix_setitem_type`` and to
        registered extension functions
    save_copies : optional list that the applied copies are appended to

    Returns
    -------
    list
        ``save_copies``, extended with ``(name, ir.Var)`` items after every
        statement.
    """
    # save_copies keeps an approximation of the copies that were applied, so
    # that the variable names of removed user variables can be recovered to some
    # extent.
    if save_copies is None:
        save_copies = []

    for label, block in blocks.items():
        # copies available on entry: name -> replacement variable
        var_dict = {l: name_var_table[r] for l, r in in_copies[label]}
        # assignments as dict to replace with latest value
        for stmt in block.body:
            if type(stmt) in apply_copy_propagate_extensions:
                f = apply_copy_propagate_extensions[type(stmt)]
                f(
                    stmt,
                    var_dict,
                    name_var_table,
                    typemap,
                    calltypes,
                    save_copies,
                )
            # only rhs of assignments should be replaced
            # e.g. if x=y is available, x in x=z shouldn't be replaced
            elif isinstance(stmt, ir.Assign):
                stmt.value = replace_vars_inner(stmt.value, var_dict)
            else:
                replace_vars_stmt(stmt, var_dict)
            fix_setitem_type(stmt, typemap, calltypes)
            # update the available copies with what extension nodes gen/kill
            for T, f in copy_propagate_extensions.items():
                if isinstance(stmt, T):
                    gen_set, kill_set = f(stmt, typemap)
                    for lhs, rhs in gen_set:
                        if rhs in name_var_table:
                            var_dict[lhs] = name_var_table[rhs]
                    # iterate over a copy because entries are popped
                    for l, r in var_dict.copy().items():
                        if l in kill_set or r.name in kill_set:
                            var_dict.pop(l)
            if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Var):
                lhs = stmt.target.name
                rhs = stmt.value.name
                # rhs could be replaced with lhs from previous copies
                if lhs != rhs:
                    # copy is valid only if same type (see
                    # TestCFunc.test_locals)
                    if typemap[lhs] == typemap[rhs] and rhs in name_var_table:
                        var_dict[lhs] = name_var_table[rhs]
                    else:
                        var_dict.pop(lhs, None)
                    # a=b kills previous t=a
                    lhs_kill = []
                    for k, v in var_dict.items():
                        if v.name == lhs:
                            lhs_kill.append(k)
                    for k in lhs_kill:
                        var_dict.pop(k, None)
            if isinstance(stmt, ir.Assign) and not isinstance(
                stmt.value, ir.Var
            ):
                lhs = stmt.target.name
                var_dict.pop(lhs, None)
                # previous t=a is killed if a is killed
                lhs_kill = []
                for k, v in var_dict.items():
                    if v.name == lhs:
                        lhs_kill.append(k)
                for k in lhs_kill:
                    var_dict.pop(k, None)
            save_copies.extend(var_dict.items())

    return save_copies
1254
+
1255
+
1256
def fix_setitem_type(stmt, typemap, calltypes):
    """Update the call type of a setitem after its target was replaced.

    Copy propagation can replace setitem target variable, which can be array
    with 'A' layout. The replaced variable can be 'C' or 'F', so we update
    setitem call type reflect this (from matrix power test).

    Statements other than ``ir.SetItem`` / ``ir.StaticSetItem`` are ignored,
    and so are cases where either type involved is not an array type.
    """
    if not isinstance(stmt, (ir.SetItem, ir.StaticSetItem)):
        return
    target_type = typemap[stmt.target.name]
    call_sig = calltypes[stmt]
    recorded_type = call_sig.args[0]
    # test_optional t_typ can be Optional with array
    both_arrays = isinstance(
        recorded_type, types.npytypes.Array
    ) and isinstance(target_type, types.npytypes.Array)
    if not both_arrays:
        return
    if recorded_type.layout == "A" and target_type.layout != "A":
        relaid = recorded_type.copy(layout=target_type.layout)
        call_sig.args = (relaid, call_sig.args[1], call_sig.args[2])
1278
+
1279
+
1280
def dprint_func_ir(func_ir, title, blocks=None):
    """Debug print function IR, with an optional blocks argument
    that may differ from the IR's original blocks.

    Nothing is printed unless ``config.DEBUG_ARRAY_OPT >= 1``.  While
    dumping, ``func_ir.blocks`` is temporarily set to ``blocks`` (when
    given); the original blocks are always put back, even if printing or
    dumping raises.
    """
    if config.DEBUG_ARRAY_OPT >= 1:
        ir_blocks = func_ir.blocks
        func_ir.blocks = ir_blocks if blocks is None else blocks
        try:
            name = func_ir.func_id.func_qualname
            print(("IR %s: %s" % (title, name)).center(80, "-"))
            func_ir.dump()
            print("-" * 40)
        finally:
            # never leave the temporary blocks installed on the IR
            func_ir.blocks = ir_blocks
1292
+
1293
+
1294
def find_topo_order(blocks, cfg=None):
    """Find a topological order of blocks such that true branches are visited
    first (e.g. for_break test in test_dataflow).

    Implemented as an iterative post order traversal (reversed at the end) to
    avoid recursion limit issues.  Back edges recorded in the CFG are not
    followed.  When ``cfg`` is not given it is computed from ``blocks``.
    Returns the list of block labels in that order.
    """
    if cfg is None:
        cfg = compute_cfg_from_blocks(blocks)

    post_order = []
    # nodes whose children have been pushed but which are not yet emitted
    expanded = set()
    # nodes already appended to post_order
    emitted = set()
    stack = [cfg.entry_point()]

    while stack:
        node = stack[-1]
        if node in emitted or node in expanded:
            # Children were already pushed: either emit the node now, or it
            # was pushed more than once and is simply skipped.
            stack.pop()
            if node not in emitted:
                post_order.append(node)
                emitted.add(node)
            # Forget the node to limit the memory usage to 1 entry per node.
            expanded.discard(node)
            continue

        # First time this node is on top of the stack: push its children.
        expanded.add(node)
        succs = cfg._succs[node]
        last_inst = blocks[node].body[-1]
        if isinstance(last_inst, ir.Branch):
            succs = [last_inst.truebr, last_inst.falsebr]
        stack.extend(
            dest
            for dest in succs
            if (node, dest) not in cfg._back_edges and dest not in expanded
        )

    post_order.reverse()
    return post_order
1338
+
1339
+
1340
# Other packages that define new IR node types register a handler here so
# that get_call_table can process those nodes.
# format: {type: function}; each function is called as
# f(inst, call_table, reverse_call_table)
call_table_extensions = {}
1343
+
1344
+
1345
def get_call_table(
    blocks, call_table=None, reverse_call_table=None, topological_ordering=True
):
    """returns a dictionary of call variables and their references.

    Blocks and the statements inside them are walked in reverse, so a call is
    seen before the assignments that define its callee. Both tables may be
    passed in to be extended; they are returned as a
    ``(call_table, reverse_call_table)`` pair.
    """
    # call_table example: c = np.zeros becomes c:["zeros", np]
    # reverse_call_table example: c = np.zeros becomes np_var:c
    if call_table is None:
        call_table = {}
    if reverse_call_table is None:
        reverse_call_table = {}

    if topological_ordering:
        order = find_topo_order(blocks)
    else:
        order = list(blocks.keys())

    for label in reversed(order):
        for inst in reversed(blocks[label].body):
            if isinstance(inst, ir.Assign):
                lhs = inst.target.name
                rhs = inst.value
                if isinstance(rhs, ir.Expr) and rhs.op == "call":
                    # start (or reset) the entry for the callee variable
                    call_table[rhs.func.name] = []
                if isinstance(rhs, ir.Expr) and rhs.op == "getattr":
                    if lhs in call_table:
                        call_table[lhs].append(rhs.attr)
                        reverse_call_table[rhs.value.name] = lhs
                    if lhs in reverse_call_table:
                        # lhs is an intermediate in an attribute chain; keep
                        # attributing to the original call variable
                        call_var = reverse_call_table[lhs]
                        call_table[call_var].append(rhs.attr)
                        reverse_call_table[rhs.value.name] = call_var
                if isinstance(rhs, ir.Global):
                    if lhs in call_table:
                        call_table[lhs].append(rhs.value)
                    if lhs in reverse_call_table:
                        call_var = reverse_call_table[lhs]
                        call_table[call_var].append(rhs.value)
                if isinstance(rhs, ir.FreeVar):
                    if lhs in call_table:
                        call_table[lhs].append(rhs.value)
                    if lhs in reverse_call_table:
                        call_var = reverse_call_table[lhs]
                        call_table[call_var].append(rhs.value)
                if isinstance(rhs, ir.Var):
                    if lhs in call_table:
                        call_table[lhs].append(rhs.name)
                        reverse_call_table[rhs.name] = lhs
                    if lhs in reverse_call_table:
                        call_var = reverse_call_table[lhs]
                        call_table[call_var].append(rhs.name)
            # extension handlers run for every instruction, not only Assign
            for T, f in call_table_extensions.items():
                if isinstance(inst, T):
                    f(inst, call_table, reverse_call_table)
    return call_table, reverse_call_table
1399
+
1400
+
1401
# Other packages that define new IR node types register a handler here so
# that get_tuple_table can process those nodes.
# format: {type: function}; each function is called as f(inst, tuple_table)
tuple_table_extensions = {}
1404
+
1405
+
1406
def get_tuple_table(blocks, tuple_table=None):
    """Map tuple variable names to their items (build_tuple) or constant value.

    An existing *tuple_table* may be passed in to be extended; the (possibly
    new) table is returned.
    """
    if tuple_table is None:
        tuple_table = {}

    for blk in blocks.values():
        for stmt in blk.body:
            if isinstance(stmt, ir.Assign):
                target_name = stmt.target.name
                value = stmt.value
                if isinstance(value, ir.Expr) and value.op == "build_tuple":
                    tuple_table[target_name] = value.items
                if isinstance(value, ir.Const) and isinstance(
                    value.value, tuple
                ):
                    tuple_table[target_name] = value.value
            # registered handlers see every statement, whatever its type
            for node_type, handler in tuple_table_extensions.items():
                if isinstance(stmt, node_type):
                    handler(stmt, tuple_table)
    return tuple_table
1424
+
1425
+
1426
def get_stmt_writes(stmt):
    """Return the set of variable names written by *stmt*.

    Only assignments and (static) setitems write a variable; any other
    statement yields an empty set.
    """
    if isinstance(stmt, (ir.Assign, ir.SetItem, ir.StaticSetItem)):
        return {stmt.target.name}
    return set()
1431
+
1432
+
1433
def rename_labels(blocks):
    """Relabel the blocks so that labels follow a topological ordering.

    The set of labels is unchanged; only which block carries which label
    changes. Jump and branch terminators are rebuilt to point at the new
    labels. Returns a new dictionary keyed by the new labels.
    """
    order = find_topo_order(blocks)

    # Put a returning block last if there is one (just for readability).
    # Some cases like generators can have no return blocks.
    ret_label = -1
    for lbl, blk in blocks.items():
        if isinstance(blk.body[-1], ir.Return):
            ret_label = lbl
    if ret_label != -1:
        order.remove(ret_label)
        order.append(ret_label)

    # the i-th block in the ordering receives the i-th smallest label
    label_map = dict(zip(order, sorted(order)))

    # update target labels in jumps/branches
    for blk in blocks.values():
        term = blk.terminator
        # Build fresh IR nodes rather than mutating in place, since copies of
        # the IR may refer to the same node objects.
        if isinstance(term, ir.Jump):
            blk.body[-1] = ir.Jump(label_map[term.target], term.loc)
        elif isinstance(term, ir.Branch):
            blk.body[-1] = ir.Branch(
                term.cond,
                label_map[term.truebr],
                label_map[term.falsebr],
                term.loc,
            )

    # re-key the blocks dictionary
    return {label_map[old]: blk for old, blk in blocks.items()}
1475
+
1476
+
1477
def simplify_CFG(blocks):
    """Collapse chains of blocks that contain no loop into single blocks.

    Blocks consisting of a lone branch are first inlined into predecessors
    that end in a jump, then adjacent blocks are merged and the result is
    relabelled. *blocks* is modified in place; the relabelled dictionary is
    returned.
    """
    cfg = compute_cfg_from_blocks(blocks)

    def is_lone_branch(label):
        body = blocks[label].body
        return len(body) == 1 and isinstance(body[0], ir.Branch)

    lone_branch_labels = [lbl for lbl in blocks.keys() if is_lone_branch(lbl)]
    doomed = set()
    for lbl in lone_branch_labels:
        branch = blocks[lbl].body[0]
        removable = True
        for pred_label, _ in cfg.predecessors(lbl):
            pred_body = blocks[pred_label].body
            if isinstance(pred_body[-1], ir.Jump):
                # the predecessor branches directly instead of jumping here
                pred_body[-1] = copy.copy(branch)
            else:
                # some predecessor still needs this block
                removable = False
        if removable:
            doomed.add(lbl)

    for lbl in doomed:
        del blocks[lbl]

    merge_adjacent_blocks(blocks)
    return rename_labels(blocks)
1505
+
1506
+
1507
# Array attribute names that canonicalize_array_math rewrites from the
# method form (A.func) to the module-level numpy form (np.func).
arr_math = [
    "min",
    "max",
    "sum",
    "prod",
    "mean",
    "var",
    "std",
    "cumsum",
    "cumprod",
    "argmax",
    "argmin",
    "argsort",
    "nonzero",
    "ravel",
]
1523
+
1524
+
1525
def canonicalize_array_math(func_ir, typemap, calltypes, typingctx):
    """Rewrite array method calls listed in ``arr_math`` as numpy calls.

    For ``f = A.func`` where ``A`` is typed as an array and ``func`` is in
    ``arr_math``, the getattr is redirected to a fresh ``np`` global and the
    type of ``f`` is replaced. A later ``f(...)`` call gets ``A`` prepended to
    its arguments and its entry in *calltypes* is recomputed. The IR,
    *typemap* and *calltypes* are all modified in place.
    """
    # save array arg to call
    # call_varname -> array
    blocks = func_ir.blocks
    saved_arr_arg = {}
    topo_order = find_topo_order(blocks)
    for label in topo_order:
        block = blocks[label]
        new_body = []
        for stmt in block.body:
            if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
                lhs = stmt.target.name
                rhs = stmt.value
                # replace A.func with np.func, and save A in saved_arr_arg
                if (
                    rhs.op == "getattr"
                    and rhs.attr in arr_math
                    and isinstance(
                        typemap[rhs.value.name], types.npytypes.Array
                    )
                ):
                    rhs = stmt.value
                    arr = rhs.value
                    saved_arr_arg[lhs] = arr
                    scope = arr.scope
                    loc = arr.loc
                    # g_np_var = Global(numpy)
                    g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
                    typemap[g_np_var.name] = types.misc.Module(numpy)
                    g_np = ir.Global("np", numpy, loc)
                    g_np_assign = ir.Assign(g_np, g_np_var, loc)
                    rhs.value = g_np_var
                    # the np global must be defined before the getattr stmt
                    new_body.append(g_np_assign)
                    func_ir._definitions[g_np_var.name] = [g_np]
                    # update func var type
                    func = getattr(numpy, rhs.attr)
                    func_typ = get_np_ufunc_typ(func)
                    typemap.pop(lhs)
                    typemap[lhs] = func_typ
                if rhs.op == "call" and rhs.func.name in saved_arr_arg:
                    # add array as first arg
                    arr = saved_arr_arg[rhs.func.name]
                    # update call type signature to include array arg
                    old_sig = calltypes.pop(rhs)
                    # Positional argument types are taken from the old
                    # signature (some types become Const in the signature),
                    # limited to the positional args actually present.
                    # Keyword types are looked up in typemap separately,
                    # because argsort requires kws for typing.
                    argtyps = old_sig.args[: len(rhs.args)]
                    kwtyps = {name: typemap[v.name] for name, v in rhs.kws}
                    calltypes[rhs] = typemap[rhs.func.name].get_call_type(
                        typingctx, [typemap[arr.name]] + list(argtyps), kwtyps
                    )
                    rhs.args = [arr] + rhs.args

            new_body.append(stmt)
        block.body = new_body
    return
1581
+
1582
+
1583
# Handlers that let get_array_accesses process additional IR node types.
# format: {type: function}; each function is called as f(inst, accesses)
array_accesses_extensions = {}
1585
+
1586
+
1587
def get_array_accesses(blocks, accesses=None):
    """Collect ``(array name, index)`` pairs for every array access.

    Covers setitem/static_setitem statements and getitem/static_getitem
    assignments. An existing *accesses* set may be passed in to be extended;
    the (possibly new) set is returned.
    """
    if accesses is None:
        accesses = set()

    for blk in blocks.values():
        for stmt in blk.body:
            if isinstance(stmt, ir.SetItem):
                accesses.add((stmt.target.name, stmt.index.name))
            if isinstance(stmt, ir.StaticSetItem):
                accesses.add((stmt.target.name, stmt.index_var.name))
            if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
                expr = stmt.value
                if expr.op == "getitem":
                    accesses.add((expr.value.name, expr.index.name))
                if expr.op == "static_getitem":
                    static_index = expr.index
                    # slice is unhashable, so just keep the variable
                    if static_index is None or is_slice_index(static_index):
                        static_index = expr.index_var.name
                    accesses.add((expr.value.name, static_index))
            for node_type, handler in array_accesses_extensions.items():
                if isinstance(stmt, node_type):
                    handler(stmt, accesses)
    return accesses
1612
+
1613
+
1614
def is_slice_index(index):
    """Return True if *index* is a slice or a tuple containing a slice."""
    if isinstance(index, slice):
        return True
    return isinstance(index, tuple) and any(
        isinstance(part, slice) for part in index
    )
1623
+
1624
+
1625
def merge_adjacent_blocks(blocks):
    """Merge each block with its sole successor when that successor has no
    other predecessor. *blocks* is modified in place; nothing is returned.

    The CFG is computed once up front and is not refreshed as blocks are
    merged.
    """
    cfg = compute_cfg_from_blocks(blocks)
    # merge adjacent blocks
    removed = set()
    # iterate over a snapshot of the keys since blocks are deleted below
    for label in list(blocks.keys()):
        if label in removed:
            continue
        block = blocks[label]
        succs = list(cfg.successors(label))
        while True:
            if len(succs) != 1:
                break
            next_label = succs[0][0]
            if next_label in removed:
                break
            preds = list(cfg.predecessors(next_label))
            # successors of the block about to be absorbed drive the next
            # iteration of this loop
            succs = list(cfg.successors(next_label))
            if len(preds) != 1 or preds[0][0] != label:
                break
            next_block = blocks[next_label]
            # XXX: commented out since scope objects are not consistent
            # throughout the compiler. for example, pieces of code are compiled
            # and inlined on the fly without proper scope merge.
            # if block.scope != next_block.scope:
            #     break
            # merge
            block.body.pop()  # remove Jump
            block.body += next_block.body
            del blocks[next_label]
            removed.add(next_label)
            # `block` keeps accumulating; `label` advances so the predecessor
            # check above matches the stale CFG on the next iteration
            label = next_label
1656
+
1657
+
1658
def restore_copy_var_names(blocks, save_copies, typemap):
    """
    Give temporaries introduced by copy propagation names derived from the
    user variables they replaced.

    *save_copies* is a sequence of ``(user variable name, variable)`` pairs.
    Renamed entries are moved to their new key in *typemap* and the blocks
    are rewritten. Returns a map from each new name to the user variable name.
    """
    if not save_copies:
        return {}

    temp_to_new = {}
    new_to_user = {}
    for user_name, var in save_copies:
        # only user variables are of interest
        if user_name.startswith("$"):
            continue
        temp_name = var.name
        # the copy must be a generated temporary that is not yet renamed
        if not temp_name.startswith("$") or temp_name in temp_to_new:
            continue
        fresh_name = mk_unique_var("${}".format(user_name))
        temp_to_new[temp_name] = fresh_name
        new_to_user[fresh_name] = user_name
        typemap[fresh_name] = typemap.pop(temp_name)

    replace_var_names(blocks, temp_to_new)
    return new_to_user
1684
+
1685
+
1686
def simplify(func_ir, typemap, calltypes, metadata):
    """Run copy propagation, dead code removal and CFG simplification.

    The IR is modified in place. The map from renamed variables back to user
    variable names is merged into ``metadata["var_rename_map"]``.
    """
    ir_blocks = func_ir.blocks
    # copies flowing into each block
    in_copies, _ = copy_propagate(ir_blocks, typemap)
    # table mapping variable names to ir.Var objects to help replacement
    name_to_var = get_name_var_table(func_ir.blocks)
    saved = apply_copy_propagate(
        func_ir.blocks, in_copies, name_to_var, typemap, calltypes
    )
    renames = restore_copy_var_names(func_ir.blocks, saved, typemap)
    metadata.setdefault("var_rename_map", {}).update(renames)

    if config.DEBUG_ARRAY_OPT >= 1:
        dprint_func_ir(func_ir, "after copy prop")
    # remove dead code to enable fusion
    remove_dead(func_ir.blocks, func_ir.arg_names, func_ir, typemap)
    func_ir.blocks = simplify_CFG(func_ir.blocks)
    if config.DEBUG_ARRAY_OPT >= 1:
        dprint_func_ir(func_ir, "after simplify")
1707
+
1708
+
1709
class GuardException(Exception):
    """Raised by ``require`` when a condition fails; ``guard`` turns it into None."""
1711
+
1712
+
1713
def require(cond):
    """
    Do nothing when *cond* is truthy; raise GuardException otherwise.
    """
    if cond:
        return
    raise GuardException
1719
+
1720
+
1721
def guard(func, *args, **kwargs):
    """
    Call ``func(*args, **kwargs)`` and return its result. If the call raises
    GuardException, return None instead; any other exception propagates.
    """
    try:
        outcome = func(*args, **kwargs)
    except GuardException:
        outcome = None
    return outcome
1731
+
1732
+
1733
def get_definition(func_ir, name, **kwargs):
    """
    Look up *name* with ``func_ir.get_definition``, forwarding any keyword
    arguments. A KeyError from the lookup is converted into GuardException.
    """
    try:
        definition = func_ir.get_definition(name, **kwargs)
    except KeyError:
        raise GuardException
    return definition
1742
+
1743
+
1744
def build_definitions(blocks, definitions=None):
    """Rebuild the definitions table by scanning every instruction.

    Useful when the definitions table is out of sync with the blocks. Each
    assignment appends its value to the list stored under the target's name.
    Handlers registered in ``build_defs_extensions`` for the exact type of an
    instruction are also invoked. A new table is created when *definitions*
    is not given; the table is returned either way.
    """
    if definitions is None:
        definitions = collections.defaultdict(list)

    for blk in blocks.values():
        for stmt in blk.body:
            if isinstance(stmt, ir.Assign):
                definitions.setdefault(stmt.target.name, []).append(stmt.value)
            stmt_type = type(stmt)
            if stmt_type in build_defs_extensions:
                build_defs_extensions[stmt_type](stmt, definitions)

    return definitions
1766
+
1767
+
1768
# Handlers that let build_definitions process additional IR node types.
# format: {type: function}; looked up by the exact type of the instruction
# and called as f(inst, definitions)
build_defs_extensions = {}
1769
+
1770
+
1771
def find_callname(
    func_ir, expr, typemap=None, definition_finder=get_definition
):
    """Try to find a call expression's function and module names and return
    them as strings for unbounded calls. If the call is a bounded call, return
    the self object instead of module name. Raise GuardException if failed.

    Providing typemap can make the call matching more accurate in corner cases
    such as bounded call on an object which is inside another object.

    The callee's definition is followed through chained getattr expressions
    using *definition_finder* until a global/free variable is reached. The
    result is a ``(function name, module name or object)`` pair.
    """
    require(isinstance(expr, ir.Expr) and expr.op == "call")
    callee = expr.func
    callee_def = definition_finder(func_ir, callee)
    # names are collected innermost-first: attrs[0] is the function name
    attrs = []
    obj = None
    while True:
        if isinstance(callee_def, (ir.Global, ir.FreeVar)):
            # require(callee_def.value == numpy)
            # these checks support modules like numpy, numpy.random as well as
            # calls like len() and intrinsics like assertEquiv
            keys = ["name", "_name", "__name__"]
            value = None
            for key in keys:
                if hasattr(callee_def.value, key):
                    value = getattr(callee_def.value, key)
                    break
            if not value or not isinstance(value, str):
                raise GuardException
            attrs.append(value)
            def_val = callee_def.value
            # get the underlying definition of Intrinsic object to be able to
            # find the module effectively.
            # Otherwise, it will return numba.extending
            if isinstance(def_val, _Intrinsic):
                def_val = def_val._defn
            if hasattr(def_val, "__module__"):
                mod_name = def_val.__module__
                # The reason for first checking if the function is in NumPy's
                # top level name space by module is that some functions are
                # deprecated in NumPy but the functions' names are aliased with
                # other common names. This prevents deprecation warnings on
                # e.g. getattr(numpy, 'bool') where a bool is the target.
                # For context see #6175, impacts NumPy>=1.20.
                mod_not_none = mod_name is not None
                numpy_toplevel = mod_not_none and (
                    mod_name == "numpy" or mod_name.startswith("numpy.")
                )
                # it might be a numpy function imported directly
                if (
                    numpy_toplevel
                    and hasattr(numpy, value)
                    and def_val == getattr(numpy, value)
                ):
                    attrs += ["numpy"]
                # it might be a np.random function imported directly
                elif hasattr(numpy.random, value) and def_val == getattr(
                    numpy.random, value
                ):
                    attrs += ["random", "numpy"]
                elif mod_not_none:
                    attrs.append(mod_name)
            else:
                class_name = def_val.__class__.__name__
                if class_name == "builtin_function_or_method":
                    class_name = "builtin"
                if class_name != "module":
                    attrs.append(class_name)
            break
        elif isinstance(callee_def, ir.Expr) and callee_def.op == "getattr":
            obj = callee_def.value
            attrs.append(callee_def.attr)
            if typemap and obj.name in typemap:
                typ = typemap[obj.name]
                # a getattr on anything but a module is a bound call
                if not isinstance(typ, types.Module):
                    return attrs[0], obj
            callee_def = definition_finder(func_ir, obj)
        else:
            # obj.func calls where obj is not np array
            if obj is not None:
                return ".".join(reversed(attrs)), obj
            raise GuardException
    return attrs[0], ".".join(reversed(attrs[1:]))
1853
+
1854
+
1855
def find_build_sequence(func_ir, var):
    """Return ``(items, op)`` if *var* is defined by build_tuple, build_list
    or build_set; raise GuardException otherwise.
    Note: only build_tuple is immutable, so use with care.
    """
    require(isinstance(var, ir.Var))
    definition = get_definition(func_ir, var)
    require(isinstance(definition, ir.Expr))
    require(definition.op in ["build_tuple", "build_list", "build_set"])
    return definition.items, definition.op
1867
+
1868
+
1869
def find_const(func_ir, var):
    """Return the value of *var* if it is defined by a constant, global or
    free variable node; raise GuardException otherwise.
    """
    require(isinstance(var, ir.Var))
    definition = get_definition(func_ir, var)
    constant_like = (ir.Const, ir.Global, ir.FreeVar)
    require(isinstance(definition, constant_like))
    return definition.value
1877
+
1878
+
1879
def compile_to_numba_ir(
    mk_func,
    glbls,
    typingctx=None,
    targetctx=None,
    arg_typs=None,
    typemap=None,
    calltypes=None,
):
    """
    Compile a function or a make_function node to Numba IR.

    Labels are offset and every variable is renamed so the result can be
    inlined elsewhere without conflicts. When *typingctx* is given, type
    inference is run and *typemap* / *calltypes* are updated with the
    results (entries named ``arg.*`` are dropped first).

    Raises NotImplementedError if *mk_func* has neither ``code`` nor
    ``__code__``.
    """
    from numba.core import typed_passes

    # mk_func can be actual function or make_function node, or a njit function
    if hasattr(mk_func, "code"):
        code = mk_func.code
    elif hasattr(mk_func, "__code__"):
        code = mk_func.__code__
    else:
        raise NotImplementedError(
            "function type not recognized {}".format(mk_func)
        )
    f_ir = get_ir_of_code(glbls, code)
    remove_dels(f_ir.blocks)

    # relabel by adding an offset, then record the new highest label
    f_ir.blocks = add_offset_to_labels(f_ir.blocks, _the_max_label.next())
    _the_max_label.update(max(f_ir.blocks.keys()))

    # rename all variables to avoid conflict
    renames = {
        name: mk_unique_var(name) for name in get_name_var_table(f_ir.blocks)
    }
    replace_var_names(f_ir.blocks, renames)

    # perform type inference if typingctx is available and update type
    # data structures typemap and calltypes
    if typingctx:
        inferred = typed_passes.type_inference_stage(
            typingctx, targetctx, f_ir, arg_typs, None
        )
        f_typemap, f_return_type, f_calltypes, _ = inferred
        # remove argument entries like arg.a from typemap
        for arg_entry in [v for v in f_typemap if v.startswith("arg.")]:
            f_typemap.pop(arg_entry)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)
    return f_ir
1937
+
1938
+
1939
+ def _create_function_from_code_obj(fcode, func_env, func_arg, func_clo, glbls):
1940
+ """
1941
+ Creates a function from a code object. Args:
1942
+ * fcode - the code object
1943
+ * func_env - string for the freevar placeholders
1944
+ * func_arg - string for the function args (e.g. "a, b, c, d=None")
1945
+ * func_clo - string for the closure args
1946
+ * glbls - the function globals
1947
+ """
1948
+ sanitized_co_name = fcode.co_name.replace("<", "_").replace(">", "_")
1949
+ func_text = (
1950
+ f"def closure():\n{func_env}\n"
1951
+ f"\tdef {sanitized_co_name}({func_arg}):\n"
1952
+ f"\t\treturn ({func_clo})\n"
1953
+ f"\treturn {sanitized_co_name}"
1954
+ )
1955
+ loc = {}
1956
+ exec(func_text, glbls, loc)
1957
+
1958
+ f = loc["closure"]()
1959
+ # replace the code body
1960
+ f.__code__ = fcode
1961
+ f.__name__ = fcode.co_name
1962
+ return f
1963
+
1964
+
1965
def get_ir_of_code(glbls, fcode):
    """
    Compile a code object to get its IR, ir.Del nodes are emitted

    A placeholder function matching the code object's free variables and
    argument count is created first, then run through the frontend, the
    "before-inference" rewrites, closure inlining, SSA reconstruction with
    phi stripping, and finally the post-processor.
    """
    nfree = len(fcode.co_freevars)
    # placeholder names: c_<i> for free variables, x_<i> for arguments
    func_env = "\n".join(["\tc_%d = None" % i for i in range(nfree)])
    func_clo = ",".join(["c_%d" % i for i in range(nfree)])
    func_arg = ",".join(["x_%d" % i for i in range(fcode.co_argcount)])

    f = _create_function_from_code_obj(
        fcode, func_env, func_arg, func_clo, glbls
    )

    from numba.core import compiler

    ir = compiler.run_frontend(f)

    # we need to run the before inference rewrite pass to normalize the IR
    # XXX: check rewrite pass flag?
    # for example, Raise nodes need to become StaticRaise before type inference
    class DummyPipeline(object):
        # minimal stand-in exposing only a ``state`` for the passes below
        def __init__(self, f_ir):
            self.state = compiler.StateDict()
            self.state.typingctx = None
            self.state.targetctx = None
            self.state.args = None
            self.state.func_ir = f_ir
            self.state.typemap = None
            self.state.return_type = None
            self.state.calltypes = None

    state = DummyPipeline(ir).state
    rewrites.rewrite_registry.apply("before-inference", state)
    # call inline pass to handle cases like stencils and comprehensions
    swapped = {}  # TODO: get this from diagnostics store
    import numba.core.inline_closurecall

    inline_pass = numba.core.inline_closurecall.InlineClosureCallPass(
        ir, numba.core.cpu.ParallelOptions(False), swapped
    )
    inline_pass.run()

    # TODO: DO NOT ADD MORE THINGS HERE!
    # If adding more things here is being contemplated, it really is time to
    # retire this function and work on getting the InlineWorker class from
    # numba.core.inline_closurecall into sufficient shape as a replacement.
    # The issue with `get_ir_of_code` is that it doesn't run a full compilation
    # pipeline and as a result various additional things keep needing to be
    # added to create valid IR.

    # rebuild IR in SSA form
    from numba.core.untyped_passes import ReconstructSSA
    from numba.core.typed_passes import PreLowerStripPhis

    reconstruct_ssa = ReconstructSSA()
    phistrip = PreLowerStripPhis()
    reconstruct_ssa.run_pass(state)
    phistrip.run_pass(state)

    post_proc = postproc.PostProcessor(ir)
    post_proc.run(True)
    return ir
2027
+
2028
+
2029
def replace_arg_nodes(block, args):
    """
    Substitute each ``ir.Arg`` assigned in *block* with the entry of *args*
    at the argument's index. The block is modified in place.
    """
    for stmt in block.body:
        if not isinstance(stmt, ir.Assign):
            continue
        if not isinstance(stmt.value, ir.Arg):
            continue
        position = stmt.value.index
        assert position < len(args)
        stmt.value = args[position]
    return
2039
+
2040
+
2041
def replace_returns(blocks, target, return_label):
    """
    Replace each return statement with an assignment to *target* followed by
    a jump to *return_label*. Blocks are modified in place.
    """
    for blk in blocks.values():
        # some blocks may be empty during transformations
        if not blk.body:
            continue
        ret = blk.terminator
        if not isinstance(ret, ir.Return):
            continue
        blk.body.pop()  # remove return
        # the statement before the return must be the cast of its value
        cast_stmt = blk.body.pop()
        assert (
            isinstance(cast_stmt, ir.Assign)
            and isinstance(cast_stmt.value, ir.Expr)
            and cast_stmt.value.op == "cast"
        ), "invalid return cast"
        returned_value = cast_stmt.value.value
        blk.body.append(ir.Assign(returned_value, target, ret.loc))
        blk.body.append(ir.Jump(return_label, ret.loc))
2062
+
2063
+
2064
def gen_np_call(func_as_str, func, lhs, args, typingctx, typemap, calltypes):
    """Generate IR for ``lhs = np.<func_as_str>(*args)``.

    Returns three assignments: the ``np`` global, the attribute lookup and
    the call. Types for the new variables are recorded in *typemap* and the
    call signature in *calltypes*. Scope and location come from ``args[0]``.
    """
    first_arg = args[0]
    scope = first_arg.scope
    loc = first_arg.loc

    # np module variable: <np_var> = Global(numpy)
    np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
    typemap[np_var.name] = types.misc.Module(numpy)
    np_assign_global = ir.Assign(ir.Global("np", numpy, loc), np_var, loc)

    # attribute lookup: <attr_var> = getattr(<np_var>, func_as_str)
    getattr_expr = ir.Expr.getattr(np_var, func_as_str, loc)
    attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
    attr_type = get_np_ufunc_typ(func)
    typemap[attr_var.name] = attr_type
    attr_assign = ir.Assign(getattr_expr, attr_var, loc)

    # the call itself: lhs = <attr_var>(*args)
    call_expr = ir.Expr.call(attr_var, args, (), loc)
    arg_types = [typemap[arg.name] for arg in args]
    calltypes[call_expr] = attr_type.get_call_type(typingctx, arg_types, {})
    call_assign = ir.Assign(call_expr, lhs, loc)

    return [np_assign_global, attr_assign, call_assign]
2086
+
2087
+
2088
def dump_block(label, block):
    """Print *label* followed by each statement of *block* on its own line."""
    print(label, ":")
    for statement in block.body:
        print(" ", statement)
2092
+
2093
+
2094
def dump_blocks(blocks):
    """Print every block of *blocks* with ``dump_block``."""
    for entry in blocks.items():
        dump_block(*entry)
2097
+
2098
+
2099
def is_operator_or_getitem(expr):
    """Truthy if *expr* is a unary/binary operator or a (static) getitem."""
    operator_ops = [
        "unary",
        "binop",
        "inplace_binop",
        "getitem",
        "static_getitem",
    ]
    return (
        isinstance(expr, ir.Expr)
        and getattr(expr, "op", False)
        and expr.op in operator_ops
    )
2107
+
2108
+
2109
def is_get_setitem(stmt):
    """True if *stmt* is a getitem assignment or a setitem (static included)."""
    if is_getitem(stmt):
        return True
    return is_setitem(stmt)
2112
+
2113
+
2114
def is_getitem(stmt):
    """True if *stmt* assigns the result of a getitem or static_getitem."""
    if not isinstance(stmt, ir.Assign):
        return False
    value = stmt.value
    return isinstance(value, ir.Expr) and value.op in [
        "getitem",
        "static_getitem",
    ]
2121
+
2122
+
2123
def is_setitem(stmt):
    """True if *stmt* is an ``ir.SetItem`` or ``ir.StaticSetItem`` node."""
    setitem_nodes = (ir.SetItem, ir.StaticSetItem)
    return isinstance(stmt, setitem_nodes)
2126
+
2127
+
2128
def index_var_of_get_setitem(stmt):
    """Return the index variable of a getitem/setitem node (static cases
    included), or None when *stmt* is neither.
    """
    if is_getitem(stmt):
        expr = stmt.value
        return expr.index if expr.op == "getitem" else expr.index_var

    if is_setitem(stmt):
        return stmt.index if isinstance(stmt, ir.SetItem) else stmt.index_var

    return None
2143
+
2144
+
2145
def set_index_var_of_get_setitem(stmt, new_index):
    """Replace the index variable of a getitem/setitem node (static cases
    included) with *new_index*.

    Raises ValueError if *stmt* is neither a getitem assignment nor a setitem.
    """
    if is_getitem(stmt):
        expr = stmt.value
        if expr.op == "getitem":
            expr.index = new_index
        else:
            expr.index_var = new_index
        return

    if is_setitem(stmt):
        if isinstance(stmt, ir.SetItem):
            stmt.index = new_index
        else:
            stmt.index_var = new_index
        return

    raise ValueError(
        "getitem or setitem node expected but received {}".format(stmt)
    )
2160
+
2161
+
2162
def is_namedtuple_class(c):
    """Return True if *c* looks like a namedtuple class.

    That means: a class whose only base is ``tuple``, which has a ``_make``
    attribute and a ``_fields`` tuple made up entirely of strings.
    """
    if not isinstance(c, type):
        return False
    parents = c.__bases__
    has_only_tuple_base = len(parents) == 1 and parents[0] is tuple
    if not (has_only_tuple_base and hasattr(c, "_make")):
        return False
    field_names = getattr(c, "_fields", None)
    return isinstance(field_names, tuple) and all(
        isinstance(field, str) for field in field_names
    )
2178
+
2179
+
2180
def fill_block_with_call(newblock, callee, label_next, inputs, outputs):
    """Populate *newblock* with a call to *callee*.

    The variables named in *inputs* are passed as arguments, the result is
    unpacked by position into the variables named in *outputs*, and the block
    ends with a jump to *label_next*. Returns *newblock*.
    """
    scope = newblock.scope
    loc = newblock.loc

    # bind the callee to a temporary
    callee_var = scope.make_temp(loc=loc)
    newblock.append(
        ir.Assign(target=callee_var, value=ir.Const(value=callee, loc=loc), loc=loc)
    )
    # call
    call_args = [scope.get_exact(name) for name in inputs]
    call_expr = ir.Expr.call(func=callee_var, args=call_args, kws=(), loc=loc)
    result_var = scope.make_temp(loc=loc)
    newblock.append(ir.Assign(target=result_var, value=call_expr, loc=loc))
    # unpack return value
    for position, out_name in enumerate(outputs):
        out_var = scope.get_exact(out_name)
        item = ir.Expr.static_getitem(
            value=result_var, index=position, index_var=None, loc=loc
        )
        newblock.append(ir.Assign(target=out_var, value=item, loc=loc))
    # jump to next block
    newblock.append(ir.Jump(target=label_next, loc=loc))
    return newblock
2206
+
2207
+
2208
def fill_callee_prologue(block, inputs, label_next):
    """
    Populate *block* so that it binds each positional argument to the
    variable named in *inputs*, then jumps to *label_next*. Returns *block*.

    Expected to use with *fill_block_with_call()*
    """
    scope = block.scope
    loc = block.loc
    # load args
    arg_nodes = [
        ir.Arg(name=arg_name, index=position, loc=loc)
        for position, arg_name in enumerate(inputs)
    ]
    for arg_name, arg_node in zip(inputs, arg_nodes):
        arg_var = ir.Var(scope=scope, name=arg_name, loc=loc)
        block.append(ir.Assign(target=arg_var, value=arg_node, loc=loc))
    # jump to loop entry
    block.append(ir.Jump(target=label_next, loc=loc))
    return block
2225
+
2226
+
2227
def fill_callee_epilogue(block, outputs):
    """
    Populate *block* so that it returns the variables named in *outputs*
    packed into a single tuple. This block is the last block of the function.

    Expected to use with *fill_block_with_call()*

    Returns *block*.
    """
    block_scope = block.scope
    block_loc = block.loc

    # gather the output variables and pack them into one tuple value
    returned_vars = [
        block_scope.get_exact(name=out_name) for out_name in outputs
    ]
    packed = ir.Expr.build_tuple(items=returned_vars, loc=block_loc)
    ret_var = block_scope.make_temp(loc=block_loc)
    block.append(ir.Assign(target=ret_var, value=packed, loc=block_loc))

    # return the packed tuple
    block.append(ir.Return(value=ret_var, loc=block_loc))
    return block
2244
+
2245
+
2246
def find_outer_value(func_ir, var):
    """Return the Python value behind *var* when it is a global or freevar.

    A chain of ``getattr`` expressions rooted at a global/freevar is followed
    by applying ``getattr`` to the resolved base value. In every other case,
    or when an attribute is missing, GuardException is raised.
    """
    defn = get_definition(func_ir, var)

    # direct hit: the definition is a global or a free variable
    if isinstance(defn, (ir.Global, ir.FreeVar)):
        return defn.value

    # anything that is not an attribute access cannot be resolved
    if not (isinstance(defn, ir.Expr) and defn.op == "getattr"):
        raise GuardException

    # resolve the object the attribute is taken from, then the attribute
    base_value = find_outer_value(func_ir, defn.value)
    try:
        return getattr(base_value, defn.attr)
    except AttributeError:
        raise GuardException
2263
+
2264
+
2265
def raise_on_unsupported_feature(func_ir, typemap):
    """
    Helper function to walk IR and raise if it finds op codes
    that are unsupported. Could be extended to cover IR sequences
    as well as op codes. Intended use is to call it as a pipeline
    stage just prior to lowering to prevent LoweringErrors for known
    unsupported features.

    func_ir - the FunctionIR object to inspect
    typemap - mapping of variable name to its type

    Raises UnsupportedError for a UniTuple argument with more than 1000
    elements, for a `make_function` expression, for a yield in a function
    not flagged as a generator, and when gdb/gdb_init is referenced more
    than once.
    Raises TypingError when `view` is taken from a non-array variable that
    was not produced by a NumPy integer/floating type call, and when a global
    has a reflected, DictType or ListType type.
    """
    gdb_calls = []  # accumulate calls to gdb/gdb_init

    # issue 2195: check for excessively large tuples
    for arg_name in func_ir.arg_names:
        if (
            arg_name in typemap
            and isinstance(typemap[arg_name], types.containers.UniTuple)
            and typemap[arg_name].count > 1000
        ):
            # Raise an exception when len(tuple) > 1000. The choice of this
            # number (1000) was entirely arbitrary
            msg = (
                "Tuple '{}' length must be smaller than 1000.\n"
                "Large tuples lead to the generation of a prohibitively large "
                "LLVM IR which causes excessive memory pressure "
                "and large compile times.\n"
                "As an alternative, the use of a 'list' is recommended in "
                "place of a 'tuple' as lists do not suffer from this problem.".format(
                    arg_name
                )
            )
            raise UnsupportedError(msg, func_ir.loc)

    for blk in func_ir.blocks.values():
        for stmt in blk.find_insts(ir.Assign):
            # This raises on finding `make_function`
            if isinstance(stmt.value, ir.Expr):
                if stmt.value.op == "make_function":
                    val = stmt.value

                    # See if the construct name can be refined
                    code = getattr(val, "code", None)
                    if code is not None:
                        # check if this is a closure, the co_name will
                        # be the captured function name which is not
                        # useful so be explicit
                        if getattr(val, "closure", None) is not None:
                            use = "<creating a function from a closure>"
                            expr = ""
                        else:
                            use = code.co_name
                            expr = "(%s) " % use
                    else:
                        use = "<could not ascertain use case>"
                        expr = ""

                    msg = (
                        "Numba encountered the use of a language "
                        "feature it does not support in this context: "
                        "%s (op code: make_function not supported). If "
                        "the feature is explicitly supported it is "
                        "likely that the result of the expression %s"
                        "is being used in an unsupported manner."
                    ) % (use, expr)
                    raise UnsupportedError(msg, stmt.value.loc)

            # this checks for gdb initialization calls, only one is permitted
            if isinstance(stmt.value, (ir.Global, ir.FreeVar)):
                val = getattr(stmt.value, "value", None)
                if val is None:
                    # nothing to inspect; this also skips the remaining
                    # checks for this statement
                    continue

                # check global function
                found = False
                if isinstance(val, pytypes.FunctionType):
                    found = val in {numba.gdb, numba.gdb_init}
                if not found:  # freevar bind to intrinsic
                    found = getattr(val, "_name", "") == "gdb_internal"
                if found:
                    gdb_calls.append(stmt.loc)  # report last seen location

            # this checks that np.<type> was called if view is called
            if isinstance(stmt.value, ir.Expr):
                if stmt.value.op == "getattr" and stmt.value.attr == "view":
                    var = stmt.value.value.name
                    if isinstance(typemap[var], types.Array):
                        continue
                    df = func_ir.get_definition(var)
                    cn = guard(find_callname, func_ir, df)
                    if cn and cn[1] == "numpy":
                        ty = getattr(numpy, cn[0])
                        try:
                            is_np_number = numpy.issubdtype(
                                ty, numpy.integer
                            ) or numpy.issubdtype(ty, numpy.floating)
                        except TypeError:
                            # `ty` is a NumPy attribute that issubdtype cannot
                            # interpret as a data type (e.g. a plain function
                            # such as numpy.sum). Report it through the
                            # TypingError below instead of leaking TypeError.
                            is_np_number = False
                        if is_np_number:
                            continue

                    vardescr = (
                        "" if var.startswith("$") else "'{}' ".format(var)
                    )
                    raise TypingError(
                        "'view' can only be called on NumPy dtypes, "
                        "try wrapping the variable {}with 'np.<dtype>()'".format(
                            vardescr
                        ),
                        loc=stmt.loc,
                    )

            # checks for globals that are also reflected
            if isinstance(stmt.value, ir.Global):
                ty = typemap[stmt.target.name]
                msg = (
                    "The use of a %s type, assigned to variable '%s' in "
                    "globals, is not supported as globals are considered "
                    "compile-time constants and there is no known way to "
                    "compile a %s type as a constant."
                )
                if getattr(ty, "reflected", False) or isinstance(
                    ty, (types.DictType, types.ListType)
                ):
                    raise TypingError(
                        msg % (ty, stmt.value.name, ty), loc=stmt.loc
                    )

            # checks for generator expressions (yield in use when func_ir has
            # not been identified as a generator).
            if isinstance(stmt.value, ir.Yield) and not func_ir.is_generator:
                msg = "The use of generator expressions is unsupported."
                raise UnsupportedError(msg, loc=stmt.loc)

    # There is more than one call to function gdb/gdb_init
    if len(gdb_calls) > 1:
        msg = (
            "Calling either numba.gdb() or numba.gdb_init() more than once "
            "in a function is unsupported (strange things happen!), use "
            "numba.gdb_breakpoint() to create additional breakpoints "
            "instead.\n\nRelevant documentation is available here:\n"
            "https://numba.readthedocs.io/en/stable/user/troubleshoot.html"
            "#using-numba-s-direct-gdb-bindings-in-nopython-mode\n\n"
            "Conflicting calls found at:\n %s"
        )
        buf = "\n".join([x.strformat() for x in gdb_calls])
        raise UnsupportedError(msg % buf)
2406
+
2407
+
2408
def warn_deprecated(func_ir, typemap):
    """Warn about reflected types that are bound to function arguments.

    Walks *typemap* and, for every entry whose type is flagged ``reflected``
    and whose name starts with ``"arg."``, issues a
    NumbaPendingDeprecationWarning located at ``func_ir.loc``.
    """
    # first pass, just walk the type map
    for name, ty in typemap.items():
        # the Type Metaclass has a reflected member
        if not ty.reflected:
            continue
        # only arguments are reported, against the function call
        if not name.startswith("arg."):
            continue

        loc = func_ir.loc
        arg = name.split(".")[1]
        fname = func_ir.func_id.func_qualname
        if isinstance(ty, types.List):
            tyname = "list"
        else:
            tyname = "set"
        url = (
            "https://numba.readthedocs.io/en/stable/reference/"
            "deprecation.html#deprecation-of-reflection-for-list-and"
            "-set-types"
        )
        template = (
            "\nEncountered the use of a type that is scheduled for "
            "deprecation: type 'reflected %s' found for argument "
            "'%s' of function '%s'.\n\nFor more information visit "
            "%s"
        )
        msg = template % (tyname, arg, fname, url)
        warnings.warn(NumbaPendingDeprecationWarning(msg, loc=loc))
2431
+
2432
+
2433
def resolve_func_from_module(func_ir, node):
    """
    Return the Python object reached by following a chain of `getattr`
    expressions that starts at a module held in a global or freevar. Import
    chains/submodules are followed one attribute at a time. None is returned
    when the chain cannot be resolved.

    func_ir - the FunctionIR object
    node - the IR node from which to start resolving (should be a `getattr`).
    """
    # attribute names, collected from the outermost getattr inwards
    attr_names = []
    current = node
    while getattr(current, "op", False) == "getattr":
        attr_names.append(current.attr)
        try:
            current = func_ir.get_definition(current.value)
        except KeyError:  # multiple definitions
            return None

    # the chain has to be rooted at a module-valued global or freevar
    if not isinstance(current, (ir.Global, ir.FreeVar)):
        return None
    if not isinstance(current.value, pytypes.ModuleType):
        return None

    # replay the attribute accesses from the module outwards
    resolved = current.value
    for attr_name in reversed(attr_names):
        resolved = getattr(resolved, attr_name, False)
        if not resolved:
            return None
    return resolved
2468
+
2469
+
2470
def enforce_no_dels(func_ir):
    """
    Raise CompilerError if any block of *func_ir* contains an ir.Del node.
    The error reports the first ir.Del found in the offending block.
    """
    for block in func_ir.blocks.values():
        found = list(block.find_insts(ir.Del))
        if not found:
            continue
        first = found[0]
        msg = "Illegal IR, del found at: %s" % first
        raise CompilerError(msg, loc=first.loc)
2479
+
2480
+
2481
def enforce_no_phis(func_ir):
    """
    Raise CompilerError if any block of *func_ir* contains a phi expression.
    The error reports the first phi found in the offending block.
    """
    for block in func_ir.blocks.values():
        found = list(block.find_exprs(op="phi"))
        if not found:
            continue
        first = found[0]
        msg = "Illegal IR, phi found at: %s" % first
        raise CompilerError(msg, loc=first.loc)
2490
+
2491
+
2492
def legalize_single_scope(blocks):
    """Return True when all blocks in the mapping share exactly one scope.

    An empty mapping has no scope at all and therefore yields False.
    """
    distinct_scopes = set()
    for block in blocks.values():
        distinct_scopes.add(block.scope)
    return len(distinct_scopes) == 1
2495
+
2496
+
2497
def check_and_legalize_ir(func_ir, flags: "numba.core.compiler.Flags"):
    """
    Check that *func_ir* is legal and then post-process it.

    The IR must contain neither phi expressions nor ir.Del nodes. The
    post-processor is then run on it, with lifetime extension taken from
    ``flags.dbg_extend_lifetimes``.
    """
    # legality checks: phis first, then dels
    enforce_no_phis(func_ir)
    enforce_no_dels(func_ir)
    # postprocess and emit ir.Dels
    postproc.PostProcessor(func_ir).run(
        True, extend_lifetimes=flags.dbg_extend_lifetimes
    )
2506
+
2507
+
2508
def convert_code_obj_to_function(code_obj, caller_ir):
    """
    Converts a code object from a `make_function.code` attr in the IR into a
    python function, caller_ir is the FunctionIR of the caller and is used for
    the resolution of freevars.

    code_obj - the `make_function` IR node; its `code`, `defaults` and `loc`
               attributes are read.
    caller_ir - the FunctionIR of the caller.

    Returns the result of `_create_function_from_code_obj`.
    Raises TypingError when a free variable has multiple definitions in the
    caller or is not defined as an `ir.Const`.
    """
    fcode = code_obj.code
    nfree = len(fcode.co_freevars)

    # try and resolve freevars if they are consts in the caller's IR
    # these can be baked into the new function
    freevars = []
    for x in fcode.co_freevars:
        # not using guard here to differentiate between multiple definition and
        # non-const variable
        try:
            freevar_def = caller_ir.get_definition(x)
        except KeyError:
            msg = (
                "Cannot capture a constant value for variable '%s' as there "
                "are multiple definitions present." % x
            )
            raise TypingError(msg, loc=code_obj.loc)
        if isinstance(freevar_def, ir.Const):
            freevars.append(freevar_def.value)
        else:
            msg = (
                "Cannot capture the non-constant value associated with "
                "variable '%s' in a function that may escape." % x
            )
            raise TypingError(msg, loc=code_obj.loc)

    func_env = "\n".join(
        ["\tc_%d = %s" % (i, x) for i, x in enumerate(freevars)]
    )
    func_clo = ",".join(["c_%d" % i for i in range(nfree)])
    co_varnames = list(fcode.co_varnames)

    # This is horrible. The code object knows about the number of args present
    # it also knows the name of the args but these are bundled in with other
    # vars in `co_varnames`. The make_function IR node knows what the defaults
    # are, they are defined in the IR as consts. The following finds the total
    # number of args (args + kwargs with defaults), finds the default values
    # and infers the number of "kwargs with defaults" from this and then infers
    # the number of actual arguments from that.
    n_kwargs = 0
    n_allargs = fcode.co_argcount
    kwarg_defaults = caller_ir.get_definition(code_obj.defaults)
    if kwarg_defaults is not None:
        # the defaults are either a plain tuple of variables or a node that
        # exposes them through `.items`
        if isinstance(kwarg_defaults, tuple):
            default_vars = kwarg_defaults
        else:
            default_vars = kwarg_defaults.items
        kwarg_defaults_tup = tuple(
            [caller_ir.get_definition(x).value for x in default_vars]
        )
        n_kwargs = len(kwarg_defaults_tup)
    nargs = n_allargs - n_kwargs

    func_arg = ",".join(["%s" % (co_varnames[i]) for i in range(nargs)])
    if n_kwargs:
        kw_const = [
            "%s = %s" % (co_varnames[i + nargs], kwarg_defaults_tup[i])
            for i in range(n_kwargs)
        ]
        # Only separate the two groups when there are positional names:
        # with nargs == 0 an unconditional ", " would leave a leading comma
        # in the argument list (presumably spliced into a `def` signature by
        # _create_function_from_code_obj -- confirm against that helper).
        if func_arg:
            func_arg += ", "
        func_arg += ", ".join(kw_const)

    # globals are the same as those in the caller
    glbls = caller_ir.func_id.func.__globals__

    # create the function and return it
    return _create_function_from_code_obj(
        fcode, func_env, func_arg, func_clo, glbls
    )
2584
+
2585
+
2586
def fixup_var_define_in_scope(blocks):
    """Fixes the mapping of ir.Block to ensure all referenced ir.Var are
    defined in every scope used by the function. Such that looking up a variable
    from any scope in this function will not fail.

    blocks - mapping whose values are the blocks to fix; each block's `body`
             instructions and `scope.localvars` are used.

    The scopes are updated in place and nothing is returned.

    Note: This is a workaround. Ideally, all the blocks should refer to the
    same ir.Scope, but that property is not maintained by all the passes.
    """
    # Scan for all used variables. A dict serves as an insertion-ordered set
    # so variables are later defined in the order they were first seen.
    used_vars = {}
    for blk in blocks.values():
        for inst in blk.body:
            for var in inst.list_vars():
                used_vars[var] = None

    # Note: not all blocks share a single scope even though they should.
    # Ensure the scope of each block defines all used variables.
    for blk in blocks.values():
        localvars = blk.scope.localvars
        for var in used_vars:
            # add this variable if it's not in scope
            if var.name not in localvars:
                # Note: registering directly on `localvars` reuses the same
                # variable object instead of creating a new one.
                localvars.define(var.name, var)
2610
+
2611
+
2612
def transfer_scope(block, scope):
    """Make *block* use *scope* and return the block.

    Variables known to the block's previous scope but missing from *scope*
    are defined there first. Nothing happens when the block already uses
    *scope*.
    """
    previous = block.scope
    if previous is not scope:
        # carry over every variable the new scope does not know yet
        for var in previous.localvars._con.values():
            already_known = var.name in scope.localvars
            if not already_known:
                scope.localvars.define(var.name, var)
        # switch the block over to the new scope
        block.scope = scope
    return block
2625
+
2626
+
2627
def is_setup_with(stmt):
    """Return True when *stmt* is an ``ir.EnterWith`` node."""
    return isinstance(stmt, ir.EnterWith)
2629
+
2630
+
2631
def is_terminator(stmt):
    """Return True when *stmt* is an ``ir.Terminator`` node."""
    return isinstance(stmt, ir.Terminator)
2633
+
2634
+
2635
def is_raise(stmt):
    """Return True when *stmt* is an ``ir.Raise`` node."""
    return isinstance(stmt, ir.Raise)
2637
+
2638
+
2639
def is_return(stmt):
    """Return True when *stmt* is an ``ir.Return`` node."""
    return isinstance(stmt, ir.Return)
2641
+
2642
+
2643
def is_pop_block(stmt):
    """Return True when *stmt* is an ``ir.PopBlock`` node."""
    return isinstance(stmt, ir.PopBlock)