numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of numba-cuda has been flagged for review. See the package registry's advisory page for details.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1787 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import types as pytypes # avoid confusion with numba.types
|
|
5
|
+
import copy
|
|
6
|
+
import ctypes
|
|
7
|
+
import numba.cuda.core.analysis
|
|
8
|
+
from numba.cuda import HAS_NUMBA
|
|
9
|
+
from numba.cuda import types, config, cgutils
|
|
10
|
+
from numba.cuda.core import ir
|
|
11
|
+
from numba.cuda.core import errors
|
|
12
|
+
from numba.cuda import typing, utils
|
|
13
|
+
from numba.cuda.core.ir_utils import (
|
|
14
|
+
next_label,
|
|
15
|
+
add_offset_to_labels,
|
|
16
|
+
replace_vars,
|
|
17
|
+
remove_dels,
|
|
18
|
+
rename_labels,
|
|
19
|
+
find_topo_order,
|
|
20
|
+
merge_adjacent_blocks,
|
|
21
|
+
GuardException,
|
|
22
|
+
require,
|
|
23
|
+
guard,
|
|
24
|
+
get_definition,
|
|
25
|
+
find_callname,
|
|
26
|
+
find_build_sequence,
|
|
27
|
+
get_np_ufunc_typ,
|
|
28
|
+
get_ir_of_code,
|
|
29
|
+
simplify_CFG,
|
|
30
|
+
canonicalize_array_math,
|
|
31
|
+
dead_code_elimination,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
from numba.cuda.core.analysis import (
|
|
35
|
+
compute_cfg_from_blocks,
|
|
36
|
+
compute_use_defs,
|
|
37
|
+
compute_live_variables,
|
|
38
|
+
)
|
|
39
|
+
from numba.cuda.core.imputils import impl_ret_untracked
|
|
40
|
+
from numba.cuda.extending import intrinsic
|
|
41
|
+
from numba.cuda.typing import signature
|
|
42
|
+
|
|
43
|
+
from numba.cuda.core import postproc, rewrites
|
|
44
|
+
from numba.cuda.np.unsafe.ndarray import empty_inferred as unsafe_empty_inferred
|
|
45
|
+
import numpy as np
|
|
46
|
+
import operator
|
|
47
|
+
|
|
48
|
+
# ``enable_inline_arraycall`` exists only so tests can toggle the
# array-call inlining optimization on and off.
enable_inline_arraycall = True
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def callee_ir_validator(func_ir):
    """Checks the IR of a callee is supported for inlining"""
    # Generators cannot be inlined: reject any closure whose IR
    # contains a yield expression.
    for block in func_ir.blocks.values():
        for assign in block.find_insts(ir.Assign):
            if not isinstance(assign.value, ir.Yield):
                continue
            raise errors.UnsupportedError(
                "The use of yield in a closure is unsupported.",
                loc=assign.loc,
            )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _created_inlined_var_name(function_name, var_name):
|
|
64
|
+
"""Creates a name for an inlined variable based on the function name and the
|
|
65
|
+
variable name. It does this "safely" to avoid the use of characters that are
|
|
66
|
+
illegal in python variable names as there are occasions when function
|
|
67
|
+
generation needs valid python name tokens."""
|
|
68
|
+
inlined_name = f"{function_name}.{var_name}"
|
|
69
|
+
# Replace angle brackets, e.g. "<locals>" is replaced with "_locals_"
|
|
70
|
+
new_name = inlined_name.replace("<", "_").replace(">", "_")
|
|
71
|
+
# The version "version" of the closure function e.g. foo$2 (id 2) is
|
|
72
|
+
# rewritten as "foo_v2". Further "." is also replaced with "_".
|
|
73
|
+
new_name = new_name.replace(".", "_").replace("$", "_v")
|
|
74
|
+
return new_name
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class InlineClosureCallPass(object):
    """InlineClosureCallPass class looks for direct calls to locally defined
    closures, and inlines the body of the closure function to the call site.
    """

    def __init__(self, func_ir, parallel_options, swapped=None, typed=False):
        """Create the pass.

        func_ir: the caller's Numba IR, mutated in place by run().
        parallel_options: options object; ``.reduction`` gates reduce
            inlining and ``.comprehension`` is forwarded to arraycall
            inlining.
        swapped: mutable mapping recording swapped functions (shared with
            the caller of this pass); defaults to a fresh dict.
        typed: whether the pass runs in a typed context (forwarded to
            ``_inline_arraycall``).
        """
        if swapped is None:
            swapped = {}
        self.func_ir = func_ir
        self.parallel_options = parallel_options
        self.swapped = swapped
        self.typed = typed

    def run(self):
        """Run inline closure call pass."""
        # Analysis relies on ir.Del presence, strip out later
        pp = postproc.PostProcessor(self.func_ir)
        pp.run(True)

        modified = False
        work_list = list(self.func_ir.blocks.items())
        debug_print = _make_debug_print("InlineClosureCallPass")
        debug_print(f"START {self.func_ir.func_id.func_qualname}")
        # Worklist of (label, block) pairs; inlining appends the newly
        # created blocks so they are scanned for further call sites.
        while work_list:
            _label, block = work_list.pop()
            for i, instr in enumerate(block.body):
                if isinstance(instr, ir.Assign):
                    expr = instr.value
                    if isinstance(expr, ir.Expr) and expr.op == "call":
                        call_name = guard(find_callname, self.func_ir, expr)
                        func_def = guard(
                            get_definition, self.func_ir, expr.func
                        )

                        if guard(
                            self._inline_reduction,
                            work_list,
                            block,
                            i,
                            expr,
                            call_name,
                        ):
                            modified = True
                            break  # because block structure changed

                        if guard(
                            self._inline_closure, work_list, block, i, func_def
                        ):
                            modified = True
                            break  # because block structure changed

        if enable_inline_arraycall:
            # Identify loop structure
            if modified:
                # Need to do some cleanups if closure inlining kicked in
                merge_adjacent_blocks(self.func_ir.blocks)
            cfg = compute_cfg_from_blocks(self.func_ir.blocks)
            debug_print("start inline arraycall")
            _debug_dump(cfg)
            loops = cfg.loops()
            sized_loops = [(k, len(loops[k].body)) for k in loops.keys()]
            visited = []
            # We go over all loops, bigger loops first (outer first)
            for k, s in sorted(
                sized_loops, key=lambda tup: tup[1], reverse=True
            ):
                visited.append(k)
                if guard(
                    _inline_arraycall,
                    self.func_ir,
                    cfg,
                    visited,
                    loops[k],
                    self.swapped,
                    self.parallel_options.comprehension,
                    self.typed,
                ):
                    modified = True
            if modified:
                _fix_nested_array(self.func_ir)

        if modified:
            # clean up now dead/unreachable blocks, e.g. unconditionally raising
            # an exception in an inlined function would render some parts of the
            # inliner unreachable
            cfg = compute_cfg_from_blocks(self.func_ir.blocks)
            for dead in cfg.dead_nodes():
                del self.func_ir.blocks[dead]

            # run dead code elimination
            dead_code_elimination(self.func_ir)
            # do label renaming
            self.func_ir.blocks = rename_labels(self.func_ir.blocks)

        # inlining done, strip dels
        remove_dels(self.func_ir.blocks)

        debug_print("END")

    def _inline_reduction(self, work_list, block, i, expr, call_name):
        """Inline a ``reduce(f, seq[, init])`` call site as an explicit loop.

        Returns True on success; raises (caught by ``guard`` in run()) when
        the call site is not an inlinable reduce call.
        """
        # only inline reduction in sequential execution, parallel handling
        # is done in ParforPass.
        require(not self.parallel_options.reduction)
        require(
            call_name == ("reduce", "builtins")
            or call_name == ("reduce", "_functools")
        )
        if len(expr.args) not in (2, 3):
            raise TypeError(
                "invalid reduce call, "
                "two arguments are required (optional initial "
                "value can also be specified)"
            )
        check_reduce_func(self.func_ir, expr.args[0])

        # Template whose IR replaces the reduce() call: seed with the
        # initial value if given, else the first element, then fold.
        def reduce_func(f, A, v=None):
            it = iter(A)
            if v is not None:
                s = v
            else:
                s = next(it)
            for a in it:
                s = f(s, a)
            return s

        inline_closure_call(
            self.func_ir,
            self.func_ir.func_id.func.__globals__,
            block,
            i,
            reduce_func,
            work_list=work_list,
            callee_validator=callee_ir_validator,
        )
        return True

    def _inline_closure(self, work_list, block, i, func_def):
        """Inline a direct call to a locally defined closure.

        Only applies when the callee's definition is a ``make_function``
        expression; otherwise ``require`` raises and ``guard`` in run()
        treats it as a non-match.
        """
        require(
            isinstance(func_def, ir.Expr) and func_def.op == "make_function"
        )
        inline_closure_call(
            self.func_ir,
            self.func_ir.func_id.func.__globals__,
            block,
            i,
            func_def,
            work_list=work_list,
            callee_validator=callee_ir_validator,
        )
        return True
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def check_reduce_func(func_ir, func_var):
    """Checks the function at func_var in func_ir to make sure it's amenable
    for inlining. Returns the function itself"""
    reduce_func = guard(get_definition, func_ir, func_var)
    if reduce_func is None:
        # NOTE: the backslash continuation embeds the next line's leading
        # whitespace into the error message itself.
        raise ValueError(
            "Reduce function cannot be found for njit \
            analysis"
        )
    if isinstance(reduce_func, (ir.FreeVar, ir.Global)):
        # A free/global variable: only a jitted dispatcher is accepted here,
        # and only when full Numba is importable.
        if HAS_NUMBA:
            from numba.core.registry import CPUDispatcher

            if not isinstance(reduce_func.value, CPUDispatcher):
                raise ValueError("Invalid reduction function")

            # pull out the python function for inlining
            reduce_func = reduce_func.value.py_func
    elif not (hasattr(reduce_func, "code") or hasattr(reduce_func, "__code__")):
        # Not a function-like object (no code object in either spelling).
        raise ValueError("Invalid reduction function")
    # Support both make_function exprs (.code) and plain functions (__code__).
    f_code = (
        reduce_func.code
        if hasattr(reduce_func, "code")
        else reduce_func.__code__
    )
    if not f_code.co_argcount == 2:
        raise TypeError("Reduction function should take 2 arguments")
    return reduce_func
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class InlineWorker(object):
    """A worker class for inlining, this is a more advanced version of
    `inline_closure_call` in that it permits inlining from function type, Numba
    IR and code object. It also, runs the entire untyped compiler pipeline on
    the inlinee to ensure that it is transformed as though it were compiled
    directly.
    """

    def __init__(
        self,
        typingctx=None,
        targetctx=None,
        locals=None,
        pipeline=None,
        flags=None,
        validator=callee_ir_validator,
        typemap=None,
        calltypes=None,
    ):
        """
        Instantiate a new InlineWorker, all arguments are optional though some
        must be supplied together for certain use cases. The methods will refuse
        to run if the object isn't configured in the manner needed. Args are the
        same as those in a numba.core.Compiler.state, except the validator which
        is a function taking Numba IR and validating it for use when inlining
        (this is optional and really to just provide better error messages about
        things which the inliner cannot handle like yield in closure).
        """

        # Helper to report a missing member of a group of arguments that
        # must be supplied together.
        def check(arg, name):
            if arg is None:
                raise TypeError("{} must not be None".format(name))

        from numba.cuda.compiler import DefaultPassBuilder

        # check the stuff needed to run the more advanced compilation pipeline
        # is valid if any of it is provided
        compiler_args = (targetctx, locals, pipeline, flags)
        compiler_group = [x is not None for x in compiler_args]
        if any(compiler_group) and not all(compiler_group):
            check(targetctx, "targetctx")
            check(locals, "locals")
            check(pipeline, "pipeline")
            check(flags, "flags")
        elif all(compiler_group):
            check(typingctx, "typingctx")

        self._compiler_pipeline = DefaultPassBuilder.define_untyped_pipeline

        self.typingctx = typingctx
        self.targetctx = targetctx
        self.locals = locals
        self.pipeline = pipeline
        self.flags = flags
        self.validator = validator
        self.debug_print = _make_debug_print("InlineWorker")

        # check whether this inliner can also support typemap and calltypes
        # update and if what's provided is valid
        pair = (typemap, calltypes)
        pair_is_none = [x is None for x in pair]
        if any(pair_is_none) and not all(pair_is_none):
            msg = (
                "typemap and calltypes must both be either None or have a "
                "value, got: %s, %s"
            )
            raise TypeError(msg % pair)
        self._permit_update_type_and_call_maps = not all(pair_is_none)
        self.typemap = typemap
        self.calltypes = calltypes

    def inline_ir(
        self, caller_ir, block, i, callee_ir, callee_freevars, arg_typs=None
    ):
        """Inlines the callee_ir in the caller_ir at statement index i of block
        `block`, callee_freevars are the free variables for the callee_ir. If
        the callee_ir is derived from a function `func` then this is
        `func.__code__.co_freevars`. If `arg_typs` is given and the InlineWorker
        instance was initialized with a typemap and calltypes then they will be
        appropriately updated based on the arg_typs.
        """

        # Always copy the callee IR, it gets mutated
        def copy_ir(the_ir):
            kernel_copy = the_ir.copy()
            kernel_copy.blocks = {}
            for block_label, block in the_ir.blocks.items():
                new_block = copy.deepcopy(the_ir.blocks[block_label])
                kernel_copy.blocks[block_label] = new_block
            return kernel_copy

        callee_ir = copy_ir(callee_ir)

        # check that the contents of the callee IR is something that can be
        # inlined if a validator is present
        if self.validator is not None:
            self.validator(callee_ir)

        # save an unmutated copy of the callee_ir to return
        callee_ir_original = copy_ir(callee_ir)
        scope = block.scope
        instr = block.body[i]
        call_expr = instr.value
        callee_blocks = callee_ir.blocks
        from numba.cuda.core import ir_utils

        # 1. relabel callee_ir by adding an offset
        max_label = max(
            ir_utils._the_max_label.next(),
            max(caller_ir.blocks.keys()),
        )
        callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
        callee_blocks = simplify_CFG(callee_blocks)
        callee_ir.blocks = callee_blocks
        min_label = min(callee_blocks.keys())
        max_label = max(callee_blocks.keys())
        # reset globals in ir_utils before we use it
        ir_utils._the_max_label.update(max_label)
        self.debug_print("After relabel")
        _debug_dump(callee_ir)

        # 2. rename all local variables in callee_ir with new locals created in
        # caller_ir
        callee_scopes = _get_all_scopes(callee_blocks)
        self.debug_print("callee_scopes = ", callee_scopes)
        # one function should only have one local scope
        assert len(callee_scopes) == 1
        callee_scope = callee_scopes[0]
        var_dict = {}
        # Iterate a snapshot (tuple) because redefine() mutates the scope.
        for var in tuple(callee_scope.localvars._con.values()):
            if var.name not in callee_freevars:
                inlined_name = _created_inlined_var_name(
                    callee_ir.func_id.unique_name, var.name
                )
                # Update the caller scope with the new names
                new_var = scope.redefine(inlined_name, loc=var.loc)
                # Also update the callee scope with the new names. Should the
                # type and call maps need updating (which requires SSA form) the
                # transformation to SSA is valid as the IR object is internally
                # consistent.
                callee_scope.redefine(inlined_name, loc=var.loc)
                var_dict[var.name] = new_var
        self.debug_print("var_dict = ", var_dict)
        replace_vars(callee_blocks, var_dict)
        self.debug_print("After local var rename")
        _debug_dump(callee_ir)

        # 3. replace formal parameters with actual arguments
        callee_func = callee_ir.func_id.func
        args = _get_callee_args(
            call_expr, callee_func, block.body[i].loc, caller_ir
        )

        # 4. Update typemap
        if self._permit_update_type_and_call_maps:
            if arg_typs is None:
                raise TypeError("arg_typs should have a value not None")
            self.update_type_and_call_maps(callee_ir, arg_typs)
            # update_type_and_call_maps replaces blocks
            callee_blocks = callee_ir.blocks

        self.debug_print("After arguments rename: ")
        _debug_dump(callee_ir)

        _replace_args_with(callee_blocks, args)
        # 5. split caller blocks into two
        new_blocks = []
        new_block = ir.Block(scope, block.loc)
        new_block.body = block.body[i + 1 :]
        new_label = next_label()
        caller_ir.blocks[new_label] = new_block
        new_blocks.append((new_label, new_block))
        block.body = block.body[:i]
        block.body.append(ir.Jump(min_label, instr.loc))

        # 6. replace Return with assignment to LHS
        topo_order = find_topo_order(callee_blocks)
        _replace_returns(callee_blocks, instr.target, new_label)

        # remove the old definition of instr.target too
        if (
            instr.target.name in caller_ir._definitions
            and call_expr in caller_ir._definitions[instr.target.name]
        ):
            # NOTE: target can have multiple definitions due to control flow
            caller_ir._definitions[instr.target.name].remove(call_expr)

        # 7. insert all new blocks, and add back definitions
        for label in topo_order:
            # block scope must point to parent's
            block = callee_blocks[label]
            block.scope = scope
            _add_definitions(caller_ir, block)
            caller_ir.blocks[label] = block
            new_blocks.append((label, block))
        self.debug_print("After merge in")
        _debug_dump(caller_ir)

        return callee_ir_original, callee_blocks, var_dict, new_blocks

    def inline_function(self, caller_ir, block, i, function, arg_typs=None):
        """Inlines the function in the caller_ir at statement index i of block
        `block`. If `arg_typs` is given and the InlineWorker instance was
        initialized with a typemap and calltypes then they will be appropriately
        updated based on the arg_typs.
        """
        callee_ir = self.run_untyped_passes(function)
        freevars = function.__code__.co_freevars
        return self.inline_ir(
            caller_ir, block, i, callee_ir, freevars, arg_typs=arg_typs
        )

    def run_untyped_passes(self, func, enable_ssa=False):
        """
        Run the compiler frontend's untyped passes over the given Python
        function, and return the function's canonical Numba IR.

        Disable SSA transformation by default, since the call site won't be in
        SSA form and self.inline_ir depends on this being the case.
        """
        from numba.cuda.core.compiler import StateDict, _CompileStatus
        from numba.cuda.core.untyped_passes import ExtractByteCode
        from numba.cuda.core import bytecode

        state = StateDict()
        state.func_ir = None
        state.typingctx = self.typingctx
        state.targetctx = self.targetctx
        state.locals = self.locals
        state.pipeline = self.pipeline
        # NOTE(review): state.flags aliases self.flags, so the enable_ssa
        # assignment below mutates the worker's shared flags object —
        # confirm this persistence is intended.
        state.flags = self.flags
        state.flags.enable_ssa = enable_ssa

        state.func_id = bytecode.FunctionIdentity.from_function(func)

        state.typemap = None
        state.calltypes = None
        state.type_annotation = None
        state.status = _CompileStatus(False)
        state.return_type = None
        state.metadata = {}

        ExtractByteCode().run_pass(state)
        # This is a lie, just need *some* args for the case where an obj mode
        # with lift is needed
        state.args = len(state.bc.func_id.pysig.parameters) * (types.pyobject,)

        pm = self._compiler_pipeline(state)

        pm.finalize()
        pm.run(state)
        return state.func_ir

    def update_type_and_call_maps(self, callee_ir, arg_typs):
        """Updates the type and call maps based on calling callee_ir with
        arguments from arg_typs"""
        from numba.cuda.core.ssa import reconstruct_ssa
        from numba.cuda.core.typed_passes import PreLowerStripPhis

        if not self._permit_update_type_and_call_maps:
            msg = (
                "InlineWorker instance not configured correctly, typemap or "
                "calltypes missing in initialization."
            )
            raise ValueError(msg)
        from numba.cuda.core import typed_passes, ir_utils

        # call branch pruning to simplify IR and avoid inference errors
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        numba.cuda.core.analysis.dead_branch_prune(callee_ir, arg_typs)
        # callee's typing may require SSA
        callee_ir = reconstruct_ssa(callee_ir)
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        [f_typemap, _f_return_type, f_calltypes, _] = (
            typed_passes.type_inference_stage(
                self.typingctx,
                self.targetctx,
                callee_ir,
                arg_typs,
                None,
            )
        )
        # Typing is done on SSA form; strip phi nodes again before merging
        # back into the (non-SSA) caller.
        callee_ir = PreLowerStripPhis()._strip_phi_nodes(callee_ir)
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        canonicalize_array_math(
            callee_ir, f_typemap, f_calltypes, self.typingctx
        )
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        self.typemap.update(f_typemap)
        self.calltypes.update(f_calltypes)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def inline_closure_call(
    func_ir,
    glbls,
    block,
    i,
    callee,
    typingctx=None,
    targetctx=None,
    arg_typs=None,
    typemap=None,
    calltypes=None,
    work_list=None,
    callee_validator=None,
    replace_freevars=True,
):
    """Inline the body of `callee` at its callsite (`i`-th instruction of
    `block`)

    `func_ir` is the func_ir object of the caller function and `glbls` is its
    global variable environment (func_ir.func_id.func.__globals__).
    `block` is the IR block of the callsite and `i` is the index of the
    callsite's node. `callee` is either the called function or a
    make_function node. `typingctx`, `typemap` and `calltypes` are typing
    data structures of the caller, available if we are in a typed pass.
    `arg_typs` includes the types of the arguments at the callsite.
    `callee_validator` is an optional callable which can be used to validate the
    IR of the callee to ensure that it contains IR supported for inlining, it
    takes one argument, the func_ir of the callee

    Returns IR blocks of the callee and the variable renaming dictionary used
    for them to facilitate further processing of new blocks.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    debug_print = _make_debug_print("inline_closure_call")
    debug_print("Found closure call: ", instr, " with callee = ", callee)
    # support both function object and make_function Expr
    callee_code = callee.code if hasattr(callee, "code") else callee.__code__
    callee_closure = (
        callee.closure if hasattr(callee, "closure") else callee.__closure__
    )
    from numba.cuda.core import ir_utils

    # first, get the IR of the callee
    if isinstance(callee, pytypes.FunctionType):
        from numba.cuda.compiler import run_frontend

        callee_ir = run_frontend(callee, inline_closures=True)
    else:
        callee_ir = get_ir_of_code(glbls, callee_code)

    # check that the contents of the callee IR is something that can be inlined
    # if a validator is supplied
    if callee_validator is not None:
        callee_validator(callee_ir)

    callee_blocks = callee_ir.blocks

    # 1. relabel callee_ir by adding an offset
    max_label = max(ir_utils._the_max_label.next(), max(func_ir.blocks.keys()))
    callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
    callee_blocks = simplify_CFG(callee_blocks)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())
    # reset globals in ir_utils before we use it
    ir_utils._the_max_label.update(max_label)
    debug_print("After relabel")
    _debug_dump(callee_ir)

    # 2. rename all local variables in callee_ir with new locals created in
    # func_ir
    callee_scopes = _get_all_scopes(callee_blocks)
    debug_print("callee_scopes = ", callee_scopes)
    # one function should only have one local scope
    assert len(callee_scopes) == 1
    callee_scope = callee_scopes[0]
    var_dict = {}
    for var in callee_scope.localvars._con.values():
        if var.name not in callee_code.co_freevars:
            inlined_name = _created_inlined_var_name(
                callee_ir.func_id.unique_name, var.name
            )
            new_var = scope.redefine(inlined_name, loc=var.loc)
            var_dict[var.name] = new_var
    debug_print("var_dict = ", var_dict)
    replace_vars(callee_blocks, var_dict)
    debug_print("After local var rename")
    _debug_dump(callee_ir)

    # 3. replace formal parameters with actual arguments
    args = _get_callee_args(call_expr, callee, block.body[i].loc, func_ir)

    debug_print("After arguments rename: ")
    _debug_dump(callee_ir)

    # 4. replace freevar with actual closure var
    if callee_closure and replace_freevars:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            # A real closure tuple of cell objects: unwrap the cell contents
            # via the CPython C API.
            cellget = ctypes.pythonapi.PyCell_Get
            cellget.restype = ctypes.py_object
            cellget.argtypes = (ctypes.py_object,)
            items = tuple(cellget(x) for x in closure)
        else:
            # Otherwise the closure must be an IR-level build_tuple of vars.
            assert isinstance(closure, ir.Expr) and closure.op == "build_tuple"
            items = closure.items
        assert len(callee_code.co_freevars) == len(items)
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        from numba.cuda.core import typed_passes

        # call branch pruning to simplify IR and avoid inference errors
        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
        numba.cuda.core.analysis.dead_branch_prune(callee_ir, arg_typs)
        try:
            [f_typemap, f_return_type, f_calltypes, _] = (
                typed_passes.type_inference_stage(
                    typingctx, targetctx, callee_ir, arg_typs, None
                )
            )
        except Exception:
            # NOTE(review): this handler re-issues the exact same call as the
            # ``try`` body — presumably a retry can succeed after state
            # mutated by the first attempt; confirm, otherwise the handler
            # is redundant and just re-raises one call later.
            [f_typemap, f_return_type, f_calltypes, _] = (
                typed_passes.type_inference_stage(
                    typingctx, targetctx, callee_ir, arg_typs, None
                )
            )
        canonicalize_array_math(callee_ir, f_typemap, f_calltypes, typingctx)
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)
    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1 :]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)

    # remove the old definition of instr.target too
    if (
        instr.target.name in func_ir._definitions
        and call_expr in func_ir._definitions[instr.target.name]
    ):
        # NOTE: target can have multiple definitions due to control flow
        func_ir._definitions[instr.target.name].remove(call_expr)

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        # block scope must point to parent's
        block = callee_blocks[label]
        block.scope = scope
        _add_definitions(func_ir, block)
        func_ir.blocks[label] = block
        new_blocks.append((label, block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list is not None:
        for block in new_blocks:
            work_list.append(block)
    return callee_blocks, var_dict
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def _get_callee_args(call_expr, callee, loc, func_ir):
    """Get arguments for calling 'callee', including the default arguments.
    keyword arguments are currently only handled when 'callee' is a function.

    call_expr: the ir.Expr at the call site ("call", "getattr", or an
        operator/getitem expression).
    callee: the called object — a Python function or a make_function node.
    loc: source location used for synthesized ir.Const default values.
    func_ir: the caller's IR, used to resolve make_function default tuples.
    """
    from numba.cuda.core import ir_utils

    # Collect the positional arguments supplied at the call site.
    if call_expr.op == "call":
        args = list(call_expr.args)
        if call_expr.vararg:
            msg = "Calling a closure with *args is unsupported."
            raise errors.UnsupportedError(msg, call_expr.loc)
    elif call_expr.op == "getattr":
        args = [call_expr.value]
    elif ir_utils.is_operator_or_getitem(call_expr):
        args = call_expr.list_vars()
    else:
        raise TypeError("Unsupported ir.Expr.{}".format(call_expr.op))

    debug_print = _make_debug_print("inline_closure_call default handling")

    # handle defaults and kw arguments using pysignature if callee is function
    if isinstance(callee, pytypes.FunctionType):
        pysig = utils.pysignature(callee)

        # Explicitly supplied arguments are used as-is.
        def normal_handler(index, param, default):
            return default

        # Defaults are materialized as IR constants at the call site.
        def default_handler(index, param, default):
            return ir.Const(default, loc)

        # Throw error for stararg
        # TODO: handle stararg
        def stararg_handler(index, param, default):
            raise NotImplementedError(
                "Stararg not supported in inliner for arg {} {}".format(
                    index, param
                )
            )

        if call_expr.op == "call":
            kws = dict(call_expr.kws)
        else:
            kws = {}
        return numba.cuda.typing.fold_arguments(
            pysig, args, kws, normal_handler, default_handler, stararg_handler
        )
    else:
        # TODO: handle arguments for make_function case similar to function
        # case above
        # make_function nodes carry defaults as .defaults; plain functions
        # expose __defaults__.
        callee_defaults = (
            callee.defaults
            if hasattr(callee, "defaults")
            else callee.__defaults__
        )
        if callee_defaults:
            debug_print("defaults = ", callee_defaults)
            if isinstance(callee_defaults, tuple):  # Python 3.5
                defaults_list = []
                for x in callee_defaults:
                    if isinstance(x, ir.Var):
                        defaults_list.append(x)
                    else:
                        # this branch is predominantly for kwargs from
                        # inlinable functions
                        defaults_list.append(ir.Const(value=x, loc=loc))
                args = args + defaults_list
            elif isinstance(callee_defaults, (ir.Var, str)):
                # Defaults stored as a variable referencing a build_tuple;
                # resolve each element's defining expression.
                default_tuple = func_ir.get_definition(callee_defaults)
                assert isinstance(default_tuple, ir.Expr)
                assert default_tuple.op == "build_tuple"
                const_vals = [
                    func_ir.get_definition(x) for x in default_tuple.items
                ]
                args = args + const_vals
            else:
                raise NotImplementedError(
                    "Unsupported defaults to make_function: {}".format(
                        callee_defaults
                    )
                )
        return args
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
def _make_debug_print(prefix):
    """Return a printer that emits *prefix*-tagged messages, but only when
    ``config.DEBUG_INLINE_CLOSURE`` is enabled."""

    def debug_print(*args):
        if config.DEBUG_INLINE_CLOSURE:
            rendered = "".join(map(str, args))
            print(prefix + ": " + rendered)

    return debug_print
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def _debug_dump(func_ir):
    """Dump *func_ir* to stdout when closure-inlining debug output is on."""
    if not config.DEBUG_INLINE_CLOSURE:
        return
    func_ir.dump()
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def _get_all_scopes(blocks):
|
|
829
|
+
"""Get all block-local scopes from an IR."""
|
|
830
|
+
all_scopes = []
|
|
831
|
+
for label, block in blocks.items():
|
|
832
|
+
if block.scope not in all_scopes:
|
|
833
|
+
all_scopes.append(block.scope)
|
|
834
|
+
return all_scopes
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
def _replace_args_with(blocks, args):
    """Substitute every ``ir.Arg`` placeholder in *blocks* with the
    corresponding actual argument from the call site."""
    for block in blocks.values():
        for assign in block.find_insts(ir.Assign):
            rhs = assign.value
            if not isinstance(rhs, ir.Arg):
                continue
            position = rhs.index
            assert position < len(args)
            assign.value = args[position]
|
|
848
|
+
|
|
849
|
+
|
|
850
|
+
def _replace_freevars(blocks, args):
    """Rewrite every ``ir.FreeVar`` assignment in *blocks* with the matching
    value captured from the parent function (*args*, indexed by the
    FreeVar's ``index``).

    Variables are substituted directly; any non-Var value is wrapped in an
    ``ir.Const`` at the assignment's location.
    """
    for block in blocks.values():
        for assign in block.find_insts(ir.Assign):
            rhs = assign.value
            if not isinstance(rhs, ir.FreeVar):
                continue
            position = rhs.index
            assert position < len(args)
            replacement = args[position]
            if isinstance(replacement, ir.Var):
                assign.value = replacement
            else:
                assign.value = ir.Const(replacement, assign.loc)
|
|
864
|
+
|
|
865
|
+
|
|
866
|
+
def _replace_returns(blocks, target, return_label):
    """Replace each ``ir.Return`` in *blocks* by an assignment of the
    returned value to *target* followed by a jump to *return_label*.

    Any ``cast`` expression seen earlier in the same block that produced the
    returned variable is unwrapped so the uncast value is assigned directly.
    Blocks are mutated in place.
    """
    for label, block in blocks.items():
        # Cast assignments seen so far in this block, in statement order,
        # so a later Return can look back at the cast feeding its value.
        casts = []
        for i in range(len(block.body)):
            stmt = block.body[i]
            if isinstance(stmt, ir.Return):
                # A Return must be the last statement of its block.
                assert i + 1 == len(block.body)
                block.body[i] = ir.Assign(stmt.value, target, stmt.loc)
                block.body.append(ir.Jump(return_label, stmt.loc))
                # remove cast of the returned value
                for cast in casts:
                    if cast.target.name == stmt.value.name:
                        cast.value = cast.value.value
            elif (
                isinstance(stmt, ir.Assign)
                and isinstance(stmt.value, ir.Expr)
                and stmt.value.op == "cast"
            ):
                casts.append(stmt)
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
def _add_definitions(func_ir, block):
    """Record every assignment in *block* into the parent *func_ir*'s
    definitions table, keyed by the target variable's name.
    """
    defs = func_ir._definitions
    for assign in block.find_insts(ir.Assign):
        defs[assign.target.name].append(assign.value)
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
def _find_arraycall(func_ir, block):
    """Look for statement like "x = numpy.array(y)" or "x[..] = y"
    immediately after the closure call that creates list y (the i-th
    statement in block). Return the statement index if found, or
    raise GuardException.

    Returns ``(list_var, array_stmt_index, array_kws)`` where *list_var* is
    the list variable being consumed, *array_stmt_index* is the position of
    the consuming statement in ``block.body``, and *array_kws* holds the
    keyword arguments of the array() call (empty dict for the SetItem case).
    """
    array_var = None
    # Becomes True once an ir.Del for the list is seen after the array call.
    list_var_dead_after_array_call = False
    list_var = None

    i = 0
    while i < len(block.body):
        instr = block.body[i]
        if isinstance(instr, ir.Del):
            # Stop the process if list_var becomes dead
            if list_var and array_var and instr.value == list_var.name:
                list_var_dead_after_array_call = True
                break
            pass
        elif isinstance(instr, ir.Assign):
            # Found array_var = array(list_var)
            lhs = instr.target
            expr = instr.value
            if guard(find_callname, func_ir, expr) == (
                "array",
                "numpy",
            ) and isinstance(expr.args[0], ir.Var):
                list_var = expr.args[0]
                array_var = lhs
                array_stmt_index = i
                array_kws = dict(expr.kws)
        elif (
            isinstance(instr, ir.SetItem)
            and isinstance(instr.value, ir.Var)
            and not list_var
        ):
            list_var = instr.value
            # Found array_var[..] = list_var, the case for nested array
            array_var = instr.target
            # The target array must itself come from unsafe_empty_inferred.
            array_def = get_definition(func_ir, array_var)
            require(guard(_find_unsafe_empty_inferred, func_ir, array_def))
            array_stmt_index = i
            array_kws = {}
        else:
            # Bail out otherwise
            break
        i = i + 1
    # require array_var is found, and list_var is dead after array_call.
    require(array_var and list_var_dead_after_array_call)
    _make_debug_print("find_array_call")(block.body[array_stmt_index])
    return list_var, array_stmt_index, array_kws
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def _find_iter_range(func_ir, range_iter_var, swapped):
    """Resolve the bounds of *range_iter_var* when it is a ``getiter`` over
    a direct ``range(n)`` or ``range(m, n)`` call.

    Parameters
    ----------
    func_ir : FuncIR
        IR used for definition lookups.
    range_iter_var : ir.Var
        Variable holding the iterator object.
    swapped : dict
        Out-parameter: keyed by the range callee's variable name, records
        info used later when rewriting error messages.

    Returns
    -------
    tuple
        ``(start, stop, range_global_def)``; ``start`` is the constant 0
        for one-argument ranges, otherwise the definition of the first
        argument.

    Raises
    ------
    GuardException
        (via ``require``) if the iterator is not a plain one- or
        two-argument ``range()`` call.
    """
    debug_print = _make_debug_print("find_iter_range")
    range_iter_def = get_definition(func_ir, range_iter_var)
    debug_print("range_iter_var = ", range_iter_var, " def = ", range_iter_def)
    # The iterator must come straight from a getiter expression.
    require(
        isinstance(range_iter_def, ir.Expr) and range_iter_def.op == "getiter"
    )
    range_var = range_iter_def.value
    range_def = get_definition(func_ir, range_var)
    debug_print("range_var = ", range_var, " range_def = ", range_def)
    require(isinstance(range_def, ir.Expr) and range_def.op == "call")
    func_var = range_def.func
    func_def = get_definition(func_ir, func_var)
    debug_print("func_var = ", func_var, " func_def = ", func_def)
    # Only the builtin range, referenced as a global, is accepted.
    require(isinstance(func_def, ir.Global) and func_def.value is range)
    nargs = len(range_def.args)
    swapping = [('"array comprehension"', "closure of"), range_def.func.loc]
    if nargs == 1:
        swapped[range_def.func.name] = swapping
        # The raw stop argument variable is returned directly; a previously
        # present definition lookup of it was unused and has been removed.
        return (0, range_def.args[0], func_def)
    elif nargs == 2:
        swapped[range_def.func.name] = swapping
        start = get_definition(func_ir, range_def.args[0], lhs_only=True)
        stop = get_definition(func_ir, range_def.args[1], lhs_only=True)
        return (start, stop, func_def)
    else:
        # range() with a step argument (or no arguments) is unsupported.
        raise GuardException
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
@intrinsic
def length_of_iterator(typingctx, val):
    """
    An implementation of len(iter) for internal use.
    Primary use is for array comprehensions (see inline_closurecall).

    Supports range, list, array and uni-tuple iterators; any other iterator
    type raises a TypingError at typing time.
    """
    if isinstance(val, types.RangeIteratorType):
        val_type = val.yield_type

        def codegen(context, builder, sig, args):
            (value,) = args
            from numba.cuda.cpython.rangeobj import range_impl_map

            # Range iterators carry an explicit count field; load it.
            iter_type = range_impl_map[val_type][1]
            iterobj = cgutils.create_struct_proxy(iter_type)(
                context, builder, value
            )
            int_type = iterobj.count.type
            return impl_ret_untracked(
                context, builder, int_type, builder.load(iterobj.count)
            )

        # NOTE: this branch declares the range's yield type (not intp) as
        # the return type.
        return signature(val_type, val), codegen
    elif isinstance(val, types.ListIter):

        def codegen(context, builder, sig, args):
            (value,) = args
            intp_t = context.get_value_type(types.intp)
            from numba.cuda.cpython.listobj import ListIterInstance

            # The list iterator exposes the backing list's size directly.
            iterobj = ListIterInstance(context, builder, sig.args[0], value)
            return impl_ret_untracked(context, builder, intp_t, iterobj.size)

        return signature(types.intp, val), codegen
    elif isinstance(val, types.ArrayIterator):

        def codegen(context, builder, sig, args):
            (iterty,) = sig.args
            (value,) = args
            intp_t = context.get_value_type(types.intp)
            iterobj = context.make_helper(builder, iterty, value=value)
            arrayty = iterty.array_type
            from numba.cuda.np.arrayobj import make_array

            ary = make_array(arrayty)(context, builder, value=iterobj.array)
            shape = cgutils.unpack_tuple(builder, ary.shape)
            # array iterates along the outer dimension
            return impl_ret_untracked(context, builder, intp_t, shape[0])

        return signature(types.intp, val), codegen
    elif isinstance(val, types.UniTupleIter):

        def codegen(context, builder, sig, args):
            (iterty,) = sig.args
            tuplety = iterty.container
            intp_t = context.get_value_type(types.intp)
            # Tuple length is known at compile time; emit it as a constant.
            count_const = intp_t(tuplety.count)
            return impl_ret_untracked(context, builder, intp_t, count_const)

        return signature(types.intp, val), codegen
    else:
        msg = (
            "Unsupported iterator found in array comprehension, try "
            "preallocating the array and filling manually."
        )
        raise errors.TypingError(msg)
|
|
1052
|
+
|
|
1053
|
+
|
|
1054
|
+
def _inline_arraycall(
    func_ir, cfg, visited, loop, swapped, enable_prange=False, typed=False
):
    """Look for array(list) call in the exit block of a given loop, and turn
    list operations into array operations in the loop if the following
    conditions are met:
      1. The exit block contains an array call on the list;
      2. The list variable is no longer live after array call;
      3. The list is created in the loop entry block;
      4. The loop is created from an range iterator whose length is known prior
         to the loop;
      5. There is only one list_append operation on the list variable in the
         loop body;
      6. The block that contains list_append dominates the loop head, which
         ensures list length is the same as loop length;
    If any condition check fails, no modification will be made to the incoming
    IR.

    *func_ir* is mutated in place on success. *visited* holds the headers of
    loops already processed; *swapped* collects range-call swap info for
    error reporting; *typed* enables the transformations that only work
    under type inference. (*enable_prange* is not referenced in this body.)
    Returns True on success; bails out via GuardException otherwise.
    """
    debug_print = _make_debug_print("inline_arraycall")
    # There should only be one loop exit
    require(len(loop.exits) == 1)
    exit_block = next(iter(loop.exits))
    list_var, array_call_index, array_kws = _find_arraycall(
        func_ir,
        func_ir.blocks[exit_block],
    )

    # check if dtype is present in array call
    dtype_def = None
    dtype_mod_def = None
    if "dtype" in array_kws:
        require(isinstance(array_kws["dtype"], ir.Var))
        # We require that dtype argument to be a constant of getattr Expr, and
        # we'll remember its definition for later use.
        dtype_def = get_definition(func_ir, array_kws["dtype"])
        require(isinstance(dtype_def, ir.Expr) and dtype_def.op == "getattr")
        dtype_mod_def = get_definition(func_ir, dtype_def.value)

    list_var_def = get_definition(func_ir, list_var)
    debug_print("list_var = ", list_var, " def = ", list_var_def)
    if isinstance(list_var_def, ir.Expr) and list_var_def.op == "cast":
        list_var_def = get_definition(func_ir, list_var_def.value)
    # Check if the definition is a build_list
    require(
        isinstance(list_var_def, ir.Expr) and list_var_def.op == "build_list"
    )
    # The build_list must be empty
    require(len(list_var_def.items) == 0)

    # Look for list_append in "last" block in loop body, which should be a block
    # that is a post-dominator of the loop header.
    list_append_stmts = []
    for label in loop.body:
        # We have to consider blocks of this loop, but not sub-loops.
        # To achieve this, we require the set of "in_loops" of "label" to be
        # visited loops.
        in_visited_loops = [l.header in visited for l in cfg.in_loops(label)]
        if not all(in_visited_loops):
            continue
        block = func_ir.blocks[label]
        debug_print("check loop body block ", label)
        for stmt in block.find_insts(ir.Assign):
            expr = stmt.value
            if isinstance(expr, ir.Expr) and expr.op == "call":
                func_def = get_definition(func_ir, expr.func)
                if (
                    isinstance(func_def, ir.Expr)
                    and func_def.op == "getattr"
                    and func_def.attr == "append"
                ):
                    list_def = get_definition(func_ir, func_def.value)
                    debug_print(
                        "list_def = ", list_def, list_def is list_var_def
                    )
                    if list_def is list_var_def:
                        # found matching append call
                        list_append_stmts.append((label, block, stmt))

    # Require only one list_append, otherwise we won't know the indices
    require(len(list_append_stmts) == 1)
    append_block_label, append_block, append_stmt = list_append_stmts[0]

    # Check if append_block (besides loop entry) dominates loop header.
    # Since CFG doesn't give us this info without loop entry, we approximate
    # by checking if the predecessor set of the header block is the same
    # as loop_entries plus append_block, which is certainly more restrictive
    # than necessary, and can be relaxed if needed.
    preds = set(l for l, b in cfg.predecessors(loop.header))
    debug_print("preds = ", preds, (loop.entries | set([append_block_label])))
    require(preds == (loop.entries | set([append_block_label])))

    # Find iterator in loop header
    iter_vars = []
    iter_first_vars = []
    loop_header = func_ir.blocks[loop.header]
    for stmt in loop_header.find_insts(ir.Assign):
        expr = stmt.value
        if isinstance(expr, ir.Expr):
            if expr.op == "iternext":
                iter_def = get_definition(func_ir, expr.value)
                debug_print("iter_def = ", iter_def)
                iter_vars.append(expr.value)
            elif expr.op == "pair_first":
                iter_first_vars.append(stmt.target)

    # Require only one iterator in loop header
    require(len(iter_vars) == 1 and len(iter_first_vars) == 1)
    # variable that holds the iterator object
    iter_var = iter_vars[0]
    # variable that holds the value out of iterator
    iter_first_var = iter_first_vars[0]

    # Final requirement: only one loop entry, and we're going to modify it by:
    # 1. replacing the list definition with an array definition;
    # 2. adding a counter for the array iteration.
    require(len(loop.entries) == 1)
    loop_entry = func_ir.blocks[next(iter(loop.entries))]
    terminator = loop_entry.terminator
    scope = loop_entry.scope
    loc = loop_entry.loc
    stmts = []
    removed = []

    # True if val is an ir.Var whose name matches one already removed.
    def is_removed(val, removed):
        if isinstance(val, ir.Var):
            for x in removed:
                if x.name == val.name:
                    return True
        return False

    # Skip list construction and skip terminator, add the rest to stmts.
    # NOTE: list_def is the binding made in the append-search loop above;
    # the single-append requirement guarantees it was assigned.
    for i in range(len(loop_entry.body) - 1):
        stmt = loop_entry.body[i]
        if isinstance(stmt, ir.Assign) and (
            stmt.value is list_def or is_removed(stmt.value, removed)
        ):
            removed.append(stmt.target)
        else:
            stmts.append(stmt)
    debug_print("removed variables: ", removed)

    # Define an index_var to index the array.
    # If the range happens to be single step ranges like range(n), or
    # range(m, n), then the index_var correlates to iterator index; otherwise
    # we'll have to define a new counter.
    range_def = guard(_find_iter_range, func_ir, iter_var, swapped)
    index_var = scope.redefine("index", loc)
    if range_def and range_def[0] == 0:
        # iterator starts with 0, index_var can just be iter_first_var
        index_var = iter_first_var
    else:
        # index_var = -1 # starting the index with -1 since it will incremented
        # in loop header
        stmts.append(
            _new_definition(
                func_ir, index_var, ir.Const(value=-1, loc=loc), loc
            )
        )

    # Insert statement to get the size of the loop iterator
    size_var = scope.redefine("size", loc)
    if range_def:
        start, stop, range_func_def = range_def
        if start == 0:
            size_val = stop
        else:
            size_val = ir.Expr.binop(
                fn=operator.sub, lhs=stop, rhs=start, loc=loc
            )

    else:
        # this doesn't work in objmode as it's effectively untyped
        if typed:
            len_func_var = scope.redefine("len_func", loc)
            stmts.append(
                _new_definition(
                    func_ir,
                    len_func_var,
                    ir.Global(
                        "length_of_iterator", length_of_iterator, loc=loc
                    ),
                    loc,
                )
            )
            size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc)
        else:
            raise GuardException

    stmts.append(_new_definition(func_ir, size_var, size_val, loc))

    size_tuple_var = scope.redefine("size_tuple", loc)
    stmts.append(
        _new_definition(
            func_ir,
            size_tuple_var,
            ir.Expr.build_tuple(items=[size_var], loc=loc),
            loc,
        )
    )

    # Insert array allocation
    array_var = scope.redefine("array", loc)
    empty_func = scope.redefine("empty_func", loc)
    if dtype_def and dtype_mod_def:
        # when dtype is present, we'll call empty with dtype
        dtype_mod_var = scope.redefine("dtype_mod", loc)
        dtype_var = scope.redefine("dtype", loc)
        stmts.append(
            _new_definition(func_ir, dtype_mod_var, dtype_mod_def, loc)
        )
        stmts.append(
            _new_definition(
                func_ir,
                dtype_var,
                ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc),
                loc,
            )
        )
        stmts.append(
            _new_definition(
                func_ir, empty_func, ir.Global("empty", np.empty, loc=loc), loc
            )
        )
        array_kws = [("dtype", dtype_var)]
    else:
        # this doesn't work in objmode as it's effectively untyped
        if typed:
            # otherwise we'll call unsafe_empty_inferred
            stmts.append(
                _new_definition(
                    func_ir,
                    empty_func,
                    ir.Global(
                        "unsafe_empty_inferred", unsafe_empty_inferred, loc=loc
                    ),
                    loc,
                )
            )
            array_kws = []
        else:
            raise GuardException

    # array_var = empty_func(size_tuple_var)
    stmts.append(
        _new_definition(
            func_ir,
            array_var,
            ir.Expr.call(
                empty_func, (size_tuple_var,), list(array_kws), loc=loc
            ),
            loc,
        )
    )

    # Add back removed just in case they are used by something else
    for var in removed:
        stmts.append(_new_definition(func_ir, var, array_var, loc))

    # Add back terminator
    stmts.append(terminator)
    # Modify loop_entry
    loop_entry.body = stmts

    if range_def:
        if range_def[0] != 0:
            # when range doesn't start from 0, index_var becomes loop index
            # (iter_first_var) minus an offset (range_def[0])
            terminator = loop_header.terminator
            assert isinstance(terminator, ir.Branch)
            # find the block in the loop body that header jumps to
            block_id = terminator.truebr
            blk = func_ir.blocks[block_id]
            loc = blk.loc
            blk.body.insert(
                0,
                _new_definition(
                    func_ir,
                    index_var,
                    ir.Expr.binop(
                        fn=operator.sub,
                        lhs=iter_first_var,
                        rhs=range_def[0],
                        loc=loc,
                    ),
                    loc,
                ),
            )
    else:
        # Insert index_var increment to the end of loop header
        loc = loop_header.loc
        terminator = loop_header.terminator
        stmts = loop_header.body[0:-1]
        next_index_var = scope.redefine("next_index", loc)
        one = scope.redefine("one", loc)
        # one = 1
        stmts.append(
            _new_definition(func_ir, one, ir.Const(value=1, loc=loc), loc)
        )
        # next_index_var = index_var + 1
        stmts.append(
            _new_definition(
                func_ir,
                next_index_var,
                ir.Expr.binop(fn=operator.add, lhs=index_var, rhs=one, loc=loc),
                loc,
            )
        )
        # index_var = next_index_var
        stmts.append(_new_definition(func_ir, index_var, next_index_var, loc))
        stmts.append(terminator)
        loop_header.body = stmts

    # In append_block, change list_append into array assign
    for i in range(len(append_block.body)):
        if append_block.body[i] is append_stmt:
            debug_print("Replace append with SetItem")
            append_block.body[i] = ir.SetItem(
                target=array_var,
                index=index_var,
                value=append_stmt.value.args[0],
                loc=append_stmt.loc,
            )

    # replace array call, by changing "a = array(b)" to "a = b"
    stmt = func_ir.blocks[exit_block].body[array_call_index]
    # stmt can be either array call or SetItem, we only replace array call
    if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
        stmt.value = array_var
        func_ir._definitions[stmt.target.name] = [stmt.value]

    return True
|
|
1385
|
+
|
|
1386
|
+
|
|
1387
|
+
def _find_unsafe_empty_inferred(func_ir, expr):
    """Return True if *expr* is a call whose callee resolves to the global
    ``unsafe_empty_inferred`` (the untyped empty-array allocator).

    Raises GuardException (via ``require``) when *expr* is not a call
    expression or its callee is not an ``ir.Global``.
    """
    require(isinstance(expr, ir.Expr) and expr.op == "call")
    callee = expr.func
    callee_def = get_definition(func_ir, callee)
    require(isinstance(callee_def, ir.Global))
    _make_debug_print("_find_unsafe_empty_inferred")(callee_def.value)
    return callee_def.value == unsafe_empty_inferred
|
|
1395
|
+
|
|
1396
|
+
|
|
1397
|
+
def _fix_nested_array(func_ir):
    """Look for assignment like: a[..] = b, where both a and b are numpy
    arrays, and try to eliminate array b by expanding a with an extra
    dimension.

    *func_ir* is mutated in place. Each candidate ``a[idx] = b`` statement
    whose right-hand side was allocated via ``unsafe_empty_inferred`` is
    folded: a's allocation gains b's dimensions, b's allocation is rewritten
    into ``a[idx]``, and the SetItem statement itself is removed.
    """
    blocks = func_ir.blocks
    cfg = compute_cfg_from_blocks(blocks)
    usedefs = compute_use_defs(blocks)
    empty_deadmap = dict([(label, set()) for label in blocks.keys()])
    livemap = compute_live_variables(cfg, blocks, usedefs.defmap, empty_deadmap)

    def find_array_def(arr):
        """Find numpy array definition such as
        arr = numba.unsafe.ndarray.empty_inferred(...).
        If it is arr = b[...], find array definition of b recursively.
        """
        arr_def = get_definition(func_ir, arr)
        _make_debug_print("find_array_def")(arr, arr_def)
        if isinstance(arr_def, ir.Expr):
            if guard(_find_unsafe_empty_inferred, func_ir, arr_def):
                return arr_def
            elif arr_def.op == "getitem":
                return find_array_def(arr_def.value)
        raise GuardException

    def fix_dependencies(expr, varlist):
        """Double check if all variables in varlist are defined before
        expr is used. Try to move constant definition when the check fails.
        Bails out by raising GuardException if it can't be moved.
        """
        debug_print = _make_debug_print("fix_dependencies")
        for label, block in blocks.items():
            scope = block.scope
            body = block.body
            defined = set()
            for i in range(len(body)):
                inst = body[i]
                if isinstance(inst, ir.Assign):
                    defined.add(inst.target.name)
                    if inst.value is expr:
                        new_varlist = []
                        for var in varlist:
                            # var must be defined before this inst, or live
                            # and not later defined.
                            if var.name in defined or (
                                var.name in livemap[label]
                                and var.name not in usedefs.defmap[label]
                            ):
                                debug_print(var.name, " already defined")
                                new_varlist.append(var)
                            else:
                                debug_print(var.name, " not yet defined")
                                var_def = get_definition(func_ir, var.name)
                                if isinstance(var_def, ir.Const):
                                    # Constants are safe to duplicate: insert
                                    # a fresh definition just before `expr`.
                                    loc = var.loc
                                    new_var = scope.redefine("new_var", loc)
                                    new_const = ir.Const(var_def.value, loc)
                                    new_vardef = _new_definition(
                                        func_ir, new_var, new_const, loc
                                    )
                                    new_body = []
                                    new_body.extend(body[:i])
                                    new_body.append(new_vardef)
                                    new_body.extend(body[i:])
                                    block.body = new_body
                                    new_varlist.append(new_var)
                                else:
                                    raise GuardException
                        return new_varlist
        # when expr is not found in block
        raise GuardException

    def fix_array_assign(stmt):
        """For assignment like lhs[idx] = rhs, where both lhs and rhs are
        arrays, do the following:
        1. find the definition of rhs, which has to be a call to
           numba.unsafe.ndarray.empty_inferred
        2. find the source array creation for lhs, insert an extra dimension
           of size of b.
        3. replace the definition of
           rhs = numba.unsafe.ndarray.empty_inferred(...) with rhs = lhs[idx]
        """
        require(isinstance(stmt, ir.SetItem))
        require(isinstance(stmt.value, ir.Var))
        debug_print = _make_debug_print("fix_array_assign")
        debug_print("found SetItem: ", stmt)
        lhs = stmt.target
        # Find the source array creation of lhs
        lhs_def = find_array_def(lhs)
        debug_print("found lhs_def: ", lhs_def)
        rhs_def = get_definition(func_ir, stmt.value)
        debug_print("found rhs_def: ", rhs_def)
        require(isinstance(rhs_def, ir.Expr))
        if rhs_def.op == "cast":
            rhs_def = get_definition(func_ir, rhs_def.value)
            require(isinstance(rhs_def, ir.Expr))
        require(_find_unsafe_empty_inferred(func_ir, rhs_def))
        # Find the array dimension of rhs
        dim_def = get_definition(func_ir, rhs_def.args[0])
        require(isinstance(dim_def, ir.Expr) and dim_def.op == "build_tuple")
        debug_print("dim_def = ", dim_def)
        extra_dims = [
            get_definition(func_ir, x, lhs_only=True) for x in dim_def.items
        ]
        debug_print("extra_dims = ", extra_dims)
        # Expand size tuple when creating lhs_def with extra_dims
        size_tuple_def = get_definition(func_ir, lhs_def.args[0])
        require(
            isinstance(size_tuple_def, ir.Expr)
            and size_tuple_def.op == "build_tuple"
        )
        debug_print("size_tuple_def = ", size_tuple_def)
        extra_dims = fix_dependencies(size_tuple_def, extra_dims)
        size_tuple_def.items += extra_dims
        # In-place modify rhs_def to be getitem
        rhs_def.op = "getitem"
        rhs_def.fn = operator.getitem
        rhs_def.value = get_definition(func_ir, lhs, lhs_only=True)
        rhs_def.index = stmt.index
        # Drop the call-expression attributes that no longer apply.
        del rhs_def._kws["func"]
        del rhs_def._kws["args"]
        del rhs_def._kws["vararg"]
        del rhs_def._kws["kws"]
        # success
        return True

    for label in find_topo_order(func_ir.blocks):
        block = func_ir.blocks[label]
        # Iterate over a snapshot: fix_array_assign triggers removal of the
        # current statement from block.body, and removing from a list while
        # iterating it would silently skip the statement that follows each
        # removal.
        for stmt in list(block.body):
            if guard(fix_array_assign, stmt):
                block.body.remove(stmt)
|
|
1527
|
+
|
|
1528
|
+
|
|
1529
|
+
def _new_definition(func_ir, var, value, loc):
    """Register *value* as the sole definition of *var* in *func_ir* and
    return the corresponding ``ir.Assign`` statement (not yet inserted
    into any block).
    """
    defs = func_ir._definitions
    defs[var.name] = [value]
    return ir.Assign(value=value, target=var, loc=loc)
|
|
1532
|
+
|
|
1533
|
+
|
|
1534
|
+
@rewrites.register_rewrite("after-inference")
class RewriteArrayOfConsts(rewrites.Rewrite):
    """The RewriteArrayOfConsts class is responsible for finding
    1D array creations from a constant list, and rewriting it into
    direct initialization of array elements without creating the list.

    Registered to run in the "after-inference" rewrite phase.
    """

    def __init__(self, state, *args, **kws):
        # The typing context is forwarded to _inline_const_arraycall.
        self.typingctx = state.typingctx
        super(RewriteArrayOfConsts, self).__init__(*args, **kws)

    def match(self, func_ir, block, typemap, calltypes):
        """Return True when *block* contains an inlinable const-list
        array() call; on success the rewritten body is cached for
        ``apply``.
        """
        if len(calltypes) == 0:
            # Nothing was call-typed, so there is nothing to rewrite.
            return False
        self.crnt_block = block
        # guard() maps a GuardException inside _inline_const_arraycall
        # to a None result, i.e. "no match".
        self.new_body = guard(
            _inline_const_arraycall,
            block,
            func_ir,
            self.typingctx,
            typemap,
            calltypes,
        )
        return self.new_body is not None

    def apply(self):
        """Install the body computed by the preceding ``match`` call."""
        self.crnt_block.body = self.new_body
        return self.crnt_block
|
|
1562
|
+
|
|
1563
|
+
|
|
1564
|
+
def _inline_const_arraycall(block, func_ir, context, typemap, calltypes):
|
|
1565
|
+
"""Look for array(list) call where list is a constant list created by
|
|
1566
|
+
build_list, and turn them into direct array creation and initialization, if
|
|
1567
|
+
the following conditions are met:
|
|
1568
|
+
1. The build_list call immediate precedes the array call;
|
|
1569
|
+
2. The list variable is no longer live after array call;
|
|
1570
|
+
If any condition check fails, no modification will be made.
|
|
1571
|
+
"""
|
|
1572
|
+
debug_print = _make_debug_print("inline_const_arraycall")
|
|
1573
|
+
scope = block.scope
|
|
1574
|
+
|
|
1575
|
+
def inline_array(array_var, expr, stmts, list_vars, dels):
|
|
1576
|
+
"""Check to see if the given "array_var" is created from a list
|
|
1577
|
+
of constants, and try to inline the list definition as array
|
|
1578
|
+
initialization.
|
|
1579
|
+
|
|
1580
|
+
Extra statements produced with be appended to "stmts".
|
|
1581
|
+
"""
|
|
1582
|
+
callname = guard(find_callname, func_ir, expr)
|
|
1583
|
+
require(callname and callname[1] == "numpy" and callname[0] == "array")
|
|
1584
|
+
require(expr.args[0].name in list_vars)
|
|
1585
|
+
ret_type = calltypes[expr].return_type
|
|
1586
|
+
require(
|
|
1587
|
+
isinstance(ret_type, types.ArrayCompatible) and ret_type.ndim == 1
|
|
1588
|
+
)
|
|
1589
|
+
loc = expr.loc
|
|
1590
|
+
list_var = expr.args[0]
|
|
1591
|
+
# Get the type of the array to be created.
|
|
1592
|
+
array_typ = typemap[array_var.name]
|
|
1593
|
+
debug_print("inline array_var = ", array_var, " list_var = ", list_var)
|
|
1594
|
+
# Get the element type of the array to be created.
|
|
1595
|
+
dtype = array_typ.dtype
|
|
1596
|
+
# Get the sequence of operations to provide values to the new array.
|
|
1597
|
+
seq, _ = find_build_sequence(func_ir, list_var)
|
|
1598
|
+
size = len(seq)
|
|
1599
|
+
# Create a tuple to pass to empty below to specify the new array size.
|
|
1600
|
+
size_var = scope.redefine("size", loc)
|
|
1601
|
+
size_tuple_var = scope.redefine("size_tuple", loc)
|
|
1602
|
+
size_typ = types.intp
|
|
1603
|
+
size_tuple_typ = types.UniTuple(size_typ, 1)
|
|
1604
|
+
typemap[size_var.name] = size_typ
|
|
1605
|
+
typemap[size_tuple_var.name] = size_tuple_typ
|
|
1606
|
+
stmts.append(
|
|
1607
|
+
_new_definition(func_ir, size_var, ir.Const(size, loc=loc), loc)
|
|
1608
|
+
)
|
|
1609
|
+
stmts.append(
|
|
1610
|
+
_new_definition(
|
|
1611
|
+
func_ir,
|
|
1612
|
+
size_tuple_var,
|
|
1613
|
+
ir.Expr.build_tuple(items=[size_var], loc=loc),
|
|
1614
|
+
loc,
|
|
1615
|
+
)
|
|
1616
|
+
)
|
|
1617
|
+
|
|
1618
|
+
# The general approach is to create an empty array and then fill
|
|
1619
|
+
# the elements in one-by-one from their specification.
|
|
1620
|
+
|
|
1621
|
+
# Get the numpy type to pass to empty.
|
|
1622
|
+
nptype = types.DType(dtype)
|
|
1623
|
+
|
|
1624
|
+
# Create a variable to hold the numpy empty function.
|
|
1625
|
+
empty_func = scope.redefine("empty_func", loc)
|
|
1626
|
+
fnty = get_np_ufunc_typ(np.empty)
|
|
1627
|
+
context.resolve_function_type(fnty, (size_typ,), {"dtype": nptype})
|
|
1628
|
+
|
|
1629
|
+
typemap[empty_func.name] = fnty
|
|
1630
|
+
|
|
1631
|
+
stmts.append(
|
|
1632
|
+
_new_definition(
|
|
1633
|
+
func_ir, empty_func, ir.Global("empty", np.empty, loc=loc), loc
|
|
1634
|
+
)
|
|
1635
|
+
)
|
|
1636
|
+
|
|
1637
|
+
# We pass two arguments to empty, first the size tuple and second
|
|
1638
|
+
# the dtype of the new array. Here, we created typ_var which is
|
|
1639
|
+
# the dtype argument of the new array. typ_var in turn is created
|
|
1640
|
+
# by getattr of the dtype string on the numpy module.
|
|
1641
|
+
|
|
1642
|
+
# Create var for numpy module.
|
|
1643
|
+
g_np_var = scope.redefine("$np_g_var", loc)
|
|
1644
|
+
typemap[g_np_var.name] = types.misc.Module(np)
|
|
1645
|
+
g_np = ir.Global("np", np, loc)
|
|
1646
|
+
stmts.append(_new_definition(func_ir, g_np_var, g_np, loc))
|
|
1647
|
+
|
|
1648
|
+
# Create var for result of numpy.<dtype>.
|
|
1649
|
+
typ_var = scope.redefine("$np_typ_var", loc)
|
|
1650
|
+
typemap[typ_var.name] = nptype
|
|
1651
|
+
dtype_str = str(dtype)
|
|
1652
|
+
if dtype_str == "bool":
|
|
1653
|
+
dtype_str = "bool_"
|
|
1654
|
+
# Get dtype attribute of numpy module.
|
|
1655
|
+
np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc)
|
|
1656
|
+
stmts.append(_new_definition(func_ir, typ_var, np_typ_getattr, loc))
|
|
1657
|
+
|
|
1658
|
+
# Create the call to numpy.empty passing the size tuple and dtype var.
|
|
1659
|
+
empty_call = ir.Expr.call(empty_func, [size_var, typ_var], {}, loc=loc)
|
|
1660
|
+
calltypes[empty_call] = typing.signature(array_typ, size_typ, nptype)
|
|
1661
|
+
stmts.append(_new_definition(func_ir, array_var, empty_call, loc))
|
|
1662
|
+
|
|
1663
|
+
# Fill in the new empty array one-by-one.
|
|
1664
|
+
for i in range(size):
|
|
1665
|
+
index_var = scope.redefine("index", loc)
|
|
1666
|
+
index_typ = types.intp
|
|
1667
|
+
typemap[index_var.name] = index_typ
|
|
1668
|
+
stmts.append(
|
|
1669
|
+
_new_definition(func_ir, index_var, ir.Const(i, loc), loc)
|
|
1670
|
+
)
|
|
1671
|
+
setitem = ir.SetItem(array_var, index_var, seq[i], loc)
|
|
1672
|
+
calltypes[setitem] = typing.signature(
|
|
1673
|
+
types.none, array_typ, index_typ, dtype
|
|
1674
|
+
)
|
|
1675
|
+
stmts.append(setitem)
|
|
1676
|
+
|
|
1677
|
+
stmts.extend(dels)
|
|
1678
|
+
return True
|
|
1679
|
+
|
|
1680
|
+
class State(object):
|
|
1681
|
+
"""
|
|
1682
|
+
This class is used to hold the state in the following loop so as to make
|
|
1683
|
+
it easy to reset the state of the variables tracking the various
|
|
1684
|
+
statement kinds
|
|
1685
|
+
"""
|
|
1686
|
+
|
|
1687
|
+
def __init__(self):
|
|
1688
|
+
# list_vars keep track of the variable created from the latest
|
|
1689
|
+
# build_list instruction, as well as its synonyms.
|
|
1690
|
+
self.list_vars = []
|
|
1691
|
+
# dead_vars keep track of those in list_vars that are considered
|
|
1692
|
+
# dead.
|
|
1693
|
+
self.dead_vars = []
|
|
1694
|
+
# list_items keep track of the elements used in build_list.
|
|
1695
|
+
self.list_items = []
|
|
1696
|
+
self.stmts = []
|
|
1697
|
+
# dels keep track of the deletion of list_items, which will need to
|
|
1698
|
+
# be moved after array initialization.
|
|
1699
|
+
self.dels = []
|
|
1700
|
+
# tracks if a modification has taken place
|
|
1701
|
+
self.modified = False
|
|
1702
|
+
|
|
1703
|
+
def reset(self):
|
|
1704
|
+
"""
|
|
1705
|
+
Resets the internal state of the variables used for tracking
|
|
1706
|
+
"""
|
|
1707
|
+
self.list_vars = []
|
|
1708
|
+
self.dead_vars = []
|
|
1709
|
+
self.list_items = []
|
|
1710
|
+
self.dels = []
|
|
1711
|
+
|
|
1712
|
+
def list_var_used(self, inst):
|
|
1713
|
+
"""
|
|
1714
|
+
Returns True if the list being analysed is used between the
|
|
1715
|
+
build_list and the array call.
|
|
1716
|
+
"""
|
|
1717
|
+
return any([x.name in self.list_vars for x in inst.list_vars()])
|
|
1718
|
+
|
|
1719
|
+
state = State()
|
|
1720
|
+
|
|
1721
|
+
for inst in block.body:
|
|
1722
|
+
if isinstance(inst, ir.Assign):
|
|
1723
|
+
if isinstance(inst.value, ir.Var):
|
|
1724
|
+
if inst.value.name in state.list_vars:
|
|
1725
|
+
state.list_vars.append(inst.target.name)
|
|
1726
|
+
state.stmts.append(inst)
|
|
1727
|
+
continue
|
|
1728
|
+
elif isinstance(inst.value, ir.Expr):
|
|
1729
|
+
expr = inst.value
|
|
1730
|
+
if expr.op == "build_list":
|
|
1731
|
+
# new build_list encountered, reset state
|
|
1732
|
+
state.reset()
|
|
1733
|
+
state.list_items = [x.name for x in expr.items]
|
|
1734
|
+
state.list_vars = [inst.target.name]
|
|
1735
|
+
state.stmts.append(inst)
|
|
1736
|
+
continue
|
|
1737
|
+
elif expr.op == "call" and expr in calltypes:
|
|
1738
|
+
if guard(
|
|
1739
|
+
inline_array,
|
|
1740
|
+
inst.target,
|
|
1741
|
+
expr,
|
|
1742
|
+
state.stmts,
|
|
1743
|
+
state.list_vars,
|
|
1744
|
+
state.dels,
|
|
1745
|
+
):
|
|
1746
|
+
state.modified = True
|
|
1747
|
+
continue
|
|
1748
|
+
elif isinstance(inst, ir.Del):
|
|
1749
|
+
removed_var = inst.value
|
|
1750
|
+
if removed_var in state.list_items:
|
|
1751
|
+
state.dels.append(inst)
|
|
1752
|
+
continue
|
|
1753
|
+
elif removed_var in state.list_vars:
|
|
1754
|
+
# one of the list_vars is considered dead.
|
|
1755
|
+
state.dead_vars.append(removed_var)
|
|
1756
|
+
state.list_vars.remove(removed_var)
|
|
1757
|
+
state.stmts.append(inst)
|
|
1758
|
+
if state.list_vars == []:
|
|
1759
|
+
# if all list_vars are considered dead, we need to filter
|
|
1760
|
+
# them out from existing stmts to completely remove
|
|
1761
|
+
# build_list.
|
|
1762
|
+
# Note that if a translation didn't take place, dead_vars
|
|
1763
|
+
# will also be empty when we reach this point.
|
|
1764
|
+
body = []
|
|
1765
|
+
for inst in state.stmts:
|
|
1766
|
+
if (
|
|
1767
|
+
isinstance(inst, ir.Assign)
|
|
1768
|
+
and inst.target.name in state.dead_vars
|
|
1769
|
+
) or (
|
|
1770
|
+
isinstance(inst, ir.Del)
|
|
1771
|
+
and inst.value in state.dead_vars
|
|
1772
|
+
):
|
|
1773
|
+
continue
|
|
1774
|
+
body.append(inst)
|
|
1775
|
+
state.stmts = body
|
|
1776
|
+
state.dead_vars = []
|
|
1777
|
+
state.modified = True
|
|
1778
|
+
continue
|
|
1779
|
+
state.stmts.append(inst)
|
|
1780
|
+
|
|
1781
|
+
# If the list is used in any capacity between build_list and array
|
|
1782
|
+
# call, then we must call off the translation for this list because
|
|
1783
|
+
# it could be mutated and list_items would no longer be applicable.
|
|
1784
|
+
if state.list_var_used(inst):
|
|
1785
|
+
state.reset()
|
|
1786
|
+
|
|
1787
|
+
return state.stmts if state.modified else None
|