numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,758 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from collections import namedtuple, defaultdict
|
|
5
|
+
from numba.cuda import types
|
|
6
|
+
from numba.cuda.core import ir
|
|
7
|
+
from numba.cuda.core import errors
|
|
8
|
+
from numba.cuda.core import consts
|
|
9
|
+
import operator
|
|
10
|
+
from functools import reduce
|
|
11
|
+
|
|
12
|
+
from .controlflow import CFGraph
|
|
13
|
+
from numba.cuda.misc import special
|
|
14
|
+
|
|
15
|
+
#
|
|
16
|
+
# Analysis related to variable lifetime
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
# Result bundle returned by ``compute_use_defs``: two dicts keyed by block
# offset, ``usemap`` for the names a block uses and ``defmap`` for the names
# it defines.
_use_defs_result = namedtuple("use_defs_result", ["usemap", "defmap"])

# Extension point for other packages that define new IR node types: they
# register a callable here so that use/def analysis can handle those nodes.
# The callable is invoked as ``func(stmt, use_set, def_set)``.
# format: {type: function}
ir_extension_usedefs = {}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def compute_use_defs(blocks):
    """
    Collect, for every block, the variable names it uses and defines.

    Returns a ``use_defs_result`` namedtuple whose ``usemap`` and ``defmap``
    fields are dicts of { block offset -> set of variable names }.
    """

    def names_read_by(value):
        # Names appearing on the right-hand side of an assignment.
        if isinstance(value, ir.Inst):
            return {v.name for v in value.list_vars()}
        if isinstance(value, ir.Var):
            return {value.name}
        if isinstance(value, (ir.Arg, ir.Const, ir.Global, ir.FreeVar)):
            # These rhs kinds reference no variables
            return ()
        raise AssertionError("unreachable", type(value))

    usemap = {}  # { block offset -> set of vars }
    defmap = {}  # { block offset -> set of vars }
    for label, block in blocks.items():
        used = usemap[label] = set()
        defined = defmap[label] = set()
        for inst in block.body:
            kind = type(inst)
            if kind in ir_extension_usedefs:
                # Extension node: its registered callback fills in both sets
                ir_extension_usedefs[kind](inst, used, defined)
                continue

            if isinstance(inst, ir.Assign):
                rhs_names = names_read_by(inst.value)
                lhs_name = inst.target.name
                # Only record the target as defined when it does not also
                # appear on the rhs of the same assignment
                if lhs_name not in rhs_names:
                    defined.add(lhs_name)

            # Names already defined in this block are not recorded as uses
            used |= {v.name for v in inst.list_vars()} - defined

    return _use_defs_result(usemap=usemap, defmap=defmap)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def compute_live_map(cfg, blocks, var_use_map, var_def_map):
    """
    Compute the variables that must be alive at the ENTRY of each block.

    Two passes are each repeated until the size of every per-block set stops
    changing: a forward pass gathering the names used or defined that flow
    into each block, then a backward pass pushing uses into predecessors.
    Returns a dict of { block offset -> set of variable names }.
    """

    def set_sizes(mapping):
        """Snapshot of the per-key set sizes, used to detect growth."""
        return tuple(map(len, mapping.values()))

    def run_until_stable(step, mapping):
        """Apply ``step(mapping)`` at least once, until no set grows."""
        sizes = set_sizes(mapping)
        while True:
            step(mapping)
            grown = set_sizes(mapping)
            if grown == sizes:
                return
            sizes = grown

    def propagate_reach(reach):
        """Forward pass: names used or defined that reach each block."""
        for label in var_def_map:
            local_names = var_def_map[label] | var_use_map[label]
            here = reach[label]
            here |= local_names
            # Hand everything known here to each successor
            for successor, _ in cfg.successors(label):
                reach[successor] |= here

    def push_uses_back(live):
        """Backward pass: carry each block's live names to predecessors."""
        for label in live:
            needed = live[label]
            for pred, _data in cfg.predecessors(label):
                # Keep names reachable at the predecessor ...
                candidates = needed & def_reach_map[pred]
                # ... except those the predecessor defines itself
                live[pred] |= candidates - var_def_map[pred]

    # Seed each block with a private copy of its own uses
    live_map = {offset: set(var_use_map[offset]) for offset in blocks.keys()}

    def_reach_map = defaultdict(set)
    run_until_stable(propagate_reach, def_reach_map)
    run_until_stable(push_uses_back, live_map)
    return live_map
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Result bundle with three fields: ``internal``, ``escaping`` and ``combined``.
_dead_maps_result = namedtuple(
    "dead_maps_result", ["internal", "escaping", "combined"]
)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def compute_dead_maps(cfg, blocks, live_map, var_def_map):
|
|
121
|
+
"""
|
|
122
|
+
Compute the end-of-live information for variables.
|
|
123
|
+
`live_map` contains a mapping of block offset to all the living
|
|
124
|
+
variables at the ENTRY of the block.
|
|
125
|
+
"""
|
|
126
|
+
# The following three dictionaries will be
|
|
127
|
+
# { block offset -> set of variables to delete }
|
|
128
|
+
# all vars that should be deleted at the start of the successors
|
|
129
|
+
escaping_dead_map = defaultdict(set)
|
|
130
|
+
# all vars that should be deleted within this block
|
|
131
|
+
internal_dead_map = defaultdict(set)
|
|
132
|
+
# all vars that should be deleted after the function exit
|
|
133
|
+
exit_dead_map = defaultdict(set)
|
|
134
|
+
|
|
135
|
+
for offset, ir_block in blocks.items():
|
|
136
|
+
# live vars WITHIN the block will include all the locally
|
|
137
|
+
# defined variables
|
|
138
|
+
cur_live_set = live_map[offset] | var_def_map[offset]
|
|
139
|
+
# vars alive in the outgoing blocks
|
|
140
|
+
outgoing_live_map = dict(
|
|
141
|
+
(out_blk, live_map[out_blk])
|
|
142
|
+
for out_blk, _data in cfg.successors(offset)
|
|
143
|
+
)
|
|
144
|
+
# vars to keep alive for the terminator
|
|
145
|
+
terminator_liveset = set(
|
|
146
|
+
v.name for v in ir_block.terminator.list_vars()
|
|
147
|
+
)
|
|
148
|
+
# vars to keep alive in the successors
|
|
149
|
+
combined_liveset = reduce(
|
|
150
|
+
operator.or_, outgoing_live_map.values(), set()
|
|
151
|
+
)
|
|
152
|
+
# include variables used in terminator
|
|
153
|
+
combined_liveset |= terminator_liveset
|
|
154
|
+
# vars that are dead within the block because they are not
|
|
155
|
+
# propagated to any outgoing blocks
|
|
156
|
+
internal_set = cur_live_set - combined_liveset
|
|
157
|
+
internal_dead_map[offset] = internal_set
|
|
158
|
+
# vars that escape this block
|
|
159
|
+
escaping_live_set = cur_live_set - internal_set
|
|
160
|
+
for out_blk, new_live_set in outgoing_live_map.items():
|
|
161
|
+
# successor should delete the unused escaped vars
|
|
162
|
+
new_live_set = new_live_set | var_def_map[out_blk]
|
|
163
|
+
escaping_dead_map[out_blk] |= escaping_live_set - new_live_set
|
|
164
|
+
|
|
165
|
+
# if no outgoing blocks
|
|
166
|
+
if not outgoing_live_map:
|
|
167
|
+
# insert var used by terminator
|
|
168
|
+
exit_dead_map[offset] = terminator_liveset
|
|
169
|
+
|
|
170
|
+
# Verify that the dead maps cover all live variables
|
|
171
|
+
all_vars = reduce(operator.or_, live_map.values(), set())
|
|
172
|
+
internal_dead_vars = reduce(operator.or_, internal_dead_map.values(), set())
|
|
173
|
+
escaping_dead_vars = reduce(operator.or_, escaping_dead_map.values(), set())
|
|
174
|
+
exit_dead_vars = reduce(operator.or_, exit_dead_map.values(), set())
|
|
175
|
+
dead_vars = internal_dead_vars | escaping_dead_vars | exit_dead_vars
|
|
176
|
+
missing_vars = all_vars - dead_vars
|
|
177
|
+
if missing_vars:
|
|
178
|
+
# There are no exit points
|
|
179
|
+
if not cfg.exit_points():
|
|
180
|
+
# We won't be able to verify this
|
|
181
|
+
pass
|
|
182
|
+
else:
|
|
183
|
+
msg = "liveness info missing for vars: {0}".format(missing_vars)
|
|
184
|
+
raise RuntimeError(msg)
|
|
185
|
+
|
|
186
|
+
combined = dict(
|
|
187
|
+
(k, internal_dead_map[k] | escaping_dead_map[k]) for k in blocks
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
return _dead_maps_result(
|
|
191
|
+
internal=internal_dead_map,
|
|
192
|
+
escaping=escaping_dead_map,
|
|
193
|
+
combined=combined,
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def compute_live_variables(cfg, blocks, var_def_map, var_dead_map):
    """
    Compute the variables available at the beginning of each block.

    ``var_def_map`` and ``var_dead_map`` give, per block offset, the
    variables defined and deleted in that block respectively.  Returns a
    ``defaultdict`` mapping block offset to the set of variables available
    on entry to that block.
    """
    # variables available at the entry of each block
    block_entry_vars = defaultdict(set)

    def entry_sizes():
        # Entry sets are only ever extended below, so a snapshot of their
        # sizes is enough to tell whether a sweep changed anything.
        return tuple(len(names) for names in block_entry_vars.values())

    # Forward (top-down) propagation: whatever a block has available on
    # entry, plus what it defines, minus what it deletes, is handed to each
    # successor.  A block without predecessors keeps an empty entry set.
    #
    # Note: this finds the variables that actually are available at the
    # entry of each block, whereas compute_live_map() finds the variables
    # that must be available there (the bottom-up counterpart).
    previous_sizes = entry_sizes()
    while True:
        # Sweep repeatedly until nothing changes; a single sweep is not
        # enough when the graph contains loops.
        for offset in blocks:
            available = block_entry_vars[offset] | var_def_map[offset]
            available = available - var_dead_map[offset]
            for successor, _ in cfg.successors(offset):
                block_entry_vars[successor] |= available

        current_sizes = entry_sizes()
        if current_sizes == previous_sizes:
            break
        previous_sizes = current_sizes

    return block_entry_vars
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
#
|
|
239
|
+
# Analysis related to controlflow
|
|
240
|
+
#
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def compute_cfg_from_blocks(blocks):
    """
    Build a processed ``CFGraph`` from a mapping of block label to IR block.

    Every label becomes a node and every target of a block's terminator
    becomes an edge.  The smallest label is used as the entry point.
    """
    graph = CFGraph()

    # All nodes are added first, then the edges between them.
    for label in blocks:
        graph.add_node(label)

    for label, block in blocks.items():
        for destination in block.terminator.get_targets():
            graph.add_edge(label, destination)

    entry_label = min(blocks)
    graph.set_entry_point(entry_label)
    graph.process()
    return graph
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def find_top_level_loops(cfg):
    """
    Generate the loops of a control-flow-graph that are not nested in
    another loop.  Each yielded loop has been passed through
    ``_fix_loop_exit`` first.
    """
    # Collect every block that belongs to some loop (body, entries or
    # exits), leaving out that loop's own header.
    nested_blocks = set()
    for candidate in cfg.loops().values():
        members = set(candidate.body)
        members.update(candidate.entries, candidate.exits)
        members.discard(candidate.header)
        nested_blocks.update(members)

    # A loop whose header is not a member of any loop is top-level.
    for candidate in cfg.loops().values():
        if candidate.header in nested_blocks:
            continue
        yield _fix_loop_exit(cfg, candidate)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _fix_loop_exit(cfg, loop):
|
|
275
|
+
"""
|
|
276
|
+
Fixes loop.exits for Py3.8+ bytecode CFG changes.
|
|
277
|
+
This is to handle `break` inside loops.
|
|
278
|
+
"""
|
|
279
|
+
# Computes the common postdoms of exit nodes
|
|
280
|
+
postdoms = cfg.post_dominators()
|
|
281
|
+
exits = reduce(
|
|
282
|
+
operator.and_,
|
|
283
|
+
[postdoms[b] for b in loop.exits],
|
|
284
|
+
loop.exits,
|
|
285
|
+
)
|
|
286
|
+
if exits:
|
|
287
|
+
# Put the non-common-exits as body nodes
|
|
288
|
+
body = loop.body | loop.exits - exits
|
|
289
|
+
return loop._replace(exits=exits, body=body)
|
|
290
|
+
else:
|
|
291
|
+
return loop
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def rewrite_semantic_constants(func_ir, called_args):
    """
    Replace expressions whose value is fixed by their semantics with
    ir.Const nodes, so that branch pruning gets the best possible chance of
    removing branches.  Two patterns are handled: ``<array arg>.ndim`` and
    ``len(<tuple>)``.

    func_ir is the IR (mutated in place)
    called_args are the actual arguments with which the function is called
    """
    from numba.cuda.core.ir_utils import get_definition, guard

    DEBUG = 0

    if DEBUG > 1:
        banner = "rewrite_semantic_constants: " + func_ir.func_id.func_name
        print(banner.center(80, "-"))
        print("before".center(80, "*"))
        func_ir.dump()

    def replace_with_const(stmt, old_value, new_val):
        """
        Make ``stmt`` assign ir.Const(new_val) and swap the matching entry
        in func_ir._definitions from ``old_value`` to the new constant.
        """
        stmt.value = ir.Const(new_val, stmt.loc)
        definitions = func_ir._definitions[stmt.target.name]
        definitions[definitions.index(old_value)] = stmt.value

    def fold_array_ndim(stmt, expr):
        # Array.ndim on an array argument -> const(ndim)
        if getattr(expr, "op", None) != "getattr" or expr.attr != "ndim":
            return
        arg_def = guard(get_definition, func_ir, expr.value)
        if not isinstance(arg_def, ir.Arg):
            return
        argty = called_args[arg_def.index]
        if isinstance(argty, types.Array):
            replace_with_const(stmt, expr, argty.ndim)

    def fold_tuple_len(stmt, expr):
        # len(tuple) -> const(len(tuple))
        if getattr(expr, "op", None) != "call":
            return
        callee = guard(get_definition, func_ir, expr.func)
        is_builtin_len = (
            isinstance(callee, ir.Global)
            and getattr(callee, "value", None) is len
        )
        if not is_builtin_len:
            return
        (arg,) = expr.args
        arg_def = guard(get_definition, func_ir, arg)
        if isinstance(arg_def, ir.Arg):
            # the tuple is a function argument, use its call-time type
            argty = called_args[arg_def.index]
        elif isinstance(arg_def, ir.Expr) and arg_def.op == "typed_getitem":
            # the tuple comes from a typed_getitem, which carries its dtype
            argty = arg_def.dtype
        else:
            return
        if isinstance(argty, types.BaseTuple):
            replace_with_const(stmt, expr, argty.count)

    for blk in func_ir.blocks.values():
        for stmt in blk.body:
            if not isinstance(stmt, ir.Assign):
                continue
            expr = stmt.value
            if not isinstance(expr, ir.Expr):
                continue
            fold_array_ndim(stmt, expr)
            fold_tuple_len(stmt, expr)

    if DEBUG > 1:
        print("after".center(80, "*"))
        func_ir.dump()
        print("-" * 80)
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def find_literally_calls(func_ir, argtypes):
    """Look for `numba.literally` calls applied to function arguments.

    Every argument handed directly to `literally` is recorded.  If one of
    them is not typed as a literal yet, a `ForceLiteralArg` exception is
    raised, carrying all recorded argument indices and the location of the
    first `literally` call found for the offending argument.

    Parameters
    ----------

    func_ir : numba.ir.FunctionIR
        The IR to scan.
    argtypes : Sequence[numba.types.Type]
        The argument types.
    """
    from numba.cuda.core import ir_utils

    marked_args = set()
    first_loc = {}
    # Pass 1: collect the argument indices passed to `literally`
    for blk in func_ir.blocks.values():
        for call in blk.find_exprs(op="call"):
            callee = ir_utils.guard(
                ir_utils.get_definition, func_ir, call.func
            )
            if isinstance(callee, (ir.Global, ir.FreeVar)):
                fnobj = callee.value
            else:
                fnobj = ir_utils.guard(
                    ir_utils.resolve_func_from_module, func_ir, callee
                )
            if fnobj is not special.literally:
                continue
            # A `literally` call
            [arg] = call.args
            arg_def = func_ir.get_definition(arg)
            if not isinstance(arg_def, ir.Arg):
                continue
            marked_args.add(arg_def.index)
            # only the first location seen per argument is kept
            first_loc.setdefault(arg_def.index, call.loc)

    # Pass 2: tell the dispatcher to force literal typing where needed
    for pos in marked_args:
        query_arg = argtypes[pos]
        needs_literal = (
            isinstance(query_arg, types.InitialValue)
            and query_arg.initial_value is None
        ) or not isinstance(query_arg, (types.Literal, types.InitialValue))
        if needs_literal:
            raise errors.ForceLiteralArg(marked_args, loc=first_loc[pos])
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
# Extension registry: maps an IR statement type to a callable invoked as
# ``handler(stmt, use_alloca_vars)``.
ir_extension_use_alloca = {}


def must_use_alloca(blocks):
    """
    Collect the variables that must be stack allocated with alloca.

    Every statement of every block is looked at; when the exact type of a
    statement is registered in the ``ir_extension_use_alloca`` extension
    point, the registered function is called with the statement and the
    result set so that it can add the variables the statement needs on the
    stack.  Other IR node types (parfors, for example) hook into this
    analysis by registering there.

    Returns the set filled in by the registered functions.
    """
    use_alloca_vars = set()

    all_statements = (
        stmt for ir_block in blocks.values() for stmt in ir_block.body
    )
    for stmt in all_statements:
        stmt_type = type(stmt)
        if stmt_type not in ir_extension_use_alloca:
            continue
        ir_extension_use_alloca[stmt_type](stmt, use_alloca_vars)

    return use_alloca_vars
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
# Used to describe a nullified condition in dead branch pruning.
#   condition    - the IR node of the condition whose branch was pruned
#   taken_br     - the value recorded for the branch that was kept
#   rewrite_stmt - whether the assignment of the condition is to be
#                  rewritten as a constant holding ``taken_br``
nullified = namedtuple("nullified", "condition, taken_br, rewrite_stmt")
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def dead_branch_prune(func_ir, called_args):
    """
    Removes dead branches based on constant inference from function args.
    This directly mutates the IR.

    func_ir is the IR
    called_args are the actual arguments with which the function is called

    Besides replacing prunable ``ir.Branch`` terminators by ``ir.Jump``, this
    rewrites pruned conditions as ``ir.Const`` (updating
    ``func_ir._definitions``), collapses or trims phi nodes whose incoming
    blocks changed, deletes the blocks that became dead from
    ``func_ir.blocks`` and, if anything was pruned, rebuilds
    ``func_ir._consts``.  Nothing is returned.
    """
    from numba.cuda.core.ir_utils import (
        get_definition,
        guard,
        find_const,
        GuardException,
    )

    # Debug verbosity switch: > 0 prints each pruning decision, > 1 also
    # dumps the IR before and after.
    DEBUG = 0

    # Returns a list of (branch, condition, block) for every block that ends
    # in an ir.Branch whose predicate is a call to the global ``bool``;
    # ``condition`` is the definition of the first argument of that call.
    def find_branches(func_ir):
        # find *all* branches
        branches = []
        for blk in func_ir.blocks.values():
            branch_or_jump = blk.body[-1]
            if isinstance(branch_or_jump, ir.Branch):
                branch = branch_or_jump
                pred = guard(get_definition, func_ir, branch.cond.name)
                if pred is not None and getattr(pred, "op", None) == "call":
                    function = guard(get_definition, func_ir, pred.func)
                    if (
                        function is not None
                        and isinstance(function, ir.Global)
                        and function.value is bool
                    ):
                        condition = guard(get_definition, func_ir, pred.args[0])
                        if condition is not None:
                            branches.append((branch, condition, blk))
        return branches

    # Replaces the terminator of ``blk`` by a jump to the kept target and
    # returns 1 if the true branch was kept, 0 otherwise.
    # NOTE: ``branch`` is not a parameter here; it is read from the enclosing
    # scope, i.e. the loop variable of the ``for branch, condition, blk``
    # loop further down, so this must only be called from within that loop.
    def do_prune(take_truebr, blk):
        keep = branch.truebr if take_truebr else branch.falsebr
        # replace the branch with a direct jump
        jmp = ir.Jump(keep, loc=branch.loc)
        blk.body[-1] = jmp
        return 1 if keep == branch.truebr else 0

    # All three pruners below return a pair ``(pruned, taken)``: ``pruned``
    # says whether the branch was replaced, ``taken`` describes the branch
    # that was kept (``None`` when nothing was pruned).
    def prune_by_type(branch, condition, blk, *conds):
        # this prunes a given branch and fixes up the IR
        # at least one needs to be a NoneType
        lhs_cond, rhs_cond = conds
        lhs_none = isinstance(lhs_cond, types.NoneType)
        rhs_none = isinstance(rhs_cond, types.NoneType)
        if lhs_none or rhs_none:
            try:
                # evaluate the comparison on the resolved operands
                take_truebr = condition.fn(lhs_cond, rhs_cond)
            except Exception:
                # the comparison could not be evaluated, leave the branch
                return False, None
            if DEBUG > 0:
                kill = branch.falsebr if take_truebr else branch.truebr
                print(
                    "Pruning %s" % kill,
                    branch,
                    lhs_cond,
                    rhs_cond,
                    condition.fn,
                )
            taken = do_prune(take_truebr, blk)
            return True, taken
        return False, None

    def prune_by_value(branch, condition, blk, *conds):
        lhs_cond, rhs_cond = conds
        try:
            # evaluate the comparison on the two constant values
            take_truebr = condition.fn(lhs_cond, rhs_cond)
        except Exception:
            # the comparison could not be evaluated, leave the branch
            return False, None
        if DEBUG > 0:
            kill = branch.falsebr if take_truebr else branch.truebr
            print("Pruning %s" % kill, branch, lhs_cond, rhs_cond, condition.fn)
        do_prune(take_truebr, blk)
        # It is not safe to rewrite the predicate to a nominal value based on
        # which branch is taken, the rewritten const predicate needs to
        # hold the actual computed const value as something else may refer to
        # it!
        return True, take_truebr

    def prune_by_predicate(branch, pred, blk):
        try:
            # Just to prevent accidents, whilst already guarded, ensure this
            # is an ir.Const
            if not isinstance(pred, (ir.Const, ir.FreeVar, ir.Global)):
                raise TypeError("Expected constant Numba IR node")
            take_truebr = bool(pred.value)
        except TypeError:
            # also covers the TypeError raised just above
            return False, None
        if DEBUG > 0:
            kill = branch.falsebr if take_truebr else branch.truebr
            print("Pruning %s" % kill, branch, pred)
        taken = do_prune(take_truebr, blk)
        return True, taken

    # Sentinel type: an instance marks "could not be resolved to a constant".
    # A dedicated type is used because ``None`` is itself a valid constant.
    class Unknown(object):
        pass

    def resolve_input_arg_const(input_arg_idx):
        """
        Resolves an input arg to a constant (if possible)
        """
        input_arg_ty = called_args[input_arg_idx]

        # comparing to None?
        if isinstance(input_arg_ty, types.NoneType):
            return input_arg_ty

        # is it a kwarg default
        if isinstance(input_arg_ty, types.Omitted):
            val = input_arg_ty.value
            if isinstance(val, types.NoneType):
                return val
            elif val is None:
                return types.NoneType("none")

        # literal type, return the type itself so comparisons like `x == None`
        # still work as e.g. x = types.int64 will never be None/NoneType so
        # the branch can still be pruned
        return getattr(input_arg_ty, "literal_type", Unknown())

    if DEBUG > 1:
        print("before".center(80, "-"))
        # NOTE(review): rewrite_semantic_constants calls func_ir.dump()
        # without print(); confirm whether dump() returns anything printable.
        print(func_ir.dump())

    # Record every phi expression together with the label of the block it
    # lives in and the assignment that holds it; both are needed after the
    # pruning to fix up phis whose incoming blocks went away.
    phi2lbl = dict()
    phi2asgn = dict()
    for lbl, blk in func_ir.blocks.items():
        for stmt in blk.body:
            if isinstance(stmt, ir.Assign):
                if isinstance(stmt.value, ir.Expr) and stmt.value.op == "phi":
                    phi2lbl[stmt.value] = lbl
                    phi2asgn[stmt.value] = stmt

    # This looks for branches where:
    # at least one arg of the condition is in input args and const
    # at least one an arg of the condition is a const
    # if the condition is met it will replace the branch with a jump
    branch_info = find_branches(func_ir)
    # stores conditions that have no impact post prune
    nullified_conditions = []

    for branch, condition, blk in branch_info:
        const_conds = []
        if isinstance(condition, ir.Expr) and condition.op == "binop":
            # default to pruning by value; switched to pruning by type as
            # soon as one operand is a function argument
            prune = prune_by_value
            for arg in [condition.lhs, condition.rhs]:
                resolved_const = Unknown()
                arg_def = guard(get_definition, func_ir, arg)
                if isinstance(arg_def, ir.Arg):
                    # it's an e.g. literal argument to the function
                    resolved_const = resolve_input_arg_const(arg_def.index)
                    prune = prune_by_type
                else:
                    # it's some const argument to the function, cannot use guard
                    # here as the const itself may be None
                    try:
                        resolved_const = find_const(func_ir, arg)
                        if resolved_const is None:
                            resolved_const = types.NoneType("none")
                    except GuardException:
                        pass

                if not isinstance(resolved_const, Unknown):
                    const_conds.append(resolved_const)

            # lhs/rhs are consts
            if len(const_conds) == 2:
                # prune the branch, switch the branch for an unconditional jump
                prune_stat, taken = prune(branch, condition, blk, *const_conds)
                if prune_stat:
                    # add the condition to the list of nullified conditions
                    nullified_conditions.append(
                        nullified(condition, taken, True)
                    )
        else:
            # see if this is a branch on a constant value predicate
            resolved_const = Unknown()
            try:
                pred_call = get_definition(func_ir, branch.cond)
                resolved_const = find_const(func_ir, pred_call.args[0])
                if resolved_const is None:
                    resolved_const = types.NoneType("none")
            except GuardException:
                pass

            # ``resolved_const`` only serves as a "could be resolved" flag
            # here; the node handed to the pruner is ``condition``.
            if not isinstance(resolved_const, Unknown):
                prune_stat, taken = prune_by_predicate(branch, condition, blk)
                if prune_stat:
                    # add the condition to the list of nullified conditions
                    nullified_conditions.append(
                        nullified(condition, taken, False)
                    )

    # 'ERE BE DRAGONS...
    # It is the evaluation of the condition expression that often trips up type
    # inference, so ideally it would be removed as it is effectively rendered
    # dead by the unconditional jump if a branch was pruned. However, there may
    # be references to the condition that exist in multiple places (e.g. dels)
    # and we cannot run DCE here as typing has not taken place to give enough
    # information to run DCE safely. Upshot of all this is the condition gets
    # rewritten below into a benign const that typing will be happy with and DCE
    # can remove it and its reference post typing when it is safe to do so
    # (if desired). It is required that the const is assigned a value that
    # indicates the branch taken as its mutated value would be read in the case
    # of object mode fall back in place of the condition itself. For
    # completeness the func_ir._definitions and ._consts are also updated to
    # make the IR state self consistent.

    deadcond = [x.condition for x in nullified_conditions]
    for _, cond, blk in branch_info:
        if cond in deadcond:
            for x in blk.body:
                if isinstance(x, ir.Assign) and x.value is cond:
                    # rewrite the condition as a true/false bit
                    nullified_info = nullified_conditions[deadcond.index(cond)]
                    # only do a rewrite of conditions, predicates need to retain
                    # their value as they may be used later.
                    if nullified_info.rewrite_stmt:
                        branch_bit = nullified_info.taken_br
                        x.value = ir.Const(branch_bit, loc=x.loc)
                        # update the specific definition to the new const
                        defns = func_ir._definitions[x.target.name]
                        repl_idx = defns.index(cond)
                        defns[repl_idx] = x.value

    # Check post dominators of dead nodes from in the original CFG for use of
    # vars that are being removed in the dead blocks which might be referred to
    # by phi nodes.
    #
    # Multiple things to fix up:
    #
    # 1. Cases like:
    #
    #    A        A
    #    |\       |
    #    | B  --> B
    #    |/       |
    #    C        C
    #
    # i.e. the branch is dead but the block is still alive. In this case CFG
    # simplification will fuse A-B-C and any phi in C can be updated as an
    # direct assignment from the last assigned version in the dominators of the
    # fused block.
    #
    # 2. Cases like:
    #
    #     A        A
    #    / \       |
    #   B   C  --> B
    #    \ /       |
    #     D        D
    #
    # i.e. the block C is dead. In this case the phis in D need updating to
    # reflect the collapse of the phi condition. This should result in a direct
    # assignment of the surviving version in B to the LHS of the phi in D.

    # CFG of the IR as it stands after the branches were replaced by jumps
    new_cfg = compute_cfg_from_blocks(func_ir.blocks)
    dead_blocks = new_cfg.dead_nodes()

    # for all phis that are still in live blocks.
    for phi, lbl in phi2lbl.items():
        if lbl in dead_blocks:
            continue
        # labels of the blocks that still lead into the block of the phi
        new_incoming = [x[0] for x in new_cfg.predecessors(lbl)]
        if set(new_incoming) != set(phi.incoming_blocks):
            # Something has changed in the CFG...
            if len(new_incoming) == 1:
                # There's now just one incoming. Replace the PHI node by a
                # direct assignment
                idx = phi.incoming_blocks.index(new_incoming[0])
                phi2asgn[phi].value = phi.incoming_values[idx]
            else:
                # There's more than one incoming still, then look through the
                # incoming and remove dead
                ic_val_tmp = []
                ic_blk_tmp = []
                for ic_val, ic_blk in zip(
                    phi.incoming_values, phi.incoming_blocks
                ):
                    if ic_blk in dead_blocks:
                        continue
                    else:
                        ic_val_tmp.append(ic_val)
                        ic_blk_tmp.append(ic_blk)
                # the phi's own lists are updated in place (clear + extend)
                # rather than rebound to new list objects
                phi.incoming_values.clear()
                phi.incoming_values.extend(ic_val_tmp)
                phi.incoming_blocks.clear()
                phi.incoming_blocks.extend(ic_blk_tmp)

    # Remove dead blocks, this is safe as it relies on the CFG only.
    for dead in dead_blocks:
        del func_ir.blocks[dead]

    # if conditions were nullified then consts were rewritten, update
    if nullified_conditions:
        func_ir._consts = consts.ConstantInference(func_ir)

    if DEBUG > 1:
        print("after".center(80, "-"))
        print(func_ir.dump())
|