numba-cuda 0.21.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +577 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +556 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +995 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +903 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +158 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
- numba_cuda/numba/cuda/intrinsics.py +382 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1951 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +635 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.21.1.dist-info/METADATA +109 -0
- numba_cuda-0.21.1.dist-info/RECORD +488 -0
- numba_cuda-0.21.1.dist-info/WHEEL +5 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1951 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from collections import namedtuple, defaultdict
|
|
5
|
+
import operator
|
|
6
|
+
import warnings
|
|
7
|
+
from functools import partial
|
|
8
|
+
|
|
9
|
+
from llvmlite import ir as llvm_ir
|
|
10
|
+
|
|
11
|
+
from numba.cuda import HAS_NUMBA
|
|
12
|
+
from numba.cuda.core import ir
|
|
13
|
+
from numba.cuda import debuginfo, cgutils, utils, typing, types
|
|
14
|
+
from numba.cuda.core import (
|
|
15
|
+
ir_utils,
|
|
16
|
+
targetconfig,
|
|
17
|
+
funcdesc,
|
|
18
|
+
config,
|
|
19
|
+
generators,
|
|
20
|
+
removerefctpass,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
from numba.cuda.core.errors import (
|
|
24
|
+
LoweringError,
|
|
25
|
+
new_error_context,
|
|
26
|
+
TypingError,
|
|
27
|
+
LiteralTypingError,
|
|
28
|
+
UnsupportedError,
|
|
29
|
+
NumbaDebugInfoWarning,
|
|
30
|
+
)
|
|
31
|
+
from numba.cuda.core.funcdesc import default_mangler
|
|
32
|
+
from numba.cuda.core.environment import Environment
|
|
33
|
+
from numba.cuda.core.analysis import compute_use_defs, must_use_alloca
|
|
34
|
+
from numba.cuda.misc.firstlinefinder import get_func_body_first_lineno
|
|
35
|
+
from numba.cuda.misc.coverage_support import get_registered_loc_notify
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Lightweight record type with two fields: ``vararg`` and ``index``.
# NOTE(review): presumably identifies a single element (by index) of a
# variadic argument -- confirm against the code that constructs it.
_VarArgItem = namedtuple("_VarArgItem", ("vararg", "index"))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class BaseLower(object):
|
|
42
|
+
"""
|
|
43
|
+
Lower IR to LLVM
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
def __init__(self, context, library, fndesc, func_ir, metadata=None):
    """
    Prepare all state needed to lower ``func_ir``.

    The constructor records its inputs, creates the LLVM IR module through
    ``library``, builds the execution environment from ``fndesc``, derives
    a specialised target context, instantiates the debug-info builder and
    finally invokes the ``init()`` hook.
    """
    # Inputs, plus values read directly from them.
    self.library = library
    self.fndesc = fndesc
    # Blocks are stored in a dict ordered by ascending key.
    self.blocks = dict(sorted(func_ir.blocks.items()))
    self.func_ir = func_ir
    self.generator_info = func_ir.generator_info
    self.metadata = metadata
    # Flags come from the top of the config stack (may be None there).
    self.flags = targetconfig.ConfigStack.top_or_none()

    # LLVM module that will receive the lowered function.
    self.module = library.create_ir_module(fndesc.unique_name)

    # Python execution environment made available to the compiled function.
    self.env = Environment.from_fndesc(fndesc)

    # Bookkeeping used while lowering.
    self.blkmap = {}
    self.pending_phis = {}
    self.varmap = {}
    self.firstblk = min(self.blocks)
    self.loc = -1

    # Target context specialised with this function's environment and
    # descriptor.
    self.context = context.subtarget(
        environment=self.env, fndesc=self.fndesc
    )

    # Choose the real debug-info builder only when debug info is enabled.
    if self.context.enable_debuginfo:
        di_factory = self.context.DIBuilder
    else:
        di_factory = debuginfo.DummyDIBuilder

    # Location treated as the function's definition site in debug info.
    self.defn_loc = self._compute_def_location()

    only_directives = self.flags.dbg_directives_only
    # Note: the builder receives the context passed in by the caller, not
    # the specialised ``self.context``.
    self.debuginfo = di_factory(
        module=self.module,
        filepath=func_ir.loc.filename,
        cgctx=context,
        directives_only=only_directives,
    )

    # Registered location-notification objects.
    self._loc_notify_registry = get_registered_loc_notify()

    # Hook for subclass-specific initialisation.
    self.init()
|
|
99
|
+
|
|
100
|
+
@property
def call_conv(self):
    """Return the calling convention exposed by the target context."""
    target_context = self.context
    return target_context.call_conv
|
|
103
|
+
|
|
104
|
+
def init(self):
    """
    Initialisation hook called at the end of ``__init__``.

    The base implementation does nothing and returns ``None``.
    """
|
|
106
|
+
|
|
107
|
+
def init_pyapi(self):
    """
    Create the Python API helper and the environment manager for the
    function being lowered.

    Nothing happens when ``self.pyapi`` has already been set.
    """
    if self.pyapi is None:
        self.pyapi = self.context.get_python_api(self.builder)

        # Remember the environment manager together with its body and
        # pointer so later lowering steps can reuse them.
        manager = self.context.get_env_manager(self.builder)
        self.env_manager = manager
        self.env_body = manager.env_body
        self.envarg = manager.env_ptr
|
|
120
|
+
|
|
121
|
+
def _compute_def_location(self):
|
|
122
|
+
# Debuginfo requires source to be accurate. Find it and warn if not
|
|
123
|
+
# found. If it's not found, use the func_ir line + 1, this assumes that
|
|
124
|
+
# the function definition is decorated with a 1 line jit decorator.
|
|
125
|
+
defn_loc = self.func_ir.loc.with_lineno(self.func_ir.loc.line + 1)
|
|
126
|
+
if self.context.enable_debuginfo:
|
|
127
|
+
fn = self.func_ir.func_id.func
|
|
128
|
+
optional_lno = get_func_body_first_lineno(fn)
|
|
129
|
+
if optional_lno is not None:
|
|
130
|
+
# -1 as lines start at 1 and this is an offset.
|
|
131
|
+
offset = optional_lno - 1
|
|
132
|
+
defn_loc = self.func_ir.loc.with_lineno(offset)
|
|
133
|
+
else:
|
|
134
|
+
msg = (
|
|
135
|
+
"Could not find source for function: "
|
|
136
|
+
f"{self.func_ir.func_id.func}. Debug line information "
|
|
137
|
+
"may be inaccurate."
|
|
138
|
+
)
|
|
139
|
+
warnings.warn(NumbaDebugInfoWarning(msg))
|
|
140
|
+
return defn_loc
|
|
141
|
+
|
|
142
|
+
def pre_lower(self):
    """
    Hook invoked once before any block is lowered.
    """
    # The same Lower object may emit several LL functions (for generators),
    # so a fresh API and EnvironmentManager must be created each time.
    self.pyapi = None

    fndesc = self.fndesc
    subprogram_info = dict(
        function=self.builder.function,
        qualname=fndesc.qualname,
        argnames=fndesc.args,
        argtypes=fndesc.argtypes,
        line=self.defn_loc.line,
    )
    self.debuginfo.mark_subprogram(**subprogram_info)

    # With full debug info, turn inlining off where that is allowed so the
    # debug experience is better; 'alwaysinline' functions are left alone.
    fn_attrs = self.builder.function.attributes
    flags = self.flags
    wants_full_debug = flags.debuginfo and not flags.dbg_directives_only
    if wants_full_debug and "alwaysinline" not in fn_attrs:
        fn_attrs.add("noinline")
|
|
165
|
+
|
|
166
|
+
def post_lower(self):
    """
    Hook invoked once every block has been lowered.

    Finalizes the debug info and closes each registered loc-notify object.
    """
    self.debuginfo.finalize()
    for listener in self._loc_notify_registry:
        listener.close()
|
|
173
|
+
|
|
174
|
+
def pre_block(self, block):
    """
    Hook invoked right before *block* is lowered.

    The base implementation does nothing.
    """
|
|
178
|
+
|
|
179
|
+
def post_block(self, block):
    """
    Hook invoked right after *block* has been lowered.

    The base implementation does nothing.
    """
|
|
183
|
+
|
|
184
|
+
def return_dynamic_exception(self, exc_class, exc_args, nb_types, loc=None):
    """
    Forward a dynamic user exception to the calling convention's
    ``return_dynamic_user_exc``, tagging it with this function's name.
    """
    raising_func = self.func_ir.func_id.func_name
    self.call_conv.return_dynamic_user_exc(
        self.builder,
        exc_class,
        exc_args,
        nb_types,
        loc=loc,
        func_name=raising_func,
    )
|
|
193
|
+
|
|
194
|
+
def return_exception(self, exc_class, exc_args=None, loc=None):
    """Propagate exception to the caller through the calling convention."""
    raising_func = self.func_ir.func_id.func_name
    self.call_conv.return_user_exc(
        self.builder,
        exc_class,
        exc_args,
        loc=loc,
        func_name=raising_func,
    )
|
|
203
|
+
|
|
204
|
+
def set_exception(self, exc_class, exc_args=None, loc=None):
    """Set exception state in the current function via the calling convention."""
    raising_func = self.func_ir.func_id.func_name
    self.call_conv.set_static_user_exc(
        self.builder,
        exc_class,
        exc_args,
        loc=loc,
        func_name=raising_func,
    )
|
|
213
|
+
|
|
214
|
+
def emit_environment_object(self):
    """Declare the module-level global that holds the Environment object."""
    ctx = self.context
    # The global is named after the function descriptor.
    env_global_name = ctx.get_env_name(self.fndesc)
    ctx.declare_env_global(self.module, env_global_name)
|
|
219
|
+
|
|
220
|
+
def lower(self):
    """
    Lower the function (or generator) into ``self.module`` and hand the
    module over to ``self.library``.
    """
    # Emit the Env into the module
    self.emit_environment_object()

    if self.generator_info is not None:
        # Generator: lower the init and next functions, plus the finalizer
        # when the generator type has one.
        genlower = self.GeneratorLower(self)
        self.genlower = genlower
        self.gentype = genlower.gentype

        genlower.lower_init_func(self)
        genlower.lower_next_func(self)
        if self.gentype.has_finalizer:
            genlower.lower_finalize_func(self)
    else:
        self.genlower = None
        self.lower_normal_function(self.fndesc)

    if config.DUMP_LLVM:
        utils.dump_llvm(self.fndesc, self.module)

    # Special optimization to remove NRT on functions that do not need it.
    if self.context.enable_nrt and self.generator_info is None:
        removerefctpass.remove_unnecessary_nrt_usage(
            self.function, context=self.context, fndesc=self.fndesc
        )

    # Run target specific post lowering transformation
    self.context.post_lowering(self.module, self.library)

    # Materialize LLVM Module
    self.library.add_ir_module(self.module)
|
|
249
|
+
|
|
250
|
+
def extract_function_arguments(self):
    """
    Decode the LL function's arguments through the calling convention,
    store them on ``self.fnargs`` and return them.
    """
    decoded = self.call_conv.decode_arguments(
        self.builder, self.fndesc.argtypes, self.function
    )
    self.fnargs = decoded
    return decoded
|
|
255
|
+
|
|
256
|
+
def lower_normal_function(self, fndesc):
    """
    Lower the non-generator function described by *fndesc*.
    """
    self.setup_function(fndesc)

    # Init argument values
    self.extract_function_arguments()
    tail_of_entry = self.lower_function_body()

    # Close the tail of the entry block with debug metadata emission
    # suspended; otherwise the unconditional jump would pick up the metadata
    # from the end of the function body.
    with debuginfo.suspend_emission(self.builder):
        self.builder.position_at_end(tail_of_entry)
        first_bb = self.blkmap[self.firstblk]
        self.builder.branch(first_bb)
|
|
272
|
+
|
|
273
|
+
def lower_function_body(self):
    """
    Lower the body of the current function.

    Returns the basic block the builder is positioned in once
    ``pre_lower()`` has run (the tail of the entry block).
    """
    # Init Python blocks: one LL basic block per IR block
    llfunc = self.function
    for key in self.blocks:
        self.blkmap[key] = llfunc.append_basic_block("B%s" % key)

    self.pre_lower()
    # pre_lower() may have changed the current basic block
    tail_of_entry = self.builder.basic_block

    self.debug_print(
        "# function begin: {0}".format(self.fndesc.unique_name)
    )

    # Lower all blocks
    for offset, ir_block in self.blocks.items():
        self.builder.position_at_end(self.blkmap[offset])
        self.debug_print(f"# lower block: {offset}")
        self.lower_block(ir_block)

    self.post_lower()
    return tail_of_entry
|
|
298
|
+
|
|
299
|
+
def lower_block(self, block):
    """
    Lower every instruction of *block*, bracketed by the ``pre_block`` and
    ``post_block`` hooks.
    """
    self.pre_block(block)
    for stmt in block.body:
        # Track the location of the instruction being lowered so errors
        # raised while lowering it are reported against that location.
        self.loc = stmt.loc
        error_factory = partial(LoweringError, loc=self.loc)
        error_scope = new_error_context(
            'lowering "{inst}" at {loc}',
            inst=stmt,
            loc=self.loc,
            errcls_=error_factory,
        )
        with error_scope:
            self.lower_inst(stmt)
    self.post_block(block)
|
|
315
|
+
|
|
316
|
+
def create_cpython_wrapper(self, release_gil=False):
    """
    Create CPython wrapper(s) around this function (or generator).

    When a generator was lowered, a wrapper is created for the generator
    descriptor first, followed by the wrapper for the function descriptor.
    """
    descriptors = []
    if self.genlower:
        descriptors.append(self.genlower.gendesc)
    descriptors.append(self.fndesc)

    for desc in descriptors:
        self.context.create_cpython_wrapper(
            self.library,
            desc,
            self.env,
            self.call_helper,
            release_gil=release_gil,
        )
|
|
335
|
+
|
|
336
|
+
def create_cfunc_wrapper(self):
    """
    Create C wrapper around this function.

    Raises ``UnsupportedError`` when a generator was lowered.
    """
    if self.genlower:
        raise UnsupportedError("generator as a first-class function type")
    ctx = self.context
    ctx.create_cfunc_wrapper(
        self.library, self.fndesc, self.env, self.call_helper
    )
|
|
345
|
+
|
|
346
|
+
def setup_function(self, fndesc):
    """
    Declare the LL function for *fndesc* and create its entry block,
    IR builder and call helper.
    """
    llfunc = self.context.declare_function(self.module, fndesc)
    self.function = llfunc

    if self.flags.dbg_optnone:
        fn_attrs = llfunc.attributes
        # 'alwaysinline' functions are left untouched.
        if "alwaysinline" not in fn_attrs:
            fn_attrs.add("optnone")
            fn_attrs.add("noinline")

    self.entry_block = llfunc.append_basic_block("entry")
    self.builder = llvm_ir.IRBuilder(self.entry_block)
    self.call_helper = self.call_conv.init_call_helper(self.builder)
|
|
357
|
+
|
|
358
|
+
def typeof(self, varname):
    """Look up *varname* in the function descriptor's typemap."""
    typemap = self.fndesc.typemap
    return typemap[varname]
|
|
360
|
+
|
|
361
|
+
def notify_loc(self, loc: ir.Loc) -> None:
    """Tell every registered loc-notify object that an instruction at
    `loc` is about to be lowered.
    """
    for listener in self._loc_notify_registry:
        listener.notify(loc)
|
|
367
|
+
|
|
368
|
+
def debug_print(self, msg):
    """
    Emit *msg* through the context's ``debug_print``, prefixed with the
    function's qualified name. Does nothing unless ``config.DEBUG_JIT``
    is set.
    """
    if not config.DEBUG_JIT:
        return
    self.context.debug_print(
        self.builder, f"DEBUGJIT [{self.fndesc.qualname}]: {msg}"
    )
|
|
373
|
+
|
|
374
|
+
def print_variable(self, msg, varname):
    """Debugging helper that emits ``print(msg, varname)``.

    Parameters
    ----------
    msg : str
        Literal string to be printed.
    varname : str
        Name of the variable whose value will be printed.
    """
    # Types: the message as a literal type, the variable from the typemap.
    literal_ty = types.literal(msg)
    var_ty = self.fndesc.typemap[varname]

    # Values: a dummy value stands in for the literal message.
    placeholder = self.context.get_dummy_value()
    loaded = self.loadvar(varname)

    sig = typing.signature(types.none, literal_ty, var_ty)
    print_impl = self.context.get_function(print, sig)
    print_impl(self.builder, (placeholder, loaded))
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
class Lower(BaseLower):
|
|
396
|
+
GeneratorLower = generators.GeneratorLower
|
|
397
|
+
|
|
398
|
+
def init(self):
    """Run the base-class hook, then record the singly assigned variables."""
    super(Lower, self).init()
    # find all singly assigned variables
    self._find_singly_assigned_variable()
|
|
402
|
+
|
|
403
|
+
@property
|
|
404
|
+
def _disable_sroa_like_opt(self):
|
|
405
|
+
"""Flags that the SROA like optimisation that Numba performs (which
|
|
406
|
+
prevent alloca and subsequent load/store for locals) should be disabled.
|
|
407
|
+
Currently, this is conditional solely on the presence of a request for
|
|
408
|
+
the emission of debug information."""
|
|
409
|
+
if self.flags is None:
|
|
410
|
+
return False
|
|
411
|
+
|
|
412
|
+
return self.flags.debuginfo and not self.flags.dbg_directives_only
|
|
413
|
+
|
|
414
|
+
def _find_singly_assigned_variable(self):
|
|
415
|
+
func_ir = self.func_ir
|
|
416
|
+
blocks = func_ir.blocks
|
|
417
|
+
|
|
418
|
+
sav = set()
|
|
419
|
+
|
|
420
|
+
if not self.func_ir.func_id.is_generator:
|
|
421
|
+
use_defs = compute_use_defs(blocks)
|
|
422
|
+
alloca_vars = must_use_alloca(blocks)
|
|
423
|
+
|
|
424
|
+
# Compute where variables are defined
|
|
425
|
+
var_assign_map = defaultdict(set)
|
|
426
|
+
for blk, vl in use_defs.defmap.items():
|
|
427
|
+
for var in vl:
|
|
428
|
+
var_assign_map[var].add(blk)
|
|
429
|
+
|
|
430
|
+
# Compute where variables are used
|
|
431
|
+
var_use_map = defaultdict(set)
|
|
432
|
+
for blk, vl in use_defs.usemap.items():
|
|
433
|
+
for var in vl:
|
|
434
|
+
var_use_map[var].add(blk)
|
|
435
|
+
|
|
436
|
+
# Keep only variables that are defined locally and used locally
|
|
437
|
+
for var in var_assign_map:
|
|
438
|
+
if var not in alloca_vars and len(var_assign_map[var]) == 1:
|
|
439
|
+
# Usemap does not keep locally defined variables.
|
|
440
|
+
if len(var_use_map[var]) == 0:
|
|
441
|
+
# Ensure that the variable is not defined multiple times
|
|
442
|
+
# in the block
|
|
443
|
+
[defblk] = var_assign_map[var]
|
|
444
|
+
assign_stmts = self.blocks[defblk].find_insts(ir.Assign)
|
|
445
|
+
assigns = [
|
|
446
|
+
stmt
|
|
447
|
+
for stmt in assign_stmts
|
|
448
|
+
if stmt.target.name == var
|
|
449
|
+
]
|
|
450
|
+
if len(assigns) == 1:
|
|
451
|
+
sav.add(var)
|
|
452
|
+
|
|
453
|
+
self._singly_assigned_vars = sav
|
|
454
|
+
self._blk_local_varmap = {}
|
|
455
|
+
|
|
456
|
+
def pre_block(self, block):
    """
    Called before lowering *block*.

    Records the block as ``self._cur_ir_block`` and, when the block calls
    ``eh.exception_check`` and ends in an ``ir.Branch``, marks the builder
    as being inside a try block by setting ``builder._in_try_block``.

    Parameters
    ----------
    block
        The IR block about to be lowered.
    """
    from numba.cuda.core.unsafe import eh

    super(Lower, self).pre_block(block)
    self._cur_ir_block = block

    # NOTE(review): ``self.firstblk`` is assigned from
    # ``min(self.blocks.keys())`` while ``block`` is a value of
    # ``self.blocks``; whether this comparison can ever be true depends on
    # the block type's equality semantics -- confirm.
    if block == self.firstblk:
        # create slots for all the vars, irrespective of whether they are
        # initialized, SSA will pick this up and warn users about using
        # uninitialized variables. Slots are added as alloca in the first
        # block
        bb = self.blkmap[self.firstblk]
        self.builder.position_at_end(bb)
        all_names = set()
        # Use a distinct loop variable: rebinding ``block`` here would make
        # the try-block detection below inspect the last block of the
        # function instead of the block being lowered.
        for ir_block in self.blocks.values():
            for x in ir_block.find_insts(ir.Del):
                all_names.add(x.value)
        for name in all_names:
            fetype = self.typeof(name)
            self._alloca_var(name, fetype)

    # Detect if we are in a TRY block by looking for a call to
    # `eh.exception_check`.
    for call in block.find_exprs(op="call"):
        defn = ir_utils.guard(
            ir_utils.get_definition,
            self.func_ir,
            call.func,
        )
        if defn is not None and isinstance(defn, ir.Global):
            if defn.value is eh.exception_check:
                if isinstance(block.terminator, ir.Branch):
                    targetblk = self.blkmap[block.terminator.truebr]
                    # NOTE: This hacks in an attribute for call_conv to
                    # pick up. This hack is no longer needed when
                    # all old-style implementations are gone.
                    self.builder._in_try_block = {"target": targetblk}
                    break
|
|
495
|
+
|
|
496
|
+
def post_block(self, block):
    """
    Called after lowering *block*.

    Removes the ``_in_try_block`` marker from the builder if one is set.
    """
    # Clean-up
    try:
        delattr(self.builder, "_in_try_block")
    except AttributeError:
        # No marker was set for this block.
        pass
|
|
502
|
+
|
|
503
|
+
def lower_inst(self, inst):
    """
    Lower a single IR statement *inst*, dispatching on its type.

    The debug location is marked from ``self.loc`` and the loc-notify
    objects are informed before the statement is lowered.

    Raises
    ------
    NotImplementedError
        If *inst* is not one of the statement types handled below.
    """
    # Set debug location for all subsequent LL instructions
    self.debuginfo.mark_location(self.builder, self.loc.line)
    self.notify_loc(self.loc)
    self.debug_print(str(inst))
    if isinstance(inst, ir.Assign):
        # Lower the right-hand side, then store it into the target variable.
        ty = self.typeof(inst.target.name)
        val = self.lower_assign(ty, inst)
        argidx = None
        # If this is a store from an arg, like x = arg.x then tell debuginfo
        # that this is the arg
        if isinstance(inst.value, ir.Arg):
            # NOTE: debug location is the `def <func>` line
            self.debuginfo.mark_location(self.builder, self.defn_loc.line)
            argidx = inst.value.index + 1  # args start at 1
        self.storevar(val, inst.target.name, argidx=argidx)

    elif isinstance(inst, ir.Branch):
        cond = self.loadvar(inst.cond.name)
        tr = self.blkmap[inst.truebr]
        fl = self.blkmap[inst.falsebr]

        # The condition is cast to boolean before the conditional branch.
        condty = self.typeof(inst.cond.name)
        pred = self.context.cast(self.builder, cond, condty, types.boolean)
        assert pred.type == llvm_ir.IntType(1), (
            "cond is not i1: %s" % pred.type
        )
        self.builder.cbranch(pred, tr, fl)

    elif isinstance(inst, ir.Jump):
        target = self.blkmap[inst.target]
        self.builder.branch(target)

    elif isinstance(inst, ir.Return):
        if self.generator_info:
            # StopIteration
            self.genlower.return_from_generator(self)
            return
        val = self.loadvar(inst.value.name)
        oty = self.typeof(inst.value.name)
        ty = self.fndesc.restype
        if isinstance(ty, types.Optional):
            # If returning an optional type
            self.call_conv.return_optional_value(self.builder, ty, oty, val)
            return
        # Otherwise the variable's type must equal the declared return type.
        assert ty == oty, (
            "type '{}' does not match return type '{}'".format(oty, ty)
        )
        retval = self.context.get_return_value(self.builder, ty, val)
        self.call_conv.return_value(self.builder, retval)

    elif isinstance(inst, ir.PopBlock):
        pass  # this is just a marker

    elif isinstance(inst, ir.StaticSetItem):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        try:
            impl = self.context.get_function("static_setitem", signature)
        except NotImplementedError:
            # No static implementation: fall back to the generic setitem
            # using the index variable.
            return self.lower_setitem(
                inst.target, inst.index_var, inst.value, signature
            )
        else:
            target = self.loadvar(inst.target.name)
            value = self.loadvar(inst.value.name)
            valuety = self.typeof(inst.value.name)
            value = self.context.cast(
                self.builder, value, valuety, signature.args[2]
            )
            # The static index (inst.index) is passed as-is.
            return impl(self.builder, (target, inst.index, value))

    elif isinstance(inst, ir.Print):
        self.lower_print(inst)

    elif isinstance(inst, ir.SetItem):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        return self.lower_setitem(
            inst.target, inst.index, inst.value, signature
        )

    elif isinstance(inst, ir.StoreMap):
        # Lowered through the same path as a setitem on the dict.
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        return self.lower_setitem(inst.dct, inst.key, inst.value, signature)

    elif isinstance(inst, ir.DelItem):
        target = self.loadvar(inst.target.name)
        index = self.loadvar(inst.index.name)

        targetty = self.typeof(inst.target.name)
        indexty = self.typeof(inst.index.name)

        signature = self.fndesc.calltypes[inst]
        assert signature is not None

        # Resolve the implementation of operator.delitem for this signature.
        op = operator.delitem
        fnop = self.context.typing_context.resolve_value_type(op)
        callsig = fnop.get_call_type(
            self.context.typing_context,
            signature.args,
            {},
        )
        impl = self.context.get_function(fnop, callsig)

        assert targetty == signature.args[0]
        index = self.context.cast(
            self.builder, index, indexty, signature.args[1]
        )

        return impl(self.builder, (target, index))

    elif isinstance(inst, ir.Del):
        self.delvar(inst.value)

    elif isinstance(inst, ir.SetAttr):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        signature = self.fndesc.calltypes[inst]

        targetty = self.typeof(inst.target.name)
        valuety = self.typeof(inst.value.name)
        assert signature is not None
        assert signature.args[0] == targetty
        impl = self.context.get_setattr(inst.attr, signature)

        # Convert argument to match
        value = self.context.cast(
            self.builder, value, valuety, signature.args[1]
        )

        return impl(self.builder, (target, value))

    elif isinstance(inst, ir.DynamicRaise):
        self.lower_dynamic_raise(inst)

    elif isinstance(inst, ir.DynamicTryRaise):
        self.lower_try_dynamic_raise(inst)

    elif isinstance(inst, ir.StaticRaise):
        self.lower_static_raise(inst)

    elif isinstance(inst, ir.StaticTryRaise):
        self.lower_static_try_raise(inst)

    else:
        raise NotImplementedError(type(inst))
|
|
651
|
+
|
|
652
|
+
def lower_setitem(self, target_var, index_var, value_var, signature):
    """
    Lower ``target_var[index_var] = value_var`` using the implementation of
    ``operator.setitem`` resolved for *signature*.

    The index and value are cast to ``signature.args[1]`` and
    ``signature.args[2]``. An Optional target is cast to its underlying
    type. Returns whatever the implementation returns.
    """
    ctx = self.context
    builder = self.builder

    # Load values first (target, value, index), then look up their types.
    target = self.loadvar(target_var.name)
    value = self.loadvar(value_var.name)
    index = self.loadvar(index_var.name)

    targetty = self.typeof(target_var.name)
    valuety = self.typeof(value_var.name)
    indexty = self.typeof(index_var.name)

    setitem_fn = ctx.typing_context.resolve_value_type(operator.setitem)
    callsig = setitem_fn.get_call_type(
        ctx.typing_context,
        signature.args,
        {},
    )
    impl = ctx.get_function(setitem_fn, callsig)

    # Convert argument to match
    if not isinstance(targetty, types.Optional):
        unlit = types.unliteral
        assert unlit(targetty) == unlit(signature.args[0])
    else:
        target = ctx.cast(builder, target, targetty, targetty.type)

    index = ctx.cast(builder, index, indexty, signature.args[1])
    value = ctx.cast(builder, value, valuety, signature.args[2])

    return impl(builder, (target, index, value))
|
|
687
|
+
|
|
688
|
+
def lower_try_dynamic_raise(self, inst):
    """
    Lower a dynamic raise inside a try block by delegating to
    ``lower_static_try_raise``.
    """
    # Numba is rather limited in what it can do with exceptions inside a
    # try block, so reusing the static try raise code is safe.
    self.lower_static_try_raise(inst)
|
|
692
|
+
|
|
693
|
+
def lower_dynamic_raise(self, inst):
    """
    Lower a raise whose arguments may include runtime values.

    ``ir.Var`` arguments are loaded and incref'ed and their type is
    recorded. Any other argument is passed through unchanged with ``None``
    as its type.
    """
    lowered_args = []
    arg_types = []
    for item in inst.exc_args:
        if isinstance(item, ir.Var):
            # dynamic values
            item_ty = self.typeof(item.name)
            item_val = self.loadvar(item.name)
            self.incref(item_ty, item_val)
        else:
            item_ty, item_val = None, item
        arg_types.append(item_ty)
        lowered_args.append(item_val)

    self.return_dynamic_exception(
        inst.exc_class, tuple(lowered_args), tuple(arg_types), loc=self.loc
    )
|
|
712
|
+
|
|
713
|
+
def lower_static_raise(self, inst):
    """
    Lower a static raise through ``return_exception``; a missing exception
    class means a reraise.
    """
    exc_class = inst.exc_class
    if exc_class is not None:
        self.return_exception(exc_class, inst.exc_args, loc=self.loc)
        return
    # Reraise
    self.return_exception(None, loc=self.loc)
|
|
719
|
+
|
|
720
|
+
def lower_static_try_raise(self, inst):
    """
    Lower a static raise inside a try block through ``set_exception``; a
    missing exception class means a reraise.
    """
    exc_class = inst.exc_class
    if exc_class is not None:
        self.set_exception(exc_class, inst.exc_args, loc=self.loc)
        return
    # Reraise
    self.set_exception(None, loc=self.loc)
|
|
726
|
+
|
|
727
|
+
def lower_assign(self, ty, inst):
    """
    Lower the right-hand side of the assignment *inst* and return the
    resulting low-level value, typed as *ty*.

    Every branch except ``ir.Expr`` increfs the result before returning it;
    expressions are delegated to ``lower_expr``.

    Raises
    ------
    NotImplementedError
        If the right-hand side is not one of the handled kinds.
    """
    value = inst.value
    # In nopython mode, closure vars are frozen like globals
    if isinstance(value, (ir.Const, ir.Global, ir.FreeVar)):
        res = self.context.get_constant_generic(
            self.builder, ty, value.value
        )
        self.incref(ty, res)
        return res

    elif isinstance(value, ir.Expr):
        return self.lower_expr(ty, value)

    elif isinstance(value, ir.Var):
        # Variable-to-variable copy: load, cast to the target type, incref.
        val = self.loadvar(value.name)
        oty = self.typeof(value.name)
        res = self.context.cast(self.builder, val, oty, ty)
        self.incref(ty, res)
        return res

    elif isinstance(value, ir.Arg):
        # Suspend debug info else all the arg repacking ends up being
        # associated with some line or other and it's actually just a detail
        # of Numba's CC.
        with debuginfo.suspend_emission(self.builder):
            # Cast from the argument type to the local variable type
            # (note the "arg.FOO" convention as used in typeinfer)
            argty = self.typeof("arg." + value.name)
            if isinstance(argty, types.Omitted):
                # Omitted argument: materialize its recorded value as a
                # constant instead of reading from the function arguments.
                pyval = argty.value
                tyctx = self.context.typing_context
                valty = tyctx.resolve_value_type_prefer_literal(pyval)
                # use the type of the constant value
                const = self.context.get_constant_generic(
                    self.builder,
                    valty,
                    pyval,
                )
                # cast it to the variable type
                res = self.context.cast(self.builder, const, valty, ty)
            else:
                val = self.fnargs[value.index]
                res = self.context.cast(self.builder, val, argty, ty)
            self.incref(ty, res)
            return res

    elif isinstance(value, ir.Yield):
        res = self.lower_yield(ty, value)
        self.incref(ty, res)
        return res

    raise NotImplementedError(type(value), value)
|
|
779
|
+
|
|
780
|
+
def lower_yield(self, retty, inst):
    """
    Lower the yield expression *inst*.

    The yielded value is cast to the generator's yield type and returned to
    the caller between the suspend and resume steps. The expression itself
    evaluates to a ``None`` constant of type *retty*.
    """
    yield_point = self.generator_info.yield_points[inst.index]
    assert yield_point.inst is inst
    suspender = generators.LowerYield(
        self, yield_point, yield_point.live_vars
    )
    suspender.lower_yield_suspend()

    # Yield to caller
    yielded_name = inst.value.name
    local_val = self.loadvar(yielded_name)
    local_ty = self.typeof(yielded_name)
    yield_ty = self.gentype.yield_type

    # cast the local val to the type yielded
    converted = self.context.cast(
        self.builder, local_val, local_ty, yield_ty
    )

    # get the return repr of yielded value
    retval = self.context.get_return_value(
        self.builder,
        yield_ty,
        converted,
    )

    # return
    self.call_conv.return_value(self.builder, retval)

    # Resumption point
    suspender.lower_yield_resume()
    # None is returned by the yield expression
    return self.context.get_constant_generic(self.builder, retty, None)
|
|
807
|
+
|
|
808
|
+
def lower_binop(self, resty, expr, op):
    """
    Lower the binary operation *expr* for operator *op* and return the
    result cast to *resty*.

    Implementations taking static operands are tried first, in this order:
    both operands static, static lhs only, static rhs only. If none
    applies, the generic implementation for the typed signature is used.
    """
    # if op in utils.OPERATORS_TO_BUILTINS:
    # map operator.the_op => the corresponding types.Function()
    # TODO: this looks dodgy ...
    op = self.context.typing_context.resolve_value_type(op)

    lhs = expr.lhs
    rhs = expr.rhs
    static_lhs = expr.static_lhs
    static_rhs = expr.static_rhs
    lty = self.typeof(lhs.name)
    rty = self.typeof(rhs.name)
    # From here on lhs/rhs are the loaded low-level values.
    lhs = self.loadvar(lhs.name)
    rhs = self.loadvar(rhs.name)

    # Convert argument to match
    signature = self.fndesc.calltypes[expr]
    lhs = self.context.cast(self.builder, lhs, lty, signature.args[0])
    rhs = self.context.cast(self.builder, rhs, rty, signature.args[1])

    def cast_result(res):
        # Cast from the signature's return type to the requested type.
        return self.context.cast(
            self.builder, res, signature.return_type, resty
        )

    # First try with static operands, if known
    def try_static_impl(tys, args):
        # Returns None when a static operand is undefined, when typing
        # fails, or when no implementation exists for the static signature.
        if any(a is ir.UNDEFINED for a in args):
            return None
        try:
            if isinstance(op, types.Function):
                static_sig = op.get_call_type(
                    self.context.typing_context, tys, {}
                )
            else:
                static_sig = typing.signature(signature.return_type, *tys)
        except TypingError:
            return None
        try:
            static_impl = self.context.get_function(op, static_sig)
            return static_impl(self.builder, args)
        except NotImplementedError:
            return None

    # Both operands static
    res = try_static_impl(
        (_lit_or_omitted(static_lhs), _lit_or_omitted(static_rhs)),
        (static_lhs, static_rhs),
    )
    if res is not None:
        return cast_result(res)

    # Static lhs, runtime rhs
    res = try_static_impl(
        (_lit_or_omitted(static_lhs), rty),
        (static_lhs, rhs),
    )
    if res is not None:
        return cast_result(res)

    # Runtime lhs, static rhs
    res = try_static_impl(
        (lty, _lit_or_omitted(static_rhs)),
        (lhs, static_rhs),
    )
    if res is not None:
        return cast_result(res)

    # Normal implementation for generic arguments

    sig = op.get_call_type(self.context.typing_context, signature.args, {})
    impl = self.context.get_function(op, sig)
    res = impl(self.builder, (lhs, rhs))
    return cast_result(res)
|
|
879
|
+
|
|
880
|
+
def lower_getitem(self, resty, expr, value, index, signature):
    """
    Lower ``value[index]`` using the implementation of ``operator.getitem``
    resolved for *signature*, and return the result cast to *resty*.
    """
    ctx = self.context

    loaded = (self.loadvar(value.name), self.loadvar(index.name))

    # Get implementation of getitem
    getitem_fn = ctx.typing_context.resolve_value_type(operator.getitem)
    callsig = getitem_fn.get_call_type(
        ctx.typing_context,
        signature.args,
        {},
    )
    impl = ctx.get_function(getitem_fn, callsig)

    # Cast each operand from its variable type to the signature's type.
    var_types = (self.typeof(value.name), self.typeof(index.name))
    cast_args = []
    for llval, fromty, toty in zip(loaded, var_types, signature.args):
        cast_args.append(ctx.cast(self.builder, llval, fromty, toty))

    raw = impl(self.builder, cast_args)
    return ctx.cast(self.builder, raw, signature.return_type, resty)
|
|
903
|
+
|
|
904
|
+
def _cast_var(self, var, ty):
    """
    Return the low-level value of the Numba IR variable *var*, cast to the
    Numba type *ty*.

    A ``_VarArgItem`` is read as the element at ``var.index`` of its
    ``vararg`` variable.
    """
    if not isinstance(var, _VarArgItem):
        varty = self.typeof(var.name)
        val = self.loadvar(var.name)
    else:
        packed_name = var.vararg.name
        varty = self.typeof(packed_name)[var.index]
        val = self.builder.extract_value(
            self.loadvar(packed_name), var.index
        )
    return self.context.cast(self.builder, val, varty, ty)
|
|
918
|
+
|
|
919
|
+
def fold_call_args(self, fnty, signature, pos_args, vararg, kw_args):
    """
    Build the list of low-level argument values for a call.

    Items of *vararg* (if given) are appended to the positional arguments.
    Without a Python signature the positional arguments are cast pairwise
    to ``signature.args``, and keyword arguments raise
    ``NotImplementedError``. With one, keywords, defaults and star-args are
    resolved through ``typing.fold_arguments``.
    """
    if vararg:
        # Inject *args from function call
        # The lowering will be done in _cast_var() above.
        tp_vararg = self.typeof(vararg.name)
        assert isinstance(tp_vararg, types.BaseTuple)
        unpacked = [_VarArgItem(vararg, i) for i in range(len(tp_vararg))]
        pos_args = pos_args + unpacked

    # Fold keyword arguments and resolve default argument values
    pysig = signature.pysig
    if pysig is not None:

        def normal_handler(index, param, var):
            return self._cast_var(var, signature.args[index])

        def default_handler(index, param, default):
            return self.context.get_constant_generic(
                self.builder, signature.args[index], default
            )

        def stararg_handler(index, param, vars):
            stararg_ty = signature.args[index]
            assert isinstance(stararg_ty, types.BaseTuple), stararg_ty
            values = [
                self._cast_var(var, sigty)
                for var, sigty in zip(vars, stararg_ty)
            ]
            return cgutils.make_anonymous_struct(self.builder, values)

        return typing.fold_arguments(
            pysig,
            pos_args,
            dict(kw_args),
            normal_handler,
            default_handler,
            stararg_handler,
        )

    if kw_args:
        raise NotImplementedError(
            "unsupported keyword arguments when calling %s" % (fnty,)
        )
    return [
        self._cast_var(var, sigty)
        for var, sigty in zip(pos_args, signature.args)
    ]
|
|
968
|
+
|
|
969
|
+
def lower_print(self, inst):
    """
    Lower an ir.Print() node.

    The node is handled, as far as possible, as a normal call to the
    built-in print(), which keeps the special ir.Print rewrite easy to undo
    once it becomes unnecessary (e.g. when native strings exist).
    """
    sig = self.fndesc.calltypes[inst]
    assert sig.return_type == types.none
    fnty = self.context.typing_context.resolve_value_type(print)

    # Fix the call signature to inject any constant-inferred
    # string argument
    arg_types = list(sig.args)
    arg_vars = list(inst.args)
    for position in range(len(arg_vars)):
        if position not in inst.consts:
            continue
        const = inst.consts[position]
        if isinstance(const, str):
            arg_types[position] = types.literal(const)

    literal_sig = typing.signature(sig.return_type, *arg_types).replace(
        pysig=sig.pysig
    )

    # The values are folded against the original signature; only the
    # implementation lookup uses the literal-aware one.
    argvals = self.fold_call_args(fnty, sig, arg_vars, inst.vararg, {})
    impl = self.context.get_function(print, literal_sig)
    impl(self.builder, argvals)
|
|
997
|
+
|
|
998
|
+
def lower_call(self, resty, expr):
    """
    Lower a call expression and return its value cast to ``resty``.

    The type of the callee variable selects one of the ``_lower_call_*``
    helpers.  A call whose signature returns a ``types.Phantom`` is not
    lowered at all and yields the context's dummy value.

    Raises LoweringError when the helper returns None although the
    signature's return type is not ``types.void``.
    """
    signature = self.fndesc.calltypes[expr]
    self.debug_print("# lower_call: expr = {0}".format(expr))
    if isinstance(signature.return_type, types.Phantom):
        return self.context.get_dummy_value()

    fnty = self.typeof(expr.func.name)

    # Pick the specialised lowering routine; anything unrecognised is
    # treated as a normal function.
    if isinstance(fnty, types.ObjModeDispatcher):
        lower = self._lower_call_ObjModeDispatcher
    elif isinstance(fnty, types.ExternalFunction):
        lower = self._lower_call_ExternalFunction
    elif isinstance(fnty, types.ExternalFunctionPointer):
        lower = self._lower_call_ExternalFunctionPointer
    elif isinstance(fnty, types.RecursiveCall):
        lower = self._lower_call_RecursiveCall
    else:
        lower = self._lower_call_normal
    res = lower(fnty, expr, signature)

    # If lowering the call returned None, interpret that as returning dummy
    # value if the return type of the function is void, otherwise there is
    # a problem
    if res is None:
        if signature.return_type == types.void:
            res = self.context.get_dummy_value()
        else:
            raise LoweringError(
                msg="non-void function returns None from implementation",
                loc=self.loc,
            )

    return self.context.cast(
        self.builder, res, signature.return_type, resty
    )
|
|
1038
|
+
|
|
1039
|
+
def _lower_call_ObjModeDispatcher(self, fnty, expr, signature):
    """
    Lower a call to an object-mode dispatcher.

    With the GIL held, the arguments are boxed into Python objects, the
    dispatcher obtained from ``ObjModeUtils`` is called, and the returned
    object is unboxed to ``fnty.dispatcher.output_types``.  A NULL return
    value and an unboxing error both leave the function through
    ``self.call_conv.return_exc``.
    """
    from numba.cuda.core.pythonapi import ObjModeUtils

    self.init_pyapi()
    pyapi = self.pyapi
    # Acquire the GIL
    gil_state = pyapi.gil_ensure()

    # Fix types
    names = [arg.name for arg in expr.args]
    arg_types = [self.typeof(nm) for nm in names]
    arg_values = [self.loadvar(nm) for nm in names]
    for llval, arg_ty in zip(arg_values, arg_types):
        # Because .from_native_value steal the reference
        self.incref(arg_ty, llval)

    boxed_args = [
        pyapi.from_native_value(arg_ty, llval, self.env_manager)
        for arg_ty, llval in zip(arg_types, arg_values)
    ]

    def release_boxed_args():
        # Drop the references held on the boxed argument objects.
        for boxed in boxed_args:
            pyapi.decref(boxed)

    # Load objmode dispatcher
    callee = ObjModeUtils(pyapi).load_dispatcher(fnty, arg_types)
    # Make Call
    ret_obj = pyapi.call_function_objargs(callee, boxed_args)
    call_failed = cgutils.is_null(self.builder, ret_obj)
    with self.builder.if_else(call_failed) as (on_error, on_success):
        # Handles exception
        # This branch must exit the function
        with on_error:
            # Clean arg
            release_boxed_args()

            # Release the GIL
            pyapi.gil_release(gil_state)

            # Return and signal exception
            self.call_conv.return_exc(self.builder)

        # Handles normal return
        with on_success:
            # Fix output value
            native = pyapi.to_native_value(
                fnty.dispatcher.output_types,
                ret_obj,
            )
            output = native.value

            # Release objs
            pyapi.decref(ret_obj)
            release_boxed_args()

            # cleanup output
            if callable(native.cleanup):
                native.cleanup()

            # Release the GIL
            pyapi.gil_release(gil_state)

            # Error during unboxing
            with self.builder.if_then(native.is_error):
                self.call_conv.return_exc(self.builder)

        return output
|
|
1103
|
+
|
|
1104
|
+
def _lower_call_ExternalFunction(self, fnty, expr, signature):
    """
    Lower a call to a named external function.

    The symbol ``fnty.symbol`` is declared in the current module with the
    return and argument types of ``fnty.sig`` and then called with the
    folded argument values.
    """
    # Handle a named external function
    self.debug_print("# external function")
    argvals = self.fold_call_args(
        fnty, signature, expr.args, expr.vararg, expr.kws
    )
    extern_sig = fnty.sig
    descriptor = funcdesc.ExternalFunctionDescriptor(
        fnty.symbol, extern_sig.return_type, extern_sig.args
    )
    declared = self.context.declare_external_function(
        self.builder.module, descriptor
    )
    return self.context.call_external_function(
        self.builder, declared, descriptor.argtypes, argvals
    )
|
|
1126
|
+
|
|
1127
|
+
def _lower_call_ExternalFunctionPointer(self, fnty, expr, signature):
    """
    Lower a call made through a C function pointer.

    When ``fnty.requires_gil`` is set, the call is bracketed by acquiring
    and releasing the GIL, and every argument declared as
    ``types.ffi_forced_object`` is boxed into a Python object beforehand
    and released afterwards.  Otherwise the folded argument values are
    passed to the pointer unchanged.
    """
    # Handle a C function pointer
    self.debug_print("# calling external function pointer")
    argvals = self.fold_call_args(
        fnty, signature, expr.args, expr.vararg, expr.kws
    )
    pointer = self.loadvar(expr.func.name)

    if not fnty.requires_gil:
        # The external function pointer does NOT use libpython
        return self.context.call_function_pointer(
            self.builder, pointer, argvals, fnty.cconv
        )

    # The external function pointer uses libpython
    self.init_pyapi()
    # Acquire the GIL
    gil_state = self.pyapi.gil_ensure()

    # Make PyObjects
    call_args = []
    boxed = []
    for expected_ty, actual_ty, llval in zip(
        fnty.sig.args, signature.args, argvals
    ):
        # Adjust argument values to pyobjects
        if expected_ty == types.ffi_forced_object:
            self.incref(actual_ty, llval)
            llval = self.pyapi.from_native_value(
                actual_ty, llval, self.env_manager
            )
            boxed.append(llval)
        call_args.append(llval)

    # Call external function
    res = self.context.call_function_pointer(
        self.builder, pointer, call_args, fnty.cconv
    )

    # Release PyObjects
    for obj in boxed:
        self.pyapi.decref(obj)

    # Release the GIL
    self.pyapi.gil_release(gil_state)
    return res
|
|
1184
|
+
|
|
1185
|
+
def _lower_call_RecursiveCall(self, fnty, expr, signature):
    """
    Lower a recursive call.

    The callee's mangled name is computed from the overload selected for
    ``signature.args``.  If the function currently being built has a name
    starting with that mangled name the call is a self-recursion and goes
    through ``call_internal``; otherwise it is emitted with
    ``call_unresolved``.
    """
    # Recursive call
    argvals = self.fold_call_args(
        fnty, signature, expr.args, expr.vararg, expr.kws
    )
    overload = fnty.get_overloads(signature.args)
    mangle = self.context.mangler or default_mangler
    abi_tags = self.fndesc.abi_tags
    target_name = mangle(
        overload.qualname,
        signature.args,
        abi_tags=abi_tags,
        uid=overload.uid,
    )

    # special case self recursion
    if self.builder.function.name.startswith(target_name):
        return self.context.call_internal(
            self.builder, self.fndesc, signature, argvals
        )
    return self.context.call_unresolved(
        self.builder, target_name, signature, argvals
    )
|
|
1216
|
+
|
|
1217
|
+
def _lower_call_normal(self, fnty, expr, signature):
    """
    Lower a call through the regular function-implementation lookup.

    The implementation is fetched from the CUDA target context when
    ``expr.target`` is set, and from ``self.context`` otherwise.  For a
    signature with a receiver, the value of the callee variable is passed
    as the first argument.
    """
    # Normal function resolution
    self.debug_print("# calling normal function: {0}".format(fnty))
    self.debug_print("# signature: {0}".format(signature))
    if isinstance(fnty, types.ObjModeDispatcher):
        argvals = expr.func.args
    else:
        argvals = self.fold_call_args(
            fnty, signature, expr.args, expr.vararg, expr.kws
        )

    if expr.target is None:
        impl = self.context.get_function(fnty, signature)
    else:
        from numba.cuda.descriptor import cuda_target

        impl = cuda_target.target_context.get_function(fnty, signature)

    if signature.recvr:
        # The "self" object is passed as the function object
        # for bounded function
        receiver = self.loadvar(expr.func.name)
        # Prepend the self reference
        argvals = [receiver, *argvals]

    return impl(self.builder, argvals, self.loc)
|
|
1248
|
+
|
|
1249
|
+
def lower_expr(self, resty, expr):
    """
    Lower a single IR expression and return the resulting low-level value.

    The work to do is selected by ``expr.op``; ``resty`` is the Numba type
    the caller expects the result to have.  Operations that are not listed
    here are looked up in ``self.context.special_ops``.

    Raises LoweringError for a "phi" expression and NotImplementedError
    when nothing handles ``expr.op``.  A "static_getitem" that is not
    implemented and carries no ``index_var`` re-raises its
    NotImplementedError.
    """
    op = expr.op

    if op == "binop":
        return self.lower_binop(resty, expr, expr.fn)

    if op == "inplace_binop":
        # inplace operators on non-mutable types reuse the same
        # definition as the corresponding copying operators.
        target_ty = self.typeof(expr.lhs.name)
        fn = expr.fn if target_ty.mutable else expr.immutable_fn
        return self.lower_binop(resty, expr, fn)

    if op == "unary":
        operand = self.loadvar(expr.value.name)
        operand_ty = self.typeof(expr.value.name)
        func_ty = self.context.typing_context.resolve_value_type(expr.fn)
        # Get function
        signature = self.fndesc.calltypes[expr]
        impl = self.context.get_function(func_ty, signature)
        # Convert argument to match
        operand = self.context.cast(
            self.builder, operand, operand_ty, signature.args[0]
        )
        raw = impl(self.builder, [operand])
        return self.context.cast(
            self.builder, raw, signature.return_type, resty
        )

    if op == "call":
        return self.lower_call(resty, expr)

    if op in ("pair_first", "pair_second"):
        pair_val = self.loadvar(expr.value.name)
        pair_ty = self.typeof(expr.value.name)
        if op == "pair_first":
            member = self.context.pair_first(self.builder, pair_val, pair_ty)
        else:
            member = self.context.pair_second(self.builder, pair_val, pair_ty)
        self.incref(resty, member)
        return member

    if op in ("getiter", "iternext"):
        source = self.loadvar(expr.value.name)
        source_ty = self.typeof(expr.value.name)
        signature = self.fndesc.calltypes[expr]
        impl = self.context.get_function(op, signature)
        # The signature must have exactly one argument.
        [formal_ty] = signature.args
        converted = self.context.cast(
            self.builder, source, source_ty, formal_ty
        )
        raw = impl(self.builder, (converted,))
        return self.context.cast(
            self.builder, raw, signature.return_type, resty
        )

    if op == "exhaust_iter":
        seq = self.loadvar(expr.value.name)
        seq_ty = self.typeof(expr.value.name)
        # Unpack optional
        if isinstance(seq_ty, types.Optional):
            seq = self.context.cast(self.builder, seq, seq_ty, seq_ty.type)
            seq_ty = seq_ty.type

        # If we have a tuple, we needn't do anything
        # (and we can't iterate over the heterogeneous ones).
        if isinstance(seq_ty, types.BaseTuple):
            assert seq_ty == resty
            self.incref(seq_ty, seq)
            return seq

        item_ty = seq_ty.iterator_type.yield_type
        result = self.context.get_constant_undef(resty)
        pair_ty = types.Pair(item_ty, types.boolean)
        getiter_sig = typing.signature(seq_ty.iterator_type, seq_ty)
        getiter_impl = self.context.get_function("getiter", getiter_sig)
        iternext_sig = typing.signature(pair_ty, seq_ty.iterator_type)
        iternext_impl = self.context.get_function("iternext", iternext_sig)
        iterobj = getiter_impl(self.builder, (seq,))
        # We call iternext() as many times as desired (`expr.count`).
        for slot in range(expr.count):
            step = iternext_impl(self.builder, (iterobj,))
            has_item = self.context.pair_second(self.builder, step, pair_ty)
            with cgutils.if_unlikely(
                self.builder, self.builder.not_(has_item)
            ):
                self.return_exception(ValueError, loc=self.loc)
            item = self.context.pair_first(self.builder, step, pair_ty)
            result = self.builder.insert_value(result, item, slot)

        # Call iternext() once more to check that the iterator
        # is exhausted.
        step = iternext_impl(self.builder, (iterobj,))
        has_item = self.context.pair_second(self.builder, step, pair_ty)
        with cgutils.if_unlikely(self.builder, has_item):
            self.return_exception(ValueError, loc=self.loc)

        self.decref(seq_ty.iterator_type, iterobj)
        return result

    if op == "getattr":
        base = self.loadvar(expr.value.name)
        base_ty = self.typeof(expr.value.name)

        if isinstance(resty, types.BoundFunction):
            # if we are getting out a method, assume we have typed this
            # properly and just build a bound function object
            this_val = self.context.cast(
                self.builder, base, base_ty, resty.this
            )
            bound = self.context.get_bound_function(
                self.builder, this_val, resty.this
            )
            self.incref(resty, bound)
            return bound

        impl = self.context.get_getattr(base_ty, expr.attr)
        attr_ty = self.context.typing_context.resolve_getattr(
            base_ty, expr.attr
        )
        if impl is None:
            # ignore the attribute
            return self.context.get_dummy_value()
        attr_val = impl(self.context, self.builder, base_ty, base, expr.attr)
        # Cast the attribute type to the expected output type
        return self.context.cast(self.builder, attr_val, attr_ty, resty)

    if op == "static_getitem":
        signature = typing.signature(
            resty,
            self.typeof(expr.value.name),
            _lit_or_omitted(expr.index),
        )
        try:
            # Both get_function() and the returned implementation can
            # raise NotImplementedError if the types aren't supported
            impl = self.context.get_function("static_getitem", signature)
            return impl(
                self.builder, (self.loadvar(expr.value.name), expr.index)
            )
        except NotImplementedError:
            if expr.index_var is None:
                raise
            # Fall back on the generic getitem() implementation
            # for this type.
            signature = self.fndesc.calltypes[expr]
            return self.lower_getitem(
                resty, expr, expr.value, expr.index_var, signature
            )

    if op == "typed_getitem":
        signature = typing.signature(
            resty,
            self.typeof(expr.value.name),
            self.typeof(expr.index.name),
        )
        impl = self.context.get_function("typed_getitem", signature)
        operands = (
            self.loadvar(expr.value.name),
            self.loadvar(expr.index.name),
        )
        return impl(self.builder, operands)

    if op == "getitem":
        signature = self.fndesc.calltypes[expr]
        return self.lower_getitem(
            resty, expr, expr.value, expr.index, signature
        )

    if op == "build_tuple":
        elem_vals = [self.loadvar(v.name) for v in expr.items]
        elem_tys = [self.typeof(v.name) for v in expr.items]
        converted = [
            self.context.cast(self.builder, llval, have_ty, want_ty)
            for llval, want_ty, have_ty in zip(elem_vals, resty, elem_tys)
        ]
        packed = self.context.make_tuple(self.builder, resty, converted)
        self.incref(resty, packed)
        return packed

    if op == "build_list":
        elem_vals = [self.loadvar(v.name) for v in expr.items]
        elem_tys = [self.typeof(v.name) for v in expr.items]
        if not isinstance(resty, types.LiteralList):
            converted = [
                self.context.cast(self.builder, llval, have_ty, resty.dtype)
                for llval, have_ty in zip(elem_vals, elem_tys)
            ]
            return self.context.build_list(self.builder, resty, converted)

        # A literal list is materialised as a tuple of its element types.
        converted = [
            self.context.cast(self.builder, llval, have_ty, want_ty)
            for llval, want_ty, have_ty in zip(
                elem_vals, resty.types, elem_tys
            )
        ]
        packed = self.context.make_tuple(
            self.builder, types.Tuple(resty.types), converted
        )
        self.incref(resty, packed)
        return packed

    if op == "build_set":
        # Insert in reverse order, as Python does
        members = expr.items[::-1]
        elem_vals = [self.loadvar(v.name) for v in members]
        elem_tys = [self.typeof(v.name) for v in members]
        converted = [
            self.context.cast(self.builder, llval, have_ty, resty.dtype)
            for llval, have_ty in zip(elem_vals, elem_tys)
        ]
        return self.context.build_set(self.builder, resty, converted)

    if op == "build_map":
        type_pairs = []
        value_pairs = []
        for key_var, val_var in expr.items:
            key = self.loadvar(key_var.name)
            key_ty = self.typeof(key_var.name)
            val = self.loadvar(val_var.name)
            val_ty = self.typeof(val_var.name)
            type_pairs.append((key_ty, val_ty))
            value_pairs.append((key, val))
        return self.context.build_map(
            self.builder, resty, type_pairs, value_pairs
        )

    if op == "cast":
        source = self.loadvar(expr.value.name)
        source_ty = self.typeof(expr.value.name)
        converted = self.context.cast(
            self.builder, source, source_ty, resty
        )
        self.incref(resty, converted)
        return converted

    if op == "phi":
        raise LoweringError("PHI not stripped")

    if op in ("null", "undef"):
        # Numba does not raise an UnboundLocalError for undefined variables.
        # The variable is set to zero.
        return self.context.get_constant_null(resty)

    if op in self.context.special_ops:
        return self.context.special_ops[op](self, expr)

    raise NotImplementedError(expr)
|
|
1503
|
+
|
|
1504
|
+
def _alloca_var(self, name, fetype):
    """
    Make sure ``name`` has a stack slot recorded in ``self.varmap`` when it
    needs one.

    Nothing happens if a slot already exists, or if ``name`` is singly
    assigned while the SROA-like optimisation is enabled.
    """
    if name in self.varmap:
        # quit early
        return

    # A slot is only required when the name is not singly assigned or the
    # SROA-like optimisation is switched off.
    kept_as_value = (
        name in self._singly_assigned_vars
        and not self._disable_sroa_like_opt
    )
    if kept_as_value:
        return

    # Allocate the slot and remember the pointer
    self.varmap[name] = self.alloca(name, fetype)
|
|
1520
|
+
|
|
1521
|
+
def getvar(self, name):
    """
    Return the pointer to the stack slot of variable ``name``, allocating
    the slot first if the variable has none yet.

    While the SROA-like optimisation is enabled, asking for the slot of a
    block-local or singly-assigned variable trips an assertion.
    """
    sroa_enabled = not self._disable_sroa_like_opt
    if sroa_enabled:
        assert name not in self._blk_local_varmap
        assert name not in self._singly_assigned_vars

    if name not in self.varmap:
        # Allocate undefined variable as needed.
        # NOTE: Py3.12 use of LOAD_FAST_AND_CLEAR will allow variable be
        # referenced before it is defined.
        fetype = self.typeof(name)
        self._alloca_var(name, fetype)

    return self.varmap[name]
|
|
1534
|
+
|
|
1535
|
+
def loadvar(self, name):
    """
    Return the current value of variable ``name``.

    A block-local value is returned directly when the SROA-like
    optimisation is enabled; otherwise the value is loaded from the
    variable's stack slot.
    """
    if name in self._blk_local_varmap and not self._disable_sroa_like_opt:
        return self._blk_local_varmap[name]

    slot = self.getvar(name)
    if name not in self.func_ir.arg_names:
        return self.builder.load(slot)

    # Don't associate debuginfo with the load for a function arg else it
    # creates instructions ahead of the first source line of the
    # function which then causes problems with breaking on the function
    # symbol (it hits the symbol, not the first line).
    with debuginfo.suspend_emission(self.builder):
        return self.builder.load(slot)
|
|
1552
|
+
|
|
1553
|
+
def storevar(self, value, name, argidx=None):
    """
    Store ``value`` into variable ``name``.

    A singly-assigned variable (with the SROA-like optimisation enabled) is
    only recorded in the block-local map.  Any other variable is written to
    its stack slot; unless the store is for a function argument
    (``argidx`` given), the previously stored value is decref'ed first.
    For an argument store the variable is additionally registered with
    the debug info, using the line of ``self.defn_loc``.

    Raises AssertionError when the LLVM type of ``value`` differs from the
    slot's pointee type.
    """
    fetype = self.typeof(name)
    # Define if not already
    self._alloca_var(name, fetype)

    if name in self._singly_assigned_vars and not self._disable_sroa_like_opt:
        self._blk_local_varmap[name] = value
        return

    is_argument = argidx is not None
    if not is_argument:
        # Clean up existing value stored in the variable, not needed
        # if it's an arg
        previous = self.loadvar(name)
        self.decref(fetype, previous)

    # stack stored variable
    ptr = self.getvar(name)
    if value.type != ptr.type.pointee:
        template = (
            "Storing {value.type} to ptr of {ptr.type.pointee} "
            "('{name}'). FE type {fetype}"
        )
        raise AssertionError(
            template.format(value=value, ptr=ptr, fetype=fetype, name=name)
        )

    if not is_argument:
        self.builder.store(value, ptr)
        return

    # If this store is associated with an argument to the function (i.e.
    # store following reassemble from CC splatting structs as many args
    # to the function) then mark this variable as such.
    with debuginfo.suspend_emission(self.builder):
        self.builder.store(value, ptr)
    def_line = self.defn_loc.line  # the line with `def <func>`
    lltype = self.context.get_value_type(fetype)
    sizeof = self.context.get_abi_sizeof(lltype)
    datamodel = self.context.data_model_manager[fetype]
    self.debuginfo.mark_variable(
        self.builder,
        ptr,
        name=name,
        lltype=lltype,
        size=sizeof,
        line=def_line,
        datamodel=datamodel,
        argidx=argidx,
    )
|
|
1605
|
+
|
|
1606
|
+
def delvar(self, name):
    """
    Delete variable ``name`` by releasing the value it currently holds.

    A block-local value is simply decref'ed.  A stack-stored value is
    loaded, decref'ed, and its slot is then overwritten with a null
    constant.
    """
    fetype = self.typeof(name)
    sroa_enabled = not self._disable_sroa_like_opt

    # Out-of-order: the variable is deleted without a block-local value, so
    # it can no longer be treated as singly assigned.  discard() is a no-op
    # when the name is absent.
    if name not in self._blk_local_varmap and sroa_enabled:
        self._singly_assigned_vars.discard(name)

    # Define if not already (may happen if the variable is deleted
    # at the beginning of a loop, but only set later in the loop)
    self._alloca_var(name, fetype)

    if name in self._blk_local_varmap and sroa_enabled:
        self.decref(fetype, self._blk_local_varmap[name])
        return

    ptr = self.getvar(name)
    self.decref(fetype, self.builder.load(ptr))
    # Zero-fill variable to avoid double frees on subsequent dels
    self.builder.store(llvm_ir.Constant(ptr.type.pointee, None), ptr)
|
|
1632
|
+
|
|
1633
|
+
def alloca(self, name, type):
    """
    Allocate a stack slot for variable ``name`` of Numba type ``type``.

    The LLVM value type and the data model of ``type`` are looked up on the
    context and handed to ``alloca_lltype``, whose result is returned.
    """
    ctx = self.context
    llvm_type = ctx.get_value_type(type)
    model = ctx.data_model_manager[type]
    return self.alloca_lltype(name, llvm_type, datamodel=model)
|
|
1637
|
+
|
|
1638
|
+
def alloca_lltype(self, name, lltype, datamodel=None):
    """
    Allocate a stack slot of LLVM type ``lltype`` named ``name`` and return
    the pointer to it.

    Names that do not start with "$" are treated as user variables and are
    registered with the debug info, unless the name is one of the function's
    arguments.
    """
    # Is user variable?
    is_uservar = not name.startswith("$")
    # Allocate space for variable
    slot = cgutils.alloca_once(self.builder, lltype, name=name, zfill=False)

    # Emit debug info for user variable
    # Don't associate debuginfo with the alloca for a function arg, this
    # is handled by the first store to the alloca so that repacking the
    # splatted args from the CC is dealt with.
    if is_uservar and name not in self.func_ir.arg_names:
        self.debuginfo.mark_variable(
            self.builder,
            slot,
            name=name,
            lltype=lltype,
            size=self.context.get_abi_sizeof(lltype),
            line=self.loc.line,
            datamodel=datamodel,
        )
    return slot
|
|
1661
|
+
|
|
1662
|
+
def incref(self, typ, val):
    """
    Emit an NRT incref for ``val`` of Numba type ``typ``.

    Does nothing when the context has NRT disabled.
    """
    ctx = self.context
    if ctx.enable_nrt:
        ctx.nrt.incref(self.builder, typ, val)
|
|
1667
|
+
|
|
1668
|
+
def decref(self, typ, val):
    """
    Emit an NRT decref for ``val`` of Numba type ``typ``.

    Does nothing when the context has NRT disabled.
    """
    ctx = self.context
    if ctx.enable_nrt:
        # do not associate decref with "use", it creates "jumpy" line info
        # as the decrefs are usually where the ir.Del nodes are, which is
        # at the end of the block.
        with debuginfo.suspend_emission(self.builder):
            ctx.nrt.decref(self.builder, typ, val)
|
|
1677
|
+
|
|
1678
|
+
|
|
1679
|
+
class CUDALower(Lower):
    """``Lower`` subclass with extra debug-info handling.

    Two behaviours are layered on top of the parent class:

    * Source variables whose SSA versions carry more than one frontend type
      ("polymorphic" variables) share a single stack slot per source name,
      recorded in ``poly_var_loc_map``. When ``config.CUDA_DEBUG_POLY`` is
      set, that slot also holds a one-byte discriminant at offset 0.
    * After a store of an integer/float/double value, ``update_variable`` is
      invoked on the debug-info emitter so that an ``llvm.dbg.value`` is
      emitted for the variable.
    """

    def storevar(self, value, name, argidx=None):
        """
        Store the value into the given variable.
        """
        # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
        if config.CUDA_DEBUG_POLY:
            root = name.split(".")[0]
            if root in self.poly_var_typ_map:
                # Ensure allocation happens first (if needed)
                fetype = self.typeof(name)
                self._alloca_var(name, fetype)
                # Discriminant and data are located in the same union
                union_ptr = self.poly_var_loc_map[root]
                union_ty = types.UnionType(self.poly_var_typ_map[root])
                # Discriminant = index of the (de-literalised) type within
                # the union's type sequence
                member = (
                    fetype.literal_type
                    if isinstance(fetype, types.Literal)
                    else fetype
                )
                tag = list(union_ty.types).index(member)
                # Firstly write the discriminant as i8 at offset 0
                i8 = llvm_ir.IntType(8)
                tag_ptr = self.builder.bitcast(
                    union_ptr, llvm_ir.PointerType(i8)
                )
                self.builder.store(llvm_ir.Constant(i8, tag), tag_ptr)
                # Secondly write data at offset = sizeof(fetype) in bytes
                lltype = self.context.get_value_type(fetype)
                self.builder.store(
                    value, self._poly_payload_ptr(union_ptr, lltype)
                )
                return

        # For non-polymorphic variables, use parent implementation
        super().storevar(value, name, argidx)

        # Emit llvm.dbg.value instead of llvm.dbg.declare for local scalar
        # variables immediately after a store instruction. The second
        # condition mirrors the one used to elide stores in the parent.
        if not (self.context.enable_debuginfo and self.store_var_needed(name)):
            return

        fetype = self.typeof(name)
        lltype = self.context.get_value_type(fetype)
        scalar_kinds = (llvm_ir.IntType, llvm_ir.FloatType, llvm_ir.DoubleType)
        if not isinstance(lltype, scalar_kinds):
            return

        sizeof = self.context.get_abi_sizeof(lltype)
        datamodel = self.context.data_model_manager[fetype]
        line = self.defn_loc.line if argidx is not None else self.loc.line

        if not name.startswith("$"):
            # User variable: report it under its source (un-versioned) name
            dbg_name = name.split(".")[0]
            if dbg_name in self.poly_var_typ_map:
                return
        else:
            # Temporary: only of interest when it is a load from a user
            # variable that falls out of the coverage of the dbg.value
            # range of the current basic block
            if not isinstance(value, llvm_ir.LoadInstr):
                return
            loaded_from = value.operands[0].name
            if loaded_from.startswith(("$", ".")):
                return
            dbg_name = loaded_from.split(".")[0]
            if (
                dbg_name in self.poly_var_typ_map
                # Already covered by the dbg.value range
                or dbg_name in self.dbg_val_names
            ):
                return
            # Use the 1-based position when the name is a function argument;
            # otherwise keep the caller-supplied argidx
            argidx = next(
                (
                    pos
                    for pos, arg in enumerate(self.fnargs, start=1)
                    if arg.name == dbg_name
                ),
                argidx,
            )

        # Insert the llvm.dbg.value intrinsic call
        self.debuginfo.update_variable(
            self.builder,
            value,
            dbg_name,
            lltype,
            sizeof,
            line,
            datamodel,
            argidx,
        )

    def pre_block(self, block):
        """
        Reset and, when lowering with debug info, populate the set of source
        names assigned in ``block`` (the names covered by dbg.value there).
        """
        super().pre_block(block)

        # dbg.value range covered names
        self.dbg_val_names = set()

        if self.context.enable_debuginfo and self._disable_sroa_like_opt:
            self.dbg_val_names.update(
                assign.target.name.split(".")[0]
                for assign in block.find_insts(ir.Assign)
                if not assign.target.name.startswith("$")
            )

    def pre_lower(self):
        """
        Called before lowering all blocks.
        """
        super().pre_lower()

        self.poly_var_typ_map = {}
        self.poly_var_loc_map = {}
        self.poly_var_set = set()
        self.poly_cleaned = False
        self.lastblk = max(self.blocks.keys())

        # Only when debug info is enabled: walk through the function body
        # and mark variables with polymorphic types.
        if not (self.context.enable_debuginfo and self._disable_sroa_like_opt):
            return

        seen_types = {}
        # pre-scan all blocks
        for blk in self.blocks.values():
            for assign in blk.find_insts(ir.Assign):
                target = assign.target
                ssa_name = target.name
                if ssa_name.startswith("$"):
                    continue
                # Only multi-versioned targets can be polymorphic
                if len(target.versioned_names) == 0:
                    continue
                fetype = self.typeof(ssa_name)
                # deduplicate polymorphic types
                if isinstance(fetype, types.Literal):
                    fetype = fetype.literal_type
                seen_types.setdefault(ssa_name.split(".")[0], set()).add(fetype)

        # Filter out multi-versioned but single typed variables
        self.poly_var_typ_map = {
            src: tys for src, tys in seen_types.items() if len(tys) > 1
        }

    def _alloca_var(self, name, fetype):
        """
        Ensure the given variable has an allocated stack slot (if needed).
        """
        # Polymorphic handling applies only if the name is not handled yet,
        # a store is needed, and the source name is polymorphic
        is_poly = (
            name not in self.varmap
            and self.store_var_needed(name)
            and name.split(".")[0] in self.poly_var_typ_map
        )
        if not is_poly:
            super()._alloca_var(name, fetype)
            return

        root = name.split(".")[0]
        self.poly_var_set.add(name)
        if root in self.poly_var_loc_map:
            # The shared slot for this source name already exists
            return

        union_ty = types.UnionType(self.poly_var_typ_map[root])
        datamodel = self.context.data_model_manager[union_ty]
        # UnionType has sorted set of types, max at last index
        largest = union_ty.types[-1]
        # With CUDA_DEBUG_POLY, allocate double the max element size to
        # house [discriminant + data]; otherwise a single element for data
        slots = 2 if config.CUDA_DEBUG_POLY else 1
        aggr_type = types.UniTuple(largest, slots)
        lltype = self.context.get_value_type(aggr_type)
        # save the location of the union type for polymorphic var
        self.poly_var_loc_map[root] = self.alloca_lltype(
            root, lltype, datamodel
        )

    def store_var_needed(self, name):
        """
        Return a truthy value when a store for ``name`` must not be elided.

        These are the conditions used to elide stores in the parent class,
        e.g. in method storevar() and _alloca_var().
        """
        # used in multiple blocks
        if name not in self._singly_assigned_vars:
            return True
        # lowering with debuginfo
        return self._disable_sroa_like_opt

    def delvar(self, name):
        """
        Delete the given variable.
        """
        if name not in self.poly_var_set:
            super().delvar(name)
            return

        fetype = self.typeof(name)
        union_ptr = self.poly_var_loc_map[name.split(".")[0]]
        self.decref(fetype, self.builder.load(union_ptr))

        in_last_block = self._cur_ir_block == self.blocks[self.lastblk]
        if in_last_block and not self.poly_cleaned:
            # Zero-fill the debug union for polymorphic only
            # at the last block
            for slot in self.poly_var_loc_map.values():
                zero = llvm_ir.Constant(slot.type.pointee, None)
                self.builder.store(zero, slot)
            self.poly_cleaned = True

    def getvar(self, name):
        """
        Get a pointer to the given variable's slot.
        """
        if name not in self.poly_var_set:
            return super().getvar(name)

        root = name.split(".")[0]
        fetype = self.typeof(name)
        lltype = self.context.get_value_type(fetype)
        union_ptr = self.poly_var_loc_map[root]

        if config.CUDA_DEBUG_POLY:
            # With CUDA_DEBUG_POLY enabled, read value at
            # offset = sizeof(fetype) in bytes
            return self._poly_payload_ptr(union_ptr, lltype)
        # Otherwise, just bitcast to the correct type
        return self.builder.bitcast(union_ptr, llvm_ir.PointerType(lltype))

    def _poly_payload_ptr(self, union_ptr, lltype):
        """
        Return ``union_ptr`` advanced by ``sizeof(lltype)`` bytes and cast to
        ``lltype*``; this is where the data part lives when a discriminant
        is stored at offset 0.
        """
        offset = self.context.get_abi_sizeof(lltype)
        # Bitcast to i8* and use byte-level GEP
        as_bytes = self.builder.bitcast(
            union_ptr, llvm_ir.PointerType(llvm_ir.IntType(8))
        )
        shifted = self.builder.gep(
            as_bytes,
            [llvm_ir.Constant(llvm_ir.IntType(64), offset)],
        )
        # Cast to the correct type pointer
        return self.builder.bitcast(shifted, llvm_ir.PointerType(lltype))
|
|
1935
|
+
|
|
1936
|
+
|
|
1937
|
+
def _lit_or_omitted(value):
    """Return ``types.literal(value)``, or ``types.Omitted(value)`` when
    building the literal raises a literal-typing error.

    When ``HAS_NUMBA`` is set, the ``LiteralTypingError`` class from
    ``numba.core.errors`` is caught in addition to the local one.
    """
    caught = [LiteralTypingError]
    if HAS_NUMBA:
        from numba.core.errors import (
            LiteralTypingError as CoreLiteralTypingError,
        )

        caught.append(CoreLiteralTypingError)

    try:
        result = types.literal(value)
    except tuple(caught):
        result = types.Omitted(value)
    return result
|