numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from abc import abstractmethod, ABCMeta
|
|
2
5
|
import itertools
|
|
3
6
|
import numba
|
|
@@ -14,8 +17,8 @@ import sys
|
|
|
14
17
|
from numba.misc.appdirs import AppDirs
|
|
15
18
|
from pathlib import Path
|
|
16
19
|
|
|
17
|
-
from numba.core import config
|
|
18
|
-
from numba.
|
|
20
|
+
from numba.cuda.core import config
|
|
21
|
+
from numba.cuda.serialize import dumps
|
|
19
22
|
|
|
20
23
|
|
|
21
24
|
def _cache_log(msg, *args):
|
|
@@ -1,11 +1,15 @@
|
|
|
1
|
-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
2
3
|
|
|
3
|
-
from numba.core import
|
|
4
|
+
from numba.cuda.core.tracing import event
|
|
5
|
+
|
|
6
|
+
from numba.cuda.core import bytecode
|
|
7
|
+
from numba.core import callconv, errors
|
|
8
|
+
from numba.cuda.core import config
|
|
4
9
|
from numba.core.errors import CompilerError
|
|
5
|
-
from numba.parfors.parfor import ParforDiagnostics
|
|
6
10
|
|
|
7
|
-
from numba.core.untyped_passes import ExtractByteCode, FixupArgs
|
|
8
|
-
from numba.core.targetconfig import ConfigStack
|
|
11
|
+
from numba.cuda.core.untyped_passes import ExtractByteCode, FixupArgs
|
|
12
|
+
from numba.cuda.core.targetconfig import ConfigStack
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
class _CompileStatus(object):
|
|
@@ -62,8 +66,6 @@ def _make_subtarget(targetctx, flags):
|
|
|
62
66
|
subtargetoptions["enable_boundscheck"] = True
|
|
63
67
|
if flags.nrt:
|
|
64
68
|
subtargetoptions["enable_nrt"] = True
|
|
65
|
-
if flags.auto_parallel:
|
|
66
|
-
subtargetoptions["auto_parallel"] = flags.auto_parallel
|
|
67
69
|
if flags.fastmath:
|
|
68
70
|
subtargetoptions["fastmath"] = flags.fastmath
|
|
69
71
|
error_model = callconv.create_error_model(flags.error_model, targetctx)
|
|
@@ -110,13 +112,6 @@ class CompilerBase(object):
|
|
|
110
112
|
# hold this for e.g. with_lifting, null out on exit
|
|
111
113
|
self.state.pipeline = self
|
|
112
114
|
|
|
113
|
-
# parfor diagnostics info, add to metadata
|
|
114
|
-
self.state.parfor_diagnostics = ParforDiagnostics()
|
|
115
|
-
self.state.metadata["parfor_diagnostics"] = (
|
|
116
|
-
self.state.parfor_diagnostics
|
|
117
|
-
)
|
|
118
|
-
self.state.metadata["parfors"] = {}
|
|
119
|
-
|
|
120
115
|
self.state.status = _CompileStatus(
|
|
121
116
|
can_fallback=self.state.flags.enable_pyobject
|
|
122
117
|
)
|
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import timeit
|
|
5
|
+
from abc import abstractmethod, ABCMeta
|
|
6
|
+
from collections import namedtuple, OrderedDict
|
|
7
|
+
import inspect
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
from numba.core.compiler_lock import global_compiler_lock
|
|
11
|
+
from numba.core import errors
|
|
12
|
+
from numba.cuda.core import config
|
|
13
|
+
from numba.cuda import utils
|
|
14
|
+
from numba.cuda.core import transforms
|
|
15
|
+
from numba.cuda.core.tracing import event
|
|
16
|
+
from numba.cuda.core.postproc import PostProcessor
|
|
17
|
+
from numba.cuda.core.ir_utils import enforce_no_dels, legalize_single_scope
|
|
18
|
+
import numba.cuda.core.event as ev
|
|
19
|
+
|
|
20
|
+
import numba.cuda.core.compiler_machinery as nccm
|
|
21
|
+
|
|
22
|
+
# terminal color markup
|
|
23
|
+
_termcolor = errors.termcolor()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SimpleTimer(object):
|
|
27
|
+
"""
|
|
28
|
+
A simple context managed timer
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
def __enter__(self):
|
|
32
|
+
self.ts = timeit.default_timer()
|
|
33
|
+
return self
|
|
34
|
+
|
|
35
|
+
def __exit__(self, *exc):
|
|
36
|
+
self.elapsed = timeit.default_timer() - self.ts
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CompilerPass(metaclass=ABCMeta):
|
|
40
|
+
"""The base class for all compiler passes."""
|
|
41
|
+
|
|
42
|
+
@abstractmethod
|
|
43
|
+
def __init__(self, *args, **kwargs):
|
|
44
|
+
self._analysis = None
|
|
45
|
+
self._pass_id = None
|
|
46
|
+
|
|
47
|
+
@classmethod
|
|
48
|
+
def name(cls):
|
|
49
|
+
"""
|
|
50
|
+
Returns the name of the pass
|
|
51
|
+
"""
|
|
52
|
+
return cls._name
|
|
53
|
+
|
|
54
|
+
@property
|
|
55
|
+
def pass_id(self):
|
|
56
|
+
"""
|
|
57
|
+
The ID of the pass
|
|
58
|
+
"""
|
|
59
|
+
return self._pass_id
|
|
60
|
+
|
|
61
|
+
@pass_id.setter
|
|
62
|
+
def pass_id(self, val):
|
|
63
|
+
"""
|
|
64
|
+
Sets the ID of the pass
|
|
65
|
+
"""
|
|
66
|
+
self._pass_id = val
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def analysis(self):
|
|
70
|
+
"""
|
|
71
|
+
Analysis data for the pass
|
|
72
|
+
"""
|
|
73
|
+
return self._analysis
|
|
74
|
+
|
|
75
|
+
@analysis.setter
|
|
76
|
+
def analysis(self, val):
|
|
77
|
+
"""
|
|
78
|
+
Set the analysis data for the pass
|
|
79
|
+
"""
|
|
80
|
+
self._analysis = val
|
|
81
|
+
|
|
82
|
+
def run_initialization(self, *args, **kwargs):
|
|
83
|
+
"""
|
|
84
|
+
Runs the initialization sequence for the pass, will run before
|
|
85
|
+
`run_pass`.
|
|
86
|
+
"""
|
|
87
|
+
return False
|
|
88
|
+
|
|
89
|
+
@abstractmethod
|
|
90
|
+
def run_pass(self, *args, **kwargs):
|
|
91
|
+
"""
|
|
92
|
+
Runs the pass itself. Must return True/False depending on whether
|
|
93
|
+
statement level modification took place.
|
|
94
|
+
"""
|
|
95
|
+
pass
|
|
96
|
+
|
|
97
|
+
def run_finalizer(self, *args, **kwargs):
|
|
98
|
+
"""
|
|
99
|
+
Runs the finalization sequence for the pass, will run after
|
|
100
|
+
`run_pass`.
|
|
101
|
+
"""
|
|
102
|
+
return False
|
|
103
|
+
|
|
104
|
+
def get_analysis_usage(self, AU):
|
|
105
|
+
"""Override to set analysis usage"""
|
|
106
|
+
pass
|
|
107
|
+
|
|
108
|
+
def get_analysis(self, pass_name):
|
|
109
|
+
"""
|
|
110
|
+
Gets the analysis from a given pass
|
|
111
|
+
"""
|
|
112
|
+
return self._analysis[pass_name]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class SSACompliantMixin(object):
|
|
116
|
+
"""Mixin to indicate a pass is SSA form compliant. Nothing is asserted
|
|
117
|
+
about this condition at present.
|
|
118
|
+
"""
|
|
119
|
+
|
|
120
|
+
pass
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class FunctionPass(CompilerPass):
|
|
124
|
+
"""Base class for function passes"""
|
|
125
|
+
|
|
126
|
+
pass
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class AnalysisPass(CompilerPass):
|
|
130
|
+
"""Base class for analysis passes (no modification made to state)"""
|
|
131
|
+
|
|
132
|
+
pass
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class LoweringPass(CompilerPass):
|
|
136
|
+
"""Base class for lowering passes"""
|
|
137
|
+
|
|
138
|
+
pass
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class AnalysisUsage(object):
|
|
142
|
+
"""This looks and behaves like LLVM's AnalysisUsage because it's like that."""
|
|
143
|
+
|
|
144
|
+
def __init__(self):
|
|
145
|
+
self._required = set()
|
|
146
|
+
self._preserved = set()
|
|
147
|
+
|
|
148
|
+
def get_required_set(self):
|
|
149
|
+
return self._required
|
|
150
|
+
|
|
151
|
+
def get_preserved_set(self):
|
|
152
|
+
return self._preserved
|
|
153
|
+
|
|
154
|
+
def add_required(self, pss):
|
|
155
|
+
self._required.add(pss)
|
|
156
|
+
|
|
157
|
+
def add_preserved(self, pss):
|
|
158
|
+
self._preserved.add(pss)
|
|
159
|
+
|
|
160
|
+
def __str__(self):
|
|
161
|
+
return "required: %s\n" % self._required
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
_DEBUG = False
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def debug_print(*args, **kwargs):
|
|
168
|
+
if _DEBUG:
|
|
169
|
+
print(*args, **kwargs)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
pass_timings = namedtuple("pass_timings", "init run finalize")
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class PassManager(object):
|
|
176
|
+
"""
|
|
177
|
+
The PassManager is a named instance of a particular compilation pipeline
|
|
178
|
+
"""
|
|
179
|
+
|
|
180
|
+
# TODO: Eventually enable this, it enforces self consistency after each pass
|
|
181
|
+
_ENFORCING = False
|
|
182
|
+
|
|
183
|
+
def __init__(self, pipeline_name):
|
|
184
|
+
"""
|
|
185
|
+
Create a new pipeline with name "pipeline_name"
|
|
186
|
+
"""
|
|
187
|
+
self.passes = []
|
|
188
|
+
self.exec_times = OrderedDict()
|
|
189
|
+
self._finalized = False
|
|
190
|
+
self._analysis = None
|
|
191
|
+
self._print_after = None
|
|
192
|
+
self.pipeline_name = pipeline_name
|
|
193
|
+
|
|
194
|
+
def _validate_pass(self, pass_cls):
|
|
195
|
+
if not (
|
|
196
|
+
isinstance(pass_cls, str)
|
|
197
|
+
or (
|
|
198
|
+
inspect.isclass(pass_cls)
|
|
199
|
+
and (
|
|
200
|
+
issubclass(pass_cls, CompilerPass)
|
|
201
|
+
or issubclass(pass_cls, nccm.CompilerPass)
|
|
202
|
+
)
|
|
203
|
+
)
|
|
204
|
+
):
|
|
205
|
+
msg = (
|
|
206
|
+
"Pass must be referenced by name or be a subclass of a "
|
|
207
|
+
"CompilerPass. Have %s" % pass_cls
|
|
208
|
+
)
|
|
209
|
+
raise TypeError(msg)
|
|
210
|
+
if isinstance(pass_cls, str):
|
|
211
|
+
pass_cls = _pass_registry.find_by_name(pass_cls)
|
|
212
|
+
else:
|
|
213
|
+
if not _pass_registry.is_registered(pass_cls):
|
|
214
|
+
raise ValueError("Pass %s is not registered" % pass_cls)
|
|
215
|
+
|
|
216
|
+
def add_pass(self, pss, description=""):
|
|
217
|
+
"""
|
|
218
|
+
Append a pass to the PassManager's compilation pipeline
|
|
219
|
+
"""
|
|
220
|
+
self._validate_pass(pss)
|
|
221
|
+
func_desc_tuple = (pss, description)
|
|
222
|
+
self.passes.append(func_desc_tuple)
|
|
223
|
+
self._finalized = False
|
|
224
|
+
|
|
225
|
+
def add_pass_after(self, pass_cls, location):
|
|
226
|
+
"""
|
|
227
|
+
Add a pass `pass_cls` to the PassManager's compilation pipeline after
|
|
228
|
+
the pass `location`.
|
|
229
|
+
"""
|
|
230
|
+
assert self.passes
|
|
231
|
+
self._validate_pass(pass_cls)
|
|
232
|
+
self._validate_pass(location)
|
|
233
|
+
for idx, (x, _) in enumerate(self.passes):
|
|
234
|
+
if x == location:
|
|
235
|
+
break
|
|
236
|
+
else:
|
|
237
|
+
raise ValueError("Could not find pass %s" % location)
|
|
238
|
+
self.passes.insert(idx + 1, (pass_cls, str(pass_cls)))
|
|
239
|
+
# if a pass has been added, it's not finalized
|
|
240
|
+
self._finalized = False
|
|
241
|
+
|
|
242
|
+
def _debug_init(self):
|
|
243
|
+
# determine after which passes IR dumps should take place
|
|
244
|
+
def parse(conf_item):
|
|
245
|
+
print_passes = []
|
|
246
|
+
if conf_item != "none":
|
|
247
|
+
if conf_item == "all":
|
|
248
|
+
print_passes = [x.name() for (x, _) in self.passes]
|
|
249
|
+
else:
|
|
250
|
+
# we don't validate whether the named passes exist in this
|
|
251
|
+
# pipeline, because the compiler may be used reentrantly and
|
|
252
|
+
# different pipelines may contain different passes
|
|
253
|
+
splitted = conf_item.split(",")
|
|
254
|
+
print_passes = [x.strip() for x in splitted]
|
|
255
|
+
return print_passes
|
|
256
|
+
|
|
257
|
+
ret = (
|
|
258
|
+
parse(config.DEBUG_PRINT_AFTER),
|
|
259
|
+
parse(config.DEBUG_PRINT_BEFORE),
|
|
260
|
+
parse(config.DEBUG_PRINT_WRAP),
|
|
261
|
+
)
|
|
262
|
+
return ret
|
|
263
|
+
|
|
264
|
+
def finalize(self):
|
|
265
|
+
"""
|
|
266
|
+
Finalize the PassManager, after which no more passes may be added
|
|
267
|
+
without re-finalization.
|
|
268
|
+
"""
|
|
269
|
+
self._analysis = self.dependency_analysis()
|
|
270
|
+
self._print_after, self._print_before, self._print_wrap = (
|
|
271
|
+
self._debug_init()
|
|
272
|
+
)
|
|
273
|
+
self._finalized = True
|
|
274
|
+
|
|
275
|
+
@property
def finalized(self):
    """
    Whether the pipeline is currently finalized, i.e. the value of the
    `_finalized` flag.
    """
    return self._finalized
|
|
278
|
+
|
|
279
|
+
def _patch_error(self, desc, exc):
|
|
280
|
+
"""
|
|
281
|
+
Patches the error to show the stage that it arose in.
|
|
282
|
+
"""
|
|
283
|
+
newmsg = "{desc}\n{exc}".format(desc=desc, exc=exc)
|
|
284
|
+
exc.args = (newmsg,)
|
|
285
|
+
return exc
|
|
286
|
+
|
|
287
|
+
@global_compiler_lock  # this needs a lock, likely calls LLVM
def _runPass(self, index, pss, internal_state):
    """
    Run the single pass instance `pss` against `internal_state`.

    The pass's `run_initialization`, `run_pass` and `run_finalizer` are
    called in that order, each under its own timer, and the three timings
    are stored in `self.exec_times` under the key "<index>_<pass name>".
    The function IR is dumped before and/or after the pass when the pass
    name is listed in the corresponding debug-print settings.

    Raises ValueError if one of the three stages returns something other
    than True/False.
    """
    mutated = False

    def check(func, compiler_state):
        # run one stage of the pass and insist on a True/False result
        mangled = func(compiler_state)
        if mangled not in (True, False):
            msg = (
                "CompilerPass implementations should return True/False. "
                "CompilerPass with name '%s' did not."
            )
            raise ValueError(msg % pss.name())
        return mangled

    def debug_print(pass_name, print_condition, printable_condition):
        # dump the IR (or note its absence) under a banner, but only for
        # passes named in `print_condition`
        if pass_name in print_condition:
            fid = internal_state.func_id
            args = (
                fid.modname,
                fid.func_qualname,
                self.pipeline_name,
                printable_condition,
                pass_name,
            )
            print(("%s.%s: %s: %s %s" % args).center(120, "-"))
            if internal_state.func_ir is not None:
                internal_state.func_ir.dump()
            else:
                print("func_ir is None")

    # debug print before this pass?
    debug_print(pss.name(), self._print_before + self._print_wrap, "BEFORE")

    # wire in the analysis info so it's accessible
    pss.analysis = self._analysis

    qualname = internal_state.func_id.func_qualname

    # payload attached to the "numba-cuda:run_pass" event
    ev_details = dict(
        name=f"{pss.name()} [{qualname}]",
        qualname=qualname,
        module=internal_state.func_id.modname,
        flags=utils._lazy_pformat(internal_state.flags.values()),
        args=str(internal_state.args),
        return_type=str(internal_state.return_type),
    )
    errctx = errors.new_error_context(f"Pass {pss.name()}")
    with ev.trigger_event("numba-cuda:run_pass", data=ev_details), errctx:
        # each stage reports whether it mutated the IR; the results are
        # OR-ed together
        with SimpleTimer() as init_time:
            mutated |= check(pss.run_initialization, internal_state)
        with SimpleTimer() as pass_time:
            mutated |= check(pss.run_pass, internal_state)
        with SimpleTimer() as finalize_time:
            mutated |= check(pss.run_finalizer, internal_state)

    # Check that if the pass is an instance of a FunctionPass that it hasn't
    # emitted ir.Dels.
    if isinstance(pss, FunctionPass):
        enforce_no_dels(internal_state.func_ir)

    if self._ENFORCING:
        # TODO: Add in self consistency enforcement for
        # `func_ir._definitions` etc
        if _pass_registry.get(pss.__class__).mutates_CFG:
            if mutated:  # block level changes, rebuild all
                PostProcessor(internal_state.func_ir).run()
            else:  # CFG level changes rebuild CFG
                internal_state.func_ir.blocks = transforms.canonicalize_cfg(
                    internal_state.func_ir.blocks
                )
        # Check the func_ir has exactly one Scope instance
        if not legalize_single_scope(internal_state.func_ir.blocks):
            raise errors.CompilerError(
                f"multiple scope in func_ir detected in {pss}",
            )
    # inject runtimes
    pt = pass_timings(
        init_time.elapsed, pass_time.elapsed, finalize_time.elapsed
    )
    self.exec_times["%s_%s" % (index, pss.name())] = pt

    # debug print after this pass?
    debug_print(pss.name(), self._print_after + self._print_wrap, "AFTER")
|
|
370
|
+
|
|
371
|
+
def run(self, state):
    """
    Run every pass of the pipeline, in order, on `state`.

    Raises RuntimeError when the pipeline has not been finalized. A
    `NumbaError` raised while running a pass gets the pipeline name and
    the failing step prepended to its message before it is re-raised;
    `_EarlyPipelineCompletion` and all other exceptions are re-raised
    unchanged.
    """
    from numba.cuda.core.compiler import _EarlyPipelineCompletion

    if not self.finalized:
        raise RuntimeError("Cannot run non-finalised pipeline")

    # walk the passes and run them
    for position, (pass_cls, description) in enumerate(self.passes):
        try:
            event("-- %s" % description)
            instance = _pass_registry.get(pass_cls).pass_inst
            if not isinstance(instance, CompilerPass):
                raise BaseException("Legacy pass in use")
            self._runPass(position, instance, state)
        except _EarlyPipelineCompletion as e:
            raise e
        except Exception as e:
            if isinstance(e, errors.NumbaError):
                msg = "Failed in %s mode pipeline (step: %s)" % (
                    self.pipeline_name,
                    description,
                )
                raise self._patch_error(msg, e)
            raise e
|
|
400
|
+
|
|
401
|
+
def dependency_analysis(self):
    """
    Compute the dependency analysis of the pipeline.

    Returns a dict that maps the type of every pass in the pipeline to
    the set of everything that pass requires, directly or transitively
    (the requirements of its requirements, and so on).
    """
    # collect the analysis usage declared by each pass instance
    usages = dict()
    for pss, _ in self.passes:
        inst = _pass_registry.get(pss).pass_inst
        au = AnalysisUsage()
        inst.get_analysis_usage(au)
        usages[type(inst)] = au

    requires_map = {k: au.get_required_set() for k, au in usages.items()}

    def transitive_requirements(direct):
        # Worklist traversal of the requirement graph. It never mutates
        # the sets stored in `requires_map` (growing a set while iterating
        # over it raises RuntimeError) and the `closure` membership test
        # stops it from looping forever on cyclic requirements.
        closure = set()
        pending = list(direct)
        while pending:
            dep = pending.pop()
            if dep in closure:
                continue
            closure.add(dep)
            pending.extend(requires_map.get(dep, ()))
        return closure

    return {
        k: transitive_requirements(direct)
        for k, direct in requires_map.items()
    }
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
# Registry record for one pass class: `pass_inst` holds the instance created
# when the class is registered, `mutates_CFG` and `analysis_only` hold the two
# flags given at registration.
pass_info = namedtuple("pass_info", "pass_inst mutates_CFG analysis_only")
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
class PassRegistry(object):
    """
    Pass registry singleton class.

    Maps each registered pass class to a `pass_info` record holding one
    instance of the pass together with its `mutates_CFG` and
    `analysis_only` flags.
    """

    # next id handed out to a registered pass class
    _id = 0

    # pass class -> pass_info
    _registry = dict()

    def register(self, mutates_CFG, analysis_only):
        """
        Return a class decorator that registers the decorated pass class
        with the given `mutates_CFG` and `analysis_only` flags, assigns it
        a `pass_id`, and returns the class unchanged otherwise.
        """

        def make_festive(pass_class):
            assert not self.is_registered(pass_class)
            assert not self._does_pass_name_alias(pass_class.name())
            pass_class.pass_id = self._id
            self._id += 1
            self._registry[pass_class] = pass_info(
                pass_class(), mutates_CFG, analysis_only
            )
            return pass_class

        return make_festive

    def is_registered(self, clazz):
        """Return True if the pass class `clazz` has been registered."""
        return clazz in self._registry

    def get(self, clazz):
        """Return the `pass_info` record of the registered class `clazz`."""
        assert self.is_registered(clazz)
        return self._registry[clazz]

    def _does_pass_name_alias(self, check):
        """Return True if a registered pass already uses the name `check`."""
        # `name` is a method and has to be called; comparing the bound
        # method itself against a string is never equal.
        return any(
            info.pass_inst.name() == check
            for info in self._registry.values()
        )

    def find_by_name(self, class_name):
        """
        Return the `pass_info` record of the registered pass whose name is
        `class_name`; raise ValueError if there is none.
        """
        assert isinstance(class_name, str)
        for info in self._registry.values():
            if info.pass_inst.name() == class_name:
                return info
        raise ValueError("No pass with name %s is registered" % class_name)

    def dump(self):
        """Print every registered pass class with its `pass_info` record."""
        for k, v in self._registry.items():
            print("%s: %s" % (k, v))
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
# The registry instance used by this module.
_pass_registry = PassRegistry()
# Drop the class name from the module namespace.
# NOTE(review): presumably so that no second registry gets created - confirm.
del PassRegistry


"""
register_pass is used to register a compiler pass class for use with PassManager
instances.
"""
# Bound `register` method of the registry instance: called with `mutates_CFG`
# and `analysis_only`, it returns a decorator for the pass class.
register_pass = _pass_registry.register
|