numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""Compatibility module.
|
|
5
|
+
|
|
6
|
+
It can be necessary to load files generated by previous versions of cloudpickle
|
|
7
|
+
that rely on symbols being defined under the `cloudpickle.cloudpickle_fast`
|
|
8
|
+
namespace.
|
|
9
|
+
|
|
10
|
+
See: tests/test_backward_compat.py
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from . import cloudpickle
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def __getattr__(name):
|
|
17
|
+
return getattr(cloudpickle, name)
|
numba_cuda/numba/cuda/codegen.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from llvmlite import ir
|
|
2
5
|
|
|
3
|
-
from numba.core import config
|
|
6
|
+
from numba.cuda.core import config
|
|
4
7
|
from numba.cuda import serialize
|
|
5
8
|
from .cudadrv import devices, driver, nvvm, runtime, nvrtc
|
|
6
9
|
from numba.cuda.core.codegen import Codegen, CodeLibrary
|
|
@@ -1,48 +1,258 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from llvmlite import ir
|
|
2
5
|
from collections import namedtuple
|
|
6
|
+
from warnings import warn, catch_warnings, simplefilter
|
|
7
|
+
import copy
|
|
8
|
+
|
|
3
9
|
from numba.core import ir as numba_ir
|
|
4
|
-
from numba.cuda import cgutils, typing
|
|
5
10
|
from numba.core import (
|
|
6
11
|
types,
|
|
7
|
-
|
|
8
|
-
config,
|
|
9
|
-
compiler,
|
|
12
|
+
bytecode,
|
|
10
13
|
)
|
|
11
|
-
from numba.core.
|
|
12
|
-
|
|
13
|
-
|
|
14
|
+
from numba.cuda.core.options import ParallelOptions
|
|
15
|
+
from numba.core.compiler_lock import global_compiler_lock
|
|
16
|
+
from numba.core.errors import NumbaWarning, NumbaInvalidConfigWarning
|
|
17
|
+
from numba.cuda.core.interpreter import Interpreter
|
|
18
|
+
|
|
19
|
+
from numba.cuda import cgutils, typing, lowering, nvvmutils, utils
|
|
20
|
+
from numba.cuda.api import get_current_device
|
|
21
|
+
from numba.cuda.codegen import ExternalCodeLibrary
|
|
22
|
+
|
|
23
|
+
from numba.cuda.core import (
|
|
24
|
+
inline_closurecall,
|
|
25
|
+
sigutils,
|
|
26
|
+
postproc,
|
|
27
|
+
config,
|
|
28
|
+
funcdesc,
|
|
14
29
|
)
|
|
30
|
+
from numba.cuda.cudadrv import nvvm, nvrtc
|
|
31
|
+
from numba.cuda.descriptor import cuda_target
|
|
32
|
+
from numba.cuda.flags import CUDAFlags
|
|
33
|
+
from numba.cuda.target import CUDACABICallConv
|
|
15
34
|
from numba.cuda.core.compiler import CompilerBase
|
|
16
|
-
from numba.core.
|
|
17
|
-
from numba.core.compiler_machinery import (
|
|
35
|
+
from numba.cuda.core.compiler_machinery import (
|
|
18
36
|
FunctionPass,
|
|
19
37
|
LoweringPass,
|
|
20
38
|
PassManager,
|
|
21
39
|
register_pass,
|
|
22
40
|
)
|
|
23
|
-
from numba.core.
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
41
|
+
from numba.cuda.core.untyped_passes import (
|
|
42
|
+
TranslateByteCode,
|
|
43
|
+
FixupArgs,
|
|
44
|
+
IRProcessing,
|
|
45
|
+
DeadBranchPrune,
|
|
46
|
+
RewriteSemanticConstants,
|
|
47
|
+
InlineClosureLikes,
|
|
48
|
+
GenericRewrites,
|
|
49
|
+
WithLifting,
|
|
50
|
+
InlineInlinables,
|
|
51
|
+
FindLiterallyCalls,
|
|
52
|
+
MakeFunctionToJitFunction,
|
|
53
|
+
LiteralUnroll,
|
|
54
|
+
ReconstructSSA,
|
|
55
|
+
RewriteDynamicRaises,
|
|
56
|
+
LiteralPropagationSubPipelinePass,
|
|
57
|
+
)
|
|
58
|
+
from numba.cuda.core.typed_passes import (
|
|
59
|
+
BaseNativeLowering,
|
|
60
|
+
NativeLowering,
|
|
27
61
|
AnnotateTypes,
|
|
62
|
+
IRLegalization,
|
|
63
|
+
NopythonTypeInference,
|
|
64
|
+
NopythonRewrites,
|
|
65
|
+
InlineOverloads,
|
|
66
|
+
PreLowerStripPhis,
|
|
67
|
+
NoPythonSupportedFeatureValidation,
|
|
28
68
|
)
|
|
29
|
-
from warnings import warn
|
|
30
|
-
from numba.cuda import nvvmutils
|
|
31
|
-
from numba.cuda.api import get_current_device
|
|
32
|
-
from numba.cuda.codegen import ExternalCodeLibrary
|
|
33
|
-
from numba.cuda.core.typed_passes import BaseNativeLowering
|
|
34
|
-
from numba.cuda.core import sigutils
|
|
35
|
-
from numba.cuda.cudadrv import nvvm, nvrtc
|
|
36
|
-
from numba.cuda.descriptor import cuda_target
|
|
37
|
-
from numba.cuda.flags import CUDAFlags
|
|
38
|
-
from numba.cuda.target import CUDACABICallConv
|
|
39
|
-
from numba.cuda import lowering, utils
|
|
40
|
-
from numba.core.utils import PYVERSION
|
|
41
69
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
70
|
+
|
|
71
|
+
_LowerResult = namedtuple(
|
|
72
|
+
"_LowerResult",
|
|
73
|
+
[
|
|
74
|
+
"fndesc",
|
|
75
|
+
"call_helper",
|
|
76
|
+
"cfunc",
|
|
77
|
+
"env",
|
|
78
|
+
],
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def sanitize_compile_result_entries(entries):
|
|
83
|
+
keys = set(entries.keys())
|
|
84
|
+
fieldset = set(CR_FIELDS)
|
|
85
|
+
badnames = keys - fieldset
|
|
86
|
+
if badnames:
|
|
87
|
+
raise NameError(*badnames)
|
|
88
|
+
missing = fieldset - keys
|
|
89
|
+
for k in missing:
|
|
90
|
+
entries[k] = None
|
|
91
|
+
# Avoid keeping alive traceback variables
|
|
92
|
+
err = entries["typing_error"]
|
|
93
|
+
if err is not None:
|
|
94
|
+
entries["typing_error"] = err.with_traceback(None)
|
|
95
|
+
return entries
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def run_frontend(func, inline_closures=False, emit_dels=False):
|
|
99
|
+
"""
|
|
100
|
+
Run the compiler frontend over the given Python function, and return
|
|
101
|
+
the function's canonical Numba IR.
|
|
102
|
+
|
|
103
|
+
If inline_closures is Truthy then closure inlining will be run
|
|
104
|
+
If emit_dels is Truthy the ir.Del nodes will be emitted appropriately
|
|
105
|
+
"""
|
|
106
|
+
# XXX make this a dedicated Pipeline?
|
|
107
|
+
func_id = bytecode.FunctionIdentity.from_function(func)
|
|
108
|
+
interp = Interpreter(func_id)
|
|
109
|
+
bc = bytecode.ByteCode(func_id=func_id)
|
|
110
|
+
func_ir = interp.interpret(bc)
|
|
111
|
+
if inline_closures:
|
|
112
|
+
inline_pass = inline_closurecall.InlineClosureCallPass(
|
|
113
|
+
func_ir, ParallelOptions(False), {}, False
|
|
114
|
+
)
|
|
115
|
+
inline_pass.run()
|
|
116
|
+
post_proc = postproc.PostProcessor(func_ir)
|
|
117
|
+
post_proc.run(emit_dels)
|
|
118
|
+
return func_ir
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class DefaultPassBuilder(object):
|
|
122
|
+
"""
|
|
123
|
+
This is the default pass builder, it contains the "classic" default
|
|
124
|
+
pipelines as pre-canned PassManager instances:
|
|
125
|
+
- nopython
|
|
126
|
+
- objectmode
|
|
127
|
+
- interpreted
|
|
128
|
+
- typed
|
|
129
|
+
- untyped
|
|
130
|
+
- nopython lowering
|
|
131
|
+
"""
|
|
132
|
+
|
|
133
|
+
@staticmethod
|
|
134
|
+
def define_nopython_pipeline(state, name="nopython"):
|
|
135
|
+
"""Returns an nopython mode pipeline based PassManager"""
|
|
136
|
+
# compose pipeline from untyped, typed and lowering parts
|
|
137
|
+
dpb = DefaultPassBuilder
|
|
138
|
+
pm = PassManager(name)
|
|
139
|
+
untyped_passes = dpb.define_untyped_pipeline(state)
|
|
140
|
+
pm.passes.extend(untyped_passes.passes)
|
|
141
|
+
|
|
142
|
+
typed_passes = dpb.define_typed_pipeline(state)
|
|
143
|
+
pm.passes.extend(typed_passes.passes)
|
|
144
|
+
|
|
145
|
+
lowering_passes = dpb.define_nopython_lowering_pipeline(state)
|
|
146
|
+
pm.passes.extend(lowering_passes.passes)
|
|
147
|
+
|
|
148
|
+
pm.finalize()
|
|
149
|
+
return pm
|
|
150
|
+
|
|
151
|
+
@staticmethod
|
|
152
|
+
def define_nopython_lowering_pipeline(state, name="nopython_lowering"):
|
|
153
|
+
pm = PassManager(name)
|
|
154
|
+
# legalise
|
|
155
|
+
pm.add_pass(
|
|
156
|
+
NoPythonSupportedFeatureValidation,
|
|
157
|
+
"ensure features that are in use are in a valid form",
|
|
158
|
+
)
|
|
159
|
+
pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering")
|
|
160
|
+
# Annotate only once legalized
|
|
161
|
+
pm.add_pass(AnnotateTypes, "annotate types")
|
|
162
|
+
# lower
|
|
163
|
+
pm.add_pass(NativeLowering, "native lowering")
|
|
164
|
+
pm.add_pass(CUDABackend, "nopython mode backend")
|
|
165
|
+
pm.finalize()
|
|
166
|
+
return pm
|
|
167
|
+
|
|
168
|
+
@staticmethod
|
|
169
|
+
def define_parfor_gufunc_nopython_lowering_pipeline(
|
|
170
|
+
state, name="parfor_gufunc_nopython_lowering"
|
|
171
|
+
):
|
|
172
|
+
pm = PassManager(name)
|
|
173
|
+
# legalise
|
|
174
|
+
pm.add_pass(
|
|
175
|
+
NoPythonSupportedFeatureValidation,
|
|
176
|
+
"ensure features that are in use are in a valid form",
|
|
177
|
+
)
|
|
178
|
+
pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering")
|
|
179
|
+
# Annotate only once legalized
|
|
180
|
+
pm.add_pass(AnnotateTypes, "annotate types")
|
|
181
|
+
# lower
|
|
182
|
+
pm.add_pass(NativeLowering, "native lowering")
|
|
183
|
+
pm.add_pass(CUDABackend, "nopython mode backend")
|
|
184
|
+
pm.finalize()
|
|
185
|
+
return pm
|
|
186
|
+
|
|
187
|
+
@staticmethod
|
|
188
|
+
def define_typed_pipeline(state, name="typed"):
|
|
189
|
+
"""Returns the typed part of the nopython pipeline"""
|
|
190
|
+
pm = PassManager(name)
|
|
191
|
+
# typing
|
|
192
|
+
pm.add_pass(NopythonTypeInference, "nopython frontend")
|
|
193
|
+
|
|
194
|
+
# strip phis
|
|
195
|
+
pm.add_pass(PreLowerStripPhis, "remove phis nodes")
|
|
196
|
+
|
|
197
|
+
# optimisation
|
|
198
|
+
pm.add_pass(InlineOverloads, "inline overloaded functions")
|
|
199
|
+
if not state.flags.no_rewrites:
|
|
200
|
+
pm.add_pass(NopythonRewrites, "nopython rewrites")
|
|
201
|
+
|
|
202
|
+
pm.finalize()
|
|
203
|
+
return pm
|
|
204
|
+
|
|
205
|
+
@staticmethod
|
|
206
|
+
def define_untyped_pipeline(state, name="untyped"):
|
|
207
|
+
"""Returns an untyped part of the nopython pipeline"""
|
|
208
|
+
pm = PassManager(name)
|
|
209
|
+
if state.func_ir is None:
|
|
210
|
+
pm.add_pass(TranslateByteCode, "analyzing bytecode")
|
|
211
|
+
pm.add_pass(FixupArgs, "fix up args")
|
|
212
|
+
pm.add_pass(IRProcessing, "processing IR")
|
|
213
|
+
pm.add_pass(WithLifting, "Handle with contexts")
|
|
214
|
+
|
|
215
|
+
# inline closures early in case they are using nonlocal's
|
|
216
|
+
# see issue #6585.
|
|
217
|
+
pm.add_pass(
|
|
218
|
+
InlineClosureLikes, "inline calls to locally defined closures"
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
# pre typing
|
|
222
|
+
if not state.flags.no_rewrites:
|
|
223
|
+
pm.add_pass(RewriteSemanticConstants, "rewrite semantic constants")
|
|
224
|
+
pm.add_pass(DeadBranchPrune, "dead branch pruning")
|
|
225
|
+
pm.add_pass(GenericRewrites, "nopython rewrites")
|
|
226
|
+
|
|
227
|
+
pm.add_pass(RewriteDynamicRaises, "rewrite dynamic raises")
|
|
228
|
+
|
|
229
|
+
# convert any remaining closures into functions
|
|
230
|
+
pm.add_pass(
|
|
231
|
+
MakeFunctionToJitFunction,
|
|
232
|
+
"convert make_function into JIT functions",
|
|
233
|
+
)
|
|
234
|
+
# inline functions that have been determined as inlinable and rerun
|
|
235
|
+
# branch pruning, this needs to be run after closures are inlined as
|
|
236
|
+
# the IR repr of a closure masks call sites if an inlinable is called
|
|
237
|
+
# inside a closure
|
|
238
|
+
pm.add_pass(InlineInlinables, "inline inlinable functions")
|
|
239
|
+
if not state.flags.no_rewrites:
|
|
240
|
+
pm.add_pass(DeadBranchPrune, "dead branch pruning")
|
|
241
|
+
|
|
242
|
+
pm.add_pass(FindLiterallyCalls, "find literally calls")
|
|
243
|
+
pm.add_pass(LiteralUnroll, "handles literal_unroll")
|
|
244
|
+
|
|
245
|
+
if state.flags.enable_ssa:
|
|
246
|
+
pm.add_pass(ReconstructSSA, "ssa")
|
|
247
|
+
|
|
248
|
+
if not state.flags.no_rewrites:
|
|
249
|
+
pm.add_pass(DeadBranchPrune, "dead branch pruning")
|
|
250
|
+
|
|
251
|
+
pm.add_pass(LiteralPropagationSubPipelinePass, "Literal propagation")
|
|
252
|
+
|
|
253
|
+
pm.finalize()
|
|
254
|
+
return pm
|
|
255
|
+
|
|
46
256
|
|
|
47
257
|
# The CUDACompileResult (CCR) has a specially-defined entry point equal to its
|
|
48
258
|
# id. This is because the entry point is used as a key into a dict of
|
|
@@ -341,6 +551,142 @@ class CUDACompiler(CompilerBase):
|
|
|
341
551
|
return pm
|
|
342
552
|
|
|
343
553
|
|
|
554
|
+
def compile_extra(
    typingctx,
    targetctx,
    func,
    args,
    return_type,
    flags,
    locals,
    library=None,
    pipeline_class=CUDACompiler,
):
    """Compile a Python function through a compiler pipeline.

    An instance of ``pipeline_class`` is constructed from the supplied
    contexts and options, and its ``compile_extra`` method is invoked on
    ``func``. Whatever that method returns is returned unchanged.

    Parameters
    ----------
    typingctx :
        typing context
    targetctx :
        target context
    func : function
        the python function to be compiled
    args : tuple, list
        argument types
    return_type :
        Use ``None`` to indicate void return
    flags : numba.compiler.Flags
        compiler flags
    locals :
        forwarded as-is to the pipeline constructor
    library : numba.codegen.CodeLibrary
        Used to store the compiled code.
        If it is ``None``, a new CodeLibrary is used.
    pipeline_class : type like numba.compiler.CompilerBase
        compiler pipeline
    """
    # Note the constructor order: ``library`` comes third, ahead of ``args``.
    compiler = pipeline_class(
        typingctx,
        targetctx,
        library,
        args,
        return_type,
        flags,
        locals,
    )
    result = compiler.compile_extra(func)
    return result
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def compile_ir(
    typingctx,
    targetctx,
    func_ir,
    args,
    return_type,
    flags,
    locals,
    lifted=(),
    lifted_from=None,
    is_lifted_loop=False,
    library=None,
    pipeline_class=CUDACompiler,
):
    """
    Compile a function starting from pre-built IR.

    For internal use only.

    When ``is_lifted_loop`` is false, a single ``pipeline_class`` instance
    is created with ``flags`` and its ``compile_ir`` is run on ``func_ir``.

    When ``is_lifted_loop`` is true, a copy of the IR is first compiled with
    rewrites disabled. If the caller's ``flags`` allow rewrites, a second
    copy is then compiled with the original flags (``NumbaWarning`` is
    silenced during that attempt). The rewrite-enabled result is returned
    when that attempt succeeds; otherwise the no-rewrite result is returned.
    """

    def run_pipeline(ir_to_compile, active_flags):
        # Every compilation attempt gets its own pipeline instance.
        compiler = pipeline_class(
            typingctx,
            targetctx,
            library,
            args,
            return_type,
            active_flags,
            locals,
        )
        return compiler.compile_ir(
            func_ir=ir_to_compile, lifted=lifted, lifted_from=lifted_from
        )

    # Ordinary IR: one straightforward compilation with the given flags.
    if not is_lifted_loop:
        return run_pipeline(func_ir, flags)

    # Special path that should only be taken for IR from a lifted loop.
    #
    # It is pessimistic and costly, but rarely exercised, and is expected to
    # go away once IR is immutable. Rewrite passes can mutate the IR into a
    # state where invalid tokens reach lowering, end up in the LLVM IR and
    # trigger RuntimeErrors because LLVM cannot compile it. Strategy:
    #   1. Copy the original flags and switch rewrites off.
    #   2. Compile with those flags to obtain a safe result.
    #   3. Attempt another compilation with the original flags (the IR may
    #      be rewritten).
    #   4. Prefer the result of 3 if it succeeded, otherwise use 2.

    # Step 1: flags identical to the caller's, but with rewrites disabled.
    norw_flags = copy.deepcopy(flags)
    norw_flags.no_rewrites = True

    # Step 2: rewrites are off, so the IR shouldn't be mutated irreparably.
    fallback_cres = run_pipeline(func_ir.copy(), norw_flags)

    # The caller already asked for no rewrites: nothing better to try.
    if flags.no_rewrites:
        return fallback_cres

    # Step 3: retry with rewrites on. The IR might get broken, but a usable
    # result is already in hand. Warnings are suppressed during the retry.
    with catch_warnings():
        simplefilter("ignore", NumbaWarning)
        try:
            rewritten_cres = run_pipeline(func_ir.copy(), flags)
        except Exception:
            rewritten_cres = None

    # Step 4: use the rewrite variant if it worked, else the backup.
    return fallback_cres if rewritten_cres is None else rewritten_cres
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def compile_internal(
    typingctx,
    targetctx,
    library,
    func,
    args,
    return_type,
    flags,
    locals,
):
    """
    For internal use only.

    Builds a ``CUDACompiler`` from the given contexts, library and options,
    and returns the result of calling its ``compile_extra`` on ``func``.
    """
    compiler = CUDACompiler(
        typingctx,
        targetctx,
        library,
        args,
        return_type,
        flags,
        locals,
    )
    result = compiler.compile_extra(func)
    return result
|
|
688
|
+
|
|
689
|
+
|
|
344
690
|
@global_compiler_lock
|
|
345
691
|
def compile_cuda(
|
|
346
692
|
pyfunc,
|
|
@@ -400,7 +746,7 @@ def compile_cuda(
|
|
|
400
746
|
from numba.core.target_extension import target_override
|
|
401
747
|
|
|
402
748
|
with target_override("cuda"):
|
|
403
|
-
cres =
|
|
749
|
+
cres = compile_extra(
|
|
404
750
|
typingctx=typingctx,
|
|
405
751
|
targetctx=targetctx,
|
|
406
752
|
func=pyfunc,
|