numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import builtins
|
|
2
5
|
import collections
|
|
3
6
|
import dis
|
|
@@ -5,28 +8,29 @@ import operator
|
|
|
5
8
|
import logging
|
|
6
9
|
import textwrap
|
|
7
10
|
|
|
8
|
-
from numba.core import errors, ir
|
|
11
|
+
from numba.core import errors, ir
|
|
12
|
+
from numba.cuda.core import config
|
|
13
|
+
from numba.cuda.errors import UnsupportedBytecodeError
|
|
9
14
|
from numba.core.errors import (
|
|
10
15
|
NotDefinedError,
|
|
11
|
-
UnsupportedBytecodeError,
|
|
12
16
|
error_extras,
|
|
13
17
|
)
|
|
14
18
|
from numba.cuda.core import ir_utils
|
|
15
|
-
from numba.
|
|
19
|
+
from numba.cuda.utils import (
|
|
16
20
|
PYVERSION,
|
|
17
21
|
BINOPS_TO_OPERATORS,
|
|
18
22
|
INPLACE_BINOPS_TO_OPERATORS,
|
|
19
|
-
_lazy_pformat,
|
|
20
23
|
)
|
|
24
|
+
from numba.cuda.utils import _lazy_pformat
|
|
21
25
|
from numba.core.byteflow import Flow, AdaptDFA, AdaptCFA, BlockKind
|
|
22
|
-
from numba.core.unsafe import eh
|
|
26
|
+
from numba.cuda.core.unsafe import eh
|
|
23
27
|
from numba.cpython.unsafe.tuple import unpack_single_tuple
|
|
24
28
|
|
|
25
29
|
|
|
26
30
|
if PYVERSION in ((3, 12), (3, 13)):
|
|
27
31
|
# Operands for CALL_INTRINSIC_1
|
|
28
32
|
from numba.core.byteflow import CALL_INTRINSIC_1_Operand as ci1op
|
|
29
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
33
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
30
34
|
pass
|
|
31
35
|
else:
|
|
32
36
|
raise NotImplementedError(PYVERSION)
|
|
@@ -1392,7 +1396,7 @@ class Interpreter(object):
|
|
|
1392
1396
|
if entry.start < self.last_active_offset
|
|
1393
1397
|
]
|
|
1394
1398
|
)
|
|
1395
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
1399
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
1396
1400
|
pass
|
|
1397
1401
|
else:
|
|
1398
1402
|
raise NotImplementedError(PYVERSION)
|
|
@@ -1407,7 +1411,10 @@ class Interpreter(object):
|
|
|
1407
1411
|
if PYVERSION in ((3, 11), (3, 12), (3, 13)):
|
|
1408
1412
|
# Insert end of try markers
|
|
1409
1413
|
self._end_try_blocks()
|
|
1410
|
-
elif PYVERSION in (
|
|
1414
|
+
elif PYVERSION in (
|
|
1415
|
+
(3, 9),
|
|
1416
|
+
(3, 10),
|
|
1417
|
+
):
|
|
1411
1418
|
pass
|
|
1412
1419
|
else:
|
|
1413
1420
|
raise NotImplementedError(PYVERSION)
|
|
@@ -1431,7 +1438,7 @@ class Interpreter(object):
|
|
|
1431
1438
|
peepholes = []
|
|
1432
1439
|
if PYVERSION in ((3, 11), (3, 12), (3, 13)):
|
|
1433
1440
|
peepholes.append(peep_hole_split_at_pop_block)
|
|
1434
|
-
if PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
|
|
1441
|
+
if PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12), (3, 13)):
|
|
1435
1442
|
peepholes.append(peep_hole_list_to_tuple)
|
|
1436
1443
|
peepholes.append(peep_hole_delete_with_exit)
|
|
1437
1444
|
if PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
|
|
@@ -1589,7 +1596,10 @@ class Interpreter(object):
|
|
|
1589
1596
|
if newtryblk is not None:
|
|
1590
1597
|
if newtryblk is not tryblk:
|
|
1591
1598
|
self._insert_try_block_begin()
|
|
1592
|
-
elif PYVERSION in (
|
|
1599
|
+
elif PYVERSION in (
|
|
1600
|
+
(3, 9),
|
|
1601
|
+
(3, 10),
|
|
1602
|
+
):
|
|
1593
1603
|
while self.syntax_blocks:
|
|
1594
1604
|
if offset >= self.syntax_blocks[-1].exit:
|
|
1595
1605
|
self.syntax_blocks.pop()
|
|
@@ -1826,7 +1836,10 @@ class Interpreter(object):
|
|
|
1826
1836
|
if inst.offset >= top.exit:
|
|
1827
1837
|
self.current_block.append(ir.PopBlock(loc=self.loc))
|
|
1828
1838
|
self.syntax_blocks.pop()
|
|
1829
|
-
elif PYVERSION in (
|
|
1839
|
+
elif PYVERSION in (
|
|
1840
|
+
(3, 9),
|
|
1841
|
+
(3, 10),
|
|
1842
|
+
):
|
|
1830
1843
|
pass
|
|
1831
1844
|
else:
|
|
1832
1845
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2023,7 +2036,7 @@ class Interpreter(object):
|
|
|
2023
2036
|
target = self.get(container)
|
|
2024
2037
|
expr = ir.Expr.getitem(target, index=index, loc=self.loc)
|
|
2025
2038
|
self.store(expr, res)
|
|
2026
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2039
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2027
2040
|
pass
|
|
2028
2041
|
else:
|
|
2029
2042
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2049,7 +2062,7 @@ class Interpreter(object):
|
|
|
2049
2062
|
target=target, index=index, value=value, loc=self.loc
|
|
2050
2063
|
)
|
|
2051
2064
|
self.current_block.append(stmt)
|
|
2052
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2065
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2053
2066
|
pass
|
|
2054
2067
|
else:
|
|
2055
2068
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2342,7 +2355,7 @@ class Interpreter(object):
|
|
|
2342
2355
|
dstname = self.code_locals[oparg2]
|
|
2343
2356
|
self.store(value=self.get(value2), name=dstname)
|
|
2344
2357
|
|
|
2345
|
-
elif PYVERSION in ((3, 10), (3, 11), (3, 12)):
|
|
2358
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
|
|
2346
2359
|
pass
|
|
2347
2360
|
else:
|
|
2348
2361
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2360,7 +2373,7 @@ class Interpreter(object):
|
|
|
2360
2373
|
undef = ir.Expr.undef(loc=self.loc)
|
|
2361
2374
|
self.store(undef, name=res)
|
|
2362
2375
|
|
|
2363
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2376
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2364
2377
|
pass
|
|
2365
2378
|
else:
|
|
2366
2379
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2400,7 +2413,7 @@ class Interpreter(object):
|
|
|
2400
2413
|
item = self.get(item)
|
|
2401
2414
|
if PYVERSION in ((3, 12), (3, 13)):
|
|
2402
2415
|
attr = self.code_names[inst.arg >> 1]
|
|
2403
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2416
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2404
2417
|
attr = self.code_names[inst.arg]
|
|
2405
2418
|
else:
|
|
2406
2419
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2436,7 +2449,10 @@ class Interpreter(object):
|
|
|
2436
2449
|
value = self.get_global_value(name)
|
|
2437
2450
|
gl = ir.Global(name, value, loc=self.loc)
|
|
2438
2451
|
self.store(gl, res)
|
|
2439
|
-
elif PYVERSION in (
|
|
2452
|
+
elif PYVERSION in (
|
|
2453
|
+
(3, 9),
|
|
2454
|
+
(3, 10),
|
|
2455
|
+
):
|
|
2440
2456
|
|
|
2441
2457
|
def op_LOAD_GLOBAL(self, inst, res):
|
|
2442
2458
|
name = self.code_names[inst.arg]
|
|
@@ -2464,7 +2480,10 @@ class Interpreter(object):
|
|
|
2464
2480
|
value = self.get_closure_value(idx)
|
|
2465
2481
|
gl = ir.FreeVar(idx, name, value, loc=self.loc)
|
|
2466
2482
|
self.store(gl, res)
|
|
2467
|
-
elif PYVERSION in (
|
|
2483
|
+
elif PYVERSION in (
|
|
2484
|
+
(3, 9),
|
|
2485
|
+
(3, 10),
|
|
2486
|
+
):
|
|
2468
2487
|
|
|
2469
2488
|
def op_LOAD_DEREF(self, inst, res):
|
|
2470
2489
|
n_cellvars = len(self.code_cellvars)
|
|
@@ -2491,7 +2510,10 @@ class Interpreter(object):
|
|
|
2491
2510
|
name = self.func_id.func.__code__._varname_from_oparg(inst.arg)
|
|
2492
2511
|
value = self.get(value)
|
|
2493
2512
|
self.store(value=value, name=name)
|
|
2494
|
-
elif PYVERSION in (
|
|
2513
|
+
elif PYVERSION in (
|
|
2514
|
+
(3, 9),
|
|
2515
|
+
(3, 10),
|
|
2516
|
+
):
|
|
2495
2517
|
|
|
2496
2518
|
def op_STORE_DEREF(self, inst, value):
|
|
2497
2519
|
n_cellvars = len(self.code_cellvars)
|
|
@@ -2543,7 +2565,7 @@ class Interpreter(object):
|
|
|
2543
2565
|
if ex.target == end
|
|
2544
2566
|
]
|
|
2545
2567
|
)
|
|
2546
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2568
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2547
2569
|
pass
|
|
2548
2570
|
else:
|
|
2549
2571
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3104,7 +3126,7 @@ class Interpreter(object):
|
|
|
3104
3126
|
self.store(ir.Expr.cast(self.get(retval), loc=self.loc), castval)
|
|
3105
3127
|
ret = ir.Return(self.get(castval), loc=self.loc)
|
|
3106
3128
|
self.current_block.append(ret)
|
|
3107
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3129
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3108
3130
|
pass
|
|
3109
3131
|
else:
|
|
3110
3132
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3114,7 +3136,7 @@ class Interpreter(object):
|
|
|
3114
3136
|
def op_TO_BOOL(self, inst, val, res):
|
|
3115
3137
|
self.store(self.get(val), res) # TODO: just a lazy hack
|
|
3116
3138
|
|
|
3117
|
-
elif PYVERSION in ((3, 10), (3, 11), (3, 12)):
|
|
3139
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
|
|
3118
3140
|
pass
|
|
3119
3141
|
else:
|
|
3120
3142
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3125,7 +3147,7 @@ class Interpreter(object):
|
|
|
3125
3147
|
# TODO: fifth lowest bit now indicates a forced version to bool.
|
|
3126
3148
|
elif PYVERSION in ((3, 12),):
|
|
3127
3149
|
op = dis.cmp_op[inst.arg >> 4]
|
|
3128
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3150
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3129
3151
|
op = dis.cmp_op[inst.arg]
|
|
3130
3152
|
else:
|
|
3131
3153
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3256,7 +3278,7 @@ class Interpreter(object):
|
|
|
3256
3278
|
|
|
3257
3279
|
def op_POP_JUMP_IF_NOT_NONE(self, inst, pred):
|
|
3258
3280
|
self._jump_if_none(inst, pred, False)
|
|
3259
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3281
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3260
3282
|
pass
|
|
3261
3283
|
else:
|
|
3262
3284
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3426,7 +3448,10 @@ class Interpreter(object):
|
|
|
3426
3448
|
assert 0, "unreachable"
|
|
3427
3449
|
self.store(gl, res)
|
|
3428
3450
|
|
|
3429
|
-
elif PYVERSION in (
|
|
3451
|
+
elif PYVERSION in (
|
|
3452
|
+
(3, 9),
|
|
3453
|
+
(3, 10),
|
|
3454
|
+
):
|
|
3430
3455
|
|
|
3431
3456
|
def op_LOAD_CLOSURE(self, inst, res):
|
|
3432
3457
|
n_cellvars = len(self.code_cellvars)
|
|
@@ -3576,7 +3601,7 @@ class Interpreter(object):
|
|
|
3576
3601
|
return
|
|
3577
3602
|
else:
|
|
3578
3603
|
raise NotImplementedError(operand)
|
|
3579
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3604
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3580
3605
|
pass
|
|
3581
3606
|
else:
|
|
3582
3607
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3586,7 +3611,7 @@ if PYVERSION in ((3, 12), (3, 13)):
|
|
|
3586
3611
|
|
|
3587
3612
|
class INTRINSIC_STOPITERATION_ERROR(AssertionError):
|
|
3588
3613
|
pass
|
|
3589
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3614
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3590
3615
|
pass
|
|
3591
3616
|
else:
|
|
3592
3617
|
raise NotImplementedError(PYVERSION)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Copyright (c)
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2017 Intel Corporation
|
|
2
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
3
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
4
|
-
#
|
|
5
4
|
|
|
6
5
|
import numpy
|
|
7
6
|
import math
|
|
@@ -12,7 +11,9 @@ import warnings
|
|
|
12
11
|
|
|
13
12
|
import numba
|
|
14
13
|
from numba.core.extending import _Intrinsic
|
|
15
|
-
from numba.core import types,
|
|
14
|
+
from numba.core import types, ir, analysis
|
|
15
|
+
from numba.cuda import typing
|
|
16
|
+
from numba.cuda.core import postproc, rewrites, config
|
|
16
17
|
from numba.core.typing.templates import signature
|
|
17
18
|
from numba.core.analysis import (
|
|
18
19
|
compute_live_map,
|
|
@@ -249,12 +250,7 @@ def mk_range_block(typemap, start, stop, step, calltypes, scope, loc):
|
|
|
249
250
|
range_call_assign = ir.Assign(range_call, range_call_var, loc)
|
|
250
251
|
# iter_var = getiter(range_call_var)
|
|
251
252
|
iter_call = ir.Expr.getiter(range_call_var, loc)
|
|
252
|
-
|
|
253
|
-
calltype_sig = signature(
|
|
254
|
-
types.range_iter64_type, types.range_state64_type
|
|
255
|
-
)
|
|
256
|
-
else:
|
|
257
|
-
calltype_sig = signature(types.range_iter_type, types.range_state_type)
|
|
253
|
+
calltype_sig = signature(types.range_iter64_type, types.range_state64_type)
|
|
258
254
|
calltypes[iter_call] = calltype_sig
|
|
259
255
|
iter_var = ir.Var(scope, mk_unique_var("$iter_var"), loc)
|
|
260
256
|
typemap[iter_var.name] = types.iterators.RangeIteratorType(types.intp)
|
|
@@ -333,10 +329,7 @@ def mk_loop_header(typemap, phi_var, calltypes, scope, loc):
|
|
|
333
329
|
types.intp, types.boolean
|
|
334
330
|
)
|
|
335
331
|
iternext_call = ir.Expr.iternext(phi_var, loc)
|
|
336
|
-
|
|
337
|
-
range_iter_type = types.range_iter64_type
|
|
338
|
-
else:
|
|
339
|
-
range_iter_type = types.range_iter_type
|
|
332
|
+
range_iter_type = types.range_iter64_type
|
|
340
333
|
calltypes[iternext_call] = signature(
|
|
341
334
|
types.containers.Pair(types.intp, types.boolean), range_iter_type
|
|
342
335
|
)
|
|
@@ -813,8 +806,6 @@ def has_no_side_effect(rhs, lives, call_table):
|
|
|
813
806
|
"""Returns True if this expression has no side effects that
|
|
814
807
|
would prevent re-ordering.
|
|
815
808
|
"""
|
|
816
|
-
from numba.parfors import array_analysis, parfor
|
|
817
|
-
from numba.misc.special import prange
|
|
818
809
|
|
|
819
810
|
if isinstance(rhs, ir.Expr) and rhs.op == "call":
|
|
820
811
|
func_name = rhs.func.name
|
|
@@ -827,11 +818,7 @@ def has_no_side_effect(rhs, lives, call_table):
|
|
|
827
818
|
or call_list == ["stencil", numba]
|
|
828
819
|
or call_list == ["log", numpy]
|
|
829
820
|
or call_list == ["dtype", numpy]
|
|
830
|
-
or call_list == [array_analysis.wrap_index]
|
|
831
|
-
or call_list == [prange]
|
|
832
|
-
or call_list == ["prange", numba]
|
|
833
821
|
or call_list == ["pndindex", numba]
|
|
834
|
-
or call_list == [parfor.internal_prange]
|
|
835
822
|
or call_list == ["ceil", math]
|
|
836
823
|
or call_list == [max]
|
|
837
824
|
or call_list == [int]
|
|
@@ -1894,7 +1881,7 @@ def compile_to_numba_ir(
|
|
|
1894
1881
|
if typingctx and other typing inputs are available and update typemap and
|
|
1895
1882
|
calltypes.
|
|
1896
1883
|
"""
|
|
1897
|
-
from numba.core import typed_passes
|
|
1884
|
+
from numba.cuda.core import typed_passes
|
|
1898
1885
|
|
|
1899
1886
|
# mk_func can be actual function or make_function node, or a njit function
|
|
1900
1887
|
if hasattr(mk_func, "code"):
|
|
@@ -1976,7 +1963,8 @@ def get_ir_of_code(glbls, fcode):
|
|
|
1976
1963
|
fcode, func_env, func_arg, func_clo, glbls
|
|
1977
1964
|
)
|
|
1978
1965
|
|
|
1979
|
-
from numba.
|
|
1966
|
+
from numba.cuda import compiler
|
|
1967
|
+
from numba.cuda.core.compiler import StateDict
|
|
1980
1968
|
|
|
1981
1969
|
ir = compiler.run_frontend(f)
|
|
1982
1970
|
|
|
@@ -1985,7 +1973,7 @@ def get_ir_of_code(glbls, fcode):
|
|
|
1985
1973
|
# for example, Raise nodes need to become StaticRaise before type inference
|
|
1986
1974
|
class DummyPipeline(object):
|
|
1987
1975
|
def __init__(self, f_ir):
|
|
1988
|
-
self.state =
|
|
1976
|
+
self.state = StateDict()
|
|
1989
1977
|
self.state.typingctx = None
|
|
1990
1978
|
self.state.targetctx = None
|
|
1991
1979
|
self.state.args = None
|
|
@@ -1998,10 +1986,10 @@ def get_ir_of_code(glbls, fcode):
|
|
|
1998
1986
|
rewrites.rewrite_registry.apply("before-inference", state)
|
|
1999
1987
|
# call inline pass to handle cases like stencils and comprehensions
|
|
2000
1988
|
swapped = {} # TODO: get this from diagnostics store
|
|
2001
|
-
|
|
1989
|
+
from numba.cuda.core.inline_closurecall import InlineClosureCallPass
|
|
2002
1990
|
|
|
2003
|
-
inline_pass =
|
|
2004
|
-
ir, numba.core.
|
|
1991
|
+
inline_pass = InlineClosureCallPass(
|
|
1992
|
+
ir, numba.cuda.core.options.ParallelOptions(False), swapped
|
|
2005
1993
|
)
|
|
2006
1994
|
inline_pass.run()
|
|
2007
1995
|
|
|
@@ -2014,8 +2002,8 @@ def get_ir_of_code(glbls, fcode):
|
|
|
2014
2002
|
# added to create valid IR.
|
|
2015
2003
|
|
|
2016
2004
|
# rebuild IR in SSA form
|
|
2017
|
-
from numba.core.untyped_passes import ReconstructSSA
|
|
2018
|
-
from numba.core.typed_passes import PreLowerStripPhis
|
|
2005
|
+
from numba.cuda.core.untyped_passes import ReconstructSSA
|
|
2006
|
+
from numba.cuda.core.typed_passes import PreLowerStripPhis
|
|
2019
2007
|
|
|
2020
2008
|
reconstruct_ssa = ReconstructSSA()
|
|
2021
2009
|
phistrip = PreLowerStripPhis()
|
|
@@ -2495,7 +2483,7 @@ def legalize_single_scope(blocks):
|
|
|
2495
2483
|
return len({blk.scope for blk in blocks.values()}) == 1
|
|
2496
2484
|
|
|
2497
2485
|
|
|
2498
|
-
def check_and_legalize_ir(func_ir, flags: "numba.core.
|
|
2486
|
+
def check_and_legalize_ir(func_ir, flags: "numba.core.flags.Flags"):
|
|
2499
2487
|
"""
|
|
2500
2488
|
This checks that the IR presented is legal
|
|
2501
2489
|
"""
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Defines CUDA Options for use in the CUDA target
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABCMeta, abstractmethod
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AbstractOptionValue(metaclass=ABCMeta):
    """Base class for option values that can describe themselves as text.

    Subclasses must implement :meth:`encode`; the ``repr()`` of an instance
    is built from the concrete class name and that encoding.
    """

    @abstractmethod
    def encode(self) -> str:
        """Return a string encoding of the option's value."""
        ...

    def __repr__(self) -> str:
        # Rendered as ``ClassName(<encoding>)``.
        cls_name = type(self).__name__
        encoded = self.encode()
        return f"{cls_name}({encoded})"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FastMathOptions(AbstractOptionValue):
    """
    Options for controlling fast math optimization.

    ``value`` may be:

    * another ``FastMathOptions`` -- its flags are copied;
    * ``True`` -- enables the single ``"fast"`` flag;
    * ``False`` -- no flags enabled;
    * a ``set`` of flag names;
    * a ``dict`` mapping flag names to a truthy/falsy "enabled" switch.

    Raises ``ValueError`` if a set/dict names an unrecognized flag, or if
    ``value`` is of any other type.
    """

    def __init__(self, value):
        # https://releases.llvm.org/7.0.0/docs/LangRef.html#fast-math-flags
        valid_flags = {
            "fast",
            "nnan",
            "ninf",
            "nsz",
            "arcp",
            "contract",
            "afn",
            "reassoc",
        }

        if isinstance(value, FastMathOptions):
            self.flags = value.flags.copy()
        elif value is True:
            self.flags = {"fast"}
        elif value is False:
            self.flags = set()
        elif isinstance(value, set):
            invalid = value - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            # Keep a private copy: holding the caller's set by reference
            # would let later mutation of that set change this option and
            # slip unvalidated flag names past the check above.
            self.flags = set(value)
        elif isinstance(value, dict):
            invalid = set(value.keys()) - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            # Only flags whose dict value is truthy are enabled.
            self.flags = {v for v, enable in value.items() if enable}
        else:
            msg = "Expected fastmath option(s) to be either a bool, dict or set"
            raise ValueError(msg)

    def __bool__(self):
        """True when at least one fast-math flag is enabled."""
        return bool(self.flags)

    __nonzero__ = __bool__

    def encode(self) -> str:
        """Return the string form of the enabled flag set."""
        return str(self.flags)

    def __eq__(self, other):
        """Compare flag sets; only instances of the exact same type match."""
        if type(other) is type(self):
            return self.flags == other.flags
        return NotImplemented
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ParallelOptions(AbstractOptionValue):
    """
    Options for controlling auto parallelization.

    ``value`` may be:

    * a ``bool`` -- every option (including ``enabled``) is set to it;
    * a ``dict`` -- ``enabled`` is set to ``True`` and each other option is
      taken from the dict, defaulting to ``True`` when absent;
    * another ``ParallelOptions`` -- all options are copied from it.

    Raises ``NameError`` if the dict contains an unrecognized key and
    ``ValueError`` if ``value`` is of any other type.
    """

    # Also serves as the list of option names used by ``_get_values``.
    __slots__ = (
        "enabled",
        "comprehension",
        "reduction",
        "inplace_binop",
        "setitem",
        "numpy",
        "stencil",
        "fusion",
        "prange",
    )

    def __init__(self, value):
        names = ParallelOptions.__slots__
        if isinstance(value, bool):
            for name in names:
                setattr(self, name, value)
        elif isinstance(value, dict):
            # Work on a copy: popping from the caller's dict would empty it,
            # so a second ParallelOptions built from the same dict would
            # silently fall back to the all-True defaults.
            remaining = dict(value)
            self.enabled = True
            for name in names:
                if name != "enabled":
                    setattr(self, name, remaining.pop(name, True))
            if remaining:
                # Anything still present was not a known option name.
                msg = "Unrecognized parallel options: %s" % remaining.keys()
                raise NameError(msg)
        elif isinstance(value, ParallelOptions):
            for name in names:
                setattr(self, name, getattr(value, name))
        else:
            msg = "Expect parallel option to be either a bool or a dict"
            raise ValueError(msg)

    def _get_values(self):
        """Get values as dictionary."""
        return {k: getattr(self, k) for k in self.__slots__}

    def __eq__(self, other):
        """Compare all option values; only the exact same type can match."""
        if type(other) is type(self):
            return self._get_values() == other._get_values()
        return NotImplemented

    def encode(self) -> str:
        """Return ``name=value`` pairs joined by ``", "`` in slot order."""
        return ", ".join(f"{k}={v}" for k, v in self._get_values().items())
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class InlineOptions(AbstractOptionValue):
    """
    Options for controlling inlining

    ``value`` must be the string ``"always"`` or ``"never"``, or an object
    with a ``__call__`` attribute (treated as a cost model). Anything else
    raises ``ValueError``.
    """

    def __init__(self, value):
        if isinstance(value, str):
            ok = value in ("always", "never")
        else:
            ok = hasattr(value, "__call__")

        if not ok:
            # Format with a 1-tuple: a bare ``% value`` would unpack a tuple
            # argument and raise TypeError (or print only its single
            # element) instead of reporting the offending value.
            msg = (
                "kwarg 'inline' must be one of the strings 'always' or "
                "'never', or it can be a callable that returns True/False. "
                "Found value %s" % (value,)
            )
            raise ValueError(msg)

        self._inline = value

    @property
    def is_never_inline(self):
        """
        True if never inline
        """
        return self._inline == "never"

    @property
    def is_always_inline(self):
        """
        True if always inline
        """
        return self._inline == "always"

    @property
    def has_cost_model(self):
        """
        True if a cost model is provided
        """
        # Anything other than the two fixed strings is a cost model.
        return not (self.is_always_inline or self.is_never_inline)

    @property
    def value(self):
        """
        The raw value
        """
        return self._inline

    def __eq__(self, other):
        """Compare raw values; only the exact same type can match."""
        if type(other) is type(self):
            return self.value == other.value
        return NotImplemented

    def encode(self) -> str:
        """Return the ``repr`` of the raw inline value."""
        return repr(self._inline)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class TargetOptions:
    """Translate user-supplied decorator options into attributes on a
    ``numba.core.compiler.Flags`` object used by lowering and the target
    context.

    Subclasses declare the supported options as class attributes holding
    ``Mapping`` instances; the attribute name is the user-facing option name.
    """

    class Mapping:
        """Associates one option with the flag it sets.

        ``flag_name`` is the attribute assigned on the flags object and
        ``apply`` converts the user's value first (identity by default).
        """

        def __init__(self, flag_name, apply=lambda x: x):
            self.flag_name = flag_name
            self.apply = apply

    def finalize(self, flags, options):
        """Hook for subclasses to make target specific adjustments to the
        flags once the declared mappings have been applied. The base
        implementation does nothing.

        Parameters
        ----------
        flags : Flags
        options : dict
        """
        pass

    @classmethod
    def parse_as_flags(cls, flags, options):
        """Apply the target options given in ``options`` to ``flags`` and
        return that same ``flags`` object.

        Parameters
        ----------
        flags : Flags
        options : dict
        """
        instance = cls()
        instance._apply(flags, options)
        instance.finalize(flags, options)
        return flags

    def _apply(self, flags, options):
        """Set a flag for every declared ``Mapping`` whose name appears in
        ``options``; raise ``KeyError`` if any option has no mapping.
        """
        owner = type(self)

        # Gather every Mapping declared on the class (inherited ones too).
        mappings = {}
        for attr_name in dir(owner):
            candidate = getattr(owner, attr_name)
            if isinstance(candidate, owner.Mapping):
                mappings[attr_name] = candidate

        consumed = set()
        for opt_name, mapping in mappings.items():
            if opt_name not in options:
                continue
            converted = mapping.apply(options[opt_name])
            setattr(flags, mapping.flag_name, converted)
            consumed.add(opt_name)

        leftover = set(options) - consumed
        if leftover:
            # Options that were supplied but matched no declared Mapping.
            raise KeyError(
                f"Unrecognized options: {leftover}. "
                f"Known options are {mappings.keys()}"
            )
|