numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
|
|
1
5
|
def initialize_all():
|
|
2
6
|
# Import models to register them with the data model manager
|
|
3
7
|
import numba.cuda.models # noqa: F401
|
numba_cuda/numba/cuda/locks.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
from collections import namedtuple, defaultdict
|
|
2
5
|
import operator
|
|
3
6
|
import warnings
|
|
@@ -7,18 +10,14 @@ from llvmlite import ir as llvm_ir
|
|
|
7
10
|
|
|
8
11
|
from numba.core import (
|
|
9
12
|
typing,
|
|
10
|
-
utils,
|
|
11
13
|
types,
|
|
12
14
|
ir,
|
|
13
|
-
debuginfo,
|
|
14
|
-
funcdesc,
|
|
15
15
|
generators,
|
|
16
|
-
config,
|
|
17
|
-
cgutils,
|
|
18
16
|
removerefctpass,
|
|
19
|
-
targetconfig,
|
|
20
17
|
)
|
|
21
|
-
from numba.cuda.core import ir_utils
|
|
18
|
+
from numba.cuda import debuginfo, cgutils, utils
|
|
19
|
+
from numba.cuda.core import ir_utils, targetconfig, funcdesc, config
|
|
20
|
+
|
|
22
21
|
from numba.core.errors import (
|
|
23
22
|
LoweringError,
|
|
24
23
|
new_error_context,
|
|
@@ -27,8 +26,8 @@ from numba.core.errors import (
|
|
|
27
26
|
UnsupportedError,
|
|
28
27
|
NumbaDebugInfoWarning,
|
|
29
28
|
)
|
|
30
|
-
from numba.core.funcdesc import default_mangler
|
|
31
|
-
from numba.core.environment import Environment
|
|
29
|
+
from numba.cuda.core.funcdesc import default_mangler
|
|
30
|
+
from numba.cuda.core.environment import Environment
|
|
32
31
|
from numba.core.analysis import compute_use_defs, must_use_alloca
|
|
33
32
|
from numba.misc.firstlinefinder import get_func_body_first_lineno
|
|
34
33
|
from numba import version_info
|
|
@@ -463,7 +462,7 @@ class Lower(BaseLower):
|
|
|
463
462
|
self._blk_local_varmap = {}
|
|
464
463
|
|
|
465
464
|
def pre_block(self, block):
|
|
466
|
-
from numba.core.unsafe import eh
|
|
465
|
+
from numba.cuda.core.unsafe import eh
|
|
467
466
|
|
|
468
467
|
super(Lower, self).pre_block(block)
|
|
469
468
|
self._cur_ir_block = block
|
|
@@ -1026,9 +1025,6 @@ class Lower(BaseLower):
|
|
|
1026
1025
|
elif isinstance(fnty, types.RecursiveCall):
|
|
1027
1026
|
res = self._lower_call_RecursiveCall(fnty, expr, signature)
|
|
1028
1027
|
|
|
1029
|
-
elif isinstance(fnty, types.FunctionType):
|
|
1030
|
-
res = self._lower_call_FunctionType(fnty, expr, signature)
|
|
1031
|
-
|
|
1032
1028
|
else:
|
|
1033
1029
|
res = self._lower_call_normal(fnty, expr, signature)
|
|
1034
1030
|
|
|
@@ -1049,7 +1045,7 @@ class Lower(BaseLower):
|
|
|
1049
1045
|
)
|
|
1050
1046
|
|
|
1051
1047
|
def _lower_call_ObjModeDispatcher(self, fnty, expr, signature):
|
|
1052
|
-
from numba.core.pythonapi import ObjModeUtils
|
|
1048
|
+
from numba.cuda.core.pythonapi import ObjModeUtils
|
|
1053
1049
|
|
|
1054
1050
|
self.init_pyapi()
|
|
1055
1051
|
# Acquire the GIL
|
|
@@ -1226,136 +1222,6 @@ class Lower(BaseLower):
|
|
|
1226
1222
|
)
|
|
1227
1223
|
return res
|
|
1228
1224
|
|
|
1229
|
-
def _lower_call_FunctionType(self, fnty, expr, signature):
|
|
1230
|
-
self.debug_print("# calling first-class function type")
|
|
1231
|
-
sig = types.unliteral(signature)
|
|
1232
|
-
if not fnty.check_signature(signature):
|
|
1233
|
-
# value dependent polymorphism?
|
|
1234
|
-
raise UnsupportedError(
|
|
1235
|
-
f"mismatch of function types:"
|
|
1236
|
-
f" expected {fnty} but got {types.FunctionType(sig)}"
|
|
1237
|
-
)
|
|
1238
|
-
argvals = self.fold_call_args(
|
|
1239
|
-
fnty,
|
|
1240
|
-
sig,
|
|
1241
|
-
expr.args,
|
|
1242
|
-
expr.vararg,
|
|
1243
|
-
expr.kws,
|
|
1244
|
-
)
|
|
1245
|
-
return self.__call_first_class_function_pointer(
|
|
1246
|
-
fnty.ftype,
|
|
1247
|
-
expr.func.name,
|
|
1248
|
-
sig,
|
|
1249
|
-
argvals,
|
|
1250
|
-
)
|
|
1251
|
-
|
|
1252
|
-
def __call_first_class_function_pointer(self, ftype, fname, sig, argvals):
|
|
1253
|
-
"""
|
|
1254
|
-
Calls a first-class function pointer.
|
|
1255
|
-
|
|
1256
|
-
This function is responsible for calling a first-class function pointer,
|
|
1257
|
-
which can either be a JIT-compiled function or a Python function. It
|
|
1258
|
-
determines if a JIT address is available, and if so, calls the function
|
|
1259
|
-
using the JIT address. Otherwise, it calls the function using a function
|
|
1260
|
-
pointer obtained from the `__get_first_class_function_pointer` method.
|
|
1261
|
-
|
|
1262
|
-
Args:
|
|
1263
|
-
ftype: The type of the function.
|
|
1264
|
-
fname: The name of the function.
|
|
1265
|
-
sig: The signature of the function.
|
|
1266
|
-
argvals: The argument values to pass to the function.
|
|
1267
|
-
|
|
1268
|
-
Returns:
|
|
1269
|
-
The result of calling the function.
|
|
1270
|
-
"""
|
|
1271
|
-
context = self.context
|
|
1272
|
-
builder = self.builder
|
|
1273
|
-
# Determine if jit address is available
|
|
1274
|
-
fstruct = self.loadvar(fname)
|
|
1275
|
-
struct = cgutils.create_struct_proxy(self.typeof(fname))(
|
|
1276
|
-
context, builder, value=fstruct
|
|
1277
|
-
)
|
|
1278
|
-
jit_addr = struct.jit_addr
|
|
1279
|
-
jit_addr.name = f"jit_addr_of_{fname}"
|
|
1280
|
-
|
|
1281
|
-
ctx = context
|
|
1282
|
-
res_slot = cgutils.alloca_once(
|
|
1283
|
-
builder, ctx.get_value_type(sig.return_type)
|
|
1284
|
-
)
|
|
1285
|
-
|
|
1286
|
-
if_jit_addr_is_null = builder.if_else(
|
|
1287
|
-
cgutils.is_null(builder, jit_addr), likely=False
|
|
1288
|
-
)
|
|
1289
|
-
with if_jit_addr_is_null as (then, orelse):
|
|
1290
|
-
with then:
|
|
1291
|
-
func_ptr = self.__get_first_class_function_pointer(
|
|
1292
|
-
ftype, fname, sig
|
|
1293
|
-
)
|
|
1294
|
-
res = builder.call(func_ptr, argvals)
|
|
1295
|
-
builder.store(res, res_slot)
|
|
1296
|
-
|
|
1297
|
-
with orelse:
|
|
1298
|
-
llty = ctx.call_conv.get_function_type(
|
|
1299
|
-
sig.return_type, sig.args
|
|
1300
|
-
).as_pointer()
|
|
1301
|
-
func_ptr = builder.bitcast(jit_addr, llty)
|
|
1302
|
-
# call
|
|
1303
|
-
status, res = ctx.call_conv.call_function(
|
|
1304
|
-
builder, func_ptr, sig.return_type, sig.args, argvals
|
|
1305
|
-
)
|
|
1306
|
-
with cgutils.if_unlikely(builder, status.is_error):
|
|
1307
|
-
context.call_conv.return_status_propagate(builder, status)
|
|
1308
|
-
builder.store(res, res_slot)
|
|
1309
|
-
return builder.load(res_slot)
|
|
1310
|
-
|
|
1311
|
-
def __get_first_class_function_pointer(self, ftype, fname, sig):
|
|
1312
|
-
from numba.experimental.function_type import lower_get_wrapper_address
|
|
1313
|
-
|
|
1314
|
-
llty = self.context.get_value_type(ftype)
|
|
1315
|
-
fstruct = self.loadvar(fname)
|
|
1316
|
-
addr = self.builder.extract_value(
|
|
1317
|
-
fstruct, 0, name="addr_of_%s" % (fname)
|
|
1318
|
-
)
|
|
1319
|
-
|
|
1320
|
-
fptr = cgutils.alloca_once(
|
|
1321
|
-
self.builder, llty, name="fptr_of_%s" % (fname)
|
|
1322
|
-
)
|
|
1323
|
-
with self.builder.if_else(
|
|
1324
|
-
cgutils.is_null(self.builder, addr), likely=False
|
|
1325
|
-
) as (then, orelse):
|
|
1326
|
-
with then:
|
|
1327
|
-
self.init_pyapi()
|
|
1328
|
-
# Acquire the GIL
|
|
1329
|
-
gil_state = self.pyapi.gil_ensure()
|
|
1330
|
-
pyaddr = self.builder.extract_value(
|
|
1331
|
-
fstruct, 1, name="pyaddr_of_%s" % (fname)
|
|
1332
|
-
)
|
|
1333
|
-
# try to recover the function address, see
|
|
1334
|
-
# test_zero_address BadToGood example in
|
|
1335
|
-
# test_function_type.py
|
|
1336
|
-
addr1 = lower_get_wrapper_address(
|
|
1337
|
-
self.context,
|
|
1338
|
-
self.builder,
|
|
1339
|
-
pyaddr,
|
|
1340
|
-
sig,
|
|
1341
|
-
failure_mode="ignore",
|
|
1342
|
-
)
|
|
1343
|
-
with self.builder.if_then(
|
|
1344
|
-
cgutils.is_null(self.builder, addr1), likely=False
|
|
1345
|
-
):
|
|
1346
|
-
self.return_exception(
|
|
1347
|
-
RuntimeError,
|
|
1348
|
-
exc_args=(f"{ftype} function address is null",),
|
|
1349
|
-
loc=self.loc,
|
|
1350
|
-
)
|
|
1351
|
-
addr2 = self.pyapi.long_as_voidptr(addr1)
|
|
1352
|
-
self.builder.store(self.builder.bitcast(addr2, llty), fptr)
|
|
1353
|
-
self.pyapi.decref(addr1)
|
|
1354
|
-
self.pyapi.gil_release(gil_state)
|
|
1355
|
-
with orelse:
|
|
1356
|
-
self.builder.store(self.builder.bitcast(addr, llty), fptr)
|
|
1357
|
-
return self.builder.load(fptr)
|
|
1358
|
-
|
|
1359
1225
|
def _lower_call_normal(self, fnty, expr, signature):
|
|
1360
1226
|
# Normal function resolution
|
|
1361
1227
|
self.debug_print("# calling normal function: {0}".format(fnty))
|
|
@@ -1841,8 +1707,7 @@ class CUDALower(Lower):
|
|
|
1841
1707
|
int_type = (llvm_ir.IntType,)
|
|
1842
1708
|
real_type = llvm_ir.FloatType, llvm_ir.DoubleType
|
|
1843
1709
|
if isinstance(lltype, int_type + real_type):
|
|
1844
|
-
|
|
1845
|
-
src_name = name[:index] if index > 0 else name
|
|
1710
|
+
src_name = name.split(".")[0]
|
|
1846
1711
|
if src_name in self.poly_var_typ_map:
|
|
1847
1712
|
# Do not emit debug value on polymorphic type var
|
|
1848
1713
|
return
|
|
@@ -1869,6 +1734,9 @@ class CUDALower(Lower):
|
|
|
1869
1734
|
|
|
1870
1735
|
self.poly_var_typ_map = {}
|
|
1871
1736
|
self.poly_var_loc_map = {}
|
|
1737
|
+
self.poly_var_set = set()
|
|
1738
|
+
self.poly_cleaned = False
|
|
1739
|
+
self.lastblk = max(self.blocks.keys())
|
|
1872
1740
|
|
|
1873
1741
|
# When debug info is enabled, walk through function body and mark
|
|
1874
1742
|
# variables with polymorphic types.
|
|
@@ -1880,8 +1748,7 @@ class CUDALower(Lower):
|
|
|
1880
1748
|
if x.target.name.startswith("$"):
|
|
1881
1749
|
continue
|
|
1882
1750
|
ssa_name = x.target.name
|
|
1883
|
-
|
|
1884
|
-
src_name = ssa_name[:index] if index > 0 else ssa_name
|
|
1751
|
+
src_name = ssa_name.split(".")[0]
|
|
1885
1752
|
# Check all the multi-versioned targets
|
|
1886
1753
|
if len(x.target.versioned_names) > 0:
|
|
1887
1754
|
fetype = self.typeof(ssa_name)
|
|
@@ -1902,12 +1769,12 @@ class CUDALower(Lower):
|
|
|
1902
1769
|
"""
|
|
1903
1770
|
# If the name is not handled yet and a store is needed
|
|
1904
1771
|
if name not in self.varmap and self.store_var_needed(name):
|
|
1905
|
-
|
|
1906
|
-
src_name = name[:index] if index > 0 else name
|
|
1772
|
+
src_name = name.split(".")[0]
|
|
1907
1773
|
if src_name in self.poly_var_typ_map:
|
|
1908
|
-
|
|
1909
|
-
datamodel = self.context.data_model_manager[dtype]
|
|
1774
|
+
self.poly_var_set.add(name)
|
|
1910
1775
|
if src_name not in self.poly_var_loc_map:
|
|
1776
|
+
dtype = types.UnionType(self.poly_var_typ_map[src_name])
|
|
1777
|
+
datamodel = self.context.data_model_manager[dtype]
|
|
1911
1778
|
# UnionType has sorted set of types, max at last index
|
|
1912
1779
|
maxsizetype = dtype.types[-1]
|
|
1913
1780
|
# Create a single element aggregate type
|
|
@@ -1916,13 +1783,7 @@ class CUDALower(Lower):
|
|
|
1916
1783
|
ptr = self.alloca_lltype(src_name, lltype, datamodel)
|
|
1917
1784
|
# save the location of the union type for polymorphic var
|
|
1918
1785
|
self.poly_var_loc_map[src_name] = ptr
|
|
1919
|
-
|
|
1920
|
-
lltype = self.context.get_value_type(fetype)
|
|
1921
|
-
castptr = self.builder.bitcast(
|
|
1922
|
-
self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
|
|
1923
|
-
)
|
|
1924
|
-
# Remember the pointer
|
|
1925
|
-
self.varmap[name] = castptr
|
|
1786
|
+
return
|
|
1926
1787
|
|
|
1927
1788
|
super()._alloca_var(name, fetype)
|
|
1928
1789
|
|
|
@@ -1936,6 +1797,45 @@ class CUDALower(Lower):
|
|
|
1936
1797
|
or self._disable_sroa_like_opt
|
|
1937
1798
|
)
|
|
1938
1799
|
|
|
1800
|
+
def delvar(self, name):
|
|
1801
|
+
"""
|
|
1802
|
+
Delete the given variable.
|
|
1803
|
+
"""
|
|
1804
|
+
if name in self.poly_var_set:
|
|
1805
|
+
fetype = self.typeof(name)
|
|
1806
|
+
src_name = name.split(".")[0]
|
|
1807
|
+
ptr = self.poly_var_loc_map[src_name]
|
|
1808
|
+
self.decref(fetype, self.builder.load(ptr))
|
|
1809
|
+
if (
|
|
1810
|
+
self._cur_ir_block == self.blocks[self.lastblk]
|
|
1811
|
+
and not self.poly_cleaned
|
|
1812
|
+
):
|
|
1813
|
+
# Zero-fill the debug union for polymorphic only
|
|
1814
|
+
# at the last block
|
|
1815
|
+
for v in self.poly_var_loc_map.values():
|
|
1816
|
+
self.builder.store(
|
|
1817
|
+
llvm_ir.Constant(v.type.pointee, None), v
|
|
1818
|
+
)
|
|
1819
|
+
self.poly_cleaned = True
|
|
1820
|
+
return
|
|
1821
|
+
|
|
1822
|
+
super().delvar(name)
|
|
1823
|
+
|
|
1824
|
+
def getvar(self, name):
|
|
1825
|
+
"""
|
|
1826
|
+
Get a pointer to the given variable's slot.
|
|
1827
|
+
"""
|
|
1828
|
+
if name in self.poly_var_set:
|
|
1829
|
+
src_name = name.split(".")[0]
|
|
1830
|
+
fetype = self.typeof(name)
|
|
1831
|
+
lltype = self.context.get_value_type(fetype)
|
|
1832
|
+
castptr = self.builder.bitcast(
|
|
1833
|
+
self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
|
|
1834
|
+
)
|
|
1835
|
+
return castptr
|
|
1836
|
+
else:
|
|
1837
|
+
return super().getvar(name)
|
|
1838
|
+
|
|
1939
1839
|
|
|
1940
1840
|
def _lit_or_omitted(value):
|
|
1941
1841
|
"""Returns a Literal instance if the type of value is supported;
|
|
@@ -1,11 +1,15 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import math
|
|
2
5
|
import operator
|
|
3
6
|
from llvmlite import ir
|
|
4
|
-
from numba.core import types, typing
|
|
7
|
+
from numba.core import types, typing
|
|
5
8
|
from numba.cuda import cgutils
|
|
6
9
|
from numba.core.imputils import Registry
|
|
7
10
|
from numba.types import float32, float64, int64, uint64
|
|
8
11
|
from numba.cuda import libdevice
|
|
12
|
+
from numba.cuda.core import targetconfig
|
|
9
13
|
|
|
10
14
|
registry = Registry()
|
|
11
15
|
lower = registry.lower
|
|
@@ -1,36 +1,30 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import ctypes
|
|
2
5
|
import os
|
|
3
6
|
from functools import wraps
|
|
4
7
|
import numpy as np
|
|
5
8
|
|
|
6
|
-
|
|
9
|
+
|
|
10
|
+
from numba import cuda, types
|
|
11
|
+
from numba.cuda import config
|
|
12
|
+
|
|
7
13
|
from numba.core.runtime.nrt import _nrt_mstats
|
|
8
14
|
from numba.cuda.cudadrv.driver import (
|
|
9
15
|
_Linker,
|
|
10
16
|
driver,
|
|
11
17
|
launch_kernel,
|
|
12
18
|
USE_NV_BINDING,
|
|
19
|
+
_have_nvjitlink,
|
|
13
20
|
)
|
|
14
21
|
from numba.cuda.cudadrv import devices
|
|
15
22
|
from numba.cuda.api import get_current_device
|
|
16
|
-
from numba.cuda.utils import
|
|
23
|
+
from numba.cuda.utils import cached_file_read
|
|
17
24
|
from numba.cuda.cudadrv.linkable_code import CUSource
|
|
25
|
+
from numba.cuda.typing.templates import signature
|
|
18
26
|
|
|
19
|
-
|
|
20
|
-
# Check environment variable or config for NRT statistics enablement
|
|
21
|
-
NRT_STATS = _readenv("NUMBA_CUDA_NRT_STATS", bool, False) or getattr(
|
|
22
|
-
config, "NUMBA_CUDA_NRT_STATS", False
|
|
23
|
-
)
|
|
24
|
-
if not hasattr(config, "NUMBA_CUDA_NRT_STATS"):
|
|
25
|
-
config.CUDA_NRT_STATS = NRT_STATS
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
# Check environment variable or config for NRT enablement
|
|
29
|
-
ENABLE_NRT = _readenv("NUMBA_CUDA_ENABLE_NRT", bool, False) or getattr(
|
|
30
|
-
config, "NUMBA_CUDA_ENABLE_NRT", False
|
|
31
|
-
)
|
|
32
|
-
if not hasattr(config, "NUMBA_CUDA_ENABLE_NRT"):
|
|
33
|
-
config.CUDA_ENABLE_NRT = ENABLE_NRT
|
|
27
|
+
from numba.core.extending import intrinsic, overload_classmethod
|
|
34
28
|
|
|
35
29
|
|
|
36
30
|
def get_include():
|
|
@@ -38,6 +32,34 @@ def get_include():
|
|
|
38
32
|
return os.path.dirname(os.path.abspath(__file__))
|
|
39
33
|
|
|
40
34
|
|
|
35
|
+
# Provide an implementation of Array._allocate() for the CUDA target (used
|
|
36
|
+
# internally by Numba when generating the allocation of an array)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@intrinsic
|
|
40
|
+
def intrin_alloc(typingctx, allocsize, align):
|
|
41
|
+
"""Intrinsic to call into the allocator for Array"""
|
|
42
|
+
|
|
43
|
+
def codegen(context, builder, signature, args):
|
|
44
|
+
allocsize, align = args
|
|
45
|
+
meminfo = context.nrt.meminfo_alloc_aligned(builder, allocsize, align)
|
|
46
|
+
return meminfo
|
|
47
|
+
|
|
48
|
+
mip = types.MemInfoPointer(types.voidptr) # return untyped pointer
|
|
49
|
+
sig = signature(mip, allocsize, align)
|
|
50
|
+
return sig, codegen
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@overload_classmethod(types.Array, "_allocate", target="CUDA")
|
|
54
|
+
def _ol_array_allocate(cls, allocsize, align):
|
|
55
|
+
"""Implements a Numba-only CUDA-target classmethod on the array type."""
|
|
56
|
+
|
|
57
|
+
def impl(cls, allocsize, align):
|
|
58
|
+
return intrin_alloc(allocsize, align)
|
|
59
|
+
|
|
60
|
+
return impl
|
|
61
|
+
|
|
62
|
+
|
|
41
63
|
# Protect method to ensure NRT memory allocation and initialization
|
|
42
64
|
def _alloc_init_guard(method):
|
|
43
65
|
"""
|
|
@@ -65,10 +87,18 @@ class _Runtime:
|
|
|
65
87
|
|
|
66
88
|
def __init__(self):
|
|
67
89
|
"""Initialize memsys module and variable"""
|
|
90
|
+
self._reset()
|
|
91
|
+
|
|
92
|
+
def _reset(self):
|
|
93
|
+
"""Reset to the uninitialized state"""
|
|
68
94
|
self._memsys_module = None
|
|
69
95
|
self._memsys = None
|
|
70
96
|
self._initialized = False
|
|
71
97
|
|
|
98
|
+
def close(self):
|
|
99
|
+
"""Close and reset"""
|
|
100
|
+
self._reset()
|
|
101
|
+
|
|
72
102
|
def _compile_memsys_module(self):
|
|
73
103
|
"""
|
|
74
104
|
Compile memsys.cu and create a module from it in the current context
|
|
@@ -80,7 +110,7 @@ class _Runtime:
|
|
|
80
110
|
cc = get_current_device().compute_capability
|
|
81
111
|
|
|
82
112
|
# Create a new linker instance and add the cu file
|
|
83
|
-
linker = _Linker.new(cc=cc)
|
|
113
|
+
linker = _Linker.new(cc=cc, lto=_have_nvjitlink())
|
|
84
114
|
linker.add_cu_file(memsys_mod)
|
|
85
115
|
|
|
86
116
|
# Complete the linker and create a module from it
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_lib_dirs():
|
|
10
|
+
"""
|
|
11
|
+
Anaconda specific
|
|
12
|
+
"""
|
|
13
|
+
if sys.platform == "win32":
|
|
14
|
+
# CUDA 12 puts in "bin" directory, whereas CUDA 13 puts in "bin\x64" directory
|
|
15
|
+
dirnames = [
|
|
16
|
+
os.path.join("Library", "bin"),
|
|
17
|
+
os.path.join("Library", "bin", "x64"),
|
|
18
|
+
os.path.join("Library", "nvvm", "bin"),
|
|
19
|
+
os.path.join("Library", "nvvm", "bin", "x64"),
|
|
20
|
+
]
|
|
21
|
+
else:
|
|
22
|
+
dirnames = [
|
|
23
|
+
"lib",
|
|
24
|
+
]
|
|
25
|
+
libdirs = [os.path.join(sys.prefix, x) for x in dirnames]
|
|
26
|
+
return libdirs
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
DLLNAMEMAP = {
|
|
30
|
+
"linux": r"lib%(name)s\.so\.%(ver)s$",
|
|
31
|
+
"linux2": r"lib%(name)s\.so\.%(ver)s$",
|
|
32
|
+
"linux-static": r"lib%(name)s\.a$",
|
|
33
|
+
"darwin": r"lib%(name)s\.%(ver)s\.dylib$",
|
|
34
|
+
"win32": r"%(name)s%(ver)s\.dll$",
|
|
35
|
+
"win32-static": r"%(name)s\.lib$",
|
|
36
|
+
"bsd": r"lib%(name)s\.so\.%(ver)s$",
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
RE_VER = r"[0-9]*([_\.][0-9]+)*"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def find_lib(libname, libdir=None, platform=None, static=False):
|
|
43
|
+
platform = platform or sys.platform
|
|
44
|
+
platform = "bsd" if "bsd" in platform else platform
|
|
45
|
+
if static:
|
|
46
|
+
platform = f"{platform}-static"
|
|
47
|
+
if platform not in DLLNAMEMAP:
|
|
48
|
+
# Return empty list if platform name is undefined.
|
|
49
|
+
# Not all platforms define their static library paths.
|
|
50
|
+
return []
|
|
51
|
+
pat = DLLNAMEMAP[platform] % {"name": libname, "ver": RE_VER}
|
|
52
|
+
regex = re.compile(pat)
|
|
53
|
+
return find_file(regex, libdir)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def find_file(pat, libdir=None):
|
|
57
|
+
if libdir is None:
|
|
58
|
+
libdirs = get_lib_dirs()
|
|
59
|
+
elif isinstance(libdir, str):
|
|
60
|
+
libdirs = [
|
|
61
|
+
libdir,
|
|
62
|
+
]
|
|
63
|
+
else:
|
|
64
|
+
libdirs = list(libdir)
|
|
65
|
+
files = []
|
|
66
|
+
for ldir in libdirs:
|
|
67
|
+
try:
|
|
68
|
+
entries = os.listdir(ldir)
|
|
69
|
+
except FileNotFoundError:
|
|
70
|
+
continue
|
|
71
|
+
candidates = [
|
|
72
|
+
os.path.join(ldir, ent) for ent in entries if pat.match(ent)
|
|
73
|
+
]
|
|
74
|
+
files.extend([c for c in candidates if os.path.isfile(c)])
|
|
75
|
+
return files
|
numba_cuda/numba/cuda/models.py
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import functools
|
|
2
5
|
|
|
3
6
|
from llvmlite import ir
|
|
4
7
|
|
|
5
8
|
from numba.core.datamodel.registry import DataModelManager, register
|
|
9
|
+
from numba.core.datamodel import PrimitiveModel
|
|
6
10
|
from numba.core.extending import models
|
|
7
11
|
from numba.core import types
|
|
8
|
-
from numba.cuda.types import Dim3, GridGroup, CUDADispatcher
|
|
12
|
+
from numba.cuda.types import Dim3, GridGroup, CUDADispatcher, Bfloat16
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
cuda_data_manager = DataModelManager()
|
|
@@ -42,3 +46,10 @@ class FloatModel(models.PrimitiveModel):
|
|
|
42
46
|
|
|
43
47
|
|
|
44
48
|
register_model(CUDADispatcher)(models.OpaqueModel)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@register_model(Bfloat16)
|
|
52
|
+
class _model___nv_bfloat16(PrimitiveModel):
|
|
53
|
+
def __init__(self, dmm, fe_type):
|
|
54
|
+
be_type = ir.IntType(16)
|
|
55
|
+
super(_model___nv_bfloat16, self).__init__(dmm, fe_type, be_type)
|