numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +3 -0
- _numba_cuda_redirector.py +3 -0
- numba_cuda/VERSION +1 -1
- numba_cuda/__init__.py +2 -1
- numba_cuda/_version.py +2 -13
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
- numba_cuda/numba/cuda/api.py +9 -1
- numba_cuda/numba/cuda/api_util.py +3 -0
- numba_cuda/numba/cuda/args.py +3 -0
- numba_cuda/numba/cuda/bf16.py +288 -2
- numba_cuda/numba/cuda/cg.py +3 -0
- numba_cuda/numba/cuda/cgutils.py +5 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +4 -1
- numba_cuda/numba/cuda/compiler.py +376 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +5 -2
- numba_cuda/numba/cuda/core/callconv.py +3 -0
- numba_cuda/numba/cuda/core/codegen.py +3 -0
- numba_cuda/numba/cuda/core/compiler.py +9 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +52 -27
- numba_cuda/numba/cuda/core/ir_utils.py +17 -29
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/sigutils.py +3 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +741 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +425 -246
- numba_cuda/numba/cuda/cudadecl.py +4 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
- numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
- numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
- numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
- numba_cuda/numba/cuda/cudaimpl.py +8 -1
- numba_cuda/numba/cuda/cudamath.py +3 -0
- numba_cuda/numba/cuda/debuginfo.py +88 -2
- numba_cuda/numba/cuda/decorators.py +6 -3
- numba_cuda/numba/cuda/descriptor.py +6 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/deviceufunc.py +69 -2
- numba_cuda/numba/cuda/dispatcher.py +21 -39
- numba_cuda/numba/cuda/errors.py +10 -0
- numba_cuda/numba/cuda/extending.py +3 -0
- numba_cuda/numba/cuda/flags.py +143 -1
- numba_cuda/numba/cuda/fp16.py +3 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +4 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
- numba_cuda/numba/cuda/intrinsics.py +3 -0
- numba_cuda/numba/cuda/itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +3 -0
- numba_cuda/numba/cuda/kernels/transpose.py +3 -0
- numba_cuda/numba/cuda/libdevice.py +4 -0
- numba_cuda/numba/cuda/libdevicedecl.py +3 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
- numba_cuda/numba/cuda/locks.py +3 -0
- numba_cuda/numba/cuda/lowering.py +59 -159
- numba_cuda/numba/cuda/mathimpl.py +5 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +12 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +4 -1
- numba_cuda/numba/cuda/printimpl.py +15 -1
- numba_cuda/numba/cuda/random.py +4 -1
- numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
- numba_cuda/numba/cuda/serialize.py +4 -1
- numba_cuda/numba/cuda/simulator/__init__.py +4 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +4 -1
- numba_cuda/numba/cuda/simulator/bf16.py +3 -0
- numba_cuda/numba/cuda/simulator/compiler.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
- numba_cuda/numba/cuda/simulator/kernel.py +3 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
- numba_cuda/numba/cuda/simulator/reduction.py +3 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
- numba_cuda/numba/cuda/simulator_init.py +3 -0
- numba_cuda/numba/cuda/stubs.py +3 -0
- numba_cuda/numba/cuda/target.py +38 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +4 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +5 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
- numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
- numba_cuda/numba/cuda/tests/support.py +58 -15
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +59 -0
- numba_cuda/numba/cuda/typing/__init__.py +12 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +10 -14
- numba_cuda/numba/cuda/ufuncs.py +6 -3
- numba_cuda/numba/cuda/utils.py +9 -112
- numba_cuda/numba/cuda/vector_types.py +3 -0
- numba_cuda/numba/cuda/vectorizers.py +3 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
- numba_cuda-0.20.0.dist-info/RECORD +357 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
- numba_cuda-0.19.0.dist-info/RECORD +0 -301
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -1,27 +1,34 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
1
4
|
import sys
|
|
2
|
-
import re
|
|
3
5
|
import os
|
|
4
6
|
from collections import namedtuple
|
|
5
7
|
import platform
|
|
6
|
-
import
|
|
7
|
-
from
|
|
8
|
-
from numba.
|
|
9
|
-
from numba.
|
|
10
|
-
from numba import config
|
|
11
|
-
import ctypes
|
|
8
|
+
import importlib.metadata
|
|
9
|
+
from numba.cuda.core.config import IS_WIN32
|
|
10
|
+
from numba.cuda.misc.findlib import find_lib
|
|
11
|
+
from numba.cuda import config
|
|
12
12
|
|
|
13
13
|
_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"])
|
|
14
14
|
|
|
15
15
|
SEARCH_PRIORITY = [
|
|
16
16
|
"Conda environment",
|
|
17
|
-
"Conda environment (NVIDIA package)",
|
|
18
17
|
"NVIDIA NVCC Wheel",
|
|
19
18
|
"CUDA_HOME",
|
|
20
19
|
"System",
|
|
21
|
-
"Debian package",
|
|
22
20
|
]
|
|
23
21
|
|
|
24
22
|
|
|
23
|
+
def _get_distribution(distribution_name):
|
|
24
|
+
"""Get the distribution path using importlib.metadata, returning None if not found."""
|
|
25
|
+
try:
|
|
26
|
+
dist = importlib.metadata.distribution(distribution_name)
|
|
27
|
+
return dist
|
|
28
|
+
except importlib.metadata.PackageNotFoundError:
|
|
29
|
+
return None
|
|
30
|
+
|
|
31
|
+
|
|
25
32
|
def _priority_index(label):
|
|
26
33
|
if label in SEARCH_PRIORITY:
|
|
27
34
|
return SEARCH_PRIORITY.index(label)
|
|
@@ -61,182 +68,183 @@ def _find_valid_path(options):
|
|
|
61
68
|
def _get_libdevice_path_decision():
|
|
62
69
|
options = _build_options(
|
|
63
70
|
[
|
|
64
|
-
("Conda environment",
|
|
65
|
-
("
|
|
66
|
-
(
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
71
|
+
("Conda environment", get_libdevice_conda_path),
|
|
72
|
+
("NVIDIA NVCC Wheel", get_libdevice_wheel_path),
|
|
73
|
+
(
|
|
74
|
+
"CUDA_HOME",
|
|
75
|
+
lambda: get_cuda_home("nvvm", "libdevice", "libdevice.10.bc"),
|
|
76
|
+
),
|
|
77
|
+
(
|
|
78
|
+
"System",
|
|
79
|
+
lambda: get_system_ctk("nvvm", "libdevice", "libdevice.10.bc"),
|
|
80
|
+
),
|
|
70
81
|
]
|
|
71
82
|
)
|
|
72
83
|
return _find_first_valid_lazy(options)
|
|
73
84
|
|
|
74
85
|
|
|
75
|
-
def _nvvm_lib_dir():
|
|
76
|
-
if IS_WIN32:
|
|
77
|
-
return "nvvm", "bin"
|
|
78
|
-
else:
|
|
79
|
-
return "nvvm", "lib64"
|
|
80
|
-
|
|
81
|
-
|
|
82
86
|
def _get_nvvm_path_decision():
|
|
83
|
-
options =
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
87
|
+
options = _build_options(
|
|
88
|
+
[
|
|
89
|
+
("Conda environment", _get_nvvm_conda_path),
|
|
90
|
+
("NVIDIA NVCC Wheel", _get_nvvm_wheel_path),
|
|
91
|
+
("CUDA_HOME", _get_nvvm_cuda_home_path),
|
|
92
|
+
("System", _get_nvvm_system_path),
|
|
93
|
+
]
|
|
94
|
+
)
|
|
90
95
|
return _find_first_valid_lazy(options)
|
|
91
96
|
|
|
92
97
|
|
|
93
|
-
def _get_nvrtc_system_ctk():
|
|
94
|
-
sys_path = get_system_ctk("bin" if IS_WIN32 else "lib64")
|
|
95
|
-
candidates = find_lib("nvrtc", sys_path)
|
|
96
|
-
if candidates:
|
|
97
|
-
return max(candidates)
|
|
98
|
-
|
|
99
|
-
|
|
100
98
|
def _get_nvrtc_path_decision():
|
|
101
99
|
options = _build_options(
|
|
102
100
|
[
|
|
103
|
-
("
|
|
104
|
-
("
|
|
105
|
-
("
|
|
106
|
-
("
|
|
107
|
-
("System", _get_nvrtc_system_ctk),
|
|
101
|
+
("Conda environment", get_conda_ctk_libdir),
|
|
102
|
+
("NVIDIA NVCC Wheel", _get_nvrtc_wheel_libdir),
|
|
103
|
+
("CUDA_HOME", get_cuda_home_libdir),
|
|
104
|
+
("System", get_system_ctk_libdir),
|
|
108
105
|
]
|
|
109
106
|
)
|
|
110
107
|
return _find_first_valid_lazy(options)
|
|
111
108
|
|
|
112
109
|
|
|
113
|
-
def
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
110
|
+
def _get_nvvm_wheel_path():
|
|
111
|
+
dso_path = None
|
|
112
|
+
# CUDA 12
|
|
113
|
+
nvcc_distribution = _get_distribution("nvidia-cuda-nvcc-cu12")
|
|
114
|
+
if nvcc_distribution is not None:
|
|
115
|
+
site_packages_path = nvcc_distribution.locate_file("")
|
|
116
|
+
nvvm_lib_dir = os.path.join(
|
|
117
|
+
site_packages_path,
|
|
118
|
+
"nvidia",
|
|
119
|
+
"cuda_nvcc",
|
|
120
|
+
"nvvm",
|
|
121
|
+
"bin" if IS_WIN32 else "lib64",
|
|
122
|
+
)
|
|
123
|
+
dso_path = os.path.join(
|
|
124
|
+
nvvm_lib_dir, "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so"
|
|
125
|
+
)
|
|
126
126
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
127
|
+
# CUDA 13
|
|
128
|
+
if dso_path is None:
|
|
129
|
+
nvcc_distribution = _get_distribution("nvidia-nvvm")
|
|
130
|
+
if (
|
|
131
|
+
nvcc_distribution is not None
|
|
132
|
+
and nvcc_distribution.version.startswith("13.")
|
|
133
|
+
):
|
|
134
|
+
site_packages_path = nvcc_distribution.locate_file("")
|
|
135
|
+
nvvm_lib_dir = os.path.join(
|
|
136
|
+
site_packages_path,
|
|
137
|
+
"nvidia",
|
|
138
|
+
"cu13",
|
|
139
|
+
"bin" if IS_WIN32 else "lib",
|
|
140
|
+
"x86_64" if IS_WIN32 else "",
|
|
141
|
+
)
|
|
142
|
+
dso_path = os.path.join(
|
|
143
|
+
nvvm_lib_dir, "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so.4"
|
|
144
|
+
)
|
|
131
145
|
|
|
146
|
+
if dso_path and os.path.isfile(dso_path):
|
|
147
|
+
return dso_path
|
|
132
148
|
return None
|
|
133
149
|
|
|
134
150
|
|
|
135
|
-
def
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
151
|
+
def _get_nvrtc_wheel_libdir():
|
|
152
|
+
dso_path = None
|
|
153
|
+
# CUDA 12
|
|
154
|
+
nvrtc_distribution = _get_distribution("nvidia-cuda-nvrtc-cu12")
|
|
155
|
+
if nvrtc_distribution is not None:
|
|
156
|
+
site_packages_path = nvrtc_distribution.locate_file("")
|
|
157
|
+
nvrtc_lib_dir = os.path.join(
|
|
158
|
+
site_packages_path,
|
|
141
159
|
"nvidia",
|
|
142
160
|
"cuda_nvrtc",
|
|
143
|
-
|
|
161
|
+
"bin" if IS_WIN32 else "lib",
|
|
162
|
+
)
|
|
163
|
+
dso_path = os.path.join(
|
|
164
|
+
nvrtc_lib_dir, "nvrtc64_120_0.dll" if IS_WIN32 else "libnvrtc.so.12"
|
|
144
165
|
)
|
|
145
|
-
if lib_dir and os.path.exists(lib_dir):
|
|
146
|
-
chosen_path = None
|
|
147
|
-
|
|
148
|
-
# Check for each version of the NVRTC DLL, preferring the most
|
|
149
|
-
# recent.
|
|
150
|
-
versions = (
|
|
151
|
-
"120" if IS_WIN32 else "12",
|
|
152
|
-
"130" if IS_WIN32 else "13",
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
for version in versions:
|
|
156
|
-
dso_path = os.path.join(
|
|
157
|
-
lib_dir,
|
|
158
|
-
f"nvrtc64_{version}_0.dll"
|
|
159
|
-
if IS_WIN32
|
|
160
|
-
else f"libnvrtc.so.{version}",
|
|
161
|
-
)
|
|
162
166
|
|
|
163
|
-
|
|
164
|
-
|
|
167
|
+
# CUDA 13
|
|
168
|
+
if dso_path is None:
|
|
169
|
+
nvrtc_distribution = _get_distribution("nvidia-cuda-nvrtc")
|
|
170
|
+
if (
|
|
171
|
+
nvrtc_distribution is not None
|
|
172
|
+
and nvrtc_distribution.version.startswith("13.")
|
|
173
|
+
):
|
|
174
|
+
site_packages_path = nvrtc_distribution.locate_file("")
|
|
175
|
+
nvrtc_lib_dir = os.path.join(
|
|
176
|
+
site_packages_path,
|
|
177
|
+
"nvidia",
|
|
178
|
+
"cu13",
|
|
179
|
+
"bin" if IS_WIN32 else "lib",
|
|
180
|
+
"x86_64" if IS_WIN32 else "",
|
|
181
|
+
)
|
|
182
|
+
dso_path = os.path.join(
|
|
183
|
+
nvrtc_lib_dir,
|
|
184
|
+
"nvrtc64_130_0.dll" if IS_WIN32 else "libnvrtc.so.13",
|
|
185
|
+
)
|
|
165
186
|
|
|
166
|
-
|
|
187
|
+
if dso_path and os.path.isfile(dso_path):
|
|
188
|
+
return os.path.dirname(dso_path)
|
|
189
|
+
return None
|
|
167
190
|
|
|
168
191
|
|
|
169
|
-
def
|
|
170
|
-
|
|
171
|
-
if
|
|
172
|
-
try:
|
|
173
|
-
result = ctypes.CDLL(dso_path, mode=ctypes.RTLD_GLOBAL)
|
|
174
|
-
except OSError:
|
|
175
|
-
pass
|
|
176
|
-
else:
|
|
177
|
-
if IS_WIN32:
|
|
178
|
-
import win32api
|
|
179
|
-
|
|
180
|
-
# This absolute path will
|
|
181
|
-
# always be correct regardless of the package source
|
|
182
|
-
nvrtc_path = win32api.GetModuleFileNameW(result._handle)
|
|
183
|
-
dso_dir = os.path.dirname(nvrtc_path)
|
|
184
|
-
builtins_path = os.path.join(
|
|
185
|
-
dso_dir,
|
|
186
|
-
[
|
|
187
|
-
f
|
|
188
|
-
for f in os.listdir(dso_dir)
|
|
189
|
-
if re.match("^nvrtc-builtins.*.dll$", f)
|
|
190
|
-
][0],
|
|
191
|
-
)
|
|
192
|
-
if not os.path.exists(builtins_path):
|
|
193
|
-
raise RuntimeError(
|
|
194
|
-
f'Path does not exist: "{builtins_path}"'
|
|
195
|
-
)
|
|
196
|
-
return Path(dso_path)
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
def _get_libdevice_paths():
|
|
200
|
-
by, libdir = _get_libdevice_path_decision()
|
|
201
|
-
if not libdir:
|
|
192
|
+
def _get_libdevice_path():
|
|
193
|
+
by, out = _get_libdevice_path_decision()
|
|
194
|
+
if not out:
|
|
202
195
|
return _env_path_tuple(by, None)
|
|
203
|
-
out = os.path.join(libdir, "libdevice.10.bc")
|
|
204
196
|
return _env_path_tuple(by, out)
|
|
205
197
|
|
|
206
198
|
|
|
207
|
-
def
|
|
199
|
+
def _cuda_static_libdir():
|
|
208
200
|
if IS_WIN32:
|
|
209
|
-
return "
|
|
201
|
+
return ("lib", "x64")
|
|
210
202
|
else:
|
|
211
|
-
return "lib64"
|
|
203
|
+
return ("lib64",)
|
|
212
204
|
|
|
213
205
|
|
|
214
|
-
def
|
|
215
|
-
|
|
216
|
-
|
|
206
|
+
def _get_cudalib_wheel_libdir():
|
|
207
|
+
"""Get the cudalib path from the cudart wheel."""
|
|
208
|
+
cuda_module_lib_dir = None
|
|
209
|
+
cuda_runtime_distribution = _get_distribution("nvidia-cuda-runtime-cu12")
|
|
210
|
+
if cuda_runtime_distribution is not None:
|
|
211
|
+
site_packages_path = cuda_runtime_distribution.locate_file("")
|
|
212
|
+
cuda_module_lib_dir = os.path.join(
|
|
213
|
+
site_packages_path,
|
|
214
|
+
"nvidia",
|
|
215
|
+
"cuda_runtime",
|
|
216
|
+
"bin" if IS_WIN32 else "lib",
|
|
217
|
+
)
|
|
217
218
|
else:
|
|
218
|
-
|
|
219
|
+
cuda_runtime_distribution = _get_distribution("nvidia-cuda-runtime")
|
|
220
|
+
if (
|
|
221
|
+
cuda_runtime_distribution is not None
|
|
222
|
+
and cuda_runtime_distribution.version.startswith("13.")
|
|
223
|
+
):
|
|
224
|
+
site_packages_path = cuda_runtime_distribution.locate_file("")
|
|
225
|
+
cuda_module_lib_dir = os.path.join(
|
|
226
|
+
site_packages_path,
|
|
227
|
+
"nvidia",
|
|
228
|
+
"cu13",
|
|
229
|
+
"bin" if IS_WIN32 else "lib",
|
|
230
|
+
"x86_64" if IS_WIN32 else "",
|
|
231
|
+
)
|
|
219
232
|
|
|
233
|
+
if cuda_module_lib_dir is None:
|
|
234
|
+
return None
|
|
220
235
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
site_paths = [site.getusersitepackages()] + site.getsitepackages()
|
|
224
|
-
libdir = "bin" if IS_WIN32 else "lib"
|
|
225
|
-
for sp in filter(None, site_paths):
|
|
226
|
-
cudalib_path = Path(sp, "nvidia", "cuda_runtime", libdir)
|
|
227
|
-
if cudalib_path.exists():
|
|
228
|
-
return str(cudalib_path)
|
|
236
|
+
if cuda_module_lib_dir and os.path.isdir(cuda_module_lib_dir):
|
|
237
|
+
return cuda_module_lib_dir
|
|
229
238
|
return None
|
|
230
239
|
|
|
231
240
|
|
|
232
241
|
def _get_cudalib_dir_path_decision():
|
|
233
242
|
options = _build_options(
|
|
234
243
|
[
|
|
235
|
-
("Conda environment",
|
|
236
|
-
("
|
|
237
|
-
("
|
|
238
|
-
("
|
|
239
|
-
("System", lambda: get_system_ctk(_cudalib_path())),
|
|
244
|
+
("Conda environment", get_conda_ctk_libdir),
|
|
245
|
+
("NVIDIA NVCC Wheel", _get_cudalib_wheel_libdir),
|
|
246
|
+
("CUDA_HOME", get_cuda_home_libdir),
|
|
247
|
+
("System", get_system_ctk_libdir),
|
|
240
248
|
]
|
|
241
249
|
)
|
|
242
250
|
return _find_first_valid_lazy(options)
|
|
@@ -245,16 +253,13 @@ def _get_cudalib_dir_path_decision():
|
|
|
245
253
|
def _get_static_cudalib_dir_path_decision():
    """Choose which source supplies the CUDA static-library directory.

    The CUDA_HOME and system candidates both resolve the sub-path returned
    by ``_cuda_static_libdir()`` against their respective toolkit roots;
    that work is deferred until the lookup helper calls them.

    Returns whatever ``_find_first_valid_lazy`` produces for the candidates
    (presumably a ``(label, path)`` pair -- confirm against that helper).
    """

    def _under_cuda_home():
        return get_cuda_home(*_cuda_static_libdir())

    def _under_system_ctk():
        return get_system_ctk(*_cuda_static_libdir())

    candidates = [
        ("Conda environment", get_conda_ctk_libdir),
        ("NVIDIA NVCC Wheel", get_wheel_static_libdir),
        ("CUDA_HOME", _under_cuda_home),
        ("System", _under_system_ctk),
    ]
    return _find_first_valid_lazy(_build_options(candidates))
|
|
@@ -279,74 +284,196 @@ def get_system_ctk(*subdirs):
|
|
|
279
284
|
result = os.path.join("/usr/local/cuda", *subdirs)
|
|
280
285
|
if os.path.exists(result):
|
|
281
286
|
return result
|
|
287
|
+
return None
|
|
288
|
+
return None
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def get_system_ctk_libdir():
    """Return the shared-library directory of the system CUDA toolkit.

    Returns ``None`` when ``get_system_ctk()`` yields no toolkit root or
    when the expected library directory does not exist on disk.
    """
    ctk_root = get_system_ctk()
    if ctk_root is None:
        return None

    if IS_WIN32:
        candidate = os.path.join(ctk_root, "Library", "bin")
        # Windows CUDA 13 system CTK uses "bin\x64" directory
        x64_dir = os.path.join(candidate, "x64")
        if os.path.isdir(x64_dir):
            candidate = x64_dir
    else:
        candidate = os.path.join(ctk_root, "lib64", "")

    if not (candidate and os.path.isdir(candidate)):
        return None
    # normpath drops the trailing separator left by the empty component.
    return os.path.normpath(candidate)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def get_system_ctk_include():
    """Return the ``include`` directory of the system CUDA toolkit.

    The directory is only accepted when it exists and contains
    ``cuda_device_runtime_api.h``; otherwise ``None`` is returned.
    """
    ctk_root = get_system_ctk()
    if ctk_root is None:
        return None

    headers = os.path.join(ctk_root, "include")
    if not (headers and os.path.isdir(headers)):
        return None

    # The device runtime header is used as the marker for a usable include dir.
    marker = os.path.join(headers, "cuda_device_runtime_api.h")
    return headers if os.path.isfile(marker) else None
|
|
282
322
|
|
|
283
323
|
|
|
284
|
-
def
|
|
324
|
+
def _get_nvvm_system_path():
    """Return the expected NVVM library file path under the system toolkit.

    Returns ``None`` when ``get_system_ctk("nvvm")`` yields nothing.
    Otherwise the computed file path is returned without verifying that
    the file itself exists.
    """
    nvvm_root = get_system_ctk("nvvm")
    if nvvm_root is None:
        return None

    lib_dir = os.path.join(nvvm_root, "bin" if IS_WIN32 else "lib64")
    if IS_WIN32:
        x64_dir = os.path.join(lib_dir, "x64")
        if os.path.isdir(x64_dir):
            lib_dir = x64_dir

    dso_name = "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so.4"
    # NOTE(review): unlike _get_nvvm_conda_path, no os.path.isfile() check
    # is applied here -- confirm that callers cope with a missing file.
    return os.path.join(lib_dir, dso_name)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def get_conda_ctk_libdir():
    """Return the directory holding the CUDA toolkit shared libraries in conda.

    A conda environment is recognised by a ``conda-meta`` directory under
    ``sys.prefix``. Returns ``None`` outside such an environment or when no
    nvrtc library is found in the expected directory.
    """
    if not os.path.isdir(os.path.join(sys.prefix, "conda-meta")):
        return None

    if IS_WIN32:
        search_dir = os.path.join(sys.prefix, "Library", "bin")
        # Windows CUDA 13.0.0 uses "bin\x64" directory but 13.0.1+ just uses "bin" directory
        x64_dir = os.path.join(search_dir, "x64")
        if os.path.isdir(x64_dir):
            search_dir = x64_dir
    else:
        search_dir = os.path.join(sys.prefix, "lib", "")

    # Assume the existence of nvrtc to imply needed CTK libraries are installed
    nvrtc_hits = find_lib("nvrtc", search_dir)
    if not nvrtc_hits:
        return None
    # Use the directory name of the max path
    return os.path.dirname(max(nvrtc_hits))
|
|
295
359
|
|
|
296
360
|
|
|
297
|
-
def
|
|
298
|
-
"""Return path to directory containing the
|
|
299
|
-
is_conda_env = os.path.
|
|
361
|
+
def get_libdevice_conda_path():
    """Return the path of the libdevice bitcode file in a conda environment.

    A conda environment is recognised by a ``conda-meta`` directory under
    ``sys.prefix``. Returns ``None`` outside such an environment or when
    ``libdevice.10.bc`` is not present at the expected location.
    """
    if not os.path.isdir(os.path.join(sys.prefix, "conda-meta")):
        return None

    # Linux: nvvm/libdevice/libdevice.10.bc
    # Windows: Library/nvvm/libdevice/libdevice.10.bc
    prefix_subdir = "Library" if IS_WIN32 else ""
    bitcode_file = os.path.join(
        sys.prefix, prefix_subdir, "nvvm", "libdevice", "libdevice.10.bc"
    )
    return bitcode_file if os.path.isfile(bitcode_file) else None
|
|
311
379
|
|
|
312
|
-
if not os.path.exists(libdir) or not os.path.isdir(libdir):
|
|
313
|
-
# If the path doesn't exist, we didn't find the NVIDIA conda package
|
|
314
|
-
return
|
|
315
380
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
381
|
+
def _get_nvvm_conda_path():
    """Return the path of the NVVM library file in a conda environment.

    A conda environment is recognised by a ``conda-meta`` directory under
    ``sys.prefix``. Returns ``None`` outside such an environment or when
    the NVVM library file does not exist at the expected location.
    """
    if not os.path.isdir(os.path.join(sys.prefix, "conda-meta")):
        return None

    prefix_subdir = "Library" if IS_WIN32 else ""
    lib_subdir = "bin" if IS_WIN32 else "lib64"
    lib_dir = os.path.join(sys.prefix, prefix_subdir, "nvvm", lib_subdir)
    # Windows CUDA 13.0.0 puts in "bin\x64" directory but 13.0.1+ just uses "bin" directory
    if IS_WIN32:
        x64_dir = os.path.join(lib_dir, "x64")
        if os.path.isdir(x64_dir):
            lib_dir = x64_dir

    dso_name = "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so.4"
    dso_file = os.path.join(lib_dir, dso_name)
    return dso_file if os.path.isfile(dso_file) else None
|
|
322
402
|
|
|
323
|
-
def get_nvidia_libdevice_ctk():
|
|
324
|
-
"""Return path to directory containing the libdevice library."""
|
|
325
|
-
nvvm_ctk = get_nvidia_nvvm_ctk()
|
|
326
|
-
if not nvvm_ctk:
|
|
327
|
-
return
|
|
328
|
-
nvvm_dir = os.path.dirname(nvvm_ctk)
|
|
329
|
-
return os.path.join(nvvm_dir, "libdevice")
|
|
330
403
|
|
|
404
|
+
def get_wheel_static_libdir():
    """Return the wheel directory that contains the static cudadevrt library.

    The CUDA 12 runtime wheel (``nvidia-cuda-runtime-cu12``) is preferred.
    If it is not installed, ``nvidia-cuda-runtime`` is used provided its
    version starts with ``"13."``. Returns ``None`` when neither applies or
    when the cudadevrt archive is missing from the expected directory.
    """
    # CUDA 12
    dist = _get_distribution("nvidia-cuda-runtime-cu12")
    if dist is not None:
        wheel_subdir = "cuda_runtime"
    else:
        # CUDA 13
        dist = _get_distribution("nvidia-cuda-runtime")
        if dist is None or not dist.version.startswith("13."):
            return None
        wheel_subdir = "cu13"

    static_dir = os.path.join(
        dist.locate_file(""),
        "nvidia",
        wheel_subdir,
        "lib",
        "x64" if IS_WIN32 else "",
    )
    archive_name = "cudadevrt.lib" if IS_WIN32 else "libcudadevrt.a"
    archive = os.path.join(static_dir, archive_name)

    if not (archive and os.path.isfile(archive)):
        return None
    return os.path.dirname(archive)
|
|
347
443
|
|
|
348
|
-
|
|
349
|
-
|
|
444
|
+
|
|
445
|
+
def get_wheel_include():
    """Return the CUDA include directory shipped by an NVIDIA runtime wheel.

    The CUDA 12 runtime wheel (``nvidia-cuda-runtime-cu12``) is preferred.
    If it is not installed, ``nvidia-cuda-runtime`` is used provided its
    version starts with ``"13."``. The directory is only returned when it
    exists and contains ``cuda_device_runtime_api.h``; otherwise ``None``.
    """
    # CUDA 12
    dist = _get_distribution("nvidia-cuda-runtime-cu12")
    if dist is not None:
        wheel_subdir = "cuda_runtime"
    else:
        # CUDA 13
        dist = _get_distribution("nvidia-cuda-runtime")
        if dist is None or not dist.version.startswith("13."):
            return None
        wheel_subdir = "cu13"

    headers = os.path.join(
        dist.locate_file(""), "nvidia", wheel_subdir, "include"
    )
    if not (headers and os.path.isdir(headers)):
        return None

    marker = os.path.join(headers, "cuda_device_runtime_api.h")
    return headers if os.path.isfile(marker) else None
|
|
350
477
|
|
|
351
478
|
|
|
352
479
|
def get_cuda_home(*subdirs):
|
|
@@ -360,39 +487,74 @@ def get_cuda_home(*subdirs):
|
|
|
360
487
|
cuda_home = os.environ.get("CUDA_PATH")
|
|
361
488
|
if cuda_home is not None:
|
|
362
489
|
return os.path.join(cuda_home, *subdirs)
|
|
490
|
+
return None
|
|
363
491
|
|
|
364
492
|
|
|
365
|
-
def
|
|
366
|
-
|
|
493
|
+
def get_cuda_home_libdir():
    """Return the shared-library directory under the CUDA home location.

    Returns ``None`` when ``get_cuda_home()`` yields nothing. The returned
    path is normalised but is not checked for existence.
    """
    home = get_cuda_home()
    if home is None:
        return None

    if IS_WIN32:
        candidate = os.path.join(home, "Library", "bin")
        # Windows CUDA 13 system CTK uses "bin\x64" directory while conda just uses "bin" directory
        x64_dir = os.path.join(candidate, "x64")
        if os.path.isdir(x64_dir):
            candidate = x64_dir
    else:
        candidate = os.path.join(home, "lib64", "")

    # normpath drops the trailing separator left by the empty component.
    return os.path.normpath(candidate)
|
|
367
507
|
|
|
368
|
-
if by == "NVIDIA NVCC Wheel":
|
|
369
|
-
platform_map = {
|
|
370
|
-
"linux": "libnvvm.so",
|
|
371
|
-
"win32": "nvvm64_40_0.dll",
|
|
372
|
-
}
|
|
373
508
|
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
509
|
+
def get_cuda_home_include():
    """Return the include directory under the CUDA home location.

    Returns ``None`` when ``get_cuda_home()`` yields nothing, or when the
    chosen directory is missing or lacks ``cuda_device_runtime_api.h``.
    """
    home = get_cuda_home()
    if home is None:
        return None

    # For Windows, CTK puts it in $CTK/include but conda puts it in $CTK/Library/include
    has_library_dir = IS_WIN32 and os.path.isdir(
        os.path.join(home, "Library")
    )
    if has_library_dir:
        headers = os.path.join(home, "Library", "include")
    else:
        headers = os.path.join(home, "include")

    if not (headers and os.path.isdir(headers)):
        return None

    marker = os.path.join(headers, "cuda_device_runtime_api.h")
    return headers if os.path.isfile(marker) else None
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def _get_nvvm_cuda_home_path():
    """Return the expected NVVM library file path under the CUDA home location.

    Returns:
        ``None`` when ``get_cuda_home("nvvm")`` yields nothing; otherwise
        the computed path of the NVVM shared library. The file itself is
        not checked for existence.
    """
    nvvm_lib_dir = get_cuda_home("nvvm")
    if nvvm_lib_dir is None:
        # Explicit None keeps every return in this function an expression.
        return None

    nvvm_lib_dir = os.path.join(nvvm_lib_dir, "bin" if IS_WIN32 else "lib64")
    # Prefer the "x64" sub-directory on Windows when it is present.
    if IS_WIN32 and os.path.isdir(os.path.join(nvvm_lib_dir, "x64")):
        nvvm_lib_dir = os.path.join(nvvm_lib_dir, "x64")

    # NOTE(review): unlike _get_nvvm_conda_path, no os.path.isfile() check
    # is applied here -- confirm that callers cope with a missing file.
    return os.path.join(
        nvvm_lib_dir, "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so.4"
    )
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def _get_nvvm_path():
    """Wrap the NVVM path decision in an ``_env_path_tuple``.

    Any falsy path coming back from ``_get_nvvm_path_decision()`` is
    reported as ``None``.
    """
    source_label, nvvm_file = _get_nvvm_path_decision()
    return _env_path_tuple(source_label, nvvm_file if nvvm_file else None)
|
|
385
552
|
|
|
386
553
|
|
|
387
554
|
def _get_nvrtc_path():
    """Locate the nvrtc library inside the directory chosen for it.

    ``find_lib`` is asked for nvrtc candidates in the decided directory;
    the maximum candidate is kept, or ``None`` when there are none.
    """
    source_label, search_dir = _get_nvrtc_path_decision()
    matches = find_lib("nvrtc", libdir=search_dir)
    chosen = None
    if matches:
        chosen = max(matches)
    return _env_path_tuple(source_label, chosen)
|
|
397
559
|
|
|
398
560
|
|
|
@@ -402,8 +564,11 @@ def get_cuda_paths():
|
|
|
402
564
|
|
|
403
565
|
The returned dictionary will have the following keys and infos:
|
|
404
566
|
- "nvvm": file_path
|
|
405
|
-
- "
|
|
567
|
+
- "nvrtc": file_path
|
|
568
|
+
- "libdevice": file_path
|
|
406
569
|
- "cudalib_dir": directory_path
|
|
570
|
+
- "static_cudalib_dir": directory_path
|
|
571
|
+
- "include_dir": directory_path
|
|
407
572
|
|
|
408
573
|
Note: The result of the function is cached.
|
|
409
574
|
"""
|
|
@@ -415,7 +580,7 @@ def get_cuda_paths():
|
|
|
415
580
|
d = {
|
|
416
581
|
"nvvm": _get_nvvm_path(),
|
|
417
582
|
"nvrtc": _get_nvrtc_path(),
|
|
418
|
-
"libdevice":
|
|
583
|
+
"libdevice": _get_libdevice_path(),
|
|
419
584
|
"cudalib_dir": _get_cudalib_dir(),
|
|
420
585
|
"static_cudalib_dir": _get_static_cudalib_dir(),
|
|
421
586
|
"include_dir": _get_include_dir(),
|
|
@@ -425,25 +590,41 @@ def get_cuda_paths():
|
|
|
425
590
|
return d
|
|
426
591
|
|
|
427
592
|
|
|
428
|
-
def
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
593
|
+
def get_libdevice_wheel_path():
    """Return the path of the libdevice bitcode file shipped by an NVIDIA wheel.

    The CUDA 12 wheel (``nvidia-cuda-nvcc-cu12``) is preferred. If it is
    not installed, ``nvidia-nvvm`` is used provided its version starts
    with ``"13."``. Returns ``None`` when neither applies or when
    ``libdevice.10.bc`` is missing from the expected location.
    """
    # CUDA 12
    dist = _get_distribution("nvidia-cuda-nvcc-cu12")
    if dist is not None:
        wheel_subdir = "cuda_nvcc"
    else:
        # CUDA 13
        dist = _get_distribution("nvidia-nvvm")
        if dist is None or not dist.version.startswith("13."):
            return None
        wheel_subdir = "cu13"

    bitcode_file = os.path.join(
        dist.locate_file(""),
        "nvidia",
        wheel_subdir,
        "nvvm",
        "libdevice",
        "libdevice.10.bc",
    )
    if not (bitcode_file and os.path.isfile(bitcode_file)):
        return None
    return bitcode_file
|
|
447
628
|
|
|
448
629
|
|
|
449
630
|
def get_current_cuda_target_name():
|
|
@@ -475,11 +656,11 @@ def get_conda_include_dir():
|
|
|
475
656
|
Return the include directory in the current conda environment, if one
|
|
476
657
|
is active and it exists.
|
|
477
658
|
"""
|
|
478
|
-
is_conda_env = os.path.
|
|
659
|
+
is_conda_env = os.path.isdir(os.path.join(sys.prefix, "conda-meta"))
|
|
479
660
|
if not is_conda_env:
|
|
480
661
|
return
|
|
481
662
|
|
|
482
|
-
if
|
|
663
|
+
if IS_WIN32:
|
|
483
664
|
include_dir = os.path.join(sys.prefix, "Library", "include")
|
|
484
665
|
elif target_name := get_current_cuda_target_name():
|
|
485
666
|
include_dir = os.path.join(
|
|
@@ -490,23 +671,21 @@ def get_conda_include_dir():
|
|
|
490
671
|
# though usually it shouldn't.
|
|
491
672
|
include_dir = os.path.join(sys.prefix, "include")
|
|
492
673
|
|
|
493
|
-
if (
|
|
494
|
-
os.path.
|
|
495
|
-
and os.path.isdir(include_dir)
|
|
496
|
-
and os.path.exists(
|
|
497
|
-
os.path.join(include_dir, "cuda_device_runtime_api.h")
|
|
498
|
-
)
|
|
674
|
+
if os.path.isdir(include_dir) and os.path.isfile(
|
|
675
|
+
os.path.join(include_dir, "cuda_device_runtime_api.h")
|
|
499
676
|
):
|
|
500
677
|
return include_dir
|
|
501
|
-
return
|
|
678
|
+
return None
|
|
502
679
|
|
|
503
680
|
|
|
504
681
|
def _get_include_dir():
    """Find the root CUDA include directory.

    Every candidate location is computed up front, in the order listed,
    and the list is then handed to ``_find_valid_path`` to pick one.
    """
    conda_headers = get_conda_include_dir()
    wheel_headers = get_wheel_include()
    cuda_home_headers = get_cuda_home_include()
    system_headers = get_system_ctk_include()

    candidates = [
        ("Conda environment (NVIDIA package)", conda_headers),
        ("NVIDIA NVCC Wheel", wheel_headers),
        ("CUDA_HOME", cuda_home_headers),
        ("System", system_headers),
        ("CUDA_INCLUDE_PATH Config Entry", config.CUDA_INCLUDE_PATH),
    ]
    source_label, chosen_dir = _find_valid_path(candidates)
    return _env_path_tuple(source_label, chosen_dir)
|