numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""CUDA Toolkit libraries lookup utilities.
|
|
5
|
+
|
|
6
|
+
CUDA Toolkit libraries can be available via either:
|
|
7
|
+
|
|
8
|
+
- the `cuda-nvcc` and `cuda-nvrtc` conda packages,
|
|
9
|
+
- a user supplied location from CUDA_HOME,
|
|
10
|
+
- a system wide location,
|
|
11
|
+
- package-specific locations (e.g. the Debian NVIDIA packages),
|
|
12
|
+
- or can be discovered by the system loader.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import sys
|
|
17
|
+
import ctypes
|
|
18
|
+
|
|
19
|
+
from numba.cuda.misc.findlib import find_lib
|
|
20
|
+
from numba.cuda.cuda_paths import get_cuda_paths
|
|
21
|
+
from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
|
|
22
|
+
from numba.cuda.cudadrv.error import CudaSupportError
|
|
23
|
+
from numba.cuda.core import config
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Filename templates ("%s" is replaced by the bare library name) for shared
# and static libraries, chosen once at import time from the host platform.
if sys.platform == "win32":
    _dllnamepattern, _staticnamepattern = "%s.dll", "%s.lib"
elif sys.platform == "darwin":
    _dllnamepattern, _staticnamepattern = "lib%s.dylib", "lib%s.a"
else:
    # Every other platform uses the ELF-style "lib<name>.so" / "lib<name>.a".
    _dllnamepattern, _staticnamepattern = "lib%s.so", "lib%s.a"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_libdevice():
    """Return the ``info`` value of the "libdevice" entry of ``get_cuda_paths()``.

    NOTE(review): presumably this is the filesystem path of the libdevice
    bitcode file, since ``open_libdevice`` passes it straight to ``open`` --
    confirm against ``get_cuda_paths``.
    """
    return get_cuda_paths()["libdevice"].info
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def open_libdevice():
    """Return the full binary contents of the file named by ``get_libdevice()``."""
    libdevice_path = get_libdevice()
    with open(libdevice_path, "rb") as stream:
        contents = stream.read()
    return contents
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_cudalib(lib, static=False):
    """
    Locate a CUDA library by searching known locations.

    When nothing is found, a generic filename is returned instead (e.g.
    'libnvvm.so' for 'nvvm') so the caller can still try loading it through
    the system loader's own search mechanism.

    ``lib`` is the bare library name; ``static`` selects the static library
    directory and filename pattern instead of the shared ones. The ``static``
    flag is not consulted for "nvrtc" and "nvvm".
    """
    # nvrtc and nvvm have dedicated entries in the CUDA paths table.
    if lib in {"nvrtc", "nvvm"}:
        located = get_cuda_paths()[lib].info
        if located:
            return located
        return _dllnamepattern % lib

    if static:
        path_key, pattern = "static_cudalib_dir", _staticnamepattern
    else:
        path_key, pattern = "cudalib_dir", _dllnamepattern

    search_dir = get_cuda_paths()[path_key].info
    matches = find_lib(lib, search_dir, static=static)
    if not matches:
        # Nothing found: fall back to the generic name for the loader.
        return pattern % lib
    # Several matches may exist; the one that compares greatest is chosen.
    return max(matches)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_cuda_include_dir():
    """
    Return the CUDA include directory found among the default locations.

    The `CUDA_INCLUDE_PATH` entry from user configuration is not taken into
    account here.
    """
    include_entry = get_cuda_paths()["include_dir"]
    return include_entry.info
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def check_cuda_include_dir(path):
    """Validate that *path* is a usable CUDA include directory.

    Raises ``FileNotFoundError`` when *path* is ``None`` or does not exist,
    and also when it exists but does not contain ``cuda_runtime.h``.
    Returns ``None`` on success.
    """
    path_exists = path is not None and os.path.exists(path)
    if not path_exists:
        raise FileNotFoundError(f"{path} not found")

    # cuda_runtime.h is the marker header expected inside the directory.
    runtime_header = os.path.join(path, "cuda_runtime.h")
    if not os.path.exists(runtime_header):
        raise FileNotFoundError(f"Unable to find cuda_runtime.h from {path}")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def open_cudalib(lib):
    """Load the CUDA library *lib* and return its ``ctypes.CDLL`` handle.

    The argument given to ``ctypes.CDLL`` is whatever ``get_cudalib(lib)``
    returns: a located path, or a generic filename left for the system
    loader to resolve.
    """
    return ctypes.CDLL(get_cudalib(lib))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def check_static_lib(path):
    """Raise ``FileNotFoundError`` unless *path* names an existing file.

    ``None`` is reported as not found, in the same way as
    ``check_cuda_include_dir`` does, instead of letting ``os.path.isfile``
    fail with a ``TypeError``. Returns ``None`` on success.
    """
    if path is None or not os.path.isfile(path):
        raise FileNotFoundError(f"{path} not found")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _get_source_variable(lib, static=False):
    """Return the ``by`` attribute of the ``get_cuda_paths()`` entry for *lib*.

    "nvvm", "nvrtc", "libdevice" and "include_dir" each have their own entry.
    Any other name is looked up through the CUDA library directory entry,
    using the static one when *static* is true.
    """
    dedicated_keys = ("nvvm", "nvrtc", "libdevice", "include_dir")
    path_key = next((key for key in dedicated_keys if lib == key), None)
    if path_key is None:
        # No dedicated entry: fall back to the library directory entry.
        path_key = "static_cudalib_dir" if static else "cudalib_dir"
    return get_cuda_paths()[path_key].by
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test():
    """Test library lookup. Path info is printed to stdout.

    Checks, in order: the driver, the nvvm and nvrtc dynamic libraries, the
    cudadevrt static library, libdevice, and the CUDA include directory.

    :return: True if every check passed, False if any of them failed.
    """
    failed = False

    # Check for the driver
    try:
        dlloader, candidates = locate_driver_and_loader()
        print("Finding driver from candidates:")
        for location in candidates:
            print(f"\t{location}")
        print(f"Using loader {dlloader}")
        print("\tTrying to load driver", end="...")
        _, path = load_driver(dlloader, candidates)
        print("\tok")
        print(f"\t\tLoaded from {path}")
    except CudaSupportError as e:
        print(f"\tERROR: failed to open driver: {e}")
        failed = True

    # Find the absolute location of the driver on Linux. Various driver-related
    # issues have been reported by WSL2 users, and it is almost always due to a
    # Linux (i.e. non-WSL2) driver being installed in a WSL2 system.
    # Providing the absolute location of the driver indicates its version
    # number in the soname (e.g. "libcuda.so.530.30.02"), which can be used to
    # look up whether the driver was intended for "native" Linux.
    if sys.platform == "linux" and not failed:
        pid = os.getpid()
        mapsfile = os.path.join(os.path.sep, "proc", f"{pid}", "maps")
        try:
            with open(mapsfile) as f:
                maps = f.read()
        # It's difficult to predict all that might go wrong reading the maps
        # file - in case various error conditions ensue (the file is not found,
        # not readable, etc.) we use OSError to hopefully catch any of them.
        except OSError:
            # It's helpful to report that this went wrong to the user, but we
            # don't set failed to True because this doesn't have any connection
            # to actual CUDA functionality.
            print(
                f"\tERROR: Could not open {mapsfile} to determine absolute "
                "path to libcuda.so"
            )
        else:
            # In this case we could read the maps, so we can report the
            # relevant ones to the user
            locations = {s for s in maps.split() if "libcuda.so" in s}
            print("\tMapped libcuda.so paths:")
            for location in locations:
                print(f"\t\t{location}")

    # Checks for dynamic libraries
    libs = "nvvm nvrtc".split()
    for lib in libs:
        path = get_cudalib(lib)
        print("Finding {} from {}".format(lib, _get_source_variable(lib)))
        print("\tLocated at", path)

        try:
            print("\tTrying to open library", end="...")
            open_cudalib(lib)
            print("\tok")
        except OSError as e:
            print("\tERROR: failed to open %s:\n%s" % (lib, e))
            failed = True

    # Check for cudadevrt (the only static library)
    lib = "cudadevrt"
    path = get_cudalib(lib, static=True)
    print(
        "Finding {} from {}".format(lib, _get_source_variable(lib, static=True))
    )
    print("\tLocated at", path)

    try:
        print("\tChecking library", end="...")
        check_static_lib(path)
        print("\tok")
    except FileNotFoundError as e:
        print("\tERROR: failed to find %s:\n%s" % (lib, e))
        failed = True

    # Check for libdevice
    where = _get_source_variable("libdevice")
    print(f"Finding libdevice from {where}")
    path = get_libdevice()
    print("\tLocated at", path)

    try:
        print("\tChecking library", end="...")
        check_static_lib(path)
        print("\tok")
    except FileNotFoundError as e:
        # Name libdevice explicitly: `lib` still holds "cudadevrt" from the
        # previous check and would mislabel this failure.
        print("\tERROR: failed to find %s:\n%s" % ("libdevice", e))
        failed = True

    # Check cuda include paths

    print("Include directory configuration variable:")
    print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")

    where = _get_source_variable("include_dir")
    print(f"Finding include directory from {where}")
    include = get_cuda_include_dir()
    print("\tLocated at", include)
    try:
        print("\tChecking include directory", end="...")
        check_cuda_include_dir(include)
        print("\tok")
    except FileNotFoundError as e:
        print("\tERROR: failed to find cuda include directory:\n%s" % e)
        failed = True

    return not failed
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import io
|
|
5
|
+
import os
|
|
6
|
+
from typing import Union, Type
|
|
7
|
+
|
|
8
|
+
from .mappings import FILE_EXTENSION_MAP
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LinkableCode:
    """Code held in memory that is to be linked.

    :param data: A buffer, StringIO or BytesIO containing the data to link.
                 For a StringIO or BytesIO, the content is fetched from the
                 object each time the `data` property is accessed.
    :param name: The file name to reference in any compilation or linking
                 errors that may be produced.
    :param setup_callback: A function called prior to the launch of a kernel
                           contained within a module that has this code object
                           linked into it.
    :param teardown_callback: A function called just prior to the unloading of
                              a module that has this code object linked into
                              it.
    :param nrt: If True, assume this object contains NRT function calls and
                add NRT source code to the final link.
    """

    def __init__(
        self,
        data,
        name=None,
        setup_callback=None,
        teardown_callback=None,
        nrt=False,
    ):
        # A falsy callback (e.g. None) means "no callback" and is not checked.
        callbacks = (
            ("setup_callback", setup_callback),
            ("teardown_callback", teardown_callback),
        )
        for label, callback in callbacks:
            if callback and not callable(callback):
                raise TypeError(f"{label} must be callable")

        self._data = data
        self._name = name
        self.nrt = nrt
        self.setup_callback = setup_callback
        self.teardown_callback = teardown_callback

    @property
    def name(self):
        # Falls back to the class-level ``default_name`` (set by subclasses)
        # when no explicit name was supplied.
        if self._name:
            return self._name
        return self.default_name

    @property
    def data(self):
        payload = self._data
        if isinstance(payload, (io.StringIO, io.BytesIO)):
            return payload.getvalue()
        return payload

    @staticmethod
    def from_path(path: str):
        """
        Build a linkable code object from the contents of a file.

        ``.cu`` and ``.ptx`` files are read as text, everything else as
        bytes. The object is named after the file's base name without its
        extension.

        Parameters
        ----------
        path : str
            The path to the file to load.

        Returns
        -------
        LinkableCode
            The linkable code object.

        Raises
        ------
        ValueError
            If the file extension is not supported.
        """
        stem, extension = os.path.splitext(path)
        mode = "r" if extension in (".cu", ".ptx") else "rb"

        with open(path, mode) as f:
            contents = f.read()

        linkable_cls = _extension_to_linkable_code_kind(extension)
        return linkable_cls(contents, name=os.path.basename(stem))

    @classmethod
    def from_path_or_obj(cls, path_or_obj: Union[str, "LinkableCode"]):
        """
        Accept either a file path or an existing LinkableCode object.

        A string is treated as a path and loaded with ``from_path``; any
        other value is returned as is.

        Parameters
        ----------
        path_or_obj : str or LinkableCode
            The path to the file or the LinkableCode object to load.

        Returns
        -------
        LinkableCode
            The linkable code object.

        Raises
        ------
        ValueError
            If the file extension is not supported.
        """
        if not isinstance(path_or_obj, str):
            return path_or_obj
        return cls.from_path(path_or_obj)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class PTXSource(LinkableCode):
    """In-memory PTX source code."""

    # Name reported when the object was created without an explicit name.
    default_name = "<unnamed-ptx>"
    kind = FILE_EXTENSION_MAP["ptx"]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class CUSource(LinkableCode):
    """In-memory CUDA C/C++ source code."""

    # Name reported when the object was created without an explicit name.
    default_name = "<unnamed-cu>"
    # Unlike the other kinds, this one is a plain string rather than an entry
    # of FILE_EXTENSION_MAP.
    kind = "cu"
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class Fatbin(LinkableCode):
    """In-memory ELF Fatbin."""

    # Name reported when the object was created without an explicit name.
    default_name = "<unnamed-fatbin>"
    kind = FILE_EXTENSION_MAP["fatbin"]
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class Cubin(LinkableCode):
    """In-memory ELF Cubin."""

    # Name reported when the object was created without an explicit name.
    default_name = "<unnamed-cubin>"
    kind = FILE_EXTENSION_MAP["cubin"]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class Archive(LinkableCode):
    """In-memory archive of objects."""

    # Name reported when the object was created without an explicit name.
    default_name = "<unnamed-archive>"
    kind = FILE_EXTENSION_MAP["a"]
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class Object(LinkableCode):
    """In-memory object file."""

    # Name reported when the object was created without an explicit name.
    default_name = "<unnamed-object>"
    kind = FILE_EXTENSION_MAP["o"]
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class LTOIR(LinkableCode):
    """In-memory LTOIR file."""

    # Name reported when the object was created without an explicit name.
    default_name = "<unnamed-ltoir>"
    kind = FILE_EXTENSION_MAP["ltoir"]
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _extension_to_linkable_code_kind(extension: str) -> Type[LinkableCode]:
    """
    Return the LinkableCode subclass for a file extension.

    ``extension`` includes the leading dot (e.g. ``".ptx"``) and is matched
    exactly. A ValueError is raised for any extension not listed below.
    """
    known_kinds = (
        (".cu", CUSource),
        (".ptx", PTXSource),
        (".fatbin", Fatbin),
        (".cubin", Cubin),
        (".a", Archive),
        (".o", Object),
        (".ltoir", LTOIR),
    )
    for suffix, linkable_cls in known_kinds:
        if extension == suffix:
            return linkable_cls
    raise ValueError(f"Unknown extension: {extension}")
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from cuda.bindings.driver import CUjitInputType
|
|
5
|
+
|
|
6
|
+
# Maps a file extension (without the leading dot) to the CUjitInputType member
# used for that kind of input. "a" and "lib" both map to the library type.
FILE_EXTENSION_MAP = dict(
    o=CUjitInputType.CU_JIT_INPUT_OBJECT,
    ptx=CUjitInputType.CU_JIT_INPUT_PTX,
    a=CUjitInputType.CU_JIT_INPUT_LIBRARY,
    lib=CUjitInputType.CU_JIT_INPUT_LIBRARY,
    cubin=CUjitInputType.CU_JIT_INPUT_CUBIN,
    fatbin=CUjitInputType.CU_JIT_INPUT_FATBINARY,
    ltoir=CUjitInputType.CU_JIT_INPUT_NVVM,
)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.cudadrv import devices, driver
|
|
5
|
+
|
|
6
|
+
# _calc_array_sizeof is only defined when numba.core.registry is importable;
# if the import fails the name is simply left undefined.
try:
    from numba.core.registry import cpu_target
except ImportError:
    pass
else:

    def _calc_array_sizeof(ndim):
        """
        Return the array size for ``ndim`` dimensions as computed by the CPU
        target context (the ABI size in the CPU target).
        """
        return cpu_target.target_context.calc_array_sizeof(ndim)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def ndarray_device_allocate_data(ary):
    """
    Allocate a GPU data buffer for ``ary``.

    The buffer size is ``driver.host_memory_size(ary)`` and the allocation is
    made with ``memalloc`` on the context from ``devices.get_context()``.
    """
    nbytes = driver.host_memory_size(ary)
    return devices.get_context().memalloc(nbytes)
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.cudadrv.error import (
|
|
5
|
+
CCSupportError,
|
|
6
|
+
)
|
|
7
|
+
from numba.cuda import config
|
|
8
|
+
from numba.cuda.cuda_paths import get_cuda_paths
|
|
9
|
+
from numba.cuda.utils import _readenv
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import warnings
|
|
13
|
+
import functools
|
|
14
|
+
|
|
15
|
+
from cuda.core.experimental import Program, ProgramOptions
|
|
16
|
+
from cuda.bindings import nvrtc as bindings_nvrtc
|
|
17
|
+
|
|
18
|
+
# A non-empty environment variable takes precedence; otherwise fall back to
# whatever config already carries (or the empty string).
NVRTC_EXTRA_SEARCH_PATHS = _readenv(
    "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", str, ""
)
if not NVRTC_EXTRA_SEARCH_PATHS:
    NVRTC_EXTRA_SEARCH_PATHS = getattr(
        config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS", ""
    )

# Publish the resolved value on config only when config does not define it.
if not hasattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
    config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = NVRTC_EXTRA_SEARCH_PATHS
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@functools.cache
def _get_nvrtc_version():
    """
    Return the NVRTC version as a ``(major, minor)`` tuple.

    The result is memoized by ``functools.cache``.

    :raises RuntimeError: If ``nvrtcVersion()`` does not report NVRTC_SUCCESS.
    """
    status, major, minor = bindings_nvrtc.nvrtcVersion()
    if status == bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
        return (major, minor)
    raise RuntimeError(f"{status.name} when calling nvrtcVersion()")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
    """
    Compile a CUDA C/C++ source to PTX or LTOIR for a given compute capability.

    If ``cc`` is not itself supported by NVRTC, the highest supported compute
    capability not exceeding ``cc`` is used and a warning is issued.

    :param src: The source code to compile (bytes are decoded as UTF-8)
    :type src: str
    :param name: The filename of the source (for information only)
    :type name: str
    :param cc: A tuple ``(major, minor)`` of the compute capability
    :type cc: tuple
    :param ltoir: Compile into LTOIR if True, otherwise into PTX
    :type ltoir: bool
    :param lineinfo: Whether to include line information in the compiled code
    :type lineinfo: bool
    :param debug: Whether to include debug information in the compiled code
    :type debug: bool
    :return: The compiled PTX or LTOIR and compilation log. The log is a list
             of message strings when NVRTC emitted any, otherwise the empty
             string.
    :rtype: tuple
    :raises RuntimeError: If ``cc`` is below every compute capability
                          supported by NVRTC, or if the NVRTC major version is
                          neither 12 nor 13.
    """
    version = _get_nvrtc_version()

    def ver_str(ver):
        return ".".join(str(v) for v in ver)

    supported_ccs = get_supported_ccs()
    try:
        found = max(v for v in supported_ccs if v <= cc)
    except ValueError:
        # max() of an empty sequence: nothing supported is <= cc.
        raise RuntimeError(
            f"Device compute capability {ver_str(cc)} is less than the "
            f"minimum supported by NVRTC {ver_str(version)}. Supported "
            "compute capabilities are "
            f"{', '.join([ver_str(v) for v in supported_ccs])}."
        ) from None

    if found != cc:
        warnings.warn(
            f"Device compute capability {ver_str(cc)} is not supported by "
            f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
        )

    # Compilation options:
    # - Compile for the current device's compute capability.
    # - The CUDA include path is added.
    # - Relocatable Device Code (rdc) is needed to prevent device functions
    #   being optimized away.
    major, minor = found

    arch = f"sm_{major}{minor}"

    cuda_include_dir = get_cuda_paths()["include_dir"].info
    cuda_includes = [f"{cuda_include_dir}"]

    cudadrv_path = os.path.dirname(os.path.abspath(__file__))
    numba_cuda_path = os.path.dirname(cudadrv_path)

    nvrtc_ver_major = version[0]
    if nvrtc_ver_major == 12:
        numba_include = os.path.join(numba_cuda_path, "include", "12")
        # For CUDA 12 wheels, `cuda_include_dir` is `site-packages/nvidia/cuda_runtime/include`
        # We need to find CCCL at `site-packages/nvidia/cuda_cccl/include`
        # For CUDA 12 conda / system install, CCCL is just in the `include` directory
        cuda_includes.append(
            os.path.join(cuda_include_dir, "..", "..", "cuda_cccl", "include")
        )
    elif nvrtc_ver_major == 13:
        numba_include = os.path.join(numba_cuda_path, "include", "13")
        # For CUDA 13 wheels, `cuda_include_dir` is `site-packages/nvidia/cu13/include`
        # We need to find CCCL at `site-packages/nvidia/cu13/include/cccl`
        # For CUDA 13 conda / system install, CCCL is in the `include/cccl` directory
        cuda_includes.append(os.path.join(cuda_include_dir, "cccl"))
    else:
        # Without this branch `numba_include` would be unbound below and the
        # failure would surface as an opaque UnboundLocalError.
        raise RuntimeError(
            f"Unsupported NVRTC version {ver_str(version)}: only NVRTC major "
            "versions 12 and 13 are handled."
        )

    if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
        extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
    else:
        extra_includes = []

    nrt_include = os.path.join(numba_cuda_path, "memory_management")

    includes = [numba_include, *cuda_includes, nrt_include, *extra_includes]

    options = ProgramOptions(
        arch=arch,
        include_path=includes,
        relocatable_device_code=True,
        link_time_optimization=ltoir,
        name=name,
        debug=debug,
        lineinfo=lineinfo,
    )

    class Logger:
        # Minimal sink: collects every message written during compilation.
        def __init__(self):
            self.log = []

        def write(self, msg):
            self.log.append(msg)

    logger = Logger()
    if isinstance(src, bytes):
        src = src.decode("utf8")

    prog = Program(src, "c++", options=options)
    result = prog.compile("ltoir" if ltoir else "ptx", logs=logger)
    log = ""
    if logger.log:
        log = logger.log
        joined_logs = "\n".join(log)
        warnings.warn(f"NVRTC log messages: {joined_logs}")
    return result, log
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def find_closest_arch(mycc):
    """
    Given a compute capability, return the closest compute capability supported
    by the CUDA toolkit.

    An exact match is returned as is; otherwise the supported compute
    capability preceding the first one that exceeds ``mycc`` is returned. If
    none exceeds ``mycc``, the last supported one is returned.

    :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)``
    :return: Closest supported CC as a tuple ``(MAJOR, MINOR)``
    :raises CCSupportError: If ``mycc`` is lower than the first supported
                            compute capability.
    """
    # NOTE(review): the scan assumes get_supported_ccs() returns the compute
    # capabilities in ascending order - confirm against NVRTC.
    supported_ccs = get_supported_ccs()

    for i, cc in enumerate(supported_ccs):
        if cc == mycc:
            # Matches
            return cc
        if cc > mycc:
            # Exceeded
            if i == 0:
                # CC lower than supported
                msg = (
                    "GPU compute capability %d.%d is not supported "
                    "(requires >=%d.%d)" % (mycc + cc)
                )
                raise CCSupportError(msg)
            # return the previous CC
            return supported_ccs[i - 1]

    # CC higher than supported
    return supported_ccs[-1]  # Choose the highest
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def get_arch_option(major, minor):
    """
    Return the ``compute_XY`` architecture option string.

    A truthy ``config.FORCE_CUDA_CC`` is used as is; otherwise the closest
    supported architecture for ``(major, minor)`` is looked up.
    """
    arch = config.FORCE_CUDA_CC or find_closest_arch((major, minor))
    return "compute_%d%d" % arch
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def get_lowest_supported_cc():
    """Return the smallest compute capability reported by ``get_supported_ccs()``."""
    supported = get_supported_ccs()
    return min(supported)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def get_supported_ccs():
    """
    Return the architectures reported by ``nvrtcGetSupportedArchs()`` as a list
    of ``(major, minor)`` tuples.

    Each reported value is split into its tens part (major) and its last
    digit (minor).

    :raises RuntimeError: If the call does not report NVRTC_SUCCESS.
    """
    status, archs = bindings_nvrtc.nvrtcGetSupportedArchs()
    if status == bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
        return [divmod(arch, 10) for arch in archs]
    raise RuntimeError(
        f"{status.name} when calling nvrtcGetSupportedArchs()"
    )
|