numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import concurrent.futures
|
|
5
|
+
import multiprocessing as mp
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
from numba import cuda
|
|
9
|
+
from numba.cuda.cudadrv.driver import CudaAPIError, driver
|
|
10
|
+
from numba.cuda.cudadrv.error import CudaSupportError
|
|
11
|
+
from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# A mock of cuInit that always raises a CudaAPIError
|
|
15
|
+
def cuInit_raising(arg):
    """Stand-in for ``driver.cuInit`` that fails on every call.

    The argument is accepted only for signature compatibility and is
    ignored; the function always raises ``CudaAPIError`` with code 999 and
    the message "CUDA_ERROR_UNKNOWN".
    """
    error_code = 999
    error_name = "CUDA_ERROR_UNKNOWN"
    raise CudaAPIError(error_code, error_name)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Test code to run in a child that patches driver.cuInit to a variant that
|
|
20
|
+
# always raises. We can't use mock.patch.object here because driver.cuInit is
|
|
21
|
+
# not assigned until we attempt to initialize - mock.patch.object cannot locate
|
|
22
|
+
# the non-existent original method, and so fails. Instead we patch
|
|
23
|
+
# driver.cuInit with our raising version prior to any attempt to initialize.
|
|
24
|
+
def cuInit_raising_test():
    """Patch ``driver.cuInit`` to raise, then trigger device initialization.

    Returns a ``(success, msg)`` pair: ``(True, <exception msg>)`` when
    ``CudaSupportError`` is raised by the CUDA operation, and
    ``(False, None)`` when the operation completes without that error.
    """
    # Install the raising replacement before anything initializes the driver.
    driver.cuInit = cuInit_raising

    try:
        # Allocating a device array forces initialization of the device.
        cuda.device_array(1)
    except CudaSupportError as exc:
        return True, exc.msg

    return False, None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Similar to cuInit_raising_test above, but for testing that the string
|
|
41
|
+
# returned by cuda_error() is as expected.
|
|
42
|
+
def initialization_error_test():
    """Patch ``driver.cuInit`` to raise and report ``cuda.cuda_error()``.

    Returns a ``(success, error)`` pair where ``success`` tells whether the
    CUDA operation raised ``CudaSupportError`` and ``error`` is whatever
    ``cuda.cuda_error()`` returns afterwards.
    """
    # Install the raising replacement before anything initializes the driver.
    driver.cuInit = cuInit_raising

    raised = False
    try:
        # Allocating a device array forces initialization of the device.
        cuda.device_array(1)
    except CudaSupportError:
        raised = True

    return raised, cuda.cuda_error()
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# For testing the path where Driver.__init__() catches a CudaSupportError
|
|
57
|
+
def cuda_disabled_test():
    """Trigger device initialization and capture any ``CudaSupportError``.

    Returns a ``(success, msg)`` pair: ``(True, <exception msg>)`` when
    ``CudaSupportError`` is raised by the CUDA operation, and
    ``(False, None)`` when the operation completes without that error.
    """
    try:
        # Allocating a device array forces initialization of the device.
        cuda.device_array(1)
    except CudaSupportError as exc:
        return True, exc.msg

    return False, None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Similar to cuda_disabled_test, but checks cuda.cuda_error() instead of the
|
|
72
|
+
# exception raised on initialization
|
|
73
|
+
def cuda_disabled_error_test():
    """Trigger device initialization and report ``cuda.cuda_error()``.

    Returns a ``(success, error)`` pair where ``success`` tells whether the
    CUDA operation raised ``CudaSupportError`` and ``error`` is whatever
    ``cuda.cuda_error()`` returns afterwards.
    """
    raised = False
    try:
        # Allocating a device array forces initialization of the device.
        cuda.device_array(1)
    except CudaSupportError:
        raised = True

    return raised, cuda.cuda_error()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@skip_on_cudasim("CUDA Simulator does not initialize driver")
class TestInit(CUDATestCase):
    """Tests for how CUDA driver initialization outcomes are reported.

    The failure scenarios are executed in a spawned child process so that
    patching ``driver.cuInit`` or setting ``NUMBA_DISABLE_CUDA`` affects
    only that child and not the process running the test suite.
    """

    def _test_init_failure(self, target, expected):
        """Run ``target`` in a spawned subprocess and check its message.

        ``target`` is a zero-argument module-level callable that returns a
        ``(success, msg)`` pair. The test fails if ``success`` is false,
        and otherwise asserts that ``expected`` is a substring of ``msg``.
        """
        # Run the initialization failure test in a separate subprocess
        with concurrent.futures.ProcessPoolExecutor(
            mp_context=mp.get_context("spawn")
        ) as exe:
            # should complete within 30s
            success, msg = exe.submit(target).result(timeout=30)

        # Ensure the child process raised an exception during initialization
        # before checking the message. ``msg`` may be None on this path, so
        # fail explicitly rather than performing a substring test on it.
        if not success:
            self.fail("CudaSupportError not raised")

        self.assertIn(expected, msg)

    def test_init_failure_raising(self):
        """The raised CudaSupportError message names the cuInit failure."""
        expected = "Error at driver init: CUDA_ERROR_UNKNOWN (999)"
        self._test_init_failure(cuInit_raising_test, expected)

    def test_init_failure_error(self):
        """``cuda.cuda_error()`` reports the cuInit failure."""
        expected = "CUDA_ERROR_UNKNOWN (999)"
        self._test_init_failure(initialization_error_test, expected)

    def _test_cuda_disabled(self, target):
        """Run ``target`` in a subprocess with ``NUMBA_DISABLE_CUDA=1``.

        The variable is set in this process's environment for the duration
        of the call and restored (or removed, if it was previously unset)
        afterwards, even when the check fails.
        """
        # Uses _test_init_failure to launch the test in a separate subprocess
        # with CUDA disabled.
        cuda_disabled = os.environ.get("NUMBA_DISABLE_CUDA")
        os.environ["NUMBA_DISABLE_CUDA"] = "1"
        try:
            expected = "CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1"
            # Launch the callable supplied by the caller so that each test
            # exercises its own scenario.
            self._test_init_failure(target, expected)
        finally:
            if cuda_disabled is not None:
                os.environ["NUMBA_DISABLE_CUDA"] = cuda_disabled
            else:
                os.environ.pop("NUMBA_DISABLE_CUDA")

    def test_cuda_disabled_raising(self):
        """The raised CudaSupportError message says CUDA is disabled."""
        self._test_cuda_disabled(cuda_disabled_test)

    def test_cuda_disabled_error(self):
        """``cuda.cuda_error()`` says CUDA is disabled."""
        self._test_cuda_disabled(cuda_disabled_error_test)

    def test_init_success(self):
        """No initialization error is recorded in the current process."""
        # Here we assume that initialization is successful (because many bad
        # things will happen with the test suite if it is not) and check that
        # there is no error recorded.
        self.assertIsNone(cuda.cuda_error())
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from llvmlite import ir
|
|
5
|
+
|
|
6
|
+
from numba.cuda.cudadrv import nvvm
|
|
7
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
|
8
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@skip_on_cudasim("Inline PTX cannot be used in the simulator")
class TestCudaInlineAsm(CUDATestCase):
    """Checks that inline PTX in LLVM IR survives compilation by NVVM."""

    def test_inline_rsqrt(self):
        """Build a kernel around an inline ``rsqrt`` and look for it in PTX."""
        f32 = ir.FloatType()

        module = ir.Module(__name__)
        module.triple = "nvptx64-nvidia-cuda"
        nvvm.add_ir_version(module)

        # void cu_rsqrt(float *)
        kernel_ty = ir.FunctionType(ir.VoidType(), [ir.PointerType(f32)])
        kernel = ir.Function(module, kernel_ty, "cu_rsqrt")
        builder = ir.IRBuilder(kernel.append_basic_block("entry"))

        # float -> float inline assembly wrapping the PTX instruction
        asm_ty = ir.FunctionType(f32, [f32])
        rsqrt_asm = ir.InlineAsm(
            asm_ty,
            "rsqrt.approx.f32 $0, $1;",
            "=f,f",
            side_effect=True,
        )

        # Load the argument, apply the instruction, store the result back
        ptr = kernel.args[0]
        loaded = builder.load(ptr)
        result = builder.call(rsqrt_asm, [loaded])
        builder.store(result, ptr)
        builder.ret_void()

        # generate ptx
        module.data_layout = nvvm.NVVM().data_layout
        nvvm.set_cuda_kernel(kernel)
        compiled = nvvm.compile_ir(str(module))
        self.assertTrue("rsqrt.approx.f32" in str(compiled))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba import cuda
|
|
5
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_53
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestIsFP16Supported(CUDATestCase):
    """Checks of the float16 support queries."""

    def test_is_fp16_supported(self):
        """``cuda.is_float16_supported()`` reports a true value."""
        supported = cuda.is_float16_supported()
        self.assertTrue(supported)

    @skip_on_cudasim("fp16 not available in sim")
    @skip_unless_cc_53
    def test_device_supports_float16(self):
        """The current device's ``supports_float16`` attribute is true."""
        device = cuda.get_current_device()
        self.assertTrue(device.supports_float16)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from numba import cuda
|
|
6
|
+
from numba.cuda.cudadrv.linkable_code import LinkableCode
|
|
7
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
8
|
+
import unittest
|
|
9
|
+
|
|
10
|
+
# Directory holding the pre-built test binaries; unset when they have not
# been generated.
TEST_BIN_DIR = os.environ.get("NUMBA_CUDA_TEST_BIN_DIR")
if TEST_BIN_DIR:
    # Every artifact shares the same directory and file-name stem, so only
    # the suffix differs between the paths below.
    _stem = os.path.join(TEST_BIN_DIR, "test_device_functions")

    test_device_functions_a = _stem + ".a"
    test_device_functions_cubin = _stem + ".cubin"
    test_device_functions_cu = _stem + ".cu"
    test_device_functions_fatbin = _stem + ".fatbin"
    test_device_functions_fatbin_multi = _stem + "_multi.fatbin"
    test_device_functions_o = _stem + ".o"
    test_device_functions_ptx = _stem + ".ptx"
    test_device_functions_ltoir = _stem + ".ltoir"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TestLinkableCode(CUDATestCase):
    """Checks of ``LinkableCode.from_path_or_obj``."""

    @skip_on_cudasim(reason="Simulator does not support linkable code")
    @unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
    def test_linkable_code_from_path_or_obj(self):
        """Each test binary path maps to its linkable-code class."""
        cases = (
            (test_device_functions_a, cuda.Archive),
            (test_device_functions_cubin, cuda.Cubin),
            (test_device_functions_cu, cuda.CUSource),
            (test_device_functions_fatbin, cuda.Fatbin),
            (test_device_functions_o, cuda.Object),
            (test_device_functions_ptx, cuda.PTXSource),
            (test_device_functions_ltoir, cuda.LTOIR),
        )

        for file_path, expected_cls in cases:
            linkable = LinkableCode.from_path_or_obj(file_path)
            assert isinstance(linkable, expected_cls)

            # test identity of from_path_or_obj: an existing object must be
            # handed back unchanged
            same = LinkableCode.from_path_or_obj(linkable)
            assert same is linkable
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pytest
|
|
6
|
+
from numba.cuda.testing import unittest
|
|
7
|
+
from numba.cuda.testing import (
|
|
8
|
+
skip_on_cudasim,
|
|
9
|
+
skip_if_cuda_includes_missing,
|
|
10
|
+
skip_if_nvjitlink_missing,
|
|
11
|
+
)
|
|
12
|
+
from numba.cuda.testing import CUDATestCase, test_data_dir
|
|
13
|
+
from numba.cuda.cudadrv.driver import CudaAPIError, _Linker, LinkerError
|
|
14
|
+
from numba.cuda import require_context
|
|
15
|
+
from numba import cuda
|
|
16
|
+
from numba.cuda import void, float64, int64, int32, float32
|
|
17
|
+
from numba.cuda.typing.typeof import typeof
|
|
18
|
+
|
|
19
|
+
# Host-side array that simple_const_mem copies via cuda.const.array_like.
CONST1D = np.arange(10, dtype=np.float64)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def simple_const_mem(A):
    """Kernel body: write ``CONST1D[i] + 1.0`` into ``A[i]`` for thread ``i``.

    The values are read through a ``cuda.const.array_like`` copy of the
    module-level ``CONST1D`` array.
    """
    const_vals = cuda.const.array_like(CONST1D)
    idx = cuda.grid(1)

    A[idx] = const_vals[idx] + 1.0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def func_with_lots_of_registers(x, a, b, c, d, e, f):
    """Kernel body that keeps twenty accumulators live at once.

    The register-limit tests in this file compile this function and inspect
    ``get_regs_per_thread()``; the statements are kept exactly as they are so
    the register usage of the compiled kernel does not change.

    ``a`` is the loop trip count; ``b`` .. ``f`` are the operands combined
    into the accumulators; the final sums are written to ``x`` at this
    thread's ``cuda.grid(1)`` index.
    """
    # Floating-point accumulators updated with +=, *= and /=
    a1 = 1.0
    a2 = 1.0
    a3 = 1.0
    a4 = 1.0
    a5 = 1.0
    b1 = 1.0
    b2 = 1.0
    b3 = 1.0
    b4 = 1.0
    b5 = 1.0
    c1 = 1.0
    c2 = 1.0
    c3 = 1.0
    c4 = 1.0
    c5 = 1.0
    # Integer accumulators updated with <<=
    d1 = 10
    d2 = 10
    d3 = 10
    d4 = 10
    d5 = 10
    for i in range(a):
        a1 += b
        a2 += c
        a3 += d
        a4 += e
        a5 += f
        b1 *= b
        b2 *= c
        b3 *= d
        b4 *= e
        b5 *= f
        c1 /= b
        c2 /= c
        c3 /= d
        c4 /= e
        c5 /= f
        d1 <<= b
        d2 <<= c
        d3 <<= d
        d4 <<= e
        d5 <<= f
    # Every accumulator feeds the stored result, so none of them is unused.
    x[cuda.grid(1)] = a1 + a2 + a3 + a4 + a5
    x[cuda.grid(1)] += b1 + b2 + b3 + b4 + b5
    x[cuda.grid(1)] += c1 + c2 + c3 + c4 + c5
    x[cuda.grid(1)] += d1 + d2 + d3 + d4 + d5
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def simple_smem(ary, dty):
    """Kernel body: fill a 100-element shared array and copy it to ``ary``.

    The thread with grid index 0 writes ``0..99`` into a shared array of
    element type ``dty``; after a ``cuda.syncthreads()`` every thread copies
    the element at its own index into ``ary``.
    """
    shared_buf = cuda.shared.array(100, dty)
    tid = cuda.grid(1)
    if tid == 0:
        for k in range(100):
            shared_buf[k] = k
    cuda.syncthreads()
    ary[tid] = shared_buf[tid]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def coop_smem2d(ary):
    """Kernel body: stage ``(i + 1) / (j + 1)`` in shared memory, then store.

    Each thread writes its value into a ``(10, 20)`` float32 shared array,
    synchronizes, and copies that element into ``ary`` at the same index.
    """
    row, col = cuda.grid(2)
    tile = cuda.shared.array((10, 20), float32)
    tile[row, col] = (row + 1) / (col + 1)
    cuda.syncthreads()
    ary[row, col] = tile[row, col]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def simple_maxthreads(ary):
    """Kernel body: store each thread's 1D grid index into ``ary``."""
    tid = cuda.grid(1)
    ary[tid] = tid
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# Number of elements in the cuda.local.array allocated by simple_lmem.
LMEM_SIZE = 1000
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def simple_lmem(A, B, dty):
    """Kernel body: copy ``A`` into ``B`` through a local array.

    A ``cuda.local.array`` of ``LMEM_SIZE`` elements of type ``dty`` is
    filled from ``A`` and then written out to ``B``, element by element.
    """
    scratch = cuda.local.array(LMEM_SIZE, dty)
    for src in range(scratch.shape[0]):
        scratch[src] = A[src]
    for dst in range(scratch.shape[0]):
        B[dst] = scratch[dst]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@skip_on_cudasim("Linking unsupported in the simulator")
class TestLinker(CUDATestCase):
    """Tests of linking external code into kernels and of the resource
    queries (registers, constant/shared/local memory, max threads) exposed
    on compiled kernels."""

    @require_context
    def test_linker_basic(self):
        """Simply go through the constructor and destructor"""
        linker = _Linker.new(cc=(7, 5))
        del linker

    def _test_linking(self, eager):
        """Link ``jitlink.ptx`` into a kernel and check the computed value.

        When ``eager`` is true the kernel is given an explicit signature;
        otherwise it is declared without one.
        """
        global bar  # must be a global; otherwise it is recognized as a freevar
        bar = cuda.declare_device("bar", "int32(int32)")

        link = str(test_data_dir / "jitlink.ptx")

        if eager:
            args = ["void(int32[:], int32[:])"]
        else:
            args = []

        @cuda.jit(*args, link=[link])
        def foo(x, y):
            i = cuda.grid(1)
            x[i] += bar(y[i])

        A = np.array([123], dtype=np.int32)
        B = np.array([321], dtype=np.int32)

        foo[1, 1](A, B)

        # assertEqual reports both values on failure
        self.assertEqual(A[0], 123 + 2 * 321)

    def test_linking_lazy_compile(self):
        """Linking works for a kernel declared without a signature."""
        self._test_linking(eager=False)

    def test_linking_eager_compile(self):
        """Linking works for a kernel declared with a signature."""
        self._test_linking(eager=True)

    def test_linking_cu(self):
        """A CUDA C source file can be linked and its function called."""
        bar = cuda.declare_device("bar", "int32(int32)")

        link = str(test_data_dir / "jitlink.cu")

        @cuda.jit(link=[link])
        def kernel(r, x):
            i = cuda.grid(1)

            if i < len(r):
                r[i] = bar(x[i])

        x = np.arange(10, dtype=np.int32)
        r = np.zeros_like(x)

        kernel[1, 32](r, x)

        # Matches the operation of bar() in jitlink.cu
        expected = x * 2
        np.testing.assert_array_equal(r, expected)

    def test_linking_cu_log_warning(self):
        """Linking ``warn.cu`` emits exactly one NVRTC log warning."""
        bar = cuda.declare_device("bar", "int32(int32)")

        link = str(test_data_dir / "warn.cu")

        with pytest.warns(UserWarning) as w:

            @cuda.jit("void(int32)", link=[link])
            def kernel(x):
                bar(x)

        nvrtc_log_warnings = [
            wi for wi in w if "NVRTC log messages" in str(wi.message)
        ]
        self.assertEqual(
            len(nvrtc_log_warnings), 1, "Expected warnings from NVRTC"
        )
        # Check the warning refers to the log messages
        self.assertIn("NVRTC log messages", str(nvrtc_log_warnings[0].message))
        # Check the message pertaining to the unused variable is provided
        self.assertIn(
            "declared but never referenced", str(nvrtc_log_warnings[0].message)
        )

    def test_linking_cu_error(self):
        """Linking ``error.cu`` raises an NVRTC error naming the problem."""
        bar = cuda.declare_device("bar", "int32(int32)")

        link = str(test_data_dir / "error.cu")

        from cuda.core.experimental._utils.cuda_utils import NVRTCError

        errty = NVRTCError
        with self.assertRaises(errty) as e:

            @cuda.jit("void(int32)", link=[link])
            def kernel(x):
                bar(x)

        msg = e.exception.args[0]
        # Check the error message refers to the NVRTC compile
        nvrtc_err_str = "NVRTC_ERROR_COMPILATION"
        self.assertIn(nvrtc_err_str, msg)
        # Check the expected error in the CUDA source is reported
        self.assertIn('identifier "SYNTAX" is undefined', msg)
        # Check the filename is reported correctly
        self.assertIn('in the compilation of "error.cu"', msg)

    def test_linking_unknown_filetype_error(self):
        """A file with an unrecognized extension is rejected."""
        expected_err = "Don't know how to link file with extension .cuh"
        with self.assertRaisesRegex(RuntimeError, expected_err):

            @cuda.jit("void()", link=["header.cuh"])
            def kernel():
                pass

    def test_linking_file_with_no_extension_error(self):
        """A file without any extension is rejected."""
        expected_err = "Don't know how to link file with no extension"
        with self.assertRaisesRegex(RuntimeError, expected_err):

            @cuda.jit("void()", link=["data"])
            def kernel():
                pass

    @skip_if_cuda_includes_missing
    def test_linking_cu_cuda_include(self):
        """A CUDA C source that uses CUDA includes can be linked."""
        link = str(test_data_dir / "cuda_include.cu")

        # An exception will be raised when linking this kernel due to the
        # compile failure if CUDA includes cannot be found by Nvrtc.
        @cuda.jit("void()", link=[link])
        def kernel():
            pass

    def test_try_to_link_nonexistent(self):
        """Linking a missing file raises ``LinkerError`` naming the file."""
        with self.assertRaises(LinkerError) as e:

            @cuda.jit("void(int32[::1])", link=["nonexistent.a"])
            def f(x):
                x[0] = 0

        self.assertIn("nonexistent.a not found", e.exception.args)

    def test_set_registers_no_max(self):
        """Ensure that the jitted kernel used in the test_set_registers_* tests
        uses more than 57 registers - this ensures that test_set_registers_*
        are really checking that they reduced the number of registers used from
        something greater than the maximum."""
        compiled = cuda.jit(func_with_lots_of_registers)
        compiled = compiled.specialize(np.empty(32), *range(6))
        self.assertGreater(compiled.get_regs_per_thread(), 57)

    def test_set_registers_57(self):
        """``max_registers=57`` caps the registers used per thread at 57."""
        compiled = cuda.jit(max_registers=57)(func_with_lots_of_registers)
        compiled = compiled.specialize(np.empty(32), *range(6))
        self.assertLessEqual(compiled.get_regs_per_thread(), 57)

    def test_set_registers_38(self):
        """``max_registers=38`` caps the registers used per thread at 38."""
        compiled = cuda.jit(max_registers=38)(func_with_lots_of_registers)
        compiled = compiled.specialize(np.empty(32), *range(6))
        self.assertLessEqual(compiled.get_regs_per_thread(), 38)

    def test_set_registers_eager(self):
        """``max_registers`` is honoured when a signature is given."""
        sig = void(float64[::1], int64, int64, int64, int64, int64, int64)
        compiled = cuda.jit(sig, max_registers=38)(func_with_lots_of_registers)
        self.assertLessEqual(compiled.get_regs_per_thread(), 38)

    def test_get_const_mem_size(self):
        """Constant memory size is at least the size of ``CONST1D``."""
        sig = void(float64[::1])
        compiled = cuda.jit(sig)(simple_const_mem)
        const_mem_size = compiled.get_const_mem_size()
        self.assertGreaterEqual(const_mem_size, CONST1D.nbytes)

    def test_get_no_shared_memory(self):
        """A kernel that allocates no shared array reports a size of 0."""
        compiled = cuda.jit(func_with_lots_of_registers)
        compiled = compiled.specialize(np.empty(32), *range(6))
        shared_mem_size = compiled.get_shared_mem_per_block()
        self.assertEqual(shared_mem_size, 0)

    def test_get_shared_mem_per_block(self):
        """A 100-element int32 shared array is reported as 400."""
        sig = void(int32[::1], typeof(np.int32))
        compiled = cuda.jit(sig)(simple_smem)
        shared_mem_size = compiled.get_shared_mem_per_block()
        self.assertEqual(shared_mem_size, 400)

    def test_get_shared_mem_per_specialized(self):
        """A 100-element float64 shared array is reported as 800."""
        compiled = cuda.jit(simple_smem)
        compiled_specialized = compiled.specialize(
            np.zeros(100, dtype=np.int32), np.float64
        )
        shared_mem_size = compiled_specialized.get_shared_mem_per_block()
        self.assertEqual(shared_mem_size, 800)

    def test_get_max_threads_per_block(self):
        """The reported maximum threads per block is positive."""
        compiled = cuda.jit("void(float32[:,::1])")(coop_smem2d)
        max_threads = compiled.get_max_threads_per_block()
        self.assertGreater(max_threads, 0)

    def test_max_threads_exceeded(self):
        """Launching with one thread too many fails in ``cuLaunchKernel``."""
        compiled = cuda.jit("void(int32[::1])")(simple_maxthreads)
        max_threads = compiled.get_max_threads_per_block()
        nelem = max_threads + 1
        ary = np.empty(nelem, dtype=np.int32)
        # Require the error: a launch that unexpectedly succeeds must fail
        # the test instead of passing without checking anything.
        with self.assertRaises(CudaAPIError) as raises:
            compiled[1, nelem](ary)
        self.assertIn("cuLaunchKernel", raises.exception.msg)

    def test_get_local_mem_per_thread(self):
        """Local memory covers at least ``LMEM_SIZE`` int32 elements."""
        sig = void(int32[::1], int32[::1], typeof(np.int32))
        compiled = cuda.jit(sig)(simple_lmem)
        local_mem_size = compiled.get_local_mem_per_thread()
        calc_size = np.dtype(np.int32).itemsize * LMEM_SIZE
        self.assertGreaterEqual(local_mem_size, calc_size)

    def test_get_local_mem_per_specialized(self):
        """Local memory covers at least ``LMEM_SIZE`` float64 elements."""
        compiled = cuda.jit(simple_lmem)
        compiled_specialized = compiled.specialize(
            np.zeros(LMEM_SIZE, dtype=np.int32),
            np.zeros(LMEM_SIZE, dtype=np.int32),
            np.float64,
        )
        local_mem_size = compiled_specialized.get_local_mem_per_thread()
        calc_size = np.dtype(np.float64).itemsize * LMEM_SIZE
        self.assertGreaterEqual(local_mem_size, calc_size)

    @skip_if_nvjitlink_missing("nvJitLink not installed or new enough (>12.3)")
    def test_link_for_different_cc(self):
        """PTX linked with ``cc=(7, 5)`` targets ``sm_75``."""
        linker = _Linker.new(cc=(7, 5), lto=True)
        code = """
            __device__ int foo(int x) {
                return x + 1;
            }
        """
        linker.add_cu(code, "foo")
        ptx = linker.get_linked_ptx().decode()
        assert "target sm_75" in ptx
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|