numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- _numba_cuda_redirector.pth +4 -0
- _numba_cuda_redirector.py +89 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +6 -0
- numba_cuda/_version.py +11 -0
- numba_cuda/numba/cuda/__init__.py +70 -0
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
- numba_cuda/numba/cuda/api.py +580 -0
- numba_cuda/numba/cuda/api_util.py +76 -0
- numba_cuda/numba/cuda/args.py +72 -0
- numba_cuda/numba/cuda/bf16.py +397 -0
- numba_cuda/numba/cuda/cache_hints.py +287 -0
- numba_cuda/numba/cuda/cext/__init__.py +2 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
- numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
- numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
- numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
- numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
- numba_cuda/numba/cuda/cext/_typeof.h +19 -0
- numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
- numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
- numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
- numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
- numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
- numba_cuda/numba/cuda/cg.py +67 -0
- numba_cuda/numba/cuda/cgutils.py +1294 -0
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +541 -0
- numba_cuda/numba/cuda/compiler.py +1396 -0
- numba_cuda/numba/cuda/core/analysis.py +758 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
- numba_cuda/numba/cuda/core/base.py +1332 -0
- numba_cuda/numba/cuda/core/boxing.py +1411 -0
- numba_cuda/numba/cuda/core/bytecode.py +728 -0
- numba_cuda/numba/cuda/core/byteflow.py +2346 -0
- numba_cuda/numba/cuda/core/caching.py +744 -0
- numba_cuda/numba/cuda/core/callconv.py +392 -0
- numba_cuda/numba/cuda/core/codegen.py +171 -0
- numba_cuda/numba/cuda/core/compiler.py +199 -0
- numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +650 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/controlflow.py +989 -0
- numba_cuda/numba/cuda/core/entrypoints.py +57 -0
- numba_cuda/numba/cuda/core/environment.py +66 -0
- numba_cuda/numba/cuda/core/errors.py +917 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/generators.py +387 -0
- numba_cuda/numba/cuda/core/imputils.py +509 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
- numba_cuda/numba/cuda/core/interpreter.py +3617 -0
- numba_cuda/numba/cuda/core/ir.py +1812 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
- numba_cuda/numba/cuda/core/optional.py +129 -0
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
- numba_cuda/numba/cuda/core/registry.py +46 -0
- numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
- numba_cuda/numba/cuda/core/sigutils.py +68 -0
- numba_cuda/numba/cuda/core/ssa.py +498 -0
- numba_cuda/numba/cuda/core/targetconfig.py +330 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +956 -0
- numba_cuda/numba/cuda/core/typed_passes.py +867 -0
- numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
- numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
- numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
- numba_cuda/numba/cuda/cpython/iterators.py +167 -0
- numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
- numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
- numba_cuda/numba/cuda/cpython/slicing.py +322 -0
- numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
- numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
- numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
- numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
- numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
- numba_cuda/numba/cuda/cuda_paths.py +691 -0
- numba_cuda/numba/cuda/cudadecl.py +543 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
- numba_cuda/numba/cuda/cudadrv/error.py +48 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
- numba_cuda/numba/cuda/cudaimpl.py +983 -0
- numba_cuda/numba/cuda/cudamath.py +149 -0
- numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
- numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
- numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
- numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
- numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
- numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
- numba_cuda/numba/cuda/datamodel/manager.py +11 -0
- numba_cuda/numba/cuda/datamodel/models.py +9 -0
- numba_cuda/numba/cuda/datamodel/packer.py +9 -0
- numba_cuda/numba/cuda/datamodel/registry.py +11 -0
- numba_cuda/numba/cuda/datamodel/testing.py +11 -0
- numba_cuda/numba/cuda/debuginfo.py +997 -0
- numba_cuda/numba/cuda/decorators.py +294 -0
- numba_cuda/numba/cuda/descriptor.py +35 -0
- numba_cuda/numba/cuda/device_init.py +155 -0
- numba_cuda/numba/cuda/deviceufunc.py +1021 -0
- numba_cuda/numba/cuda/dispatcher.py +2463 -0
- numba_cuda/numba/cuda/errors.py +72 -0
- numba_cuda/numba/cuda/extending.py +697 -0
- numba_cuda/numba/cuda/flags.py +178 -0
- numba_cuda/numba/cuda/fp16.py +357 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +24 -0
- numba_cuda/numba/cuda/intrinsics.py +531 -0
- numba_cuda/numba/cuda/itanium_mangler.py +214 -0
- numba_cuda/numba/cuda/kernels/__init__.py +2 -0
- numba_cuda/numba/cuda/kernels/reduction.py +265 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3386 -0
- numba_cuda/numba/cuda/libdevicedecl.py +20 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
- numba_cuda/numba/cuda/locks.py +19 -0
- numba_cuda/numba/cuda/lowering.py +1980 -0
- numba_cuda/numba/cuda/mathimpl.py +374 -0
- numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
- numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
- numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
- numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
- numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
- numba_cuda/numba/cuda/misc/appdirs.py +594 -0
- numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
- numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
- numba_cuda/numba/cuda/misc/dump_style.py +41 -0
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
- numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
- numba_cuda/numba/cuda/misc/literal.py +28 -0
- numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
- numba_cuda/numba/cuda/misc/special.py +94 -0
- numba_cuda/numba/cuda/models.py +56 -0
- numba_cuda/numba/cuda/np/arraymath.py +5130 -0
- numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
- numba_cuda/numba/cuda/np/extensions.py +11 -0
- numba_cuda/numba/cuda/np/linalg.py +3087 -0
- numba_cuda/numba/cuda/np/math/__init__.py +0 -0
- numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
- numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
- numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
- numba_cuda/numba/cuda/np/npdatetime.py +969 -0
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
- numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
- numba_cuda/numba/cuda/np/numpy_support.py +798 -0
- numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
- numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
- numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
- numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
- numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
- numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
- numba_cuda/numba/cuda/nvvmutils.py +254 -0
- numba_cuda/numba/cuda/printimpl.py +126 -0
- numba_cuda/numba/cuda/random.py +308 -0
- numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
- numba_cuda/numba/cuda/serialize.py +267 -0
- numba_cuda/numba/cuda/simulator/__init__.py +63 -0
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
- numba_cuda/numba/cuda/simulator/api.py +179 -0
- numba_cuda/numba/cuda/simulator/bf16.py +4 -0
- numba_cuda/numba/cuda/simulator/compiler.py +38 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
- numba_cuda/numba/cuda/simulator/kernel.py +320 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
- numba_cuda/numba/cuda/simulator/reduction.py +19 -0
- numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
- numba_cuda/numba/cuda/simulator_init.py +18 -0
- numba_cuda/numba/cuda/stubs.py +624 -0
- numba_cuda/numba/cuda/target.py +505 -0
- numba_cuda/numba/cuda/testing.py +347 -0
- numba_cuda/numba/cuda/tests/__init__.py +62 -0
- numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
- numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
- numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
- numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
- numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
- numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
- numba_cuda/numba/cuda/tests/data/error.cu +12 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
- numba_cuda/numba/cuda/tests/support.py +900 -0
- numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
- numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
- numba_cuda/numba/cuda/typeconv/rules.py +63 -0
- numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
- numba_cuda/numba/cuda/types/__init__.py +233 -0
- numba_cuda/numba/cuda/types/__init__.pyi +167 -0
- numba_cuda/numba/cuda/types/abstract.py +9 -0
- numba_cuda/numba/cuda/types/common.py +9 -0
- numba_cuda/numba/cuda/types/containers.py +9 -0
- numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
- numba_cuda/numba/cuda/types/cuda_common.py +110 -0
- numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
- numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
- numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
- numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
- numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
- numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
- numba_cuda/numba/cuda/types/ext_types.py +101 -0
- numba_cuda/numba/cuda/types/function_type.py +11 -0
- numba_cuda/numba/cuda/types/functions.py +9 -0
- numba_cuda/numba/cuda/types/iterators.py +9 -0
- numba_cuda/numba/cuda/types/misc.py +9 -0
- numba_cuda/numba/cuda/types/npytypes.py +9 -0
- numba_cuda/numba/cuda/types/scalars.py +9 -0
- numba_cuda/numba/cuda/typing/__init__.py +19 -0
- numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
- numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
- numba_cuda/numba/cuda/typing/bufproto.py +70 -0
- numba_cuda/numba/cuda/typing/builtins.py +1209 -0
- numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
- numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
- numba_cuda/numba/cuda/typing/collections.py +138 -0
- numba_cuda/numba/cuda/typing/context.py +782 -0
- numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
- numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/listdecl.py +147 -0
- numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
- numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
- numba_cuda/numba/cuda/typing/npydecl.py +749 -0
- numba_cuda/numba/cuda/typing/setdecl.py +115 -0
- numba_cuda/numba/cuda/typing/templates.py +1446 -0
- numba_cuda/numba/cuda/typing/typeof.py +301 -0
- numba_cuda/numba/cuda/ufuncs.py +746 -0
- numba_cuda/numba/cuda/utils.py +724 -0
- numba_cuda/numba/cuda/vector_types.py +214 -0
- numba_cuda/numba/cuda/vectorizers.py +260 -0
- numba_cuda-0.22.0.dist-info/METADATA +109 -0
- numba_cuda-0.22.0.dist-info/RECORD +487 -0
- numba_cuda-0.22.0.dist-info/WHEEL +6 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
- numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
- numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from llvmlite import ir
|
|
7
|
+
from numba.cuda.cudadrv import nvrtc, nvvm, runtime
|
|
8
|
+
from numba.cuda.testing import unittest
|
|
9
|
+
from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM
|
|
10
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
|
|
14
|
+
class TestNvvmDriver(unittest.TestCase):
|
|
15
|
+
def get_nvvmir(self):
|
|
16
|
+
versions = NVVM().get_ir_version()
|
|
17
|
+
data_layout = NVVM().data_layout
|
|
18
|
+
return nvvmir_generic.format(data_layout=data_layout, v=versions)
|
|
19
|
+
|
|
20
|
+
def test_nvvm_compile_simple(self):
|
|
21
|
+
nvvmir = self.get_nvvmir()
|
|
22
|
+
ptx = nvvm.compile_ir(nvvmir).decode("utf8")
|
|
23
|
+
self.assertTrue("simple" in ptx)
|
|
24
|
+
self.assertTrue("ave" in ptx)
|
|
25
|
+
|
|
26
|
+
def test_nvvm_compile_nullary_option(self):
|
|
27
|
+
# Tests compilation with an option that doesn't take an argument
|
|
28
|
+
# ("-gen-lto") - all other NVVM options are of the form
|
|
29
|
+
# "-<name>=<value>"
|
|
30
|
+
|
|
31
|
+
nvvmir = self.get_nvvmir()
|
|
32
|
+
arch = "compute_%d%d" % nvrtc.get_lowest_supported_cc()
|
|
33
|
+
ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch=arch)
|
|
34
|
+
|
|
35
|
+
# Verify we correctly passed the option by checking if we got LTOIR
|
|
36
|
+
# from NVVM (by looking for the expected magic number for LTOIR)
|
|
37
|
+
self.assertEqual(ltoir[:4], b"\xed\x43\x4e\x7f")
|
|
38
|
+
|
|
39
|
+
def test_nvvm_bad_option(self):
|
|
40
|
+
# Ensure that unsupported / non-existent options are reported as such
|
|
41
|
+
# to the user / caller
|
|
42
|
+
msg = "-made-up-option=2 is an unsupported option"
|
|
43
|
+
with self.assertRaisesRegex(NvvmError, msg):
|
|
44
|
+
nvvm.compile_ir("", made_up_option=2)
|
|
45
|
+
|
|
46
|
+
def test_nvvm_from_llvm(self):
|
|
47
|
+
m = ir.Module("test_nvvm_from_llvm")
|
|
48
|
+
m.triple = "nvptx64-nvidia-cuda"
|
|
49
|
+
nvvm.add_ir_version(m)
|
|
50
|
+
fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
|
|
51
|
+
kernel = ir.Function(m, fty, name="mycudakernel")
|
|
52
|
+
bldr = ir.IRBuilder(kernel.append_basic_block("entry"))
|
|
53
|
+
bldr.ret_void()
|
|
54
|
+
nvvm.set_cuda_kernel(kernel)
|
|
55
|
+
|
|
56
|
+
m.data_layout = NVVM().data_layout
|
|
57
|
+
ptx = nvvm.compile_ir(str(m)).decode("utf8")
|
|
58
|
+
self.assertTrue("mycudakernel" in ptx)
|
|
59
|
+
self.assertTrue(".address_size 64" in ptx)
|
|
60
|
+
|
|
61
|
+
def test_used_list(self):
|
|
62
|
+
# Construct a module
|
|
63
|
+
m = ir.Module("test_used_list")
|
|
64
|
+
m.triple = "nvptx64-nvidia-cuda"
|
|
65
|
+
m.data_layout = NVVM().data_layout
|
|
66
|
+
nvvm.add_ir_version(m)
|
|
67
|
+
|
|
68
|
+
# Add a function and mark it as a kernel
|
|
69
|
+
fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
|
|
70
|
+
kernel = ir.Function(m, fty, name="mycudakernel")
|
|
71
|
+
bldr = ir.IRBuilder(kernel.append_basic_block("entry"))
|
|
72
|
+
bldr.ret_void()
|
|
73
|
+
nvvm.set_cuda_kernel(kernel)
|
|
74
|
+
|
|
75
|
+
# Verify that the used list was correctly constructed
|
|
76
|
+
used_lines = [
|
|
77
|
+
line for line in str(m).splitlines() if "llvm.used" in line
|
|
78
|
+
]
|
|
79
|
+
msg = 'Expected exactly one @"llvm.used" array'
|
|
80
|
+
self.assertEqual(len(used_lines), 1, msg)
|
|
81
|
+
|
|
82
|
+
used_line = used_lines[0]
|
|
83
|
+
# Kernel should be referenced in the used list
|
|
84
|
+
self.assertIn("mycudakernel", used_line)
|
|
85
|
+
# Check linkage of the used list
|
|
86
|
+
self.assertIn("appending global", used_line)
|
|
87
|
+
# Ensure used list is in the metadata section
|
|
88
|
+
self.assertIn('section "llvm.metadata"', used_line)
|
|
89
|
+
|
|
90
|
+
def test_nvvm_ir_verify_fail(self):
    """A module with a bogus target triple is rejected by NVVM.

    Skipped when the CUDA runtime version is 12.5 or newer, where the bad
    triple no longer fails verification.
    """
    cuda_version = runtime.get_version()
    if cuda_version >= (12, 5):
        self.skipTest("Bad triple doesn't fail verify on CUDA >= 12.5")

    bad_module = ir.Module("test_bad_ir")
    bad_module.triple = "unknown-unknown-unknown"
    bad_module.data_layout = NVVM().data_layout
    nvvm.add_ir_version(bad_module)

    expect_failure = self.assertRaisesRegex(NvvmError, "Invalid target triple")
    with expect_failure:
        nvvm.compile_ir(str(bad_module))
|
|
99
|
+
|
|
100
|
+
def _test_nvvm_support(self, arch):
    """Compile the NVVM IR from ``get_nvvmir()`` for one compute capability.

    ``arch`` supplies the two components that are formatted into the
    ``compute_XY`` option and the expected ``.target sm_XY`` directive.
    The PTX must also mention the ``simple`` and ``ave`` functions.
    """
    arch_option = "compute_{0}{1}".format(*arch)
    expected_target = ".target sm_{0}{1}".format(*arch)

    options = dict(arch=arch_option, ftz=1, prec_sqrt=0, prec_div=0)
    compiled = nvvm.compile_ir(self.get_nvvmir(), **options)
    ptx = compiled.decode("utf8")

    for needle in (expected_target, "simple", "ave"):
        self.assertIn(needle, ptx)
|
|
109
|
+
|
|
110
|
+
def test_nvvm_support(self):
    """Test supported CC by NVVM"""
    # Exercise every compute capability reported by NVRTC, in order.
    supported_ccs = nvrtc.get_supported_ccs()
    for cc in supported_ccs:
        self._test_nvvm_support(arch=cc)
|
|
114
|
+
|
|
115
|
+
def test_nvvm_warning(self):
    """Compiling a noinline kernel must surface NVVM's warning in Python."""
    module = ir.Module("test_nvvm_warning")
    module.triple = "nvptx64-nvidia-cuda"
    module.data_layout = NVVM().data_layout
    nvvm.add_ir_version(module)

    # An empty kernel that takes no arguments
    signature = ir.FunctionType(ir.VoidType(), [])
    kernel = ir.Function(module, signature, name="inlinekernel")
    entry_block = kernel.append_basic_block("entry")
    ir.IRBuilder(entry_block).ret_void()
    nvvm.set_cuda_kernel(kernel)

    # Add the noinline attribute to trigger NVVM to generate a warning
    kernel.attributes.add("noinline")

    llvm_ir = str(module)
    expected_warning = pytest.warns(
        Warning, match="overriding noinline attribute"
    )
    with expected_warning:
        nvvm.compile_ir(llvm_ir)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
class TestLibDevice(unittest.TestCase):
    """Checks on the LibDevice wrapper."""

    def test_libdevice_load(self):
        """A freshly constructed LibDevice exposes a bitcode file."""
        # The first four bytes must be the bitcode magic number
        bitcode_magic = b"BC\xc0\xde"
        header = LibDevice().bc[:4]
        self.assertEqual(header, bitcode_magic)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
nvvmir_generic = """\
|
|
144
|
+
target triple="nvptx64-nvidia-cuda"
|
|
145
|
+
target datalayout = "{data_layout}"
|
|
146
|
+
|
|
147
|
+
define i32 @ave(i32 %a, i32 %b) {{
|
|
148
|
+
entry:
|
|
149
|
+
%add = add nsw i32 %a, %b
|
|
150
|
+
%div = sdiv i32 %add, 2
|
|
151
|
+
ret i32 %div
|
|
152
|
+
}}
|
|
153
|
+
|
|
154
|
+
define void @simple(i32* %data) {{
|
|
155
|
+
entry:
|
|
156
|
+
%0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
|
157
|
+
%1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
|
158
|
+
%mul = mul i32 %0, %1
|
|
159
|
+
%2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
|
160
|
+
%add = add i32 %mul, %2
|
|
161
|
+
%call = call i32 @ave(i32 %add, i32 %add)
|
|
162
|
+
%idxprom = sext i32 %add to i64
|
|
163
|
+
%arrayidx = getelementptr inbounds i32, i32* %data, i64 %idxprom
|
|
164
|
+
store i32 %call, i32* %arrayidx, align 4
|
|
165
|
+
ret void
|
|
166
|
+
}}
|
|
167
|
+
|
|
168
|
+
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone
|
|
169
|
+
|
|
170
|
+
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone
|
|
171
|
+
|
|
172
|
+
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
|
|
173
|
+
|
|
174
|
+
!nvvmir.version = !{{!1}}
|
|
175
|
+
!1 = !{{i32 {v[0]}, i32 {v[1]}, i32 {v[2]}, i32 {v[3]}}}
|
|
176
|
+
|
|
177
|
+
!nvvm.annotations = !{{!2}}
|
|
178
|
+
!2 = !{{void (i32*)* @simple, !"kernel", i32 1}}
|
|
179
|
+
|
|
180
|
+
@"llvm.used" = appending global [1 x i8*] [i8* bitcast (void (i32*)* @simple to i8*)], section "llvm.metadata"
|
|
181
|
+
""" # noqa: E501
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import platform
|
|
6
|
+
|
|
7
|
+
from numba import cuda
|
|
8
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestPinned(CUDATestCase):
    """Stream-based host/device round trips with pinned and unpinned arrays."""

    def _run_copies(self, A):
        """Copy ``A`` to the device and back on a stream; it must be unchanged."""
        expected = np.copy(A)

        stream = cuda.stream()
        # copy=False: the device array is created without the initial transfer
        d_arr = cuda.to_device(A, copy=False, stream=stream)
        d_arr.copy_to_device(A, stream=stream)
        d_arr.copy_to_host(A, stream=stream)
        stream.synchronize()

        unchanged = np.allclose(A, expected)
        self.assertTrue(unchanged)

    def test_pinned(self):
        """Round-trip an array while its host memory is pinned."""
        machine = platform.machine()
        on_arm = machine.startswith(("arm", "aarch64"))
        # A smaller buffer is used on ARM machines: 2MB instead of 16MB
        count = 262144 if on_arm else 2097152
        A = np.arange(count)
        with cuda.pinned(A):
            self._run_copies(A)

    def test_unpinned(self):
        """Round-trip an ordinary (unpinned) host array."""
        element_count = 2 * 1024 * 1024  # 16 MB
        A = np.arange(element_count)
        self._run_copies(A)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import unittest
|
|
5
|
+
from numba.cuda.testing import CUDATestCase
|
|
6
|
+
from numba import cuda
|
|
7
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@skip_on_cudasim("CUDA Profiler unsupported in the simulator")
class TestProfiler(CUDATestCase):
    """Smoke test for the ``cuda.profiling()`` context manager."""

    def test_profiling(self):
        """Enter the profiling context twice, allocating inside each time."""
        for n_elements in (10, 100):
            with cuda.profiling():
                scratch = cuda.device_array(n_elements)
                del scratch
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import threading
|
|
5
|
+
from numba import cuda
|
|
6
|
+
from numba.cuda.cudadrv.driver import driver
|
|
7
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
|
8
|
+
from queue import Queue
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestResetDevice(CUDATestCase):
    """Selecting and closing each device repeatedly must not raise."""

    def test_reset_device(self):
        """Cycle through all devices twice on a worker thread."""
        errors = Queue()

        def cycle_devices(error_sink):
            # Any exception is handed back through the queue, because an
            # exception raised in the worker would not fail the test itself.
            try:
                device_ids = range(driver.get_device_count())
                for _ in range(2):
                    for device_id in device_ids:
                        cuda.select_device(device_id)
                        cuda.close()
            except Exception as exc:
                error_sink.put(exc)

        # Do test on a separate thread so that we don't affect
        # the current context in the main thread.
        worker = threading.Thread(target=cycle_devices, args=(errors,))
        worker.start()
        worker.join()

        collected = []
        while not errors.empty():
            collected.append(errors.get())
        self.assertEqual(collected, [])
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import concurrent.futures
|
|
5
|
+
import multiprocessing
|
|
6
|
+
import os
|
|
7
|
+
from numba.cuda.testing import unittest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def set_visible_devices_and_check():
    """Set CUDA_VISIBLE_DEVICES after importing numba.cuda and count GPUs.

    Returns the length of ``cuda.gpus.lst`` as seen once the environment
    variable has been set to ``"0"``.
    """
    import os
    from numba import cuda

    # The variable is deliberately set only after the import above.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    visible_gpus = cuda.gpus.lst
    return len(visible_gpus)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestVisibleDevices(unittest.TestCase):
    """Checks that CUDA_VISIBLE_DEVICES is honoured when set after import."""

    def test_visible_devices_set_after_import(self):
        """A spawned process that sets CUDA_VISIBLE_DEVICES="0" sees one GPU.

        Skipped when fewer than two GPUs are available, or when
        CUDA_VISIBLE_DEVICES is already set in the environment.
        """
        # See Issue #6149. This test checks that we can set
        # CUDA_VISIBLE_DEVICES after importing Numba and have the value
        # reflected in the available list of GPUs. Prior to the fix for this
        # issue, Numba made a call to runtime.get_version() on import that
        # initialized the driver and froze the list of available devices before
        # CUDA_VISIBLE_DEVICES could be set by the user.

        # Avoid importing cuda at the top level so that
        # set_visible_devices_and_check gets to import it first in its process
        from numba import cuda

        if len(cuda.gpus.lst) in (0, 1):
            self.skipTest("This test requires multiple GPUs")

        if os.environ.get("CUDA_VISIBLE_DEVICES"):
            msg = "Cannot test when CUDA_VISIBLE_DEVICES already set"
            self.skipTest(msg)

        # A "spawn" context gives the child a fresh interpreter.
        with concurrent.futures.ProcessPoolExecutor(
            mp_context=multiprocessing.get_context("spawn")
        ) as exe:
            future = exe.submit(set_visible_devices_and_check)

        visible_gpu_count = future.result()
        # A bare assert would be stripped under `python -O`, letting the
        # test pass without checking anything.
        self.assertEqual(visible_gpu_count, 1)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
# Test does not work on some cards.
|
|
6
|
+
#
|
|
7
|
+
import threading
|
|
8
|
+
from queue import Queue
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from numba import cuda
|
|
12
|
+
from numba.cuda.testing import unittest, CUDATestCase
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def newthread(exception_queue):
    """Thread body: select device 0, do one stream copy, then clean up.

    Any exception raised is put on ``exception_queue`` instead of
    propagating out of the thread.
    """
    try:
        cuda.select_device(0)
        stream = cuda.stream()
        host_data = np.arange(100)
        device_data = cuda.to_device(host_data, stream=stream)
        stream.synchronize()
        # Drop the device array first, then the stream
        del device_data, stream
        cuda.synchronize()
    except Exception as err:
        exception_queue.put(err)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TestSelectDevice(CUDATestCase):
    """Selecting device 0 from a series of short-lived threads must not raise."""

    def test_select_device(self):
        """Run ``newthread`` ten times, one thread after another."""
        errors = Queue()
        for _ in range(10):
            worker = threading.Thread(target=newthread, args=(errors,))
            worker.start()
            # Joining inside the loop keeps the threads strictly sequential
            worker.join()

        collected = []
        while not errors.empty():
            collected.append(errors.get())
        self.assertEqual(collected, [])
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import functools
|
|
6
|
+
import threading
|
|
7
|
+
import numpy as np
|
|
8
|
+
from numba import cuda
|
|
9
|
+
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def with_asyncio_loop(f):
    """Wrap coroutine function ``f`` so that calling it runs to completion.

    Each call creates a new event loop with debug mode enabled, runs the
    coroutine on it, closes the loop (also on failure) and returns the
    coroutine's result.
    """

    @functools.wraps(f)
    def runner(*args, **kwds):
        event_loop = asyncio.new_event_loop()
        event_loop.set_debug(True)
        try:
            coroutine = f(*args, **kwds)
            outcome = event_loop.run_until_complete(coroutine)
        finally:
            event_loop.close()
        return outcome

    return runner
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@unittest.skip("Disabled temporarily due to Issue #317")
@skip_on_cudasim("CUDA Driver API unsupported in the simulator")
class TestCudaStream(CUDATestCase):
    """Tests for stream callbacks and the awaitable ``Stream.async_done()``."""

    def test_add_callback(self):
        """A callback receives the extra argument passed to add_callback."""
        fired = threading.Event()

        def callback(stream, status, event):
            event.set()

        stream = cuda.stream()
        stream.add_callback(callback, fired)
        # Allow up to a second for the callback to run
        self.assertTrue(fired.wait(1.0))

    def test_add_callback_with_default_arg(self):
        """Without an explicit argument the callback receives None."""
        fired = threading.Event()

        def callback(stream, status, arg):
            self.assertIsNone(arg)
            fired.set()

        stream = cuda.stream()
        stream.add_callback(callback)
        self.assertTrue(fired.wait(1.0))

    @with_asyncio_loop
    async def test_async_done(self):
        """async_done() on a new stream can be awaited."""
        stream = cuda.stream()
        pending = stream.async_done()
        await pending

    @with_asyncio_loop
    async def test_parallel_tasks(self):
        """Several tasks, each with its own stream, all round-trip their data."""

        async def async_cuda_fn(value_in: float) -> float:
            stream = cuda.stream()
            h_src = cuda.pinned_array(8)
            h_dst = cuda.pinned_array(8)
            h_src[:] = value_in
            d_ary = cuda.to_device(h_src, stream=stream)
            d_ary.copy_to_host(h_dst, stream=stream)
            done_result = await stream.async_done()
            self.assertEqual(done_result, stream)
            return h_dst.mean()

        values_in = [1, 2, 3, 4]
        tasks = []
        for value in values_in:
            tasks.append(asyncio.create_task(async_cuda_fn(value)))
        values_out = await asyncio.gather(*tasks)
        matches = np.allclose(values_in, values_out)
        self.assertTrue(matches)

    @with_asyncio_loop
    async def test_multiple_async_done(self):
        """Every async_done() awaitable of one stream resolves to that stream."""
        stream = cuda.stream()
        pending = [stream.async_done() for _ in range(4)]
        finished = await asyncio.gather(*pending)
        for result in finished:
            self.assertEqual(result, stream)

    @with_asyncio_loop
    async def test_multiple_async_done_multiple_streams(self):
        """Awaitables from four streams resolve to those same four streams."""
        streams = [cuda.stream() for _ in range(4)]
        pending = [s.async_done() for s in streams]
        finished = await asyncio.gather(*pending)

        # Ensure we got the four original streams in done
        self.assertSetEqual(set(finished), set(streams))

    @with_asyncio_loop
    async def test_cancelled_future(self):
        """Cancelling one awaitable does not stop another from completing."""
        stream = cuda.stream()
        first = stream.async_done()
        second = stream.async_done()
        first.cancel()
        await second
        self.assertTrue(first.cancelled())
        self.assertTrue(second.done())
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@skip_on_cudasim("CUDA Driver API unsupported in the simulator")
class TestFailingStream(CUDATestCase):
    """Checks that a failed kernel surfaces as an exception from async_done()."""

    # This test can only be run in isolation because it corrupts the CUDA
    # context, which cannot be recovered from within the same process. It is
    # left here so that it can be run manually for debugging / testing purposes
    # - or may be re-enabled if in future there is infrastructure added for
    # running tests in a separate process (a subprocess cannot be used because
    # CUDA will have been initialized before the fork, so it cannot be used in
    # the child process).
    #
    # An explicit reason is given so that test reports say why it is skipped;
    # a bare ``@unittest.skip`` records an empty reason.
    @unittest.skip("Corrupts the CUDA context; run manually in isolation")
    @with_asyncio_loop
    async def test_failed_stream(self):
        """Launch a kernel that traps and await the stream's completion."""
        ctx = cuda.current_context()
        # Minimal PTX module whose only kernel executes `trap`
        module = ctx.create_module_ptx("""
.version 6.5
.target sm_30
.address_size 64
.visible .entry failing_kernel() { trap; }
""")
        failing_kernel = module.get_function("failing_kernel")

        stream = cuda.stream()
        failing_kernel.configure((1,), (1,), stream=stream).__call__()
        done = stream.async_done()
        with self.assertRaises(Exception):
            await done
        self.assertIsNotNone(done.exception())
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from numba.cuda.tests import load_testsuite
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_tests(loader, tests, pattern):
    """Build this package's suite from the directory containing this file.

    Only ``loader`` is used; ``tests`` and ``pattern`` are accepted but
    ignored.
    """
    package_dir = os.path.dirname(__file__)
    return load_testsuite(loader, package_dir)
|